1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/o2iblnd/o2iblnd_modparams.c
38 * Author: Eric Barton <eric@bartonsoftware.com>
/*
 * Module tunables.
 *
 * Each tunable is a file-scope variable initialised to its default value
 * and described by a CFS_MODULE_PARM() entry (name, type code, C type,
 * mode, help text).  The fourth argument looks like an octal permission
 * mode: most entries use 0444, while cksum, timeout, retry_count,
 * rnr_retry_count and keepalive use 0644.  The rest of this file reaches
 * the variables through the pointers collected in kiblnd_tunables.
 *
 * NOTE(review): CFS_MODULE_PARM is defined elsewhere; presumably it wraps
 * the kernel's module_param()/MODULE_PARM_DESC() -- confirm.
 */
43 static int service = 987;
44 CFS_MODULE_PARM(service, "i", int, 0444,
45 "service number (within RDMA_PS_TCP)");
48 CFS_MODULE_PARM(cksum, "i", int, 0644,
49 "set non-zero to enable message (not RDMA) checksums");
51 static int timeout = 50;
52 CFS_MODULE_PARM(timeout, "i", int, 0644,
56 CFS_MODULE_PARM(ntx, "i", int, 0444,
57 "# of message descriptors");
/* kiblnd_tunables_init() logs an error when credits exceeds ntx. */
59 static int credits = 64;
60 CFS_MODULE_PARM(credits, "i", int, 0444,
61 "# concurrent sends");
/*
 * kiblnd_tunables_init() clamps peer_credits into
 * [IBLND_CREDITS_DEFAULT, IBLND_CREDITS_MAX].
 */
63 static int peer_credits = 8;
64 CFS_MODULE_PARM(peer_credits, "i", int, 0444,
65 "# concurrent sends to 1 peer");
/*
 * Values below peer_credits / 2 (including the default 0, when that
 * quotient is positive) are raised to peer_credits / 2 by
 * kiblnd_tunables_init(), which also caps the value at peer_credits - 1.
 */
67 static int peer_credits_hiw = 0;
68 CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
69 "when eagerly to return credits");
71 static int peer_timeout = 0;
72 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
73 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
75 static char *ipif_name = "ib0";
76 CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
77 "IPoIB interface name");
79 static int retry_count = 5;
80 CFS_MODULE_PARM(retry_count, "i", int, 0644,
81 "Retransmissions when no ACK received");
83 static int rnr_retry_count = 6;
84 CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
85 "RNR retransmissions");
87 static int keepalive = 100;
88 CFS_MODULE_PARM(keepalive, "i", int, 0644,
89 "Idle time in seconds before sending a keepalive");
/* Checked with kiblnd_translate_mtu() in kiblnd_tunables_init(). */
91 static int ib_mtu = 0;
92 CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
93 "IB MTU 256/512/1024/2048/4096");
/* 0 asks kiblnd_tunables_init() to derive a value from peer_credits. */
95 static int concurrent_sends = 0;
96 CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
97 "send work-queue sizing");
/*
 * 0 disables map-on-demand; kiblnd_tunables_init() also resets negative
 * values and values >= IBLND_MAX_RDMA_FRAGS to 0.
 */
99 static int map_on_demand = 0;
100 CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
103 static int fmr_pool_size = 512;
104 CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
105 "size of the fmr pool (>= ntx)");
107 static int fmr_flush_trigger = 384;
108 CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
109 "# dirty FMRs that triggers pool flush");
111 static int fmr_cache = 1;
112 CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
113 "non-zero to enable FMR caching");
115 static int pmr_pool_size = 512;
116 CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
117 "size of the MR cache pmr pool");
/*
 * Table of pointers to the tunable variables defined above.  Code in this
 * file reads and adjusts the tunables by dereferencing these members
 * (e.g. *kiblnd_tunables.kib_peercredits) rather than naming the static
 * variables directly.  Note that kib_default_ipif holds the address of the
 * ipif_name pointer, not of the string itself.
 */
119 kib_tunables_t kiblnd_tunables = {
120 .kib_service = &service,
122 .kib_timeout = &timeout,
123 .kib_keepalive = &keepalive,
125 .kib_credits = &credits,
126 .kib_peercredits = &peer_credits,
127 .kib_peercredits_hiw = &peer_credits_hiw,
128 .kib_peertimeout = &peer_timeout,
129 .kib_default_ipif = &ipif_name,
130 .kib_retry_count = &retry_count,
131 .kib_rnr_retry_count = &rnr_retry_count,
132 .kib_concurrent_sends = &concurrent_sends,
133 .kib_ib_mtu = &ib_mtu,
134 .kib_map_on_demand = &map_on_demand,
135 .kib_fmr_pool_size = &fmr_pool_size,
136 .kib_fmr_flush_trigger = &fmr_flush_trigger,
137 .kib_fmr_cache = &fmr_cache,
138 .kib_pmr_pool_size = &pmr_pool_size,
/*
 * sysctl view of the tunables.  This section is compiled only when
 * CONFIG_SYSCTL is defined and CFS_SYSFS_MODULE_PARM evaluates to zero.
 */
141 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
/*
 * Buffer behind the "ipif_name" sysctl entry; kiblnd_sysctl_init() fills
 * it from ipif_name before the table is registered.
 */
143 static char ipif_basename_space[32];
/*
 * When HAVE_SYSCTL_UNNUMBERED is not defined, each sysctl entry gets its
 * own enumerated ID.
 */
145 #ifndef HAVE_SYSCTL_UNNUMBERED
153 O2IBLND_PEER_CREDITS,
154 O2IBLND_PEER_CREDITS_HIW,
155 O2IBLND_PEER_TIMEOUT,
156 O2IBLND_IPIF_BASENAME,
158 O2IBLND_RNR_RETRY_COUNT,
160 O2IBLND_CONCURRENT_SENDS,
162 O2IBLND_MAP_ON_DEMAND,
163 O2IBLND_FMR_POOL_SIZE,
164 O2IBLND_FMR_FLUSH_TRIGGER,
166 O2IBLND_PMR_POOL_SIZE
/* Alternative definitions: every entry ID is CTL_UNNUMBERED. */
170 #define O2IBLND_SERVICE CTL_UNNUMBERED
171 #define O2IBLND_CKSUM CTL_UNNUMBERED
172 #define O2IBLND_TIMEOUT CTL_UNNUMBERED
173 #define O2IBLND_NTX CTL_UNNUMBERED
174 #define O2IBLND_CREDITS CTL_UNNUMBERED
175 #define O2IBLND_PEER_CREDITS CTL_UNNUMBERED
176 #define O2IBLND_PEER_CREDITS_HIW CTL_UNNUMBERED
177 #define O2IBLND_PEER_TIMEOUT CTL_UNNUMBERED
178 #define O2IBLND_IPIF_BASENAME CTL_UNNUMBERED
179 #define O2IBLND_RETRY_COUNT CTL_UNNUMBERED
180 #define O2IBLND_RNR_RETRY_COUNT CTL_UNNUMBERED
181 #define O2IBLND_KEEPALIVE CTL_UNNUMBERED
182 #define O2IBLND_CONCURRENT_SENDS CTL_UNNUMBERED
183 #define O2IBLND_IB_MTU CTL_UNNUMBERED
184 #define O2IBLND_MAP_ON_DEMAND CTL_UNNUMBERED
185 #define O2IBLND_FMR_POOL_SIZE CTL_UNNUMBERED
186 #define O2IBLND_FMR_FLUSH_TRIGGER CTL_UNNUMBERED
187 #define O2IBLND_FMR_CACHE CTL_UNNUMBERED
188 #define O2IBLND_PMR_POOL_SIZE CTL_UNNUMBERED
/*
 * sysctl entries for the individual tunables.  Integer tunables point
 * .data at the corresponding module variable, use .maxlen = sizeof(int)
 * and are handled by proc_dointvec.  "ipif_name" is the only string entry:
 * it is backed by ipif_basename_space and handled by proc_dostring.
 */
192 static cfs_sysctl_table_t kiblnd_ctl_table[] = {
194 .ctl_name = O2IBLND_SERVICE,
195 .procname = "service",
197 .maxlen = sizeof(int),
199 .proc_handler = &proc_dointvec
202 .ctl_name = O2IBLND_CKSUM,
205 .maxlen = sizeof(int),
207 .proc_handler = &proc_dointvec
210 .ctl_name = O2IBLND_TIMEOUT,
211 .procname = "timeout",
213 .maxlen = sizeof(int),
215 .proc_handler = &proc_dointvec
218 .ctl_name = O2IBLND_NTX,
221 .maxlen = sizeof(int),
223 .proc_handler = &proc_dointvec
226 .ctl_name = O2IBLND_CREDITS,
227 .procname = "credits",
229 .maxlen = sizeof(int),
231 .proc_handler = &proc_dointvec
234 .ctl_name = O2IBLND_PEER_CREDITS,
235 .procname = "peer_credits",
236 .data = &peer_credits,
237 .maxlen = sizeof(int),
239 .proc_handler = &proc_dointvec
242 .ctl_name = O2IBLND_PEER_CREDITS_HIW,
243 .procname = "peer_credits_hiw",
244 .data = &peer_credits_hiw,
245 .maxlen = sizeof(int),
247 .proc_handler = &proc_dointvec
250 .ctl_name = O2IBLND_PEER_TIMEOUT,
251 .procname = "peer_timeout",
252 .data = &peer_timeout,
253 .maxlen = sizeof(int),
255 .proc_handler = &proc_dointvec
/*
 * String entry: .data is the ipif_basename_space copy made by
 * kiblnd_sysctl_init(), not the ipif_name pointer itself.
 */
258 .ctl_name = O2IBLND_IPIF_BASENAME,
259 .procname = "ipif_name",
260 .data = ipif_basename_space,
261 .maxlen = sizeof(ipif_basename_space),
263 .proc_handler = &proc_dostring
266 .ctl_name = O2IBLND_RETRY_COUNT,
267 .procname = "retry_count",
268 .data = &retry_count,
269 .maxlen = sizeof(int),
271 .proc_handler = &proc_dointvec
274 .ctl_name = O2IBLND_RNR_RETRY_COUNT,
275 .procname = "rnr_retry_count",
276 .data = &rnr_retry_count,
277 .maxlen = sizeof(int),
279 .proc_handler = &proc_dointvec
282 .ctl_name = O2IBLND_KEEPALIVE,
283 .procname = "keepalive",
285 .maxlen = sizeof(int),
287 .proc_handler = &proc_dointvec
290 .ctl_name = O2IBLND_CONCURRENT_SENDS,
291 .procname = "concurrent_sends",
292 .data = &concurrent_sends,
293 .maxlen = sizeof(int),
295 .proc_handler = &proc_dointvec
298 .ctl_name = O2IBLND_IB_MTU,
299 .procname = "ib_mtu",
301 .maxlen = sizeof(int),
303 .proc_handler = &proc_dointvec
306 .ctl_name = O2IBLND_MAP_ON_DEMAND,
307 .procname = "map_on_demand",
308 .data = &map_on_demand,
309 .maxlen = sizeof(int),
311 .proc_handler = &proc_dointvec
315 .ctl_name = O2IBLND_FMR_POOL_SIZE,
316 .procname = "fmr_pool_size",
317 .data = &fmr_pool_size,
318 .maxlen = sizeof(int),
320 .proc_handler = &proc_dointvec
323 .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
324 .procname = "fmr_flush_trigger",
325 .data = &fmr_flush_trigger,
326 .maxlen = sizeof(int),
328 .proc_handler = &proc_dointvec
331 .ctl_name = O2IBLND_FMR_CACHE,
332 .procname = "fmr_cache",
334 .maxlen = sizeof(int),
336 .proc_handler = &proc_dointvec
339 .ctl_name = O2IBLND_PMR_POOL_SIZE,
340 .procname = "pmr_pool_size",
341 .data = &pmr_pool_size,
342 .maxlen = sizeof(int),
344 .proc_handler = &proc_dointvec
/*
 * Top-level sysctl table: a single parent entry named "o2iblnd" whose
 * .child is kiblnd_ctl_table.  This is the table that kiblnd_sysctl_init()
 * passes to cfs_register_sysctl_table().
 */
349 static cfs_sysctl_table_t kiblnd_top_ctl_table[] = {
351 .ctl_name = CTL_O2IBLND,
352 .procname = "o2iblnd",
356 .child = kiblnd_ctl_table
/*
 * Copy a string tunable into a fixed-size buffer.
 *
 * space: destination buffer
 * str:   source string
 * size:  size of the destination buffer in bytes
 *
 * NOTE(review): strncpy() does not NUL-terminate the destination when str
 * holds size or more characters -- confirm that space[size - 1] is cleared
 * after the copy.
 */
362 kiblnd_initstrtunable(char *space, char *str, int size)
364 strncpy(space, str, size);
/*
 * Register the o2iblnd sysctl table.
 *
 * The interface name is first copied into ipif_basename_space, the buffer
 * behind the "ipif_name" entry.  The handle returned by
 * cfs_register_sysctl_table() is kept in kiblnd_tunables.kib_sysctl; a NULL
 * handle is reported with CWARN.
 */
369 kiblnd_sysctl_init (void)
371 kiblnd_initstrtunable(ipif_basename_space, ipif_name,
372 sizeof(ipif_basename_space));
374 kiblnd_tunables.kib_sysctl =
375 cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);
377 if (kiblnd_tunables.kib_sysctl == NULL)
378 CWARN("Can't setup /proc tunables\n");
/*
 * Unregister the sysctl table, but only if kiblnd_sysctl_init() obtained a
 * non-NULL handle for it.
 */
382 kiblnd_sysctl_fini (void)
384 if (kiblnd_tunables.kib_sysctl != NULL)
385 cfs_unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
/*
 * Second definition of kiblnd_sysctl_init().
 * NOTE(review): presumably the variant built when the sysctl section is
 * compiled out -- confirm against the surrounding preprocessor conditional.
 */
391 kiblnd_sysctl_init (void)
/*
 * Second definition of kiblnd_sysctl_fini().
 * NOTE(review): presumably the variant built when the sysctl section is
 * compiled out -- confirm against the surrounding preprocessor conditional.
 */
396 kiblnd_sysctl_fini (void)
/*
 * Validate the tunables and bring dependent ones into a consistent range,
 * then set up the sysctl table.
 *
 * Two settings are reported as errors: credits greater than ntx, and an
 * ib_mtu that kiblnd_translate_mtu() rejects.  The remaining checks
 * silently adjust values in place through the kiblnd_tunables pointers.
 */
403 kiblnd_tunables_init (void)
/* credits must not exceed ntx. */
405 if (*kiblnd_tunables.kib_credits > *kiblnd_tunables.kib_ntx) {
406 CERROR("Can't set credits(%d) > ntx(%d)\n",
407 *kiblnd_tunables.kib_credits,
408 *kiblnd_tunables.kib_ntx);
/* A negative result from kiblnd_translate_mtu() marks ib_mtu as invalid. */
412 if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
413 CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
414 *kiblnd_tunables.kib_ib_mtu);
/* Clamp peer_credits into [IBLND_CREDITS_DEFAULT, IBLND_CREDITS_MAX]. */
418 if (*kiblnd_tunables.kib_peercredits < IBLND_CREDITS_DEFAULT)
419 *kiblnd_tunables.kib_peercredits = IBLND_CREDITS_DEFAULT;
421 if (*kiblnd_tunables.kib_peercredits > IBLND_CREDITS_MAX)
422 *kiblnd_tunables.kib_peercredits = IBLND_CREDITS_MAX;
/* Clamp peer_credits_hiw into [peer_credits / 2, peer_credits - 1]. */
424 if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peercredits / 2)
425 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peercredits / 2;
427 if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peercredits)
428 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peercredits - 1;
/* Only values in [0, IBLND_MAX_RDMA_FRAGS) are accepted for map_on_demand. */
430 if (*kiblnd_tunables.kib_map_on_demand < 0 ||
431 *kiblnd_tunables.kib_map_on_demand >= IBLND_MAX_RDMA_FRAGS)
432 *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
/*
 * concurrent_sends == 0 means "pick a value": twice peer_credits when
 * map_on_demand is enabled and no larger than IBLND_MAX_RDMA_FRAGS / 8,
 * peer_credits otherwise.
 * NOTE(review): confirm an else separates the two assignments below.
 */
434 if (*kiblnd_tunables.kib_concurrent_sends == 0) {
435 if (*kiblnd_tunables.kib_map_on_demand > 0 &&
436 *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
437 *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peercredits) * 2;
439 *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peercredits);
/* Clamp concurrent_sends into [peer_credits / 2, peer_credits * 2]. */
442 if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peercredits * 2)
443 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peercredits * 2;
445 if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peercredits / 2)
446 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peercredits / 2;
/* Allowed, but worth a warning: fewer concurrent sends than peer credits. */
448 if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peercredits) {
449 CWARN("Concurrent sends %d is lower than message queue size: %d, "
450 "performance may drop slightly.\n",
451 *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peercredits);
/* Publish the (now adjusted) tunables through sysctl. */
454 kiblnd_sysctl_init();
/*
 * Counterpart of kiblnd_tunables_init(): the only teardown performed is
 * kiblnd_sysctl_fini().
 */
459 kiblnd_tunables_fini (void)
461 kiblnd_sysctl_fini();