Whamcloud - gitweb
cdddcd7db7c9c228a00ef44a515ead404aedac46
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd_modparams.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lnet/klnds/o2iblnd/o2iblnd_modparams.c
37  *
38  * Author: Eric Barton <eric@bartonsoftware.com>
39  */
40
41 #include "o2iblnd.h"
42
43 static int service = 987;
44 CFS_MODULE_PARM(service, "i", int, 0444,
45                 "service number (within RDMA_PS_TCP)");
46
47 static int cksum = 0;
48 CFS_MODULE_PARM(cksum, "i", int, 0644,
49                 "set non-zero to enable message (not RDMA) checksums");
50
51 static int timeout = 50;
52 CFS_MODULE_PARM(timeout, "i", int, 0644,
53                 "timeout (seconds)");
54
55 /* Number of threads in each scheduler pool which is percpt,
56  * we will estimate reasonable value based on CPUs if it's set to zero. */
57 static int nscheds;
58 CFS_MODULE_PARM(nscheds, "i", int, 0444,
59                 "number of threads in each scheduler pool");
60
61 /* NB: this value is shared by all CPTs, it can grow at runtime */
62 static int ntx = 512;
63 CFS_MODULE_PARM(ntx, "i", int, 0444,
64                 "# of message descriptors allocated for each pool");
65
66 /* NB: this value is shared by all CPTs */
67 static int credits = 256;
68 CFS_MODULE_PARM(credits, "i", int, 0444,
69                 "# concurrent sends");
70
71 static int peer_credits = 8;
72 CFS_MODULE_PARM(peer_credits, "i", int, 0444,
73                 "# concurrent sends to 1 peer");
74
75 static int peer_credits_hiw = 0;
76 CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
77                 "when eagerly to return credits");
78
79 static int peer_buffer_credits = 0;
80 CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
81                 "# per-peer router buffer credits");
82
83 static int peer_timeout = 180;
84 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
85                 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
86
87 static char *ipif_name = "ib0";
88 CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
89                 "IPoIB interface name");
90
91 static int retry_count = 5;
92 CFS_MODULE_PARM(retry_count, "i", int, 0644,
93                 "Retransmissions when no ACK received");
94
95 static int rnr_retry_count = 6;
96 CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
97                 "RNR retransmissions");
98
99 static int keepalive = 100;
100 CFS_MODULE_PARM(keepalive, "i", int, 0644,
101                 "Idle time in seconds before sending a keepalive");
102
103 static int ib_mtu = 0;
104 CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
105                 "IB MTU 256/512/1024/2048/4096");
106
107 static int concurrent_sends = 0;
108 CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
109                 "send work-queue sizing");
110
111 static int map_on_demand = 0;
112 CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
113                 "map on demand");
114
115 /* NB: this value is shared by all CPTs, it can grow at runtime */
116 static int fmr_pool_size = 512;
117 CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
118                 "size of fmr pool on each CPT (>= ntx / 4)");
119
120 /* NB: this value is shared by all CPTs, it can grow at runtime */
121 static int fmr_flush_trigger = 384;
122 CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
123                 "# dirty FMRs that triggers pool flush");
124
125 static int fmr_cache = 1;
126 CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
127                 "non-zero to enable FMR caching");
128
129 /*
130  * 0: disable failover
131  * 1: enable failover if necessary
132  * 2: force to failover (for debug)
133  */
134 static int dev_failover = 0;
135 CFS_MODULE_PARM(dev_failover, "i", int, 0444,
136                "HCA failover for bonding (0 off, 1 on, other values reserved)");
137
138
139 static int require_privileged_port = 0;
140 CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
141                 "require privileged port when accepting connection");
142
143 static int use_privileged_port = 1;
144 CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
145                 "use privileged port when initiating connection");
146
147 kib_tunables_t kiblnd_tunables = {
148         .kib_dev_failover           = &dev_failover,
149         .kib_service                = &service,
150         .kib_cksum                  = &cksum,
151         .kib_timeout                = &timeout,
152         .kib_keepalive              = &keepalive,
153         .kib_ntx                    = &ntx,
154         .kib_credits                = &credits,
155         .kib_peertxcredits          = &peer_credits,
156         .kib_peercredits_hiw        = &peer_credits_hiw,
157         .kib_peerrtrcredits         = &peer_buffer_credits,
158         .kib_peertimeout            = &peer_timeout,
159         .kib_default_ipif           = &ipif_name,
160         .kib_retry_count            = &retry_count,
161         .kib_rnr_retry_count        = &rnr_retry_count,
162         .kib_concurrent_sends       = &concurrent_sends,
163         .kib_ib_mtu                 = &ib_mtu,
164         .kib_map_on_demand          = &map_on_demand,
165         .kib_fmr_pool_size          = &fmr_pool_size,
166         .kib_fmr_flush_trigger      = &fmr_flush_trigger,
167         .kib_fmr_cache              = &fmr_cache,
168         .kib_require_priv_port      = &require_privileged_port,
169         .kib_use_priv_port          = &use_privileged_port,
170         .kib_nscheds                = &nscheds
171 };
172
173 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
174
175 static char ipif_basename_space[32];
176
177 static struct ctl_table kiblnd_ctl_table[] = {
178         {
179                 INIT_CTL_NAME
180                 .procname       = "service",
181                 .data           = &service,
182                 .maxlen         = sizeof(int),
183                 .mode           = 0444,
184                 .proc_handler   = &proc_dointvec
185         },
186         {
187                 INIT_CTL_NAME
188                 .procname       = "cksum",
189                 .data           = &cksum,
190                 .maxlen         = sizeof(int),
191                 .mode           = 0644,
192                 .proc_handler   = &proc_dointvec
193         },
194         {
195                 INIT_CTL_NAME
196                 .procname       = "timeout",
197                 .data           = &timeout,
198                 .maxlen         = sizeof(int),
199                 .mode           = 0644,
200                 .proc_handler   = &proc_dointvec
201         },
202         {
203                 INIT_CTL_NAME
204                 .procname       = "ntx",
205                 .data           = &ntx,
206                 .maxlen         = sizeof(int),
207                 .mode           = 0444,
208                 .proc_handler   = &proc_dointvec
209         },
210         {
211                 INIT_CTL_NAME
212                 .procname       = "credits",
213                 .data           = &credits,
214                 .maxlen         = sizeof(int),
215                 .mode           = 0444,
216                 .proc_handler   = &proc_dointvec
217         },
218         {
219                 INIT_CTL_NAME
220                 .procname       = "peer_credits",
221                 .data           = &peer_credits,
222                 .maxlen         = sizeof(int),
223                 .mode           = 0444,
224                 .proc_handler   = &proc_dointvec
225         },
226         {
227                 INIT_CTL_NAME
228                 .procname       = "peer_credits_hiw",
229                 .data           = &peer_credits_hiw,
230                 .maxlen         = sizeof(int),
231                 .mode           = 0444,
232                 .proc_handler   = &proc_dointvec
233         },
234         {
235                 INIT_CTL_NAME
236                 .procname       = "peer_buffer_credits",
237                 .data           = &peer_buffer_credits,
238                 .maxlen         = sizeof(int),
239                 .mode           = 0444,
240                 .proc_handler   = &proc_dointvec
241         },
242         {
243                 INIT_CTL_NAME
244                 .procname       = "peer_timeout",
245                 .data           = &peer_timeout,
246                 .maxlen         = sizeof(int),
247                 .mode           = 0444,
248                 .proc_handler   = &proc_dointvec
249         },
250         {
251                 INIT_CTL_NAME
252                 .procname       = "ipif_name",
253                 .data           = ipif_basename_space,
254                 .maxlen         = sizeof(ipif_basename_space),
255                 .mode           = 0444,
256                 .proc_handler   = &proc_dostring
257         },
258         {
259                 INIT_CTL_NAME
260                 .procname       = "retry_count",
261                 .data           = &retry_count,
262                 .maxlen         = sizeof(int),
263                 .mode           = 0644,
264                 .proc_handler   = &proc_dointvec
265         },
266         {
267                 INIT_CTL_NAME
268                 .procname       = "rnr_retry_count",
269                 .data           = &rnr_retry_count,
270                 .maxlen         = sizeof(int),
271                 .mode           = 0644,
272                 .proc_handler   = &proc_dointvec
273         },
274         {
275                 INIT_CTL_NAME
276                 .procname       = "keepalive",
277                 .data           = &keepalive,
278                 .maxlen         = sizeof(int),
279                 .mode           = 0644,
280                 .proc_handler   = &proc_dointvec
281         },
282         {
283                 INIT_CTL_NAME
284                 .procname       = "concurrent_sends",
285                 .data           = &concurrent_sends,
286                 .maxlen         = sizeof(int),
287                 .mode           = 0444,
288                 .proc_handler   = &proc_dointvec
289         },
290         {
291                 INIT_CTL_NAME
292                 .procname       = "ib_mtu",
293                 .data           = &ib_mtu,
294                 .maxlen         = sizeof(int),
295                 .mode           = 0444,
296                 .proc_handler   = &proc_dointvec
297         },
298         {
299                 INIT_CTL_NAME
300                 .procname       = "map_on_demand",
301                 .data           = &map_on_demand,
302                 .maxlen         = sizeof(int),
303                 .mode           = 0444,
304                 .proc_handler   = &proc_dointvec
305         },
306         {
307                 INIT_CTL_NAME
308                 .procname       = "fmr_pool_size",
309                 .data           = &fmr_pool_size,
310                 .maxlen         = sizeof(int),
311                 .mode           = 0444,
312                 .proc_handler   = &proc_dointvec
313         },
314         {
315                 INIT_CTL_NAME
316                 .procname       = "fmr_flush_trigger",
317                 .data           = &fmr_flush_trigger,
318                 .maxlen         = sizeof(int),
319                 .mode           = 0444,
320                 .proc_handler   = &proc_dointvec
321         },
322         {
323                 INIT_CTL_NAME
324                 .procname       = "fmr_cache",
325                 .data           = &fmr_cache,
326                 .maxlen         = sizeof(int),
327                 .mode           = 0444,
328                 .proc_handler   = &proc_dointvec
329         },
330         {
331                 INIT_CTL_NAME
332                 .procname       = "dev_failover",
333                 .data           = &dev_failover,
334                 .maxlen         = sizeof(int),
335                 .mode           = 0444,
336                 .proc_handler   = &proc_dointvec
337         },
338         { 0 }
339 };
340
341 static struct ctl_table kiblnd_top_ctl_table[] = {
342         {
343                 INIT_CTL_NAME
344                 .procname       = "o2iblnd",
345                 .data           = NULL,
346                 .maxlen         = 0,
347                 .mode           = 0555,
348                 .child          = kiblnd_ctl_table
349         },
350         { 0 }
351 };
352
/*
 * Copy the string tunable 'str' into the fixed-size buffer 'space'.
 *
 * space: destination buffer of 'size' bytes
 * str:   NUL-terminated source string; NULL is treated as an empty string
 * size:  capacity of 'space' in bytes, including the terminating NUL
 *
 * The result is always NUL-terminated; a source longer than size - 1
 * characters is truncated, and any unused tail of the buffer is
 * zero-filled (strncpy semantics).  Nothing is written when 'space' is
 * NULL or 'size' is not positive: without that check a zero size would
 * store the terminator at space[-1], outside the buffer.
 */
void
kiblnd_initstrtunable(char *space, char *str, int size)
{
        if (space == NULL || size <= 0)
                return;

        if (str == NULL) {
                space[0] = 0;
                return;
        }

        /* copy at most size - 1 characters, then terminate explicitly */
        strncpy(space, str, size - 1);
        space[size - 1] = 0;
}
359
360 static void
361 kiblnd_sysctl_init (void)
362 {
363         kiblnd_initstrtunable(ipif_basename_space, ipif_name,
364                               sizeof(ipif_basename_space));
365
366         kiblnd_tunables.kib_sysctl =
367                 register_sysctl_table(kiblnd_top_ctl_table);
368
369         if (kiblnd_tunables.kib_sysctl == NULL)
370                 CWARN("Can't setup /proc tunables\n");
371 }
372
373 static void
374 kiblnd_sysctl_fini (void)
375 {
376         if (kiblnd_tunables.kib_sysctl != NULL)
377                 unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
378 }
379
380 #else
381
/* Build without the sysctl table: setup has nothing to do. */
static void
kiblnd_sysctl_init(void)
{
        /* intentionally empty */
}

/* Build without the sysctl table: teardown has nothing to do. */
static void
kiblnd_sysctl_fini(void)
{
        /* intentionally empty */
}
391
392 #endif
393
394 int
395 kiblnd_tunables_init (void)
396 {
397         if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
398                 CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
399                        *kiblnd_tunables.kib_ib_mtu);
400                 return -EINVAL;
401         }
402
403         if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
404                 *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
405
406         if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
407                 *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
408
409         if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
410                 *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
411
412         if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
413                 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
414
415         if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
416                 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
417
418         if (*kiblnd_tunables.kib_map_on_demand < 0 ||
419             *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
420                 *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
421
422         if (*kiblnd_tunables.kib_map_on_demand == 1)
423                 *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
424
425         if (*kiblnd_tunables.kib_concurrent_sends == 0) {
426                 if (*kiblnd_tunables.kib_map_on_demand > 0 &&
427                     *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
428                         *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
429                 else
430                         *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
431         }
432
433         if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
434                 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
435
436         if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
437                 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
438
439         if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
440                 CWARN("Concurrent sends %d is lower than message queue size: %d, "
441                       "performance may drop slightly.\n",
442                       *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
443         }
444
445         kiblnd_sysctl_init();
446         return 0;
447 }
448
/*
 * Counterpart of kiblnd_tunables_init(): the only teardown step is
 * kiblnd_sysctl_fini().
 */
void
kiblnd_tunables_fini(void)
{
        kiblnd_sysctl_fini();
}