Whamcloud - gitweb
LU-9680 utils: fix nested attribute handling in liblnetconfig
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  */
31
32 #define DEBUG_SUBSYSTEM S_LNET
33
34 #include <linux/ctype.h>
35 #include <linux/generic-radix-tree.h>
36 #include <linux/log2.h>
37 #include <linux/ktime.h>
38 #include <linux/moduleparam.h>
39 #include <linux/uaccess.h>
40 #ifdef HAVE_SCHED_HEADERS
41 #include <linux/sched/signal.h>
42 #endif
43 #include <net/genetlink.h>
44
45 #include <libcfs/linux/linux-net.h>
46 #include <lnet/udsp.h>
47 #include <lnet/lib-lnet.h>
48
49 #define D_LNI D_CONSOLE
50
51 /*
52  * initialize ln_api_mutex statically, since it needs to be used in
53  * discovery_set callback. That module parameter callback can be called
54  * before module init completes. The mutex needs to be ready for use then.
55  */
56 struct lnet the_lnet = {
57         .ln_api_mutex = __MUTEX_INITIALIZER(the_lnet.ln_api_mutex),
58 };              /* THE state of the network */
59 EXPORT_SYMBOL(the_lnet);
60
61 static char *ip2nets = "";
62 module_param(ip2nets, charp, 0444);
63 MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
64
65 static char *networks = "";
66 module_param(networks, charp, 0444);
67 MODULE_PARM_DESC(networks, "local networks");
68
69 static char *routes = "";
70 module_param(routes, charp, 0444);
71 MODULE_PARM_DESC(routes, "routes to non-local networks");
72
73 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
74 module_param(rnet_htable_size, int, 0444);
75 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
76
77 static int use_tcp_bonding;
78 module_param(use_tcp_bonding, int, 0444);
79 MODULE_PARM_DESC(use_tcp_bonding,
80                  "use_tcp_bonding parameter has been removed");
81
82 unsigned int lnet_numa_range = 0;
83 module_param(lnet_numa_range, uint, 0444);
84 MODULE_PARM_DESC(lnet_numa_range,
85                 "NUMA range to consider during Multi-Rail selection");
86
87 /*
88  * lnet_health_sensitivity determines by how much we decrement the health
89  * value on sending error. The value defaults to 100, which means health
90  * interface health is decremented by 100 points every failure.
91  */
92 unsigned int lnet_health_sensitivity = 100;
93 static int sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp);
94 #ifdef HAVE_KERNEL_PARAM_OPS
95 static struct kernel_param_ops param_ops_health_sensitivity = {
96         .set = sensitivity_set,
97         .get = param_get_int,
98 };
99 #define param_check_health_sensitivity(name, p) \
100                 __param_check(name, p, int)
101 module_param(lnet_health_sensitivity, health_sensitivity, S_IRUGO|S_IWUSR);
102 #else
103 module_param_call(lnet_health_sensitivity, sensitivity_set, param_get_int,
104                   &lnet_health_sensitivity, S_IRUGO|S_IWUSR);
105 #endif
106 MODULE_PARM_DESC(lnet_health_sensitivity,
107                 "Value to decrement the health value by on error");
108
109 /*
110  * lnet_recovery_interval determines how often we should perform recovery
111  * on unhealthy interfaces.
112  */
113 unsigned int lnet_recovery_interval = 1;
114 static int recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp);
115 #ifdef HAVE_KERNEL_PARAM_OPS
116 static struct kernel_param_ops param_ops_recovery_interval = {
117         .set = recovery_interval_set,
118         .get = param_get_int,
119 };
120 #define param_check_recovery_interval(name, p) \
121                 __param_check(name, p, int)
122 module_param(lnet_recovery_interval, recovery_interval, S_IRUGO|S_IWUSR);
123 #else
124 module_param_call(lnet_recovery_interval, recovery_interval_set, param_get_int,
125                   &lnet_recovery_interval, S_IRUGO|S_IWUSR);
126 #endif
127 MODULE_PARM_DESC(lnet_recovery_interval,
128                 "DEPRECATED - Interval to recover unhealthy interfaces in seconds");
129
130 unsigned int lnet_recovery_limit;
131 module_param(lnet_recovery_limit, uint, 0644);
132 MODULE_PARM_DESC(lnet_recovery_limit,
133                  "How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery");
134
135 unsigned int lnet_max_recovery_ping_interval = 900;
136 unsigned int lnet_max_recovery_ping_count = 9;
137 static int max_recovery_ping_interval_set(const char *val,
138                                           cfs_kernel_param_arg_t *kp);
139
140 #define param_check_max_recovery_ping_interval(name, p) \
141                 __param_check(name, p, int)
142
143 #ifdef HAVE_KERNEL_PARAM_OPS
144 static struct kernel_param_ops param_ops_max_recovery_ping_interval = {
145         .set = max_recovery_ping_interval_set,
146         .get = param_get_int,
147 };
148 module_param(lnet_max_recovery_ping_interval, max_recovery_ping_interval, 0644);
149 #else
150 module_param_call(lnet_max_recovery_ping_interval, max_recovery_ping_interval,
151                   param_get_int, &lnet_max_recovery_ping_interval, 0644);
152 #endif
153 MODULE_PARM_DESC(lnet_max_recovery_ping_interval,
154                  "The max interval between LNet recovery pings, in seconds");
155
156 static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
157 static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp);
158
159 static struct kernel_param_ops param_ops_interfaces_max = {
160         .set = intf_max_set,
161         .get = param_get_int,
162 };
163
164 #define param_check_interfaces_max(name, p) \
165                 __param_check(name, p, int)
166
167 #ifdef HAVE_KERNEL_PARAM_OPS
168 module_param(lnet_interfaces_max, interfaces_max, 0644);
169 #else
170 module_param_call(lnet_interfaces_max, intf_max_set, param_get_int,
171                   &param_ops_interfaces_max, 0644);
172 #endif
173 MODULE_PARM_DESC(lnet_interfaces_max,
174                 "Maximum number of interfaces in a node.");
175
176 unsigned lnet_peer_discovery_disabled = 0;
177 static int discovery_set(const char *val, cfs_kernel_param_arg_t *kp);
178
179 static struct kernel_param_ops param_ops_discovery_disabled = {
180         .set = discovery_set,
181         .get = param_get_int,
182 };
183
184 #define param_check_discovery_disabled(name, p) \
185                 __param_check(name, p, int)
186 #ifdef HAVE_KERNEL_PARAM_OPS
187 module_param(lnet_peer_discovery_disabled, discovery_disabled, 0644);
188 #else
189 module_param_call(lnet_peer_discovery_disabled, discovery_set, param_get_int,
190                   &param_ops_discovery_disabled, 0644);
191 #endif
192 MODULE_PARM_DESC(lnet_peer_discovery_disabled,
193                 "Set to 1 to disable peer discovery on this node.");
194
195 unsigned int lnet_drop_asym_route;
196 static int drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp);
197
198 static struct kernel_param_ops param_ops_drop_asym_route = {
199         .set = drop_asym_route_set,
200         .get = param_get_int,
201 };
202
203 #define param_check_drop_asym_route(name, p)    \
204         __param_check(name, p, int)
205 #ifdef HAVE_KERNEL_PARAM_OPS
206 module_param(lnet_drop_asym_route, drop_asym_route, 0644);
207 #else
208 module_param_call(lnet_drop_asym_route, drop_asym_route_set, param_get_int,
209                   &param_ops_drop_asym_route, 0644);
210 #endif
211 MODULE_PARM_DESC(lnet_drop_asym_route,
212                  "Set to 1 to drop asymmetrical route messages.");
213
214 #define LNET_TRANSACTION_TIMEOUT_DEFAULT 150
215 unsigned int lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_DEFAULT;
216 static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
217 #ifdef HAVE_KERNEL_PARAM_OPS
218 static struct kernel_param_ops param_ops_transaction_timeout = {
219         .set = transaction_to_set,
220         .get = param_get_int,
221 };
222
223 #define param_check_transaction_timeout(name, p) \
224                 __param_check(name, p, int)
225 module_param(lnet_transaction_timeout, transaction_timeout, S_IRUGO|S_IWUSR);
226 #else
227 module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
228                   &lnet_transaction_timeout, S_IRUGO|S_IWUSR);
229 #endif
230 MODULE_PARM_DESC(lnet_transaction_timeout,
231                 "Maximum number of seconds to wait for a peer response.");
232
233 #define LNET_RETRY_COUNT_DEFAULT 2
234 unsigned int lnet_retry_count = LNET_RETRY_COUNT_DEFAULT;
235 static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
236 #ifdef HAVE_KERNEL_PARAM_OPS
237 static struct kernel_param_ops param_ops_retry_count = {
238         .set = retry_count_set,
239         .get = param_get_int,
240 };
241
242 #define param_check_retry_count(name, p) \
243                 __param_check(name, p, int)
244 module_param(lnet_retry_count, retry_count, S_IRUGO|S_IWUSR);
245 #else
246 module_param_call(lnet_retry_count, retry_count_set, param_get_int,
247                   &lnet_retry_count, S_IRUGO|S_IWUSR);
248 #endif
249 MODULE_PARM_DESC(lnet_retry_count,
250                  "Maximum number of times to retry transmitting a message");
251
252 unsigned int lnet_response_tracking = 3;
253 static int response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp);
254
255 #ifdef HAVE_KERNEL_PARAM_OPS
256 static struct kernel_param_ops param_ops_response_tracking = {
257         .set = response_tracking_set,
258         .get = param_get_int,
259 };
260
261 #define param_check_response_tracking(name, p)  \
262         __param_check(name, p, int)
263 module_param(lnet_response_tracking, response_tracking, 0644);
264 #else
265 module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
266                   &lnet_response_tracking, 0644);
267 #endif
268 MODULE_PARM_DESC(lnet_response_tracking,
269                  "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
270
271 int lock_prim_nid = 1;
272 module_param(lock_prim_nid, int, 0444);
273 MODULE_PARM_DESC(lock_prim_nid,
274                  "Whether nid passed down by Lustre is locked as primary");
275
276 #define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
277                                   (LNET_RETRY_COUNT_DEFAULT + 1))
278 unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
279 static void lnet_set_lnd_timeout(void)
280 {
281         lnet_lnd_timeout = max((lnet_transaction_timeout - 1) /
282                                (lnet_retry_count + 1), 1U);
283 }
284
285 /*
286  * This sequence number keeps track of how many times DLC was used to
287  * update the local NIs. It is incremented when a NI is added or
288  * removed and checked when sending a message to determine if there is
289  * a need to re-run the selection algorithm. See lnet_select_pathway()
290  * for more details on its usage.
291  */
292 static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
293
294 struct lnet_fail_ping {
295         struct lnet_processid           lfp_id;
296         int                             lfp_errno;
297         char                            lfp_msg[256];
298 };
299
300 struct lnet_genl_ping_list {
301         unsigned int                    lgpl_index;
302         unsigned int                    lgpl_list_count;
303         unsigned int                    lgpl_failed_count;
304         signed long                     lgpl_timeout;
305         struct lnet_nid                 lgpl_src_nid;
306         GENRADIX(struct lnet_fail_ping) lgpl_failed;
307         GENRADIX(struct lnet_processid) lgpl_list;
308 };
309
310 static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
311                      signed long timeout, struct lnet_genl_ping_list *plist,
312                      int n_ids);
313
314 static int lnet_discover(struct lnet_processid *id, u32 force,
315                          struct lnet_genl_ping_list *dlists);
316
317 static int
318 sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
319 {
320         int rc;
321         unsigned *sensitivity = (unsigned *)kp->arg;
322         unsigned long value;
323
324         rc = kstrtoul(val, 0, &value);
325         if (rc) {
326                 CERROR("Invalid module parameter value for 'lnet_health_sensitivity'\n");
327                 return rc;
328         }
329
330         /*
331          * The purpose of locking the api_mutex here is to ensure that
332          * the correct value ends up stored properly.
333          */
334         mutex_lock(&the_lnet.ln_api_mutex);
335
336         if (value > LNET_MAX_HEALTH_VALUE) {
337                 mutex_unlock(&the_lnet.ln_api_mutex);
338                 CERROR("Invalid health value. Maximum: %d value = %lu\n",
339                        LNET_MAX_HEALTH_VALUE, value);
340                 return -EINVAL;
341         }
342
343         if (*sensitivity != 0 && value == 0 && lnet_retry_count != 0) {
344                 lnet_retry_count = 0;
345                 lnet_set_lnd_timeout();
346         }
347
348         *sensitivity = value;
349
350         mutex_unlock(&the_lnet.ln_api_mutex);
351
352         return 0;
353 }
354
355 static int
356 recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
357 {
358         CWARN("'lnet_recovery_interval' has been deprecated\n");
359
360         return 0;
361 }
362
363 static int
364 max_recovery_ping_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
365 {
366         int rc;
367         unsigned long value;
368
369         rc = kstrtoul(val, 0, &value);
370         if (rc) {
371                 CERROR("Invalid module parameter value for 'lnet_max_recovery_ping_interval'\n");
372                 return rc;
373         }
374
375         if (!value) {
376                 CERROR("Invalid max ping timeout. Must be strictly positive\n");
377                 return -EINVAL;
378         }
379
380         /* The purpose of locking the api_mutex here is to ensure that
381          * the correct value ends up stored properly.
382          */
383         mutex_lock(&the_lnet.ln_api_mutex);
384         lnet_max_recovery_ping_interval = value;
385         lnet_max_recovery_ping_count = 0;
386         value >>= 1;
387         while (value) {
388                 lnet_max_recovery_ping_count++;
389                 value >>= 1;
390         }
391         mutex_unlock(&the_lnet.ln_api_mutex);
392
393         return 0;
394 }
395
396 static int
397 discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
398 {
399         int rc;
400         unsigned *discovery_off = (unsigned *)kp->arg;
401         unsigned long value;
402         struct lnet_ping_buffer *pbuf;
403
404         rc = kstrtoul(val, 0, &value);
405         if (rc) {
406                 CERROR("Invalid module parameter value for 'lnet_peer_discovery_disabled'\n");
407                 return rc;
408         }
409
410         value = (value) ? 1 : 0;
411
412         /*
413          * The purpose of locking the api_mutex here is to ensure that
414          * the correct value ends up stored properly.
415          */
416         mutex_lock(&the_lnet.ln_api_mutex);
417
418         if (value == *discovery_off) {
419                 mutex_unlock(&the_lnet.ln_api_mutex);
420                 return 0;
421         }
422
423         /*
424          * We still want to set the discovery value even when LNet is not
425          * running. This is the case when LNet is being loaded and we want
426          * the module parameters to take effect. Otherwise if we're
427          * changing the value dynamically, we want to set it after
428          * updating the peers
429          */
430         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
431                 *discovery_off = value;
432                 mutex_unlock(&the_lnet.ln_api_mutex);
433                 return 0;
434         }
435
436         /* tell peers that discovery setting has changed */
437         lnet_net_lock(LNET_LOCK_EX);
438         pbuf = the_lnet.ln_ping_target;
439         if (value)
440                 pbuf->pb_info.pi_features &= ~LNET_PING_FEAT_DISCOVERY;
441         else
442                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
443         lnet_net_unlock(LNET_LOCK_EX);
444
445         /* only send a push when we're turning off discovery */
446         if (*discovery_off <= 0 && value > 0)
447                 lnet_push_update_to_peers(1);
448         *discovery_off = value;
449
450         mutex_unlock(&the_lnet.ln_api_mutex);
451
452         return 0;
453 }
454
455 static int
456 drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp)
457 {
458         int rc;
459         unsigned int *drop_asym_route = (unsigned int *)kp->arg;
460         unsigned long value;
461
462         rc = kstrtoul(val, 0, &value);
463         if (rc) {
464                 CERROR("Invalid module parameter value for "
465                        "'lnet_drop_asym_route'\n");
466                 return rc;
467         }
468
469         /*
470          * The purpose of locking the api_mutex here is to ensure that
471          * the correct value ends up stored properly.
472          */
473         mutex_lock(&the_lnet.ln_api_mutex);
474
475         if (value == *drop_asym_route) {
476                 mutex_unlock(&the_lnet.ln_api_mutex);
477                 return 0;
478         }
479
480         *drop_asym_route = value;
481
482         mutex_unlock(&the_lnet.ln_api_mutex);
483
484         return 0;
485 }
486
487 static int
488 transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
489 {
490         int rc;
491         unsigned *transaction_to = (unsigned *)kp->arg;
492         unsigned long value;
493
494         rc = kstrtoul(val, 0, &value);
495         if (rc) {
496                 CERROR("Invalid module parameter value for 'lnet_transaction_timeout'\n");
497                 return rc;
498         }
499
500         /*
501          * The purpose of locking the api_mutex here is to ensure that
502          * the correct value ends up stored properly.
503          */
504         mutex_lock(&the_lnet.ln_api_mutex);
505
506         if (value <= lnet_retry_count || value == 0) {
507                 mutex_unlock(&the_lnet.ln_api_mutex);
508                 CERROR("Invalid value for lnet_transaction_timeout (%lu). "
509                        "Has to be greater than lnet_retry_count (%u)\n",
510                        value, lnet_retry_count);
511                 return -EINVAL;
512         }
513
514         if (value == *transaction_to) {
515                 mutex_unlock(&the_lnet.ln_api_mutex);
516                 return 0;
517         }
518
519         *transaction_to = value;
520         /* Update the lnet_lnd_timeout now that we've modified the
521          * transaction timeout
522          */
523         lnet_set_lnd_timeout();
524
525         mutex_unlock(&the_lnet.ln_api_mutex);
526
527         return 0;
528 }
529
530 static int
531 retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
532 {
533         int rc;
534         unsigned *retry_count = (unsigned *)kp->arg;
535         unsigned long value;
536
537         rc = kstrtoul(val, 0, &value);
538         if (rc) {
539                 CERROR("Invalid module parameter value for 'lnet_retry_count'\n");
540                 return rc;
541         }
542
543         /*
544          * The purpose of locking the api_mutex here is to ensure that
545          * the correct value ends up stored properly.
546          */
547         mutex_lock(&the_lnet.ln_api_mutex);
548
549         if (lnet_health_sensitivity == 0 && value > 0) {
550                 mutex_unlock(&the_lnet.ln_api_mutex);
551                 CERROR("Can not set lnet_retry_count when health feature is turned off\n");
552                 return -EINVAL;
553         }
554
555         if (value > lnet_transaction_timeout) {
556                 mutex_unlock(&the_lnet.ln_api_mutex);
557                 CERROR("Invalid value for lnet_retry_count (%lu). "
558                        "Has to be smaller than lnet_transaction_timeout (%u)\n",
559                        value, lnet_transaction_timeout);
560                 return -EINVAL;
561         }
562
563         *retry_count = value;
564
565         /* Update the lnet_lnd_timeout now that we've modified the
566          * retry count
567          */
568         lnet_set_lnd_timeout();
569
570         mutex_unlock(&the_lnet.ln_api_mutex);
571
572         return 0;
573 }
574
575 static int
576 intf_max_set(const char *val, cfs_kernel_param_arg_t *kp)
577 {
578         int value, rc;
579
580         rc = kstrtoint(val, 0, &value);
581         if (rc) {
582                 CERROR("Invalid module parameter value for 'lnet_interfaces_max'\n");
583                 return rc;
584         }
585
586         if (value < LNET_INTERFACES_MIN) {
587                 CWARN("max interfaces provided are too small, setting to %d\n",
588                       LNET_INTERFACES_MAX_DEFAULT);
589                 value = LNET_INTERFACES_MAX_DEFAULT;
590         }
591
592         *(int *)kp->arg = value;
593
594         return 0;
595 }
596
597 static int
598 response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp)
599 {
600         int rc;
601         unsigned long new_value;
602
603         rc = kstrtoul(val, 0, &new_value);
604         if (rc) {
605                 CERROR("Invalid value for 'lnet_response_tracking'\n");
606                 return -EINVAL;
607         }
608
609         if (new_value < 0 || new_value > 3) {
610                 CWARN("Invalid value (%lu) for 'lnet_response_tracking'\n",
611                       new_value);
612                 return -EINVAL;
613         }
614
615         lnet_response_tracking = new_value;
616
617         return 0;
618 }
619
620 static const char *
621 lnet_get_routes(void)
622 {
623         return routes;
624 }
625
626 static const char *
627 lnet_get_networks(void)
628 {
629         const char *nets;
630         int rc;
631
632         if (*networks != 0 && *ip2nets != 0) {
633                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
634                                    "'ip2nets' but not both at once\n");
635                 return NULL;
636         }
637
638         if (*ip2nets != 0) {
639                 rc = lnet_parse_ip2nets(&nets, ip2nets);
640                 return (rc == 0) ? nets : NULL;
641         }
642
643         if (*networks != 0)
644                 return networks;
645
646         return "tcp";
647 }
648
649 static void
650 lnet_init_locks(void)
651 {
652         spin_lock_init(&the_lnet.ln_eq_wait_lock);
653         spin_lock_init(&the_lnet.ln_msg_resend_lock);
654         init_completion(&the_lnet.ln_mt_wait_complete);
655         mutex_init(&the_lnet.ln_lnd_mutex);
656 }
657
/* Slab caches created by lnet_slab_setup(), destroyed by lnet_slab_cleanup() */

/* MEs kmem_cache */
struct kmem_cache *lnet_mes_cachep;
/* kmem_cache for MDs of <= LNET_SMALL_MD_SIZE bytes */
struct kmem_cache *lnet_small_mds_cachep;
/* udsp cache */
struct kmem_cache *lnet_udsp_cachep;
/* response tracker cache */
struct kmem_cache *lnet_rspt_cachep;
/* struct lnet_msg cache */
struct kmem_cache *lnet_msg_cachep;
664
665 static int
666 lnet_slab_setup(void)
667 {
668         /* create specific kmem_cache for MEs and small MDs (i.e., originally
669          * allocated in <size-xxx> kmem_cache).
670          */
671         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(struct lnet_me),
672                                             0, 0, NULL);
673         if (!lnet_mes_cachep)
674                 return -ENOMEM;
675
676         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
677                                                   LNET_SMALL_MD_SIZE, 0, 0,
678                                                   NULL);
679         if (!lnet_small_mds_cachep)
680                 return -ENOMEM;
681
682         lnet_udsp_cachep = kmem_cache_create("lnet_udsp",
683                                              sizeof(struct lnet_udsp),
684                                              0, 0, NULL);
685         if (!lnet_udsp_cachep)
686                 return -ENOMEM;
687
688         lnet_rspt_cachep = kmem_cache_create("lnet_rspt", sizeof(struct lnet_rsp_tracker),
689                                             0, 0, NULL);
690         if (!lnet_rspt_cachep)
691                 return -ENOMEM;
692
693         lnet_msg_cachep = kmem_cache_create("lnet_msg", sizeof(struct lnet_msg),
694                                             0, 0, NULL);
695         if (!lnet_msg_cachep)
696                 return -ENOMEM;
697
698         return 0;
699 }
700
701 static void
702 lnet_slab_cleanup(void)
703 {
704         if (lnet_msg_cachep) {
705                 kmem_cache_destroy(lnet_msg_cachep);
706                 lnet_msg_cachep = NULL;
707         }
708
709         if (lnet_rspt_cachep) {
710                 kmem_cache_destroy(lnet_rspt_cachep);
711                 lnet_rspt_cachep = NULL;
712         }
713
714         if (lnet_udsp_cachep) {
715                 kmem_cache_destroy(lnet_udsp_cachep);
716                 lnet_udsp_cachep = NULL;
717         }
718
719         if (lnet_small_mds_cachep) {
720                 kmem_cache_destroy(lnet_small_mds_cachep);
721                 lnet_small_mds_cachep = NULL;
722         }
723
724         if (lnet_mes_cachep) {
725                 kmem_cache_destroy(lnet_mes_cachep);
726                 lnet_mes_cachep = NULL;
727         }
728 }
729
730 static int
731 lnet_create_remote_nets_table(void)
732 {
733         int               i;
734         struct list_head *hash;
735
736         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
737         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
738         CFS_ALLOC_PTR_ARRAY(hash, LNET_REMOTE_NETS_HASH_SIZE);
739         if (hash == NULL) {
740                 CERROR("Failed to create remote nets hash table\n");
741                 return -ENOMEM;
742         }
743
744         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
745                 INIT_LIST_HEAD(&hash[i]);
746         the_lnet.ln_remote_nets_hash = hash;
747         return 0;
748 }
749
750 static void
751 lnet_destroy_remote_nets_table(void)
752 {
753         int i;
754
755         if (the_lnet.ln_remote_nets_hash == NULL)
756                 return;
757
758         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
759                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
760
761         CFS_FREE_PTR_ARRAY(the_lnet.ln_remote_nets_hash,
762                            LNET_REMOTE_NETS_HASH_SIZE);
763         the_lnet.ln_remote_nets_hash = NULL;
764 }
765
766 static void
767 lnet_destroy_locks(void)
768 {
769         if (the_lnet.ln_res_lock != NULL) {
770                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
771                 the_lnet.ln_res_lock = NULL;
772         }
773
774         if (the_lnet.ln_net_lock != NULL) {
775                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
776                 the_lnet.ln_net_lock = NULL;
777         }
778 }
779
780 static int
781 lnet_create_locks(void)
782 {
783         lnet_init_locks();
784
785         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
786         if (the_lnet.ln_res_lock == NULL)
787                 goto failed;
788
789         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
790         if (the_lnet.ln_net_lock == NULL)
791                 goto failed;
792
793         return 0;
794
795  failed:
796         lnet_destroy_locks();
797         return -ENOMEM;
798 }
799
800 static void lnet_assert_wire_constants(void)
801 {
802         /* Wire protocol assertions generated by 'wirecheck'
803          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
804          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
805          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
806          */
807
808         /* Constants... */
809         BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
810         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
811         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
812         BUILD_BUG_ON(LNET_MSG_ACK != 0);
813         BUILD_BUG_ON(LNET_MSG_PUT != 1);
814         BUILD_BUG_ON(LNET_MSG_GET != 2);
815         BUILD_BUG_ON(LNET_MSG_REPLY != 3);
816         BUILD_BUG_ON(LNET_MSG_HELLO != 4);
817
818         BUILD_BUG_ON((int)sizeof(lnet_nid_t) != 8);
819         BUILD_BUG_ON((int)sizeof(lnet_pid_t) != 4);
820
821         /* Checks for struct lnet_nid */
822         BUILD_BUG_ON((int)sizeof(struct lnet_nid) != 20);
823         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_size) != 0);
824         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_size) != 1);
825         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_type) != 1);
826         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_type) != 1);
827         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_num) != 2);
828         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_num) != 2);
829         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_addr) != 4);
830         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_addr) != 16);
831
832         /* Checks for struct lnet_process_id_packed */
833         BUILD_BUG_ON((int)sizeof(struct lnet_process_id_packed) != 12);
834         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, nid) != 0);
835         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->nid) != 8);
836         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, pid) != 8);
837         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->pid) != 4);
838
839         /* Checks for struct lnet_handle_wire */
840         BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
841         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
842                                    wh_interface_cookie) != 0);
843         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
844         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
845                                    wh_object_cookie) != 8);
846         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
847
848         /* Checks for struct struct lnet_magicversion */
849         BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
850         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
851         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
852         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
853         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
854         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion,
855                                    version_minor) != 6);
856         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
857
858         /* Checks for struct _lnet_hdr_nid4 */
859         BUILD_BUG_ON((int)sizeof(struct _lnet_hdr_nid4) != 72);
860         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_nid) != 0);
861         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_nid) != 8);
862         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_nid) != 8);
863         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_nid) != 8);
864         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_pid) != 16);
865         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_pid) != 4);
866         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_pid) != 20);
867         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_pid) != 4);
868         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, type) != 24);
869         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->type) != 4);
870         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, payload_length) != 28);
871         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->payload_length) != 4);
872         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg) != 32);
873         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg) != 40);
874
875         /* Ack */
876         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.dst_wmd) != 32);
877         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.dst_wmd) != 16);
878         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.match_bits) != 48);
879         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.match_bits) != 8);
880         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.mlength) != 56);
881         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.mlength) != 4);
882
883         /* Put */
884         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ack_wmd) != 32);
885         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ack_wmd) != 16);
886         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.match_bits) != 48);
887         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.match_bits) != 8);
888         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.hdr_data) != 56);
889         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.hdr_data) != 8);
890         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ptl_index) != 64);
891         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ptl_index) != 4);
892         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.offset) != 68);
893         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.offset) != 4);
894
895         /* Get */
896         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.return_wmd) != 32);
897         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.return_wmd) != 16);
898         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.match_bits) != 48);
899         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.match_bits) != 8);
900         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.ptl_index) != 56);
901         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.ptl_index) != 4);
902         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.src_offset) != 60);
903         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.src_offset) != 4);
904         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.sink_length) != 64);
905         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.sink_length) != 4);
906
907         /* Reply */
908         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.reply.dst_wmd) != 32);
909         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.reply.dst_wmd) != 16);
910
911         /* Hello */
912         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.incarnation) != 32);
913         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.incarnation) != 8);
914         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.type) != 40);
915         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.type) != 4);
916
917         /* Checks for struct lnet_ni_status and related constants */
918         BUILD_BUG_ON(LNET_NI_STATUS_INVALID != 0x00000000);
919         BUILD_BUG_ON(LNET_NI_STATUS_UP != 0x15aac0de);
920         BUILD_BUG_ON(LNET_NI_STATUS_DOWN != 0xdeadface);
921
922         /* Checks for struct lnet_ni_status */
923         BUILD_BUG_ON((int)sizeof(struct lnet_ni_status) != 16);
924         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_nid) != 0);
925         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) != 8);
926         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_status) != 8);
927         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_status) != 4);
928         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_msg_size) != 12);
929         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_msg_size) != 4);
930
931         /* Checks for struct lnet_ni_large_status */
932         BUILD_BUG_ON((int)sizeof(struct lnet_ni_large_status) != 24);
933         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_status) != 0);
934         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_status) != 4);
935         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_nid) != 4);
936         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_nid) != 20);
937
938         /* Checks for struct lnet_ping_info and related constants */
939         BUILD_BUG_ON(LNET_PROTO_PING_MAGIC != 0x70696E67);
940         BUILD_BUG_ON(LNET_PING_FEAT_INVAL != 0);
941         BUILD_BUG_ON(LNET_PING_FEAT_BASE != 1);
942         BUILD_BUG_ON(LNET_PING_FEAT_NI_STATUS != 2);
943         BUILD_BUG_ON(LNET_PING_FEAT_RTE_DISABLED != 4);
944         BUILD_BUG_ON(LNET_PING_FEAT_MULTI_RAIL != 8);
945         BUILD_BUG_ON(LNET_PING_FEAT_DISCOVERY != 16);
946         BUILD_BUG_ON(LNET_PING_FEAT_LARGE_ADDR != 32);
947         BUILD_BUG_ON(LNET_PING_FEAT_PRIMARY_LARGE != 64);
948         BUILD_BUG_ON(LNET_PING_FEAT_BITS != 127);
949
950         /* Checks for struct lnet_ping_info */
951         BUILD_BUG_ON((int)sizeof(struct lnet_ping_info) != 16);
952         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_magic) != 0);
953         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) != 4);
954         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_features) != 4);
955         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_features) != 4);
956         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_pid) != 8);
957         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) != 4);
958         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_nnis) != 12);
959         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) != 4);
960         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_ni) != 16);
961         BUILD_BUG_ON(offsetof(struct lnet_ping_info, pi_ni) != sizeof(struct lnet_ping_info));
962
963         /* Acceptor connection request */
964         BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
965
966         /* Checks for struct lnet_acceptor_connreq */
967         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq) != 16);
968         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_magic) != 0);
969         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_magic) != 4);
970         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_version) != 4);
971         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_version) != 4);
972         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_nid) != 8);
973         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_nid) != 8);
974
975         /* Checks for struct lnet_acceptor_connreq_v2 */
976         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq_v2) != 28);
977         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_magic) != 0);
978         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_magic) != 4);
979         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_version) != 4);
980         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_version) != 4);
981         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_nid) != 8);
982         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_nid) != 20);
983
984         /* Checks for struct lnet_counters_common */
985         BUILD_BUG_ON((int)sizeof(struct lnet_counters_common) != 60);
986         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_alloc) != 0);
987         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_alloc) != 4);
988         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_max) != 4);
989         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_max) != 4);
990         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_errors) != 8);
991         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_errors) != 4);
992         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_count) != 12);
993         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_count) != 4);
994         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_count) != 16);
995         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_count) != 4);
996         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_count) != 20);
997         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_count) != 4);
998         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_count) != 24);
999         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_count) != 4);
1000         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_length) != 28);
1001         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_length) != 8);
1002         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_length) != 36);
1003         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_length) != 8);
1004         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_length) != 44);
1005         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_length) != 8);
1006         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_length) != 52);
1007         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_length) != 8);
1008 }
1009
1010 static const struct lnet_lnd *lnet_find_lnd_by_type(__u32 type)
1011 {
1012         const struct lnet_lnd *lnd;
1013
1014         /* holding lnd mutex */
1015         if (type >= NUM_LNDS)
1016                 return NULL;
1017         lnd = the_lnet.ln_lnds[type];
1018         LASSERT(!lnd || lnd->lnd_type == type);
1019
1020         return lnd;
1021 }
1022
1023 unsigned int
1024 lnet_get_lnd_timeout(void)
1025 {
1026         return lnet_lnd_timeout;
1027 }
1028 EXPORT_SYMBOL(lnet_get_lnd_timeout);
1029
1030 void
1031 lnet_register_lnd(const struct lnet_lnd *lnd)
1032 {
1033         mutex_lock(&the_lnet.ln_lnd_mutex);
1034
1035         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
1036         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
1037
1038         the_lnet.ln_lnds[lnd->lnd_type] = lnd;
1039
1040         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
1041
1042         mutex_unlock(&the_lnet.ln_lnd_mutex);
1043 }
1044 EXPORT_SYMBOL(lnet_register_lnd);
1045
1046 void
1047 lnet_unregister_lnd(const struct lnet_lnd *lnd)
1048 {
1049         mutex_lock(&the_lnet.ln_lnd_mutex);
1050
1051         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
1052
1053         the_lnet.ln_lnds[lnd->lnd_type] = NULL;
1054         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
1055
1056         mutex_unlock(&the_lnet.ln_lnd_mutex);
1057 }
1058 EXPORT_SYMBOL(lnet_unregister_lnd);
1059
1060 static void
1061 lnet_counters_get_common_locked(struct lnet_counters_common *common)
1062 {
1063         struct lnet_counters *ctr;
1064         int i;
1065
1066         /* FIXME !!! Their is no assert_lnet_net_locked() to ensure this
1067          * actually called under the protection of the lnet_net_lock.
1068          */
1069         memset(common, 0, sizeof(*common));
1070
1071         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1072                 common->lcc_msgs_max     += ctr->lct_common.lcc_msgs_max;
1073                 common->lcc_msgs_alloc   += ctr->lct_common.lcc_msgs_alloc;
1074                 common->lcc_errors       += ctr->lct_common.lcc_errors;
1075                 common->lcc_send_count   += ctr->lct_common.lcc_send_count;
1076                 common->lcc_recv_count   += ctr->lct_common.lcc_recv_count;
1077                 common->lcc_route_count  += ctr->lct_common.lcc_route_count;
1078                 common->lcc_drop_count   += ctr->lct_common.lcc_drop_count;
1079                 common->lcc_send_length  += ctr->lct_common.lcc_send_length;
1080                 common->lcc_recv_length  += ctr->lct_common.lcc_recv_length;
1081                 common->lcc_route_length += ctr->lct_common.lcc_route_length;
1082                 common->lcc_drop_length  += ctr->lct_common.lcc_drop_length;
1083         }
1084 }
1085
1086 void
1087 lnet_counters_get_common(struct lnet_counters_common *common)
1088 {
1089         lnet_net_lock(LNET_LOCK_EX);
1090         lnet_counters_get_common_locked(common);
1091         lnet_net_unlock(LNET_LOCK_EX);
1092 }
1093 EXPORT_SYMBOL(lnet_counters_get_common);
1094
1095 int
1096 lnet_counters_get(struct lnet_counters *counters)
1097 {
1098         struct lnet_counters *ctr;
1099         struct lnet_counters_health *health = &counters->lct_health;
1100         int i, rc = 0;
1101
1102         memset(counters, 0, sizeof(*counters));
1103
1104         lnet_net_lock(LNET_LOCK_EX);
1105
1106         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1107                 GOTO(out_unlock, rc = -ENODEV);
1108
1109         lnet_counters_get_common_locked(&counters->lct_common);
1110
1111         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1112                 health->lch_rst_alloc    += ctr->lct_health.lch_rst_alloc;
1113                 health->lch_resend_count += ctr->lct_health.lch_resend_count;
1114                 health->lch_response_timeout_count +=
1115                                 ctr->lct_health.lch_response_timeout_count;
1116                 health->lch_local_interrupt_count +=
1117                                 ctr->lct_health.lch_local_interrupt_count;
1118                 health->lch_local_dropped_count +=
1119                                 ctr->lct_health.lch_local_dropped_count;
1120                 health->lch_local_aborted_count +=
1121                                 ctr->lct_health.lch_local_aborted_count;
1122                 health->lch_local_no_route_count +=
1123                                 ctr->lct_health.lch_local_no_route_count;
1124                 health->lch_local_timeout_count +=
1125                                 ctr->lct_health.lch_local_timeout_count;
1126                 health->lch_local_error_count +=
1127                                 ctr->lct_health.lch_local_error_count;
1128                 health->lch_remote_dropped_count +=
1129                                 ctr->lct_health.lch_remote_dropped_count;
1130                 health->lch_remote_error_count +=
1131                                 ctr->lct_health.lch_remote_error_count;
1132                 health->lch_remote_timeout_count +=
1133                                 ctr->lct_health.lch_remote_timeout_count;
1134                 health->lch_network_timeout_count +=
1135                                 ctr->lct_health.lch_network_timeout_count;
1136         }
1137 out_unlock:
1138         lnet_net_unlock(LNET_LOCK_EX);
1139         return rc;
1140 }
1141 EXPORT_SYMBOL(lnet_counters_get);
1142
1143 void
1144 lnet_counters_reset(void)
1145 {
1146         struct lnet_counters *counters;
1147         int             i;
1148
1149         lnet_net_lock(LNET_LOCK_EX);
1150
1151         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1152                 goto avoid_reset;
1153
1154         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
1155                 memset(counters, 0, sizeof(struct lnet_counters));
1156 avoid_reset:
1157         lnet_net_unlock(LNET_LOCK_EX);
1158 }
1159
1160 static char *
1161 lnet_res_type2str(int type)
1162 {
1163         switch (type) {
1164         default:
1165                 LBUG();
1166         case LNET_COOKIE_TYPE_MD:
1167                 return "MD";
1168         case LNET_COOKIE_TYPE_ME:
1169                 return "ME";
1170         case LNET_COOKIE_TYPE_EQ:
1171                 return "EQ";
1172         }
1173 }
1174
1175 static void
1176 lnet_res_container_cleanup(struct lnet_res_container *rec)
1177 {
1178         int     count = 0;
1179
1180         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
1181                 return;
1182
1183         while (!list_empty(&rec->rec_active)) {
1184                 struct list_head *e = rec->rec_active.next;
1185
1186                 list_del_init(e);
1187                 if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
1188                         lnet_md_free(list_entry(e, struct lnet_libmd, md_list));
1189
1190                 } else { /* NB: Active MEs should be attached on portals */
1191                         LBUG();
1192                 }
1193                 count++;
1194         }
1195
1196         if (count > 0) {
1197                 /* Found alive MD/ME/EQ, user really should unlink/free
1198                  * all of them before finalize LNet, but if someone didn't,
1199                  * we have to recycle garbage for him */
1200                 CERROR("%d active elements on exit of %s container\n",
1201                        count, lnet_res_type2str(rec->rec_type));
1202         }
1203
1204         if (rec->rec_lh_hash != NULL) {
1205                 CFS_FREE_PTR_ARRAY(rec->rec_lh_hash, LNET_LH_HASH_SIZE);
1206                 rec->rec_lh_hash = NULL;
1207         }
1208
1209         rec->rec_type = 0; /* mark it as finalized */
1210 }
1211
1212 static int
1213 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
1214 {
1215         int     rc = 0;
1216         int     i;
1217
1218         LASSERT(rec->rec_type == 0);
1219
1220         rec->rec_type = type;
1221         INIT_LIST_HEAD(&rec->rec_active);
1222
1223         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
1224
1225         /* Arbitrary choice of hash table size */
1226         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
1227                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
1228         if (rec->rec_lh_hash == NULL) {
1229                 rc = -ENOMEM;
1230                 goto out;
1231         }
1232
1233         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
1234                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
1235
1236         return 0;
1237
1238 out:
1239         CERROR("Failed to setup %s resource container\n",
1240                lnet_res_type2str(type));
1241         lnet_res_container_cleanup(rec);
1242         return rc;
1243 }
1244
1245 static void
1246 lnet_res_containers_destroy(struct lnet_res_container **recs)
1247 {
1248         struct lnet_res_container       *rec;
1249         int                             i;
1250
1251         cfs_percpt_for_each(rec, i, recs)
1252                 lnet_res_container_cleanup(rec);
1253
1254         cfs_percpt_free(recs);
1255 }
1256
1257 static struct lnet_res_container **
1258 lnet_res_containers_create(int type)
1259 {
1260         struct lnet_res_container       **recs;
1261         struct lnet_res_container       *rec;
1262         int                             rc;
1263         int                             i;
1264
1265         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
1266         if (recs == NULL) {
1267                 CERROR("Failed to allocate %s resource containers\n",
1268                        lnet_res_type2str(type));
1269                 return NULL;
1270         }
1271
1272         cfs_percpt_for_each(rec, i, recs) {
1273                 rc = lnet_res_container_setup(rec, i, type);
1274                 if (rc != 0) {
1275                         lnet_res_containers_destroy(recs);
1276                         return NULL;
1277                 }
1278         }
1279
1280         return recs;
1281 }
1282
1283 struct lnet_libhandle *
1284 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
1285 {
1286         /* ALWAYS called with lnet_res_lock held */
1287         struct list_head        *head;
1288         struct lnet_libhandle   *lh;
1289         unsigned int            hash;
1290
1291         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
1292                 return NULL;
1293
1294         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
1295         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
1296
1297         list_for_each_entry(lh, head, lh_hash_chain) {
1298                 if (lh->lh_cookie == cookie)
1299                         return lh;
1300         }
1301
1302         return NULL;
1303 }
1304
1305 void
1306 lnet_res_lh_initialize(struct lnet_res_container *rec,
1307                        struct lnet_libhandle *lh)
1308 {
1309         /* ALWAYS called with lnet_res_lock held */
1310         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
1311         unsigned int    hash;
1312
1313         lh->lh_cookie = rec->rec_lh_cookie;
1314         rec->rec_lh_cookie += 1 << ibits;
1315
1316         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
1317
1318         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
1319 }
1320
1321 struct list_head **
1322 lnet_create_array_of_queues(void)
1323 {
1324         struct list_head **qs;
1325         struct list_head *q;
1326         int i;
1327
1328         qs = cfs_percpt_alloc(lnet_cpt_table(),
1329                               sizeof(struct list_head));
1330         if (!qs) {
1331                 CERROR("Failed to allocate queues\n");
1332                 return NULL;
1333         }
1334
1335         cfs_percpt_for_each(q, i, qs)
1336                 INIT_LIST_HEAD(q);
1337
1338         return qs;
1339 }
1340
1341 static int lnet_unprepare(void);
1342
1343 static int
1344 lnet_prepare(lnet_pid_t requested_pid)
1345 {
1346         /* Prepare to bring up the network */
1347         struct lnet_res_container **recs;
1348         int                       rc = 0;
1349
1350         if (requested_pid == LNET_PID_ANY) {
1351                 /* Don't instantiate LNET just for me */
1352                 return -ENETDOWN;
1353         }
1354
1355         LASSERT(the_lnet.ln_refcount == 0);
1356
1357         the_lnet.ln_routing = 0;
1358
1359         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
1360         the_lnet.ln_pid = requested_pid;
1361
1362         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
1363         INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
1364         INIT_LIST_HEAD(&the_lnet.ln_nets);
1365         INIT_LIST_HEAD(&the_lnet.ln_routers);
1366         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
1367         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
1368         INIT_LIST_HEAD(&the_lnet.ln_dc_request);
1369         INIT_LIST_HEAD(&the_lnet.ln_dc_working);
1370         INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
1371         INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
1372         INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
1373         INIT_LIST_HEAD(&the_lnet.ln_udsp_list);
1374         init_waitqueue_head(&the_lnet.ln_dc_waitq);
1375         the_lnet.ln_mt_handler = NULL;
1376         init_completion(&the_lnet.ln_started);
1377         atomic_set(&the_lnet.ln_late_msg_count, 0);
1378         atomic64_set(&the_lnet.ln_late_msg_nsecs, 0);
1379
1380         rc = lnet_slab_setup();
1381         if (rc != 0)
1382                 goto failed;
1383
1384         rc = lnet_create_remote_nets_table();
1385         if (rc != 0)
1386                 goto failed;
1387
1388         /*
1389          * NB the interface cookie in wire handles guards against delayed
1390          * replies and ACKs appearing valid after reboot.
1391          */
1392         the_lnet.ln_interface_cookie = ktime_get_real_ns();
1393
1394         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
1395                                                 sizeof(struct lnet_counters));
1396         if (the_lnet.ln_counters == NULL) {
1397                 CERROR("Failed to allocate counters for LNet\n");
1398                 rc = -ENOMEM;
1399                 goto failed;
1400         }
1401
1402         rc = lnet_peer_tables_create();
1403         if (rc != 0)
1404                 goto failed;
1405
1406         rc = lnet_msg_containers_create();
1407         if (rc != 0)
1408                 goto failed;
1409
1410         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
1411                                       LNET_COOKIE_TYPE_EQ);
1412         if (rc != 0)
1413                 goto failed;
1414
1415         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
1416         if (recs == NULL) {
1417                 rc = -ENOMEM;
1418                 goto failed;
1419         }
1420
1421         the_lnet.ln_md_containers = recs;
1422
1423         rc = lnet_portals_create();
1424         if (rc != 0) {
1425                 CERROR("Failed to create portals for LNet: %d\n", rc);
1426                 goto failed;
1427         }
1428
1429         the_lnet.ln_mt_zombie_rstqs = lnet_create_array_of_queues();
1430         if (!the_lnet.ln_mt_zombie_rstqs) {
1431                 rc = -ENOMEM;
1432                 goto failed;
1433         }
1434
1435         return 0;
1436
1437  failed:
1438         lnet_unprepare();
1439         return rc;
1440 }
1441
1442 static int
1443 lnet_unprepare(void)
1444 {
1445         /* NB no LNET_LOCK since this is the last reference.  All LND instances
1446          * have shut down already, so it is safe to unlink and free all
1447          * descriptors, even those that appear committed to a network op (eg MD
1448          * with non-zero pending count) */
1449
1450         lnet_fail_nid(LNET_NID_ANY, 0);
1451
1452         LASSERT(the_lnet.ln_refcount == 0);
1453         LASSERT(list_empty(&the_lnet.ln_test_peers));
1454         LASSERT(list_empty(&the_lnet.ln_nets));
1455
1456         if (the_lnet.ln_mt_zombie_rstqs) {
1457                 lnet_clean_zombie_rstqs();
1458                 the_lnet.ln_mt_zombie_rstqs = NULL;
1459         }
1460
1461         lnet_assert_handler_unused(the_lnet.ln_mt_handler);
1462         the_lnet.ln_mt_handler = NULL;
1463
1464         lnet_portals_destroy();
1465
1466         if (the_lnet.ln_md_containers != NULL) {
1467                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
1468                 the_lnet.ln_md_containers = NULL;
1469         }
1470
1471         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
1472
1473         lnet_msg_containers_destroy();
1474         lnet_peer_uninit();
1475         lnet_rtrpools_free(0);
1476
1477         if (the_lnet.ln_counters != NULL) {
1478                 cfs_percpt_free(the_lnet.ln_counters);
1479                 the_lnet.ln_counters = NULL;
1480         }
1481         lnet_destroy_remote_nets_table();
1482         lnet_udsp_destroy(true);
1483         lnet_slab_cleanup();
1484
1485         return 0;
1486 }
1487
1488 struct lnet_ni  *
1489 lnet_net2ni_locked(__u32 net_id, int cpt)
1490 {
1491         struct lnet_ni   *ni;
1492         struct lnet_net  *net;
1493
1494         LASSERT(cpt != LNET_LOCK_EX);
1495
1496         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1497                 if (net->net_id == net_id) {
1498                         ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
1499                                               ni_netlist);
1500                         return ni;
1501                 }
1502         }
1503
1504         return NULL;
1505 }
1506
1507 struct lnet_ni *
1508 lnet_net2ni_addref(__u32 net)
1509 {
1510         struct lnet_ni *ni;
1511
1512         lnet_net_lock(0);
1513         ni = lnet_net2ni_locked(net, 0);
1514         if (ni)
1515                 lnet_ni_addref_locked(ni, 0);
1516         lnet_net_unlock(0);
1517
1518         return ni;
1519 }
1520 EXPORT_SYMBOL(lnet_net2ni_addref);
1521
1522 struct lnet_net *
1523 lnet_get_net_locked(__u32 net_id)
1524 {
1525         struct lnet_net  *net;
1526
1527         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1528                 if (net->net_id == net_id)
1529                         return net;
1530         }
1531
1532         return NULL;
1533 }
1534
1535 void
1536 lnet_net_clr_pref_rtrs(struct lnet_net *net)
1537 {
1538         struct list_head zombies;
1539         struct lnet_nid_list *ne;
1540         struct lnet_nid_list *tmp;
1541
1542         INIT_LIST_HEAD(&zombies);
1543
1544         lnet_net_lock(LNET_LOCK_EX);
1545         list_splice_init(&net->net_rtr_pref_nids, &zombies);
1546         lnet_net_unlock(LNET_LOCK_EX);
1547
1548         list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
1549                 list_del_init(&ne->nl_list);
1550                 LIBCFS_FREE(ne, sizeof(*ne));
1551         }
1552 }
1553
1554 int
1555 lnet_net_add_pref_rtr(struct lnet_net *net,
1556                       struct lnet_nid *gw_nid)
1557 __must_hold(&the_lnet.ln_api_mutex)
1558 {
1559         struct lnet_nid_list *ne;
1560
1561         /* This function is called with api_mutex held. When the api_mutex
1562          * is held the list can not be modified, as it is only modified as
1563          * a result of applying a UDSP and that happens under api_mutex
1564          * lock.
1565          */
1566         list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
1567                 if (nid_same(&ne->nl_nid, gw_nid))
1568                         return -EEXIST;
1569         }
1570
1571         LIBCFS_ALLOC(ne, sizeof(*ne));
1572         if (!ne)
1573                 return -ENOMEM;
1574
1575         ne->nl_nid = *gw_nid;
1576
1577         /* Lock the cpt to protect against addition and checks in the
1578          * selection algorithm
1579          */
1580         lnet_net_lock(LNET_LOCK_EX);
1581         list_add(&ne->nl_list, &net->net_rtr_pref_nids);
1582         lnet_net_unlock(LNET_LOCK_EX);
1583
1584         return 0;
1585 }
1586
1587 static unsigned int
1588 lnet_nid4_cpt_hash(lnet_nid_t nid, unsigned int number)
1589 {
1590         __u64 key = nid;
1591         __u16 lnd = LNET_NETTYP(LNET_NIDNET(nid));
1592         unsigned int cpt;
1593
1594         if (lnd == KFILND || lnd == GNILND) {
1595                 cpt = hash_long(key, LNET_CPT_BITS);
1596
1597                 /* NB: The number of CPTs needn't be a power of 2 */
1598                 if (cpt >= number)
1599                         cpt = (key + cpt + (cpt >> 1)) % number;
1600         } else {
1601                 __u64 pair_bits = 0x0001000100010001LLU;
1602                 __u64 mask = pair_bits * 0xFF;
1603                 __u64 pair_sum;
1604                 /* For ipv4 NIDs, use (sum-by-multiplication of nid bytes) mod
1605                  * (number of CPTs) to match nid to a CPT.
1606                  */
1607                 pair_sum = (key & mask) + ((key >> 8) & mask);
1608                 pair_sum = (pair_sum * pair_bits) >> 48;
1609                 cpt = (unsigned int)(pair_sum) % number;
1610         }
1611
1612         CDEBUG(D_NET, "Match nid %s to cpt %u\n",
1613                libcfs_nid2str(nid), cpt);
1614
1615         return cpt;
1616 }
1617
1618 unsigned int
1619 lnet_nid_cpt_hash(struct lnet_nid *nid, unsigned int number)
1620 {
1621         unsigned int val;
1622         u32 h = 0;
1623         int i;
1624
1625         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
1626
1627         if (number == 1)
1628                 return 0;
1629
1630         if (nid_is_nid4(nid))
1631                 return lnet_nid4_cpt_hash(lnet_nid_to_nid4(nid), number);
1632
1633         for (i = 0; i < 4; i++)
1634                 h = cfs_hash_32(nid->nid_addr[i]^h, 32);
1635         val = cfs_hash_32(LNET_NID_NET(nid) ^ h, LNET_CPT_BITS);
1636         if (val < number)
1637                 return val;
1638         return (unsigned int)(h + val + (val >> 1)) % number;
1639 }
1640
1641 int
1642 lnet_cpt_of_nid_locked(struct lnet_nid *nid, struct lnet_ni *ni)
1643 {
1644         struct lnet_net *net;
1645
1646         /* must called with hold of lnet_net_lock */
1647         if (LNET_CPT_NUMBER == 1)
1648                 return 0; /* the only one */
1649
1650         /*
1651          * If NI is provided then use the CPT identified in the NI cpt
1652          * list if one exists. If one doesn't exist, then that NI is
1653          * associated with all CPTs and it follows that the net it belongs
1654          * to is implicitly associated with all CPTs, so just hash the nid
1655          * and return that.
1656          */
1657         if (ni != NULL) {
1658                 if (ni->ni_cpts != NULL)
1659                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
1660                                                              ni->ni_ncpts)];
1661                 else
1662                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1663         }
1664
1665         /* no NI provided so look at the net */
1666         net = lnet_get_net_locked(LNET_NID_NET(nid));
1667
1668         if (net != NULL && net->net_cpts != NULL) {
1669                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
1670         }
1671
1672         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1673 }
1674
1675 int
1676 lnet_nid2cpt(struct lnet_nid *nid, struct lnet_ni *ni)
1677 {
1678         int     cpt;
1679         int     cpt2;
1680
1681         if (LNET_CPT_NUMBER == 1)
1682                 return 0; /* the only one */
1683
1684         cpt = lnet_net_lock_current();
1685
1686         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
1687
1688         lnet_net_unlock(cpt);
1689
1690         return cpt2;
1691 }
1692 EXPORT_SYMBOL(lnet_nid2cpt);
1693
1694 int
1695 lnet_cpt_of_nid(lnet_nid_t nid4, struct lnet_ni *ni)
1696 {
1697         struct lnet_nid nid;
1698
1699         if (LNET_CPT_NUMBER == 1)
1700                 return 0; /* the only one */
1701
1702         lnet_nid4_to_nid(nid4, &nid);
1703         return lnet_nid2cpt(&nid, ni);
1704 }
1705 EXPORT_SYMBOL(lnet_cpt_of_nid);
1706
1707 int
1708 lnet_islocalnet_locked(__u32 net_id)
1709 {
1710         struct lnet_net *net;
1711         bool local;
1712
1713         net = lnet_get_net_locked(net_id);
1714
1715         local = net != NULL;
1716
1717         return local;
1718 }
1719
1720 int
1721 lnet_islocalnet(__u32 net_id)
1722 {
1723         int cpt;
1724         bool local;
1725
1726         cpt = lnet_net_lock_current();
1727
1728         local = lnet_islocalnet_locked(net_id);
1729
1730         lnet_net_unlock(cpt);
1731
1732         return local;
1733 }
1734
1735 struct lnet_ni  *
1736 lnet_nid_to_ni_locked(struct lnet_nid *nid, int cpt)
1737 {
1738         struct lnet_net  *net;
1739         struct lnet_ni *ni;
1740
1741         LASSERT(cpt != LNET_LOCK_EX);
1742
1743         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1744                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1745                         if (nid_same(&ni->ni_nid, nid))
1746                                 return ni;
1747                 }
1748         }
1749
1750         return NULL;
1751 }
1752
1753 struct lnet_ni *
1754 lnet_nid_to_ni_addref(struct lnet_nid *nid)
1755 {
1756         struct lnet_ni *ni;
1757
1758         lnet_net_lock(0);
1759         ni = lnet_nid_to_ni_locked(nid, 0);
1760         if (ni)
1761                 lnet_ni_addref_locked(ni, 0);
1762         lnet_net_unlock(0);
1763
1764         return ni;
1765 }
1766 EXPORT_SYMBOL(lnet_nid_to_ni_addref);
1767
1768 int
1769 lnet_islocalnid(struct lnet_nid *nid)
1770 {
1771         struct lnet_ni  *ni;
1772         int             cpt;
1773
1774         cpt = lnet_net_lock_current();
1775         ni = lnet_nid_to_ni_locked(nid, cpt);
1776         lnet_net_unlock(cpt);
1777
1778         return ni != NULL;
1779 }
1780
1781 int
1782 lnet_count_acceptor_nets(void)
1783 {
1784         /* Return the # of NIs that need the acceptor. */
1785         int              count = 0;
1786         struct lnet_net  *net;
1787         int              cpt;
1788
1789         cpt = lnet_net_lock_current();
1790         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1791                 /* all socklnd type networks should have the acceptor
1792                  * thread started */
1793                 if (net->net_lnd->lnd_accept != NULL)
1794                         count++;
1795         }
1796
1797         lnet_net_unlock(cpt);
1798
1799         return count;
1800 }
1801
1802 struct lnet_ping_buffer *
1803 lnet_ping_buffer_alloc(int nbytes, gfp_t gfp)
1804 {
1805         struct lnet_ping_buffer *pbuf;
1806
1807         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nbytes), gfp);
1808         if (pbuf) {
1809                 pbuf->pb_nbytes = nbytes;       /* sizeof of pb_info */
1810                 pbuf->pb_needs_post = false;
1811                 atomic_set(&pbuf->pb_refcnt, 1);
1812         }
1813
1814         return pbuf;
1815 }
1816
/*
 * Release a ping buffer.
 *
 * The buffer must no longer be referenced: pb_refcnt is asserted to be
 * zero. The size handed to LIBCFS_FREE() is recomputed from the
 * pb_nbytes value stored in the buffer.
 */
1817 void
1818 lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
1819 {
1820         LASSERT(atomic_read(&pbuf->pb_refcnt) == 0);
1821         LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nbytes));
1822 }
1823
1824 static struct lnet_ping_buffer *
1825 lnet_ping_target_create(int nbytes)
1826 {
1827         struct lnet_ping_buffer *pbuf;
1828
1829         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
1830         if (pbuf == NULL) {
1831                 CERROR("Can't allocate ping source [%d]\n", nbytes);
1832                 return NULL;
1833         }
1834
1835         pbuf->pb_info.pi_nnis = 0;
1836         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1837         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1838         pbuf->pb_info.pi_features =
1839                 LNET_PING_FEAT_NI_STATUS | LNET_PING_FEAT_MULTI_RAIL;
1840
1841         return pbuf;
1842 }
1843
1844 static inline int
1845 lnet_get_net_ni_bytes_locked(struct lnet_net *net)
1846 {
1847         struct lnet_ni *ni;
1848         int bytes = 0;
1849
1850         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1851                 bytes += lnet_ping_sts_size(&ni->ni_nid);
1852
1853         return bytes;
1854 }
1855
1856 static inline int
1857 lnet_get_ni_bytes(void)
1858 {
1859         struct lnet_ni *ni;
1860         struct lnet_net *net;
1861         int bytes = 0;
1862
1863         lnet_net_lock(0);
1864
1865         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1866                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1867                         bytes += lnet_ping_sts_size(&ni->ni_nid);
1868         }
1869
1870         lnet_net_unlock(0);
1871
1872         return bytes;
1873 }
1874
1875 void
1876 lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf)
1877 {
1878         struct lnet_ni_large_status *lstat, *lend;
1879         struct lnet_ni_status *stat, *end;
1880         int nnis;
1881         int i;
1882
1883         __swab32s(&pbuf->pb_info.pi_magic);
1884         __swab32s(&pbuf->pb_info.pi_features);
1885         __swab32s(&pbuf->pb_info.pi_pid);
1886         __swab32s(&pbuf->pb_info.pi_nnis);
1887         nnis = pbuf->pb_info.pi_nnis;
1888         stat = &pbuf->pb_info.pi_ni[0];
1889         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
1890         for (i = 0; i < nnis && stat + 1 <= end; i++, stat++) {
1891                 __swab64s(&stat->ns_nid);
1892                 __swab32s(&stat->ns_status);
1893                 if (i == 0)
1894                         /* Might be total size */
1895                         __swab32s(&stat->ns_msg_size);
1896         }
1897         if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_LARGE_ADDR))
1898                 return;
1899
1900         lstat = (struct lnet_ni_large_status *)stat;
1901         lend = (void *)end;
1902         while (lstat + 1 <= lend) {
1903                 __swab32s(&lstat->ns_status);
1904                 /* struct lnet_nid never needs to be swabed */
1905                 lstat = lnet_ping_sts_next(lstat);
1906         }
1907 }
1908
1909 int
1910 lnet_ping_info_validate(struct lnet_ping_info *pinfo)
1911 {
1912         if (!pinfo)
1913                 return -EINVAL;
1914         if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
1915                 return -EPROTO;
1916         if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
1917                 return -EPROTO;
1918         /* Loopback is guaranteed to be present */
1919         if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
1920                 return -ERANGE;
1921         if (LNET_PING_INFO_LONI(pinfo) != LNET_NID_LO_0)
1922                 return -EPROTO;
1923         return 0;
1924 }
1925
1926 static void
1927 lnet_ping_target_destroy(void)
1928 {
1929         struct lnet_net *net;
1930         struct lnet_ni  *ni;
1931
1932         lnet_net_lock(LNET_LOCK_EX);
1933
1934         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1935                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1936                         lnet_ni_lock(ni);
1937                         ni->ni_status = NULL;
1938                         lnet_ni_unlock(ni);
1939                 }
1940         }
1941
1942         lnet_ping_buffer_decref(the_lnet.ln_ping_target);
1943         the_lnet.ln_ping_target = NULL;
1944
1945         lnet_net_unlock(LNET_LOCK_EX);
1946 }
1947
1948 static void
1949 lnet_ping_target_event_handler(struct lnet_event *event)
1950 {
1951         struct lnet_ping_buffer *pbuf = event->md_user_ptr;
1952
1953         if (event->unlinked)
1954                 lnet_ping_buffer_decref(pbuf);
1955 }
1956
1957 static int
1958 lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
1959                        struct lnet_handle_md *ping_mdh,
1960                        int ni_bytes, bool set_eq)
1961 {
1962         struct lnet_processid id = {
1963                 .nid = LNET_ANY_NID,
1964                 .pid = LNET_PID_ANY
1965         };
1966         struct lnet_me *me;
1967         struct lnet_md md = { NULL };
1968         int rc;
1969
1970         if (set_eq)
1971                 the_lnet.ln_ping_target_handler =
1972                         lnet_ping_target_event_handler;
1973
1974         *ppbuf = lnet_ping_target_create(ni_bytes);
1975         if (*ppbuf == NULL) {
1976                 rc = -ENOMEM;
1977                 goto fail_free_eq;
1978         }
1979
1980         /* Ping target ME/MD */
1981         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
1982                           LNET_PROTO_PING_MATCHBITS, 0,
1983                           LNET_UNLINK, LNET_INS_AFTER);
1984         if (IS_ERR(me)) {
1985                 rc = PTR_ERR(me);
1986                 CERROR("Can't create ping target ME: %d\n", rc);
1987                 goto fail_decref_ping_buffer;
1988         }
1989
1990         /* initialize md content */
1991         md.start     = &(*ppbuf)->pb_info;
1992         md.length    = (*ppbuf)->pb_nbytes;
1993         md.threshold = LNET_MD_THRESH_INF;
1994         md.max_size  = 0;
1995         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1996                        LNET_MD_MANAGE_REMOTE;
1997         md.handler   = the_lnet.ln_ping_target_handler;
1998         md.user_ptr  = *ppbuf;
1999
2000         rc = LNetMDAttach(me, &md, LNET_RETAIN, ping_mdh);
2001         if (rc != 0) {
2002                 CERROR("Can't attach ping target MD: %d\n", rc);
2003                 goto fail_decref_ping_buffer;
2004         }
2005         lnet_ping_buffer_addref(*ppbuf);
2006
2007         return 0;
2008
2009 fail_decref_ping_buffer:
2010         LASSERT(atomic_read(&(*ppbuf)->pb_refcnt) == 1);
2011         lnet_ping_buffer_decref(*ppbuf);
2012         *ppbuf = NULL;
2013 fail_free_eq:
2014         return rc;
2015 }
2016
/*
 * Unlink the MD referred to by *ping_mdh, invalidate the handle, and
 * then block until pbuf->pb_refcnt has dropped to 1 or less. A warning
 * message is supplied for the case where the wait drags on.
 */
2017 static void
2018 lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
2019                     struct lnet_handle_md *ping_mdh)
2020 {
2021         LNetMDUnlink(*ping_mdh);
2022         LNetInvalidateMDHandle(ping_mdh);
2023
2024         /* NB the MD could be busy; this just starts the unlink */
2025         wait_var_event_warning(&pbuf->pb_refcnt,
2026                                atomic_read(&pbuf->pb_refcnt) <= 1,
2027                                "Still waiting for ping data MD to unlink\n");
2028 }
2029
/*
 * Fill pbuf with the status of every local NI and point each NI's
 * ni_status at its slot inside the buffer.
 *
 * First pass: NIs with a nid4 address are written to pi_ni[] in list
 * order and counted in pi_nnis. NIs with a large NID are skipped here;
 * if one is met while exactly one pi_ni[] slot has been filled,
 * LNET_PING_FEAT_PRIMARY_LARGE is set.
 *
 * Second pass: NIs with a large NID are appended after the last nid4
 * entry as struct lnet_ni_large_status records. If any were written,
 * the total info size is stored in pi_ni[0].ns_msg_size and
 * LNET_PING_FEAT_LARGE_ADDR is set.
 *
 * Finally the result is validated (LBUG on failure) and a fresh
 * sequence number is stored through LNET_PING_BUFFER_SEQNO().
 *
 * lnet_ping_target_update() calls this with LNET_LOCK_EX held.
 */
2030 static void
2031 lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
2032 {
2033         struct lnet_ni *ni;
2034         struct lnet_net *net;
2035         struct lnet_ni_status *ns, *end;
2036         struct lnet_ni_large_status *lns, *lend;
2037         int rc;
2038
2039         pbuf->pb_info.pi_nnis = 0;
2040         ns = &pbuf->pb_info.pi_ni[0];
2041         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
2042         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2043                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2044                         if (!nid_is_nid4(&ni->ni_nid)) {
2045                                 if (ns == &pbuf->pb_info.pi_ni[1]) {
2046                                         /* This is primary, and it is long */
2047                                         pbuf->pb_info.pi_features |=
2048                                                 LNET_PING_FEAT_PRIMARY_LARGE;
2049                                 }
2050                                 continue;
2051                         }
2052                         LASSERT(ns + 1 <= end);
2053                         ns->ns_nid = lnet_nid_to_nid4(&ni->ni_nid);
2054
2055                         lnet_ni_lock(ni);
2056                         ns->ns_status = lnet_ni_get_status_locked(ni);
2057                         ni->ni_status = &ns->ns_status;
2058                         lnet_ni_unlock(ni);
2059
2060                         pbuf->pb_info.pi_nnis++;
2061                         ns++;
2062                 }
2063         }
2064
2065         lns = (void *)ns;
2066         lend = (void *)end;
2067         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2068                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2069                         if (nid_is_nid4(&ni->ni_nid))
2070                                 continue;
2071                         LASSERT(lns + 1 <= lend);
2072
2073                         lns->ns_nid = ni->ni_nid;
2074
2075                         lnet_ni_lock(ni);
2076                         lns->ns_status = lnet_ni_get_status_locked(ni);
2077                         ni->ni_status = &lns->ns_status;
2078                         lnet_ni_unlock(ni);
2079
2080                         lns = lnet_ping_sts_next(lns);
2081                 }
2082         }
2083         if ((void *)lns > (void *)ns) {
2084                 /* Record total info size */
2085                 pbuf->pb_info.pi_ni[0].ns_msg_size =
2086                         (void *)lns - (void *)&pbuf->pb_info;
2087                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_LARGE_ADDR;
2088         }
2089
2090         /* We (ab)use the ns_status of the loopback interface to
2091          * transmit the sequence number. The first interface listed
2092          * must be the loopback interface.
2093          */
2094         rc = lnet_ping_info_validate(&pbuf->pb_info);
2095         if (rc) {
2096                 LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
2097                 LBUG();
2098         }
2099         LNET_PING_BUFFER_SEQNO(pbuf) =
2100                 atomic_inc_return(&the_lnet.ln_ping_target_seqno);
2101 }
2102
/*
 * Make pbuf / ping_mdh the active ping target.
 *
 * Under LNET_LOCK_EX the RTE_DISABLED and DISCOVERY feature bits are
 * set as appropriate, the NI status entries are installed into pbuf,
 * and the_lnet.ln_ping_target{,_md} are switched over. If a previous
 * ping target existed, its MD is unlinked and a reference on the old
 * buffer is dropped; ln_api_mutex is released while waiting for that
 * unlink and re-taken afterwards. Finally lnet_push_update_to_peers(0)
 * is called.
 *
 * Called with ln_api_mutex held; note that the mutex may be dropped and
 * re-acquired inside this function.
 */
2103 static void
2104 lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
2105                         struct lnet_handle_md ping_mdh)
2106 __must_hold(&the_lnet.ln_api_mutex)
2107 {
2108         struct lnet_ping_buffer *old_pbuf = NULL;
2109         struct lnet_handle_md old_ping_md;
2110
2111         /* switch the NIs to point to the new ping info created */
2112         lnet_net_lock(LNET_LOCK_EX);
2113
2114         if (!the_lnet.ln_routing)
2115                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;
2116         if (!lnet_peer_discovery_disabled)
2117                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
2118
2119         /* Ensure only known feature bits have been set. */
2120         LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
2121         LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));
2122
2123         lnet_ping_target_install_locked(pbuf);
2124
2125         if (the_lnet.ln_ping_target) {
2126                 old_pbuf = the_lnet.ln_ping_target;
2127                 old_ping_md = the_lnet.ln_ping_target_md;
2128         }
2129         the_lnet.ln_ping_target_md = ping_mdh;
2130         the_lnet.ln_ping_target = pbuf;
2131
2132         lnet_net_unlock(LNET_LOCK_EX);
2133
2134         if (old_pbuf) {
2135                 /* unlink and free the old ping info.
2136                  * There may be outstanding traffic on this MD, and
2137                  * ln_api_mutex may be required to finalize that
2138                  * traffic. Release ln_api_mutex while we wait for
2139                  * refs on this ping buffer to drop
2140                  */
2141                 mutex_unlock(&the_lnet.ln_api_mutex);
2142                 lnet_ping_md_unlink(old_pbuf, &old_ping_md);
2143                 mutex_lock(&the_lnet.ln_api_mutex);
2144                 lnet_ping_buffer_decref(old_pbuf);
2145         }
2146
2147         lnet_push_update_to_peers(0);
2148 }
2149
/*
 * Shut down the ping target: unlink its MD and wait for the references
 * on the buffer to drain, assert that the event handler is no longer
 * in use, then destroy the ping target.
 */
2150 static void
2151 lnet_ping_target_fini(void)
2152 {
2153         lnet_ping_md_unlink(the_lnet.ln_ping_target,
2154                             &the_lnet.ln_ping_target_md);
2155
2156         lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
2157         lnet_ping_target_destroy();
2158 }
2159
2160 /* Resize the push target. */
2161 int lnet_push_target_resize(void)
2162 {
2163         struct lnet_handle_md mdh;
2164         struct lnet_handle_md old_mdh;
2165         struct lnet_ping_buffer *pbuf;
2166         struct lnet_ping_buffer *old_pbuf;
2167         int nbytes;
2168         int rc;
2169
2170 again:
2171         nbytes = the_lnet.ln_push_target_nbytes;
2172         if (nbytes <= 0) {
2173                 CDEBUG(D_NET, "Invalid nbytes %d\n", nbytes);
2174                 return -EINVAL;
2175         }
2176
2177         /* NB: lnet_ping_buffer_alloc() sets pbuf refcount to 1. That ref is
2178          * dropped when we need to resize again (see "old_pbuf" below) or when
2179          * LNet is shutdown (see lnet_push_target_fini())
2180          */
2181         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
2182         if (!pbuf) {
2183                 CDEBUG(D_NET, "Can't allocate pbuf for nbytes %d\n", nbytes);
2184                 return -ENOMEM;
2185         }
2186
2187         rc = lnet_push_target_post(pbuf, &mdh);
2188         if (rc) {
2189                 CDEBUG(D_NET, "Failed to post push target: %d\n", rc);
2190                 lnet_ping_buffer_decref(pbuf);
2191                 return rc;
2192         }
2193
2194         lnet_net_lock(LNET_LOCK_EX);
2195         old_pbuf = the_lnet.ln_push_target;
2196         old_mdh = the_lnet.ln_push_target_md;
2197         the_lnet.ln_push_target = pbuf;
2198         the_lnet.ln_push_target_md = mdh;
2199         lnet_net_unlock(LNET_LOCK_EX);
2200
2201         if (old_pbuf) {
2202                 LNetMDUnlink(old_mdh);
2203                 /* Drop ref set by lnet_ping_buffer_alloc() */
2204                 lnet_ping_buffer_decref(old_pbuf);
2205         }
2206
2207         /* Received another push or reply that requires a larger buffer */
2208         if (nbytes < the_lnet.ln_push_target_nbytes)
2209                 goto again;
2210
2211         CDEBUG(D_NET, "nbytes %d success\n", nbytes);
2212         return 0;
2213 }
2214
2215 int lnet_push_target_post(struct lnet_ping_buffer *pbuf,
2216                           struct lnet_handle_md *mdhp)
2217 {
2218         struct lnet_processid id = { LNET_ANY_NID, LNET_PID_ANY };
2219         struct lnet_md md = { NULL };
2220         struct lnet_me *me;
2221         int rc;
2222
2223         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
2224                           LNET_PROTO_PING_MATCHBITS, 0,
2225                           LNET_UNLINK, LNET_INS_AFTER);
2226         if (IS_ERR(me)) {
2227                 rc = PTR_ERR(me);
2228                 CERROR("Can't create push target ME: %d\n", rc);
2229                 return rc;
2230         }
2231
2232         pbuf->pb_needs_post = false;
2233
2234         /* This reference is dropped by lnet_push_target_event_handler() */
2235         lnet_ping_buffer_addref(pbuf);
2236
2237         /* initialize md content */
2238         md.start     = &pbuf->pb_info;
2239         md.length    = pbuf->pb_nbytes;
2240         md.threshold = 1;
2241         md.max_size  = 0;
2242         md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE;
2243         md.user_ptr  = pbuf;
2244         md.handler   = the_lnet.ln_push_target_handler;
2245
2246         rc = LNetMDAttach(me, &md, LNET_UNLINK, mdhp);
2247         if (rc) {
2248                 CERROR("Can't attach push MD: %d\n", rc);
2249                 lnet_ping_buffer_decref(pbuf);
2250                 pbuf->pb_needs_post = true;
2251                 return rc;
2252         }
2253
2254         CDEBUG(D_NET, "posted push target %p\n", pbuf);
2255
2256         return 0;
2257 }
2258
2259 static void lnet_push_target_event_handler(struct lnet_event *ev)
2260 {
2261         struct lnet_ping_buffer *pbuf = ev->md_user_ptr;
2262
2263         CDEBUG(D_NET, "type %d status %d unlinked %d\n", ev->type, ev->status,
2264                ev->unlinked);
2265
2266         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
2267                 lnet_swap_pinginfo(pbuf);
2268
2269         if (ev->type == LNET_EVENT_UNLINK) {
2270                 /* Drop ref added by lnet_push_target_post() */
2271                 lnet_ping_buffer_decref(pbuf);
2272                 return;
2273         }
2274
2275         lnet_peer_push_event(ev);
2276         if (ev->unlinked)
2277                 /* Drop ref added by lnet_push_target_post */
2278                 lnet_ping_buffer_decref(pbuf);
2279 }
2280
2281 /* Initialize the push target. */
2282 static int lnet_push_target_init(void)
2283 {
2284         int rc;
2285
2286         if (the_lnet.ln_push_target)
2287                 return -EALREADY;
2288
2289         the_lnet.ln_push_target_handler =
2290                 lnet_push_target_event_handler;
2291
2292         rc = LNetSetLazyPortal(LNET_RESERVED_PORTAL);
2293         LASSERT(rc == 0);
2294
2295         /* Start at the required minimum, we'll enlarge if required. */
2296         the_lnet.ln_push_target_nbytes = LNET_PING_INFO_MIN_SIZE;
2297
2298         rc = lnet_push_target_resize();
2299         if (rc) {
2300                 LNetClearLazyPortal(LNET_RESERVED_PORTAL);
2301                 the_lnet.ln_push_target_handler = NULL;
2302         }
2303
2304         return rc;
2305 }
2306
2307 /* Clean up the push target. */
2308 static void lnet_push_target_fini(void)
2309 {
2310         if (!the_lnet.ln_push_target)
2311                 return;
2312
2313         /* Unlink and invalidate to prevent new references. */
2314         LNetMDUnlink(the_lnet.ln_push_target_md);
2315         LNetInvalidateMDHandle(&the_lnet.ln_push_target_md);
2316
2317         /* Wait for the unlink to complete. */
2318         wait_var_event_warning(&the_lnet.ln_push_target->pb_refcnt,
2319                                atomic_read(&the_lnet.ln_push_target->pb_refcnt) <= 1,
2320                                "Still waiting for ping data MD to unlink\n");
2321
2322         /* Drop ref set by lnet_ping_buffer_alloc() */
2323         lnet_ping_buffer_decref(the_lnet.ln_push_target);
2324         the_lnet.ln_push_target = NULL;
2325         the_lnet.ln_push_target_nbytes = 0;
2326
2327         LNetClearLazyPortal(LNET_RESERVED_PORTAL);
2328         lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
2329         the_lnet.ln_push_target_handler = NULL;
2330 }
2331
2332 static int
2333 lnet_ni_tq_credits(struct lnet_ni *ni)
2334 {
2335         int     credits;
2336
2337         LASSERT(ni->ni_ncpts >= 1);
2338
2339         if (ni->ni_ncpts == 1)
2340                 return ni->ni_net->net_tunables.lct_max_tx_credits;
2341
2342         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
2343         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
2344         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
2345
2346         return credits;
2347 }
2348
/*
 * Take an NI off its net's NI list: the NI is moved to the net's
 * net_ni_zombie list and lnet_ni_decref_locked(ni, 0) is called on it.
 * The NI must currently be on a list (asserted).
 */
2349 static void
2350 lnet_ni_unlink_locked(struct lnet_ni *ni)
2351 {
2352         /* move it to zombie list and nobody can find it anymore */
2353         LASSERT(!list_empty(&ni->ni_netlist));
2354         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
2355         lnet_ni_decref_locked(ni, 0);
2356 }
2357
/*
 * Shut down and free every NI on net->net_ni_zombie.
 *
 * Entered with the net lock held in LNET_LOCK_EX mode; the function
 * also unlocks and re-locks ln_api_mutex, so it expects that mutex to
 * be held too. Both are held again when it returns.
 *
 * Each pass takes the first zombie NI off the list. If any of its
 * per-CPT reference counters is still non-zero, the NI is put back,
 * both locks are dropped, and the thread sleeps for one second before
 * retrying; a warning is logged only when the retry counter i is a
 * power of two ((i & -i) == i). Once no references remain, the LND's
 * lnd_shutdown() is called under ln_lnd_mutex (with ln_api_mutex
 * released around it), the NI is freed, and the retry counter is
 * reset.
 */
2358 static void
2359 lnet_clear_zombies_nis_locked(struct lnet_net *net)
2360 {
2361         int             i;
2362         int             islo;
2363         struct lnet_ni  *ni;
2364         struct list_head *zombie_list = &net->net_ni_zombie;
2365
2366         /*
2367          * Now wait for the NIs I just nuked to show up on the zombie
2368          * list and shut them down in guaranteed thread context
2369          */
2370         i = 2;
2371         while ((ni = list_first_entry_or_null(zombie_list,
2372                                               struct lnet_ni,
2373                                               ni_netlist)) != NULL) {
2374                 int *ref;
2375                 int j;
2376
2377                 list_del_init(&ni->ni_netlist);
2378                 /* the ni should be in deleting state. If it's not it's
2379                  * a bug */
2380                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
2381                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
2382                         if (*ref == 0)
2383                                 continue;
2384                         /* still busy, add it back to zombie list */
2385                         list_add(&ni->ni_netlist, zombie_list);
2386                         break;
2387                 }
2388
2389                 if (!list_empty(&ni->ni_netlist)) {
2390                         /* Unlock mutex while waiting to allow other
2391                          * threads to read the LNet state and fall through
2392                          * to avoid deadlock
2393                          */
2394                         lnet_net_unlock(LNET_LOCK_EX);
2395                         mutex_unlock(&the_lnet.ln_api_mutex);
2396
2397                         ++i;
2398                         if ((i & (-i)) == i) {
2399                                 CDEBUG(D_WARNING,
2400                                        "Waiting for zombie LNI %s\n",
2401                                        libcfs_nidstr(&ni->ni_nid));
2402                         }
2403                         schedule_timeout_uninterruptible(cfs_time_seconds(1));
2404
2405                         mutex_lock(&the_lnet.ln_api_mutex);
2406                         lnet_net_lock(LNET_LOCK_EX);
2407                         continue;
2408                 }
2409
2410                 lnet_net_unlock(LNET_LOCK_EX);
2411
2412                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
2413
2414                 LASSERT(!in_interrupt());
2415                 /* Holding the LND mutex makes it safe for lnd_shutdown
2416                  * to call module_put(). Module unload cannot finish
2417                  * until lnet_unregister_lnd() completes, and that
2418                  * requires the LND mutex.
2419                  */
2420                 mutex_unlock(&the_lnet.ln_api_mutex);
2421                 mutex_lock(&the_lnet.ln_lnd_mutex);
2422                 (net->net_lnd->lnd_shutdown)(ni);
2423                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2424                 mutex_lock(&the_lnet.ln_api_mutex);
2425
2426                 if (!islo)
2427                         CDEBUG(D_LNI, "Removed LNI %s\n",
2428                               libcfs_nidstr(&ni->ni_nid));
2429
2430                 lnet_ni_free(ni);
2431                 i = 2;
2432                 lnet_net_lock(LNET_LOCK_EX);
2433         }
2434 }
2435
2436 /* shutdown down the NI and release refcount */
2437 static void
2438 lnet_shutdown_lndni(struct lnet_ni *ni)
2439 {
2440         int i;
2441         struct lnet_net *net = ni->ni_net;
2442
2443         lnet_net_lock(LNET_LOCK_EX);
2444         lnet_ni_lock(ni);
2445         ni->ni_state = LNET_NI_STATE_DELETING;
2446         lnet_ni_unlock(ni);
2447         lnet_ni_unlink_locked(ni);
2448         lnet_incr_dlc_seq();
2449         lnet_net_unlock(LNET_LOCK_EX);
2450
2451         /* clear messages for this NI on the lazy portal */
2452         for (i = 0; i < the_lnet.ln_nportals; i++)
2453                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
2454
2455         lnet_net_lock(LNET_LOCK_EX);
2456         lnet_clear_zombies_nis_locked(net);
2457         lnet_net_unlock(LNET_LOCK_EX);
2458 }
2459
2460 static void
2461 lnet_shutdown_lndnet(struct lnet_net *net)
2462 {
2463         struct lnet_ni *ni;
2464
2465         lnet_net_lock(LNET_LOCK_EX);
2466
2467         list_del_init(&net->net_list);
2468
2469         while ((ni = list_first_entry_or_null(&net->net_ni_list,
2470                                               struct lnet_ni,
2471                                               ni_netlist)) != NULL) {
2472                 lnet_net_unlock(LNET_LOCK_EX);
2473                 lnet_shutdown_lndni(ni);
2474                 lnet_net_lock(LNET_LOCK_EX);
2475         }
2476
2477         lnet_net_unlock(LNET_LOCK_EX);
2478
2479         /* Do peer table cleanup for this net */
2480         lnet_peer_tables_cleanup(net);
2481
2482         lnet_net_free(net);
2483 }
2484
2485 static void
2486 lnet_shutdown_lndnets(void)
2487 {
2488         struct lnet_net *net;
2489         LIST_HEAD(resend);
2490         struct lnet_msg *msg, *tmp;
2491
2492         /* NB called holding the global mutex */
2493
2494         /* All quiet on the API front */
2495         LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING ||
2496                 the_lnet.ln_state == LNET_STATE_STOPPING);
2497         LASSERT(the_lnet.ln_refcount == 0);
2498
2499         lnet_net_lock(LNET_LOCK_EX);
2500         the_lnet.ln_state = LNET_STATE_STOPPING;
2501
2502         /*
2503          * move the nets to the zombie list to avoid them being
2504          * picked up for new work. LONET is also included in the
2505          * Nets that will be moved to the zombie list
2506          */
2507         list_splice_init(&the_lnet.ln_nets, &the_lnet.ln_net_zombie);
2508
2509         /* Drop the cached loopback Net. */
2510         if (the_lnet.ln_loni != NULL) {
2511                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
2512                 the_lnet.ln_loni = NULL;
2513         }
2514         lnet_net_unlock(LNET_LOCK_EX);
2515
2516         /* iterate through the net zombie list and delete each net */
2517         while ((net = list_first_entry_or_null(&the_lnet.ln_net_zombie,
2518                                                struct lnet_net,
2519                                                net_list)) != NULL)
2520                 lnet_shutdown_lndnet(net);
2521
2522         spin_lock(&the_lnet.ln_msg_resend_lock);
2523         list_splice(&the_lnet.ln_msg_resend, &resend);
2524         spin_unlock(&the_lnet.ln_msg_resend_lock);
2525
2526         list_for_each_entry_safe(msg, tmp, &resend, msg_list) {
2527                 list_del_init(&msg->msg_list);
2528                 msg->msg_no_resend = true;
2529                 lnet_finalize(msg, -ECANCELED);
2530         }
2531
2532         lnet_net_lock(LNET_LOCK_EX);
2533         the_lnet.ln_state = LNET_STATE_SHUTDOWN;
2534         lnet_net_unlock(LNET_LOCK_EX);
2535 }
2536
2537 static int
2538 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
2539 {
2540         int                     rc = -EINVAL;
2541         struct lnet_tx_queue    *tq;
2542         int                     i;
2543         struct lnet_net         *net = ni->ni_net;
2544
2545         mutex_lock(&the_lnet.ln_lnd_mutex);
2546
2547         if (tun) {
2548                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
2549                 ni->ni_lnd_tunables_set = true;
2550         }
2551
2552         rc = (net->net_lnd->lnd_startup)(ni);
2553
2554         mutex_unlock(&the_lnet.ln_lnd_mutex);
2555
2556         if (rc != 0) {
2557                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
2558                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
2559                 goto failed0;
2560         }
2561
2562         /* We keep a reference on the loopback net through the loopback NI */
2563         if (net->net_lnd->lnd_type == LOLND) {
2564                 lnet_ni_addref(ni);
2565                 LASSERT(the_lnet.ln_loni == NULL);
2566                 the_lnet.ln_loni = ni;
2567                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
2568                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
2569                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
2570                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
2571                 return 0;
2572         }
2573
2574         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
2575             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
2576                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
2577                                    libcfs_lnd2str(net->net_lnd->lnd_type),
2578                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
2579                                         "" : "per-peer ");
2580                 /* shutdown the NI since if we get here then it must've already
2581                  * been started
2582                  */
2583                 lnet_shutdown_lndni(ni);
2584                 return -EINVAL;
2585         }
2586
2587         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
2588                 tq->tq_credits_min =
2589                 tq->tq_credits_max =
2590                 tq->tq_credits = lnet_ni_tq_credits(ni);
2591         }
2592
2593         atomic_set(&ni->ni_tx_credits,
2594                    lnet_ni_tq_credits(ni) * ni->ni_ncpts);
2595         atomic_set(&ni->ni_healthv, LNET_MAX_HEALTH_VALUE);
2596
2597         /* Nodes with small feet have little entropy. The NID for this
2598          * node gives the most entropy in the low bits.
2599          */
2600         add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid));
2601
2602         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
2603                 libcfs_nidstr(&ni->ni_nid),
2604                 ni->ni_net->net_tunables.lct_peer_tx_credits,
2605                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
2606                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
2607                 ni->ni_net->net_tunables.lct_peer_timeout);
2608
2609         return 0;
2610 failed0:
2611         lnet_ni_free(ni);
2612         return rc;
2613 }
2614
2615 static const struct lnet_lnd *lnet_load_lnd(u32 lnd_type)
2616 {
2617         const struct lnet_lnd *lnd;
2618         int rc = 0;
2619
2620         mutex_lock(&the_lnet.ln_lnd_mutex);
2621         lnd = lnet_find_lnd_by_type(lnd_type);
2622         if (!lnd) {
2623                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2624                 rc = request_module("%s", libcfs_lnd2modname(lnd_type));
2625                 mutex_lock(&the_lnet.ln_lnd_mutex);
2626
2627                 lnd = lnet_find_lnd_by_type(lnd_type);
2628                 if (!lnd) {
2629                         mutex_unlock(&the_lnet.ln_lnd_mutex);
2630                         CERROR("Can't load LND %s, module %s, rc=%d\n",
2631                         libcfs_lnd2str(lnd_type),
2632                         libcfs_lnd2modname(lnd_type), rc);
2633 #ifndef HAVE_MODULE_LOADING_SUPPORT
2634                         LCONSOLE_ERROR_MSG(0x104,
2635                                            "Your kernel must be compiled with kernel module loading support.");
2636 #endif
2637                         return ERR_PTR(-EINVAL);
2638                 }
2639         }
2640         mutex_unlock(&the_lnet.ln_lnd_mutex);
2641
2642         return lnd;
2643 }
2644
2645 static int
2646 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
2647 {
2648         struct lnet_ni *ni;
2649         struct lnet_net *net_l = NULL;
2650         LIST_HEAD(local_ni_list);
2651         int rc;
2652         int ni_count = 0;
2653         __u32 lnd_type;
2654         const struct lnet_lnd  *lnd;
2655         int peer_timeout =
2656                 net->net_tunables.lct_peer_timeout;
2657         int maxtxcredits =
2658                 net->net_tunables.lct_max_tx_credits;
2659         int peerrtrcredits =
2660                 net->net_tunables.lct_peer_rtr_credits;
2661
2662         /*
2663          * make sure that this net is unique. If it isn't then
2664          * we are adding interfaces to an already existing network, and
2665          * 'net' is just a convenient way to pass in the list.
2666          * if it is unique we need to find the LND and load it if
2667          * necessary.
2668          */
2669         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
2670                 lnd_type = LNET_NETTYP(net->net_id);
2671
2672                 lnd = lnet_load_lnd(lnd_type);
2673                 if (IS_ERR(lnd)) {
2674                         rc = PTR_ERR(lnd);
2675                         goto failed0;
2676                 }
2677
2678                 mutex_lock(&the_lnet.ln_lnd_mutex);
2679                 net->net_lnd = lnd;
2680                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2681
2682                 net_l = net;
2683         }
2684
2685         /*
2686          * net_l: if the network being added is unique then net_l
2687          *        will point to that network
2688          *        if the network being added is not unique then
2689          *        net_l points to the existing network.
2690          *
2691          * When we enter the loop below, we'll pick NIs off he
2692          * network beign added and start them up, then add them to
2693          * a local ni list. Once we've successfully started all
2694          * the NIs then we join the local NI list (of started up
2695          * networks) with the net_l->net_ni_list, which should
2696          * point to the correct network to add the new ni list to
2697          *
2698          * If any of the new NIs fail to start up, then we want to
2699          * iterate through the local ni list, which should include
2700          * any NIs which were successfully started up, and shut
2701          * them down.
2702          *
2703          * After than we want to delete the network being added,
2704          * to avoid a memory leak.
2705          */
2706         while ((ni = list_first_entry_or_null(&net->net_ni_added,
2707                                               struct lnet_ni,
2708                                               ni_netlist)) != NULL) {
2709                 list_del_init(&ni->ni_netlist);
2710
2711                 /* make sure that the the NI we're about to start
2712                  * up is actually unique. if it's not fail. */
2713                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
2714                                         ni->ni_interface)) {
2715                         rc = -EEXIST;
2716                         goto failed1;
2717                 }
2718
2719                 /* adjust the pointer the parent network, just in case it
2720                  * the net is a duplicate */
2721                 ni->ni_net = net_l;
2722
2723                 rc = lnet_startup_lndni(ni, tun);
2724
2725                 if (rc != 0)
2726                         goto failed1;
2727
2728                 lnet_ni_addref(ni);
2729                 list_add_tail(&ni->ni_netlist, &local_ni_list);
2730
2731                 ni_count++;
2732         }
2733
2734         lnet_net_lock(LNET_LOCK_EX);
2735         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
2736         lnet_incr_dlc_seq();
2737
2738         list_for_each_entry(ni, &net_l->net_ni_list, ni_netlist) {
2739                 if (!ni)
2740                         break;
2741                 lnet_ni_lock(ni);
2742                 ni->ni_state = LNET_NI_STATE_ACTIVE;
2743                 lnet_ni_unlock(ni);
2744         }
2745         lnet_net_unlock(LNET_LOCK_EX);
2746
2747         /* if the network is not unique then we don't want to keep
2748          * it around after we're done. Free it. Otherwise add that
2749          * net to the global the_lnet.ln_nets */
2750         if (net_l != net && net_l != NULL) {
2751                 /*
2752                  * TODO - note. currently the tunables can not be updated
2753                  * once added
2754                  */
2755                 lnet_net_free(net);
2756         } else {
2757                 /*
2758                  * restore tunables after it has been overwitten by the
2759                  * lnd
2760                  */
2761                 if (peer_timeout != -1)
2762                         net->net_tunables.lct_peer_timeout = peer_timeout;
2763                 if (maxtxcredits != -1)
2764                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
2765                 if (peerrtrcredits != -1)
2766                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
2767
2768                 lnet_net_lock(LNET_LOCK_EX);
2769                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
2770                 lnet_net_unlock(LNET_LOCK_EX);
2771         }
2772
2773         return ni_count;
2774
2775 failed1:
2776         /*
2777          * shutdown the new NIs that are being started up
2778          * free the NET being started
2779          */
2780         while ((ni = list_first_entry_or_null(&local_ni_list,
2781                                               struct lnet_ni,
2782                                               ni_netlist)) != NULL)
2783                 lnet_shutdown_lndni(ni);
2784
2785 failed0:
2786         lnet_net_free(net);
2787
2788         return rc;
2789 }
2790
2791 static int
2792 lnet_startup_lndnets(struct list_head *netlist)
2793 {
2794         struct lnet_net         *net;
2795         int                     rc;
2796         int                     ni_count = 0;
2797
2798         /*
2799          * Change to running state before bringing up the LNDs. This
2800          * allows lnet_shutdown_lndnets() to assert that we've passed
2801          * through here.
2802          */
2803         lnet_net_lock(LNET_LOCK_EX);
2804         the_lnet.ln_state = LNET_STATE_RUNNING;
2805         lnet_net_unlock(LNET_LOCK_EX);
2806
2807         while ((net = list_first_entry_or_null(netlist,
2808                                                struct lnet_net,
2809                                                net_list)) != NULL) {
2810                 list_del_init(&net->net_list);
2811
2812                 rc = lnet_startup_lndnet(net, NULL);
2813
2814                 if (rc < 0)
2815                         goto failed;
2816
2817                 ni_count += rc;
2818         }
2819
2820         return ni_count;
2821 failed:
2822         lnet_shutdown_lndnets();
2823
2824         return rc;
2825 }
2826
2827 static int lnet_genl_parse_list(struct sk_buff *msg,
2828                                 const struct ln_key_list *data[], u16 idx)
2829 {
2830         const struct ln_key_list *list = data[idx];
2831         const struct ln_key_props *props;
2832         struct nlattr *node;
2833         u16 count;
2834
2835         if (!list)
2836                 return 0;
2837
2838         if (!list->lkl_maxattr)
2839                 return -ERANGE;
2840
2841         props = list->lkl_list;
2842         if (!props)
2843                 return -EINVAL;
2844
2845         node = nla_nest_start(msg, LN_SCALAR_ATTR_LIST);
2846         if (!node)
2847                 return -ENOBUFS;
2848
2849         for (count = 1; count <= list->lkl_maxattr; count++) {
2850                 struct nlattr *key = nla_nest_start(msg, count);
2851
2852                 if (!key)
2853                         return -EMSGSIZE;
2854
2855                 if (count == 1)
2856                         nla_put_u16(msg, LN_SCALAR_ATTR_LIST_SIZE,
2857                                     list->lkl_maxattr);
2858
2859                 nla_put_u16(msg, LN_SCALAR_ATTR_INDEX, count);
2860                 if (props[count].lkp_value)
2861                         nla_put_string(msg, LN_SCALAR_ATTR_VALUE,
2862                                        props[count].lkp_value);
2863                 if (props[count].lkp_key_format)
2864                         nla_put_u16(msg, LN_SCALAR_ATTR_KEY_FORMAT,
2865                                     props[count].lkp_key_format);
2866                 nla_put_u16(msg, LN_SCALAR_ATTR_NLA_TYPE,
2867                             props[count].lkp_data_type);
2868                 if (props[count].lkp_data_type == NLA_NESTED) {
2869                         int rc;
2870
2871                         rc = lnet_genl_parse_list(msg, data, ++idx);
2872                         if (rc < 0)
2873                                 return rc;
2874                         idx = rc;
2875                 }
2876
2877                 nla_nest_end(msg, key);
2878         }
2879
2880         nla_nest_end(msg, node);
2881         return idx;
2882 }
2883
2884 int lnet_genl_send_scalar_list(struct sk_buff *msg, u32 portid, u32 seq,
2885                                const struct genl_family *family, int flags,
2886                                u8 cmd, const struct ln_key_list *data[])
2887 {
2888         int rc = 0;
2889         void *hdr;
2890
2891         if (!data[0])
2892                 return -EINVAL;
2893
2894         hdr = genlmsg_put(msg, portid, seq, family, flags, cmd);
2895         if (!hdr)
2896                 GOTO(canceled, rc = -EMSGSIZE);
2897
2898         rc = lnet_genl_parse_list(msg, data, 0);
2899         if (rc < 0)
2900                 GOTO(canceled, rc);
2901
2902         genlmsg_end(msg, hdr);
2903 canceled:
2904         if (rc < 0)
2905                 genlmsg_cancel(msg, hdr);
2906         return rc > 0 ? 0 : rc;
2907 }
2908 EXPORT_SYMBOL(lnet_genl_send_scalar_list);
2909
2910 static struct genl_family lnet_family;
2911
2912 /**
2913  * Initialize LNet library.
2914  *
2915  * Automatically called at module loading time. Caller has to call
2916  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
2917  * latter returned 0. It must be called exactly once.
2918  *
2919  * \retval 0 on success
2920  * \retval -ve on failures.
2921  */
2922 int lnet_lib_init(void)
2923 {
2924         int rc;
2925
2926         lnet_assert_wire_constants();
2927
2928         /* refer to global cfs_cpt_table for now */
2929         the_lnet.ln_cpt_table = cfs_cpt_tab;
2930         the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab);
2931
2932         LASSERT(the_lnet.ln_cpt_number > 0);
2933         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
2934                 /* we are under risk of consuming all lh_cookie */
2935                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
2936                        "please change setting of CPT-table and retry\n",
2937                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
2938                 return -E2BIG;
2939         }
2940
2941         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
2942                 the_lnet.ln_cpt_bits++;
2943
2944         rc = lnet_create_locks();
2945         if (rc != 0) {
2946                 CERROR("Can't create LNet global locks: %d\n", rc);
2947                 return rc;
2948         }
2949
2950         rc = genl_register_family(&lnet_family);
2951         if (rc != 0) {
2952                 lnet_destroy_locks();
2953                 CERROR("Can't register LNet netlink family: %d\n", rc);
2954                 return rc;
2955         }
2956
2957         the_lnet.ln_refcount = 0;
2958         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
2959         INIT_LIST_HEAD(&the_lnet.ln_msg_resend);
2960
2961         /* The hash table size is the number of bits it takes to express the set
2962          * ln_num_routes, minus 1 (better to under estimate than over so we
2963          * don't waste memory). */
2964         if (rnet_htable_size <= 0)
2965                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
2966         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
2967                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
2968         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
2969                                            order_base_2(rnet_htable_size) - 1);
2970
2971         /* All LNDs apart from the LOLND are in separate modules.  They
2972          * register themselves when their module loads, and unregister
2973          * themselves when their module is unloaded. */
2974         lnet_register_lnd(&the_lolnd);
2975         return 0;
2976 }
2977
2978 /**
2979  * Finalize LNet library.
2980  *
2981  * \pre lnet_lib_init() called with success.
2982  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
2983  *
2984  * As this happens at module-unload, all lnds must already be unloaded,
2985  * so they must already be unregistered.
2986  */
2987 void lnet_lib_exit(void)
2988 {
2989         int i;
2990
2991         LASSERT(the_lnet.ln_refcount == 0);
2992         lnet_unregister_lnd(&the_lolnd);
2993         for (i = 0; i < NUM_LNDS; i++)
2994                 LASSERT(!the_lnet.ln_lnds[i]);
2995         lnet_destroy_locks();
2996         genl_unregister_family(&lnet_family);
2997 }
2998
2999 /**
3000  * Set LNet PID and start LNet interfaces, routing, and forwarding.
3001  *
3002  * Users must call this function at least once before any other functions.
3003  * For each successful call there must be a corresponding call to
3004  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
3005  * ignored.
3006  *
3007  * The PID used by LNet may be different from the one requested.
3008  * See LNetGetId().
3009  *
3010  * \param requested_pid PID requested by the caller.
3011  *
3012  * \return >= 0 on success, and < 0 error code on failures.
3013  */
3014 int
3015 LNetNIInit(lnet_pid_t requested_pid)
3016 {
3017         int im_a_router = 0;
3018         int rc;
3019         int ni_bytes;
3020         struct lnet_ping_buffer *pbuf;
3021         struct lnet_handle_md ping_mdh;
3022         LIST_HEAD(net_head);
3023         struct lnet_net *net;
3024
3025         mutex_lock(&the_lnet.ln_api_mutex);
3026
3027         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
3028
3029         if (the_lnet.ln_state == LNET_STATE_STOPPING) {
3030                 mutex_unlock(&the_lnet.ln_api_mutex);
3031                 return -ESHUTDOWN;
3032         }
3033
3034         if (the_lnet.ln_refcount > 0) {
3035                 rc = the_lnet.ln_refcount++;
3036                 mutex_unlock(&the_lnet.ln_api_mutex);
3037                 return rc;
3038         }
3039
3040         rc = lnet_prepare(requested_pid);
3041         if (rc != 0) {
3042                 mutex_unlock(&the_lnet.ln_api_mutex);
3043                 return rc;
3044         }
3045
3046         /* create a network for Loopback network */
3047         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
3048         if (net == NULL) {
3049                 rc = -ENOMEM;
3050                 goto err_empty_list;
3051         }
3052
3053         /* Add in the loopback NI */
3054         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
3055                 rc = -ENOMEM;
3056                 goto err_empty_list;
3057         }
3058
3059         if (use_tcp_bonding)
3060                 CWARN("use_tcp_bonding has been removed. Use Multi-Rail and Dynamic Discovery instead, see LU-13641\n");
3061
3062         /* If LNet is being initialized via DLC it is possible
3063          * that the user requests not to load module parameters (ones which
3064          * are supported by DLC) on initialization.  Therefore, make sure not
3065          * to load networks, routes and forwarding from module parameters
3066          * in this case.  On cleanup in case of failure only clean up
3067          * routes if it has been loaded */
3068         if (!the_lnet.ln_nis_from_mod_params) {
3069                 rc = lnet_parse_networks(&net_head, lnet_get_networks());
3070                 if (rc < 0)
3071                         goto err_empty_list;
3072         }
3073
3074         rc = lnet_startup_lndnets(&net_head);
3075         if (rc < 0)
3076                 goto err_empty_list;
3077
3078         if (!the_lnet.ln_nis_from_mod_params) {
3079                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
3080                 if (rc != 0)
3081                         goto err_shutdown_lndnis;
3082
3083                 rc = lnet_rtrpools_alloc(im_a_router);
3084                 if (rc != 0)
3085                         goto err_destroy_routes;
3086         }
3087
3088         rc = lnet_acceptor_start();
3089         if (rc != 0)
3090                 goto err_destroy_routes;
3091
3092         the_lnet.ln_refcount = 1;
3093         /* Now I may use my own API functions... */
3094
3095         ni_bytes = LNET_PING_INFO_HDR_SIZE;
3096         list_for_each_entry(net, &the_lnet.ln_nets, net_list)
3097                 ni_bytes += lnet_get_net_ni_bytes_locked(net);
3098
3099         rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_bytes, true);
3100         if (rc != 0)
3101                 goto err_acceptor_stop;
3102
3103         lnet_ping_target_update(pbuf, ping_mdh);
3104
3105         the_lnet.ln_mt_handler = lnet_mt_event_handler;
3106
3107         rc = lnet_push_target_init();
3108         if (rc != 0)
3109                 goto err_stop_ping;
3110
3111         rc = lnet_monitor_thr_start();
3112         if (rc != 0)
3113                 goto err_destroy_push_target;
3114
3115         rc = lnet_peer_discovery_start();
3116         if (rc != 0)
3117                 goto err_stop_monitor_thr;
3118
3119         lnet_fault_init();
3120         lnet_router_debugfs_init();
3121
3122         mutex_unlock(&the_lnet.ln_api_mutex);
3123
3124         complete_all(&the_lnet.ln_started);
3125
3126         /* wait for all routers to start */
3127         lnet_wait_router_start();
3128
3129         return 0;
3130
3131 err_stop_monitor_thr:
3132         lnet_monitor_thr_stop();
3133 err_destroy_push_target:
3134         lnet_push_target_fini();
3135 err_stop_ping:
3136         lnet_ping_target_fini();
3137 err_acceptor_stop:
3138         the_lnet.ln_refcount = 0;
3139         lnet_acceptor_stop();
3140 err_destroy_routes:
3141         if (!the_lnet.ln_nis_from_mod_params)
3142                 lnet_destroy_routes();
3143 err_shutdown_lndnis:
3144         lnet_shutdown_lndnets();
3145 err_empty_list:
3146         lnet_unprepare();
3147         LASSERT(rc < 0);
3148         mutex_unlock(&the_lnet.ln_api_mutex);
3149         while ((net = list_first_entry_or_null(&net_head,
3150                                                struct lnet_net,
3151                                                net_list)) != NULL) {
3152                 list_del_init(&net->net_list);
3153                 lnet_net_free(net);
3154         }
3155         return rc;
3156 }
3157 EXPORT_SYMBOL(LNetNIInit);
3158
3159 /**
3160  * Stop LNet interfaces, routing, and forwarding.
3161  *
3162  * Users must call this function once for each successful call to LNetNIInit().
3163  * Once the LNetNIFini() operation has been started, the results of pending
3164  * API operations are undefined.
3165  *
3166  * \return always 0 for current implementation.
3167  */
3168 int
3169 LNetNIFini(void)
3170 {
3171         mutex_lock(&the_lnet.ln_api_mutex);
3172
3173         LASSERT(the_lnet.ln_refcount > 0);
3174
3175         if (the_lnet.ln_refcount != 1) {
3176                 the_lnet.ln_refcount--;
3177         } else {
3178                 LASSERT(!the_lnet.ln_niinit_self);
3179
3180                 lnet_net_lock(LNET_LOCK_EX);
3181                 the_lnet.ln_state = LNET_STATE_STOPPING;
3182                 lnet_net_unlock(LNET_LOCK_EX);
3183
3184                 lnet_fault_fini();
3185
3186                 lnet_router_debugfs_fini();
3187                 lnet_peer_discovery_stop();
3188                 lnet_monitor_thr_stop();
3189                 lnet_push_target_fini();
3190                 lnet_ping_target_fini();
3191
3192                 /* Teardown fns that use my own API functions BEFORE here */
3193                 the_lnet.ln_refcount = 0;
3194
3195                 lnet_acceptor_stop();
3196                 lnet_destroy_routes();
3197                 lnet_shutdown_lndnets();
3198                 lnet_unprepare();
3199         }
3200
3201         mutex_unlock(&the_lnet.ln_api_mutex);
3202         return 0;
3203 }
3204 EXPORT_SYMBOL(LNetNIFini);
3205
3206 /**
3207  * Grabs the ni data from the ni structure and fills the out
3208  * parameters
3209  *
3210  * \param[in] ni network        interface structure
3211  * \param[out] cfg_ni           NI config information
3212  * \param[out] tun              network and LND tunables
3213  */
3214 static void
3215 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
3216                    struct lnet_ioctl_config_lnd_tunables *tun,
3217                    struct lnet_ioctl_element_stats *stats,
3218                    __u32 tun_size)
3219 {
3220         size_t min_size = 0;
3221         int i;
3222
3223         if (!ni || !cfg_ni || !tun || !nid_is_nid4(&ni->ni_nid))
3224                 return;
3225
3226         if (ni->ni_interface != NULL) {
3227                 strncpy(cfg_ni->lic_ni_intf,
3228                         ni->ni_interface,
3229                         sizeof(cfg_ni->lic_ni_intf));
3230         }
3231
3232         cfg_ni->lic_nid = lnet_nid_to_nid4(&ni->ni_nid);
3233         cfg_ni->lic_status = lnet_ni_get_status_locked(ni);
3234         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
3235
3236         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
3237
3238         if (stats) {
3239                 stats->iel_send_count = lnet_sum_stats(&ni->ni_stats,
3240                                                        LNET_STATS_TYPE_SEND);
3241                 stats->iel_recv_count = lnet_sum_stats(&ni->ni_stats,
3242                                                        LNET_STATS_TYPE_RECV);
3243                 stats->iel_drop_count = lnet_sum_stats(&ni->ni_stats,
3244                                                        LNET_STATS_TYPE_DROP);
3245         }
3246
3247         /*
3248          * tun->lt_tun will always be present, but in order to be
3249          * backwards compatible, we need to deal with the cases when
3250          * tun->lt_tun is smaller than what the kernel has, because it
3251          * comes from an older version of a userspace program, then we'll
3252          * need to copy as much information as we have available space.
3253          */
3254         min_size = tun_size - sizeof(tun->lt_cmn);
3255         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
3256
3257         /* copy over the cpts */
3258         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
3259             ni->ni_cpts == NULL)  {
3260                 for (i = 0; i < ni->ni_ncpts; i++)
3261                         cfg_ni->lic_cpts[i] = i;
3262         } else {
3263                 for (i = 0;
3264                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
3265                      i < LNET_MAX_SHOW_NUM_CPT;
3266                      i++)
3267                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
3268         }
3269         cfg_ni->lic_ncpts = ni->ni_ncpts;
3270 }
3271
3272 /**
3273  * NOTE: This is a legacy function left in the code to be backwards
3274  * compatible with older userspace programs. It should eventually be
3275  * removed.
3276  *
3277  * Grabs the ni data from the ni structure and fills the out
3278  * parameters
3279  *
3280  * \param[in] ni network        interface structure
3281  * \param[out] config           config information
3282  */
3283 static void
3284 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
3285                          struct lnet_ioctl_config_data *config)
3286 {
3287         struct lnet_ioctl_net_config *net_config;
3288         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
3289         size_t min_size, tunable_size = 0;
3290         int i;
3291
3292         if (!ni || !config || !nid_is_nid4(&ni->ni_nid))
3293                 return;
3294
3295         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
3296         if (!net_config)
3297                 return;
3298
3299         if (!ni->ni_interface)
3300                 return;
3301
3302         strncpy(net_config->ni_interface,
3303                 ni->ni_interface,
3304                 sizeof(net_config->ni_interface));
3305
3306         config->cfg_nid = lnet_nid_to_nid4(&ni->ni_nid);
3307         config->cfg_config_u.cfg_net.net_peer_timeout =
3308                 ni->ni_net->net_tunables.lct_peer_timeout;
3309         config->cfg_config_u.cfg_net.net_max_tx_credits =
3310                 ni->ni_net->net_tunables.lct_max_tx_credits;
3311         config->cfg_config_u.cfg_net.net_peer_tx_credits =
3312                 ni->ni_net->net_tunables.lct_peer_tx_credits;
3313         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
3314                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
3315
3316         net_config->ni_status = lnet_ni_get_status_locked(ni);
3317
3318         if (ni->ni_cpts) {
3319                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
3320
3321                 for (i = 0; i < num_cpts; i++)
3322                         net_config->ni_cpts[i] = ni->ni_cpts[i];
3323
3324                 config->cfg_ncpts = num_cpts;
3325         }
3326
3327         /*
3328          * See if user land tools sent in a newer and larger version
3329          * of struct lnet_tunables than what the kernel uses.
3330          */
3331         min_size = sizeof(*config) + sizeof(*net_config);
3332
3333         if (config->cfg_hdr.ioc_len > min_size)
3334                 tunable_size = config->cfg_hdr.ioc_len - min_size;
3335
3336         /* Don't copy too much data to user space */
3337         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
3338         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
3339
3340         if (lnd_cfg && min_size) {
3341                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
3342                 config->cfg_config_u.cfg_net.net_interface_count = 1;
3343
3344                 /* Tell user land that kernel side has less data */
3345                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
3346                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
3347                         config->cfg_hdr.ioc_len -= min_size;
3348                 }
3349         }
3350 }
3351
3352 struct lnet_ni *
3353 lnet_get_ni_idx_locked(int idx)
3354 {
3355         struct lnet_ni          *ni;
3356         struct lnet_net         *net;
3357
3358         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3359                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3360                         if (idx-- == 0)
3361                                 return ni;
3362                 }
3363         }
3364
3365         return NULL;
3366 }
3367
3368 int lnet_get_net_healthv_locked(struct lnet_net *net)
3369 {
3370         struct lnet_ni *ni;
3371         int best_healthv = 0;
3372         int healthv, ni_fatal;
3373
3374         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3375                 healthv = atomic_read(&ni->ni_healthv);
3376                 ni_fatal = atomic_read(&ni->ni_fatal_error_on);
3377                 if (!ni_fatal && healthv > best_healthv)
3378                         best_healthv = healthv;
3379         }
3380
3381         return best_healthv;
3382 }
3383
3384 struct lnet_ni *
3385 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
3386 {
3387         struct lnet_ni          *ni;
3388         struct lnet_net         *net = mynet;
3389
3390         /*
3391          * It is possible that the net has been cleaned out while there is
3392          * a message being sent. This function accessed the net without
3393          * checking if the list is empty
3394          */
3395         if (!prev) {
3396                 if (!net)
3397                         net = list_first_entry(&the_lnet.ln_nets,
3398                                                struct lnet_net,
3399                                                net_list);
3400                 if (list_empty(&net->net_ni_list))
3401                         return NULL;
3402                 ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
3403                                       ni_netlist);
3404
3405                 return ni;
3406         }
3407
3408         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
3409                 /* if you reached the end of the ni list and the net is
3410                  * specified, then there are no more nis in that net */
3411                 if (net != NULL)
3412                         return NULL;
3413
3414                 /* we reached the end of this net ni list. move to the
3415                  * next net */
3416                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
3417                         /* no more nets and no more NIs. */
3418                         return NULL;
3419
3420                 /* get the next net */
3421                 net = list_first_entry(&prev->ni_net->net_list, struct lnet_net,
3422                                        net_list);
3423                 if (list_empty(&net->net_ni_list))
3424                         return NULL;
3425                 /* get the ni on it */
3426                 ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
3427                                       ni_netlist);
3428
3429                 return ni;
3430         }
3431
3432         if (list_empty(&prev->ni_netlist))
3433                 return NULL;
3434
3435         /* there are more nis left */
3436         ni = list_first_entry(&prev->ni_netlist, struct lnet_ni, ni_netlist);
3437
3438         return ni;
3439 }
3440
3441 static int
3442 lnet_get_net_config(struct lnet_ioctl_config_data *config)
3443 {
3444         struct lnet_ni *ni;
3445         int cpt;
3446         int rc = -ENOENT;
3447         int idx = config->cfg_count;
3448
3449         cpt = lnet_net_lock_current();
3450
3451         ni = lnet_get_ni_idx_locked(idx);
3452
3453         if (ni != NULL) {
3454                 rc = 0;
3455                 lnet_ni_lock(ni);
3456                 lnet_fill_ni_info_legacy(ni, config);
3457                 lnet_ni_unlock(ni);
3458         }
3459
3460         lnet_net_unlock(cpt);
3461         return rc;
3462 }
3463
3464 static int
3465 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
3466                    struct lnet_ioctl_config_lnd_tunables *tun,
3467                    struct lnet_ioctl_element_stats *stats,
3468                    __u32 tun_size)
3469 {
3470         struct lnet_ni          *ni;
3471         int                     cpt;
3472         int                     rc = -ENOENT;
3473
3474         if (!cfg_ni || !tun || !stats)
3475                 return -EINVAL;
3476
3477         cpt = lnet_net_lock_current();
3478
3479         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
3480
3481         if (ni) {
3482                 rc = 0;
3483                 lnet_ni_lock(ni);
3484                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
3485                 lnet_ni_unlock(ni);
3486         }
3487
3488         lnet_net_unlock(cpt);
3489         return rc;
3490 }
3491
3492 static int lnet_get_ni_stats(struct lnet_ioctl_element_msg_stats *msg_stats)
3493 {
3494         struct lnet_ni *ni;
3495         int rc = -ENOENT;
3496
3497         if (!msg_stats)
3498                 return -EINVAL;
3499
3500         ni = lnet_get_ni_idx_locked(msg_stats->im_idx);
3501
3502         if (ni) {
3503                 lnet_usr_translate_stats(msg_stats, &ni->ni_stats);
3504                 rc = 0;
3505         }
3506
3507         return rc;
3508 }
3509
3510 static int lnet_add_net_common(struct lnet_net *net,
3511                                struct lnet_ioctl_config_lnd_tunables *tun)
3512 {
3513         struct lnet_handle_md ping_mdh;
3514         struct lnet_ping_buffer *pbuf;
3515         struct lnet_remotenet *rnet;
3516         struct lnet_ni *ni;
3517         u32 net_id;
3518         int rc;
3519
3520         lnet_net_lock(LNET_LOCK_EX);
3521         rnet = lnet_find_rnet_locked(net->net_id);
3522         lnet_net_unlock(LNET_LOCK_EX);
3523         /*
3524          * make sure that the net added doesn't invalidate the current
3525          * configuration LNet is keeping
3526          */
3527         if (rnet) {
3528                 CERROR("Adding net %s will invalidate routing configuration\n",
3529                        libcfs_net2str(net->net_id));
3530                 lnet_net_free(net);
3531                 return -EUSERS;
3532         }
3533
3534         if (tun)
3535                 memcpy(&net->net_tunables,
3536                        &tun->lt_cmn, sizeof(net->net_tunables));
3537         else
3538                 memset(&net->net_tunables, -1, sizeof(net->net_tunables));
3539
3540         net_id = net->net_id;
3541
3542         rc = lnet_startup_lndnet(net,
3543                                  (tun) ? &tun->lt_tun : NULL);
3544         if (rc < 0)
3545                 return rc;
3546
3547         /* make sure you calculate the correct number of slots in the ping
3548          * buffer. Since the ping info is a flattened list of all the NIs,
3549          * we should allocate enough slots to accomodate the number of NIs
3550          * which will be added.
3551          */
3552         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3553                                     LNET_PING_INFO_HDR_SIZE +
3554                                     lnet_get_ni_bytes(),
3555                                     false);
3556         if (rc < 0) {
3557                 lnet_shutdown_lndnet(net);
3558                 return rc;
3559         }
3560
3561         lnet_net_lock(LNET_LOCK_EX);
3562         net = lnet_get_net_locked(net_id);
3563         LASSERT(net);
3564
3565         /* apply the UDSPs */
3566         rc = lnet_udsp_apply_policies_on_net(net);
3567         if (rc)
3568                 CERROR("Failed to apply UDSPs on local net %s\n",
3569                        libcfs_net2str(net->net_id));
3570
3571         /* At this point we lost track of which NI was just added, so we
3572          * just re-apply the policies on all of the NIs on this net
3573          */
3574         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3575                 rc = lnet_udsp_apply_policies_on_ni(ni);
3576                 if (rc)
3577                         CERROR("Failed to apply UDSPs on ni %s\n",
3578                                libcfs_nidstr(&ni->ni_nid));
3579         }
3580         lnet_net_unlock(LNET_LOCK_EX);
3581
3582         /*
3583          * Start the acceptor thread if this is the first network
3584          * being added that requires the thread.
3585          */
3586         if (net->net_lnd->lnd_accept) {
3587                 rc = lnet_acceptor_start();
3588                 if (rc < 0) {
3589                         /* shutdown the net that we just started */
3590                         CERROR("Failed to start up acceptor thread\n");
3591                         lnet_shutdown_lndnet(net);
3592                         goto failed;
3593                 }
3594         }
3595
3596         lnet_net_lock(LNET_LOCK_EX);
3597         lnet_peer_net_added(net);
3598         lnet_net_unlock(LNET_LOCK_EX);
3599
3600         lnet_ping_target_update(pbuf, ping_mdh);
3601
3602         return 0;
3603
3604 failed:
3605         lnet_ping_md_unlink(pbuf, &ping_mdh);
3606         lnet_ping_buffer_decref(pbuf);
3607         return rc;
3608 }
3609
3610 static void
3611 lnet_set_tune_defaults(struct lnet_ioctl_config_lnd_tunables *tun)
3612 {
3613         if (tun) {
3614                 if (tun->lt_cmn.lct_peer_timeout < 0)
3615                         tun->lt_cmn.lct_peer_timeout = DEFAULT_PEER_TIMEOUT;
3616                 if (!tun->lt_cmn.lct_peer_tx_credits)
3617                         tun->lt_cmn.lct_peer_tx_credits = DEFAULT_PEER_CREDITS;
3618                 if (!tun->lt_cmn.lct_max_tx_credits)
3619                         tun->lt_cmn.lct_max_tx_credits = DEFAULT_CREDITS;
3620         }
3621 }
3622
3623 static int lnet_handle_legacy_ip2nets(char *ip2nets,
3624                                       struct lnet_ioctl_config_lnd_tunables *tun)
3625 {
3626         struct lnet_net *net;
3627         const char *nets;
3628         int rc;
3629         LIST_HEAD(net_head);
3630
3631         rc = lnet_parse_ip2nets(&nets, ip2nets);
3632         if (rc < 0)
3633                 return rc;
3634
3635         rc = lnet_parse_networks(&net_head, nets);
3636         if (rc < 0)
3637                 return rc;
3638
3639         lnet_set_tune_defaults(tun);
3640
3641         mutex_lock(&the_lnet.ln_api_mutex);
3642         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3643                 rc = -ESHUTDOWN;
3644                 goto out;
3645         }
3646
3647         while ((net = list_first_entry_or_null(&net_head,
3648                                                struct lnet_net,
3649                                                net_list)) != NULL) {
3650                 list_del_init(&net->net_list);
3651                 rc = lnet_add_net_common(net, tun);
3652                 if (rc < 0)
3653                         goto out;
3654         }
3655
3656 out:
3657         mutex_unlock(&the_lnet.ln_api_mutex);
3658
3659         while ((net = list_first_entry_or_null(&net_head,
3660                                                struct lnet_net,
3661                                                net_list)) != NULL) {
3662                 list_del_init(&net->net_list);
3663                 lnet_net_free(net);
3664         }
3665         return rc;
3666 }
3667
3668 int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf, u32 net_id,
3669                     struct lnet_ioctl_config_lnd_tunables *tun)
3670 {
3671         struct lnet_net *net;
3672         struct lnet_ni *ni;
3673         int rc, i;
3674         u32 lnd_type;
3675
3676         /* handle legacy ip2nets from DLC */
3677         if (conf->lic_legacy_ip2nets[0] != '\0')
3678                 return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
3679                                                   tun);
3680
3681         lnd_type = LNET_NETTYP(net_id);
3682
3683         if (!libcfs_isknown_lnd(lnd_type)) {
3684                 CERROR("No valid net and lnd information provided\n");
3685                 return -ENOENT;
3686         }
3687
3688         net = lnet_net_alloc(net_id, NULL);
3689         if (!net)
3690                 return -ENOMEM;
3691
3692         for (i = 0; i < conf->lic_ncpts; i++) {
3693                 if (conf->lic_cpts[i] >= LNET_CPT_NUMBER) {
3694                         lnet_net_free(net);
3695                         return -ERANGE;
3696                 }
3697         }
3698
3699         ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
3700                                        conf->lic_ni_intf);
3701         if (!ni) {
3702                 lnet_net_free(net);
3703                 return -ENOMEM;
3704         }
3705
3706         lnet_set_tune_defaults(tun);
3707
3708         mutex_lock(&the_lnet.ln_api_mutex);
3709         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3710                 lnet_net_free(net);
3711                 rc = -ESHUTDOWN;
3712         } else {
3713                 rc = lnet_add_net_common(net, tun);
3714         }
3715
3716         mutex_unlock(&the_lnet.ln_api_mutex);
3717
3718         /* If NI already exist delete this new unused copy */
3719         if (rc == -EEXIST)
3720                 lnet_ni_free(ni);
3721
3722         return rc;
3723 }
3724
3725 int lnet_dyn_del_ni(struct lnet_nid *nid)
3726 {
3727         struct lnet_net *net;
3728         struct lnet_ni *ni;
3729         u32 net_id = LNET_NID_NET(nid);
3730         struct lnet_ping_buffer *pbuf;
3731         struct lnet_handle_md ping_mdh;
3732         int net_bytes, rc;
3733         bool net_empty;
3734
3735         /* don't allow userspace to shutdown the LOLND */
3736         if (LNET_NETTYP(net_id) == LOLND)
3737                 return -EINVAL;
3738
3739         mutex_lock(&the_lnet.ln_api_mutex);
3740         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3741                 rc = -ESHUTDOWN;
3742                 goto unlock_api_mutex;
3743         }
3744
3745         lnet_net_lock(0);
3746
3747         net = lnet_get_net_locked(net_id);
3748         if (!net) {
3749                 CERROR("net %s not found\n",
3750                        libcfs_net2str(net_id));
3751                 rc = -ENOENT;
3752                 goto unlock_net;
3753         }
3754
3755         if (!nid_addr_is_set(nid)) {
3756                 /* remove the entire net */
3757                 net_bytes = lnet_get_net_ni_bytes_locked(net);
3758
3759                 lnet_net_unlock(0);
3760
3761                 /* create and link a new ping info, before removing the old one */
3762                 rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3763                                             LNET_PING_INFO_HDR_SIZE +
3764                                             lnet_get_ni_bytes() - net_bytes,
3765                                             false);
3766                 if (rc != 0)
3767                         goto unlock_api_mutex;
3768
3769                 lnet_shutdown_lndnet(net);
3770
3771                 lnet_acceptor_stop();
3772
3773                 lnet_ping_target_update(pbuf, ping_mdh);
3774
3775                 goto unlock_api_mutex;
3776         }
3777
3778         ni = lnet_nid_to_ni_locked(nid, 0);
3779         if (!ni) {
3780                 CERROR("nid %s not found\n", libcfs_nidstr(nid));
3781                 rc = -ENOENT;
3782                 goto unlock_net;
3783         }
3784
3785         net_bytes = lnet_get_net_ni_bytes_locked(net);
3786         net_empty = list_is_singular(&net->net_ni_list);
3787
3788         lnet_net_unlock(0);
3789
3790         /* create and link a new ping info, before removing the old one */
3791         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3792                                     (LNET_PING_INFO_HDR_SIZE +
3793                                      lnet_get_ni_bytes() -
3794                                      lnet_ping_sts_size(&ni->ni_nid)),
3795                                     false);
3796         if (rc != 0)
3797                 goto unlock_api_mutex;
3798
3799         lnet_shutdown_lndni(ni);
3800
3801         lnet_acceptor_stop();
3802
3803         lnet_ping_target_update(pbuf, ping_mdh);
3804
3805         /* check if the net is empty and remove it if it is */
3806         if (net_empty)
3807                 lnet_shutdown_lndnet(net);
3808
3809         goto unlock_api_mutex;
3810
3811 unlock_net:
3812         lnet_net_unlock(0);
3813 unlock_api_mutex:
3814         mutex_unlock(&the_lnet.ln_api_mutex);
3815
3816         return rc;
3817 }
3818
3819 /*
3820  * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
3821  * They are only expected to be called for unique networks.
3822  * That can be as a result of older DLC library
3823  * calls. Multi-Rail DLC and beyond no longer uses these APIs.
3824  */
3825 int
3826 lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
3827 {
3828         struct lnet_net *net;
3829         LIST_HEAD(net_head);
3830         int rc;
3831         struct lnet_ioctl_config_lnd_tunables tun;
3832         const char *nets = conf->cfg_config_u.cfg_net.net_intf;
3833
3834         /* Create a net/ni structures for the network string */
3835         rc = lnet_parse_networks(&net_head, nets);
3836         if (rc <= 0)
3837                 return rc == 0 ? -EINVAL : rc;
3838
3839         mutex_lock(&the_lnet.ln_api_mutex);
3840         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3841                 rc = -ESHUTDOWN;
3842                 goto out_unlock_clean;
3843         }
3844
3845         if (rc > 1) {
3846                 rc = -EINVAL; /* only add one network per call */
3847                 goto out_unlock_clean;
3848         }
3849
3850         net = list_first_entry(&net_head, struct lnet_net, net_list);
3851         list_del_init(&net->net_list);
3852
3853         LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));
3854
3855         memset(&tun, 0, sizeof(tun));
3856
3857         tun.lt_cmn.lct_peer_timeout =
3858           (!conf->cfg_config_u.cfg_net.net_peer_timeout) ? DEFAULT_PEER_TIMEOUT :
3859                 conf->cfg_config_u.cfg_net.net_peer_timeout;
3860         tun.lt_cmn.lct_peer_tx_credits =
3861           (!conf->cfg_config_u.cfg_net.net_peer_tx_credits) ? DEFAULT_PEER_CREDITS :
3862                 conf->cfg_config_u.cfg_net.net_peer_tx_credits;
3863         tun.lt_cmn.lct_peer_rtr_credits =
3864           conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
3865         tun.lt_cmn.lct_max_tx_credits =
3866           (!conf->cfg_config_u.cfg_net.net_max_tx_credits) ? DEFAULT_CREDITS :
3867                 conf->cfg_config_u.cfg_net.net_max_tx_credits;
3868
3869         rc = lnet_add_net_common(net, &tun);
3870
3871 out_unlock_clean:
3872         mutex_unlock(&the_lnet.ln_api_mutex);
3873         /* net_head list is empty in success case */
3874         while ((net = list_first_entry_or_null(&net_head,
3875                                                struct lnet_net,
3876                                                net_list)) != NULL) {
3877                 list_del_init(&net->net_list);
3878                 lnet_net_free(net);
3879         }
3880         return rc;
3881 }
3882
3883 int
3884 lnet_dyn_del_net(u32 net_id)
3885 {
3886         struct lnet_net *net;
3887         struct lnet_ping_buffer *pbuf;
3888         struct lnet_handle_md ping_mdh;
3889         int net_ni_bytes, rc;
3890
3891         /* don't allow userspace to shutdown the LOLND */
3892         if (LNET_NETTYP(net_id) == LOLND)
3893                 return -EINVAL;
3894
3895         mutex_lock(&the_lnet.ln_api_mutex);
3896         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3897                 rc = -ESHUTDOWN;
3898                 goto out;
3899         }
3900
3901         lnet_net_lock(0);
3902
3903         net = lnet_get_net_locked(net_id);
3904         if (net == NULL) {
3905                 lnet_net_unlock(0);
3906                 rc = -EINVAL;
3907                 goto out;
3908         }
3909
3910         net_ni_bytes = lnet_get_net_ni_bytes_locked(net);
3911
3912         lnet_net_unlock(0);
3913
3914         /* create and link a new ping info, before removing the old one */
3915         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3916                                     LNET_PING_INFO_HDR_SIZE +
3917                                     lnet_get_ni_bytes() - net_ni_bytes,
3918                                     false);
3919         if (rc != 0)
3920                 goto out;
3921
3922         lnet_shutdown_lndnet(net);
3923
3924         lnet_acceptor_stop();
3925
3926         lnet_ping_target_update(pbuf, ping_mdh);
3927
3928 out:
3929         mutex_unlock(&the_lnet.ln_api_mutex);
3930
3931         return rc;
3932 }
3933
3934 void lnet_mark_ping_buffer_for_update(void)
3935 {
3936         if (the_lnet.ln_routing)
3937                 return;
3938
3939         atomic_set(&the_lnet.ln_update_ping_buf, 1);
3940         complete(&the_lnet.ln_mt_wait_complete);
3941 }
3942 EXPORT_SYMBOL(lnet_mark_ping_buffer_for_update);
3943
3944 void lnet_update_ping_buffer(struct work_struct *work)
3945 {
3946         struct lnet_ping_buffer *pbuf;
3947         struct lnet_handle_md ping_mdh;
3948
3949         mutex_lock(&the_lnet.ln_api_mutex);
3950
3951         atomic_set(&the_lnet.ln_pb_update_ready, 1);
3952
3953         if ((the_lnet.ln_state == LNET_STATE_RUNNING) &&
3954             !lnet_ping_target_setup(&pbuf, &ping_mdh,
3955                                     LNET_PING_INFO_HDR_SIZE +
3956                                     lnet_get_ni_bytes(),
3957                                     false))
3958                 lnet_ping_target_update(pbuf, ping_mdh);
3959
3960
3961         mutex_unlock(&the_lnet.ln_api_mutex);
3962 }
3963
3964
3965 void lnet_queue_ping_buffer_update(void)
3966 {
3967         /* don't queue pb update if it is not needed */
3968         if (atomic_dec_if_positive(&the_lnet.ln_update_ping_buf) < 0)
3969                 return;
3970
3971         /* don't queue pb update if already queued and not processed */
3972         if (atomic_dec_if_positive(&the_lnet.ln_pb_update_ready) < 0)
3973                 return;
3974
3975         INIT_WORK(&the_lnet.ln_pb_update_work, lnet_update_ping_buffer);
3976         queue_work(the_lnet.ln_pb_update_wq, &the_lnet.ln_pb_update_work);
3977 }
3978
3979 void lnet_incr_dlc_seq(void)
3980 {
3981         atomic_inc(&lnet_dlc_seq_no);
3982 }
3983
3984 __u32 lnet_get_dlc_seq_locked(void)
3985 {
3986         return atomic_read(&lnet_dlc_seq_no);
3987 }
3988
3989 static void
3990 lnet_ni_set_healthv(lnet_nid_t nid, int value, bool all)
3991 {
3992         struct lnet_net *net;
3993         struct lnet_ni *ni;
3994
3995         lnet_net_lock(LNET_LOCK_EX);
3996         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3997                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3998                         if (all || (nid_is_nid4(&ni->ni_nid) &&
3999                                     lnet_nid_to_nid4(&ni->ni_nid) == nid)) {
4000                                 atomic_set(&ni->ni_healthv, value);
4001                                 if (list_empty(&ni->ni_recovery) &&
4002                                     value < LNET_MAX_HEALTH_VALUE) {
4003                                         CERROR("manually adding local NI %s to recovery\n",
4004                                                libcfs_nidstr(&ni->ni_nid));
4005                                         list_add_tail(&ni->ni_recovery,
4006                                                       &the_lnet.ln_mt_localNIRecovq);
4007                                         lnet_ni_addref_locked(ni, 0);
4008                                 }
4009                                 if (!all) {
4010                                         lnet_net_unlock(LNET_LOCK_EX);
4011                                         return;
4012                                 }
4013                         }
4014                 }
4015         }
4016         lnet_net_unlock(LNET_LOCK_EX);
4017 }
4018
4019 static void
4020 lnet_ni_set_conns_per_peer(lnet_nid_t nid, int value, bool all)
4021 {
4022         struct lnet_net *net;
4023         struct lnet_ni *ni;
4024
4025         lnet_net_lock(LNET_LOCK_EX);
4026         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
4027                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
4028                         if (lnet_nid_to_nid4(&ni->ni_nid) != nid && !all)
4029                                 continue;
4030                         if (LNET_NETTYP(net->net_id) == SOCKLND)
4031                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_sock.lnd_conns_per_peer = value;
4032                         else if (LNET_NETTYP(net->net_id) == O2IBLND)
4033                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer = value;
4034                         if (!all) {
4035                                 lnet_net_unlock(LNET_LOCK_EX);
4036                                 return;
4037                         }
4038                 }
4039         }
4040         lnet_net_unlock(LNET_LOCK_EX);
4041 }
4042
4043 static int
4044 lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats)
4045 {
4046         int cpt, rc = 0;
4047         struct lnet_ni *ni;
4048         struct lnet_nid nid;
4049
4050         lnet_nid4_to_nid(stats->hlni_nid, &nid);
4051         cpt = lnet_net_lock_current();
4052         ni = lnet_nid_to_ni_locked(&nid, cpt);
4053         if (!ni) {
4054                 rc = -ENOENT;
4055                 goto unlock;
4056         }
4057
4058         stats->hlni_local_interrupt = atomic_read(&ni->ni_hstats.hlt_local_interrupt);
4059         stats->hlni_local_dropped = atomic_read(&ni->ni_hstats.hlt_local_dropped);
4060         stats->hlni_local_aborted = atomic_read(&ni->ni_hstats.hlt_local_aborted);
4061         stats->hlni_local_no_route = atomic_read(&ni->ni_hstats.hlt_local_no_route);
4062         stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout);
4063         stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error);
4064         stats->hlni_fatal_error = atomic_read(&ni->ni_fatal_error_on);
4065         stats->hlni_health_value = atomic_read(&ni->ni_healthv);
4066         stats->hlni_ping_count = ni->ni_ping_count;
4067         stats->hlni_next_ping = ni->ni_next_ping;
4068
4069 unlock:
4070         lnet_net_unlock(cpt);
4071
4072         return rc;
4073 }
4074
4075 static int
4076 lnet_get_local_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4077 {
4078         struct lnet_ni *ni;
4079         int i = 0;
4080
4081         lnet_net_lock(LNET_LOCK_EX);
4082         list_for_each_entry(ni, &the_lnet.ln_mt_localNIRecovq, ni_recovery) {
4083                 if (!nid_is_nid4(&ni->ni_nid))
4084                         continue;
4085                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&ni->ni_nid);
4086                 i++;
4087                 if (i >= LNET_MAX_SHOW_NUM_NID)
4088                         break;
4089         }
4090         lnet_net_unlock(LNET_LOCK_EX);
4091         list->rlst_num_nids = i;
4092
4093         return 0;
4094 }
4095
4096 static int
4097 lnet_get_peer_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4098 {
4099         struct lnet_peer_ni *lpni;
4100         int i = 0;
4101
4102         lnet_net_lock(LNET_LOCK_EX);
4103         list_for_each_entry(lpni, &the_lnet.ln_mt_peerNIRecovq, lpni_recovery) {
4104                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&lpni->lpni_nid);
4105                 i++;
4106                 if (i >= LNET_MAX_SHOW_NUM_NID)
4107                         break;
4108         }
4109         lnet_net_unlock(LNET_LOCK_EX);
4110         list->rlst_num_nids = i;
4111
4112         return 0;
4113 }
4114
4115 /**
4116  * LNet ioctl handler.
4117  *
4118  */
4119 int
4120 LNetCtl(unsigned int cmd, void *arg)
4121 {
4122         struct libcfs_ioctl_data *data = arg;
4123         struct lnet_ioctl_config_data *config;
4124         struct lnet_ni           *ni;
4125         struct lnet_nid           nid;
4126         int                       rc;
4127
4128         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
4129                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
4130
4131         switch (cmd) {
4132         case IOC_LIBCFS_GET_NI: {
4133                 struct lnet_processid id = {};
4134
4135                 rc = LNetGetId(data->ioc_count, &id, false);
4136                 data->ioc_nid = lnet_nid_to_nid4(&id.nid);
4137                 return rc;
4138         }
4139         case IOC_LIBCFS_FAIL_NID:
4140                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
4141
4142         case IOC_LIBCFS_ADD_ROUTE: {
4143                 /* default router sensitivity to 1 */
4144                 unsigned int sensitivity = 1;
4145                 config = arg;
4146
4147                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4148                         return -EINVAL;
4149
4150                 if (config->cfg_config_u.cfg_route.rtr_sensitivity) {
4151                         sensitivity =
4152                           config->cfg_config_u.cfg_route.rtr_sensitivity;
4153                 }
4154
4155                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4156                 mutex_lock(&the_lnet.ln_api_mutex);
4157                 rc = lnet_add_route(config->cfg_net,
4158                                     config->cfg_config_u.cfg_route.rtr_hop,
4159                                     &nid,
4160                                     config->cfg_config_u.cfg_route.
4161                                         rtr_priority, sensitivity);
4162                 mutex_unlock(&the_lnet.ln_api_mutex);
4163                 return rc;
4164         }
4165
4166         case IOC_LIBCFS_DEL_ROUTE:
4167                 config = arg;
4168
4169                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4170                         return -EINVAL;
4171
4172                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4173                 mutex_lock(&the_lnet.ln_api_mutex);
4174                 rc = lnet_del_route(config->cfg_net, &nid);
4175                 mutex_unlock(&the_lnet.ln_api_mutex);
4176                 return rc;
4177
4178         case IOC_LIBCFS_GET_ROUTE:
4179                 config = arg;
4180
4181                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4182                         return -EINVAL;
4183
4184                 mutex_lock(&the_lnet.ln_api_mutex);
4185                 rc = lnet_get_route(config->cfg_count,
4186                                     &config->cfg_net,
4187                                     &config->cfg_config_u.cfg_route.rtr_hop,
4188                                     &config->cfg_nid,
4189                                     &config->cfg_config_u.cfg_route.rtr_flags,
4190                                     &config->cfg_config_u.cfg_route.
4191                                         rtr_priority,
4192                                     &config->cfg_config_u.cfg_route.
4193                                         rtr_sensitivity);
4194                 mutex_unlock(&the_lnet.ln_api_mutex);
4195                 return rc;
4196
4197         case IOC_LIBCFS_GET_LOCAL_NI: {
4198                 struct lnet_ioctl_config_ni *cfg_ni;
4199                 struct lnet_ioctl_config_lnd_tunables *tun = NULL;
4200                 struct lnet_ioctl_element_stats *stats;
4201                 __u32 tun_size;
4202
4203                 cfg_ni = arg;
4204
4205                 /* get the tunables if they are available */
4206                 if (cfg_ni->lic_cfg_hdr.ioc_len <
4207                     sizeof(*cfg_ni) + sizeof(*stats) + sizeof(*tun))
4208                         return -EINVAL;
4209
4210                 stats = (struct lnet_ioctl_element_stats *)
4211                         cfg_ni->lic_bulk;
4212                 tun = (struct lnet_ioctl_config_lnd_tunables *)
4213                                 (cfg_ni->lic_bulk + sizeof(*stats));
4214
4215                 tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
4216                         sizeof(*stats);
4217
4218                 mutex_lock(&the_lnet.ln_api_mutex);
4219                 rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
4220                 mutex_unlock(&the_lnet.ln_api_mutex);
4221                 return rc;
4222         }
4223
4224         case IOC_LIBCFS_GET_LOCAL_NI_MSG_STATS: {
4225                 struct lnet_ioctl_element_msg_stats *msg_stats = arg;
4226                 int cpt;
4227
4228                 if (msg_stats->im_hdr.ioc_len != sizeof(*msg_stats))
4229                         return -EINVAL;
4230
4231                 mutex_lock(&the_lnet.ln_api_mutex);
4232
4233                 cpt = lnet_net_lock_current();
4234                 rc = lnet_get_ni_stats(msg_stats);
4235                 lnet_net_unlock(cpt);
4236
4237                 mutex_unlock(&the_lnet.ln_api_mutex);
4238
4239                 return rc;
4240         }
4241
4242         case IOC_LIBCFS_GET_NET: {
4243                 size_t total = sizeof(*config) +
4244                                sizeof(struct lnet_ioctl_net_config);
4245                 config = arg;
4246
4247                 if (config->cfg_hdr.ioc_len < total)
4248                         return -EINVAL;
4249
4250                 mutex_lock(&the_lnet.ln_api_mutex);
4251                 rc = lnet_get_net_config(config);
4252                 mutex_unlock(&the_lnet.ln_api_mutex);
4253                 return rc;
4254         }
4255
4256         case IOC_LIBCFS_GET_LNET_STATS:
4257         {
4258                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
4259
4260                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
4261                         return -EINVAL;
4262
4263                 mutex_lock(&the_lnet.ln_api_mutex);
4264                 rc = lnet_counters_get(&lnet_stats->st_cntrs);
4265                 mutex_unlock(&the_lnet.ln_api_mutex);
4266                 return rc;
4267         }
4268
4269         case IOC_LIBCFS_RESET_LNET_STATS:
4270         {
4271                 mutex_lock(&the_lnet.ln_api_mutex);
4272                 lnet_counters_reset();
4273                 mutex_unlock(&the_lnet.ln_api_mutex);
4274                 return 0;
4275         }
4276
4277         case IOC_LIBCFS_CONFIG_RTR:
4278                 config = arg;
4279
4280                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4281                         return -EINVAL;
4282
4283                 mutex_lock(&the_lnet.ln_api_mutex);
4284                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
4285                         rc = lnet_rtrpools_enable();
4286                         mutex_unlock(&the_lnet.ln_api_mutex);
4287                         return rc;
4288                 }
4289                 lnet_rtrpools_disable();
4290                 mutex_unlock(&the_lnet.ln_api_mutex);
4291                 return 0;
4292
4293         case IOC_LIBCFS_ADD_BUF:
4294                 config = arg;
4295
4296                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4297                         return -EINVAL;
4298
4299                 mutex_lock(&the_lnet.ln_api_mutex);
4300                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
4301                                                 buf_tiny,
4302                                           config->cfg_config_u.cfg_buffers.
4303                                                 buf_small,
4304                                           config->cfg_config_u.cfg_buffers.
4305                                                 buf_large);
4306                 mutex_unlock(&the_lnet.ln_api_mutex);
4307                 return rc;
4308
4309         case IOC_LIBCFS_SET_NUMA_RANGE: {
4310                 struct lnet_ioctl_set_value *numa;
4311                 numa = arg;
4312                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4313                         return -EINVAL;
4314                 lnet_net_lock(LNET_LOCK_EX);
4315                 lnet_numa_range = numa->sv_value;
4316                 lnet_net_unlock(LNET_LOCK_EX);
4317                 return 0;
4318         }
4319
4320         case IOC_LIBCFS_GET_NUMA_RANGE: {
4321                 struct lnet_ioctl_set_value *numa;
4322                 numa = arg;
4323                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4324                         return -EINVAL;
4325                 numa->sv_value = lnet_numa_range;
4326                 return 0;
4327         }
4328
4329         case IOC_LIBCFS_GET_BUF: {
4330                 struct lnet_ioctl_pool_cfg *pool_cfg;
4331                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
4332
4333                 config = arg;
4334
4335                 if (config->cfg_hdr.ioc_len < total)
4336                         return -EINVAL;
4337
4338                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
4339
4340                 mutex_lock(&the_lnet.ln_api_mutex);
4341                 rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
4342                 mutex_unlock(&the_lnet.ln_api_mutex);
4343                 return rc;
4344         }
4345
4346         case IOC_LIBCFS_GET_LOCAL_HSTATS: {
4347                 struct lnet_ioctl_local_ni_hstats *stats = arg;
4348
4349                 if (stats->hlni_hdr.ioc_len < sizeof(*stats))
4350                         return -EINVAL;
4351
4352                 mutex_lock(&the_lnet.ln_api_mutex);
4353                 rc = lnet_get_local_ni_hstats(stats);
4354                 mutex_unlock(&the_lnet.ln_api_mutex);
4355
4356                 return rc;
4357         }
4358
4359         case IOC_LIBCFS_GET_RECOVERY_QUEUE: {
4360                 struct lnet_ioctl_recovery_list *list = arg;
4361                 if (list->rlst_hdr.ioc_len < sizeof(*list))
4362                         return -EINVAL;
4363
4364                 mutex_lock(&the_lnet.ln_api_mutex);
4365                 if (list->rlst_type == LNET_HEALTH_TYPE_LOCAL_NI)
4366                         rc = lnet_get_local_ni_recovery_list(list);
4367                 else
4368                         rc = lnet_get_peer_ni_recovery_list(list);
4369                 mutex_unlock(&the_lnet.ln_api_mutex);
4370                 return rc;
4371         }
4372
4373         case IOC_LIBCFS_ADD_PEER_NI: {
4374                 struct lnet_ioctl_peer_cfg *cfg = arg;
4375                 struct lnet_nid prim_nid;
4376
4377                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4378                         return -EINVAL;
4379
4380                 mutex_lock(&the_lnet.ln_api_mutex);
4381                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4382                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4383                 rc = lnet_user_add_peer_ni(&prim_nid, &nid, cfg->prcfg_mr,
4384                                            cfg->prcfg_count == 1);
4385                 mutex_unlock(&the_lnet.ln_api_mutex);
4386                 return rc;
4387         }
4388
4389         case IOC_LIBCFS_DEL_PEER_NI: {
4390                 struct lnet_ioctl_peer_cfg *cfg = arg;
4391                 struct lnet_nid prim_nid;
4392
4393                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4394                         return -EINVAL;
4395
4396                 mutex_lock(&the_lnet.ln_api_mutex);
4397                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4398                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4399                 rc = lnet_del_peer_ni(&prim_nid,
4400                                       &nid,
4401                                       cfg->prcfg_count);
4402                 mutex_unlock(&the_lnet.ln_api_mutex);
4403                 return rc;
4404         }
4405
4406         case IOC_LIBCFS_GET_PEER_INFO: {
4407                 struct lnet_ioctl_peer *peer_info = arg;
4408
4409                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
4410                         return -EINVAL;
4411
4412                 mutex_lock(&the_lnet.ln_api_mutex);
4413                 rc = lnet_get_peer_ni_info(
4414                    peer_info->pr_count,
4415                    &peer_info->pr_nid,
4416                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
4417                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
4418                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
4419                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
4420                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
4421                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
4422                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
4423                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
4424                 mutex_unlock(&the_lnet.ln_api_mutex);
4425                 return rc;
4426         }
4427
4428         case IOC_LIBCFS_GET_PEER_NI: {
4429                 struct lnet_ioctl_peer_cfg *cfg = arg;
4430
4431                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4432                         return -EINVAL;
4433
4434                 mutex_lock(&the_lnet.ln_api_mutex);
4435                 rc = lnet_get_peer_info(cfg,
4436                                         (void __user *)cfg->prcfg_bulk);
4437                 mutex_unlock(&the_lnet.ln_api_mutex);
4438                 return rc;
4439         }
4440
4441         case IOC_LIBCFS_GET_PEER_LIST: {
4442                 struct lnet_ioctl_peer_cfg *cfg = arg;
4443
4444                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4445                         return -EINVAL;
4446
4447                 mutex_lock(&the_lnet.ln_api_mutex);
4448                 rc = lnet_get_peer_list(&cfg->prcfg_count, &cfg->prcfg_size,
4449                                 (struct lnet_process_id __user *)cfg->prcfg_bulk);
4450                 mutex_unlock(&the_lnet.ln_api_mutex);
4451                 return rc;
4452         }
4453
4454         case IOC_LIBCFS_SET_HEALHV: {
4455                 struct lnet_ioctl_reset_health_cfg *cfg = arg;
4456                 int value;
4457
4458                 if (cfg->rh_hdr.ioc_len < sizeof(*cfg))
4459                         return -EINVAL;
4460                 if (cfg->rh_value < 0 ||
4461                     cfg->rh_value > LNET_MAX_HEALTH_VALUE)
4462                         value = LNET_MAX_HEALTH_VALUE;
4463                 else
4464                         value = cfg->rh_value;
4465                 CDEBUG(D_NET, "Manually setting healthv to %d for %s:%s. all = %d\n",
4466                        value, (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) ?
4467                        "local" : "peer", libcfs_nid2str(cfg->rh_nid), cfg->rh_all);
4468                 lnet_nid4_to_nid(cfg->rh_nid, &nid);
4469                 mutex_lock(&the_lnet.ln_api_mutex);
4470                 if (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI)
4471                         lnet_ni_set_healthv(cfg->rh_nid, value,
4472                                              cfg->rh_all);
4473                 else
4474                         lnet_peer_ni_set_healthv(&nid, value, cfg->rh_all);
4475                 mutex_unlock(&the_lnet.ln_api_mutex);
4476                 return 0;
4477         }
4478
4479         case IOC_LIBCFS_SET_PEER: {
4480                 struct lnet_ioctl_peer_cfg *cfg = arg;
4481                 struct lnet_peer *lp;
4482
4483                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4484                         return -EINVAL;
4485
4486                 mutex_lock(&the_lnet.ln_api_mutex);
4487                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &nid);
4488                 lp = lnet_find_peer(&nid);
4489                 if (!lp) {
4490                         mutex_unlock(&the_lnet.ln_api_mutex);
4491                         return -ENOENT;
4492                 }
4493                 spin_lock(&lp->lp_lock);
4494                 lp->lp_state = cfg->prcfg_state;
4495                 spin_unlock(&lp->lp_lock);
4496                 lnet_peer_decref_locked(lp);
4497                 mutex_unlock(&the_lnet.ln_api_mutex);
4498                 CDEBUG(D_NET, "Set peer %s state to %u\n",
4499                        libcfs_nid2str(cfg->prcfg_prim_nid), cfg->prcfg_state);
4500                 return 0;
4501         }
4502
4503         case IOC_LIBCFS_SET_CONNS_PER_PEER: {
4504                 struct lnet_ioctl_reset_conns_per_peer_cfg *cfg = arg;
4505                 int value;
4506
4507                 if (cfg->rcpp_hdr.ioc_len < sizeof(*cfg))
4508                         return -EINVAL;
4509                 if (cfg->rcpp_value < 0)
4510                         value = 1;
4511                 else
4512                         value = cfg->rcpp_value;
4513                 CDEBUG(D_NET,
4514                        "Setting conns_per_peer to %d for %s. all = %d\n",
4515                        value, libcfs_nid2str(cfg->rcpp_nid), cfg->rcpp_all);
4516                 mutex_lock(&the_lnet.ln_api_mutex);
4517                 lnet_ni_set_conns_per_peer(cfg->rcpp_nid, value, cfg->rcpp_all);
4518                 mutex_unlock(&the_lnet.ln_api_mutex);
4519                 return 0;
4520         }
4521
4522         case IOC_LIBCFS_NOTIFY_ROUTER: {
4523                 /* Convert the user-supplied real time to monotonic.
4524                  * NB: "when" is always in the past
4525                  */
4526                 time64_t when = ktime_get_seconds() -
4527                                 (ktime_get_real_seconds() - data->ioc_u64[0]);
4528
4529                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4530                 return lnet_notify(NULL, &nid, data->ioc_flags, false, when);
4531         }
4532
4533         case IOC_LIBCFS_LNET_DIST:
4534                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4535                 rc = LNetDist(&nid, &nid, &data->ioc_u32[1]);
4536                 if (rc < 0 && rc != -EHOSTUNREACH)
4537                         return rc;
4538
4539                 data->ioc_nid = lnet_nid_to_nid4(&nid);
4540                 data->ioc_u32[0] = rc;
4541                 return 0;
4542
4543         case IOC_LIBCFS_TESTPROTOCOMPAT:
4544                 the_lnet.ln_testprotocompat = data->ioc_flags;
4545                 return 0;
4546
4547         case IOC_LIBCFS_LNET_FAULT:
4548                 return lnet_fault_ctl(data->ioc_flags, data);
4549
4550         case IOC_LIBCFS_PING_PEER: {
4551                 struct lnet_ioctl_ping_data *ping = arg;
4552                 struct lnet_process_id __user *ids = ping->ping_buf;
4553                 struct lnet_nid src_nid = LNET_ANY_NID;
4554                 struct lnet_genl_ping_list plist;
4555                 struct lnet_processid id;
4556                 struct lnet_peer *lp;
4557                 signed long timeout;
4558                 int count, i;
4559
4560                 /* Check if the supplied ping data supports source nid
4561                  * NB: This check is sufficient if lnet_ioctl_ping_data has
4562                  * additional fields added, but if they are re-ordered or
4563                  * fields removed then this will break. It is expected that
4564                  * these ioctls will be replaced with netlink implementation, so
4565                  * it is probably not worth coming up with a more robust version
4566                  * compatibility scheme.
4567                  */
4568                 if (ping->ping_hdr.ioc_len >= sizeof(struct lnet_ioctl_ping_data))
4569                         lnet_nid4_to_nid(ping->ping_src, &src_nid);
4570
4571                 /* If timeout is negative then set default of 3 minutes */
4572                 if (((s32)ping->op_param) <= 0 ||
4573                     ping->op_param > (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
4574                         timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
4575                 else
4576                         timeout = nsecs_to_jiffies(ping->op_param * NSEC_PER_MSEC);
4577
4578                 id.pid = ping->ping_id.pid;
4579                 lnet_nid4_to_nid(ping->ping_id.nid, &id.nid);
4580                 rc = lnet_ping(&id, &src_nid, timeout, &plist,
4581                                ping->ping_count);
4582                 if (rc < 0)
4583                         goto report_ping_err;
4584                 count = rc;
4585                 rc = 0;
4586
4587                 for (i = 0; i < count; i++) {
4588                         struct lnet_processid *result;
4589                         struct lnet_process_id tmpid;
4590
4591                         result = genradix_ptr(&plist.lgpl_list, i);
4592                         memset(&tmpid, 0, sizeof(tmpid));
4593                         tmpid.pid = result->pid;
4594                         tmpid.nid = lnet_nid_to_nid4(&result->nid);
4595                         if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
4596                                 rc = -EFAULT;
4597                                 goto report_ping_err;
4598                         }
4599                 }
4600
4601                 mutex_lock(&the_lnet.ln_api_mutex);
4602                 lp = lnet_find_peer(&id.nid);
4603                 if (lp) {
4604                         ping->ping_id.nid =
4605                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4606                         ping->mr_info = lnet_peer_is_multi_rail(lp);
4607                         lnet_peer_decref_locked(lp);
4608                 }
4609                 mutex_unlock(&the_lnet.ln_api_mutex);
4610
4611                 ping->ping_count = count;
4612 report_ping_err:
4613                 genradix_free(&plist.lgpl_list);
4614                 return rc;
4615         }
4616
4617         case IOC_LIBCFS_DISCOVER: {
4618                 struct lnet_ioctl_ping_data *discover = arg;
4619                 struct lnet_process_id __user *ids;
4620                 struct lnet_genl_ping_list dlists;
4621                 struct lnet_processid id;
4622                 struct lnet_peer *lp;
4623                 int count, i;
4624
4625                 if (discover->ping_count <= 0)
4626                         return -EINVAL;
4627
4628                 genradix_init(&dlists.lgpl_list);
4629                 /* If the user buffer has more space than the lnet_interfaces_max,
4630                  * then only fill it up to lnet_interfaces_max.
4631                  */
4632                 if (discover->ping_count > lnet_interfaces_max)
4633                         discover->ping_count = lnet_interfaces_max;
4634
4635                 id.pid = discover->ping_id.pid;
4636                 lnet_nid4_to_nid(discover->ping_id.nid, &id.nid);
4637                 rc = lnet_discover(&id, discover->op_param, &dlists);
4638                 if (rc < 0)
4639                         goto report_discover_err;
4640                 count = rc;
4641
4642                 ids = discover->ping_buf;
4643                 for (i = 0; i < count; i++) {
4644                         struct lnet_processid *result;
4645                         struct lnet_process_id tmpid;
4646
4647                         result = genradix_ptr(&dlists.lgpl_list, i);
4648                         memset(&tmpid, 0, sizeof(tmpid));
4649                         tmpid.pid = result->pid;
4650                         tmpid.nid = lnet_nid_to_nid4(&result->nid);
4651                         if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
4652                                 rc = -EFAULT;
4653                                 goto report_discover_err;
4654                         }
4655
4656                         if (i >= discover->ping_count)
4657                                 break;
4658                 }
4659                 rc = 0;
4660
4661                 mutex_lock(&the_lnet.ln_api_mutex);
4662                 lp = lnet_find_peer(&id.nid);
4663                 if (lp) {
4664                         discover->ping_id.nid =
4665                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4666                         discover->mr_info = lnet_peer_is_multi_rail(lp);
4667                         lnet_peer_decref_locked(lp);
4668                 }
4669                 mutex_unlock(&the_lnet.ln_api_mutex);
4670
4671                 discover->ping_count = count;
4672 report_discover_err:
4673                 genradix_free(&dlists.lgpl_list);
4674                 return rc;
4675         }
4676
4677         case IOC_LIBCFS_ADD_UDSP: {
4678                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4679                 __u32 bulk_size = ioc_udsp->iou_hdr.ioc_len;
4680
4681                 mutex_lock(&the_lnet.ln_api_mutex);
4682                 rc = lnet_udsp_demarshal_add(arg, bulk_size);
4683                 if (!rc) {
4684                         rc = lnet_udsp_apply_policies(NULL, false);
4685                         CDEBUG(D_NET, "policy application returned %d\n", rc);
4686                         rc = 0;
4687                 }
4688                 mutex_unlock(&the_lnet.ln_api_mutex);
4689
4690                 return rc;
4691         }
4692
4693         case IOC_LIBCFS_DEL_UDSP: {
4694                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4695                 int idx = ioc_udsp->iou_idx;
4696
4697                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4698                         return -EINVAL;
4699
4700                 mutex_lock(&the_lnet.ln_api_mutex);
4701                 rc = lnet_udsp_del_policy(idx);
4702                 mutex_unlock(&the_lnet.ln_api_mutex);
4703
4704                 return rc;
4705         }
4706
4707         case IOC_LIBCFS_GET_UDSP_SIZE: {
4708                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4709                 struct lnet_udsp *udsp;
4710
4711                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4712                         return -EINVAL;
4713
4714                 rc = 0;
4715
4716                 mutex_lock(&the_lnet.ln_api_mutex);
4717                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4718                 if (!udsp) {
4719                         rc = -ENOENT;
4720                 } else {
4721                         /* coming in iou_idx will hold the idx of the udsp
4722                          * to get the size of. going out the iou_idx will
4723                          * hold the size of the UDSP found at the passed
4724                          * in index.
4725                          */
4726                         ioc_udsp->iou_idx = lnet_get_udsp_size(udsp);
4727                         if (ioc_udsp->iou_idx < 0)
4728                                 rc = -EINVAL;
4729                 }
4730                 mutex_unlock(&the_lnet.ln_api_mutex);
4731
4732                 return rc;
4733         }
4734
4735         case IOC_LIBCFS_GET_UDSP: {
4736                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4737                 struct lnet_udsp *udsp;
4738
4739                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4740                         return -EINVAL;
4741
4742                 rc = 0;
4743
4744                 mutex_lock(&the_lnet.ln_api_mutex);
4745                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4746                 if (!udsp)
4747                         rc = -ENOENT;
4748                 else
4749                         rc = lnet_udsp_marshal(udsp, ioc_udsp);
4750                 mutex_unlock(&the_lnet.ln_api_mutex);
4751
4752                 return rc;
4753         }
4754
4755         case IOC_LIBCFS_GET_CONST_UDSP_INFO: {
4756                 struct lnet_ioctl_construct_udsp_info *info = arg;
4757
4758                 if (info->cud_hdr.ioc_len < sizeof(*info))
4759                         return -EINVAL;
4760
4761                 CDEBUG(D_NET, "GET_UDSP_INFO for %s\n",
4762                        libcfs_nid2str(info->cud_nid));
4763
4764                 lnet_nid4_to_nid(info->cud_nid, &nid);
4765                 mutex_lock(&the_lnet.ln_api_mutex);
4766                 lnet_net_lock(0);
4767                 lnet_udsp_get_construct_info(info, &nid);
4768                 lnet_net_unlock(0);
4769                 mutex_unlock(&the_lnet.ln_api_mutex);
4770
4771                 return 0;
4772         }
4773
4774         default:
4775                 ni = lnet_net2ni_addref(data->ioc_net);
4776                 if (ni == NULL)
4777                         return -EINVAL;
4778
4779                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
4780                         rc = -EINVAL;
4781                 else
4782                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
4783
4784                 lnet_ni_decref(ni);
4785                 return rc <= 0 ? rc : 0;
4786         }
4787         /* not reached */
4788 }
4789 EXPORT_SYMBOL(LNetCtl);
4790
4791 struct lnet_nid_cpt {
4792         struct lnet_nid lnc_nid;
4793         unsigned int lnc_cpt;
4794 };
4795
4796 struct lnet_genl_nid_cpt_list {
4797         unsigned int lgncl_index;
4798         unsigned int lgncl_list_count;
4799         GENRADIX(struct lnet_nid_cpt) lgncl_lnc_list;
4800 };
4801
4802 static inline struct lnet_genl_nid_cpt_list *
4803 lnet_cpt_of_nid_dump_ctx(struct netlink_callback *cb)
4804 {
4805         return (struct lnet_genl_nid_cpt_list *)cb->args[0];
4806 }
4807
4808 static int lnet_cpt_of_nid_show_done(struct netlink_callback *cb)
4809 {
4810         struct lnet_genl_nid_cpt_list *lgncl;
4811
4812         lgncl = lnet_cpt_of_nid_dump_ctx(cb);
4813
4814         if (lgncl) {
4815                 genradix_free(&lgncl->lgncl_lnc_list);
4816                 LIBCFS_FREE(lgncl, sizeof(*lgncl));
4817                 cb->args[0] = 0;
4818         }
4819
4820         return 0;
4821 }
4822
4823 static int lnet_cpt_of_nid_show_start(struct netlink_callback *cb)
4824 {
4825         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
4826 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4827         struct netlink_ext_ack *extack = NULL;
4828 #endif
4829         struct lnet_genl_nid_cpt_list *lgncl;
4830         int msg_len = genlmsg_len(gnlh);
4831         struct nlattr *params, *top;
4832         int rem, rc = 0;
4833
4834 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4835         extack = cb->extack;
4836 #endif
4837
4838         mutex_lock(&the_lnet.ln_api_mutex);
4839         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
4840                 NL_SET_ERR_MSG(extack, "Network is down");
4841                 mutex_unlock(&the_lnet.ln_api_mutex);
4842                 return -ENETDOWN;
4843         }
4844
4845         msg_len = genlmsg_len(gnlh);
4846         if (!msg_len) {
4847                 NL_SET_ERR_MSG(extack, "Missing NID argument(s)");
4848                 mutex_unlock(&the_lnet.ln_api_mutex);
4849                 return -ENOENT;
4850         }
4851
4852         LIBCFS_ALLOC(lgncl, sizeof(*lgncl));
4853         if (!lgncl) {
4854                 mutex_unlock(&the_lnet.ln_api_mutex);
4855                 return -ENOMEM;
4856         }
4857
4858         genradix_init(&lgncl->lgncl_lnc_list);
4859         lgncl->lgncl_list_count = 0;
4860         cb->args[0] = (long)lgncl;
4861
4862         params = genlmsg_data(gnlh);
4863         nla_for_each_attr(top, params, msg_len, rem) {
4864                 struct nlattr *nids;
4865                 int rem2;
4866
4867                 switch (nla_type(top)) {
4868                 case LN_SCALAR_ATTR_LIST:
4869                         nla_for_each_nested(nids, top, rem2) {
4870                                 char nidstr[LNET_NIDSTR_SIZE + 1];
4871                                 struct lnet_nid_cpt *lnc;
4872
4873                                 if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
4874                                         continue;
4875
4876                                 memset(nidstr, 0, sizeof(nidstr));
4877                                 rc = nla_strscpy(nidstr, nids, sizeof(nidstr));
4878                                 if (rc < 0) {
4879                                         NL_SET_ERR_MSG(extack,
4880                                                        "failed to get NID");
4881                                         GOTO(report_err, rc);
4882                                 }
4883
4884                                 lnc = genradix_ptr_alloc(&lgncl->lgncl_lnc_list,
4885                                                       lgncl->lgncl_list_count++,
4886                                                       GFP_KERNEL);
4887                                 if (!lnc) {
4888                                         NL_SET_ERR_MSG(extack,
4889                                                       "failed to allocate NID");
4890                                         GOTO(report_err, rc = -ENOMEM);
4891                                 }
4892
4893                                 rc = libcfs_strnid(&lnc->lnc_nid,
4894                                                    strim(nidstr));
4895                                 if (rc < 0) {
4896                                         NL_SET_ERR_MSG(extack, "invalid NID");
4897                                         GOTO(report_err, rc);
4898                                 }
4899                                 rc = 0;
4900                                 CDEBUG(D_NET, "nid: %s\n",
4901                                        libcfs_nidstr(&lnc->lnc_nid));
4902                         }
4903                         fallthrough;
4904                 default:
4905                         break;
4906                 }
4907         }
4908 report_err:
4909         mutex_unlock(&the_lnet.ln_api_mutex);
4910
4911         if (rc < 0)
4912                 lnet_cpt_of_nid_show_done(cb);
4913
4914         return rc;
4915 }
4916
4917 static const struct ln_key_list cpt_of_nid_props_list = {
4918         .lkl_maxattr                    = LNET_CPT_OF_NID_ATTR_MAX,
4919         .lkl_list                       = {
4920                 [LNET_CPT_OF_NID_ATTR_HDR]      = {
4921                         .lkp_value              = "cpt-of-nid",
4922                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
4923                         .lkp_data_type          = NLA_NUL_STRING,
4924                 },
4925                 [LNET_CPT_OF_NID_ATTR_NID]      = {
4926                         .lkp_value              = "nid",
4927                         .lkp_data_type          = NLA_STRING,
4928                 },
4929                 [LNET_CPT_OF_NID_ATTR_CPT]      = {
4930                         .lkp_value              = "cpt",
4931                         .lkp_data_type          = NLA_U32,
4932                 },
4933         },
4934 };
4935
4936 static int lnet_cpt_of_nid_show_dump(struct sk_buff *msg,
4937                                      struct netlink_callback *cb)
4938 {
4939         struct lnet_genl_nid_cpt_list *lgncl;
4940 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4941         struct netlink_ext_ack *extack = NULL;
4942 #endif
4943         int portid = NETLINK_CB(cb->skb).portid;
4944         int seq = cb->nlh->nlmsg_seq;
4945         int idx;
4946         int rc = 0;
4947         bool need_hdr = true;
4948
4949 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4950         extack = cb->extack;
4951 #endif
4952
4953         mutex_lock(&the_lnet.ln_api_mutex);
4954         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
4955                 NL_SET_ERR_MSG(extack, "Network is down");
4956                 GOTO(send_error, rc = -ENETDOWN);
4957         }
4958
4959         lgncl = lnet_cpt_of_nid_dump_ctx(cb);
4960         idx = lgncl->lgncl_index;
4961
4962         if (!lgncl->lgncl_index) {
4963                 const struct ln_key_list *all[] = {
4964                         &cpt_of_nid_props_list, NULL, NULL
4965                 };
4966
4967                 rc = lnet_genl_send_scalar_list(msg, portid, seq, &lnet_family,
4968                                                 NLM_F_CREATE | NLM_F_MULTI,
4969                                                 LNET_CMD_CPT_OF_NID, all);
4970                 if (rc < 0) {
4971                         NL_SET_ERR_MSG(extack, "failed to send key table");
4972                         GOTO(send_error, rc);
4973                 }
4974         }
4975
4976         while (idx < lgncl->lgncl_list_count) {
4977                 struct lnet_nid_cpt *lnc;
4978                 void *hdr;
4979                 int cpt;
4980
4981                 lnc = genradix_ptr(&lgncl->lgncl_lnc_list, idx++);
4982
4983                 cpt = lnet_nid_cpt_hash(&lnc->lnc_nid, LNET_CPT_NUMBER);
4984
4985                 CDEBUG(D_NET, "nid: %s cpt: %d\n", libcfs_nidstr(&lnc->lnc_nid), cpt);
4986                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
4987                                   NLM_F_MULTI, LNET_CMD_CPT_OF_NID);
4988                 if (!hdr) {
4989                         NL_SET_ERR_MSG(extack, "failed to send values");
4990                         genlmsg_cancel(msg, hdr);
4991                         GOTO(send_error, rc = -EMSGSIZE);
4992                 }
4993
4994                 if (need_hdr) {
4995                         nla_put_string(msg, LNET_CPT_OF_NID_ATTR_HDR, "");
4996                         need_hdr = false;
4997                 }
4998
4999                 nla_put_string(msg, LNET_CPT_OF_NID_ATTR_NID,
5000                                libcfs_nidstr(&lnc->lnc_nid));
5001                 nla_put_u32(msg, LNET_CPT_OF_NID_ATTR_CPT, cpt);
5002
5003                 genlmsg_end(msg, hdr);
5004         }
5005
5006         genradix_free(&lgncl->lgncl_lnc_list);
5007         rc = 0;
5008         lgncl->lgncl_index = idx;
5009
5010 send_error:
5011         mutex_unlock(&the_lnet.ln_api_mutex);
5012
5013         return lnet_nl_send_error(cb->skb, portid, seq, rc);
5014 }
5015
5016 #ifndef HAVE_NETLINK_CALLBACK_START
5017 static int lnet_old_cpt_of_nid_show_dump(struct sk_buff *msg,
5018                                          struct netlink_callback *cb)
5019 {
5020         if (!cb->args[0]) {
5021                 int rc = lnet_cpt_of_nid_show_start(cb);
5022
5023                 if (rc < 0)
5024                         return lnet_nl_send_error(cb->skb,
5025                                                   NETLINK_CB(cb->skb).portid,
5026                                                   cb->nlh->nlmsg_seq,
5027                                                   rc);
5028         }
5029
5030         return lnet_cpt_of_nid_show_dump(msg, cb);
5031 }
5032 #endif
5033
5034 /* This is the keys for the UDSP info which is used by many
5035  * Netlink commands.
5036  */
5037 static const struct ln_key_list udsp_info_list = {
5038         .lkl_maxattr                    = LNET_UDSP_INFO_ATTR_MAX,
5039         .lkl_list                       = {
5040                 [LNET_UDSP_INFO_ATTR_NET_PRIORITY]              = {
5041                         .lkp_value      = "net priority",
5042                         .lkp_data_type  = NLA_S32
5043                 },
5044                 [LNET_UDSP_INFO_ATTR_NID_PRIORITY]              = {
5045                         .lkp_value      = "nid priority",
5046                         .lkp_data_type  = NLA_S32
5047                 },
5048                 [LNET_UDSP_INFO_ATTR_PREF_RTR_NIDS_LIST]        = {
5049                         .lkp_value      = "Preferred gateway NIDs",
5050                         .lkp_key_format = LNKF_MAPPING,
5051                         .lkp_data_type  = NLA_NESTED,
5052                 },
5053                 [LNET_UDSP_INFO_ATTR_PREF_NIDS_LIST]            = {
5054                         .lkp_value      = "Preferred source NIDs",
5055                         .lkp_key_format = LNKF_MAPPING,
5056                         .lkp_data_type  = NLA_NESTED,
5057                 },
5058         },
5059 };
5060
5061 static const struct ln_key_list udsp_info_pref_nids_list = {
5062         .lkl_maxattr                    = LNET_UDSP_INFO_PREF_NIDS_ATTR_MAX,
5063         .lkl_list                       = {
5064                 [LNET_UDSP_INFO_PREF_NIDS_ATTR_INDEX]           = {
5065                         .lkp_value      = "NID-0",
5066                         .lkp_data_type  = NLA_NUL_STRING,
5067                 },
5068                 [LNET_UDSP_INFO_PREF_NIDS_ATTR_NID]             = {
5069                         .lkp_value      = "0@lo",
5070                         .lkp_data_type  = NLA_STRING,
5071                 },
5072         },
5073 };
5074
5075 static int lnet_udsp_info_send(struct sk_buff *msg, int attr,
5076                                struct lnet_nid *nid, bool remote)
5077 {
5078         struct lnet_ioctl_construct_udsp_info *udsp;
5079         struct nlattr *udsp_attr, *udsp_info;
5080         struct nlattr *udsp_list_attr;
5081         struct nlattr *udsp_list_info;
5082         int i;
5083
5084         CFS_ALLOC_PTR(udsp);
5085         if (!udsp)
5086                 return -ENOMEM;
5087
5088         udsp->cud_peer = remote;
5089         lnet_udsp_get_construct_info(udsp, nid);
5090
5091         udsp_info = nla_nest_start(msg, attr);
5092         udsp_attr = nla_nest_start(msg, 0);
5093         nla_put_s32(msg, LNET_UDSP_INFO_ATTR_NET_PRIORITY,
5094                     udsp->cud_net_priority);
5095         nla_put_s32(msg, LNET_UDSP_INFO_ATTR_NID_PRIORITY,
5096                     udsp->cud_nid_priority);
5097
5098         if (udsp->cud_pref_rtr_nid[0] == 0)
5099                 goto skip_list;
5100
5101         udsp_list_info = nla_nest_start(msg,
5102                                         LNET_UDSP_INFO_ATTR_PREF_RTR_NIDS_LIST);
5103         for (i = 0; i < LNET_MAX_SHOW_NUM_NID; i++) {
5104                 char tmp[8]; /* NID-"3 number"\0 */
5105
5106                 if (udsp->cud_pref_rtr_nid[i] == 0)
5107                         break;
5108
5109                 udsp_list_attr = nla_nest_start(msg, i);
5110                 snprintf(tmp, sizeof(tmp), "NID-%d", i);
5111                 nla_put_string(msg, LNET_UDSP_INFO_PREF_NIDS_ATTR_INDEX,
5112                                tmp);
5113                 nla_put_string(msg, LNET_UDSP_INFO_PREF_NIDS_ATTR_NID,
5114                                libcfs_nid2str(udsp->cud_pref_rtr_nid[i]));
5115                 nla_nest_end(msg, udsp_list_attr);
5116         }
5117         nla_nest_end(msg, udsp_list_info);
5118 skip_list:
5119         nla_nest_end(msg, udsp_attr);
5120         nla_nest_end(msg, udsp_info);
5121         LIBCFS_FREE(udsp, sizeof(*udsp));
5122
5123         return 0;
5124 }
5125
5126 /* LNet NI handling */
5127 static const struct ln_key_list net_props_list = {
5128         .lkl_maxattr                    = LNET_NET_ATTR_MAX,
5129         .lkl_list                       = {
5130                 [LNET_NET_ATTR_HDR]             = {
5131                         .lkp_value              = "net",
5132                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
5133                         .lkp_data_type          = NLA_NUL_STRING,
5134                 },
5135                 [LNET_NET_ATTR_TYPE]            = {
5136                         .lkp_value              = "net type",
5137                         .lkp_data_type          = NLA_STRING
5138                 },
5139                 [LNET_NET_ATTR_LOCAL]           = {
5140                         .lkp_value              = "local NI(s)",
5141                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
5142                         .lkp_data_type          = NLA_NESTED
5143                 },
5144         },
5145 };
5146
5147 static struct ln_key_list local_ni_list = {
5148         .lkl_maxattr                    = LNET_NET_LOCAL_NI_ATTR_MAX,
5149         .lkl_list                       = {
5150                 [LNET_NET_LOCAL_NI_ATTR_NID]            = {
5151                         .lkp_value              = "nid",
5152                         .lkp_data_type          = NLA_STRING
5153                 },
5154                 [LNET_NET_LOCAL_NI_ATTR_STATUS]         = {
5155                         .lkp_value              = "status",
5156                         .lkp_data_type          = NLA_STRING
5157                 },
5158                 [LNET_NET_LOCAL_NI_ATTR_INTERFACE]      = {
5159                         .lkp_value              = "interfaces",
5160                         .lkp_key_format         = LNKF_MAPPING,
5161                         .lkp_data_type          = NLA_NESTED
5162                 },
5163                 [LNET_NET_LOCAL_NI_ATTR_STATS]          = {
5164                         .lkp_value              = "statistics",
5165                         .lkp_key_format         = LNKF_MAPPING,
5166                         .lkp_data_type          = NLA_NESTED
5167                 },
5168                 [LNET_NET_LOCAL_NI_ATTR_UDSP_INFO]      = {
5169                         .lkp_value              = "udsp info",
5170                         .lkp_key_format         = LNKF_MAPPING,
5171                         .lkp_data_type          = NLA_NESTED
5172                 },
5173                 [LNET_NET_LOCAL_NI_ATTR_SEND_STATS]     = {
5174                         .lkp_value              = "sent_stats",
5175                         .lkp_key_format         = LNKF_MAPPING,
5176                         .lkp_data_type          = NLA_NESTED
5177                 },
5178                 [LNET_NET_LOCAL_NI_ATTR_RECV_STATS]     = {
5179                         .lkp_value              = "received_stats",
5180                         .lkp_key_format         = LNKF_MAPPING,
5181                         .lkp_data_type          = NLA_NESTED
5182                 },
5183                 [LNET_NET_LOCAL_NI_ATTR_DROPPED_STATS]  = {
5184                         .lkp_value              = "dropped_stats",
5185                         .lkp_key_format         = LNKF_MAPPING,
5186                         .lkp_data_type          = NLA_NESTED
5187
5188                 },
5189                 [LNET_NET_LOCAL_NI_ATTR_HEALTH_STATS]   = {
5190                         .lkp_value              = "health stats",
5191                         .lkp_key_format         = LNKF_MAPPING,
5192                         .lkp_data_type          = NLA_NESTED
5193                 },
5194                 [LNET_NET_LOCAL_NI_ATTR_TUNABLES]       = {
5195                         .lkp_value              = "tunables",
5196                         .lkp_key_format         = LNKF_MAPPING,
5197                         .lkp_data_type          = NLA_NESTED
5198                 },
5199                 [LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES]   = {
5200                         .lkp_value              = "lnd tunables",
5201                         .lkp_key_format         = LNKF_MAPPING,
5202                         .lkp_data_type          = NLA_NESTED
5203                 },
5204                 [LNET_NET_LOCAL_NI_DEV_CPT]             = {
5205                         .lkp_value              = "dev cpt",
5206                         .lkp_data_type          = NLA_S32,
5207                 },
5208                 [LNET_NET_LOCAL_NI_CPTS]                = {
5209                         .lkp_value              = "CPT",
5210                         .lkp_data_type          = NLA_STRING,
5211                 },
5212         },
5213 };
5214
5215 static const struct ln_key_list local_ni_interfaces_list = {
5216         .lkl_maxattr                    = LNET_NET_LOCAL_NI_INTF_ATTR_MAX,
5217         .lkl_list                       = {
5218                 [LNET_NET_LOCAL_NI_INTF_ATTR_TYPE] = {
5219                         .lkp_value      = "0",
5220                         .lkp_data_type  = NLA_STRING
5221                 },
5222         },
5223 };
5224
5225 static const struct ln_key_list local_ni_stats_list = {
5226         .lkl_maxattr                    = LNET_NET_LOCAL_NI_STATS_ATTR_MAX,
5227         .lkl_list                       = {
5228                 [LNET_NET_LOCAL_NI_STATS_ATTR_SEND_COUNT]       = {
5229                         .lkp_value      = "send_count",
5230                         .lkp_data_type  = NLA_U32
5231                 },
5232                 [LNET_NET_LOCAL_NI_STATS_ATTR_RECV_COUNT]       = {
5233                         .lkp_value      = "recv_count",
5234                         .lkp_data_type  = NLA_U32
5235                 },
5236                 [LNET_NET_LOCAL_NI_STATS_ATTR_DROP_COUNT]       = {
5237                         .lkp_value      = "drop_count",
5238                         .lkp_data_type  = NLA_U32
5239                 },
5240         },
5241 };
5242
5243 static const struct ln_key_list local_ni_msg_stats_list = {
5244         .lkl_maxattr                    = LNET_NET_LOCAL_NI_MSG_STATS_ATTR_MAX,
5245         .lkl_list                       = {
5246                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT]    = {
5247                         .lkp_value      = "put",
5248                         .lkp_data_type  = NLA_U32
5249                 },
5250                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT]    = {
5251                         .lkp_value      = "get",
5252                         .lkp_data_type  = NLA_U32
5253                 },
5254                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT]  = {
5255                         .lkp_value      = "reply",
5256                         .lkp_data_type  = NLA_U32
5257                 },
5258                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT]    = {
5259                         .lkp_value      = "ack",
5260                         .lkp_data_type  = NLA_U32
5261                 },
5262                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT]  = {
5263                         .lkp_value      = "hello",
5264                         .lkp_data_type  = NLA_U32
5265                 },
5266         },
5267 };
5268
5269 static const struct ln_key_list local_ni_health_stats_list = {
5270         .lkl_maxattr                    = LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_MAX,
5271         .lkl_list                       = {
5272                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_FATAL_ERRORS] = {
5273                         .lkp_value      = "fatal_error",
5274                         .lkp_data_type  = NLA_S32
5275                 },
5276                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_LEVEL] = {
5277                         .lkp_value      = "health value",
5278                         .lkp_data_type  = NLA_S32
5279                 },
5280                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_INTERRUPTS] = {
5281                         .lkp_value      = "interrupts",
5282                         .lkp_data_type  = NLA_U32
5283                 },
5284                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_DROPPED] = {
5285                         .lkp_value      = "dropped",
5286                         .lkp_data_type  = NLA_U32
5287                 },
5288                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ABORTED] = {
5289                         .lkp_value      = "aborted",
5290                         .lkp_data_type  = NLA_U32
5291                 },
5292                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NO_ROUTE] = {
5293                         .lkp_value      = "no route",
5294                         .lkp_data_type  = NLA_U32
5295                 },
5296                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_TIMEOUTS] = {
5297                         .lkp_value      = "timeouts",
5298                         .lkp_data_type  = NLA_U32
5299                 },
5300                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ERROR] = {
5301                         .lkp_value      = "error",
5302                         .lkp_data_type  = NLA_U32
5303                 },
5304                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PING_COUNT] = {
5305                         .lkp_value      = "ping_count",
5306                         .lkp_data_type  = NLA_U32,
5307                 },
5308                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NEXT_PING] = {
5309                         .lkp_value      = "next_ping",
5310                         .lkp_data_type  = NLA_U64
5311                 },
5312         },
5313 };
5314
5315 static const struct ln_key_list local_ni_tunables_list = {
5316         .lkl_maxattr                    = LNET_NET_LOCAL_NI_TUNABLES_ATTR_MAX,
5317         .lkl_list                       = {
5318                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT]  = {
5319                         .lkp_value      = "peer_timeout",
5320                         .lkp_data_type  = NLA_S32
5321                 },
5322                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS]  = {
5323                         .lkp_value      = "peer_credits",
5324                         .lkp_data_type  = NLA_S32
5325                 },
5326                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS] = {
5327                         .lkp_value      = "peer_buffer_credits",
5328                         .lkp_data_type  = NLA_S32
5329                 },
5330                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS] = {
5331                         .lkp_value      = "credits",
5332                         .lkp_data_type  = NLA_S32
5333                 },
5334         },
5335 };
5336
/* Per-dump state for LNET_CMD_NETS.  An index is used because the
 * traversal spans both the LNet nets and each net's NI collection.
 */
struct lnet_genl_net_list {
        /* net to report; LNET_NET_ANY means no filter was given */
        unsigned int lngl_net_id;
        /* position used by the dump to skip NIs it has already passed */
        unsigned int lngl_idx;
};
5342
5343 static inline struct lnet_genl_net_list *
5344 lnet_net_dump_ctx(struct netlink_callback *cb)
5345 {
5346         return (struct lnet_genl_net_list *)cb->args[0];
5347 }
5348
5349 static int lnet_net_show_done(struct netlink_callback *cb)
5350 {
5351         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
5352
5353         if (nlist) {
5354                 LIBCFS_FREE(nlist, sizeof(*nlist));
5355                 cb->args[0] = 0;
5356         }
5357
5358         return 0;
5359 }
5360
5361 /* LNet net ->start() handler for GET requests */
5362 static int lnet_net_show_start(struct netlink_callback *cb)
5363 {
5364         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5365 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5366         struct netlink_ext_ack *extack = NULL;
5367 #endif
5368         struct lnet_genl_net_list *nlist;
5369         int msg_len = genlmsg_len(gnlh);
5370         struct nlattr *params, *top;
5371         int rem, rc = 0;
5372
5373 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5374         extack = cb->extack;
5375 #endif
5376         if (the_lnet.ln_refcount == 0) {
5377                 NL_SET_ERR_MSG(extack, "LNet stack down");
5378                 return -ENETDOWN;
5379         }
5380
5381         LIBCFS_ALLOC(nlist, sizeof(*nlist));
5382         if (!nlist)
5383                 return -ENOMEM;
5384
5385         nlist->lngl_net_id = LNET_NET_ANY;
5386         nlist->lngl_idx = 0;
5387         cb->args[0] = (long)nlist;
5388
5389         cb->min_dump_alloc = U16_MAX;
5390         if (!msg_len)
5391                 return 0;
5392
5393         params = genlmsg_data(gnlh);
5394         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
5395                 NL_SET_ERR_MSG(extack, "invalid configuration");
5396                 return -EINVAL;
5397         }
5398
5399         nla_for_each_nested(top, params, rem) {
5400                 struct nlattr *net;
5401                 int rem2;
5402
5403                 nla_for_each_nested(net, top, rem2) {
5404                         char filter[LNET_NIDSTR_SIZE];
5405
5406                         if (nla_type(net) != LN_SCALAR_ATTR_VALUE ||
5407                             nla_strcmp(net, "net type") != 0)
5408                                 continue;
5409
5410                         net = nla_next(net, &rem2);
5411                         if (nla_type(net) != LN_SCALAR_ATTR_VALUE) {
5412                                 NL_SET_ERR_MSG(extack, "invalid config param");
5413                                 GOTO(report_err, rc = -EINVAL);
5414                         }
5415
5416                         rc = nla_strscpy(filter, net, sizeof(filter));
5417                         if (rc < 0) {
5418                                 NL_SET_ERR_MSG(extack, "failed to get param");
5419                                 GOTO(report_err, rc);
5420                         }
5421                         rc = 0;
5422
5423                         nlist->lngl_net_id = libcfs_str2net(filter);
5424                         if (nlist->lngl_net_id == LNET_NET_ANY) {
5425                                 NL_SET_ERR_MSG(extack, "cannot parse net");
5426                                 GOTO(report_err, rc = -ENOENT);
5427                         }
5428                 }
5429         }
5430 report_err:
5431         if (rc < 0)
5432                 lnet_net_show_done(cb);
5433
5434         return rc;
5435 }
5436
5437 static int lnet_net_show_dump(struct sk_buff *msg,
5438                               struct netlink_callback *cb)
5439 {
5440         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
5441 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5442         struct netlink_ext_ack *extack = NULL;
5443 #endif
5444         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5445         int portid = NETLINK_CB(cb->skb).portid;
5446         bool found = false, started = true;
5447         const struct lnet_lnd *lnd = NULL;
5448         int idx = nlist->lngl_idx, rc = 0;
5449         int seq = cb->nlh->nlmsg_seq;
5450         struct lnet_net *net;
5451         void *hdr = NULL;
5452
5453 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5454         extack = cb->extack;
5455 #endif
5456         lnet_net_lock(LNET_LOCK_EX);
5457
5458         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
5459                 struct nlattr *local_ni, *ni_attr;
5460                 struct lnet_ni *ni;
5461                 int dev = 0;
5462
5463                 if (nlist->lngl_net_id != LNET_NET_ANY &&
5464                     nlist->lngl_net_id != net->net_id)
5465                         continue;
5466
5467                 if (gnlh->version && LNET_NETTYP(net->net_id) != LOLND) {
5468                         if (!net->net_lnd) {
5469                                 NL_SET_ERR_MSG(extack,
5470                                                "LND not setup for NI");
5471                                 GOTO(net_unlock, rc = -ENODEV);
5472                         }
5473                         if (net->net_lnd != lnd)
5474                                 lnd = net->net_lnd;
5475                         else
5476                                 lnd = NULL;
5477                 }
5478
5479                 /* We need to resend the key table every time the base LND
5480                  * changed.
5481                  */
5482                 if (!idx || lnd) {
5483                         const struct ln_key_list *all[] = {
5484                                 &net_props_list, &local_ni_list,
5485                                 &local_ni_interfaces_list,
5486                                 &local_ni_stats_list,
5487                                 &udsp_info_list,
5488                                 &udsp_info_pref_nids_list,
5489                                 &udsp_info_pref_nids_list,
5490                                 &local_ni_msg_stats_list,
5491                                 &local_ni_msg_stats_list,
5492                                 &local_ni_msg_stats_list,
5493                                 &local_ni_health_stats_list,
5494                                 &local_ni_tunables_list,
5495                                 NULL, /* lnd tunables */
5496                                 NULL
5497                         };
5498                         int flags = NLM_F_CREATE | NLM_F_MULTI;
5499
5500                         if (lnd) {
5501                                 all[ARRAY_SIZE(all) - 2] = lnd->lnd_keys;
5502                                 if (idx)
5503                                         flags |= NLM_F_REPLACE;
5504                                 started = true;
5505                         }
5506
5507                         rc = lnet_genl_send_scalar_list(msg, portid, seq,
5508                                                         &lnet_family, flags,
5509                                                         LNET_CMD_NETS, all);
5510                         if (rc < 0) {
5511                                 NL_SET_ERR_MSG(extack, "failed to send key table");
5512                                 GOTO(net_unlock, rc);
5513                         }
5514                 }
5515
5516                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
5517                                   NLM_F_MULTI, LNET_CMD_NETS);
5518                 if (!hdr) {
5519                         NL_SET_ERR_MSG(extack, "failed to send values");
5520                         GOTO(net_unlock, rc = -EMSGSIZE);
5521                 }
5522
5523                 if (started) {
5524                         nla_put_string(msg, LNET_NET_ATTR_HDR, "");
5525                         started = false;
5526                 }
5527
5528                 nla_put_string(msg, LNET_NET_ATTR_TYPE,
5529                                libcfs_net2str(net->net_id));
5530
5531                 local_ni = nla_nest_start(msg, LNET_NET_ATTR_LOCAL);
5532                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
5533                         char *status = "up";
5534
5535                         if (idx++ < nlist->lngl_idx)
5536                                 continue;
5537
5538                         ni_attr = nla_nest_start(msg, dev++);
5539                         found = true;
5540                         lnet_ni_lock(ni);
5541                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_NID,
5542                                        libcfs_nidstr(&ni->ni_nid));
5543                         if (!nid_is_lo0(&ni->ni_nid) &&
5544                             lnet_ni_get_status_locked(ni) != LNET_NI_STATUS_UP)
5545                                 status = "down";
5546                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_STATUS,
5547                                        status);
5548
5549                         if (!nid_is_lo0(&ni->ni_nid) && ni->ni_interface) {
5550                                 struct nlattr *intf_nest, *intf_attr;
5551
5552                                 intf_nest = nla_nest_start(msg,
5553                                                            LNET_NET_LOCAL_NI_ATTR_INTERFACE);
5554                                 intf_attr = nla_nest_start(msg, 0);
5555                                 nla_put_string(msg,
5556                                                LNET_NET_LOCAL_NI_INTF_ATTR_TYPE,
5557                                                ni->ni_interface);
5558                                 nla_nest_end(msg, intf_attr);
5559                                 nla_nest_end(msg, intf_nest);
5560                         }
5561
5562                         if (gnlh->version) {
5563                                 char cpts[LNET_MAX_SHOW_NUM_CPT * 4 + 4], *cpt;
5564                                 struct lnet_ioctl_element_msg_stats msg_stats;
5565                                 struct lnet_ioctl_element_stats stats;
5566                                 size_t buf_len = sizeof(cpts), len;
5567                                 struct nlattr *health_attr, *health_stats;
5568                                 struct nlattr *send_attr, *send_stats;
5569                                 struct nlattr *recv_attr, *recv_stats;
5570                                 struct nlattr *drop_attr, *drop_stats;
5571                                 struct nlattr *stats_attr, *ni_stats;
5572                                 struct nlattr *tun_attr, *ni_tun;
5573                                 int j;
5574
5575                                 stats.iel_send_count = lnet_sum_stats(&ni->ni_stats,
5576                                                                       LNET_STATS_TYPE_SEND);
5577                                 stats.iel_recv_count = lnet_sum_stats(&ni->ni_stats,
5578                                                                       LNET_STATS_TYPE_RECV);
5579                                 stats.iel_drop_count = lnet_sum_stats(&ni->ni_stats,
5580                                                                       LNET_STATS_TYPE_DROP);
5581                                 lnet_ni_unlock(ni);
5582
5583                                 stats_attr = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_STATS);
5584                                 ni_stats = nla_nest_start(msg, 0);
5585                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_SEND_COUNT,
5586                                             stats.iel_send_count);
5587                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_RECV_COUNT,
5588                                             stats.iel_recv_count);
5589                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_DROP_COUNT,
5590                                             stats.iel_drop_count);
5591                                 nla_nest_end(msg, ni_stats);
5592                                 nla_nest_end(msg, stats_attr);
5593
5594                                 if (gnlh->version < 4)
5595                                         goto skip_udsp;
5596
5597                                 /* UDSP info */
5598                                 rc = lnet_udsp_info_send(msg, LNET_NET_LOCAL_NI_ATTR_UDSP_INFO,
5599                                                          &ni->ni_nid, false);
5600                                 if (rc < 0) {
5601                                         NL_SET_ERR_MSG(extack,
5602                                                        "Failed to get udsp info");
5603                                         genlmsg_cancel(msg, hdr);
5604                                         GOTO(net_unlock, rc = -ENOMEM);
5605                                 }
5606 skip_udsp:
5607                                 if (gnlh->version < 2)
5608                                         goto skip_msg_stats;
5609
5610                                 msg_stats.im_idx = idx - 1;
5611                                 rc = lnet_get_ni_stats(&msg_stats);
5612                                 if (rc < 0) {
5613                                         NL_SET_ERR_MSG(extack,
5614                                                        "failed to get msg stats");
5615                                         genlmsg_cancel(msg, hdr);
5616                                         GOTO(net_unlock, rc = -ENOMEM);
5617                                 }
5618
5619                                 send_stats = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_SEND_STATS);
5620                                 send_attr = nla_nest_start(msg, 0);
5621                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5622                                             msg_stats.im_send_stats.ico_get_count);
5623                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5624                                             msg_stats.im_send_stats.ico_put_count);
5625                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5626                                             msg_stats.im_send_stats.ico_reply_count);
5627                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5628                                             msg_stats.im_send_stats.ico_ack_count);
5629                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5630                                             msg_stats.im_send_stats.ico_hello_count);
5631                                 nla_nest_end(msg, send_attr);
5632                                 nla_nest_end(msg, send_stats);
5633
5634                                 recv_stats = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_RECV_STATS);
5635                                 recv_attr = nla_nest_start(msg, 0);
5636                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5637                                             msg_stats.im_recv_stats.ico_get_count);
5638                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5639                                             msg_stats.im_recv_stats.ico_put_count);
5640                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5641                                             msg_stats.im_recv_stats.ico_reply_count);
5642                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5643                                             msg_stats.im_recv_stats.ico_ack_count);
5644                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5645                                             msg_stats.im_recv_stats.ico_hello_count);
5646                                 nla_nest_end(msg, recv_attr);
5647                                 nla_nest_end(msg, recv_stats);
5648
5649                                 drop_stats = nla_nest_start(msg,
5650                                                             LNET_NET_LOCAL_NI_ATTR_DROPPED_STATS);
5651                                 drop_attr = nla_nest_start(msg, 0);
5652                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5653                                             msg_stats.im_drop_stats.ico_get_count);
5654                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5655                                             msg_stats.im_drop_stats.ico_put_count);
5656                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5657                                             msg_stats.im_drop_stats.ico_reply_count);
5658                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5659                                             msg_stats.im_drop_stats.ico_ack_count);
5660                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5661                                             msg_stats.im_drop_stats.ico_hello_count);
5662                                 nla_nest_end(msg, drop_attr);
5663                                 nla_nest_end(msg, drop_stats);
5664
5665                                 /* health stats */
5666                                 health_stats = nla_nest_start(msg,
5667                                                               LNET_NET_LOCAL_NI_ATTR_HEALTH_STATS);
5668                                 health_attr = nla_nest_start(msg, 0);
5669                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_FATAL_ERRORS,
5670                                             atomic_read(&ni->ni_fatal_error_on));
5671                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_LEVEL,
5672                                             atomic_read(&ni->ni_healthv));
5673                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_INTERRUPTS,
5674                                             atomic_read(&ni->ni_hstats.hlt_local_interrupt));
5675                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_DROPPED,
5676                                             atomic_read(&ni->ni_hstats.hlt_local_dropped));
5677                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ABORTED,
5678                                             atomic_read(&ni->ni_hstats.hlt_local_aborted));
5679                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NO_ROUTE,
5680                                             atomic_read(&ni->ni_hstats.hlt_local_no_route));
5681                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_TIMEOUTS,
5682                                             atomic_read(&ni->ni_hstats.hlt_local_timeout));
5683                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ERROR,
5684                                             atomic_read(&ni->ni_hstats.hlt_local_error));
5685                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PING_COUNT,
5686                                             ni->ni_ping_count);
5687                                 nla_put_u64_64bit(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NEXT_PING,
5688                                                   ni->ni_next_ping,
5689                                                   LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PAD);
5690                                 nla_nest_end(msg, health_attr);
5691                                 nla_nest_end(msg, health_stats);
5692 skip_msg_stats:
5693                                 /* Report net tunables */
5694                                 tun_attr = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_TUNABLES);
5695                                 ni_tun = nla_nest_start(msg, 0);
5696                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT,
5697                                             ni->ni_net->net_tunables.lct_peer_timeout);
5698                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS,
5699                                             ni->ni_net->net_tunables.lct_peer_tx_credits);
5700                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS,
5701                                             ni->ni_net->net_tunables.lct_peer_rtr_credits);
5702                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS,
5703                                             ni->ni_net->net_tunables.lct_max_tx_credits);
5704                                 nla_nest_end(msg, ni_tun);
5705
5706                                 nla_nest_end(msg, tun_attr);
5707
5708                                 if (lnd && lnd->lnd_nl_get && lnd->lnd_keys) {
5709                                         struct nlattr *lnd_tun_attr, *lnd_ni_tun;
5710
5711                                         lnd_tun_attr = nla_nest_start(msg,
5712                                                                       LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES);
5713                                         lnd_ni_tun = nla_nest_start(msg, 0);
5714                                         rc = lnd->lnd_nl_get(LNET_CMD_NETS, msg,
5715                                                              LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES,
5716                                                              ni);
5717                                         if (rc < 0) {
5718                                                 NL_SET_ERR_MSG(extack,
5719                                                                "failed to get lnd tunables");
5720                                                 genlmsg_cancel(msg, hdr);
5721                                                 GOTO(net_unlock, rc);
5722                                         }
5723                                         nla_nest_end(msg, lnd_ni_tun);
5724                                         nla_nest_end(msg, lnd_tun_attr);
5725                                 }
5726
5727                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_DEV_CPT, ni->ni_dev_cpt);
5728
5729                                 /* Report cpts. We could send this as a nested list
5730                                  * of integers but older versions of the tools
                                 * expect a string. The new versions can handle
5732                                  * both formats so in the future we can change
5733                                  * this to a nested list.
5734                                  */
5735                                 len = snprintf(cpts, buf_len, "\"[");
5736                                 cpt = cpts + len;
5737                                 buf_len -= len;
5738
5739                                 if (ni->ni_ncpts == LNET_CPT_NUMBER && !ni->ni_cpts)  {
5740                                         for (j = 0; j < ni->ni_ncpts; j++) {
5741                                                 len = snprintf(cpt, buf_len, "%d,", j);
5742                                                 buf_len -= len;
5743                                                 cpt += len;
5744                                         }
5745                                 } else {
5746                                         for (j = 0;
5747                                              ni->ni_cpts && j < ni->ni_ncpts &&
5748                                              j < LNET_MAX_SHOW_NUM_CPT; j++) {
5749                                                 len = snprintf(cpt, buf_len, "%d,",
5750                                                                ni->ni_cpts[j]);
5751                                                 buf_len -= len;
5752                                                 cpt += len;
5753                                         }
5754                                 }
5755                                 snprintf(cpt - 1, sizeof(cpts), "]\"");
5756
5757                                 nla_put_string(msg, LNET_NET_LOCAL_NI_CPTS, cpts);
5758                         } else {
5759                                 lnet_ni_unlock(ni);
5760                         }
5761                         nla_nest_end(msg, ni_attr);
5762                 }
5763                 nla_nest_end(msg, local_ni);
5764
5765                 genlmsg_end(msg, hdr);
5766         }
5767
5768         if (!found) {
5769                 struct nlmsghdr *nlh = nlmsg_hdr(msg);
5770
5771                 nlmsg_cancel(msg, nlh);
5772                 NL_SET_ERR_MSG(extack, "Network is down");
5773                 rc = -ESRCH;
5774         }
5775         nlist->lngl_idx = idx;
5776 net_unlock:
5777         lnet_net_unlock(LNET_LOCK_EX);
5778
5779         return lnet_nl_send_error(cb->skb, portid, seq, rc);
5780 }
5781
5782 #ifndef HAVE_NETLINK_CALLBACK_START
5783 static int lnet_old_net_show_dump(struct sk_buff *msg,
5784                                    struct netlink_callback *cb)
5785 {
5786         if (!cb->args[0]) {
5787                 int rc = lnet_net_show_start(cb);
5788
5789                 if (rc < 0)
5790                         return lnet_nl_send_error(cb->skb,
5791                                                   NETLINK_CB(cb->skb).portid,
5792                                                   cb->nlh->nlmsg_seq,
5793                                                   rc);
5794         }
5795
5796         return lnet_net_show_dump(msg, cb);
5797 }
5798 #endif
5799
5800 static int lnet_genl_parse_tunables(struct nlattr *settings,
5801                                     struct lnet_ioctl_config_lnd_tunables *tun)
5802 {
5803         struct nlattr *param;
5804         int rem, rc = 0;
5805
5806         nla_for_each_nested(param, settings, rem) {
5807                 int type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_UNSPEC;
5808                 s64 num;
5809
5810                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
5811                         continue;
5812
5813                 if (nla_strcmp(param, "peer_timeout") == 0)
5814                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT;
5815                 else if (nla_strcmp(param, "peer_credits") == 0)
5816                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS;
5817                 else if (nla_strcmp(param, "peer_buffer_credits") == 0)
5818                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS;
5819                 else if (nla_strcmp(param, "credits") == 0)
5820                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS;
5821
5822                 param = nla_next(param, &rem);
5823                 if (nla_type(param) != LN_SCALAR_ATTR_INT_VALUE)
5824                         return -EINVAL;
5825
5826                 num = nla_get_s64(param);
5827                 switch (type) {
5828                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT:
5829                         if (num >= 0)
5830                                 tun->lt_cmn.lct_peer_timeout = num;
5831                         break;
5832                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS:
5833                         if (num > 0)
5834                                 tun->lt_cmn.lct_peer_tx_credits = num;
5835                         break;
5836                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS:
5837                         if (num > 0)
5838                                 tun->lt_cmn.lct_peer_rtr_credits = num;
5839                         break;
5840                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS:
5841                         if (num > 0)
5842                                 tun->lt_cmn.lct_max_tx_credits = num;
5843                         break;
5844                 default:
5845                         rc = -EINVAL;
5846                         break;
5847                 }
5848         }
5849         return rc;
5850 }
5851
5852 static int lnet_genl_parse_lnd_tunables(struct nlattr *settings,
5853                                         struct lnet_lnd_tunables *tun,
5854                                         const struct lnet_lnd *lnd)
5855 {
5856         const struct ln_key_list *list = lnd->lnd_keys;
5857         struct nlattr *param;
5858         int rem, rc = 0;
5859         int i = 1;
5860
5861         /* silently ignore these setting if the LND driver doesn't
5862          * support any LND tunables
5863          */
5864         if (!list || !lnd->lnd_nl_set || !list->lkl_maxattr)
5865                 return 0;
5866
5867         nla_for_each_nested(param, settings, rem) {
5868                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
5869                         continue;
5870
5871                 for (i = 1; i <= list->lkl_maxattr; i++) {
5872                         if (!list->lkl_list[i].lkp_value ||
5873                             nla_strcmp(param, list->lkl_list[i].lkp_value) != 0)
5874                                 continue;
5875
5876                         param = nla_next(param, &rem);
5877                         rc = lnd->lnd_nl_set(LNET_CMD_NETS, param, i, tun);
5878                         if (rc < 0)
5879                                 return rc;
5880                 }
5881         }
5882
5883         return rc;
5884 }
5885
5886 static int
5887 lnet_genl_parse_local_ni(struct nlattr *entry, struct genl_info *info,
5888                          int net_id, struct lnet_ioctl_config_ni *conf,
5889                          bool *ni_list)
5890 {
5891         bool create = info->nlhdr->nlmsg_flags & NLM_F_CREATE;
5892         struct lnet_ioctl_config_lnd_tunables *tun;
5893         struct nlattr *settings;
5894         int rem3, rc = 0;
5895
5896         LIBCFS_ALLOC(tun, sizeof(struct lnet_ioctl_config_lnd_tunables));
5897         if (!tun) {
5898                 GENL_SET_ERR_MSG(info, "cannot allocate memory for tunables");
5899                 GOTO(out, rc = -ENOMEM);
5900         }
5901
5902         /* Use LND defaults */
5903         tun->lt_cmn.lct_peer_timeout = -1;
5904         tun->lt_cmn.lct_peer_tx_credits = -1;
5905         tun->lt_cmn.lct_peer_rtr_credits = -1;
5906         tun->lt_cmn.lct_max_tx_credits = -1;
5907         conf->lic_ncpts = 0;
5908
5909         nla_for_each_nested(settings, entry, rem3) {
5910                 if (nla_type(settings) != LN_SCALAR_ATTR_VALUE)
5911                         continue;
5912
5913                 if (nla_strcmp(settings, "interfaces") == 0) {
5914                         struct nlattr *intf;
5915                         int rem4;
5916
5917                         settings = nla_next(settings, &rem3);
5918                         if (nla_type(settings) !=
5919                             LN_SCALAR_ATTR_LIST) {
5920                                 GENL_SET_ERR_MSG(info,
5921                                                  "invalid interfaces");
5922                                 GOTO(out, rc = -EINVAL);
5923                         }
5924
5925                         nla_for_each_nested(intf, settings, rem4) {
5926                                 intf = nla_next(intf, &rem4);
5927                                 if (nla_type(intf) !=
5928                                     LN_SCALAR_ATTR_VALUE) {
5929                                         GENL_SET_ERR_MSG(info,
5930                                                          "cannot parse interface");
5931                                         GOTO(out, rc = -EINVAL);
5932                                 }
5933
5934                                 rc = nla_strscpy(conf->lic_ni_intf, intf,
5935                                                  sizeof(conf->lic_ni_intf));
5936                                 if (rc < 0) {
5937                                         GENL_SET_ERR_MSG(info,
5938                                                          "failed to parse interfaces");
5939                                         GOTO(out, rc);
5940                                 }
5941                         }
5942                         *ni_list = true;
5943                 } else if (nla_strcmp(settings, "tunables") == 0) {
5944                         settings = nla_next(settings, &rem3);
5945                         if (nla_type(settings) !=
5946                             LN_SCALAR_ATTR_LIST) {
5947                                 GENL_SET_ERR_MSG(info,
5948                                                  "invalid tunables");
5949                                 GOTO(out, rc = -EINVAL);
5950                         }
5951
5952                         rc = lnet_genl_parse_tunables(settings, tun);
5953                         if (rc < 0) {
5954                                 GENL_SET_ERR_MSG(info,
5955                                                  "failed to parse tunables");
5956                                 GOTO(out, rc);
5957                         }
5958                 } else if ((nla_strcmp(settings, "lnd tunables") == 0)) {
5959                         const struct lnet_lnd *lnd;
5960
5961                         lnd = lnet_load_lnd(LNET_NETTYP(net_id));
5962                         if (IS_ERR(lnd)) {
5963                                 GENL_SET_ERR_MSG(info,
5964                                                  "LND type not supported");
5965                                 GOTO(out, rc = PTR_ERR(lnd));
5966                         }
5967
5968                         settings = nla_next(settings, &rem3);
5969                         if (nla_type(settings) !=
5970                             LN_SCALAR_ATTR_LIST) {
5971                                 GENL_SET_ERR_MSG(info,
5972                                                  "lnd tunables should be list\n");
5973                                 GOTO(out, rc = -EINVAL);
5974                         }
5975
5976                         rc = lnet_genl_parse_lnd_tunables(settings,
5977                                                           &tun->lt_tun, lnd);
5978                         if (rc < 0) {
5979                                 GENL_SET_ERR_MSG(info,
5980                                                  "failed to parse lnd tunables");
5981                                 GOTO(out, rc);
5982                         }
5983                 } else if (nla_strcmp(settings, "CPT") == 0) {
5984                         struct nlattr *cpt;
5985                         int rem4;
5986
5987                         settings = nla_next(settings, &rem3);
5988                         if (nla_type(settings) != LN_SCALAR_ATTR_LIST) {
5989                                 GENL_SET_ERR_MSG(info,
5990                                                  "CPT should be list");
5991                                 GOTO(out, rc = -EINVAL);
5992                         }
5993
5994                         nla_for_each_nested(cpt, settings, rem4) {
5995                                 s64 core;
5996
5997                                 if (nla_type(cpt) !=
5998                                     LN_SCALAR_ATTR_INT_VALUE) {
5999                                         GENL_SET_ERR_MSG(info,
6000                                                          "invalid CPT config");
6001                                         GOTO(out, rc = -EINVAL);
6002                                 }
6003
6004                                 core = nla_get_s64(cpt);
6005                                 if (core >= LNET_CPT_NUMBER) {
6006                                         GENL_SET_ERR_MSG(info,
6007                                                          "invalid CPT value");
6008                                         GOTO(out, rc = -ERANGE);
6009                                 }
6010
6011                                 conf->lic_cpts[conf->lic_ncpts] = core;
6012                                 conf->lic_ncpts++;
6013                         }
6014                 }
6015         }
6016
6017         if (!create) {
6018                 struct lnet_net *net;
6019                 struct lnet_ni *ni;
6020
6021                 rc = -ENODEV;
6022                 if (!strlen(conf->lic_ni_intf)) {
6023                         GENL_SET_ERR_MSG(info,
6024                                          "interface is missing");
6025                         GOTO(out, rc);
6026                 }
6027
6028                 lnet_net_lock(LNET_LOCK_EX);
6029                 net = lnet_get_net_locked(net_id);
6030                 if (!net) {
6031                         GENL_SET_ERR_MSG(info,
6032                                          "LNet net doesn't exist");
6033                         lnet_net_unlock(LNET_LOCK_EX);
6034                         GOTO(out, rc);
6035                 }
6036
6037                 list_for_each_entry(ni, &net->net_ni_list,
6038                                     ni_netlist) {
6039                         if (!ni->ni_interface ||
6040                             strcmp(ni->ni_interface,
6041                                   conf->lic_ni_intf) != 0)
6042                                 continue;
6043
6044                         lnet_net_unlock(LNET_LOCK_EX);
6045                         rc = lnet_dyn_del_ni(&ni->ni_nid);
6046                         if (rc < 0) {
6047                                 GENL_SET_ERR_MSG(info,
6048                                                  "cannot del LNet NI");
6049                                 GOTO(out, rc);
6050                         }
6051                         break;
6052                 }
6053
6054                 if (rc < 0) { /* will be -ENODEV */
6055                         GENL_SET_ERR_MSG(info,
6056                                          "interface invalid for deleting LNet NI");
6057                         lnet_net_unlock(LNET_LOCK_EX);
6058                 }
6059         } else {
6060                 if (!strlen(conf->lic_ni_intf)) {
6061                         GENL_SET_ERR_MSG(info,
6062                                          "interface is missing");
6063                         GOTO(out, rc);
6064                 }
6065
6066                 rc = lnet_dyn_add_ni(conf, net_id, tun);
6067                 switch (rc) {
6068                 case -ENOENT:
6069                         GENL_SET_ERR_MSG(info,
6070                                          "cannot parse net");
6071                         break;
6072                 case -ERANGE:
6073                         GENL_SET_ERR_MSG(info,
6074                                          "invalid CPT set");
6075                         break;
6076                 default:
6077                         GENL_SET_ERR_MSG(info,
6078                                          "cannot add LNet NI");
6079                 case 0:
6080                         break;
6081                 }
6082         }
6083 out:
6084         if (tun)
6085                 LIBCFS_FREE(tun, sizeof(struct lnet_ioctl_config_lnd_tunables));
6086
6087         return rc;
6088 }
6089
6090 static int lnet_net_cmd(struct sk_buff *skb, struct genl_info *info)
6091 {
6092         struct nlmsghdr *nlh = nlmsg_hdr(skb);
6093         struct genlmsghdr *gnlh = nlmsg_data(nlh);
6094         struct nlattr *params = genlmsg_data(gnlh);
6095         int msg_len, rem, rc = 0;
6096         struct nlattr *attr;
6097
6098         msg_len = genlmsg_len(gnlh);
6099         if (!msg_len) {
6100                 GENL_SET_ERR_MSG(info, "no configuration");
6101                 return -ENOMSG;
6102         }
6103
6104         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
6105                 GENL_SET_ERR_MSG(info, "invalid configuration");
6106                 return -EINVAL;
6107         }
6108
6109         nla_for_each_nested(attr, params, rem) {
6110                 bool ni_list = false, ipnets = false;
6111                 struct lnet_ioctl_config_ni conf;
6112                 u32 net_id = LNET_NET_ANY;
6113                 struct nlattr *entry;
6114                 int rem2;
6115
6116                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6117                         continue;
6118
6119                 nla_for_each_nested(entry, attr, rem2) {
6120                         switch (nla_type(entry)) {
6121                         case LN_SCALAR_ATTR_VALUE: {
6122                                 ssize_t len;
6123
6124                                 memset(&conf, 0, sizeof(conf));
6125                                 if (nla_strcmp(entry, "ip2net") == 0) {
6126                                         entry = nla_next(entry, &rem2);
6127                                         if (nla_type(entry) !=
6128                                             LN_SCALAR_ATTR_VALUE) {
6129                                                 GENL_SET_ERR_MSG(info,
6130                                                                  "ip2net has invalid key");
6131                                                 GOTO(out, rc = -EINVAL);
6132                                         }
6133
6134                                         len = nla_strscpy(conf.lic_legacy_ip2nets,
6135                                                           entry,
6136                                                           sizeof(conf.lic_legacy_ip2nets));
6137                                         if (len < 0) {
6138                                                 GENL_SET_ERR_MSG(info,
6139                                                                  "ip2net key string is invalid");
6140                                                 GOTO(out, rc = len);
6141                                         }
6142                                         ni_list = true;
6143                                         ipnets = true;
6144                                 } else if (nla_strcmp(entry, "net type") == 0) {
6145                                         char tmp[LNET_NIDSTR_SIZE];
6146
6147                                         entry = nla_next(entry, &rem2);
6148                                         if (nla_type(entry) !=
6149                                             LN_SCALAR_ATTR_VALUE) {
6150                                                 GENL_SET_ERR_MSG(info,
6151                                                                  "net type has invalid key");
6152                                                 GOTO(out, rc = -EINVAL);
6153                                         }
6154
6155                                         len = nla_strscpy(tmp, entry,
6156                                                           sizeof(tmp));
6157                                         if (len < 0) {
6158                                                 GENL_SET_ERR_MSG(info,
6159                                                                  "net type key string is invalid");
6160                                                 GOTO(out, rc = len);
6161                                         }
6162
6163                                         net_id = libcfs_str2net(tmp);
6164                                         if (!net_id) {
6165                                                 GENL_SET_ERR_MSG(info,
6166                                                                  "cannot parse net");
6167                                                 GOTO(out, rc = -ENODEV);
6168                                         }
6169                                         if (LNET_NETTYP(net_id) == LOLND) {
6170                                                 GENL_SET_ERR_MSG(info,
6171                                                                  "setting @lo not allowed");
6172                                                 GOTO(out, rc = -ENODEV);
6173                                         }
6174                                         conf.lic_legacy_ip2nets[0] = '\0';
6175                                         conf.lic_ni_intf[0] = '\0';
6176                                         ni_list = false;
6177                                 }
6178                                 if (rc < 0)
6179                                         GOTO(out, rc);
6180                                 break;
6181                         }
6182                         case LN_SCALAR_ATTR_LIST: {
6183                                 struct nlattr *interface;
6184                                 int rem3;
6185
6186                                 ipnets = false;
6187                                 nla_for_each_nested(interface, entry, rem3) {
6188                                         rc = lnet_genl_parse_local_ni(interface, info,
6189                                                                       net_id, &conf,
6190                                                                       &ni_list);
6191                                         if (rc < 0)
6192                                                 GOTO(out, rc);
6193                                 }
6194                                 break;
6195                         }
6196                         /* it is possible a newer version of the user land send
6197                          * values older kernels doesn't handle. So silently
6198                          * ignore these values
6199                          */
6200                         default:
6201                                 break;
6202                         }
6203                 }
6204
6205                 /* Handle case of just sent NET with no list of NIDs */
6206                 if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && !ni_list) {
6207                         rc = lnet_dyn_del_net(net_id);
6208                         if (rc < 0) {
6209                                 GENL_SET_ERR_MSG(info,
6210                                                  "cannot del network");
6211                         }
6212                 } else if ((info->nlhdr->nlmsg_flags & NLM_F_CREATE) &&
6213                            ipnets && ni_list) {
6214                         rc = lnet_handle_legacy_ip2nets(conf.lic_legacy_ip2nets,
6215                                                         NULL);
6216                         if (rc < 0)
6217                                 GENL_SET_ERR_MSG(info,
6218                                                  "cannot setup ip2nets");
6219                 }
6220         }
6221 out:
6222         return rc;
6223 }
6224
6225 /* Called with ln_api_mutex */
6226 static int lnet_parse_peer_nis(struct nlattr *rlist, struct genl_info *info,
6227                                struct lnet_nid *pnid, bool mr,
6228                                bool *create_some)
6229 {
6230         struct lnet_nid snid = LNET_ANY_NID;
6231         struct nlattr *props;
6232         int rem, rc = 0;
6233         s64 num = -1;
6234
6235         nla_for_each_nested(props, rlist, rem) {
6236                 if (nla_type(props) != LN_SCALAR_ATTR_VALUE)
6237                         continue;
6238
6239                 if (nla_strcmp(props, "nid") == 0) {
6240                         char nidstr[LNET_NIDSTR_SIZE];
6241
6242                         props = nla_next(props, &rem);
6243                         if (nla_type(props) != LN_SCALAR_ATTR_VALUE) {
6244                                 GENL_SET_ERR_MSG(info,
6245                                                  "invalid secondary NID");
6246                                 GOTO(report_err, rc = -EINVAL);
6247                         }
6248
6249                         rc = nla_strscpy(nidstr, props, sizeof(nidstr));
6250                         if (rc < 0) {
6251                                 GENL_SET_ERR_MSG(info,
6252                                                  "failed to get secondary NID");
6253                                 GOTO(report_err, rc);
6254                         }
6255
6256                         rc = libcfs_strnid(&snid, strim(nidstr));
6257                         if (rc < 0) {
6258                                 GENL_SET_ERR_MSG(info, "unsupported secondary NID");
6259                                 GOTO(report_err, rc);
6260                         }
6261
6262                         if (LNET_NID_IS_ANY(&snid) || nid_same(&snid, pnid))
6263                                 *create_some = false;
6264                 } else if (nla_strcmp(props, "health stats") == 0) {
6265                         struct nlattr *health;
6266                         int rem2;
6267
6268                         props = nla_next(props, &rem);
6269                         if (nla_type(props) !=
6270                               LN_SCALAR_ATTR_LIST) {
6271                                 GENL_SET_ERR_MSG(info,
6272                                                  "invalid health configuration");
6273                                 GOTO(report_err, rc = -EINVAL);
6274                         }
6275
6276                         nla_for_each_nested(health, props, rem2) {
6277                                 if (nla_type(health) != LN_SCALAR_ATTR_VALUE ||
6278                                     nla_strcmp(health, "health value") != 0) {
6279                                         GENL_SET_ERR_MSG(info,
6280                                                          "wrong health config format");
6281                                         GOTO(report_err, rc = -EINVAL);
6282                                 }
6283
6284                                 health = nla_next(health, &rem2);
6285                                 if (nla_type(health) !=
6286                                     LN_SCALAR_ATTR_INT_VALUE) {
6287                                         GENL_SET_ERR_MSG(info,
6288                                                          "invalid health config format");
6289                                         GOTO(report_err, rc = -EINVAL);
6290                                 }
6291
6292                                 num = nla_get_s64(health);
6293                                 clamp_t(s64, num, 0, LNET_MAX_HEALTH_VALUE);
6294                         }
6295                 }
6296         }
6297
6298         if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE && num != -1) {
6299                 lnet_peer_ni_set_healthv(pnid, num, !*create_some);
6300         } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
6301                 bool lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6302
6303                 rc = lnet_user_add_peer_ni(pnid, &snid, mr, lock_prim);
6304                 if (rc < 0)
6305                         GENL_SET_ERR_MSG(info,
6306                                          "failed to add peer");
6307         } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && *create_some) {
6308                 bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6309
6310                 rc = lnet_del_peer_ni(pnid, &snid, force);
6311                 if (rc < 0)
6312                         GENL_SET_ERR_MSG(info,
6313                                          "failed to del peer");
6314         }
6315 report_err:
6316         return rc;
6317 }
6318
6319 static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info)
6320 {
6321         struct nlmsghdr *nlh = nlmsg_hdr(skb);
6322         struct genlmsghdr *gnlh = nlmsg_data(nlh);
6323         struct nlattr *params = genlmsg_data(gnlh);
6324         int msg_len, rem, rc = 0;
6325         struct lnet_nid pnid;
6326         struct nlattr *attr;
6327
6328         mutex_lock(&the_lnet.ln_api_mutex);
6329         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
6330                 GENL_SET_ERR_MSG(info, "Network is down");
6331                 mutex_unlock(&the_lnet.ln_api_mutex);
6332                 return -ENETDOWN;
6333         }
6334
6335         msg_len = genlmsg_len(gnlh);
6336         if (!msg_len) {
6337                 GENL_SET_ERR_MSG(info, "no configuration");
6338                 mutex_unlock(&the_lnet.ln_api_mutex);
6339                 return -ENOMSG;
6340         }
6341
6342         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
6343                 GENL_SET_ERR_MSG(info, "invalid configuration");
6344                 mutex_unlock(&the_lnet.ln_api_mutex);
6345                 return -EINVAL;
6346         }
6347
6348         nla_for_each_nested(attr, params, rem) {
6349                 bool parse_peer_nis = false;
6350                 struct nlattr *pnid_prop;
6351                 int rem2;
6352
6353                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6354                         continue;
6355
6356                 pnid = LNET_ANY_NID;
6357                 nla_for_each_nested(pnid_prop, attr, rem2) {
6358                         bool mr = true;
6359
6360                         if (nla_type(pnid_prop) != LN_SCALAR_ATTR_VALUE)
6361                                 continue;
6362
6363                         if (nla_strcmp(pnid_prop, "primary nid") == 0) {
6364                                 char nidstr[LNET_NIDSTR_SIZE];
6365
6366                                 pnid_prop = nla_next(pnid_prop, &rem2);
6367                                 if (nla_type(pnid_prop) !=
6368                                     LN_SCALAR_ATTR_VALUE) {
6369                                         GENL_SET_ERR_MSG(info,
6370                                                           "invalid primary NID type");
6371                                         GOTO(report_err, rc = -EINVAL);
6372                                 }
6373
6374                                 rc = nla_strscpy(nidstr, pnid_prop,
6375                                                  sizeof(nidstr));
6376                                 if (rc < 0) {
6377                                         GENL_SET_ERR_MSG(info,
6378                                                          "failed to get primary NID");
6379                                         GOTO(report_err, rc);
6380                                 }
6381
6382                                 rc = libcfs_strnid(&pnid, strim(nidstr));
6383                                 if (rc < 0) {
6384                                         GENL_SET_ERR_MSG(info,
6385                                                          "unsupported primary NID");
6386                                         GOTO(report_err, rc);
6387                                 }
6388
6389                                 /* we must create primary NID for peer ni
6390                                  * creation
6391                                  */
6392                                 if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
6393                                         bool lock_prim;
6394
6395                                         lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6396                                         rc = lnet_user_add_peer_ni(&pnid,
6397                                                                    &LNET_ANY_NID,
6398                                                                    true, lock_prim);
6399                                         if (rc < 0) {
6400                                                 GENL_SET_ERR_MSG(info,
6401                                                                  "failed to add primary peer");
6402                                                 GOTO(report_err, rc);
6403                                         }
6404                                 }
6405                         } else if (nla_strcmp(pnid_prop, "Multi-Rail") == 0) {
6406                                 pnid_prop = nla_next(pnid_prop, &rem2);
6407                                 if (nla_type(pnid_prop) !=
6408                                     LN_SCALAR_ATTR_INT_VALUE) {
6409                                         GENL_SET_ERR_MSG(info,
6410                                                           "invalid MR flag param");
6411                                         GOTO(report_err, rc = -EINVAL);
6412                                 }
6413
6414                                 if (nla_get_s64(pnid_prop) == 0)
6415                                         mr = false;
6416                         } else if (nla_strcmp(pnid_prop, "peer state") == 0) {
6417                                 struct lnet_peer_ni *lpni;
6418                                 struct lnet_peer *lp;
6419
6420                                 pnid_prop = nla_next(pnid_prop, &rem2);
6421                                 if (nla_type(pnid_prop) !=
6422                                     LN_SCALAR_ATTR_INT_VALUE) {
6423                                         GENL_SET_ERR_MSG(info,
6424                                                           "invalid peer state param");
6425                                         GOTO(report_err, rc = -EINVAL);
6426                                 }
6427
6428                                 lpni = lnet_peer_ni_find_locked(&pnid);
6429                                 if (!lpni) {
6430                                         GENL_SET_ERR_MSG(info,
6431                                                           "invalid peer state param");
6432                                         GOTO(report_err, rc = -ENOENT);
6433                                 }
6434                                 lnet_peer_ni_decref_locked(lpni);
6435                                 lp = lpni->lpni_peer_net->lpn_peer;
6436                                 lp->lp_state = nla_get_s64(pnid_prop);
6437                         } else if (nla_strcmp(pnid_prop, "peer ni") == 0) {
6438                                 struct nlattr *rlist;
6439                                 int rem3;
6440
6441                                 if (!(info->nlhdr->nlmsg_flags & NLM_F_REPLACE) &&
6442                                     LNET_NID_IS_ANY(&pnid)) {
6443                                         GENL_SET_ERR_MSG(info,
6444                                                          "missing required primary NID");
6445                                         GOTO(report_err, rc);
6446                                 }
6447
6448                                 pnid_prop = nla_next(pnid_prop, &rem2);
6449                                 if (nla_type(pnid_prop) !=
6450                                     LN_SCALAR_ATTR_LIST) {
6451                                         GENL_SET_ERR_MSG(info,
6452                                                           "invalid NIDs list");
6453                                         GOTO(report_err, rc = -EINVAL);
6454                                 }
6455
6456                                 parse_peer_nis = true;
6457                                 nla_for_each_nested(rlist, pnid_prop, rem3) {
6458                                         rc = lnet_parse_peer_nis(rlist, info,
6459                                                                  &pnid, mr,
6460                                                                  &parse_peer_nis);
6461                                         if (rc < 0)
6462                                                 GOTO(report_err, rc);
6463                                 }
6464                         }
6465                 }
6466
6467                 /* If we have remote peer ni's we already add /del peers */
6468                 if (parse_peer_nis)
6469                         continue;
6470
6471                 if (LNET_NID_IS_ANY(&pnid)) {
6472                         GENL_SET_ERR_MSG(info, "missing primary NID");
6473                         GOTO(report_err, rc);
6474                 }
6475
6476                 if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
6477                         bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6478
6479                         rc = lnet_del_peer_ni(&pnid, &LNET_ANY_NID,
6480                                               force);
6481                         if (rc < 0) {
6482                                 GENL_SET_ERR_MSG(info,
6483                                                  "failed to del primary peer");
6484                                 GOTO(report_err, rc);
6485                         }
6486                 }
6487         }
6488 report_err:
6489         /* If we failed on creation and encounter a latter error then
6490          * delete the primary nid.
6491          */
6492         if (rc < 0 && info->nlhdr->nlmsg_flags & NLM_F_CREATE &&
6493             !LNET_NID_IS_ANY(&pnid))
6494                 lnet_del_peer_ni(&pnid, &LNET_ANY_NID,
6495                                  info->nlhdr->nlmsg_flags & NLM_F_EXCL);
6496         mutex_unlock(&the_lnet.ln_api_mutex);
6497
6498         return rc;
6499 }
6500
6501 /** LNet route handling */
6502
6503 /* We can't use struct lnet_ioctl_config_data since it lacks
6504  * support for large NIDS
6505  */
6506 struct lnet_route_properties {
6507         struct lnet_nid         lrp_gateway;
6508         u32                     lrp_net;
6509         s32                     lrp_hop;
6510         u32                     lrp_flags;
6511         u32                     lrp_priority;
6512         u32                     lrp_sensitivity;
6513 };
6514
6515 struct lnet_genl_route_list {
6516         unsigned int                            lgrl_index;
6517         unsigned int                            lgrl_count;
6518         GENRADIX(struct lnet_route_properties)  lgrl_list;
6519 };
6520
6521 static inline struct lnet_genl_route_list *
6522 lnet_route_dump_ctx(struct netlink_callback *cb)
6523 {
6524         return (struct lnet_genl_route_list *)cb->args[0];
6525 }
6526
6527 static int lnet_route_show_done(struct netlink_callback *cb)
6528 {
6529         struct lnet_genl_route_list *rlist = lnet_route_dump_ctx(cb);
6530
6531         if (rlist) {
6532                 genradix_free(&rlist->lgrl_list);
6533                 CFS_FREE_PTR(rlist);
6534         }
6535         cb->args[0] = 0;
6536
6537         return 0;
6538 }
6539
6540 static int lnet_scan_route(struct lnet_genl_route_list *rlist,
6541                     struct lnet_route_properties *settings)
6542 {
6543         struct lnet_remotenet *rnet;
6544         struct list_head *rn_list;
6545         struct lnet_route *route;
6546         int cpt, i, rc = 0;
6547
6548         cpt = lnet_net_lock_current();
6549
6550         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
6551                 rn_list = &the_lnet.ln_remote_nets_hash[i];
6552                 list_for_each_entry(rnet, rn_list, lrn_list) {
6553                         if (settings->lrp_net != LNET_NET_ANY &&
6554                             settings->lrp_net != rnet->lrn_net)
6555                                 continue;
6556
6557                         list_for_each_entry(route, &rnet->lrn_routes,
6558                                             lr_list) {
6559                                 struct lnet_route_properties *prop;
6560
6561                                 if (!LNET_NID_IS_ANY(&settings->lrp_gateway) &&
6562                                     !nid_same(&settings->lrp_gateway,
6563                                               &route->lr_nid)) {
6564                                         continue;
6565                                 }
6566
6567                                 if (settings->lrp_hop != -1 &&
6568                                     settings->lrp_hop != route->lr_hops)
6569                                         continue;
6570
6571                                 if (settings->lrp_priority != -1 &&
6572                                     settings->lrp_priority != route->lr_priority)
6573                                         continue;
6574
6575                                 if (settings->lrp_sensitivity != -1 &&
6576                                     settings->lrp_sensitivity !=
6577                                     route->lr_gateway->lp_health_sensitivity)
6578                                         continue;
6579
6580                                 prop = genradix_ptr_alloc(&rlist->lgrl_list,
6581                                                           rlist->lgrl_count++,
6582                                                           GFP_ATOMIC);
6583                                 if (!prop)
6584                                         GOTO(failed_alloc, rc = -ENOMEM);
6585
6586                                 prop->lrp_net = rnet->lrn_net;
6587                                 prop->lrp_gateway = route->lr_nid;
6588                                 prop->lrp_hop = route->lr_hops;
6589                                 prop->lrp_priority = route->lr_priority;
6590                                 prop->lrp_sensitivity =
6591                                         route->lr_gateway->lp_health_sensitivity;
6592                                 if (lnet_is_route_alive(route))
6593                                         prop->lrp_flags |= LNET_RT_ALIVE;
6594                                 else
6595                                         prop->lrp_flags &= ~LNET_RT_ALIVE;
6596                                 if (route->lr_single_hop)
6597                                         prop->lrp_flags &= ~LNET_RT_MULTI_HOP;
6598                                 else
6599                                         prop->lrp_flags |= LNET_RT_MULTI_HOP;
6600                         }
6601                 }
6602         }
6603
6604 failed_alloc:
6605         lnet_net_unlock(cpt);
6606         return rc;
6607 }
6608
6609 /* LNet route ->start() handler for GET requests */
6610 static int lnet_route_show_start(struct netlink_callback *cb)
6611 {
6612         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6613 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6614         struct netlink_ext_ack *extack = NULL;
6615 #endif
6616         struct lnet_genl_route_list *rlist;
6617         int msg_len = genlmsg_len(gnlh);
6618         int rc = 0;
6619
6620 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6621         extack = cb->extack;
6622 #endif
6623         if (the_lnet.ln_refcount == 0 ||
6624             the_lnet.ln_state != LNET_STATE_RUNNING) {
6625                 NL_SET_ERR_MSG(extack, "Network is down");
6626                 return -ENETDOWN;
6627         }
6628
6629         CFS_ALLOC_PTR(rlist);
6630         if (!rlist) {
6631                 NL_SET_ERR_MSG(extack, "No memory for route list");
6632                 return -ENOMEM;
6633         }
6634
6635         genradix_init(&rlist->lgrl_list);
6636         rlist->lgrl_count = 0;
6637         rlist->lgrl_index = 0;
6638         cb->args[0] = (long)rlist;
6639
6640         mutex_lock(&the_lnet.ln_api_mutex);
6641         if (!msg_len) {
6642                 struct lnet_route_properties tmp = {
6643                         .lrp_gateway            = LNET_ANY_NID,
6644                         .lrp_net                = LNET_NET_ANY,
6645                         .lrp_hop                = -1,
6646                         .lrp_priority           = -1,
6647                         .lrp_sensitivity        = -1,
6648                 };
6649
6650                 rc = lnet_scan_route(rlist, &tmp);
6651                 if (rc < 0) {
6652                         NL_SET_ERR_MSG(extack,
6653                                        "failed to allocate router data");
6654                         GOTO(report_err, rc);
6655                 }
6656         } else {
6657                 struct nlattr *params = genlmsg_data(gnlh);
6658                 struct nlattr *attr;
6659                 int rem;
6660
6661                 nla_for_each_nested(attr, params, rem) {
6662                         struct lnet_route_properties tmp = {
6663                                 .lrp_gateway            = LNET_ANY_NID,
6664                                 .lrp_net                = LNET_NET_ANY,
6665                                 .lrp_hop                = -1,
6666                                 .lrp_priority           = -1,
6667                                 .lrp_sensitivity        = -1,
6668                         };
6669                         struct nlattr *route;
6670                         int rem2;
6671
6672                         if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6673                                 continue;
6674
6675                         nla_for_each_nested(route, attr, rem2) {
6676                                 if (nla_type(route) != LN_SCALAR_ATTR_VALUE)
6677                                         continue;
6678
6679                                 if (nla_strcmp(route, "net") == 0) {
6680                                         char nw[LNET_NIDSTR_SIZE];
6681
6682                                         route = nla_next(route, &rem2);
6683                                         if (nla_type(route) !=
6684                                             LN_SCALAR_ATTR_VALUE) {
6685                                                 NL_SET_ERR_MSG(extack,
6686                                                                "invalid net param");
6687                                                 GOTO(report_err, rc = -EINVAL);
6688                                         }
6689
6690                                         rc = nla_strscpy(nw, route, sizeof(nw));
6691                                         if (rc < 0) {
6692                                                 NL_SET_ERR_MSG(extack,
6693                                                                "failed to get route param");
6694                                                 GOTO(report_err, rc);
6695                                         }
6696                                         rc = 0;
6697                                         tmp.lrp_net = libcfs_str2net(strim(nw));
6698                                 } else if (nla_strcmp(route, "gateway") == 0) {
6699                                         char gw[LNET_NIDSTR_SIZE];
6700
6701                                         route = nla_next(route, &rem2);
6702                                         if (nla_type(route) !=
6703                                             LN_SCALAR_ATTR_VALUE) {
6704                                                 NL_SET_ERR_MSG(extack,
6705                                                                "invalid gateway param");
6706                                                 GOTO(report_err, rc = -EINVAL);
6707                                         }
6708
6709                                         rc = nla_strscpy(gw, route, sizeof(gw));
6710                                         if (rc < 0) {
6711                                                 NL_SET_ERR_MSG(extack,
6712                                                                "failed to get route param");
6713                                                 GOTO(report_err, rc);
6714                                         }
6715
6716                                         rc = libcfs_strnid(&tmp.lrp_gateway, strim(gw));
6717                                         if (rc < 0) {
6718                                                 NL_SET_ERR_MSG(extack,
6719                                                                "cannot parse gateway");
6720                                                 GOTO(report_err, rc = -ENODEV);
6721                                         }
6722                                         rc = 0;
6723                                 } else if (nla_strcmp(route, "hop") == 0) {
6724                                         route = nla_next(route, &rem2);
6725                                         if (nla_type(route) !=
6726                                             LN_SCALAR_ATTR_INT_VALUE) {
6727                                                 NL_SET_ERR_MSG(extack,
6728                                                                "invalid hop param");
6729                                                 GOTO(report_err, rc = -EINVAL);
6730                                         }
6731
6732                                         tmp.lrp_hop = nla_get_s64(route);
6733                                         if (tmp.lrp_hop != -1)
6734                                                 clamp_t(s32, tmp.lrp_hop, 1, 127);
6735                                 } else if (nla_strcmp(route, "priority") == 0) {
6736                                         route = nla_next(route, &rem2);
6737                                         if (nla_type(route) !=
6738                                             LN_SCALAR_ATTR_INT_VALUE) {
6739                                                 NL_SET_ERR_MSG(extack,
6740                                                                "invalid priority param");
6741                                                 GOTO(report_err, rc = -EINVAL);
6742                                         }
6743
6744                                         tmp.lrp_priority = nla_get_s64(route);
6745                                 }
6746                         }
6747
6748                         rc = lnet_scan_route(rlist, &tmp);
6749                         if (rc < 0) {
6750                                 NL_SET_ERR_MSG(extack,
6751                                                "failed to allocate router data");
6752                                 GOTO(report_err, rc);
6753                         }
6754                 }
6755         }
6756 report_err:
6757         mutex_unlock(&the_lnet.ln_api_mutex);
6758
6759         if (rc < 0)
6760                 lnet_route_show_done(cb);
6761
6762         return rc;
6763 }
6764
6765 static const struct ln_key_list route_props_list = {
6766         .lkl_maxattr                    = LNET_ROUTE_ATTR_MAX,
6767         .lkl_list                       = {
6768                 [LNET_ROUTE_ATTR_HDR]                   = {
6769                         .lkp_value                      = "route",
6770                         .lkp_key_format                 = LNKF_SEQUENCE | LNKF_MAPPING,
6771                         .lkp_data_type                  = NLA_NUL_STRING,
6772                 },
6773                 [LNET_ROUTE_ATTR_NET]                   = {
6774                         .lkp_value                      = "net",
6775                         .lkp_data_type                  = NLA_STRING
6776                 },
6777                 [LNET_ROUTE_ATTR_GATEWAY]               = {
6778                         .lkp_value                      = "gateway",
6779                         .lkp_data_type                  = NLA_STRING
6780                 },
6781                 [LNET_ROUTE_ATTR_HOP]                   = {
6782                         .lkp_value                      = "hop",
6783                         .lkp_data_type                  = NLA_S32
6784                 },
6785                 [LNET_ROUTE_ATTR_PRIORITY]              = {
6786                         .lkp_value                      = "priority",
6787                         .lkp_data_type                  = NLA_U32
6788                 },
6789                 [LNET_ROUTE_ATTR_HEALTH_SENSITIVITY]    = {
6790                         .lkp_value                      = "health_sensitivity",
6791                         .lkp_data_type                  = NLA_U32
6792                 },
6793                 [LNET_ROUTE_ATTR_STATE] = {
6794                         .lkp_value                      = "state",
6795                         .lkp_data_type                  = NLA_STRING,
6796                 },
6797                 [LNET_ROUTE_ATTR_TYPE]  = {
6798                         .lkp_value                      = "type",
6799                         .lkp_data_type                  = NLA_STRING,
6800                 },
6801         },
6802 };
6803
6804
6805 static int lnet_route_show_dump(struct sk_buff *msg,
6806                                 struct netlink_callback *cb)
6807 {
6808         struct lnet_genl_route_list *rlist = lnet_route_dump_ctx(cb);
6809         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6810 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6811         struct netlink_ext_ack *extack = NULL;
6812 #endif
6813         int portid = NETLINK_CB(cb->skb).portid;
6814         int seq = cb->nlh->nlmsg_seq;
6815         int idx = rlist->lgrl_index;
6816         int msg_len = genlmsg_len(gnlh);
6817         int rc = 0;
6818
6819 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6820         extack = cb->extack;
6821 #endif
6822         if (!rlist->lgrl_count) {
6823                 NL_SET_ERR_MSG(extack, "No routes found");
6824                 GOTO(send_error, rc = msg_len ? -ENOENT : 0);
6825         }
6826
6827         if (!idx) {
6828                 const struct ln_key_list *all[] = {
6829                         &route_props_list, NULL
6830                 };
6831
6832                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
6833                                                 &lnet_family,
6834                                                 NLM_F_CREATE | NLM_F_MULTI,
6835                                                 LNET_CMD_ROUTES, all);
6836                 if (rc < 0) {
6837                         NL_SET_ERR_MSG(extack, "failed to send key table");
6838                         GOTO(send_error, rc);
6839                 }
6840         }
6841
6842         while (idx < rlist->lgrl_count) {
6843                 struct lnet_route_properties *prop;
6844                 void *hdr;
6845
6846                 prop = genradix_ptr(&rlist->lgrl_list, idx++);
6847
6848                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
6849                                   NLM_F_MULTI, LNET_CMD_ROUTES);
6850                 if (!hdr) {
6851                         NL_SET_ERR_MSG(extack, "failed to send values");
6852                         genlmsg_cancel(msg, hdr);
6853                         GOTO(send_error, rc = -EMSGSIZE);
6854                 }
6855
6856                 if (idx == 1)
6857                         nla_put_string(msg, LNET_ROUTE_ATTR_HDR, "");
6858
6859                 nla_put_string(msg, LNET_ROUTE_ATTR_NET,
6860                                libcfs_net2str(prop->lrp_net));
6861                 nla_put_string(msg, LNET_ROUTE_ATTR_GATEWAY,
6862                                libcfs_nidstr(&prop->lrp_gateway));
6863                 if (gnlh->version) {
6864                         nla_put_s32(msg, LNET_ROUTE_ATTR_HOP, prop->lrp_hop);
6865                         nla_put_u32(msg, LNET_ROUTE_ATTR_PRIORITY, prop->lrp_priority);
6866                         nla_put_u32(msg, LNET_ROUTE_ATTR_HEALTH_SENSITIVITY,
6867                                     prop->lrp_sensitivity);
6868
6869                         nla_put_string(msg, LNET_ROUTE_ATTR_STATE,
6870                                        prop->lrp_flags & LNET_RT_ALIVE ?
6871                                        "up" : "down");
6872                         nla_put_string(msg, LNET_ROUTE_ATTR_TYPE,
6873                                        prop->lrp_flags & LNET_RT_MULTI_HOP ?
6874                                        "multi-hop" : "single-hop");
6875                 }
6876                 genlmsg_end(msg, hdr);
6877         }
6878         rlist->lgrl_index = idx;
6879 send_error:
6880         return lnet_nl_send_error(cb->skb, portid, seq, rc);
6881 };
6882
6883 #ifndef HAVE_NETLINK_CALLBACK_START
6884 static int lnet_old_route_show_dump(struct sk_buff *msg,
6885                                     struct netlink_callback *cb)
6886 {
6887         if (!cb->args[0]) {
6888                 int rc = lnet_route_show_start(cb);
6889
6890                 if (rc < 0)
6891                         return lnet_nl_send_error(cb->skb,
6892                                                   NETLINK_CB(cb->skb).portid,
6893                                                   cb->nlh->nlmsg_seq,
6894                                                   rc);
6895         }
6896
6897         return lnet_route_show_dump(msg, cb);
6898 }
6899 #endif /* !HAVE_NETLINK_CALLBACK_START */
6900
6901 /** LNet peer handling */
6902 struct lnet_genl_processid_list {
6903         unsigned int                    lgpl_index;
6904         unsigned int                    lgpl_count;
6905         GENRADIX(struct lnet_processid) lgpl_list;
6906 };
6907
6908 static inline struct lnet_genl_processid_list *
6909 lnet_peer_dump_ctx(struct netlink_callback *cb)
6910 {
6911         return (struct lnet_genl_processid_list *)cb->args[0];
6912 }
6913
6914 static int lnet_peer_ni_show_done(struct netlink_callback *cb)
6915 {
6916         struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb);
6917
6918         if (plist) {
6919                 genradix_free(&plist->lgpl_list);
6920                 CFS_FREE_PTR(plist);
6921         }
6922         cb->args[0] = 0;
6923
6924         return 0;
6925 }
6926
6927 /* LNet peer ->start() handler for GET requests */
6928 static int lnet_peer_ni_show_start(struct netlink_callback *cb)
6929 {
6930         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6931 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6932         struct netlink_ext_ack *extack = NULL;
6933 #endif
6934         struct lnet_genl_processid_list *plist;
6935         int msg_len = genlmsg_len(gnlh);
6936         int rc = 0;
6937
6938 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6939         extack = cb->extack;
6940 #endif
6941         mutex_lock(&the_lnet.ln_api_mutex);
6942         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
6943                 NL_SET_ERR_MSG(extack, "Network is down");
6944                 mutex_unlock(&the_lnet.ln_api_mutex);
6945                 return -ENETDOWN;
6946         }
6947
6948         CFS_ALLOC_PTR(plist);
6949         if (!plist) {
6950                 NL_SET_ERR_MSG(extack, "No memory for peer list");
6951                 mutex_unlock(&the_lnet.ln_api_mutex);
6952                 return -ENOMEM;
6953         }
6954
6955         genradix_init(&plist->lgpl_list);
6956         plist->lgpl_count = 0;
6957         plist->lgpl_index = 0;
6958         cb->args[0] = (long)plist;
6959
6960         if (!msg_len) {
6961                 struct lnet_peer_table *ptable;
6962                 int cpt;
6963
6964                 cfs_percpt_for_each(ptable, cpt, the_lnet.ln_peer_tables) {
6965                         struct lnet_peer *lp;
6966
6967                         list_for_each_entry(lp, &ptable->pt_peer_list,
6968                                             lp_peer_list) {
6969                                 struct lnet_processid *lpi;
6970
6971                                 lpi = genradix_ptr_alloc(&plist->lgpl_list,
6972                                                          plist->lgpl_count++,
6973                                                          GFP_KERNEL);
6974                                 if (!lpi) {
6975                                         NL_SET_ERR_MSG(extack,
6976                                                       "failed to allocate NID");
6977                                         GOTO(report_err, rc = -ENOMEM);
6978                                 }
6979
6980                                 lpi->pid = LNET_PID_LUSTRE;
6981                                 lpi->nid = lp->lp_primary_nid;
6982                         }
6983                 }
6984         } else {
6985                 struct nlattr *params = genlmsg_data(gnlh);
6986                 struct nlattr *attr;
6987                 int rem;
6988
6989                 nla_for_each_nested(attr, params, rem) {
6990                         struct nlattr *nid;
6991                         int rem2;
6992
6993                         if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6994                                 continue;
6995
6996                         nla_for_each_nested(nid, attr, rem2) {
6997                                 char addr[LNET_NIDSTR_SIZE];
6998                                 struct lnet_processid *id;
6999
7000                                 if (nla_type(nid) != LN_SCALAR_ATTR_VALUE ||
7001                                     nla_strcmp(nid, "primary nid") != 0)
7002                                         continue;
7003
7004                                 nid = nla_next(nid, &rem2);
7005                                 if (nla_type(nid) != LN_SCALAR_ATTR_VALUE) {
7006                                         NL_SET_ERR_MSG(extack,
7007                                                        "invalid primary nid param");
7008                                         GOTO(report_err, rc = -EINVAL);
7009                                 }
7010
7011                                 rc = nla_strscpy(addr, nid, sizeof(addr));
7012                                 if (rc < 0) {
7013                                         NL_SET_ERR_MSG(extack,
7014                                                        "failed to get primary nid param");
7015                                         GOTO(report_err, rc);
7016                                 }
7017
7018                                 id = genradix_ptr_alloc(&plist->lgpl_list,
7019                                                         plist->lgpl_count++,
7020                                                         GFP_KERNEL);
7021                                 if (!id) {
7022                                         NL_SET_ERR_MSG(extack, "failed to allocate NID");
7023                                         GOTO(report_err, rc = -ENOMEM);
7024                                 }
7025
7026                                 rc = libcfs_strid(id, strim(addr));
7027                                 if (rc < 0) {
7028                                         NL_SET_ERR_MSG(extack, "invalid NID");
7029                                         GOTO(report_err, rc);
7030                                 }
7031                                 rc = 0;
7032                         }
7033                 }
7034         }
7035 report_err:
7036         mutex_unlock(&the_lnet.ln_api_mutex);
7037
7038         if (rc < 0)
7039                 lnet_peer_ni_show_done(cb);
7040
7041         return rc;
7042 }
7043
7044 static const struct ln_key_list lnet_peer_ni_keys = {
7045         .lkl_maxattr                    = LNET_PEER_NI_ATTR_MAX,
7046         .lkl_list                       = {
7047                 [LNET_PEER_NI_ATTR_HDR]  = {
7048                         .lkp_value              = "peer",
7049                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7050                         .lkp_data_type          = NLA_NUL_STRING,
7051                 },
7052                 [LNET_PEER_NI_ATTR_PRIMARY_NID] = {
7053                         .lkp_value              = "primary nid",
7054                         .lkp_data_type          = NLA_STRING,
7055                 },
7056                 [LNET_PEER_NI_ATTR_MULTIRAIL]   = {
7057                         .lkp_value              = "Multi-Rail",
7058                         .lkp_data_type          = NLA_FLAG
7059                 },
7060                 [LNET_PEER_NI_ATTR_STATE]       = {
7061                         .lkp_value              = "peer state",
7062                         .lkp_data_type          = NLA_U32
7063                 },
7064                 [LNET_PEER_NI_ATTR_PEER_NI_LIST] = {
7065                         .lkp_value              = "peer ni",
7066                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7067                         .lkp_data_type          = NLA_NESTED,
7068                 },
7069         },
7070 };
7071
7072 static const struct ln_key_list lnet_peer_ni_list = {
7073         .lkl_maxattr                    = LNET_PEER_NI_LIST_ATTR_MAX,
7074         .lkl_list                       = {
7075                 [LNET_PEER_NI_LIST_ATTR_NID]            = {
7076                         .lkp_value                      = "nid",
7077                         .lkp_data_type                  = NLA_STRING,
7078                 },
7079                 [LNET_PEER_NI_LIST_ATTR_UDSP_INFO]      = {
7080                         .lkp_value                      = "udsp info",
7081                         .lkp_key_format                 = LNKF_MAPPING,
7082                         .lkp_data_type                  = NLA_NESTED,
7083                 },
7084                 [LNET_PEER_NI_LIST_ATTR_STATE]          = {
7085                         .lkp_value                      = "state",
7086                         .lkp_data_type                  = NLA_STRING,
7087                 },
7088                 [LNET_PEER_NI_LIST_ATTR_MAX_TX_CREDITS] = {
7089                         .lkp_value                      = "max_ni_tx_credits",
7090                         .lkp_data_type                  = NLA_U32,
7091                 },
7092                 [LNET_PEER_NI_LIST_ATTR_CUR_TX_CREDITS] = {
7093                         .lkp_value                      = "available_tx_credits",
7094                         .lkp_data_type                  = NLA_U32,
7095                 },
7096                 [LNET_PEER_NI_LIST_ATTR_MIN_TX_CREDITS] = {
7097                         .lkp_value                      = "min_tx_credits",
7098                         .lkp_data_type                  = NLA_U32,
7099                 },
7100                 [LNET_PEER_NI_LIST_ATTR_QUEUE_BUF_COUNT] = {
7101                         .lkp_value                      = "tx_q_num_of_buf",
7102                         .lkp_data_type                  = NLA_U32,
7103                 },
7104                 [LNET_PEER_NI_LIST_ATTR_CUR_RTR_CREDITS] = {
7105                         .lkp_value                      = "available_rtr_credits",
7106                         .lkp_data_type                  = NLA_U32,
7107                 },
7108                 [LNET_PEER_NI_LIST_ATTR_MIN_RTR_CREDITS] = {
7109                         .lkp_value                      = "min_rtr_credits",
7110                         .lkp_data_type                  = NLA_U32,
7111                 },
7112                 [LNET_PEER_NI_LIST_ATTR_REFCOUNT]       = {
7113                         .lkp_value                      = "refcount",
7114                         .lkp_data_type                  = NLA_U32,
7115                 },
7116                 [LNET_PEER_NI_LIST_ATTR_STATS_COUNT]    = {
7117                         .lkp_value                      = "statistics",
7118                         .lkp_key_format                 = LNKF_MAPPING,
7119                         .lkp_data_type                  = NLA_NESTED
7120                 },
7121                 [LNET_PEER_NI_LIST_ATTR_SENT_STATS]     = {
7122                         .lkp_value                      = "sent_stats",
7123                         .lkp_key_format                 = LNKF_MAPPING,
7124                         .lkp_data_type                  = NLA_NESTED
7125                 },
7126                 [LNET_PEER_NI_LIST_ATTR_RECV_STATS]     = {
7127                         .lkp_value                      = "received_stats",
7128                         .lkp_key_format                 = LNKF_MAPPING,
7129                         .lkp_data_type                  = NLA_NESTED
7130                 },
7131                 [LNET_PEER_NI_LIST_ATTR_DROP_STATS]     = {
7132                         .lkp_value                      = "dropped_stats",
7133                         .lkp_key_format                 = LNKF_MAPPING,
7134                         .lkp_data_type                  = NLA_NESTED
7135                 },
7136                 [LNET_PEER_NI_LIST_ATTR_HEALTH_STATS]   = {
7137                         .lkp_value                      = "health stats",
7138                         .lkp_key_format                 = LNKF_MAPPING,
7139                         .lkp_data_type                  = NLA_NESTED
7140                 },
7141         },
7142 };
7143
7144 static const struct ln_key_list lnet_peer_ni_list_stats_count = {
7145         .lkl_maxattr                    = LNET_PEER_NI_LIST_STATS_COUNT_ATTR_MAX,
7146         .lkl_list                       = {
7147                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_SEND_COUNT] = {
7148                         .lkp_value                              = "send_count",
7149                         .lkp_data_type                          = NLA_U32,
7150                 },
7151                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_RECV_COUNT] = {
7152                         .lkp_value                              = "recv_count",
7153                         .lkp_data_type                          = NLA_U32,
7154                 },
7155                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_DROP_COUNT] = {
7156                         .lkp_value                              = "drop_count",
7157                         .lkp_data_type                          = NLA_U32,
7158                 },
7159         },
7160 };
7161
7162 static const struct ln_key_list lnet_peer_ni_list_stats = {
7163         .lkl_maxattr                    = LNET_PEER_NI_LIST_STATS_ATTR_MAX,
7164         .lkl_list                       = {
7165                 [LNET_PEER_NI_LIST_STATS_ATTR_PUT]      = {
7166                         .lkp_value                      = "put",
7167                         .lkp_data_type                  = NLA_U32,
7168                 },
7169                 [LNET_PEER_NI_LIST_STATS_ATTR_GET]      = {
7170                         .lkp_value                      = "get",
7171                         .lkp_data_type                  = NLA_U32,
7172                 },
7173                 [LNET_PEER_NI_LIST_STATS_ATTR_REPLY]    = {
7174                         .lkp_value                      = "reply",
7175                         .lkp_data_type                  = NLA_U32,
7176                 },
7177                 [LNET_PEER_NI_LIST_STATS_ATTR_ACK]      = {
7178                         .lkp_value                      = "ack",
7179                         .lkp_data_type                  = NLA_U32,
7180                 },
7181                 [LNET_PEER_NI_LIST_STATS_ATTR_HELLO]    = {
7182                         .lkp_value                      = "hello",
7183                         .lkp_data_type                  = NLA_U32,
7184                 },
7185         },
7186 };
7187
7188 static const struct ln_key_list lnet_peer_ni_list_health = {
7189         .lkl_maxattr                    = LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_MAX,
7190         .lkl_list                       = {
7191                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_VALUE]     = {
7192                         .lkp_value                      = "health value",
7193                         .lkp_data_type                  = NLA_S32,
7194                 },
7195                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_DROPPED]   = {
7196                         .lkp_value                      = "dropped",
7197                         .lkp_data_type                  = NLA_U32,
7198                 },
7199                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_TIMEOUT]   = {
7200                         .lkp_value                      = "timeout",
7201                         .lkp_data_type                  = NLA_U32,
7202                 },
7203                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_ERROR]     = {
7204                         .lkp_value                      = "error",
7205                         .lkp_data_type                  = NLA_U32,
7206                 },
7207                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NETWORK_TIMEOUT] = {
7208                         .lkp_value                      = "network timeout",
7209                         .lkp_data_type                  = NLA_U32,
7210                 },
7211                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PING_COUNT] = {
7212                         .lkp_value                      = "ping_count",
7213                         .lkp_data_type                  = NLA_U32,
7214                 },
7215                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NEXT_PING] = {
7216                         .lkp_value                      = "next_ping",
7217                         .lkp_data_type                  = NLA_S64,
7218                 },
7219         },
7220 };
7221
7222 static int lnet_peer_ni_show_dump(struct sk_buff *msg,
7223                                   struct netlink_callback *cb)
7224 {
7225         struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb);
7226         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
7227 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7228         struct netlink_ext_ack *extack = NULL;
7229 #endif
7230         int portid = NETLINK_CB(cb->skb).portid;
7231         int seq = cb->nlh->nlmsg_seq;
7232         int idx = plist->lgpl_index;
7233         int msg_len = genlmsg_len(gnlh);
7234         int rc = 0;
7235
7236 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
7237         extack = cb->extack;
7238 #endif
7239         if (!plist->lgpl_count) {
7240                 NL_SET_ERR_MSG(extack, "No peers found");
7241                 GOTO(send_error, rc = msg_len ? -ENOENT : 0);
7242         }
7243
7244         if (!idx) {
7245                 const struct ln_key_list *all[] = {
7246                         &lnet_peer_ni_keys, &lnet_peer_ni_list,
7247                         &udsp_info_list, &udsp_info_pref_nids_list,
7248                         &udsp_info_pref_nids_list,
7249                         &lnet_peer_ni_list_stats_count,
7250                         &lnet_peer_ni_list_stats, /* send_stats */
7251                         &lnet_peer_ni_list_stats, /* recv_stats */
7252                         &lnet_peer_ni_list_stats, /* drop stats */
7253                         &lnet_peer_ni_list_health,
7254                         NULL
7255                 };
7256
7257                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
7258                                                 &lnet_family,
7259                                                 NLM_F_CREATE | NLM_F_MULTI,
7260                                                 LNET_CMD_PEERS, all);
7261                 if (rc < 0) {
7262                         NL_SET_ERR_MSG(extack, "failed to send key table");
7263                         GOTO(send_error, rc);
7264                 }
7265         }
7266
7267         mutex_lock(&the_lnet.ln_api_mutex);
7268         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
7269                 NL_SET_ERR_MSG(extack, "Network is down");
7270                 GOTO(unlock_api_mutex, rc = -ENETDOWN);
7271         }
7272
7273         while (idx < plist->lgpl_count) {
7274                 struct lnet_processid *id;
7275                 struct lnet_peer_ni *lpni = NULL;
7276                 struct nlattr *nid_list;
7277                 struct lnet_peer *lp;
7278                 int count = 1;
7279                 void *hdr;
7280
7281                 id = genradix_ptr(&plist->lgpl_list, idx++);
7282                 if (nid_is_lo0(&id->nid))
7283                         continue;
7284
7285                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
7286                                   NLM_F_MULTI, LNET_CMD_PEERS);
7287                 if (!hdr) {
7288                         NL_SET_ERR_MSG(extack, "failed to send values");
7289                         genlmsg_cancel(msg, hdr);
7290                         GOTO(unlock_api_mutex, rc = -EMSGSIZE);
7291                 }
7292
7293                 lp = lnet_find_peer(&id->nid);
7294                 if (!lp) {
7295                         NL_SET_ERR_MSG(extack, "cannot find peer");
7296                         GOTO(unlock_api_mutex, rc = -ENOENT);
7297                 }
7298
7299                 if (idx == 1)
7300                         nla_put_string(msg, LNET_PEER_NI_ATTR_HDR, "");
7301
7302                 nla_put_string(msg, LNET_PEER_NI_ATTR_PRIMARY_NID,
7303                                libcfs_nidstr(&lp->lp_primary_nid));
7304                 if (lnet_peer_is_multi_rail(lp))
7305                         nla_put_flag(msg, LNET_PEER_NI_ATTR_MULTIRAIL);
7306
7307                 if (gnlh->version >= 3)
7308                         nla_put_u32(msg, LNET_PEER_NI_ATTR_STATE, lp->lp_state);
7309
7310                 nid_list = nla_nest_start(msg, LNET_PEER_NI_ATTR_PEER_NI_LIST);
7311                 while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
7312                         struct nlattr *peer_nid = nla_nest_start(msg, count++);
7313
7314                         nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_NID,
7315                                        libcfs_nidstr(&lpni->lpni_nid));
7316
7317                         if (gnlh->version >= 4) {
7318                                 rc = lnet_udsp_info_send(msg,
7319                                                          LNET_PEER_NI_LIST_ATTR_UDSP_INFO,
7320                                                          &lpni->lpni_nid, true);
7321                                 if (rc < 0) {
7322                                         lnet_peer_decref_locked(lp);
7323                                         NL_SET_ERR_MSG(extack,
7324                                                        "failed to get UDSP info");
7325                                         GOTO(unlock_api_mutex, rc);
7326                                 }
7327                         }
7328
7329                         if (lnet_isrouter(lpni) ||
7330                             lnet_peer_aliveness_enabled(lpni)) {
7331                                 nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_STATE,
7332                                                lnet_is_peer_ni_alive(lpni) ?
7333                                                "up" : "down");
7334                         } else {
7335                                 nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_STATE,
7336                                                "NA");
7337                         }
7338
7339                         if (gnlh->version) {
7340                                 struct lnet_ioctl_element_msg_stats lpni_msg_stats;
7341                                 struct nlattr *send_stats_list, *send_stats;
7342                                 struct nlattr *recv_stats_list, *recv_stats;
7343                                 struct nlattr *drop_stats_list, *drop_stats;
7344                                 struct nlattr *health_list, *health_stats;
7345                                 struct lnet_ioctl_element_stats stats;
7346                                 struct nlattr *stats_attr, *ni_stats;
7347
7348                                 nla_put_u32(msg,
7349                                             LNET_PEER_NI_LIST_ATTR_MAX_TX_CREDITS,
7350                                             lpni->lpni_net ?
7351                                                 lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0);
7352                                 nla_put_u32(msg,
7353                                             LNET_PEER_NI_LIST_ATTR_CUR_TX_CREDITS,
7354                                             lpni->lpni_txcredits);
7355                                 nla_put_u32(msg,
7356                                             LNET_PEER_NI_LIST_ATTR_MIN_TX_CREDITS,
7357                                             lpni->lpni_mintxcredits);
7358                                 nla_put_u32(msg,
7359                                             LNET_PEER_NI_LIST_ATTR_QUEUE_BUF_COUNT,
7360                                             lpni->lpni_txqnob);
7361                                 nla_put_u32(msg,
7362                                             LNET_PEER_NI_LIST_ATTR_CUR_RTR_CREDITS,
7363                                             lpni->lpni_rtrcredits);
7364                                 nla_put_u32(msg,
7365                                             LNET_PEER_NI_LIST_ATTR_MIN_RTR_CREDITS,
7366                                             lpni->lpni_minrtrcredits);
7367                                 nla_put_u32(msg,
7368                                             LNET_PEER_NI_LIST_ATTR_REFCOUNT,
7369                                             kref_read(&lpni->lpni_kref));
7370
7371                                 memset(&stats, 0, sizeof(stats));
7372                                 stats.iel_send_count = lnet_sum_stats(&lpni->lpni_stats,
7373                                                                       LNET_STATS_TYPE_SEND);
7374                                 stats.iel_recv_count = lnet_sum_stats(&lpni->lpni_stats,
7375                                                                       LNET_STATS_TYPE_RECV);
7376                                 stats.iel_drop_count = lnet_sum_stats(&lpni->lpni_stats,
7377                                                                       LNET_STATS_TYPE_DROP);
7378
7379                                 stats_attr = nla_nest_start(msg,
7380                                                             LNET_PEER_NI_LIST_ATTR_STATS_COUNT);
7381                                 ni_stats = nla_nest_start(msg, 0);
7382                                 nla_put_u32(msg,
7383                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_SEND_COUNT,
7384                                             stats.iel_send_count);
7385                                 nla_put_u32(msg,
7386                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_RECV_COUNT,
7387                                             stats.iel_recv_count);
7388                                 nla_put_u32(msg,
7389                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_DROP_COUNT,
7390                                             stats.iel_drop_count);
7391                                 nla_nest_end(msg, ni_stats);
7392                                 nla_nest_end(msg, stats_attr);
7393
7394                                 if (gnlh->version < 2)
7395                                         goto skip_msg_stats;
7396
7397                                 lnet_usr_translate_stats(&lpni_msg_stats, &lpni->lpni_stats);
7398
7399                                 send_stats_list = nla_nest_start(msg,
7400                                                                  LNET_PEER_NI_LIST_ATTR_SENT_STATS);
7401                                 send_stats = nla_nest_start(msg, 0);
7402                                 nla_put_u32(msg,
7403                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7404                                             lpni_msg_stats.im_send_stats.ico_put_count);
7405                                 nla_put_u32(msg,
7406                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7407                                             lpni_msg_stats.im_send_stats.ico_get_count);
7408                                 nla_put_u32(msg,
7409                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7410                                             lpni_msg_stats.im_send_stats.ico_reply_count);
7411                                 nla_put_u32(msg,
7412                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7413                                             lpni_msg_stats.im_send_stats.ico_ack_count);
7414                                 nla_put_u32(msg,
7415                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7416                                             lpni_msg_stats.im_send_stats.ico_hello_count);
7417                                 nla_nest_end(msg, send_stats);
7418                                 nla_nest_end(msg, send_stats_list);
7419
7420                                 recv_stats_list = nla_nest_start(msg,
7421                                                                  LNET_PEER_NI_LIST_ATTR_RECV_STATS);
7422                                 recv_stats = nla_nest_start(msg, 0);
7423                                 nla_put_u32(msg,
7424                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7425                                             lpni_msg_stats.im_recv_stats.ico_put_count);
7426                                 nla_put_u32(msg,
7427                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7428                                             lpni_msg_stats.im_recv_stats.ico_get_count);
7429                                 nla_put_u32(msg,
7430                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7431                                             lpni_msg_stats.im_recv_stats.ico_reply_count);
7432                                 nla_put_u32(msg,
7433                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7434                                             lpni_msg_stats.im_recv_stats.ico_ack_count);
7435                                 nla_put_u32(msg,
7436                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7437                                             lpni_msg_stats.im_recv_stats.ico_hello_count);
7438                                 nla_nest_end(msg, recv_stats);
7439                                 nla_nest_end(msg, recv_stats_list);
7440
7441                                 drop_stats_list = nla_nest_start(msg,
7442                                                                  LNET_PEER_NI_LIST_ATTR_DROP_STATS);
7443                                 drop_stats = nla_nest_start(msg, 0);
7444                                 nla_put_u32(msg,
7445                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7446                                             lpni_msg_stats.im_drop_stats.ico_put_count);
7447                                 nla_put_u32(msg,
7448                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7449                                             lpni_msg_stats.im_drop_stats.ico_get_count);
7450                                 nla_put_u32(msg,
7451                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7452                                             lpni_msg_stats.im_drop_stats.ico_reply_count);
7453                                 nla_put_u32(msg,
7454                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7455                                             lpni_msg_stats.im_drop_stats.ico_ack_count);
7456                                 nla_put_u32(msg,
7457                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7458                                             lpni_msg_stats.im_drop_stats.ico_hello_count);
7459                                 nla_nest_end(msg, drop_stats);
7460                                 nla_nest_end(msg, drop_stats_list);
7461
7462                                 health_list = nla_nest_start(msg,
7463                                                              LNET_PEER_NI_LIST_ATTR_HEALTH_STATS);
7464                                 health_stats = nla_nest_start(msg, 0);
7465                                 nla_put_s32(msg,
7466                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_VALUE,
7467                                             atomic_read(&lpni->lpni_healthv));
7468                                 nla_put_u32(msg,
7469                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_DROPPED,
7470                                             atomic_read(&lpni->lpni_hstats.hlt_remote_dropped));
7471                                 nla_put_u32(msg,
7472                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_TIMEOUT,
7473                                             atomic_read(&lpni->lpni_hstats.hlt_remote_timeout));
7474                                 nla_put_u32(msg,
7475                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_ERROR,
7476                                             atomic_read(&lpni->lpni_hstats.hlt_remote_error));
7477                                 nla_put_u32(msg,
7478                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NETWORK_TIMEOUT,
7479                                             atomic_read(&lpni->lpni_hstats.hlt_network_timeout));
7480                                 nla_put_u32(msg,
7481                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PING_COUNT,
7482                                             lpni->lpni_ping_count);
7483                                 nla_put_s64(msg,
7484                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NEXT_PING,
7485                                             lpni->lpni_next_ping,
7486                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PAD);
7487                                 nla_nest_end(msg, health_stats);
7488                                 nla_nest_end(msg, health_list);
7489                         }
7490 skip_msg_stats:
7491                         nla_nest_end(msg, peer_nid);
7492                 }
7493                 nla_nest_end(msg, nid_list);
7494
7495                 genlmsg_end(msg, hdr);
7496                 lnet_peer_decref_locked(lp);
7497         }
7498         plist->lgpl_index = idx;
7499 unlock_api_mutex:
7500         mutex_unlock(&the_lnet.ln_api_mutex);
7501 send_error:
7502         return lnet_nl_send_error(cb->skb, portid, seq, rc);
7503 };
7504
7505 #ifndef HAVE_NETLINK_CALLBACK_START
7506 static int lnet_old_peer_ni_show_dump(struct sk_buff *msg,
7507                                       struct netlink_callback *cb)
7508 {
7509         if (!cb->args[0]) {
7510                 int rc = lnet_peer_ni_show_start(cb);
7511
7512                 if (rc < 0)
7513                         return lnet_nl_send_error(cb->skb,
7514                                                   NETLINK_CB(cb->skb).portid,
7515                                                   cb->nlh->nlmsg_seq,
7516                                                   rc);
7517         }
7518
7519         return lnet_peer_ni_show_dump(msg, cb);
7520 }
7521 #endif
7522
7523 static int lnet_route_cmd(struct sk_buff *skb, struct genl_info *info)
7524 {
7525         struct nlmsghdr *nlh = nlmsg_hdr(skb);
7526         struct genlmsghdr *gnlh = nlmsg_data(nlh);
7527         struct nlattr *params = genlmsg_data(gnlh);
7528         int msg_len, rem, rc = 0;
7529         struct nlattr *attr;
7530
7531         mutex_lock(&the_lnet.ln_api_mutex);
7532         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
7533                 GENL_SET_ERR_MSG(info, "Network is down");
7534                 mutex_unlock(&the_lnet.ln_api_mutex);
7535                 return -ENETDOWN;
7536         }
7537
7538         msg_len = genlmsg_len(gnlh);
7539         if (!msg_len) {
7540                 GENL_SET_ERR_MSG(info, "no configuration");
7541                 mutex_unlock(&the_lnet.ln_api_mutex);
7542                 return -ENOMSG;
7543         }
7544
7545         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
7546                 GENL_SET_ERR_MSG(info, "invalid configuration");
7547                 mutex_unlock(&the_lnet.ln_api_mutex);
7548                 return -EINVAL;
7549         }
7550
7551         nla_for_each_nested(attr, params, rem) {
7552                 u32 net_id = LNET_NET_ANY, hops = LNET_UNDEFINED_HOPS;
7553                 u32 priority = 0, sensitivity = 1;
7554                 struct lnet_nid gw_nid = LNET_ANY_NID;
7555                 struct nlattr *route_prop;
7556                 bool alive = true;
7557                 s64 when = 0;
7558                 int rem2;
7559
7560                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
7561                         continue;
7562
7563                 nla_for_each_nested(route_prop, attr, rem2) {
7564                         char tmp[LNET_NIDSTR_SIZE];
7565                         ssize_t len;
7566                         s64 num;
7567
7568                         if (nla_type(route_prop) != LN_SCALAR_ATTR_VALUE)
7569                                 continue;
7570
7571                         if (nla_strcmp(route_prop, "net") == 0) {
7572                                 route_prop = nla_next(route_prop, &rem2);
7573                                 if (nla_type(route_prop) !=
7574                                     LN_SCALAR_ATTR_VALUE) {
7575                                         GENL_SET_ERR_MSG(info,
7576                                                          "net is invalid key");
7577                                         GOTO(report_err, rc = -EINVAL);
7578                                 }
7579
7580                                 len = nla_strscpy(tmp, route_prop, sizeof(tmp));
7581                                 if (len < 0) {
7582                                         GENL_SET_ERR_MSG(info,
7583                                                          "net key string is invalid");
7584                                         GOTO(report_err, rc = len);
7585                                 }
7586
7587                                 net_id = libcfs_str2net(tmp);
7588                                 if (!net_id) {
7589                                         GENL_SET_ERR_MSG(info,
7590                                                          "cannot parse remote net");
7591                                         GOTO(report_err, rc = -ENODEV);
7592                                 }
7593
7594                                 if (LNET_NETTYP(net_id) == LOLND) {
7595                                         GENL_SET_ERR_MSG(info,
7596                                                          "setting @lo not allowed");
7597                                         GOTO(report_err, rc = -EACCES);
7598                                 }
7599
7600                                 if (net_id == LNET_NET_ANY) {
7601                                         GENL_SET_ERR_MSG(info,
7602                                                          "setting LNET_NET_ANY not allowed");
7603                                         GOTO(report_err, rc = -ENXIO);
7604                                 }
7605                         } else if (nla_strcmp(route_prop, "gateway") == 0) {
7606                                 route_prop = nla_next(route_prop, &rem2);
7607                                 if (nla_type(route_prop) !=
7608                                     LN_SCALAR_ATTR_VALUE) {
7609                                         GENL_SET_ERR_MSG(info,
7610                                                          "gateway is invalid key");
7611                                         GOTO(report_err, rc = -EINVAL);
7612                                 }
7613
7614                                 len = nla_strscpy(tmp, route_prop, sizeof(tmp));
7615                                 if (len < 0) {
7616                                         GENL_SET_ERR_MSG(info,
7617                                                          "gateway string is invalid");
7618                                         GOTO(report_err, rc = len);
7619                                 }
7620
7621                                 rc = libcfs_strnid(&gw_nid, strim(tmp));
7622                                 if (rc < 0) {
7623                                         GENL_SET_ERR_MSG(info,
7624                                                          "cannot parse gateway");
7625                                         GOTO(report_err, rc = -ENODEV);
7626                                 }
7627                         } else if (nla_strcmp(route_prop, "state") == 0) {
7628                                 route_prop = nla_next(route_prop, &rem2);
7629                                 if (nla_type(route_prop) !=
7630                                     LN_SCALAR_ATTR_VALUE) {
7631                                         GENL_SET_ERR_MSG(info,
7632                                                          "state is invalid key");
7633                                         GOTO(report_err, rc = -EINVAL);
7634                                 }
7635
7636                                 if (nla_strcmp(route_prop, "down") == 0) {
7637                                         alive = false;
7638                                 } else if (nla_strcmp(route_prop, "up") == 0) {
7639                                         alive = true;
7640                                 } else {
7641                                         GENL_SET_ERR_MSG(info,
7642                                                          "status string bad value");
7643                                         GOTO(report_err, rc = -EINVAL);
7644                                 }
7645                         } else if (nla_strcmp(route_prop, "notify_time") == 0) {
7646                                 route_prop = nla_next(route_prop, &rem2);
7647                                 if (nla_type(route_prop) !=
7648                                     LN_SCALAR_ATTR_INT_VALUE) {
7649                                         GENL_SET_ERR_MSG(info,
7650                                                          "notify_time is invalid key");
7651                                         GOTO(report_err, rc = -EINVAL);
7652                                 }
7653
7654                                 when = nla_get_s64(route_prop);
7655                                 if (ktime_get_real_seconds() < when) {
7656                                         GENL_SET_ERR_MSG(info,
7657                                                          "notify_time is in the future");
7658                                         GOTO(report_err, rc = -EINVAL);
7659                                 }
7660                         } else if (nla_strcmp(route_prop, "hop") == 0) {
7661                                 route_prop = nla_next(route_prop, &rem2);
7662                                 if (nla_type(route_prop) !=
7663                                     LN_SCALAR_ATTR_INT_VALUE) {
7664                                         GENL_SET_ERR_MSG(info,
7665                                                          "hop has invalid key");
7666                                         GOTO(report_err, rc = -EINVAL);
7667                                 }
7668
7669                                 hops = nla_get_s64(route_prop);
7670                                 if ((hops < 1 || hops > 255) && hops != -1) {
7671                                         GENL_SET_ERR_MSG(info,
7672                                                          "invalid hop count must be between 1 and 255");
7673                                         GOTO(report_err, rc = -EINVAL);
7674                                 }
7675                         } else if (nla_strcmp(route_prop, "priority") == 0) {
7676                                 route_prop = nla_next(route_prop, &rem2);
7677                                 if (nla_type(route_prop) !=
7678                                     LN_SCALAR_ATTR_INT_VALUE) {
7679                                         GENL_SET_ERR_MSG(info,
7680                                                          "priority has invalid key");
7681                                         GOTO(report_err, rc = -EINVAL);
7682                                 }
7683
7684                                 num = nla_get_s64(route_prop);
7685                                 if (num < 0) {
7686                                         GENL_SET_ERR_MSG(info,
7687                                                          "invalid priority, must not be negative");
7688                                         GOTO(report_err, rc = -EINVAL);
7689                                 }
7690                                 priority = num;
7691                         } else if (nla_strcmp(route_prop,
7692                                               "health_sensitivity") == 0) {
7693                                 route_prop = nla_next(route_prop, &rem2);
7694                                 if (nla_type(route_prop) !=
7695                                     LN_SCALAR_ATTR_INT_VALUE) {
7696                                         GENL_SET_ERR_MSG(info,
7697                                                          "sensitivity has invalid key");
7698                                         GOTO(report_err, rc = -EINVAL);
7699                                 }
7700
7701                                 num = nla_get_s64(route_prop);
7702                                 if (num < 1) {
7703                                         GENL_SET_ERR_MSG(info,
7704                                                          "invalid health sensitivity, must be 1 or greater");
7705                                         GOTO(report_err, rc = -EINVAL);
7706                                 }
7707                                 sensitivity = num;
7708                         }
7709                 }
7710
7711                 if (net_id == LNET_NET_ANY) {
7712                         GENL_SET_ERR_MSG(info,
7713                                          "missing mandatory parameter: network");
7714                         GOTO(report_err, rc = -ENODEV);
7715                 }
7716
7717                 if (LNET_NID_IS_ANY(&gw_nid)) {
7718                         GENL_SET_ERR_MSG(info,
7719                                          "missing mandatory parameter: gateway");
7720                         GOTO(report_err, rc = -ENODEV);
7721                 }
7722
7723                 if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) {
7724                         /* Convert the user-supplied real time to monotonic.
7725                          * NB: "when" is always in the past
7726                          */
7727                         when = ktime_get_seconds() -
7728                                 (ktime_get_real_seconds() - when);
7729
7730                         mutex_unlock(&the_lnet.ln_api_mutex);
7731                         rc = lnet_notify(NULL, &gw_nid, alive, false, when);
7732                         mutex_lock(&the_lnet.ln_api_mutex);
7733                         if (rc < 0)
7734                                 GOTO(report_err, rc);
7735                         else if (the_lnet.ln_state != LNET_STATE_RUNNING)
7736                                 GOTO(report_err, rc = -ENETDOWN);
7737                 } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
7738                         rc = lnet_add_route(net_id, hops, &gw_nid, priority,
7739                                             sensitivity);
7740                         if (rc < 0) {
7741                                 switch (rc) {
7742                                 case -EINVAL:
7743                                         GENL_SET_ERR_MSG(info,
7744                                                          "invalid settings for route creation");
7745                                         break;
7746                                 case -EHOSTUNREACH:
7747                                         GENL_SET_ERR_MSG(info,
7748                                                          "No interface configured on the same net as gateway");
7749                                         break;
7750                                 case -ESHUTDOWN:
7751                                         GENL_SET_ERR_MSG(info,
7752                                                          "Network is down");
7753                                         break;
7754                                 case -EEXIST:
7755                                         GENL_SET_ERR_MSG(info,
7756                                                          "Route already exists or the specified network is local");
7757                                         break;
7758                                 default:
7759                                         GENL_SET_ERR_MSG(info,
7760                                                          "failed to create route");
7761                                         break;
7762                                 }
7763                                 GOTO(report_err, rc);
7764                         }
7765                 } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
7766                         rc = lnet_del_route(net_id, &gw_nid);
7767                         if (rc < 0) {
7768                                 GENL_SET_ERR_MSG(info,
7769                                                  "failed to delete route");
7770                                 GOTO(report_err, rc);
7771                         }
7772                 }
7773         }
7774 report_err:
7775         mutex_unlock(&the_lnet.ln_api_mutex);
7776
7777         return rc;
7778 }
7779
7780 static inline struct lnet_genl_ping_list *
7781 lnet_ping_dump_ctx(struct netlink_callback *cb)
7782 {
7783         return (struct lnet_genl_ping_list *)cb->args[0];
7784 }
7785
7786 static int lnet_ping_show_done(struct netlink_callback *cb)
7787 {
7788         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
7789
7790         if (plist) {
7791                 genradix_free(&plist->lgpl_failed);
7792                 genradix_free(&plist->lgpl_list);
7793                 LIBCFS_FREE(plist, sizeof(*plist));
7794                 cb->args[0] = 0;
7795         }
7796
7797         return 0;
7798 }
7799
7800 /* LNet ping ->start() handler for GET requests */
7801 static int lnet_ping_show_start(struct netlink_callback *cb)
7802 {
7803         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
7804 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7805         struct netlink_ext_ack *extack = NULL;
7806 #endif
7807         struct lnet_genl_ping_list *plist;
7808         int msg_len = genlmsg_len(gnlh);
7809         struct nlattr *params, *top;
7810         int rem, rc = 0;
7811
7812 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
7813         extack = cb->extack;
7814 #endif
7815         if (the_lnet.ln_refcount == 0) {
7816                 NL_SET_ERR_MSG(extack, "Network is down");
7817                 return -ENETDOWN;
7818         }
7819
7820         if (!msg_len) {
7821                 NL_SET_ERR_MSG(extack, "Ping needs NID targets");
7822                 return -ENOENT;
7823         }
7824
7825         LIBCFS_ALLOC(plist, sizeof(*plist));
7826         if (!plist) {
7827                 NL_SET_ERR_MSG(extack, "failed to setup ping list");
7828                 return -ENOMEM;
7829         }
7830         genradix_init(&plist->lgpl_list);
7831         plist->lgpl_timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
7832         plist->lgpl_src_nid = LNET_ANY_NID;
7833         plist->lgpl_index = 0;
7834         plist->lgpl_list_count = 0;
7835         cb->args[0] = (long)plist;
7836
7837         params = genlmsg_data(gnlh);
7838         nla_for_each_attr(top, params, msg_len, rem) {
7839                 struct nlattr *nids;
7840                 int rem2;
7841
7842                 switch (nla_type(top)) {
7843                 case LN_SCALAR_ATTR_VALUE:
7844                         if (nla_strcmp(top, "timeout") == 0) {
7845                                 s64 timeout;
7846
7847                                 top = nla_next(top, &rem);
7848                                 if (nla_type(top) != LN_SCALAR_ATTR_INT_VALUE) {
7849                                         NL_SET_ERR_MSG(extack,
7850                                                        "invalid timeout param");
7851                                         GOTO(report_err, rc = -EINVAL);
7852                                 }
7853
7854                                 /* If timeout is negative then set default of
7855                                  * 3 minutes
7856                                  */
7857                                 timeout = nla_get_s64(top);
7858                                 if (timeout > 0 &&
7859                                     timeout < (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
7860                                         plist->lgpl_timeout =
7861                                                 nsecs_to_jiffies(timeout * NSEC_PER_MSEC);
7862                         } else if (nla_strcmp(top, "source") == 0) {
7863                                 char nidstr[LNET_NIDSTR_SIZE + 1];
7864
7865                                 top = nla_next(top, &rem);
7866                                 if (nla_type(top) != LN_SCALAR_ATTR_VALUE) {
7867                                         NL_SET_ERR_MSG(extack,
7868                                                        "invalid source param");
7869                                         GOTO(report_err, rc = -EINVAL);
7870                                 }
7871
7872                                 rc = nla_strscpy(nidstr, top, sizeof(nidstr));
7873                                 if (rc < 0) {
7874                                         NL_SET_ERR_MSG(extack,
7875                                                        "failed to parse source nid");
7876                                         GOTO(report_err, rc);
7877                                 }
7878
7879                                 rc = libcfs_strnid(&plist->lgpl_src_nid,
7880                                                    strim(nidstr));
7881                                 if (rc < 0) {
7882                                         NL_SET_ERR_MSG(extack,
7883                                                        "invalid source nid");
7884                                         GOTO(report_err, rc);
7885                                 }
7886                                 rc = 0;
7887                         }
7888                         break;
7889                 case LN_SCALAR_ATTR_LIST:
7890                         nla_for_each_nested(nids, top, rem2) {
7891                                 char nid[LNET_NIDSTR_SIZE + 1];
7892                                 struct lnet_processid *id;
7893
7894                                 if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
7895                                         continue;
7896
7897                                 memset(nid, 0, sizeof(nid));
7898                                 rc = nla_strscpy(nid, nids, sizeof(nid));
7899                                 if (rc < 0) {
7900                                         NL_SET_ERR_MSG(extack,
7901                                                        "failed to get NID");
7902                                         GOTO(report_err, rc);
7903                                 }
7904
7905                                 id = genradix_ptr_alloc(&plist->lgpl_list,
7906                                                         plist->lgpl_list_count++,
7907                                                         GFP_KERNEL);
7908                                 if (!id) {
7909                                         NL_SET_ERR_MSG(extack,
7910                                                        "failed to allocate NID");
7911                                         GOTO(report_err, rc = -ENOMEM);
7912                                 }
7913
7914                                 rc = libcfs_strid(id, strim(nid));
7915                                 if (rc < 0) {
7916                                         NL_SET_ERR_MSG(extack, "cannot parse NID");
7917                                         GOTO(report_err, rc);
7918                                 }
7919                                 rc = 0;
7920                         }
7921                         fallthrough;
7922                 default:
7923                         break;
7924                 }
7925         }
7926 report_err:
7927         if (rc < 0)
7928                 lnet_ping_show_done(cb);
7929
7930         return rc;
7931 }
7932
7933 static const struct ln_key_list ping_err_props_list = {
7934         .lkl_maxattr                    = LNET_ERR_ATTR_MAX,
7935         .lkl_list                       = {
7936                 [LNET_ERR_ATTR_HDR]             = {
7937                         .lkp_value              = "manage",
7938                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7939                         .lkp_data_type          = NLA_NUL_STRING,
7940                 },
7941                 [LNET_ERR_ATTR_TYPE]            = {
7942                         .lkp_value              = "ping",
7943                         .lkp_data_type          = NLA_STRING,
7944                 },
7945                 [LNET_ERR_ATTR_ERRNO]           = {
7946                         .lkp_value              = "errno",
7947                         .lkp_data_type          = NLA_S16,
7948                 },
7949                 [LNET_ERR_ATTR_DESCR]           = {
7950                         .lkp_value              = "descr",
7951                         .lkp_data_type          = NLA_STRING,
7952                 },
7953         },
7954 };
7955
7956 static const struct ln_key_list ping_props_list = {
7957         .lkl_maxattr                    = LNET_PING_ATTR_MAX,
7958         .lkl_list                       = {
7959                 [LNET_PING_ATTR_HDR]            = {
7960                         .lkp_value              = "ping",
7961                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7962                         .lkp_data_type          = NLA_NUL_STRING,
7963                 },
7964                 [LNET_PING_ATTR_PRIMARY_NID]    = {
7965                         .lkp_value              = "primary nid",
7966                         .lkp_data_type          = NLA_STRING
7967                 },
7968                 [LNET_PING_ATTR_ERRNO]          = {
7969                         .lkp_value              = "errno",
7970                         .lkp_data_type          = NLA_S16
7971                 },
7972                 [LNET_PING_ATTR_MULTIRAIL]      = {
7973                         .lkp_value              = "Multi-Rail",
7974                         .lkp_data_type          = NLA_FLAG
7975                 },
7976                 [LNET_PING_ATTR_PEER_NI_LIST]   = {
7977                         .lkp_value              = "peer_ni",
7978                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7979                         .lkp_data_type          = NLA_NESTED
7980                 },
7981         },
7982 };
7983
7984 static const struct ln_key_list ping_peer_ni_list = {
7985         .lkl_maxattr                    = LNET_PING_PEER_NI_ATTR_MAX,
7986         .lkl_list                       = {
7987                 [LNET_PING_PEER_NI_ATTR_NID]    = {
7988                         .lkp_value              = "nid",
7989                         .lkp_data_type          = NLA_STRING
7990                 },
7991         },
7992 };
7993
7994 static int lnet_ping_show_dump(struct sk_buff *msg,
7995                                struct netlink_callback *cb)
7996 {
7997         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
7998 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7999         struct netlink_ext_ack *extack = NULL;
8000 #endif
8001         int portid = NETLINK_CB(cb->skb).portid;
8002         int seq = cb->nlh->nlmsg_seq;
8003         int idx = plist->lgpl_index;
8004         int rc = 0, i = 0;
8005
8006 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
8007         extack = cb->extack;
8008 #endif
8009         if (!plist->lgpl_index) {
8010                 const struct ln_key_list *all[] = {
8011                         &ping_props_list, &ping_peer_ni_list, NULL
8012                 };
8013
8014                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
8015                                                 &lnet_family,
8016                                                 NLM_F_CREATE | NLM_F_MULTI,
8017                                                 LNET_CMD_PING, all);
8018                 if (rc < 0) {
8019                         NL_SET_ERR_MSG(extack, "failed to send key table");
8020                         GOTO(send_error, rc);
8021                 }
8022
8023                 genradix_init(&plist->lgpl_failed);
8024         }
8025
8026         while (idx < plist->lgpl_list_count) {
8027                 struct lnet_nid primary_nid = LNET_ANY_NID;
8028                 struct lnet_genl_ping_list peers;
8029                 struct lnet_processid *id;
8030                 struct nlattr *nid_list;
8031                 struct lnet_peer *lp;
8032                 bool mr_flag = false;
8033                 unsigned int count;
8034                 void *hdr = NULL;
8035
8036                 id = genradix_ptr(&plist->lgpl_list, idx++);
8037
8038                 rc = lnet_ping(id, &plist->lgpl_src_nid, plist->lgpl_timeout,
8039                                &peers, lnet_interfaces_max);
8040                 if (rc < 0) {
8041                         struct lnet_fail_ping *fail;
8042
8043                         fail = genradix_ptr_alloc(&plist->lgpl_failed,
8044                                                   plist->lgpl_failed_count++,
8045                                                   GFP_KERNEL);
8046                         if (!fail) {
8047                                 NL_SET_ERR_MSG(extack,
8048                                                "failed to allocate failed NID");
8049                                 GOTO(send_error, rc);
8050                         }
8051                         memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
8052                         snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
8053                                  "failed to ping %s",
8054                                  libcfs_nidstr(&id->nid));
8055                         fail->lfp_id = *id;
8056                         fail->lfp_errno = rc;
8057                         goto cant_reach;
8058                 }
8059
8060                 mutex_lock(&the_lnet.ln_api_mutex);
8061                 lp = lnet_find_peer(&id->nid);
8062                 if (lp) {
8063                         primary_nid = lp->lp_primary_nid;
8064                         mr_flag = lnet_peer_is_multi_rail(lp);
8065                         lnet_peer_decref_locked(lp);
8066                 }
8067                 mutex_unlock(&the_lnet.ln_api_mutex);
8068
8069                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
8070                                   NLM_F_MULTI, LNET_CMD_PING);
8071                 if (!hdr) {
8072                         NL_SET_ERR_MSG(extack, "failed to send values");
8073                         genlmsg_cancel(msg, hdr);
8074                         GOTO(send_error, rc = -EMSGSIZE);
8075                 }
8076
8077                 if (i++ == 0)
8078                         nla_put_string(msg, LNET_PING_ATTR_HDR, "");
8079
8080                 nla_put_string(msg, LNET_PING_ATTR_PRIMARY_NID,
8081                                libcfs_nidstr(&primary_nid));
8082                 if (mr_flag)
8083                         nla_put_flag(msg, LNET_PING_ATTR_MULTIRAIL);
8084
8085                 nid_list = nla_nest_start(msg, LNET_PING_ATTR_PEER_NI_LIST);
8086                 for (count = 0; count < rc; count++) {
8087                         struct lnet_processid *result;
8088                         struct nlattr *nid_attr;
8089                         char *idstr;
8090
8091                         result = genradix_ptr(&peers.lgpl_list, count);
8092                         if (nid_is_lo0(&result->nid))
8093                                 continue;
8094
8095                         nid_attr = nla_nest_start(msg, count + 1);
8096                         if (id->pid == LNET_PID_LUSTRE)
8097                                 idstr = libcfs_nidstr(&result->nid);
8098                         else
8099                                 idstr = libcfs_idstr(result);
8100                         nla_put_string(msg, LNET_PING_PEER_NI_ATTR_NID, idstr);
8101                         nla_nest_end(msg, nid_attr);
8102                 }
8103                 nla_nest_end(msg, nid_list);
8104                 genlmsg_end(msg, hdr);
8105 cant_reach:
8106                 genradix_free(&peers.lgpl_list);
8107         }
8108
8109         if (plist->lgpl_failed_count) {
8110                 int flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
8111                 const struct ln_key_list *fail[] = {
8112                         &ping_err_props_list, NULL
8113                 };
8114
8115                 rc = lnet_genl_send_scalar_list(msg, portid, seq, &lnet_family,
8116                                                 flags, LNET_CMD_PING, fail);
8117                 if (rc < 0) {
8118                         NL_SET_ERR_MSG(extack,
8119                                        "failed to send new key table");
8120                         GOTO(send_error, rc);
8121                 }
8122
8123                 for (i = 0; i < plist->lgpl_failed_count; i++) {
8124                         struct lnet_fail_ping *fail;
8125                         void *hdr;
8126
8127                         fail = genradix_ptr(&plist->lgpl_failed, i);
8128
8129                         hdr = genlmsg_put(msg, portid, seq, &lnet_family,
8130                                           NLM_F_MULTI, LNET_CMD_PING);
8131                         if (!hdr) {
8132                                 NL_SET_ERR_MSG(extack,
8133                                                "failed to send failed values");
8134                                 genlmsg_cancel(msg, hdr);
8135                                 GOTO(send_error, rc = -EMSGSIZE);
8136                         }
8137
8138                         if (i == 0)
8139                                 nla_put_string(msg, LNET_ERR_ATTR_HDR, "");
8140
8141                         nla_put_string(msg, LNET_ERR_ATTR_TYPE, "\n");
8142                         nla_put_s16(msg, LNET_ERR_ATTR_ERRNO,
8143                                     fail->lfp_errno);
8144                         nla_put_string(msg, LNET_ERR_ATTR_DESCR,
8145                                        fail->lfp_msg);
8146                         genlmsg_end(msg, hdr);
8147                 }
8148         }
8149         genradix_free(&plist->lgpl_list);
8150         rc = 0; /* don't treat it as an error */
8151
8152         plist->lgpl_index = idx;
8153 send_error:
8154         return lnet_nl_send_error(cb->skb, portid, seq, rc);
8155 }
8156
8157 #ifndef HAVE_NETLINK_CALLBACK_START
8158 static int lnet_old_ping_show_dump(struct sk_buff *msg,
8159                                    struct netlink_callback *cb)
8160 {
8161         if (!cb->args[0]) {
8162                 int rc = lnet_ping_show_start(cb);
8163
8164                 if (rc < 0)
8165                         return lnet_nl_send_error(cb->skb,
8166                                                   NETLINK_CB(cb->skb).portid,
8167                                                   cb->nlh->nlmsg_seq,
8168                                                   rc);
8169         }
8170
8171         return lnet_ping_show_dump(msg, cb);
8172 }
8173 #endif
8174
8175 static const struct ln_key_list discover_err_props_list = {
8176         .lkl_maxattr                    = LNET_ERR_ATTR_MAX,
8177         .lkl_list                       = {
8178                 [LNET_ERR_ATTR_HDR]             = {
8179                         .lkp_value              = "manage",
8180                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8181                         .lkp_data_type          = NLA_NUL_STRING,
8182                 },
8183                 [LNET_ERR_ATTR_TYPE]            = {
8184                         .lkp_value              = "discover",
8185                         .lkp_data_type          = NLA_STRING,
8186                 },
8187                 [LNET_ERR_ATTR_ERRNO]           = {
8188                         .lkp_value              = "errno",
8189                         .lkp_data_type          = NLA_S16,
8190                 },
8191                 [LNET_ERR_ATTR_DESCR]           = {
8192                         .lkp_value              = "descr",
8193                         .lkp_data_type          = NLA_STRING,
8194                 },
8195         },
8196 };
8197
8198 static const struct ln_key_list discover_props_list = {
8199         .lkl_maxattr                    = LNET_PING_ATTR_MAX,
8200         .lkl_list                       = {
8201                 [LNET_PING_ATTR_HDR]            = {
8202                         .lkp_value              = "discover",
8203                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8204                         .lkp_data_type          = NLA_NUL_STRING,
8205                 },
8206                 [LNET_PING_ATTR_PRIMARY_NID]    = {
8207                         .lkp_value              = "primary nid",
8208                         .lkp_data_type          = NLA_STRING
8209                 },
8210                 [LNET_PING_ATTR_ERRNO]          = {
8211                         .lkp_value              = "errno",
8212                         .lkp_data_type          = NLA_S16
8213                 },
8214                 [LNET_PING_ATTR_MULTIRAIL]      = {
8215                         .lkp_value              = "Multi-Rail",
8216                         .lkp_data_type          = NLA_FLAG
8217                 },
8218                 [LNET_PING_ATTR_PEER_NI_LIST]   = {
8219                         .lkp_value              = "peer_ni",
8220                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8221                         .lkp_data_type          = NLA_NESTED
8222                 },
8223         },
8224 };
8225
8226 static int lnet_ping_cmd(struct sk_buff *skb, struct genl_info *info)
8227 {
8228         const struct ln_key_list *all[] = {
8229                 &discover_props_list, &ping_peer_ni_list, NULL
8230         };
8231         struct nlmsghdr *nlh = nlmsg_hdr(skb);
8232         struct genlmsghdr *gnlh = nlmsg_data(nlh);
8233         struct nlattr *params = genlmsg_data(gnlh);
8234         struct lnet_genl_ping_list dlists;
8235         int msg_len, rem, rc = 0, i;
8236         bool clear_hdr = false;
8237         struct sk_buff *reply;
8238         struct nlattr *attr;
8239         void *hdr = NULL;
8240
8241         msg_len = genlmsg_len(gnlh);
8242         if (!msg_len) {
8243                 GENL_SET_ERR_MSG(info, "no configuration");
8244                 return -ENOMSG;
8245         }
8246
8247         if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
8248                 GENL_SET_ERR_MSG(info, "only NLM_F_CREATE setting is allowed");
8249                 return -EINVAL;
8250         }
8251
8252         reply = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
8253         if (!reply) {
8254                 GENL_SET_ERR_MSG(info,
8255                                  "fail to allocate reply");
8256                 return -ENOMEM;
8257         }
8258
8259         genradix_init(&dlists.lgpl_failed);
8260         dlists.lgpl_failed_count = 0;
8261         genradix_init(&dlists.lgpl_list);
8262         dlists.lgpl_list_count = 0;
8263
8264         rc = lnet_genl_send_scalar_list(reply, info->snd_portid,
8265                                         info->snd_seq, &lnet_family,
8266                                         NLM_F_CREATE | NLM_F_MULTI,
8267                                         LNET_CMD_PING, all);
8268         if (rc < 0) {
8269                 GENL_SET_ERR_MSG(info,
8270                                  "failed to send key table");
8271                 GOTO(report_err, rc);
8272         }
8273
8274         nla_for_each_attr(attr, params, msg_len, rem) {
8275                 struct nlattr *nids;
8276                 int rem2;
8277
8278                 /* We only care about the NID list to discover with */
8279                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
8280                         continue;
8281
8282                 nla_for_each_nested(nids, attr, rem2) {
8283                         char nid[LNET_NIDSTR_SIZE + 1];
8284                         struct lnet_processid id;
8285                         struct nlattr *nid_list;
8286                         struct lnet_peer *lp;
8287                         ssize_t len;
8288
8289                         if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
8290                                 continue;
8291
8292                         memset(nid, 0, sizeof(nid));
8293                         rc = nla_strscpy(nid, nids, sizeof(nid));
8294                         if (rc < 0) {
8295                                 GENL_SET_ERR_MSG(info,
8296                                                  "failed to get NID");
8297                                 GOTO(report_err, rc);
8298                         }
8299
8300                         len = libcfs_strid(&id, strim(nid));
8301                         if (len < 0) {
8302                                 struct lnet_fail_ping *fail;
8303
8304                                 fail = genradix_ptr_alloc(&dlists.lgpl_failed,
8305                                                           dlists.lgpl_failed_count++,
8306                                                           GFP_KERNEL);
8307                                 if (!fail) {
8308                                         GENL_SET_ERR_MSG(info,
8309                                                          "failed to allocate improper NID");
8310                                         GOTO(report_err, rc = -ENOMEM);
8311                                 }
8312                                 memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
8313                                 snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
8314                                          "cannot parse NID '%s'", strim(nid));
8315                                 fail->lfp_id = id;
8316                                 fail->lfp_errno = len;
8317                                 continue;
8318                         }
8319
8320                         if (LNET_NID_IS_ANY(&id.nid))
8321                                 continue;
8322
8323                         rc = lnet_discover(&id,
8324                                            info->nlhdr->nlmsg_flags & NLM_F_EXCL,
8325                                            &dlists);
8326                         if (rc < 0) {
8327                                 struct lnet_fail_ping *fail;
8328
8329                                 fail = genradix_ptr_alloc(&dlists.lgpl_failed,
8330                                                           dlists.lgpl_failed_count++,
8331                                                           GFP_KERNEL);
8332                                 if (!fail) {
8333                                         GENL_SET_ERR_MSG(info,
8334                                                          "failed to allocate failed NID");
8335                                         GOTO(report_err, rc = -ENOMEM);
8336                                 }
8337                                 memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
8338                                 snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
8339                                          "failed to discover %s",
8340                                          libcfs_nidstr(&id.nid));
8341                                 fail->lfp_id = id;
8342                                 fail->lfp_errno = rc;
8343                                 continue;
8344                         }
8345
8346                         /* create the genetlink message header */
8347                         hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
8348                                           &lnet_family, NLM_F_MULTI, LNET_CMD_PING);
8349                         if (!hdr) {
8350                                 GENL_SET_ERR_MSG(info,
8351                                                  "failed to allocate hdr");
8352                                 GOTO(report_err, rc = -ENOMEM);
8353                         }
8354
8355                         if (!clear_hdr) {
8356                                 nla_put_string(reply, LNET_PING_ATTR_HDR, "");
8357                                 clear_hdr = true;
8358                         }
8359
8360                         lp = lnet_find_peer(&id.nid);
8361                         if (lp) {
8362                                 nla_put_string(reply, LNET_PING_ATTR_PRIMARY_NID,
8363                                                libcfs_nidstr(&lp->lp_primary_nid));
8364                                 if (lnet_peer_is_multi_rail(lp))
8365                                         nla_put_flag(reply, LNET_PING_ATTR_MULTIRAIL);
8366                                 lnet_peer_decref_locked(lp);
8367                         }
8368
8369                         nid_list = nla_nest_start(reply, LNET_PING_ATTR_PEER_NI_LIST);
8370                         for (i = 0; i < dlists.lgpl_list_count; i++) {
8371                                 struct lnet_processid *found;
8372                                 struct nlattr *nid_attr;
8373                                 char *idstr;
8374
8375                                 found = genradix_ptr(&dlists.lgpl_list, i);
8376                                 if (nid_is_lo0(&found->nid))
8377                                         continue;
8378
8379                                 nid_attr = nla_nest_start(reply, i + 1);
8380                                 if (id.pid == LNET_PID_LUSTRE)
8381                                         idstr = libcfs_nidstr(&found->nid);
8382                                 else
8383                                         idstr = libcfs_idstr(found);
8384                                 nla_put_string(reply, LNET_PING_PEER_NI_ATTR_NID, idstr);
8385                                 nla_nest_end(reply, nid_attr);
8386                         }
8387                         nla_nest_end(reply, nid_list);
8388
8389                         genlmsg_end(reply, hdr);
8390                 }
8391         }
8392
8393         if (dlists.lgpl_failed_count) {
8394                 int flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
8395                 const struct ln_key_list *fail[] = {
8396                         &discover_err_props_list, NULL
8397                 };
8398
8399                 rc = lnet_genl_send_scalar_list(reply, info->snd_portid,
8400                                                 info->snd_seq, &lnet_family,
8401                                                 flags, LNET_CMD_PING, fail);
8402                 if (rc < 0) {
8403                         GENL_SET_ERR_MSG(info,
8404                                          "failed to send new key table");
8405                         GOTO(report_err, rc);
8406                 }
8407
8408                 for (i = 0; i < dlists.lgpl_failed_count; i++) {
8409                         struct lnet_fail_ping *fail;
8410
8411                         hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
8412                                           &lnet_family, NLM_F_MULTI, LNET_CMD_PING);
8413                         if (!hdr) {
8414                                 GENL_SET_ERR_MSG(info,
8415                                                  "failed to send failed values");
8416                                 GOTO(report_err, rc = -ENOMSG);
8417                         }
8418
8419                         fail = genradix_ptr(&dlists.lgpl_failed, i);
8420                         if (i == 0)
8421                                 nla_put_string(reply, LNET_ERR_ATTR_HDR, "");
8422
8423                         nla_put_string(reply, LNET_ERR_ATTR_TYPE, "\n");
8424                         nla_put_s16(reply, LNET_ERR_ATTR_ERRNO,
8425                                     fail->lfp_errno);
8426                         nla_put_string(reply, LNET_ERR_ATTR_DESCR,
8427                                        fail->lfp_msg);
8428                         genlmsg_end(reply, hdr);
8429                 }
8430         }
8431
8432         nlh = nlmsg_put(reply, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
8433                         NLM_F_MULTI);
8434         if (!nlh) {
8435                 genlmsg_cancel(reply, hdr);
8436                 GENL_SET_ERR_MSG(info,
8437                                  "failed to finish message");
8438                 GOTO(report_err, rc = -EMSGSIZE);
8439         }
8440
8441 report_err:
8442         genradix_free(&dlists.lgpl_failed);
8443         genradix_free(&dlists.lgpl_list);
8444
8445         if (rc < 0) {
8446                 genlmsg_cancel(reply, hdr);
8447                 nlmsg_free(reply);
8448         } else {
8449                 rc = genlmsg_reply(reply, info);
8450         }
8451
8452         return rc;
8453 }
8454
8455 #define lnet_peer_dist_show_done        lnet_peer_ni_show_done
8456
8457 static int lnet_peer_dist_show_start(struct netlink_callback *cb)
8458 {
8459         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
8460 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
8461         struct netlink_ext_ack *extack = NULL;
8462 #endif
8463         struct lnet_genl_processid_list *plist;
8464         int msg_len = genlmsg_len(gnlh);
8465         struct nlattr *params, *top;
8466         int rem, rc = 0;
8467
8468 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
8469         extack = cb->extack;
8470 #endif
8471         mutex_lock(&the_lnet.ln_api_mutex);
8472         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
8473                 NL_SET_ERR_MSG(extack, "Network is down");
8474                 mutex_unlock(&the_lnet.ln_api_mutex);
8475                 return -ENETDOWN;
8476         }
8477
8478         msg_len = genlmsg_len(gnlh);
8479         if (!msg_len) {
8480                 NL_SET_ERR_MSG(extack, "Missing NID argument(s)");
8481                 mutex_unlock(&the_lnet.ln_api_mutex);
8482                 return -ENOENT;
8483         }
8484
8485         CFS_ALLOC_PTR(plist);
8486         if (!plist) {
8487                 NL_SET_ERR_MSG(extack, "No memory for peer NID list");
8488                 mutex_unlock(&the_lnet.ln_api_mutex);
8489                 return -ENOMEM;
8490         }
8491
8492         genradix_init(&plist->lgpl_list);
8493         plist->lgpl_count = 0;
8494         plist->lgpl_index = 0;
8495         cb->args[0] = (long)plist;
8496
8497         params = genlmsg_data(gnlh);
8498         nla_for_each_attr(top, params, msg_len, rem) {
8499                 struct nlattr *nids;
8500                 int rem2;
8501
8502                 if (nla_type(top) != LN_SCALAR_ATTR_LIST)
8503                         continue;
8504
8505                 nla_for_each_nested(nids, top, rem2) {
8506                         char nidstr[LNET_NIDSTR_SIZE + 1];
8507                         struct lnet_processid *id;
8508
8509                         if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
8510                                 continue;
8511
8512                         memset(nidstr, 0, sizeof(nidstr));
8513                         rc = nla_strscpy(nidstr, nids, sizeof(nidstr));
8514                         if (rc < 0) {
8515                                 NL_SET_ERR_MSG(extack,
8516                                                "failed to get NID");
8517                                 GOTO(report_err, rc);
8518                         }
8519
8520                         id = genradix_ptr_alloc(&plist->lgpl_list,
8521                                                 plist->lgpl_count++,
8522                                                 GFP_KERNEL);
8523                         if (!id) {
8524                                 NL_SET_ERR_MSG(extack, "failed to allocate NID");
8525                                 GOTO(report_err, rc = -ENOMEM);
8526                         }
8527
8528                         rc = libcfs_strid(id, strim(nidstr));
8529                         if (rc < 0) {
8530                                 NL_SET_ERR_MSG(extack, "invalid NID");
8531                                 GOTO(report_err, rc);
8532                         }
8533                         rc = 0;
8534                 }
8535         }
8536 report_err:
8537         mutex_unlock(&the_lnet.ln_api_mutex);
8538
8539         if (rc < 0)
8540                 lnet_peer_dist_show_done(cb);
8541
8542         return rc;
8543 }
8544
8545 static const struct ln_key_list peer_dist_props_list = {
8546         .lkl_maxattr                    = LNET_PEER_DIST_ATTR_MAX,
8547         .lkl_list                       = {
8548                 [LNET_PEER_DIST_ATTR_HDR]       = {
8549                         .lkp_value              = "peer",
8550                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8551                         .lkp_data_type          = NLA_NUL_STRING,
8552                 },
8553                 [LNET_PEER_DIST_ATTR_NID]       = {
8554                         .lkp_value              = "nid",
8555                         .lkp_data_type          = NLA_STRING
8556                 },
8557                 [LNET_PEER_DIST_ATTR_DIST]      = {
8558                         .lkp_value              = "distance",
8559                         .lkp_data_type          = NLA_U32
8560                 },
8561                 [LNET_PEER_DIST_ATTR_ORDER]     = {
8562                         .lkp_value              = "order",
8563                         .lkp_data_type          = NLA_U32
8564                 },
8565         },
8566 };
8567
8568 static int lnet_peer_dist_show_dump(struct sk_buff *msg,
8569                                     struct netlink_callback *cb)
8570 {
8571         struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb);
8572 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
8573         struct netlink_ext_ack *extack = NULL;
8574 #endif
8575         int portid = NETLINK_CB(cb->skb).portid;
8576         int seq = cb->nlh->nlmsg_seq;
8577         int idx = plist->lgpl_index;
8578         int rc = 0;
8579
8580 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
8581         extack = cb->extack;
8582 #endif
8583         if (!idx) {
8584                 const struct ln_key_list *all[] = {
8585                         &peer_dist_props_list, NULL
8586                 };
8587
8588                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
8589                                                 &lnet_family,
8590                                                 NLM_F_CREATE | NLM_F_MULTI,
8591                                                 LNET_CMD_PEER_DIST, all);
8592                 if (rc < 0) {
8593                         NL_SET_ERR_MSG(extack, "failed to send key table");
8594                         GOTO(send_error, rc);
8595                 }
8596         }
8597
8598         while (idx < plist->lgpl_count) {
8599                 struct lnet_processid *id;
8600                 void *hdr;
8601                 u32 order;
8602                 int dist;
8603
8604                 id = genradix_ptr(&plist->lgpl_list, idx++);
8605                 if (nid_is_lo0(&id->nid))
8606                         continue;
8607
8608                 dist = LNetDist(&id->nid, &id->nid, &order);
8609                 if (dist < 0) {
8610                         if (dist == -EHOSTUNREACH)
8611                                 continue;
8612
8613                         rc = dist;
8614                         return rc;
8615                 }
8616
8617                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
8618                                   NLM_F_MULTI, LNET_CMD_PEER_DIST);
8619                 if (!hdr) {
8620                         NL_SET_ERR_MSG(extack, "failed to send values");
8621                         genlmsg_cancel(msg, hdr);
8622                         GOTO(send_error, rc = -EMSGSIZE);
8623                 }
8624
8625                 if (idx == 1)
8626                         nla_put_string(msg, LNET_PEER_DIST_ATTR_HDR, "");
8627
8628                 nla_put_string(msg, LNET_PEER_DIST_ATTR_NID,
8629                                libcfs_nidstr(&id->nid));
8630                 nla_put_u32(msg, LNET_PEER_DIST_ATTR_DIST, dist);
8631                 nla_put_u32(msg, LNET_PEER_DIST_ATTR_ORDER, order);
8632
8633                 genlmsg_end(msg, hdr);
8634         }
8635
8636         plist->lgpl_index = idx;
8637 send_error:
8638         return lnet_nl_send_error(cb->skb, portid, seq, rc);
8639 }
8640
8641 #ifndef HAVE_NETLINK_CALLBACK_START
8642 static int lnet_old_peer_dist_show_dump(struct sk_buff *msg,
8643                                         struct netlink_callback *cb)
8644 {
8645         if (!cb->args[0]) {
8646                 int rc = lnet_peer_dist_show_start(cb);
8647
8648                 if (rc < 0)
8649                         return lnet_nl_send_error(cb->skb,
8650                                                   NETLINK_CB(cb->skb).portid,
8651                                                   cb->nlh->nlmsg_seq,
8652                                                   rc);
8653         }
8654
8655         return lnet_peer_dist_show_dump(msg, cb);
8656 }
8657 #endif
8658
8659 static const struct genl_multicast_group lnet_mcast_grps[] = {
8660         { .name =       "ip2net",       },
8661         { .name =       "net",          },
8662         { .name =       "peer",         },
8663         { .name =       "route",        },
8664         { .name =       "ping",         },
8665         { .name =       "discover",     },
8666         { .name =       "cpt-of-nid",   },
8667 };
8668
8669 static const struct genl_ops lnet_genl_ops[] = {
8670         {
8671                 .cmd            = LNET_CMD_NETS,
8672                 .flags          = GENL_ADMIN_PERM,
8673 #ifdef HAVE_NETLINK_CALLBACK_START
8674                 .start          = lnet_net_show_start,
8675                 .dumpit         = lnet_net_show_dump,
8676 #else
8677                 .dumpit         = lnet_old_net_show_dump,
8678 #endif
8679                 .done           = lnet_net_show_done,
8680                 .doit           = lnet_net_cmd,
8681         },
8682         {
8683                 .cmd            = LNET_CMD_PEERS,
8684                 .flags          = GENL_ADMIN_PERM,
8685 #ifdef HAVE_NETLINK_CALLBACK_START
8686                 .start          = lnet_peer_ni_show_start,
8687                 .dumpit         = lnet_peer_ni_show_dump,
8688 #else
8689                 .dumpit         = lnet_old_peer_ni_show_dump,
8690 #endif
8691                 .done           = lnet_peer_ni_show_done,
8692                 .doit           = lnet_peer_ni_cmd,
8693         },
8694         {
8695                 .cmd            = LNET_CMD_ROUTES,
8696                 .flags          = GENL_ADMIN_PERM,
8697 #ifdef HAVE_NETLINK_CALLBACK_START
8698                 .start          = lnet_route_show_start,
8699                 .dumpit         = lnet_route_show_dump,
8700 #else
8701                 .dumpit         = lnet_old_route_show_dump,
8702 #endif
8703                 .done           = lnet_route_show_done,
8704                 .doit           = lnet_route_cmd,
8705         },
8706         {
8707                 .cmd            = LNET_CMD_PING,
8708                 .flags          = GENL_ADMIN_PERM,
8709 #ifdef HAVE_NETLINK_CALLBACK_START
8710                 .start          = lnet_ping_show_start,
8711                 .dumpit         = lnet_ping_show_dump,
8712 #else
8713                 .dumpit         = lnet_old_ping_show_dump,
8714 #endif
8715                 .done           = lnet_ping_show_done,
8716                 .doit           = lnet_ping_cmd,
8717         },
8718         {
8719                 .cmd            = LNET_CMD_CPT_OF_NID,
8720 #ifdef HAVE_NETLINK_CALLBACK_START
8721                 .start          = lnet_cpt_of_nid_show_start,
8722                 .dumpit         = lnet_cpt_of_nid_show_dump,
8723 #else
8724                 .dumpit         = lnet_old_cpt_of_nid_show_dump,
8725 #endif
8726                 .done           = lnet_cpt_of_nid_show_done,
8727         },
8728         {
8729                 .cmd            = LNET_CMD_PEER_DIST,
8730 #ifdef HAVE_NETLINK_CALLBACK_START
8731                 .start          = lnet_peer_dist_show_start,
8732                 .dumpit         = lnet_peer_dist_show_dump,
8733 #else
8734                 .dumpit         = lnet_old_peer_dist_show_dump,
8735 #endif
8736                 .done           = lnet_peer_dist_show_done,
8737         },
8738 };
8739
8740 static struct genl_family lnet_family = {
8741         .name           = LNET_GENL_NAME,
8742         .version        = LNET_GENL_VERSION,
8743         .module         = THIS_MODULE,
8744         .parallel_ops   = true,
8745         .netnsok        = true,
8746         .ops            = lnet_genl_ops,
8747         .n_ops          = ARRAY_SIZE(lnet_genl_ops),
8748         .mcgrps         = lnet_mcast_grps,
8749         .n_mcgrps       = ARRAY_SIZE(lnet_mcast_grps),
8750 #ifdef GENL_FAMILY_HAS_RESV_START_OP
8751         .resv_start_op  = __LNET_CMD_MAX_PLUS_ONE,
8752 #endif
8753 };
8754
8755 void LNetDebugPeer(struct lnet_processid *id)
8756 {
8757         lnet_debug_peer(&id->nid);
8758 }
8759 EXPORT_SYMBOL(LNetDebugPeer);
8760
8761 /**
8762  * Determine if the specified peer \a nid is on the local node.
8763  *
8764  * \param nid   peer nid to check
8765  *
8766  * \retval true         If peer NID is on the local node.
8767  * \retval false        If peer NID is not on the local node.
8768  */
8769 bool LNetIsPeerLocal(struct lnet_nid *nid)
8770 {
8771         struct lnet_net *net;
8772         struct lnet_ni *ni;
8773         int cpt;
8774
8775         cpt = lnet_net_lock_current();
8776         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
8777                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
8778                         if (nid_same(&ni->ni_nid, nid)) {
8779                                 lnet_net_unlock(cpt);
8780                                 return true;
8781                         }
8782                 }
8783         }
8784         lnet_net_unlock(cpt);
8785
8786         return false;
8787 }
8788 EXPORT_SYMBOL(LNetIsPeerLocal);
8789
8790 /**
8791  * Retrieve the struct lnet_process_id ID of LNet interface at \a index.
8792  * Note that all interfaces share a same PID, as requested by LNetNIInit().
8793  *
8794  * @index       Index of the interface to look up.
8795  * @id          On successful return, this location will hold the
8796  *              struct lnet_process_id ID of the interface.
8797  * @large_nids  Report large NIDs if this is true.
8798  *
8799  * RETURN       0 If an interface exists at \a index.
8800  *              -ENOENT If no interface has been found.
8801  */
8802 int
8803 LNetGetId(unsigned int index, struct lnet_processid *id, bool large_nids)
8804 {
8805         struct lnet_ni   *ni;
8806         struct lnet_net  *net;
8807         int               cpt;
8808         int               rc = -ENOENT;
8809
8810         LASSERT(the_lnet.ln_refcount > 0);
8811
8812         cpt = lnet_net_lock_current();
8813
8814         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
8815                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
8816                         if (!large_nids && !nid_is_nid4(&ni->ni_nid))
8817                                 continue;
8818
8819                         if (index-- != 0)
8820                                 continue;
8821
8822                         id->nid = ni->ni_nid;
8823                         id->pid = the_lnet.ln_pid;
8824                         rc = 0;
8825                         break;
8826                 }
8827         }
8828
8829         lnet_net_unlock(cpt);
8830         return rc;
8831 }
8832 EXPORT_SYMBOL(LNetGetId);
8833
8834 struct ping_data {
8835         int rc;
8836         int replied;
8837         int pd_unlinked;
8838         struct lnet_handle_md mdh;
8839         struct completion completion;
8840 };
8841
8842 static void
8843 lnet_ping_event_handler(struct lnet_event *event)
8844 {
8845         struct ping_data *pd = event->md_user_ptr;
8846
8847         CDEBUG(D_NET, "ping event (%d %d)%s\n",
8848                event->type, event->status,
8849                event->unlinked ? " unlinked" : "");
8850
8851         if (event->status) {
8852                 if (!pd->rc)
8853                         pd->rc = event->status;
8854         } else if (event->type == LNET_EVENT_REPLY) {
8855                 pd->replied = 1;
8856                 pd->rc = event->mlength;
8857         }
8858
8859         if (event->unlinked)
8860                 pd->pd_unlinked = 1;
8861
8862         if (event->unlinked ||
8863             (event->type == LNET_EVENT_SEND && event->status))
8864                 complete(&pd->completion);
8865 }
8866
8867 static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
8868                      signed long timeout, struct lnet_genl_ping_list *plist,
8869                      int n_ids)
8870 {
8871         int id_bytes = sizeof(struct lnet_ni_status); /* For 0@lo */
8872         struct lnet_md md = { NULL };
8873         struct ping_data pd = { 0 };
8874         struct lnet_ping_buffer *pbuf;
8875         struct lnet_processid pid;
8876         struct lnet_ping_iter pi;
8877         int i = 0;
8878         u32 *st;
8879         int nob;
8880         int rc;
8881         int rc2;
8882
8883         genradix_init(&plist->lgpl_list);
8884
8885         /* n_ids limit is arbitrary */
8886         if (n_ids <= 0 || LNET_NID_IS_ANY(&id->nid))
8887                 return -EINVAL;
8888
8889         /* if the user buffer has more space than the lnet_interfaces_max
8890          * then only fill it up to lnet_interfaces_max
8891          */
8892         if (n_ids > lnet_interfaces_max)
8893                 n_ids = lnet_interfaces_max;
8894
8895         if (id->pid == LNET_PID_ANY)
8896                 id->pid = LNET_PID_LUSTRE;
8897
8898         id_bytes += n_ids * sizeof(struct lnet_nid);
8899         pbuf = lnet_ping_buffer_alloc(id_bytes, GFP_NOFS);
8900         if (!pbuf)
8901                 return -ENOMEM;
8902
8903         /* initialize md content */
8904         md.start     = &pbuf->pb_info;
8905         md.length    = id_bytes;
8906         md.threshold = 2; /* GET/REPLY */
8907         md.max_size  = 0;
8908         md.options   = LNET_MD_TRUNCATE;
8909         md.user_ptr  = &pd;
8910         md.handler   = lnet_ping_event_handler;
8911
8912         init_completion(&pd.completion);
8913
8914         rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
8915         if (rc != 0) {
8916                 CERROR("Can't bind MD: %d\n", rc);
8917                 goto fail_ping_buffer_decref;
8918         }
8919
8920         rc = LNetGet(src_nid, pd.mdh, id, LNET_RESERVED_PORTAL,
8921                      LNET_PROTO_PING_MATCHBITS, 0, false);
8922         if (rc != 0) {
8923                 /* Don't CERROR; this could be deliberate! */
8924                 rc2 = LNetMDUnlink(pd.mdh);
8925                 LASSERT(rc2 == 0);
8926
8927                 /* NB must wait for the UNLINK event below... */
8928         }
8929
8930         /* Ensure completion in finite time... */
8931         wait_for_completion_timeout(&pd.completion, timeout);
8932         if (!pd.pd_unlinked) {
8933                 LNetMDUnlink(pd.mdh);
8934                 wait_for_completion(&pd.completion);
8935         }
8936
8937         if (!pd.replied) {
8938                 rc = pd.rc ?: -EIO;
8939                 goto fail_ping_buffer_decref;
8940         }
8941
8942         nob = pd.rc;
8943         LASSERT(nob >= 0 && nob <= id_bytes);
8944
8945         rc = -EPROTO;           /* if I can't parse... */
8946
8947         if (nob < LNET_PING_INFO_HDR_SIZE) {
8948                 CERROR("%s: ping info too short %d\n",
8949                        libcfs_idstr(id), nob);
8950                 goto fail_ping_buffer_decref;
8951         }
8952
8953         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
8954                 lnet_swap_pinginfo(pbuf);
8955         } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
8956                 CERROR("%s: Unexpected magic %08x\n",
8957                        libcfs_idstr(id), pbuf->pb_info.pi_magic);
8958                 goto fail_ping_buffer_decref;
8959         }
8960
8961         if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
8962                 CERROR("%s: ping w/o NI status: 0x%x\n",
8963                        libcfs_idstr(id), pbuf->pb_info.pi_features);
8964                 goto fail_ping_buffer_decref;
8965         }
8966
8967         /* Test if smaller than lnet_pinginfo with just one pi_ni status info.
8968          * That one might contain size when large nids are used.
8969          */
8970         if (nob < offsetof(struct lnet_ping_info, pi_ni[1])) {
8971                 CERROR("%s: Short reply %d(%lu min)\n",
8972                        libcfs_idstr(id), nob,
8973                        offsetof(struct lnet_ping_info, pi_ni[1]));
8974                 goto fail_ping_buffer_decref;
8975         }
8976
8977         if (ping_info_count_entries(pbuf) < n_ids) {
8978                 n_ids = ping_info_count_entries(pbuf);
8979                 id_bytes = lnet_ping_info_size(&pbuf->pb_info);
8980         }
8981
8982         if (nob < id_bytes) {
8983                 CERROR("%s: Short reply %d(%d expected)\n",
8984                        libcfs_idstr(id), nob, id_bytes);
8985                 goto fail_ping_buffer_decref;
8986         }
8987
8988         for (st = ping_iter_first(&pi, pbuf, &pid.nid);
8989              st;
8990              st = ping_iter_next(&pi, &pid.nid)) {
8991                 id = genradix_ptr_alloc(&plist->lgpl_list, i++, GFP_KERNEL);
8992                 if (!id) {
8993                         rc = -ENOMEM;
8994                         goto fail_ping_buffer_decref;
8995                 }
8996
8997                 id->pid = pbuf->pb_info.pi_pid;
8998                 id->nid = pid.nid;
8999         }
9000         rc = i;
9001 fail_ping_buffer_decref:
9002         lnet_ping_buffer_decref(pbuf);
9003         return rc;
9004 }
9005
9006 static int
9007 lnet_discover(struct lnet_processid *pid, u32 force,
9008               struct lnet_genl_ping_list *dlist)
9009 {
9010         struct lnet_peer_ni *lpni;
9011         struct lnet_peer_ni *p;
9012         struct lnet_peer *lp;
9013         int cpt;
9014         int rc;
9015
9016         if (LNET_NID_IS_ANY(&pid->nid))
9017                 return -EINVAL;
9018
9019         if (pid->pid == LNET_PID_ANY)
9020                 pid->pid = LNET_PID_LUSTRE;
9021
9022         cpt = lnet_net_lock_current();
9023         lpni = lnet_peerni_by_nid_locked(&pid->nid, NULL, cpt);
9024         if (IS_ERR(lpni)) {
9025                 rc = PTR_ERR(lpni);
9026                 goto out;
9027         }
9028
9029         /*
9030          * Clearing the NIDS_UPTODATE flag ensures the peer will
9031          * be discovered, provided discovery has not been disabled.
9032          */
9033         lp = lpni->lpni_peer_net->lpn_peer;
9034         spin_lock(&lp->lp_lock);
9035         lp->lp_state &= ~LNET_PEER_NIDS_UPTODATE;
9036         /* If the force flag is set, force a PING and PUSH as well. */
9037         if (force)
9038                 lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH;
9039         spin_unlock(&lp->lp_lock);
9040         rc = lnet_discover_peer_locked(lpni, cpt, true);
9041         if (rc)
9042                 goto out_decref;
9043
9044         /* The lpni (or lp) for this NID may have changed and our ref is
9045          * the only thing keeping the old one around. Release the ref
9046          * and lookup the lpni again
9047          */
9048         lnet_peer_ni_decref_locked(lpni);
9049         lpni = lnet_peer_ni_find_locked(&pid->nid);
9050         if (!lpni) {
9051                 rc = -ENOENT;
9052                 goto out;
9053         }
9054         lp = lpni->lpni_peer_net->lpn_peer;
9055
9056         dlist->lgpl_list_count = 0;
9057         p = NULL;
9058         while ((p = lnet_get_next_peer_ni_locked(lp, NULL, p)) != NULL) {
9059                 struct lnet_processid *id;
9060
9061                 id = genradix_ptr_alloc(&dlist->lgpl_list,
9062                                         dlist->lgpl_list_count++, GFP_ATOMIC);
9063                 if (!id) {
9064                         rc = -ENOMEM;
9065                         goto out_decref;
9066                 }
9067                 id->pid = pid->pid;
9068                 id->nid = p->lpni_nid;
9069         }
9070         rc = dlist->lgpl_list_count;
9071
9072 out_decref:
9073         lnet_peer_ni_decref_locked(lpni);
9074 out:
9075         lnet_net_unlock(cpt);
9076
9077         return rc;
9078 }
9079
9080 /**
9081  * Retrieve peer discovery status.
9082  *
9083  * \retval 1 if lnet_peer_discovery_disabled is 0
9084  * \retval 0 if lnet_peer_discovery_disabled is 1
9085  */
9086 int
9087 LNetGetPeerDiscoveryStatus(void)
9088 {
9089         return !lnet_peer_discovery_disabled;
9090 }
9091 EXPORT_SYMBOL(LNetGetPeerDiscoveryStatus);