Whamcloud - gitweb
LU-17054 lnet: Change cpt-of-nid to get result from kernel
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  */
31
32 #define DEBUG_SUBSYSTEM S_LNET
33
34 #include <linux/ctype.h>
35 #include <linux/generic-radix-tree.h>
36 #include <linux/log2.h>
37 #include <linux/ktime.h>
38 #include <linux/moduleparam.h>
39 #include <linux/uaccess.h>
40 #ifdef HAVE_SCHED_HEADERS
41 #include <linux/sched/signal.h>
42 #endif
43 #include <net/genetlink.h>
44
45 #include <libcfs/linux/linux-net.h>
46 #include <lnet/udsp.h>
47 #include <lnet/lib-lnet.h>
48
49 #define D_LNI D_CONSOLE
50
/*
 * THE global LNet state.
 *
 * ln_api_mutex is initialized statically because it is taken by the
 * discovery_set() module parameter callback. That callback can be called
 * before module init completes, so the mutex needs to be ready for use
 * at that point.
 */
struct lnet the_lnet = {
        .ln_api_mutex = __MUTEX_INITIALIZER(the_lnet.ln_api_mutex),
};              /* THE state of the network */
EXPORT_SYMBOL(the_lnet);
60
/*
 * Network selection by IP table. lnet_get_networks() hands this string to
 * lnet_parse_ip2nets(); it must not be combined with 'networks'.
 */
static char *ip2nets = "";
module_param(ip2nets, charp, 0444);
MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");

/*
 * Explicit list of local networks. When both 'networks' and 'ip2nets' are
 * empty, lnet_get_networks() falls back to "tcp".
 */
static char *networks = "";
module_param(networks, charp, 0444);
MODULE_PARM_DESC(networks, "local networks");

/* Routes to non-local networks; returned verbatim by lnet_get_routes(). */
static char *routes = "";
module_param(routes, charp, 0444);
MODULE_PARM_DESC(routes, "routes to non-local networks");

/* Size of the remote network hash table (see the parameter description). */
static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
module_param(rnet_htable_size, int, 0444);
MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");

/*
 * Per its own description this parameter has been removed; no use of the
 * variable is visible in this part of the file.
 */
static int use_tcp_bonding;
module_param(use_tcp_bonding, int, 0444);
MODULE_PARM_DESC(use_tcp_bonding,
                 "use_tcp_bonding parameter has been removed");

/* NUMA range considered during Multi-Rail selection; defaults to 0. */
unsigned int lnet_numa_range = 0;
module_param(lnet_numa_range, uint, 0444);
MODULE_PARM_DESC(lnet_numa_range,
                "NUMA range to consider during Multi-Rail selection");
86
/*
 * lnet_health_sensitivity determines by how much we decrement the health
 * value on a sending error. The value defaults to 100, which means the
 * interface health is decremented by 100 points on every failure.
 *
 * Writes are validated by sensitivity_set(). Two registration forms are
 * provided, selected by HAVE_KERNEL_PARAM_OPS; both point the callback at
 * the same variable.
 */
unsigned int lnet_health_sensitivity = 100;
static int sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_health_sensitivity = {
        .set = sensitivity_set,
        .get = param_get_int,
};
#define param_check_health_sensitivity(name, p) \
                __param_check(name, p, int)
module_param(lnet_health_sensitivity, health_sensitivity, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_health_sensitivity, sensitivity_set, param_get_int,
                  &lnet_health_sensitivity, S_IRUGO|S_IWUSR);
#endif
MODULE_PARM_DESC(lnet_health_sensitivity,
                "Value to decrement the health value by on error");
108
/*
 * lnet_recovery_interval was meant to determine how often recovery is
 * performed on unhealthy interfaces. The parameter is deprecated: its
 * setter, recovery_interval_set(), only logs a warning and never stores
 * the written value, so the variable keeps its initial value of 1.
 */
unsigned int lnet_recovery_interval = 1;
static int recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_recovery_interval = {
        .set = recovery_interval_set,
        .get = param_get_int,
};
#define param_check_recovery_interval(name, p) \
                __param_check(name, p, int)
module_param(lnet_recovery_interval, recovery_interval, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_recovery_interval, recovery_interval_set, param_get_int,
                  &lnet_recovery_interval, S_IRUGO|S_IWUSR);
#endif
MODULE_PARM_DESC(lnet_recovery_interval,
                "DEPRECATED - Interval to recover unhealthy interfaces in seconds");
129
/*
 * How long, in seconds, to keep attempting recovery of unhealthy peer
 * interfaces. Zero-initialized; per the parameter description 0 means
 * recovery is attempted indefinitely. Registered as a plain uint, so no
 * validation callback is involved.
 */
unsigned int lnet_recovery_limit;
module_param(lnet_recovery_limit, uint, 0644);
MODULE_PARM_DESC(lnet_recovery_limit,
                 "How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery");
134
135 unsigned int lnet_max_recovery_ping_interval = 900;
136 unsigned int lnet_max_recovery_ping_count = 9;
137 static int max_recovery_ping_interval_set(const char *val,
138                                           cfs_kernel_param_arg_t *kp);
139
140 #define param_check_max_recovery_ping_interval(name, p) \
141                 __param_check(name, p, int)
142
143 #ifdef HAVE_KERNEL_PARAM_OPS
144 static struct kernel_param_ops param_ops_max_recovery_ping_interval = {
145         .set = max_recovery_ping_interval_set,
146         .get = param_get_int,
147 };
148 module_param(lnet_max_recovery_ping_interval, max_recovery_ping_interval, 0644);
149 #else
150 module_param_call(lnet_max_recovery_ping_interval, max_recovery_ping_interval,
151                   param_get_int, &lnet_max_recovery_ping_interval, 0644);
152 #endif
153 MODULE_PARM_DESC(lnet_max_recovery_ping_interval,
154                  "The max interval between LNet recovery pings, in seconds");
155
156 static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
157 static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp);
158
159 static struct kernel_param_ops param_ops_interfaces_max = {
160         .set = intf_max_set,
161         .get = param_get_int,
162 };
163
164 #define param_check_interfaces_max(name, p) \
165                 __param_check(name, p, int)
166
167 #ifdef HAVE_KERNEL_PARAM_OPS
168 module_param(lnet_interfaces_max, interfaces_max, 0644);
169 #else
170 module_param_call(lnet_interfaces_max, intf_max_set, param_get_int,
171                   &param_ops_interfaces_max, 0644);
172 #endif
173 MODULE_PARM_DESC(lnet_interfaces_max,
174                 "Maximum number of interfaces in a node.");
175
176 unsigned lnet_peer_discovery_disabled = 0;
177 static int discovery_set(const char *val, cfs_kernel_param_arg_t *kp);
178
179 static struct kernel_param_ops param_ops_discovery_disabled = {
180         .set = discovery_set,
181         .get = param_get_int,
182 };
183
184 #define param_check_discovery_disabled(name, p) \
185                 __param_check(name, p, int)
186 #ifdef HAVE_KERNEL_PARAM_OPS
187 module_param(lnet_peer_discovery_disabled, discovery_disabled, 0644);
188 #else
189 module_param_call(lnet_peer_discovery_disabled, discovery_set, param_get_int,
190                   &param_ops_discovery_disabled, 0644);
191 #endif
192 MODULE_PARM_DESC(lnet_peer_discovery_disabled,
193                 "Set to 1 to disable peer discovery on this node.");
194
195 unsigned int lnet_drop_asym_route;
196 static int drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp);
197
198 static struct kernel_param_ops param_ops_drop_asym_route = {
199         .set = drop_asym_route_set,
200         .get = param_get_int,
201 };
202
203 #define param_check_drop_asym_route(name, p)    \
204         __param_check(name, p, int)
205 #ifdef HAVE_KERNEL_PARAM_OPS
206 module_param(lnet_drop_asym_route, drop_asym_route, 0644);
207 #else
208 module_param_call(lnet_drop_asym_route, drop_asym_route_set, param_get_int,
209                   &param_ops_drop_asym_route, 0644);
210 #endif
211 MODULE_PARM_DESC(lnet_drop_asym_route,
212                  "Set to 1 to drop asymmetrical route messages.");
213
/*
 * Maximum number of seconds to wait for a peer response. Writes are
 * validated by transaction_to_set(), which also recomputes
 * lnet_lnd_timeout when the value changes.
 */
#define LNET_TRANSACTION_TIMEOUT_DEFAULT 150
unsigned int lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_DEFAULT;
static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_transaction_timeout = {
        .set = transaction_to_set,
        .get = param_get_int,
};

#define param_check_transaction_timeout(name, p) \
                __param_check(name, p, int)
module_param(lnet_transaction_timeout, transaction_timeout, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
                  &lnet_transaction_timeout, S_IRUGO|S_IWUSR);
#endif
MODULE_PARM_DESC(lnet_transaction_timeout,
                "Maximum number of seconds to wait for a peer response.");
232
/*
 * Maximum number of times to retry transmitting a message. Writes are
 * validated by retry_count_set(), which also recomputes lnet_lnd_timeout.
 * sensitivity_set() forces this back to 0 when health sensitivity is
 * switched from non-zero to 0.
 */
#define LNET_RETRY_COUNT_DEFAULT 2
unsigned int lnet_retry_count = LNET_RETRY_COUNT_DEFAULT;
static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_retry_count = {
        .set = retry_count_set,
        .get = param_get_int,
};

#define param_check_retry_count(name, p) \
                __param_check(name, p, int)
module_param(lnet_retry_count, retry_count, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_retry_count, retry_count_set, param_get_int,
                  &lnet_retry_count, S_IRUGO|S_IWUSR);
#endif
MODULE_PARM_DESC(lnet_retry_count,
                 "Maximum number of times to retry transmitting a message");
251
/*
 * Response tracking mode, default 3. The accepted values 0..3 and their
 * meaning are listed in the parameter description below; the range is
 * enforced by response_tracking_set().
 */
unsigned int lnet_response_tracking = 3;
static int response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp);

#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_response_tracking = {
        .set = response_tracking_set,
        .get = param_get_int,
};

#define param_check_response_tracking(name, p)  \
        __param_check(name, p, int)
module_param(lnet_response_tracking, response_tracking, 0644);
#else
module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
                  &lnet_response_tracking, 0644);
#endif
MODULE_PARM_DESC(lnet_response_tracking,
                 "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
270
/*
 * Whether the nid passed down by Lustre is locked as primary. Defaults
 * to 1 and is registered as a plain int with permissions 0444.
 */
int lock_prim_nid = 1;
module_param(lock_prim_nid, int, 0444);
MODULE_PARM_DESC(lock_prim_nid,
                 "Whether nid passed down by Lustre is locked as primary");
275
/*
 * Initial value of lnet_lnd_timeout, derived from the default transaction
 * timeout and retry count with integer division:
 * (150 - 1) / (2 + 1) = 49.
 */
#define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
                                  (LNET_RETRY_COUNT_DEFAULT + 1))
unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
279 static void lnet_set_lnd_timeout(void)
280 {
281         lnet_lnd_timeout = max((lnet_transaction_timeout - 1) /
282                                (lnet_retry_count + 1), 1U);
283 }
284
/*
 * This sequence number keeps track of how many times DLC was used to
 * update the local NIs. It is incremented when an NI is added or
 * removed, and checked when sending a message to determine whether the
 * selection algorithm needs to be re-run. See lnet_select_pathway()
 * for more details on its usage.
 */
static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
293
/*
 * Record of one failed ping.
 * NOTE(review): field meanings below are inferred from the names; the
 * code that fills this structure is not in this part of the file.
 */
struct lnet_fail_ping {
        struct lnet_processid           lfp_id;         /* presumably the process id that failed */
        int                             lfp_errno;      /* presumably the error code of the failure */
        char                            lfp_msg[256];   /* fixed 256-byte message buffer */
};
299
/*
 * State shared with lnet_ping() and lnet_discover(), which both take a
 * pointer to this structure. Two generic radix trees hold the per-id
 * results and the failures.
 * NOTE(review): field meanings below are inferred from the names; confirm
 * against the users, which are not in this part of the file.
 */
struct lnet_genl_ping_list {
        unsigned int                    lgpl_index;             /* presumably a cursor into the lists */
        unsigned int                    lgpl_list_count;        /* presumably the number of entries in lgpl_list */
        unsigned int                    lgpl_failed_count;      /* presumably the number of entries in lgpl_failed */
        signed long                     lgpl_timeout;           /* presumably the ping timeout */
        struct lnet_nid                 lgpl_src_nid;           /* presumably the source NID to ping from */
        GENRADIX(struct lnet_fail_ping) lgpl_failed;            /* radix tree of struct lnet_fail_ping */
        GENRADIX(struct lnet_processid) lgpl_list;              /* radix tree of struct lnet_processid */
};
309
/*
 * Forward declarations. The definitions of lnet_ping() and
 * lnet_discover() are not in this part of the file.
 */
static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
                     signed long timeout, struct lnet_genl_ping_list *plist,
                     int n_ids);

static int lnet_discover(struct lnet_processid *id, u32 force,
                         struct lnet_genl_ping_list *dlists);
316
317 static int
318 sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
319 {
320         int rc;
321         unsigned *sensitivity = (unsigned *)kp->arg;
322         unsigned long value;
323
324         rc = kstrtoul(val, 0, &value);
325         if (rc) {
326                 CERROR("Invalid module parameter value for 'lnet_health_sensitivity'\n");
327                 return rc;
328         }
329
330         /*
331          * The purpose of locking the api_mutex here is to ensure that
332          * the correct value ends up stored properly.
333          */
334         mutex_lock(&the_lnet.ln_api_mutex);
335
336         if (value > LNET_MAX_HEALTH_VALUE) {
337                 mutex_unlock(&the_lnet.ln_api_mutex);
338                 CERROR("Invalid health value. Maximum: %d value = %lu\n",
339                        LNET_MAX_HEALTH_VALUE, value);
340                 return -EINVAL;
341         }
342
343         if (*sensitivity != 0 && value == 0 && lnet_retry_count != 0) {
344                 lnet_retry_count = 0;
345                 lnet_set_lnd_timeout();
346         }
347
348         *sensitivity = value;
349
350         mutex_unlock(&the_lnet.ln_api_mutex);
351
352         return 0;
353 }
354
/*
 * Setter for the deprecated 'lnet_recovery_interval' module parameter.
 *
 * Both arguments are ignored: the function only logs a deprecation
 * warning and leaves lnet_recovery_interval untouched.
 *
 * Returns 0 unconditionally.
 */
static int
recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
{
        CWARN("'lnet_recovery_interval' has been deprecated\n");

        return 0;
}
362
363 static int
364 max_recovery_ping_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
365 {
366         int rc;
367         unsigned long value;
368
369         rc = kstrtoul(val, 0, &value);
370         if (rc) {
371                 CERROR("Invalid module parameter value for 'lnet_max_recovery_ping_interval'\n");
372                 return rc;
373         }
374
375         if (!value) {
376                 CERROR("Invalid max ping timeout. Must be strictly positive\n");
377                 return -EINVAL;
378         }
379
380         /* The purpose of locking the api_mutex here is to ensure that
381          * the correct value ends up stored properly.
382          */
383         mutex_lock(&the_lnet.ln_api_mutex);
384         lnet_max_recovery_ping_interval = value;
385         lnet_max_recovery_ping_count = 0;
386         value >>= 1;
387         while (value) {
388                 lnet_max_recovery_ping_count++;
389                 value >>= 1;
390         }
391         mutex_unlock(&the_lnet.ln_api_mutex);
392
393         return 0;
394 }
395
396 static int
397 discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
398 {
399         int rc;
400         unsigned *discovery_off = (unsigned *)kp->arg;
401         unsigned long value;
402         struct lnet_ping_buffer *pbuf;
403
404         rc = kstrtoul(val, 0, &value);
405         if (rc) {
406                 CERROR("Invalid module parameter value for 'lnet_peer_discovery_disabled'\n");
407                 return rc;
408         }
409
410         value = (value) ? 1 : 0;
411
412         /*
413          * The purpose of locking the api_mutex here is to ensure that
414          * the correct value ends up stored properly.
415          */
416         mutex_lock(&the_lnet.ln_api_mutex);
417
418         if (value == *discovery_off) {
419                 mutex_unlock(&the_lnet.ln_api_mutex);
420                 return 0;
421         }
422
423         /*
424          * We still want to set the discovery value even when LNet is not
425          * running. This is the case when LNet is being loaded and we want
426          * the module parameters to take effect. Otherwise if we're
427          * changing the value dynamically, we want to set it after
428          * updating the peers
429          */
430         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
431                 *discovery_off = value;
432                 mutex_unlock(&the_lnet.ln_api_mutex);
433                 return 0;
434         }
435
436         /* tell peers that discovery setting has changed */
437         lnet_net_lock(LNET_LOCK_EX);
438         pbuf = the_lnet.ln_ping_target;
439         if (value)
440                 pbuf->pb_info.pi_features &= ~LNET_PING_FEAT_DISCOVERY;
441         else
442                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
443         lnet_net_unlock(LNET_LOCK_EX);
444
445         /* only send a push when we're turning off discovery */
446         if (*discovery_off <= 0 && value > 0)
447                 lnet_push_update_to_peers(1);
448         *discovery_off = value;
449
450         mutex_unlock(&the_lnet.ln_api_mutex);
451
452         return 0;
453 }
454
455 static int
456 drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp)
457 {
458         int rc;
459         unsigned int *drop_asym_route = (unsigned int *)kp->arg;
460         unsigned long value;
461
462         rc = kstrtoul(val, 0, &value);
463         if (rc) {
464                 CERROR("Invalid module parameter value for "
465                        "'lnet_drop_asym_route'\n");
466                 return rc;
467         }
468
469         /*
470          * The purpose of locking the api_mutex here is to ensure that
471          * the correct value ends up stored properly.
472          */
473         mutex_lock(&the_lnet.ln_api_mutex);
474
475         if (value == *drop_asym_route) {
476                 mutex_unlock(&the_lnet.ln_api_mutex);
477                 return 0;
478         }
479
480         *drop_asym_route = value;
481
482         mutex_unlock(&the_lnet.ln_api_mutex);
483
484         return 0;
485 }
486
487 static int
488 transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
489 {
490         int rc;
491         unsigned *transaction_to = (unsigned *)kp->arg;
492         unsigned long value;
493
494         rc = kstrtoul(val, 0, &value);
495         if (rc) {
496                 CERROR("Invalid module parameter value for 'lnet_transaction_timeout'\n");
497                 return rc;
498         }
499
500         /*
501          * The purpose of locking the api_mutex here is to ensure that
502          * the correct value ends up stored properly.
503          */
504         mutex_lock(&the_lnet.ln_api_mutex);
505
506         if (value <= lnet_retry_count || value == 0) {
507                 mutex_unlock(&the_lnet.ln_api_mutex);
508                 CERROR("Invalid value for lnet_transaction_timeout (%lu). "
509                        "Has to be greater than lnet_retry_count (%u)\n",
510                        value, lnet_retry_count);
511                 return -EINVAL;
512         }
513
514         if (value == *transaction_to) {
515                 mutex_unlock(&the_lnet.ln_api_mutex);
516                 return 0;
517         }
518
519         *transaction_to = value;
520         /* Update the lnet_lnd_timeout now that we've modified the
521          * transaction timeout
522          */
523         lnet_set_lnd_timeout();
524
525         mutex_unlock(&the_lnet.ln_api_mutex);
526
527         return 0;
528 }
529
530 static int
531 retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
532 {
533         int rc;
534         unsigned *retry_count = (unsigned *)kp->arg;
535         unsigned long value;
536
537         rc = kstrtoul(val, 0, &value);
538         if (rc) {
539                 CERROR("Invalid module parameter value for 'lnet_retry_count'\n");
540                 return rc;
541         }
542
543         /*
544          * The purpose of locking the api_mutex here is to ensure that
545          * the correct value ends up stored properly.
546          */
547         mutex_lock(&the_lnet.ln_api_mutex);
548
549         if (lnet_health_sensitivity == 0 && value > 0) {
550                 mutex_unlock(&the_lnet.ln_api_mutex);
551                 CERROR("Can not set lnet_retry_count when health feature is turned off\n");
552                 return -EINVAL;
553         }
554
555         if (value > lnet_transaction_timeout) {
556                 mutex_unlock(&the_lnet.ln_api_mutex);
557                 CERROR("Invalid value for lnet_retry_count (%lu). "
558                        "Has to be smaller than lnet_transaction_timeout (%u)\n",
559                        value, lnet_transaction_timeout);
560                 return -EINVAL;
561         }
562
563         *retry_count = value;
564
565         /* Update the lnet_lnd_timeout now that we've modified the
566          * retry count
567          */
568         lnet_set_lnd_timeout();
569
570         mutex_unlock(&the_lnet.ln_api_mutex);
571
572         return 0;
573 }
574
575 static int
576 intf_max_set(const char *val, cfs_kernel_param_arg_t *kp)
577 {
578         int value, rc;
579
580         rc = kstrtoint(val, 0, &value);
581         if (rc) {
582                 CERROR("Invalid module parameter value for 'lnet_interfaces_max'\n");
583                 return rc;
584         }
585
586         if (value < LNET_INTERFACES_MIN) {
587                 CWARN("max interfaces provided are too small, setting to %d\n",
588                       LNET_INTERFACES_MAX_DEFAULT);
589                 value = LNET_INTERFACES_MAX_DEFAULT;
590         }
591
592         *(int *)kp->arg = value;
593
594         return 0;
595 }
596
597 static int
598 response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp)
599 {
600         int rc;
601         unsigned long new_value;
602
603         rc = kstrtoul(val, 0, &new_value);
604         if (rc) {
605                 CERROR("Invalid value for 'lnet_response_tracking'\n");
606                 return -EINVAL;
607         }
608
609         if (new_value < 0 || new_value > 3) {
610                 CWARN("Invalid value (%lu) for 'lnet_response_tracking'\n",
611                       new_value);
612                 return -EINVAL;
613         }
614
615         lnet_response_tracking = new_value;
616
617         return 0;
618 }
619
/*
 * Return the 'routes' module parameter string as-is. No parsing or
 * validation is done here.
 */
static const char *
lnet_get_routes(void)
{
        return routes;
}
625
626 static const char *
627 lnet_get_networks(void)
628 {
629         const char *nets;
630         int rc;
631
632         if (*networks != 0 && *ip2nets != 0) {
633                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
634                                    "'ip2nets' but not both at once\n");
635                 return NULL;
636         }
637
638         if (*ip2nets != 0) {
639                 rc = lnet_parse_ip2nets(&nets, ip2nets);
640                 return (rc == 0) ? nets : NULL;
641         }
642
643         if (*networks != 0)
644                 return networks;
645
646         return "tcp";
647 }
648
/*
 * Initialise the locks and the completion embedded in the_lnet: two
 * spinlocks (ln_eq_wait_lock, ln_msg_resend_lock), the
 * ln_mt_wait_complete completion and the ln_lnd_mutex mutex.
 * ln_api_mutex is not touched here; it is initialised statically.
 */
static void
lnet_init_locks(void)
{
        spin_lock_init(&the_lnet.ln_eq_wait_lock);
        spin_lock_init(&the_lnet.ln_msg_resend_lock);
        init_completion(&the_lnet.ln_mt_wait_complete);
        mutex_init(&the_lnet.ln_lnd_mutex);
}
657
/*
 * Slab caches used by LNet. They are created by lnet_slab_setup() and
 * destroyed, then reset to NULL, by lnet_slab_cleanup().
 */
struct kmem_cache *lnet_mes_cachep;        /* MEs kmem_cache */
struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
                                            *  MDs kmem_cache */
struct kmem_cache *lnet_udsp_cachep;       /* udsp cache */
struct kmem_cache *lnet_rspt_cachep;       /* response tracker cache */
struct kmem_cache *lnet_msg_cachep;        /* struct lnet_msg cache */
664
665 static int
666 lnet_slab_setup(void)
667 {
668         /* create specific kmem_cache for MEs and small MDs (i.e., originally
669          * allocated in <size-xxx> kmem_cache).
670          */
671         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(struct lnet_me),
672                                             0, 0, NULL);
673         if (!lnet_mes_cachep)
674                 return -ENOMEM;
675
676         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
677                                                   LNET_SMALL_MD_SIZE, 0, 0,
678                                                   NULL);
679         if (!lnet_small_mds_cachep)
680                 return -ENOMEM;
681
682         lnet_udsp_cachep = kmem_cache_create("lnet_udsp",
683                                              sizeof(struct lnet_udsp),
684                                              0, 0, NULL);
685         if (!lnet_udsp_cachep)
686                 return -ENOMEM;
687
688         lnet_rspt_cachep = kmem_cache_create("lnet_rspt", sizeof(struct lnet_rsp_tracker),
689                                             0, 0, NULL);
690         if (!lnet_rspt_cachep)
691                 return -ENOMEM;
692
693         lnet_msg_cachep = kmem_cache_create("lnet_msg", sizeof(struct lnet_msg),
694                                             0, 0, NULL);
695         if (!lnet_msg_cachep)
696                 return -ENOMEM;
697
698         return 0;
699 }
700
701 static void
702 lnet_slab_cleanup(void)
703 {
704         if (lnet_msg_cachep) {
705                 kmem_cache_destroy(lnet_msg_cachep);
706                 lnet_msg_cachep = NULL;
707         }
708
709         if (lnet_rspt_cachep) {
710                 kmem_cache_destroy(lnet_rspt_cachep);
711                 lnet_rspt_cachep = NULL;
712         }
713
714         if (lnet_udsp_cachep) {
715                 kmem_cache_destroy(lnet_udsp_cachep);
716                 lnet_udsp_cachep = NULL;
717         }
718
719         if (lnet_small_mds_cachep) {
720                 kmem_cache_destroy(lnet_small_mds_cachep);
721                 lnet_small_mds_cachep = NULL;
722         }
723
724         if (lnet_mes_cachep) {
725                 kmem_cache_destroy(lnet_mes_cachep);
726                 lnet_mes_cachep = NULL;
727         }
728 }
729
/*
 * Allocate and initialise the remote-nets hash table.
 *
 * Asserts that no table is installed yet and that ln_remote_nets_hbits is
 * positive, allocates LNET_REMOTE_NETS_HASH_SIZE list heads, initialises
 * each of them and publishes the array in the_lnet.ln_remote_nets_hash.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int
lnet_create_remote_nets_table(void)
{
        int               i;
        struct list_head *hash;

        LASSERT(the_lnet.ln_remote_nets_hash == NULL);
        LASSERT(the_lnet.ln_remote_nets_hbits > 0);
        CFS_ALLOC_PTR_ARRAY(hash, LNET_REMOTE_NETS_HASH_SIZE);
        if (hash == NULL) {
                CERROR("Failed to create remote nets hash table\n");
                return -ENOMEM;
        }

        for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
                INIT_LIST_HEAD(&hash[i]);
        /* install the table only once every bucket is initialised */
        the_lnet.ln_remote_nets_hash = hash;
        return 0;
}
749
/*
 * Free the remote-nets hash table, if one is installed.
 *
 * Every bucket is asserted to be empty before the array is freed, and
 * the_lnet.ln_remote_nets_hash is reset to NULL afterwards. Does nothing
 * when no table is installed.
 */
static void
lnet_destroy_remote_nets_table(void)
{
        int i;

        if (the_lnet.ln_remote_nets_hash == NULL)
                return;

        /* the caller must have emptied every bucket already */
        for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
                LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));

        CFS_FREE_PTR_ARRAY(the_lnet.ln_remote_nets_hash,
                           LNET_REMOTE_NETS_HASH_SIZE);
        the_lnet.ln_remote_nets_hash = NULL;
}
765
766 static void
767 lnet_destroy_locks(void)
768 {
769         if (the_lnet.ln_res_lock != NULL) {
770                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
771                 the_lnet.ln_res_lock = NULL;
772         }
773
774         if (the_lnet.ln_net_lock != NULL) {
775                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
776                 the_lnet.ln_net_lock = NULL;
777         }
778 }
779
780 static int
781 lnet_create_locks(void)
782 {
783         lnet_init_locks();
784
785         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
786         if (the_lnet.ln_res_lock == NULL)
787                 goto failed;
788
789         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
790         if (the_lnet.ln_net_lock == NULL)
791                 goto failed;
792
793         return 0;
794
795  failed:
796         lnet_destroy_locks();
797         return -ENOMEM;
798 }
799
800 static void lnet_assert_wire_constants(void)
801 {
802         /* Wire protocol assertions generated by 'wirecheck'
803          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
804          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
805          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
806          */
807
808         /* Constants... */
809         BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
810         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
811         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
812         BUILD_BUG_ON(LNET_MSG_ACK != 0);
813         BUILD_BUG_ON(LNET_MSG_PUT != 1);
814         BUILD_BUG_ON(LNET_MSG_GET != 2);
815         BUILD_BUG_ON(LNET_MSG_REPLY != 3);
816         BUILD_BUG_ON(LNET_MSG_HELLO != 4);
817
818         BUILD_BUG_ON((int)sizeof(lnet_nid_t) != 8);
819         BUILD_BUG_ON((int)sizeof(lnet_pid_t) != 4);
820
821         /* Checks for struct lnet_nid */
822         BUILD_BUG_ON((int)sizeof(struct lnet_nid) != 20);
823         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_size) != 0);
824         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_size) != 1);
825         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_type) != 1);
826         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_type) != 1);
827         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_num) != 2);
828         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_num) != 2);
829         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_addr) != 4);
830         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_addr) != 16);
831
832         /* Checks for struct lnet_process_id_packed */
833         BUILD_BUG_ON((int)sizeof(struct lnet_process_id_packed) != 12);
834         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, nid) != 0);
835         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->nid) != 8);
836         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, pid) != 8);
837         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->pid) != 4);
838
839         /* Checks for struct lnet_handle_wire */
840         BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
841         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
842                                    wh_interface_cookie) != 0);
843         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
844         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
845                                    wh_object_cookie) != 8);
846         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
847
848         /* Checks for struct struct lnet_magicversion */
849         BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
850         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
851         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
852         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
853         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
854         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion,
855                                    version_minor) != 6);
856         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
857
858         /* Checks for struct _lnet_hdr_nid4 */
859         BUILD_BUG_ON((int)sizeof(struct _lnet_hdr_nid4) != 72);
860         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_nid) != 0);
861         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_nid) != 8);
862         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_nid) != 8);
863         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_nid) != 8);
864         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_pid) != 16);
865         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_pid) != 4);
866         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_pid) != 20);
867         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_pid) != 4);
868         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, type) != 24);
869         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->type) != 4);
870         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, payload_length) != 28);
871         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->payload_length) != 4);
872         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg) != 32);
873         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg) != 40);
874
875         /* Ack */
876         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.dst_wmd) != 32);
877         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.dst_wmd) != 16);
878         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.match_bits) != 48);
879         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.match_bits) != 8);
880         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.mlength) != 56);
881         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.mlength) != 4);
882
883         /* Put */
884         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ack_wmd) != 32);
885         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ack_wmd) != 16);
886         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.match_bits) != 48);
887         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.match_bits) != 8);
888         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.hdr_data) != 56);
889         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.hdr_data) != 8);
890         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ptl_index) != 64);
891         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ptl_index) != 4);
892         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.offset) != 68);
893         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.offset) != 4);
894
895         /* Get */
896         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.return_wmd) != 32);
897         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.return_wmd) != 16);
898         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.match_bits) != 48);
899         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.match_bits) != 8);
900         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.ptl_index) != 56);
901         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.ptl_index) != 4);
902         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.src_offset) != 60);
903         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.src_offset) != 4);
904         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.sink_length) != 64);
905         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.sink_length) != 4);
906
907         /* Reply */
908         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.reply.dst_wmd) != 32);
909         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.reply.dst_wmd) != 16);
910
911         /* Hello */
912         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.incarnation) != 32);
913         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.incarnation) != 8);
914         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.type) != 40);
915         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.type) != 4);
916
917         /* Checks for struct lnet_ni_status and related constants */
918         BUILD_BUG_ON(LNET_NI_STATUS_INVALID != 0x00000000);
919         BUILD_BUG_ON(LNET_NI_STATUS_UP != 0x15aac0de);
920         BUILD_BUG_ON(LNET_NI_STATUS_DOWN != 0xdeadface);
921
922         /* Checks for struct lnet_ni_status */
923         BUILD_BUG_ON((int)sizeof(struct lnet_ni_status) != 16);
924         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_nid) != 0);
925         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) != 8);
926         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_status) != 8);
927         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_status) != 4);
928         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_msg_size) != 12);
929         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_msg_size) != 4);
930
931         /* Checks for struct lnet_ni_large_status */
932         BUILD_BUG_ON((int)sizeof(struct lnet_ni_large_status) != 24);
933         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_status) != 0);
934         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_status) != 4);
935         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_nid) != 4);
936         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_nid) != 20);
937
938         /* Checks for struct lnet_ping_info and related constants */
939         BUILD_BUG_ON(LNET_PROTO_PING_MAGIC != 0x70696E67);
940         BUILD_BUG_ON(LNET_PING_FEAT_INVAL != 0);
941         BUILD_BUG_ON(LNET_PING_FEAT_BASE != 1);
942         BUILD_BUG_ON(LNET_PING_FEAT_NI_STATUS != 2);
943         BUILD_BUG_ON(LNET_PING_FEAT_RTE_DISABLED != 4);
944         BUILD_BUG_ON(LNET_PING_FEAT_MULTI_RAIL != 8);
945         BUILD_BUG_ON(LNET_PING_FEAT_DISCOVERY != 16);
946         BUILD_BUG_ON(LNET_PING_FEAT_LARGE_ADDR != 32);
947         BUILD_BUG_ON(LNET_PING_FEAT_PRIMARY_LARGE != 64);
948         BUILD_BUG_ON(LNET_PING_FEAT_BITS != 127);
949
950         /* Checks for struct lnet_ping_info */
951         BUILD_BUG_ON((int)sizeof(struct lnet_ping_info) != 16);
952         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_magic) != 0);
953         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) != 4);
954         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_features) != 4);
955         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_features) != 4);
956         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_pid) != 8);
957         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) != 4);
958         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_nnis) != 12);
959         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) != 4);
960         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_ni) != 16);
961         BUILD_BUG_ON(offsetof(struct lnet_ping_info, pi_ni) != sizeof(struct lnet_ping_info));
962
963         /* Acceptor connection request */
964         BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
965
966         /* Checks for struct lnet_acceptor_connreq */
967         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq) != 16);
968         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_magic) != 0);
969         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_magic) != 4);
970         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_version) != 4);
971         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_version) != 4);
972         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_nid) != 8);
973         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_nid) != 8);
974
975         /* Checks for struct lnet_acceptor_connreq_v2 */
976         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq_v2) != 28);
977         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_magic) != 0);
978         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_magic) != 4);
979         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_version) != 4);
980         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_version) != 4);
981         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_nid) != 8);
982         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_nid) != 20);
983
984         /* Checks for struct lnet_counters_common */
985         BUILD_BUG_ON((int)sizeof(struct lnet_counters_common) != 60);
986         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_alloc) != 0);
987         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_alloc) != 4);
988         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_max) != 4);
989         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_max) != 4);
990         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_errors) != 8);
991         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_errors) != 4);
992         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_count) != 12);
993         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_count) != 4);
994         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_count) != 16);
995         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_count) != 4);
996         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_count) != 20);
997         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_count) != 4);
998         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_count) != 24);
999         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_count) != 4);
1000         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_length) != 28);
1001         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_length) != 8);
1002         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_length) != 36);
1003         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_length) != 8);
1004         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_length) != 44);
1005         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_length) != 8);
1006         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_length) != 52);
1007         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_length) != 8);
1008 }
1009
1010 static const struct lnet_lnd *lnet_find_lnd_by_type(__u32 type)
1011 {
1012         const struct lnet_lnd *lnd;
1013
1014         /* holding lnd mutex */
1015         if (type >= NUM_LNDS)
1016                 return NULL;
1017         lnd = the_lnet.ln_lnds[type];
1018         LASSERT(!lnd || lnd->lnd_type == type);
1019
1020         return lnd;
1021 }
1022
1023 unsigned int
1024 lnet_get_lnd_timeout(void)
1025 {
1026         return lnet_lnd_timeout;
1027 }
1028 EXPORT_SYMBOL(lnet_get_lnd_timeout);
1029
1030 void
1031 lnet_register_lnd(const struct lnet_lnd *lnd)
1032 {
1033         mutex_lock(&the_lnet.ln_lnd_mutex);
1034
1035         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
1036         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
1037
1038         the_lnet.ln_lnds[lnd->lnd_type] = lnd;
1039
1040         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
1041
1042         mutex_unlock(&the_lnet.ln_lnd_mutex);
1043 }
1044 EXPORT_SYMBOL(lnet_register_lnd);
1045
1046 void
1047 lnet_unregister_lnd(const struct lnet_lnd *lnd)
1048 {
1049         mutex_lock(&the_lnet.ln_lnd_mutex);
1050
1051         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
1052
1053         the_lnet.ln_lnds[lnd->lnd_type] = NULL;
1054         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
1055
1056         mutex_unlock(&the_lnet.ln_lnd_mutex);
1057 }
1058 EXPORT_SYMBOL(lnet_unregister_lnd);
1059
1060 static void
1061 lnet_counters_get_common_locked(struct lnet_counters_common *common)
1062 {
1063         struct lnet_counters *ctr;
1064         int i;
1065
1066         /* FIXME !!! Their is no assert_lnet_net_locked() to ensure this
1067          * actually called under the protection of the lnet_net_lock.
1068          */
1069         memset(common, 0, sizeof(*common));
1070
1071         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1072                 common->lcc_msgs_max     += ctr->lct_common.lcc_msgs_max;
1073                 common->lcc_msgs_alloc   += ctr->lct_common.lcc_msgs_alloc;
1074                 common->lcc_errors       += ctr->lct_common.lcc_errors;
1075                 common->lcc_send_count   += ctr->lct_common.lcc_send_count;
1076                 common->lcc_recv_count   += ctr->lct_common.lcc_recv_count;
1077                 common->lcc_route_count  += ctr->lct_common.lcc_route_count;
1078                 common->lcc_drop_count   += ctr->lct_common.lcc_drop_count;
1079                 common->lcc_send_length  += ctr->lct_common.lcc_send_length;
1080                 common->lcc_recv_length  += ctr->lct_common.lcc_recv_length;
1081                 common->lcc_route_length += ctr->lct_common.lcc_route_length;
1082                 common->lcc_drop_length  += ctr->lct_common.lcc_drop_length;
1083         }
1084 }
1085
1086 void
1087 lnet_counters_get_common(struct lnet_counters_common *common)
1088 {
1089         lnet_net_lock(LNET_LOCK_EX);
1090         lnet_counters_get_common_locked(common);
1091         lnet_net_unlock(LNET_LOCK_EX);
1092 }
1093 EXPORT_SYMBOL(lnet_counters_get_common);
1094
1095 int
1096 lnet_counters_get(struct lnet_counters *counters)
1097 {
1098         struct lnet_counters *ctr;
1099         struct lnet_counters_health *health = &counters->lct_health;
1100         int i, rc = 0;
1101
1102         memset(counters, 0, sizeof(*counters));
1103
1104         lnet_net_lock(LNET_LOCK_EX);
1105
1106         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1107                 GOTO(out_unlock, rc = -ENODEV);
1108
1109         lnet_counters_get_common_locked(&counters->lct_common);
1110
1111         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1112                 health->lch_rst_alloc    += ctr->lct_health.lch_rst_alloc;
1113                 health->lch_resend_count += ctr->lct_health.lch_resend_count;
1114                 health->lch_response_timeout_count +=
1115                                 ctr->lct_health.lch_response_timeout_count;
1116                 health->lch_local_interrupt_count +=
1117                                 ctr->lct_health.lch_local_interrupt_count;
1118                 health->lch_local_dropped_count +=
1119                                 ctr->lct_health.lch_local_dropped_count;
1120                 health->lch_local_aborted_count +=
1121                                 ctr->lct_health.lch_local_aborted_count;
1122                 health->lch_local_no_route_count +=
1123                                 ctr->lct_health.lch_local_no_route_count;
1124                 health->lch_local_timeout_count +=
1125                                 ctr->lct_health.lch_local_timeout_count;
1126                 health->lch_local_error_count +=
1127                                 ctr->lct_health.lch_local_error_count;
1128                 health->lch_remote_dropped_count +=
1129                                 ctr->lct_health.lch_remote_dropped_count;
1130                 health->lch_remote_error_count +=
1131                                 ctr->lct_health.lch_remote_error_count;
1132                 health->lch_remote_timeout_count +=
1133                                 ctr->lct_health.lch_remote_timeout_count;
1134                 health->lch_network_timeout_count +=
1135                                 ctr->lct_health.lch_network_timeout_count;
1136         }
1137 out_unlock:
1138         lnet_net_unlock(LNET_LOCK_EX);
1139         return rc;
1140 }
1141 EXPORT_SYMBOL(lnet_counters_get);
1142
1143 void
1144 lnet_counters_reset(void)
1145 {
1146         struct lnet_counters *counters;
1147         int             i;
1148
1149         lnet_net_lock(LNET_LOCK_EX);
1150
1151         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1152                 goto avoid_reset;
1153
1154         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
1155                 memset(counters, 0, sizeof(struct lnet_counters));
1156 avoid_reset:
1157         lnet_net_unlock(LNET_LOCK_EX);
1158 }
1159
1160 static char *
1161 lnet_res_type2str(int type)
1162 {
1163         switch (type) {
1164         default:
1165                 LBUG();
1166         case LNET_COOKIE_TYPE_MD:
1167                 return "MD";
1168         case LNET_COOKIE_TYPE_ME:
1169                 return "ME";
1170         case LNET_COOKIE_TYPE_EQ:
1171                 return "EQ";
1172         }
1173 }
1174
1175 static void
1176 lnet_res_container_cleanup(struct lnet_res_container *rec)
1177 {
1178         int     count = 0;
1179
1180         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
1181                 return;
1182
1183         while (!list_empty(&rec->rec_active)) {
1184                 struct list_head *e = rec->rec_active.next;
1185
1186                 list_del_init(e);
1187                 if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
1188                         lnet_md_free(list_entry(e, struct lnet_libmd, md_list));
1189
1190                 } else { /* NB: Active MEs should be attached on portals */
1191                         LBUG();
1192                 }
1193                 count++;
1194         }
1195
1196         if (count > 0) {
1197                 /* Found alive MD/ME/EQ, user really should unlink/free
1198                  * all of them before finalize LNet, but if someone didn't,
1199                  * we have to recycle garbage for him */
1200                 CERROR("%d active elements on exit of %s container\n",
1201                        count, lnet_res_type2str(rec->rec_type));
1202         }
1203
1204         if (rec->rec_lh_hash != NULL) {
1205                 CFS_FREE_PTR_ARRAY(rec->rec_lh_hash, LNET_LH_HASH_SIZE);
1206                 rec->rec_lh_hash = NULL;
1207         }
1208
1209         rec->rec_type = 0; /* mark it as finalized */
1210 }
1211
1212 static int
1213 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
1214 {
1215         int     rc = 0;
1216         int     i;
1217
1218         LASSERT(rec->rec_type == 0);
1219
1220         rec->rec_type = type;
1221         INIT_LIST_HEAD(&rec->rec_active);
1222
1223         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
1224
1225         /* Arbitrary choice of hash table size */
1226         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
1227                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
1228         if (rec->rec_lh_hash == NULL) {
1229                 rc = -ENOMEM;
1230                 goto out;
1231         }
1232
1233         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
1234                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
1235
1236         return 0;
1237
1238 out:
1239         CERROR("Failed to setup %s resource container\n",
1240                lnet_res_type2str(type));
1241         lnet_res_container_cleanup(rec);
1242         return rc;
1243 }
1244
1245 static void
1246 lnet_res_containers_destroy(struct lnet_res_container **recs)
1247 {
1248         struct lnet_res_container       *rec;
1249         int                             i;
1250
1251         cfs_percpt_for_each(rec, i, recs)
1252                 lnet_res_container_cleanup(rec);
1253
1254         cfs_percpt_free(recs);
1255 }
1256
1257 static struct lnet_res_container **
1258 lnet_res_containers_create(int type)
1259 {
1260         struct lnet_res_container       **recs;
1261         struct lnet_res_container       *rec;
1262         int                             rc;
1263         int                             i;
1264
1265         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
1266         if (recs == NULL) {
1267                 CERROR("Failed to allocate %s resource containers\n",
1268                        lnet_res_type2str(type));
1269                 return NULL;
1270         }
1271
1272         cfs_percpt_for_each(rec, i, recs) {
1273                 rc = lnet_res_container_setup(rec, i, type);
1274                 if (rc != 0) {
1275                         lnet_res_containers_destroy(recs);
1276                         return NULL;
1277                 }
1278         }
1279
1280         return recs;
1281 }
1282
1283 struct lnet_libhandle *
1284 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
1285 {
1286         /* ALWAYS called with lnet_res_lock held */
1287         struct list_head        *head;
1288         struct lnet_libhandle   *lh;
1289         unsigned int            hash;
1290
1291         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
1292                 return NULL;
1293
1294         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
1295         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
1296
1297         list_for_each_entry(lh, head, lh_hash_chain) {
1298                 if (lh->lh_cookie == cookie)
1299                         return lh;
1300         }
1301
1302         return NULL;
1303 }
1304
1305 void
1306 lnet_res_lh_initialize(struct lnet_res_container *rec,
1307                        struct lnet_libhandle *lh)
1308 {
1309         /* ALWAYS called with lnet_res_lock held */
1310         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
1311         unsigned int    hash;
1312
1313         lh->lh_cookie = rec->rec_lh_cookie;
1314         rec->rec_lh_cookie += 1 << ibits;
1315
1316         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
1317
1318         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
1319 }
1320
1321 struct list_head **
1322 lnet_create_array_of_queues(void)
1323 {
1324         struct list_head **qs;
1325         struct list_head *q;
1326         int i;
1327
1328         qs = cfs_percpt_alloc(lnet_cpt_table(),
1329                               sizeof(struct list_head));
1330         if (!qs) {
1331                 CERROR("Failed to allocate queues\n");
1332                 return NULL;
1333         }
1334
1335         cfs_percpt_for_each(q, i, qs)
1336                 INIT_LIST_HEAD(q);
1337
1338         return qs;
1339 }
1340
1341 static int lnet_unprepare(void);
1342
1343 static int
1344 lnet_prepare(lnet_pid_t requested_pid)
1345 {
1346         /* Prepare to bring up the network */
1347         struct lnet_res_container **recs;
1348         int                       rc = 0;
1349
1350         if (requested_pid == LNET_PID_ANY) {
1351                 /* Don't instantiate LNET just for me */
1352                 return -ENETDOWN;
1353         }
1354
1355         LASSERT(the_lnet.ln_refcount == 0);
1356
1357         the_lnet.ln_routing = 0;
1358
1359         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
1360         the_lnet.ln_pid = requested_pid;
1361
1362         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
1363         INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
1364         INIT_LIST_HEAD(&the_lnet.ln_nets);
1365         INIT_LIST_HEAD(&the_lnet.ln_routers);
1366         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
1367         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
1368         INIT_LIST_HEAD(&the_lnet.ln_dc_request);
1369         INIT_LIST_HEAD(&the_lnet.ln_dc_working);
1370         INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
1371         INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
1372         INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
1373         INIT_LIST_HEAD(&the_lnet.ln_udsp_list);
1374         init_waitqueue_head(&the_lnet.ln_dc_waitq);
1375         the_lnet.ln_mt_handler = NULL;
1376         init_completion(&the_lnet.ln_started);
1377         atomic_set(&the_lnet.ln_late_msg_count, 0);
1378         atomic64_set(&the_lnet.ln_late_msg_nsecs, 0);
1379
1380         rc = lnet_slab_setup();
1381         if (rc != 0)
1382                 goto failed;
1383
1384         rc = lnet_create_remote_nets_table();
1385         if (rc != 0)
1386                 goto failed;
1387
1388         /*
1389          * NB the interface cookie in wire handles guards against delayed
1390          * replies and ACKs appearing valid after reboot.
1391          */
1392         the_lnet.ln_interface_cookie = ktime_get_real_ns();
1393
1394         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
1395                                                 sizeof(struct lnet_counters));
1396         if (the_lnet.ln_counters == NULL) {
1397                 CERROR("Failed to allocate counters for LNet\n");
1398                 rc = -ENOMEM;
1399                 goto failed;
1400         }
1401
1402         rc = lnet_peer_tables_create();
1403         if (rc != 0)
1404                 goto failed;
1405
1406         rc = lnet_msg_containers_create();
1407         if (rc != 0)
1408                 goto failed;
1409
1410         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
1411                                       LNET_COOKIE_TYPE_EQ);
1412         if (rc != 0)
1413                 goto failed;
1414
1415         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
1416         if (recs == NULL) {
1417                 rc = -ENOMEM;
1418                 goto failed;
1419         }
1420
1421         the_lnet.ln_md_containers = recs;
1422
1423         rc = lnet_portals_create();
1424         if (rc != 0) {
1425                 CERROR("Failed to create portals for LNet: %d\n", rc);
1426                 goto failed;
1427         }
1428
1429         the_lnet.ln_mt_zombie_rstqs = lnet_create_array_of_queues();
1430         if (!the_lnet.ln_mt_zombie_rstqs) {
1431                 rc = -ENOMEM;
1432                 goto failed;
1433         }
1434
1435         return 0;
1436
1437  failed:
1438         lnet_unprepare();
1439         return rc;
1440 }
1441
1442 static int
1443 lnet_unprepare(void)
1444 {
1445         /* NB no LNET_LOCK since this is the last reference.  All LND instances
1446          * have shut down already, so it is safe to unlink and free all
1447          * descriptors, even those that appear committed to a network op (eg MD
1448          * with non-zero pending count) */
1449
1450         lnet_fail_nid(LNET_NID_ANY, 0);
1451
1452         LASSERT(the_lnet.ln_refcount == 0);
1453         LASSERT(list_empty(&the_lnet.ln_test_peers));
1454         LASSERT(list_empty(&the_lnet.ln_nets));
1455
1456         if (the_lnet.ln_mt_zombie_rstqs) {
1457                 lnet_clean_zombie_rstqs();
1458                 the_lnet.ln_mt_zombie_rstqs = NULL;
1459         }
1460
1461         lnet_assert_handler_unused(the_lnet.ln_mt_handler);
1462         the_lnet.ln_mt_handler = NULL;
1463
1464         lnet_portals_destroy();
1465
1466         if (the_lnet.ln_md_containers != NULL) {
1467                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
1468                 the_lnet.ln_md_containers = NULL;
1469         }
1470
1471         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
1472
1473         lnet_msg_containers_destroy();
1474         lnet_peer_uninit();
1475         lnet_rtrpools_free(0);
1476
1477         if (the_lnet.ln_counters != NULL) {
1478                 cfs_percpt_free(the_lnet.ln_counters);
1479                 the_lnet.ln_counters = NULL;
1480         }
1481         lnet_destroy_remote_nets_table();
1482         lnet_udsp_destroy(true);
1483         lnet_slab_cleanup();
1484
1485         return 0;
1486 }
1487
1488 struct lnet_ni  *
1489 lnet_net2ni_locked(__u32 net_id, int cpt)
1490 {
1491         struct lnet_ni   *ni;
1492         struct lnet_net  *net;
1493
1494         LASSERT(cpt != LNET_LOCK_EX);
1495
1496         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1497                 if (net->net_id == net_id) {
1498                         ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
1499                                               ni_netlist);
1500                         return ni;
1501                 }
1502         }
1503
1504         return NULL;
1505 }
1506
1507 struct lnet_ni *
1508 lnet_net2ni_addref(__u32 net)
1509 {
1510         struct lnet_ni *ni;
1511
1512         lnet_net_lock(0);
1513         ni = lnet_net2ni_locked(net, 0);
1514         if (ni)
1515                 lnet_ni_addref_locked(ni, 0);
1516         lnet_net_unlock(0);
1517
1518         return ni;
1519 }
1520 EXPORT_SYMBOL(lnet_net2ni_addref);
1521
1522 struct lnet_net *
1523 lnet_get_net_locked(__u32 net_id)
1524 {
1525         struct lnet_net  *net;
1526
1527         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1528                 if (net->net_id == net_id)
1529                         return net;
1530         }
1531
1532         return NULL;
1533 }
1534
1535 void
1536 lnet_net_clr_pref_rtrs(struct lnet_net *net)
1537 {
1538         struct list_head zombies;
1539         struct lnet_nid_list *ne;
1540         struct lnet_nid_list *tmp;
1541
1542         INIT_LIST_HEAD(&zombies);
1543
1544         lnet_net_lock(LNET_LOCK_EX);
1545         list_splice_init(&net->net_rtr_pref_nids, &zombies);
1546         lnet_net_unlock(LNET_LOCK_EX);
1547
1548         list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
1549                 list_del_init(&ne->nl_list);
1550                 LIBCFS_FREE(ne, sizeof(*ne));
1551         }
1552 }
1553
1554 int
1555 lnet_net_add_pref_rtr(struct lnet_net *net,
1556                       struct lnet_nid *gw_nid)
1557 __must_hold(&the_lnet.ln_api_mutex)
1558 {
1559         struct lnet_nid_list *ne;
1560
1561         /* This function is called with api_mutex held. When the api_mutex
1562          * is held the list can not be modified, as it is only modified as
1563          * a result of applying a UDSP and that happens under api_mutex
1564          * lock.
1565          */
1566         list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
1567                 if (nid_same(&ne->nl_nid, gw_nid))
1568                         return -EEXIST;
1569         }
1570
1571         LIBCFS_ALLOC(ne, sizeof(*ne));
1572         if (!ne)
1573                 return -ENOMEM;
1574
1575         ne->nl_nid = *gw_nid;
1576
1577         /* Lock the cpt to protect against addition and checks in the
1578          * selection algorithm
1579          */
1580         lnet_net_lock(LNET_LOCK_EX);
1581         list_add(&ne->nl_list, &net->net_rtr_pref_nids);
1582         lnet_net_unlock(LNET_LOCK_EX);
1583
1584         return 0;
1585 }
1586
1587 static unsigned int
1588 lnet_nid4_cpt_hash(lnet_nid_t nid, unsigned int number)
1589 {
1590         __u64 key = nid;
1591         __u16 lnd = LNET_NETTYP(LNET_NIDNET(nid));
1592         unsigned int cpt;
1593
1594         if (lnd == KFILND || lnd == GNILND) {
1595                 cpt = hash_long(key, LNET_CPT_BITS);
1596
1597                 /* NB: The number of CPTs needn't be a power of 2 */
1598                 if (cpt >= number)
1599                         cpt = (key + cpt + (cpt >> 1)) % number;
1600         } else {
1601                 __u64 pair_bits = 0x0001000100010001LLU;
1602                 __u64 mask = pair_bits * 0xFF;
1603                 __u64 pair_sum;
1604                 /* For ipv4 NIDs, use (sum-by-multiplication of nid bytes) mod
1605                  * (number of CPTs) to match nid to a CPT.
1606                  */
1607                 pair_sum = (key & mask) + ((key >> 8) & mask);
1608                 pair_sum = (pair_sum * pair_bits) >> 48;
1609                 cpt = (unsigned int)(pair_sum) % number;
1610         }
1611
1612         CDEBUG(D_NET, "Match nid %s to cpt %u\n",
1613                libcfs_nid2str(nid), cpt);
1614
1615         return cpt;
1616 }
1617
1618 unsigned int
1619 lnet_nid_cpt_hash(struct lnet_nid *nid, unsigned int number)
1620 {
1621         unsigned int val;
1622         u32 h = 0;
1623         int i;
1624
1625         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
1626
1627         if (number == 1)
1628                 return 0;
1629
1630         if (nid_is_nid4(nid))
1631                 return lnet_nid4_cpt_hash(lnet_nid_to_nid4(nid), number);
1632
1633         for (i = 0; i < 4; i++)
1634                 h = cfs_hash_32(nid->nid_addr[i]^h, 32);
1635         val = cfs_hash_32(LNET_NID_NET(nid) ^ h, LNET_CPT_BITS);
1636         if (val < number)
1637                 return val;
1638         return (unsigned int)(h + val + (val >> 1)) % number;
1639 }
1640
1641 int
1642 lnet_cpt_of_nid_locked(struct lnet_nid *nid, struct lnet_ni *ni)
1643 {
1644         struct lnet_net *net;
1645
1646         /* must called with hold of lnet_net_lock */
1647         if (LNET_CPT_NUMBER == 1)
1648                 return 0; /* the only one */
1649
1650         /*
1651          * If NI is provided then use the CPT identified in the NI cpt
1652          * list if one exists. If one doesn't exist, then that NI is
1653          * associated with all CPTs and it follows that the net it belongs
1654          * to is implicitly associated with all CPTs, so just hash the nid
1655          * and return that.
1656          */
1657         if (ni != NULL) {
1658                 if (ni->ni_cpts != NULL)
1659                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
1660                                                              ni->ni_ncpts)];
1661                 else
1662                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1663         }
1664
1665         /* no NI provided so look at the net */
1666         net = lnet_get_net_locked(LNET_NID_NET(nid));
1667
1668         if (net != NULL && net->net_cpts != NULL) {
1669                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
1670         }
1671
1672         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1673 }
1674
1675 int
1676 lnet_nid2cpt(struct lnet_nid *nid, struct lnet_ni *ni)
1677 {
1678         int     cpt;
1679         int     cpt2;
1680
1681         if (LNET_CPT_NUMBER == 1)
1682                 return 0; /* the only one */
1683
1684         cpt = lnet_net_lock_current();
1685
1686         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
1687
1688         lnet_net_unlock(cpt);
1689
1690         return cpt2;
1691 }
1692 EXPORT_SYMBOL(lnet_nid2cpt);
1693
1694 int
1695 lnet_cpt_of_nid(lnet_nid_t nid4, struct lnet_ni *ni)
1696 {
1697         struct lnet_nid nid;
1698
1699         if (LNET_CPT_NUMBER == 1)
1700                 return 0; /* the only one */
1701
1702         lnet_nid4_to_nid(nid4, &nid);
1703         return lnet_nid2cpt(&nid, ni);
1704 }
1705 EXPORT_SYMBOL(lnet_cpt_of_nid);
1706
1707 int
1708 lnet_islocalnet_locked(__u32 net_id)
1709 {
1710         struct lnet_net *net;
1711         bool local;
1712
1713         net = lnet_get_net_locked(net_id);
1714
1715         local = net != NULL;
1716
1717         return local;
1718 }
1719
1720 int
1721 lnet_islocalnet(__u32 net_id)
1722 {
1723         int cpt;
1724         bool local;
1725
1726         cpt = lnet_net_lock_current();
1727
1728         local = lnet_islocalnet_locked(net_id);
1729
1730         lnet_net_unlock(cpt);
1731
1732         return local;
1733 }
1734
1735 struct lnet_ni  *
1736 lnet_nid_to_ni_locked(struct lnet_nid *nid, int cpt)
1737 {
1738         struct lnet_net  *net;
1739         struct lnet_ni *ni;
1740
1741         LASSERT(cpt != LNET_LOCK_EX);
1742
1743         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1744                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1745                         if (nid_same(&ni->ni_nid, nid))
1746                                 return ni;
1747                 }
1748         }
1749
1750         return NULL;
1751 }
1752
1753 struct lnet_ni *
1754 lnet_nid_to_ni_addref(struct lnet_nid *nid)
1755 {
1756         struct lnet_ni *ni;
1757
1758         lnet_net_lock(0);
1759         ni = lnet_nid_to_ni_locked(nid, 0);
1760         if (ni)
1761                 lnet_ni_addref_locked(ni, 0);
1762         lnet_net_unlock(0);
1763
1764         return ni;
1765 }
1766 EXPORT_SYMBOL(lnet_nid_to_ni_addref);
1767
1768 int
1769 lnet_islocalnid(struct lnet_nid *nid)
1770 {
1771         struct lnet_ni  *ni;
1772         int             cpt;
1773
1774         cpt = lnet_net_lock_current();
1775         ni = lnet_nid_to_ni_locked(nid, cpt);
1776         lnet_net_unlock(cpt);
1777
1778         return ni != NULL;
1779 }
1780
1781 int
1782 lnet_count_acceptor_nets(void)
1783 {
1784         /* Return the # of NIs that need the acceptor. */
1785         int              count = 0;
1786         struct lnet_net  *net;
1787         int              cpt;
1788
1789         cpt = lnet_net_lock_current();
1790         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1791                 /* all socklnd type networks should have the acceptor
1792                  * thread started */
1793                 if (net->net_lnd->lnd_accept != NULL)
1794                         count++;
1795         }
1796
1797         lnet_net_unlock(cpt);
1798
1799         return count;
1800 }
1801
1802 struct lnet_ping_buffer *
1803 lnet_ping_buffer_alloc(int nbytes, gfp_t gfp)
1804 {
1805         struct lnet_ping_buffer *pbuf;
1806
1807         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nbytes), gfp);
1808         if (pbuf) {
1809                 pbuf->pb_nbytes = nbytes;       /* sizeof of pb_info */
1810                 pbuf->pb_needs_post = false;
1811                 atomic_set(&pbuf->pb_refcnt, 1);
1812         }
1813
1814         return pbuf;
1815 }
1816
1817 void
1818 lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
1819 {
1820         LASSERT(atomic_read(&pbuf->pb_refcnt) == 0);
1821         LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nbytes));
1822 }
1823
1824 static struct lnet_ping_buffer *
1825 lnet_ping_target_create(int nbytes)
1826 {
1827         struct lnet_ping_buffer *pbuf;
1828
1829         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
1830         if (pbuf == NULL) {
1831                 CERROR("Can't allocate ping source [%d]\n", nbytes);
1832                 return NULL;
1833         }
1834
1835         pbuf->pb_info.pi_nnis = 0;
1836         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1837         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1838         pbuf->pb_info.pi_features =
1839                 LNET_PING_FEAT_NI_STATUS | LNET_PING_FEAT_MULTI_RAIL;
1840
1841         return pbuf;
1842 }
1843
1844 static inline int
1845 lnet_get_net_ni_bytes_locked(struct lnet_net *net)
1846 {
1847         struct lnet_ni *ni;
1848         int bytes = 0;
1849
1850         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1851                 bytes += lnet_ping_sts_size(&ni->ni_nid);
1852
1853         return bytes;
1854 }
1855
1856 static inline int
1857 lnet_get_ni_bytes(void)
1858 {
1859         struct lnet_ni *ni;
1860         struct lnet_net *net;
1861         int bytes = 0;
1862
1863         lnet_net_lock(0);
1864
1865         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1866                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1867                         bytes += lnet_ping_sts_size(&ni->ni_nid);
1868         }
1869
1870         lnet_net_unlock(0);
1871
1872         return bytes;
1873 }
1874
1875 void
1876 lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf)
1877 {
1878         struct lnet_ni_large_status *lstat, *lend;
1879         struct lnet_ni_status *stat, *end;
1880         int nnis;
1881         int i;
1882
1883         __swab32s(&pbuf->pb_info.pi_magic);
1884         __swab32s(&pbuf->pb_info.pi_features);
1885         __swab32s(&pbuf->pb_info.pi_pid);
1886         __swab32s(&pbuf->pb_info.pi_nnis);
1887         nnis = pbuf->pb_info.pi_nnis;
1888         stat = &pbuf->pb_info.pi_ni[0];
1889         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
1890         for (i = 0; i < nnis && stat + 1 <= end; i++, stat++) {
1891                 __swab64s(&stat->ns_nid);
1892                 __swab32s(&stat->ns_status);
1893                 if (i == 0)
1894                         /* Might be total size */
1895                         __swab32s(&stat->ns_msg_size);
1896         }
1897         if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_LARGE_ADDR))
1898                 return;
1899
1900         lstat = (struct lnet_ni_large_status *)stat;
1901         lend = (void *)end;
1902         while (lstat + 1 <= lend) {
1903                 __swab32s(&lstat->ns_status);
1904                 /* struct lnet_nid never needs to be swabed */
1905                 lstat = lnet_ping_sts_next(lstat);
1906         }
1907 }
1908
1909 int
1910 lnet_ping_info_validate(struct lnet_ping_info *pinfo)
1911 {
1912         if (!pinfo)
1913                 return -EINVAL;
1914         if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
1915                 return -EPROTO;
1916         if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
1917                 return -EPROTO;
1918         /* Loopback is guaranteed to be present */
1919         if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
1920                 return -ERANGE;
1921         if (LNET_PING_INFO_LONI(pinfo) != LNET_NID_LO_0)
1922                 return -EPROTO;
1923         return 0;
1924 }
1925
1926 static void
1927 lnet_ping_target_destroy(void)
1928 {
1929         struct lnet_net *net;
1930         struct lnet_ni  *ni;
1931
1932         lnet_net_lock(LNET_LOCK_EX);
1933
1934         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1935                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1936                         lnet_ni_lock(ni);
1937                         ni->ni_status = NULL;
1938                         lnet_ni_unlock(ni);
1939                 }
1940         }
1941
1942         lnet_ping_buffer_decref(the_lnet.ln_ping_target);
1943         the_lnet.ln_ping_target = NULL;
1944
1945         lnet_net_unlock(LNET_LOCK_EX);
1946 }
1947
1948 static void
1949 lnet_ping_target_event_handler(struct lnet_event *event)
1950 {
1951         struct lnet_ping_buffer *pbuf = event->md_user_ptr;
1952
1953         if (event->unlinked)
1954                 lnet_ping_buffer_decref(pbuf);
1955 }
1956
1957 static int
1958 lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
1959                        struct lnet_handle_md *ping_mdh,
1960                        int ni_bytes, bool set_eq)
1961 {
1962         struct lnet_processid id = {
1963                 .nid = LNET_ANY_NID,
1964                 .pid = LNET_PID_ANY
1965         };
1966         struct lnet_me *me;
1967         struct lnet_md md = { NULL };
1968         int rc;
1969
1970         if (set_eq)
1971                 the_lnet.ln_ping_target_handler =
1972                         lnet_ping_target_event_handler;
1973
1974         *ppbuf = lnet_ping_target_create(ni_bytes);
1975         if (*ppbuf == NULL) {
1976                 rc = -ENOMEM;
1977                 goto fail_free_eq;
1978         }
1979
1980         /* Ping target ME/MD */
1981         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
1982                           LNET_PROTO_PING_MATCHBITS, 0,
1983                           LNET_UNLINK, LNET_INS_AFTER);
1984         if (IS_ERR(me)) {
1985                 rc = PTR_ERR(me);
1986                 CERROR("Can't create ping target ME: %d\n", rc);
1987                 goto fail_decref_ping_buffer;
1988         }
1989
1990         /* initialize md content */
1991         md.start     = &(*ppbuf)->pb_info;
1992         md.length    = (*ppbuf)->pb_nbytes;
1993         md.threshold = LNET_MD_THRESH_INF;
1994         md.max_size  = 0;
1995         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1996                        LNET_MD_MANAGE_REMOTE;
1997         md.handler   = the_lnet.ln_ping_target_handler;
1998         md.user_ptr  = *ppbuf;
1999
2000         rc = LNetMDAttach(me, &md, LNET_RETAIN, ping_mdh);
2001         if (rc != 0) {
2002                 CERROR("Can't attach ping target MD: %d\n", rc);
2003                 goto fail_decref_ping_buffer;
2004         }
2005         lnet_ping_buffer_addref(*ppbuf);
2006
2007         return 0;
2008
2009 fail_decref_ping_buffer:
2010         LASSERT(atomic_read(&(*ppbuf)->pb_refcnt) == 1);
2011         lnet_ping_buffer_decref(*ppbuf);
2012         *ppbuf = NULL;
2013 fail_free_eq:
2014         return rc;
2015 }
2016
2017 static void
2018 lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
2019                     struct lnet_handle_md *ping_mdh)
2020 {
2021         LNetMDUnlink(*ping_mdh);
2022         LNetInvalidateMDHandle(ping_mdh);
2023
2024         /* NB the MD could be busy; this just starts the unlink */
2025         wait_var_event_warning(&pbuf->pb_refcnt,
2026                                atomic_read(&pbuf->pb_refcnt) <= 1,
2027                                "Still waiting for ping data MD to unlink\n");
2028 }
2029
2030 static void
2031 lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
2032 {
2033         struct lnet_ni *ni;
2034         struct lnet_net *net;
2035         struct lnet_ni_status *ns, *end;
2036         struct lnet_ni_large_status *lns, *lend;
2037         int rc;
2038
2039         pbuf->pb_info.pi_nnis = 0;
2040         ns = &pbuf->pb_info.pi_ni[0];
2041         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
2042         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2043                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2044                         if (!nid_is_nid4(&ni->ni_nid)) {
2045                                 if (ns == &pbuf->pb_info.pi_ni[1]) {
2046                                         /* This is primary, and it is long */
2047                                         pbuf->pb_info.pi_features |=
2048                                                 LNET_PING_FEAT_PRIMARY_LARGE;
2049                                 }
2050                                 continue;
2051                         }
2052                         LASSERT(ns + 1 <= end);
2053                         ns->ns_nid = lnet_nid_to_nid4(&ni->ni_nid);
2054
2055                         lnet_ni_lock(ni);
2056                         ns->ns_status = lnet_ni_get_status_locked(ni);
2057                         ni->ni_status = &ns->ns_status;
2058                         lnet_ni_unlock(ni);
2059
2060                         pbuf->pb_info.pi_nnis++;
2061                         ns++;
2062                 }
2063         }
2064
2065         lns = (void *)ns;
2066         lend = (void *)end;
2067         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2068                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2069                         if (nid_is_nid4(&ni->ni_nid))
2070                                 continue;
2071                         LASSERT(lns + 1 <= lend);
2072
2073                         lns->ns_nid = ni->ni_nid;
2074
2075                         lnet_ni_lock(ni);
2076                         ns->ns_status = lnet_ni_get_status_locked(ni);
2077                         ni->ni_status = &lns->ns_status;
2078                         lnet_ni_unlock(ni);
2079
2080                         lns = lnet_ping_sts_next(lns);
2081                 }
2082         }
2083         if ((void *)lns > (void *)ns) {
2084                 /* Record total info size */
2085                 pbuf->pb_info.pi_ni[0].ns_msg_size =
2086                         (void *)lns - (void *)&pbuf->pb_info;
2087                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_LARGE_ADDR;
2088         }
2089
2090         /* We (ab)use the ns_status of the loopback interface to
2091          * transmit the sequence number. The first interface listed
2092          * must be the loopback interface.
2093          */
2094         rc = lnet_ping_info_validate(&pbuf->pb_info);
2095         if (rc) {
2096                 LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
2097                 LBUG();
2098         }
2099         LNET_PING_BUFFER_SEQNO(pbuf) =
2100                 atomic_inc_return(&the_lnet.ln_ping_target_seqno);
2101 }
2102
2103 static void
2104 lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
2105                         struct lnet_handle_md ping_mdh)
2106 __must_hold(&the_lnet.ln_api_mutex)
2107 {
2108         struct lnet_ping_buffer *old_pbuf = NULL;
2109         struct lnet_handle_md old_ping_md;
2110
2111         /* switch the NIs to point to the new ping info created */
2112         lnet_net_lock(LNET_LOCK_EX);
2113
2114         if (!the_lnet.ln_routing)
2115                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;
2116         if (!lnet_peer_discovery_disabled)
2117                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
2118
2119         /* Ensure only known feature bits have been set. */
2120         LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
2121         LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));
2122
2123         lnet_ping_target_install_locked(pbuf);
2124
2125         if (the_lnet.ln_ping_target) {
2126                 old_pbuf = the_lnet.ln_ping_target;
2127                 old_ping_md = the_lnet.ln_ping_target_md;
2128         }
2129         the_lnet.ln_ping_target_md = ping_mdh;
2130         the_lnet.ln_ping_target = pbuf;
2131
2132         lnet_net_unlock(LNET_LOCK_EX);
2133
2134         if (old_pbuf) {
2135                 /* unlink and free the old ping info.
2136                  * There may be outstanding traffic on this MD, and
2137                  * ln_api_mutex may be required to finalize that
2138                  * traffic. Release ln_api_mutex while we wait for
2139                  * refs on this ping buffer to drop
2140                  */
2141                 mutex_unlock(&the_lnet.ln_api_mutex);
2142                 lnet_ping_md_unlink(old_pbuf, &old_ping_md);
2143                 mutex_lock(&the_lnet.ln_api_mutex);
2144                 lnet_ping_buffer_decref(old_pbuf);
2145         }
2146
2147         lnet_push_update_to_peers(0);
2148 }
2149
2150 static void
2151 lnet_ping_target_fini(void)
2152 {
2153         lnet_ping_md_unlink(the_lnet.ln_ping_target,
2154                             &the_lnet.ln_ping_target_md);
2155
2156         lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
2157         lnet_ping_target_destroy();
2158 }
2159
2160 /* Resize the push target. */
2161 int lnet_push_target_resize(void)
2162 {
2163         struct lnet_handle_md mdh;
2164         struct lnet_handle_md old_mdh;
2165         struct lnet_ping_buffer *pbuf;
2166         struct lnet_ping_buffer *old_pbuf;
2167         int nbytes;
2168         int rc;
2169
2170 again:
2171         nbytes = the_lnet.ln_push_target_nbytes;
2172         if (nbytes <= 0) {
2173                 CDEBUG(D_NET, "Invalid nbytes %d\n", nbytes);
2174                 return -EINVAL;
2175         }
2176
2177         /* NB: lnet_ping_buffer_alloc() sets pbuf refcount to 1. That ref is
2178          * dropped when we need to resize again (see "old_pbuf" below) or when
2179          * LNet is shutdown (see lnet_push_target_fini())
2180          */
2181         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
2182         if (!pbuf) {
2183                 CDEBUG(D_NET, "Can't allocate pbuf for nbytes %d\n", nbytes);
2184                 return -ENOMEM;
2185         }
2186
2187         rc = lnet_push_target_post(pbuf, &mdh);
2188         if (rc) {
2189                 CDEBUG(D_NET, "Failed to post push target: %d\n", rc);
2190                 lnet_ping_buffer_decref(pbuf);
2191                 return rc;
2192         }
2193
2194         lnet_net_lock(LNET_LOCK_EX);
2195         old_pbuf = the_lnet.ln_push_target;
2196         old_mdh = the_lnet.ln_push_target_md;
2197         the_lnet.ln_push_target = pbuf;
2198         the_lnet.ln_push_target_md = mdh;
2199         lnet_net_unlock(LNET_LOCK_EX);
2200
2201         if (old_pbuf) {
2202                 LNetMDUnlink(old_mdh);
2203                 /* Drop ref set by lnet_ping_buffer_alloc() */
2204                 lnet_ping_buffer_decref(old_pbuf);
2205         }
2206
2207         /* Received another push or reply that requires a larger buffer */
2208         if (nbytes < the_lnet.ln_push_target_nbytes)
2209                 goto again;
2210
2211         CDEBUG(D_NET, "nbytes %d success\n", nbytes);
2212         return 0;
2213 }
2214
2215 int lnet_push_target_post(struct lnet_ping_buffer *pbuf,
2216                           struct lnet_handle_md *mdhp)
2217 {
2218         struct lnet_processid id = { LNET_ANY_NID, LNET_PID_ANY };
2219         struct lnet_md md = { NULL };
2220         struct lnet_me *me;
2221         int rc;
2222
2223         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
2224                           LNET_PROTO_PING_MATCHBITS, 0,
2225                           LNET_UNLINK, LNET_INS_AFTER);
2226         if (IS_ERR(me)) {
2227                 rc = PTR_ERR(me);
2228                 CERROR("Can't create push target ME: %d\n", rc);
2229                 return rc;
2230         }
2231
2232         pbuf->pb_needs_post = false;
2233
2234         /* This reference is dropped by lnet_push_target_event_handler() */
2235         lnet_ping_buffer_addref(pbuf);
2236
2237         /* initialize md content */
2238         md.start     = &pbuf->pb_info;
2239         md.length    = pbuf->pb_nbytes;
2240         md.threshold = 1;
2241         md.max_size  = 0;
2242         md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE;
2243         md.user_ptr  = pbuf;
2244         md.handler   = the_lnet.ln_push_target_handler;
2245
2246         rc = LNetMDAttach(me, &md, LNET_UNLINK, mdhp);
2247         if (rc) {
2248                 CERROR("Can't attach push MD: %d\n", rc);
2249                 lnet_ping_buffer_decref(pbuf);
2250                 pbuf->pb_needs_post = true;
2251                 return rc;
2252         }
2253
2254         CDEBUG(D_NET, "posted push target %p\n", pbuf);
2255
2256         return 0;
2257 }
2258
2259 static void lnet_push_target_event_handler(struct lnet_event *ev)
2260 {
2261         struct lnet_ping_buffer *pbuf = ev->md_user_ptr;
2262
2263         CDEBUG(D_NET, "type %d status %d unlinked %d\n", ev->type, ev->status,
2264                ev->unlinked);
2265
2266         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
2267                 lnet_swap_pinginfo(pbuf);
2268
2269         if (ev->type == LNET_EVENT_UNLINK) {
2270                 /* Drop ref added by lnet_push_target_post() */
2271                 lnet_ping_buffer_decref(pbuf);
2272                 return;
2273         }
2274
2275         lnet_peer_push_event(ev);
2276         if (ev->unlinked)
2277                 /* Drop ref added by lnet_push_target_post */
2278                 lnet_ping_buffer_decref(pbuf);
2279 }
2280
2281 /* Initialize the push target. */
2282 static int lnet_push_target_init(void)
2283 {
2284         int rc;
2285
2286         if (the_lnet.ln_push_target)
2287                 return -EALREADY;
2288
2289         the_lnet.ln_push_target_handler =
2290                 lnet_push_target_event_handler;
2291
2292         rc = LNetSetLazyPortal(LNET_RESERVED_PORTAL);
2293         LASSERT(rc == 0);
2294
2295         /* Start at the required minimum, we'll enlarge if required. */
2296         the_lnet.ln_push_target_nbytes = LNET_PING_INFO_MIN_SIZE;
2297
2298         rc = lnet_push_target_resize();
2299         if (rc) {
2300                 LNetClearLazyPortal(LNET_RESERVED_PORTAL);
2301                 the_lnet.ln_push_target_handler = NULL;
2302         }
2303
2304         return rc;
2305 }
2306
2307 /* Clean up the push target. */
2308 static void lnet_push_target_fini(void)
2309 {
2310         if (!the_lnet.ln_push_target)
2311                 return;
2312
2313         /* Unlink and invalidate to prevent new references. */
2314         LNetMDUnlink(the_lnet.ln_push_target_md);
2315         LNetInvalidateMDHandle(&the_lnet.ln_push_target_md);
2316
2317         /* Wait for the unlink to complete. */
2318         wait_var_event_warning(&the_lnet.ln_push_target->pb_refcnt,
2319                                atomic_read(&the_lnet.ln_push_target->pb_refcnt) <= 1,
2320                                "Still waiting for ping data MD to unlink\n");
2321
2322         /* Drop ref set by lnet_ping_buffer_alloc() */
2323         lnet_ping_buffer_decref(the_lnet.ln_push_target);
2324         the_lnet.ln_push_target = NULL;
2325         the_lnet.ln_push_target_nbytes = 0;
2326
2327         LNetClearLazyPortal(LNET_RESERVED_PORTAL);
2328         lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
2329         the_lnet.ln_push_target_handler = NULL;
2330 }
2331
2332 static int
2333 lnet_ni_tq_credits(struct lnet_ni *ni)
2334 {
2335         int     credits;
2336
2337         LASSERT(ni->ni_ncpts >= 1);
2338
2339         if (ni->ni_ncpts == 1)
2340                 return ni->ni_net->net_tunables.lct_max_tx_credits;
2341
2342         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
2343         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
2344         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
2345
2346         return credits;
2347 }
2348
2349 static void
2350 lnet_ni_unlink_locked(struct lnet_ni *ni)
2351 {
2352         /* move it to zombie list and nobody can find it anymore */
2353         LASSERT(!list_empty(&ni->ni_netlist));
2354         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
2355         lnet_ni_decref_locked(ni, 0);
2356 }
2357
2358 static void
2359 lnet_clear_zombies_nis_locked(struct lnet_net *net)
2360 {
2361         int             i;
2362         int             islo;
2363         struct lnet_ni  *ni;
2364         struct list_head *zombie_list = &net->net_ni_zombie;
2365
2366         /*
2367          * Now wait for the NIs I just nuked to show up on the zombie
2368          * list and shut them down in guaranteed thread context
2369          */
2370         i = 2;
2371         while ((ni = list_first_entry_or_null(zombie_list,
2372                                               struct lnet_ni,
2373                                               ni_netlist)) != NULL) {
2374                 int *ref;
2375                 int j;
2376
2377                 list_del_init(&ni->ni_netlist);
2378                 /* the ni should be in deleting state. If it's not it's
2379                  * a bug */
2380                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
2381                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
2382                         if (*ref == 0)
2383                                 continue;
2384                         /* still busy, add it back to zombie list */
2385                         list_add(&ni->ni_netlist, zombie_list);
2386                         break;
2387                 }
2388
2389                 if (!list_empty(&ni->ni_netlist)) {
2390                         /* Unlock mutex while waiting to allow other
2391                          * threads to read the LNet state and fall through
2392                          * to avoid deadlock
2393                          */
2394                         lnet_net_unlock(LNET_LOCK_EX);
2395                         mutex_unlock(&the_lnet.ln_api_mutex);
2396
2397                         ++i;
2398                         if ((i & (-i)) == i) {
2399                                 CDEBUG(D_WARNING,
2400                                        "Waiting for zombie LNI %s\n",
2401                                        libcfs_nidstr(&ni->ni_nid));
2402                         }
2403                         schedule_timeout_uninterruptible(cfs_time_seconds(1));
2404
2405                         mutex_lock(&the_lnet.ln_api_mutex);
2406                         lnet_net_lock(LNET_LOCK_EX);
2407                         continue;
2408                 }
2409
2410                 lnet_net_unlock(LNET_LOCK_EX);
2411
2412                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
2413
2414                 LASSERT(!in_interrupt());
2415                 /* Holding the LND mutex makes it safe for lnd_shutdown
2416                  * to call module_put(). Module unload cannot finish
2417                  * until lnet_unregister_lnd() completes, and that
2418                  * requires the LND mutex.
2419                  */
2420                 mutex_unlock(&the_lnet.ln_api_mutex);
2421                 mutex_lock(&the_lnet.ln_lnd_mutex);
2422                 (net->net_lnd->lnd_shutdown)(ni);
2423                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2424                 mutex_lock(&the_lnet.ln_api_mutex);
2425
2426                 if (!islo)
2427                         CDEBUG(D_LNI, "Removed LNI %s\n",
2428                               libcfs_nidstr(&ni->ni_nid));
2429
2430                 lnet_ni_free(ni);
2431                 i = 2;
2432                 lnet_net_lock(LNET_LOCK_EX);
2433         }
2434 }
2435
2436 /* shutdown down the NI and release refcount */
2437 static void
2438 lnet_shutdown_lndni(struct lnet_ni *ni)
2439 {
2440         int i;
2441         struct lnet_net *net = ni->ni_net;
2442
2443         lnet_net_lock(LNET_LOCK_EX);
2444         lnet_ni_lock(ni);
2445         ni->ni_state = LNET_NI_STATE_DELETING;
2446         lnet_ni_unlock(ni);
2447         lnet_ni_unlink_locked(ni);
2448         lnet_incr_dlc_seq();
2449         lnet_net_unlock(LNET_LOCK_EX);
2450
2451         /* clear messages for this NI on the lazy portal */
2452         for (i = 0; i < the_lnet.ln_nportals; i++)
2453                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
2454
2455         lnet_net_lock(LNET_LOCK_EX);
2456         lnet_clear_zombies_nis_locked(net);
2457         lnet_net_unlock(LNET_LOCK_EX);
2458 }
2459
2460 static void
2461 lnet_shutdown_lndnet(struct lnet_net *net)
2462 {
2463         struct lnet_ni *ni;
2464
2465         lnet_net_lock(LNET_LOCK_EX);
2466
2467         list_del_init(&net->net_list);
2468
2469         while ((ni = list_first_entry_or_null(&net->net_ni_list,
2470                                               struct lnet_ni,
2471                                               ni_netlist)) != NULL) {
2472                 lnet_net_unlock(LNET_LOCK_EX);
2473                 lnet_shutdown_lndni(ni);
2474                 lnet_net_lock(LNET_LOCK_EX);
2475         }
2476
2477         lnet_net_unlock(LNET_LOCK_EX);
2478
2479         /* Do peer table cleanup for this net */
2480         lnet_peer_tables_cleanup(net);
2481
2482         lnet_net_free(net);
2483 }
2484
2485 static void
2486 lnet_shutdown_lndnets(void)
2487 {
2488         struct lnet_net *net;
2489         LIST_HEAD(resend);
2490         struct lnet_msg *msg, *tmp;
2491
2492         /* NB called holding the global mutex */
2493
2494         /* All quiet on the API front */
2495         LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING ||
2496                 the_lnet.ln_state == LNET_STATE_STOPPING);
2497         LASSERT(the_lnet.ln_refcount == 0);
2498
2499         lnet_net_lock(LNET_LOCK_EX);
2500         the_lnet.ln_state = LNET_STATE_STOPPING;
2501
2502         /*
2503          * move the nets to the zombie list to avoid them being
2504          * picked up for new work. LONET is also included in the
2505          * Nets that will be moved to the zombie list
2506          */
2507         list_splice_init(&the_lnet.ln_nets, &the_lnet.ln_net_zombie);
2508
2509         /* Drop the cached loopback Net. */
2510         if (the_lnet.ln_loni != NULL) {
2511                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
2512                 the_lnet.ln_loni = NULL;
2513         }
2514         lnet_net_unlock(LNET_LOCK_EX);
2515
2516         /* iterate through the net zombie list and delete each net */
2517         while ((net = list_first_entry_or_null(&the_lnet.ln_net_zombie,
2518                                                struct lnet_net,
2519                                                net_list)) != NULL)
2520                 lnet_shutdown_lndnet(net);
2521
2522         spin_lock(&the_lnet.ln_msg_resend_lock);
2523         list_splice(&the_lnet.ln_msg_resend, &resend);
2524         spin_unlock(&the_lnet.ln_msg_resend_lock);
2525
2526         list_for_each_entry_safe(msg, tmp, &resend, msg_list) {
2527                 list_del_init(&msg->msg_list);
2528                 msg->msg_no_resend = true;
2529                 lnet_finalize(msg, -ECANCELED);
2530         }
2531
2532         lnet_net_lock(LNET_LOCK_EX);
2533         the_lnet.ln_state = LNET_STATE_SHUTDOWN;
2534         lnet_net_unlock(LNET_LOCK_EX);
2535 }
2536
2537 static int
2538 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
2539 {
2540         int                     rc = -EINVAL;
2541         struct lnet_tx_queue    *tq;
2542         int                     i;
2543         struct lnet_net         *net = ni->ni_net;
2544
2545         mutex_lock(&the_lnet.ln_lnd_mutex);
2546
2547         if (tun) {
2548                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
2549                 ni->ni_lnd_tunables_set = true;
2550         }
2551
2552         rc = (net->net_lnd->lnd_startup)(ni);
2553
2554         mutex_unlock(&the_lnet.ln_lnd_mutex);
2555
2556         if (rc != 0) {
2557                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
2558                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
2559                 goto failed0;
2560         }
2561
2562         /* We keep a reference on the loopback net through the loopback NI */
2563         if (net->net_lnd->lnd_type == LOLND) {
2564                 lnet_ni_addref(ni);
2565                 LASSERT(the_lnet.ln_loni == NULL);
2566                 the_lnet.ln_loni = ni;
2567                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
2568                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
2569                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
2570                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
2571                 return 0;
2572         }
2573
2574         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
2575             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
2576                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
2577                                    libcfs_lnd2str(net->net_lnd->lnd_type),
2578                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
2579                                         "" : "per-peer ");
2580                 /* shutdown the NI since if we get here then it must've already
2581                  * been started
2582                  */
2583                 lnet_shutdown_lndni(ni);
2584                 return -EINVAL;
2585         }
2586
2587         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
2588                 tq->tq_credits_min =
2589                 tq->tq_credits_max =
2590                 tq->tq_credits = lnet_ni_tq_credits(ni);
2591         }
2592
2593         atomic_set(&ni->ni_tx_credits,
2594                    lnet_ni_tq_credits(ni) * ni->ni_ncpts);
2595         atomic_set(&ni->ni_healthv, LNET_MAX_HEALTH_VALUE);
2596
2597         /* Nodes with small feet have little entropy. The NID for this
2598          * node gives the most entropy in the low bits.
2599          */
2600         add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid));
2601
2602         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
2603                 libcfs_nidstr(&ni->ni_nid),
2604                 ni->ni_net->net_tunables.lct_peer_tx_credits,
2605                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
2606                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
2607                 ni->ni_net->net_tunables.lct_peer_timeout);
2608
2609         return 0;
2610 failed0:
2611         lnet_ni_free(ni);
2612         return rc;
2613 }
2614
2615 static const struct lnet_lnd *lnet_load_lnd(u32 lnd_type)
2616 {
2617         const struct lnet_lnd *lnd;
2618         int rc = 0;
2619
2620         mutex_lock(&the_lnet.ln_lnd_mutex);
2621         lnd = lnet_find_lnd_by_type(lnd_type);
2622         if (!lnd) {
2623                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2624                 rc = request_module("%s", libcfs_lnd2modname(lnd_type));
2625                 mutex_lock(&the_lnet.ln_lnd_mutex);
2626
2627                 lnd = lnet_find_lnd_by_type(lnd_type);
2628                 if (!lnd) {
2629                         mutex_unlock(&the_lnet.ln_lnd_mutex);
2630                         CERROR("Can't load LND %s, module %s, rc=%d\n",
2631                         libcfs_lnd2str(lnd_type),
2632                         libcfs_lnd2modname(lnd_type), rc);
2633 #ifndef HAVE_MODULE_LOADING_SUPPORT
2634                         LCONSOLE_ERROR_MSG(0x104,
2635                                            "Your kernel must be compiled with kernel module loading support.");
2636 #endif
2637                         return ERR_PTR(-EINVAL);
2638                 }
2639         }
2640         mutex_unlock(&the_lnet.ln_lnd_mutex);
2641
2642         return lnd;
2643 }
2644
2645 static int
2646 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
2647 {
2648         struct lnet_ni *ni;
2649         struct lnet_net *net_l = NULL;
2650         LIST_HEAD(local_ni_list);
2651         int rc;
2652         int ni_count = 0;
2653         __u32 lnd_type;
2654         const struct lnet_lnd  *lnd;
2655         int peer_timeout =
2656                 net->net_tunables.lct_peer_timeout;
2657         int maxtxcredits =
2658                 net->net_tunables.lct_max_tx_credits;
2659         int peerrtrcredits =
2660                 net->net_tunables.lct_peer_rtr_credits;
2661
2662         /*
2663          * make sure that this net is unique. If it isn't then
2664          * we are adding interfaces to an already existing network, and
2665          * 'net' is just a convenient way to pass in the list.
2666          * if it is unique we need to find the LND and load it if
2667          * necessary.
2668          */
2669         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
2670                 lnd_type = LNET_NETTYP(net->net_id);
2671
2672                 lnd = lnet_load_lnd(lnd_type);
2673                 if (IS_ERR(lnd)) {
2674                         rc = PTR_ERR(lnd);
2675                         goto failed0;
2676                 }
2677
2678                 mutex_lock(&the_lnet.ln_lnd_mutex);
2679                 net->net_lnd = lnd;
2680                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2681
2682                 net_l = net;
2683         }
2684
2685         /*
2686          * net_l: if the network being added is unique then net_l
2687          *        will point to that network
2688          *        if the network being added is not unique then
2689          *        net_l points to the existing network.
2690          *
2691          * When we enter the loop below, we'll pick NIs off he
2692          * network beign added and start them up, then add them to
2693          * a local ni list. Once we've successfully started all
2694          * the NIs then we join the local NI list (of started up
2695          * networks) with the net_l->net_ni_list, which should
2696          * point to the correct network to add the new ni list to
2697          *
2698          * If any of the new NIs fail to start up, then we want to
2699          * iterate through the local ni list, which should include
2700          * any NIs which were successfully started up, and shut
2701          * them down.
2702          *
2703          * After than we want to delete the network being added,
2704          * to avoid a memory leak.
2705          */
2706         while ((ni = list_first_entry_or_null(&net->net_ni_added,
2707                                               struct lnet_ni,
2708                                               ni_netlist)) != NULL) {
2709                 list_del_init(&ni->ni_netlist);
2710
2711                 /* make sure that the the NI we're about to start
2712                  * up is actually unique. if it's not fail. */
2713                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
2714                                         ni->ni_interface)) {
2715                         rc = -EEXIST;
2716                         goto failed1;
2717                 }
2718
2719                 /* adjust the pointer the parent network, just in case it
2720                  * the net is a duplicate */
2721                 ni->ni_net = net_l;
2722
2723                 rc = lnet_startup_lndni(ni, tun);
2724
2725                 if (rc != 0)
2726                         goto failed1;
2727
2728                 lnet_ni_addref(ni);
2729                 list_add_tail(&ni->ni_netlist, &local_ni_list);
2730
2731                 ni_count++;
2732         }
2733
2734         lnet_net_lock(LNET_LOCK_EX);
2735         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
2736         lnet_incr_dlc_seq();
2737
2738         list_for_each_entry(ni, &net_l->net_ni_list, ni_netlist) {
2739                 if (!ni)
2740                         break;
2741                 lnet_ni_lock(ni);
2742                 ni->ni_state = LNET_NI_STATE_ACTIVE;
2743                 lnet_ni_unlock(ni);
2744         }
2745         lnet_net_unlock(LNET_LOCK_EX);
2746
2747         /* if the network is not unique then we don't want to keep
2748          * it around after we're done. Free it. Otherwise add that
2749          * net to the global the_lnet.ln_nets */
2750         if (net_l != net && net_l != NULL) {
2751                 /*
2752                  * TODO - note. currently the tunables can not be updated
2753                  * once added
2754                  */
2755                 lnet_net_free(net);
2756         } else {
2757                 /*
2758                  * restore tunables after it has been overwitten by the
2759                  * lnd
2760                  */
2761                 if (peer_timeout != -1)
2762                         net->net_tunables.lct_peer_timeout = peer_timeout;
2763                 if (maxtxcredits != -1)
2764                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
2765                 if (peerrtrcredits != -1)
2766                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
2767
2768                 lnet_net_lock(LNET_LOCK_EX);
2769                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
2770                 lnet_net_unlock(LNET_LOCK_EX);
2771         }
2772
2773         return ni_count;
2774
2775 failed1:
2776         /*
2777          * shutdown the new NIs that are being started up
2778          * free the NET being started
2779          */
2780         while ((ni = list_first_entry_or_null(&local_ni_list,
2781                                               struct lnet_ni,
2782                                               ni_netlist)) != NULL)
2783                 lnet_shutdown_lndni(ni);
2784
2785 failed0:
2786         lnet_net_free(net);
2787
2788         return rc;
2789 }
2790
2791 static int
2792 lnet_startup_lndnets(struct list_head *netlist)
2793 {
2794         struct lnet_net         *net;
2795         int                     rc;
2796         int                     ni_count = 0;
2797
2798         /*
2799          * Change to running state before bringing up the LNDs. This
2800          * allows lnet_shutdown_lndnets() to assert that we've passed
2801          * through here.
2802          */
2803         lnet_net_lock(LNET_LOCK_EX);
2804         the_lnet.ln_state = LNET_STATE_RUNNING;
2805         lnet_net_unlock(LNET_LOCK_EX);
2806
2807         while ((net = list_first_entry_or_null(netlist,
2808                                                struct lnet_net,
2809                                                net_list)) != NULL) {
2810                 list_del_init(&net->net_list);
2811
2812                 rc = lnet_startup_lndnet(net, NULL);
2813
2814                 if (rc < 0)
2815                         goto failed;
2816
2817                 ni_count += rc;
2818         }
2819
2820         return ni_count;
2821 failed:
2822         lnet_shutdown_lndnets();
2823
2824         return rc;
2825 }
2826
2827 static int lnet_genl_parse_list(struct sk_buff *msg,
2828                                 const struct ln_key_list *data[], u16 idx)
2829 {
2830         const struct ln_key_list *list = data[idx];
2831         const struct ln_key_props *props;
2832         struct nlattr *node;
2833         u16 count;
2834
2835         if (!list)
2836                 return 0;
2837
2838         if (!list->lkl_maxattr)
2839                 return -ERANGE;
2840
2841         props = list->lkl_list;
2842         if (!props)
2843                 return -EINVAL;
2844
2845         node = nla_nest_start(msg, LN_SCALAR_ATTR_LIST);
2846         if (!node)
2847                 return -ENOBUFS;
2848
2849         for (count = 1; count <= list->lkl_maxattr; count++) {
2850                 struct nlattr *key = nla_nest_start(msg, count);
2851
2852                 if (count == 1)
2853                         nla_put_u16(msg, LN_SCALAR_ATTR_LIST_SIZE,
2854                                     list->lkl_maxattr);
2855
2856                 nla_put_u16(msg, LN_SCALAR_ATTR_INDEX, count);
2857                 if (props[count].lkp_value)
2858                         nla_put_string(msg, LN_SCALAR_ATTR_VALUE,
2859                                        props[count].lkp_value);
2860                 if (props[count].lkp_key_format)
2861                         nla_put_u16(msg, LN_SCALAR_ATTR_KEY_FORMAT,
2862                                     props[count].lkp_key_format);
2863                 nla_put_u16(msg, LN_SCALAR_ATTR_NLA_TYPE,
2864                             props[count].lkp_data_type);
2865                 if (props[count].lkp_data_type == NLA_NESTED) {
2866                         int rc;
2867
2868                         rc = lnet_genl_parse_list(msg, data, ++idx);
2869                         if (rc < 0)
2870                                 return rc;
2871                         idx = rc;
2872                 }
2873
2874                 nla_nest_end(msg, key);
2875         }
2876
2877         nla_nest_end(msg, node);
2878         return idx;
2879 }
2880
2881 int lnet_genl_send_scalar_list(struct sk_buff *msg, u32 portid, u32 seq,
2882                                const struct genl_family *family, int flags,
2883                                u8 cmd, const struct ln_key_list *data[])
2884 {
2885         int rc = 0;
2886         void *hdr;
2887
2888         if (!data[0])
2889                 return -EINVAL;
2890
2891         hdr = genlmsg_put(msg, portid, seq, family, flags, cmd);
2892         if (!hdr)
2893                 GOTO(canceled, rc = -EMSGSIZE);
2894
2895         rc = lnet_genl_parse_list(msg, data, 0);
2896         if (rc < 0)
2897                 GOTO(canceled, rc);
2898
2899         genlmsg_end(msg, hdr);
2900 canceled:
2901         if (rc < 0)
2902                 genlmsg_cancel(msg, hdr);
2903         return rc > 0 ? 0 : rc;
2904 }
2905 EXPORT_SYMBOL(lnet_genl_send_scalar_list);
2906
2907 static struct genl_family lnet_family;
2908
2909 /**
2910  * Initialize LNet library.
2911  *
2912  * Automatically called at module loading time. Caller has to call
2913  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
2914  * latter returned 0. It must be called exactly once.
2915  *
2916  * \retval 0 on success
2917  * \retval -ve on failures.
2918  */
2919 int lnet_lib_init(void)
2920 {
2921         int rc;
2922
2923         lnet_assert_wire_constants();
2924
2925         /* refer to global cfs_cpt_table for now */
2926         the_lnet.ln_cpt_table = cfs_cpt_tab;
2927         the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab);
2928
2929         LASSERT(the_lnet.ln_cpt_number > 0);
2930         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
2931                 /* we are under risk of consuming all lh_cookie */
2932                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
2933                        "please change setting of CPT-table and retry\n",
2934                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
2935                 return -E2BIG;
2936         }
2937
2938         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
2939                 the_lnet.ln_cpt_bits++;
2940
2941         rc = lnet_create_locks();
2942         if (rc != 0) {
2943                 CERROR("Can't create LNet global locks: %d\n", rc);
2944                 return rc;
2945         }
2946
2947         rc = genl_register_family(&lnet_family);
2948         if (rc != 0) {
2949                 lnet_destroy_locks();
2950                 CERROR("Can't register LNet netlink family: %d\n", rc);
2951                 return rc;
2952         }
2953
2954         the_lnet.ln_refcount = 0;
2955         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
2956         INIT_LIST_HEAD(&the_lnet.ln_msg_resend);
2957
2958         /* The hash table size is the number of bits it takes to express the set
2959          * ln_num_routes, minus 1 (better to under estimate than over so we
2960          * don't waste memory). */
2961         if (rnet_htable_size <= 0)
2962                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
2963         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
2964                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
2965         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
2966                                            order_base_2(rnet_htable_size) - 1);
2967
2968         /* All LNDs apart from the LOLND are in separate modules.  They
2969          * register themselves when their module loads, and unregister
2970          * themselves when their module is unloaded. */
2971         lnet_register_lnd(&the_lolnd);
2972         return 0;
2973 }
2974
2975 /**
2976  * Finalize LNet library.
2977  *
2978  * \pre lnet_lib_init() called with success.
2979  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
2980  *
2981  * As this happens at module-unload, all lnds must already be unloaded,
2982  * so they must already be unregistered.
2983  */
2984 void lnet_lib_exit(void)
2985 {
2986         int i;
2987
2988         LASSERT(the_lnet.ln_refcount == 0);
2989         lnet_unregister_lnd(&the_lolnd);
2990         for (i = 0; i < NUM_LNDS; i++)
2991                 LASSERT(!the_lnet.ln_lnds[i]);
2992         lnet_destroy_locks();
2993         genl_unregister_family(&lnet_family);
2994 }
2995
2996 /**
2997  * Set LNet PID and start LNet interfaces, routing, and forwarding.
2998  *
2999  * Users must call this function at least once before any other functions.
3000  * For each successful call there must be a corresponding call to
3001  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
3002  * ignored.
3003  *
3004  * The PID used by LNet may be different from the one requested.
3005  * See LNetGetId().
3006  *
3007  * \param requested_pid PID requested by the caller.
3008  *
3009  * \return >= 0 on success, and < 0 error code on failures.
3010  */
3011 int
3012 LNetNIInit(lnet_pid_t requested_pid)
3013 {
3014         int im_a_router = 0;
3015         int rc;
3016         int ni_bytes;
3017         struct lnet_ping_buffer *pbuf;
3018         struct lnet_handle_md ping_mdh;
3019         LIST_HEAD(net_head);
3020         struct lnet_net *net;
3021
3022         mutex_lock(&the_lnet.ln_api_mutex);
3023
3024         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
3025
3026         if (the_lnet.ln_state == LNET_STATE_STOPPING) {
3027                 mutex_unlock(&the_lnet.ln_api_mutex);
3028                 return -ESHUTDOWN;
3029         }
3030
3031         if (the_lnet.ln_refcount > 0) {
3032                 rc = the_lnet.ln_refcount++;
3033                 mutex_unlock(&the_lnet.ln_api_mutex);
3034                 return rc;
3035         }
3036
3037         rc = lnet_prepare(requested_pid);
3038         if (rc != 0) {
3039                 mutex_unlock(&the_lnet.ln_api_mutex);
3040                 return rc;
3041         }
3042
3043         /* create a network for Loopback network */
3044         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
3045         if (net == NULL) {
3046                 rc = -ENOMEM;
3047                 goto err_empty_list;
3048         }
3049
3050         /* Add in the loopback NI */
3051         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
3052                 rc = -ENOMEM;
3053                 goto err_empty_list;
3054         }
3055
3056         if (use_tcp_bonding)
3057                 CWARN("use_tcp_bonding has been removed. Use Multi-Rail and Dynamic Discovery instead, see LU-13641\n");
3058
3059         /* If LNet is being initialized via DLC it is possible
3060          * that the user requests not to load module parameters (ones which
3061          * are supported by DLC) on initialization.  Therefore, make sure not
3062          * to load networks, routes and forwarding from module parameters
3063          * in this case.  On cleanup in case of failure only clean up
3064          * routes if it has been loaded */
3065         if (!the_lnet.ln_nis_from_mod_params) {
3066                 rc = lnet_parse_networks(&net_head, lnet_get_networks());
3067                 if (rc < 0)
3068                         goto err_empty_list;
3069         }
3070
3071         rc = lnet_startup_lndnets(&net_head);
3072         if (rc < 0)
3073                 goto err_empty_list;
3074
3075         if (!the_lnet.ln_nis_from_mod_params) {
3076                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
3077                 if (rc != 0)
3078                         goto err_shutdown_lndnis;
3079
3080                 rc = lnet_rtrpools_alloc(im_a_router);
3081                 if (rc != 0)
3082                         goto err_destroy_routes;
3083         }
3084
3085         rc = lnet_acceptor_start();
3086         if (rc != 0)
3087                 goto err_destroy_routes;
3088
3089         the_lnet.ln_refcount = 1;
3090         /* Now I may use my own API functions... */
3091
3092         ni_bytes = LNET_PING_INFO_HDR_SIZE;
3093         list_for_each_entry(net, &the_lnet.ln_nets, net_list)
3094                 ni_bytes += lnet_get_net_ni_bytes_locked(net);
3095
3096         rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_bytes, true);
3097         if (rc != 0)
3098                 goto err_acceptor_stop;
3099
3100         lnet_ping_target_update(pbuf, ping_mdh);
3101
3102         the_lnet.ln_mt_handler = lnet_mt_event_handler;
3103
3104         rc = lnet_push_target_init();
3105         if (rc != 0)
3106                 goto err_stop_ping;
3107
3108         rc = lnet_monitor_thr_start();
3109         if (rc != 0)
3110                 goto err_destroy_push_target;
3111
3112         rc = lnet_peer_discovery_start();
3113         if (rc != 0)
3114                 goto err_stop_monitor_thr;
3115
3116         lnet_fault_init();
3117         lnet_router_debugfs_init();
3118
3119         mutex_unlock(&the_lnet.ln_api_mutex);
3120
3121         complete_all(&the_lnet.ln_started);
3122
3123         /* wait for all routers to start */
3124         lnet_wait_router_start();
3125
3126         return 0;
3127
3128 err_stop_monitor_thr:
3129         lnet_monitor_thr_stop();
3130 err_destroy_push_target:
3131         lnet_push_target_fini();
3132 err_stop_ping:
3133         lnet_ping_target_fini();
3134 err_acceptor_stop:
3135         the_lnet.ln_refcount = 0;
3136         lnet_acceptor_stop();
3137 err_destroy_routes:
3138         if (!the_lnet.ln_nis_from_mod_params)
3139                 lnet_destroy_routes();
3140 err_shutdown_lndnis:
3141         lnet_shutdown_lndnets();
3142 err_empty_list:
3143         lnet_unprepare();
3144         LASSERT(rc < 0);
3145         mutex_unlock(&the_lnet.ln_api_mutex);
3146         while ((net = list_first_entry_or_null(&net_head,
3147                                                struct lnet_net,
3148                                                net_list)) != NULL) {
3149                 list_del_init(&net->net_list);
3150                 lnet_net_free(net);
3151         }
3152         return rc;
3153 }
3154 EXPORT_SYMBOL(LNetNIInit);
3155
3156 /**
3157  * Stop LNet interfaces, routing, and forwarding.
3158  *
3159  * Users must call this function once for each successful call to LNetNIInit().
3160  * Once the LNetNIFini() operation has been started, the results of pending
3161  * API operations are undefined.
3162  *
3163  * \return always 0 for current implementation.
3164  */
3165 int
3166 LNetNIFini(void)
3167 {
3168         mutex_lock(&the_lnet.ln_api_mutex);
3169
3170         LASSERT(the_lnet.ln_refcount > 0);
3171
3172         if (the_lnet.ln_refcount != 1) {
3173                 the_lnet.ln_refcount--;
3174         } else {
3175                 LASSERT(!the_lnet.ln_niinit_self);
3176
3177                 lnet_net_lock(LNET_LOCK_EX);
3178                 the_lnet.ln_state = LNET_STATE_STOPPING;
3179                 lnet_net_unlock(LNET_LOCK_EX);
3180
3181                 lnet_fault_fini();
3182
3183                 lnet_router_debugfs_fini();
3184                 lnet_peer_discovery_stop();
3185                 lnet_monitor_thr_stop();
3186                 lnet_push_target_fini();
3187                 lnet_ping_target_fini();
3188
3189                 /* Teardown fns that use my own API functions BEFORE here */
3190                 the_lnet.ln_refcount = 0;
3191
3192                 lnet_acceptor_stop();
3193                 lnet_destroy_routes();
3194                 lnet_shutdown_lndnets();
3195                 lnet_unprepare();
3196         }
3197
3198         mutex_unlock(&the_lnet.ln_api_mutex);
3199         return 0;
3200 }
3201 EXPORT_SYMBOL(LNetNIFini);
3202
3203 /**
3204  * Grabs the ni data from the ni structure and fills the out
3205  * parameters
3206  *
3207  * \param[in] ni network        interface structure
3208  * \param[out] cfg_ni           NI config information
3209  * \param[out] tun              network and LND tunables
3210  */
3211 static void
3212 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
3213                    struct lnet_ioctl_config_lnd_tunables *tun,
3214                    struct lnet_ioctl_element_stats *stats,
3215                    __u32 tun_size)
3216 {
3217         size_t min_size = 0;
3218         int i;
3219
3220         if (!ni || !cfg_ni || !tun || !nid_is_nid4(&ni->ni_nid))
3221                 return;
3222
3223         if (ni->ni_interface != NULL) {
3224                 strncpy(cfg_ni->lic_ni_intf,
3225                         ni->ni_interface,
3226                         sizeof(cfg_ni->lic_ni_intf));
3227         }
3228
3229         cfg_ni->lic_nid = lnet_nid_to_nid4(&ni->ni_nid);
3230         cfg_ni->lic_status = lnet_ni_get_status_locked(ni);
3231         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
3232
3233         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
3234
3235         if (stats) {
3236                 stats->iel_send_count = lnet_sum_stats(&ni->ni_stats,
3237                                                        LNET_STATS_TYPE_SEND);
3238                 stats->iel_recv_count = lnet_sum_stats(&ni->ni_stats,
3239                                                        LNET_STATS_TYPE_RECV);
3240                 stats->iel_drop_count = lnet_sum_stats(&ni->ni_stats,
3241                                                        LNET_STATS_TYPE_DROP);
3242         }
3243
3244         /*
3245          * tun->lt_tun will always be present, but in order to be
3246          * backwards compatible, we need to deal with the cases when
3247          * tun->lt_tun is smaller than what the kernel has, because it
3248          * comes from an older version of a userspace program, then we'll
3249          * need to copy as much information as we have available space.
3250          */
3251         min_size = tun_size - sizeof(tun->lt_cmn);
3252         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
3253
3254         /* copy over the cpts */
3255         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
3256             ni->ni_cpts == NULL)  {
3257                 for (i = 0; i < ni->ni_ncpts; i++)
3258                         cfg_ni->lic_cpts[i] = i;
3259         } else {
3260                 for (i = 0;
3261                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
3262                      i < LNET_MAX_SHOW_NUM_CPT;
3263                      i++)
3264                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
3265         }
3266         cfg_ni->lic_ncpts = ni->ni_ncpts;
3267 }
3268
3269 /**
3270  * NOTE: This is a legacy function left in the code to be backwards
3271  * compatible with older userspace programs. It should eventually be
3272  * removed.
3273  *
3274  * Grabs the ni data from the ni structure and fills the out
3275  * parameters
3276  *
3277  * \param[in] ni network        interface structure
3278  * \param[out] config           config information
3279  */
3280 static void
3281 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
3282                          struct lnet_ioctl_config_data *config)
3283 {
3284         struct lnet_ioctl_net_config *net_config;
3285         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
3286         size_t min_size, tunable_size = 0;
3287         int i;
3288
3289         if (!ni || !config || !nid_is_nid4(&ni->ni_nid))
3290                 return;
3291
3292         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
3293         if (!net_config)
3294                 return;
3295
3296         if (!ni->ni_interface)
3297                 return;
3298
3299         strncpy(net_config->ni_interface,
3300                 ni->ni_interface,
3301                 sizeof(net_config->ni_interface));
3302
3303         config->cfg_nid = lnet_nid_to_nid4(&ni->ni_nid);
3304         config->cfg_config_u.cfg_net.net_peer_timeout =
3305                 ni->ni_net->net_tunables.lct_peer_timeout;
3306         config->cfg_config_u.cfg_net.net_max_tx_credits =
3307                 ni->ni_net->net_tunables.lct_max_tx_credits;
3308         config->cfg_config_u.cfg_net.net_peer_tx_credits =
3309                 ni->ni_net->net_tunables.lct_peer_tx_credits;
3310         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
3311                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
3312
3313         net_config->ni_status = lnet_ni_get_status_locked(ni);
3314
3315         if (ni->ni_cpts) {
3316                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
3317
3318                 for (i = 0; i < num_cpts; i++)
3319                         net_config->ni_cpts[i] = ni->ni_cpts[i];
3320
3321                 config->cfg_ncpts = num_cpts;
3322         }
3323
3324         /*
3325          * See if user land tools sent in a newer and larger version
3326          * of struct lnet_tunables than what the kernel uses.
3327          */
3328         min_size = sizeof(*config) + sizeof(*net_config);
3329
3330         if (config->cfg_hdr.ioc_len > min_size)
3331                 tunable_size = config->cfg_hdr.ioc_len - min_size;
3332
3333         /* Don't copy too much data to user space */
3334         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
3335         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
3336
3337         if (lnd_cfg && min_size) {
3338                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
3339                 config->cfg_config_u.cfg_net.net_interface_count = 1;
3340
3341                 /* Tell user land that kernel side has less data */
3342                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
3343                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
3344                         config->cfg_hdr.ioc_len -= min_size;
3345                 }
3346         }
3347 }
3348
3349 struct lnet_ni *
3350 lnet_get_ni_idx_locked(int idx)
3351 {
3352         struct lnet_ni          *ni;
3353         struct lnet_net         *net;
3354
3355         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3356                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3357                         if (idx-- == 0)
3358                                 return ni;
3359                 }
3360         }
3361
3362         return NULL;
3363 }
3364
3365 int lnet_get_net_healthv_locked(struct lnet_net *net)
3366 {
3367         struct lnet_ni *ni;
3368         int best_healthv = 0;
3369         int healthv, ni_fatal;
3370
3371         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3372                 healthv = atomic_read(&ni->ni_healthv);
3373                 ni_fatal = atomic_read(&ni->ni_fatal_error_on);
3374                 if (!ni_fatal && healthv > best_healthv)
3375                         best_healthv = healthv;
3376         }
3377
3378         return best_healthv;
3379 }
3380
3381 struct lnet_ni *
3382 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
3383 {
3384         struct lnet_ni          *ni;
3385         struct lnet_net         *net = mynet;
3386
3387         /*
3388          * It is possible that the net has been cleaned out while there is
3389          * a message being sent. This function accessed the net without
3390          * checking if the list is empty
3391          */
3392         if (!prev) {
3393                 if (!net)
3394                         net = list_first_entry(&the_lnet.ln_nets,
3395                                                struct lnet_net,
3396                                                net_list);
3397                 if (list_empty(&net->net_ni_list))
3398                         return NULL;
3399                 ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
3400                                       ni_netlist);
3401
3402                 return ni;
3403         }
3404
3405         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
3406                 /* if you reached the end of the ni list and the net is
3407                  * specified, then there are no more nis in that net */
3408                 if (net != NULL)
3409                         return NULL;
3410
3411                 /* we reached the end of this net ni list. move to the
3412                  * next net */
3413                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
3414                         /* no more nets and no more NIs. */
3415                         return NULL;
3416
3417                 /* get the next net */
3418                 net = list_first_entry(&prev->ni_net->net_list, struct lnet_net,
3419                                        net_list);
3420                 if (list_empty(&net->net_ni_list))
3421                         return NULL;
3422                 /* get the ni on it */
3423                 ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
3424                                       ni_netlist);
3425
3426                 return ni;
3427         }
3428
3429         if (list_empty(&prev->ni_netlist))
3430                 return NULL;
3431
3432         /* there are more nis left */
3433         ni = list_first_entry(&prev->ni_netlist, struct lnet_ni, ni_netlist);
3434
3435         return ni;
3436 }
3437
3438 static int
3439 lnet_get_net_config(struct lnet_ioctl_config_data *config)
3440 {
3441         struct lnet_ni *ni;
3442         int cpt;
3443         int rc = -ENOENT;
3444         int idx = config->cfg_count;
3445
3446         cpt = lnet_net_lock_current();
3447
3448         ni = lnet_get_ni_idx_locked(idx);
3449
3450         if (ni != NULL) {
3451                 rc = 0;
3452                 lnet_ni_lock(ni);
3453                 lnet_fill_ni_info_legacy(ni, config);
3454                 lnet_ni_unlock(ni);
3455         }
3456
3457         lnet_net_unlock(cpt);
3458         return rc;
3459 }
3460
3461 static int
3462 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
3463                    struct lnet_ioctl_config_lnd_tunables *tun,
3464                    struct lnet_ioctl_element_stats *stats,
3465                    __u32 tun_size)
3466 {
3467         struct lnet_ni          *ni;
3468         int                     cpt;
3469         int                     rc = -ENOENT;
3470
3471         if (!cfg_ni || !tun || !stats)
3472                 return -EINVAL;
3473
3474         cpt = lnet_net_lock_current();
3475
3476         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
3477
3478         if (ni) {
3479                 rc = 0;
3480                 lnet_ni_lock(ni);
3481                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
3482                 lnet_ni_unlock(ni);
3483         }
3484
3485         lnet_net_unlock(cpt);
3486         return rc;
3487 }
3488
3489 static int lnet_get_ni_stats(struct lnet_ioctl_element_msg_stats *msg_stats)
3490 {
3491         struct lnet_ni *ni;
3492         int rc = -ENOENT;
3493
3494         if (!msg_stats)
3495                 return -EINVAL;
3496
3497         ni = lnet_get_ni_idx_locked(msg_stats->im_idx);
3498
3499         if (ni) {
3500                 lnet_usr_translate_stats(msg_stats, &ni->ni_stats);
3501                 rc = 0;
3502         }
3503
3504         return rc;
3505 }
3506
3507 static int lnet_add_net_common(struct lnet_net *net,
3508                                struct lnet_ioctl_config_lnd_tunables *tun)
3509 {
3510         struct lnet_handle_md ping_mdh;
3511         struct lnet_ping_buffer *pbuf;
3512         struct lnet_remotenet *rnet;
3513         struct lnet_ni *ni;
3514         u32 net_id;
3515         int rc;
3516
3517         lnet_net_lock(LNET_LOCK_EX);
3518         rnet = lnet_find_rnet_locked(net->net_id);
3519         lnet_net_unlock(LNET_LOCK_EX);
3520         /*
3521          * make sure that the net added doesn't invalidate the current
3522          * configuration LNet is keeping
3523          */
3524         if (rnet) {
3525                 CERROR("Adding net %s will invalidate routing configuration\n",
3526                        libcfs_net2str(net->net_id));
3527                 lnet_net_free(net);
3528                 return -EUSERS;
3529         }
3530
3531         if (tun)
3532                 memcpy(&net->net_tunables,
3533                        &tun->lt_cmn, sizeof(net->net_tunables));
3534         else
3535                 memset(&net->net_tunables, -1, sizeof(net->net_tunables));
3536
3537         net_id = net->net_id;
3538
3539         rc = lnet_startup_lndnet(net,
3540                                  (tun) ? &tun->lt_tun : NULL);
3541         if (rc < 0)
3542                 return rc;
3543
3544         /* make sure you calculate the correct number of slots in the ping
3545          * buffer. Since the ping info is a flattened list of all the NIs,
3546          * we should allocate enough slots to accomodate the number of NIs
3547          * which will be added.
3548          */
3549         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3550                                     LNET_PING_INFO_HDR_SIZE +
3551                                     lnet_get_ni_bytes(),
3552                                     false);
3553         if (rc < 0) {
3554                 lnet_shutdown_lndnet(net);
3555                 return rc;
3556         }
3557
3558         lnet_net_lock(LNET_LOCK_EX);
3559         net = lnet_get_net_locked(net_id);
3560         LASSERT(net);
3561
3562         /* apply the UDSPs */
3563         rc = lnet_udsp_apply_policies_on_net(net);
3564         if (rc)
3565                 CERROR("Failed to apply UDSPs on local net %s\n",
3566                        libcfs_net2str(net->net_id));
3567
3568         /* At this point we lost track of which NI was just added, so we
3569          * just re-apply the policies on all of the NIs on this net
3570          */
3571         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3572                 rc = lnet_udsp_apply_policies_on_ni(ni);
3573                 if (rc)
3574                         CERROR("Failed to apply UDSPs on ni %s\n",
3575                                libcfs_nidstr(&ni->ni_nid));
3576         }
3577         lnet_net_unlock(LNET_LOCK_EX);
3578
3579         /*
3580          * Start the acceptor thread if this is the first network
3581          * being added that requires the thread.
3582          */
3583         if (net->net_lnd->lnd_accept) {
3584                 rc = lnet_acceptor_start();
3585                 if (rc < 0) {
3586                         /* shutdown the net that we just started */
3587                         CERROR("Failed to start up acceptor thread\n");
3588                         lnet_shutdown_lndnet(net);
3589                         goto failed;
3590                 }
3591         }
3592
3593         lnet_net_lock(LNET_LOCK_EX);
3594         lnet_peer_net_added(net);
3595         lnet_net_unlock(LNET_LOCK_EX);
3596
3597         lnet_ping_target_update(pbuf, ping_mdh);
3598
3599         return 0;
3600
3601 failed:
3602         lnet_ping_md_unlink(pbuf, &ping_mdh);
3603         lnet_ping_buffer_decref(pbuf);
3604         return rc;
3605 }
3606
3607 static void
3608 lnet_set_tune_defaults(struct lnet_ioctl_config_lnd_tunables *tun)
3609 {
3610         if (tun) {
3611                 if (tun->lt_cmn.lct_peer_timeout < 0)
3612                         tun->lt_cmn.lct_peer_timeout = DEFAULT_PEER_TIMEOUT;
3613                 if (!tun->lt_cmn.lct_peer_tx_credits)
3614                         tun->lt_cmn.lct_peer_tx_credits = DEFAULT_PEER_CREDITS;
3615                 if (!tun->lt_cmn.lct_max_tx_credits)
3616                         tun->lt_cmn.lct_max_tx_credits = DEFAULT_CREDITS;
3617         }
3618 }
3619
3620 static int lnet_handle_legacy_ip2nets(char *ip2nets,
3621                                       struct lnet_ioctl_config_lnd_tunables *tun)
3622 {
3623         struct lnet_net *net;
3624         const char *nets;
3625         int rc;
3626         LIST_HEAD(net_head);
3627
3628         rc = lnet_parse_ip2nets(&nets, ip2nets);
3629         if (rc < 0)
3630                 return rc;
3631
3632         rc = lnet_parse_networks(&net_head, nets);
3633         if (rc < 0)
3634                 return rc;
3635
3636         lnet_set_tune_defaults(tun);
3637
3638         mutex_lock(&the_lnet.ln_api_mutex);
3639         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3640                 rc = -ESHUTDOWN;
3641                 goto out;
3642         }
3643
3644         while ((net = list_first_entry_or_null(&net_head,
3645                                                struct lnet_net,
3646                                                net_list)) != NULL) {
3647                 list_del_init(&net->net_list);
3648                 rc = lnet_add_net_common(net, tun);
3649                 if (rc < 0)
3650                         goto out;
3651         }
3652
3653 out:
3654         mutex_unlock(&the_lnet.ln_api_mutex);
3655
3656         while ((net = list_first_entry_or_null(&net_head,
3657                                                struct lnet_net,
3658                                                net_list)) != NULL) {
3659                 list_del_init(&net->net_list);
3660                 lnet_net_free(net);
3661         }
3662         return rc;
3663 }
3664
3665 int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf, u32 net_id,
3666                     struct lnet_ioctl_config_lnd_tunables *tun)
3667 {
3668         struct lnet_net *net;
3669         struct lnet_ni *ni;
3670         int rc, i;
3671         u32 lnd_type;
3672
3673         /* handle legacy ip2nets from DLC */
3674         if (conf->lic_legacy_ip2nets[0] != '\0')
3675                 return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
3676                                                   tun);
3677
3678         lnd_type = LNET_NETTYP(net_id);
3679
3680         if (!libcfs_isknown_lnd(lnd_type)) {
3681                 CERROR("No valid net and lnd information provided\n");
3682                 return -ENOENT;
3683         }
3684
3685         net = lnet_net_alloc(net_id, NULL);
3686         if (!net)
3687                 return -ENOMEM;
3688
3689         for (i = 0; i < conf->lic_ncpts; i++) {
3690                 if (conf->lic_cpts[i] >= LNET_CPT_NUMBER) {
3691                         lnet_net_free(net);
3692                         return -ERANGE;
3693                 }
3694         }
3695
3696         ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
3697                                        conf->lic_ni_intf);
3698         if (!ni) {
3699                 lnet_net_free(net);
3700                 return -ENOMEM;
3701         }
3702
3703         lnet_set_tune_defaults(tun);
3704
3705         mutex_lock(&the_lnet.ln_api_mutex);
3706         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3707                 lnet_net_free(net);
3708                 rc = -ESHUTDOWN;
3709         } else {
3710                 rc = lnet_add_net_common(net, tun);
3711         }
3712
3713         mutex_unlock(&the_lnet.ln_api_mutex);
3714
3715         /* If NI already exist delete this new unused copy */
3716         if (rc == -EEXIST)
3717                 lnet_ni_free(ni);
3718
3719         return rc;
3720 }
3721
3722 int lnet_dyn_del_ni(struct lnet_nid *nid)
3723 {
3724         struct lnet_net *net;
3725         struct lnet_ni *ni;
3726         u32 net_id = LNET_NID_NET(nid);
3727         struct lnet_ping_buffer *pbuf;
3728         struct lnet_handle_md ping_mdh;
3729         int net_bytes, rc;
3730         bool net_empty;
3731
3732         /* don't allow userspace to shutdown the LOLND */
3733         if (LNET_NETTYP(net_id) == LOLND)
3734                 return -EINVAL;
3735
3736         mutex_lock(&the_lnet.ln_api_mutex);
3737         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3738                 rc = -ESHUTDOWN;
3739                 goto unlock_api_mutex;
3740         }
3741
3742         lnet_net_lock(0);
3743
3744         net = lnet_get_net_locked(net_id);
3745         if (!net) {
3746                 CERROR("net %s not found\n",
3747                        libcfs_net2str(net_id));
3748                 rc = -ENOENT;
3749                 goto unlock_net;
3750         }
3751
3752         if (!nid_addr_is_set(nid)) {
3753                 /* remove the entire net */
3754                 net_bytes = lnet_get_net_ni_bytes_locked(net);
3755
3756                 lnet_net_unlock(0);
3757
3758                 /* create and link a new ping info, before removing the old one */
3759                 rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3760                                             LNET_PING_INFO_HDR_SIZE +
3761                                             lnet_get_ni_bytes() - net_bytes,
3762                                             false);
3763                 if (rc != 0)
3764                         goto unlock_api_mutex;
3765
3766                 lnet_shutdown_lndnet(net);
3767
3768                 lnet_acceptor_stop();
3769
3770                 lnet_ping_target_update(pbuf, ping_mdh);
3771
3772                 goto unlock_api_mutex;
3773         }
3774
3775         ni = lnet_nid_to_ni_locked(nid, 0);
3776         if (!ni) {
3777                 CERROR("nid %s not found\n", libcfs_nidstr(nid));
3778                 rc = -ENOENT;
3779                 goto unlock_net;
3780         }
3781
3782         net_bytes = lnet_get_net_ni_bytes_locked(net);
3783         net_empty = list_is_singular(&net->net_ni_list);
3784
3785         lnet_net_unlock(0);
3786
3787         /* create and link a new ping info, before removing the old one */
3788         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3789                                     (LNET_PING_INFO_HDR_SIZE +
3790                                      lnet_get_ni_bytes() -
3791                                      lnet_ping_sts_size(&ni->ni_nid)),
3792                                     false);
3793         if (rc != 0)
3794                 goto unlock_api_mutex;
3795
3796         lnet_shutdown_lndni(ni);
3797
3798         lnet_acceptor_stop();
3799
3800         lnet_ping_target_update(pbuf, ping_mdh);
3801
3802         /* check if the net is empty and remove it if it is */
3803         if (net_empty)
3804                 lnet_shutdown_lndnet(net);
3805
3806         goto unlock_api_mutex;
3807
3808 unlock_net:
3809         lnet_net_unlock(0);
3810 unlock_api_mutex:
3811         mutex_unlock(&the_lnet.ln_api_mutex);
3812
3813         return rc;
3814 }
3815
3816 /*
3817  * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
3818  * They are only expected to be called for unique networks.
3819  * That can be as a result of older DLC library
3820  * calls. Multi-Rail DLC and beyond no longer uses these APIs.
3821  */
3822 int
3823 lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
3824 {
3825         struct lnet_net *net;
3826         LIST_HEAD(net_head);
3827         int rc;
3828         struct lnet_ioctl_config_lnd_tunables tun;
3829         const char *nets = conf->cfg_config_u.cfg_net.net_intf;
3830
3831         /* Create a net/ni structures for the network string */
3832         rc = lnet_parse_networks(&net_head, nets);
3833         if (rc <= 0)
3834                 return rc == 0 ? -EINVAL : rc;
3835
3836         mutex_lock(&the_lnet.ln_api_mutex);
3837         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3838                 rc = -ESHUTDOWN;
3839                 goto out_unlock_clean;
3840         }
3841
3842         if (rc > 1) {
3843                 rc = -EINVAL; /* only add one network per call */
3844                 goto out_unlock_clean;
3845         }
3846
3847         net = list_first_entry(&net_head, struct lnet_net, net_list);
3848         list_del_init(&net->net_list);
3849
3850         LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));
3851
3852         memset(&tun, 0, sizeof(tun));
3853
3854         tun.lt_cmn.lct_peer_timeout =
3855           (!conf->cfg_config_u.cfg_net.net_peer_timeout) ? DEFAULT_PEER_TIMEOUT :
3856                 conf->cfg_config_u.cfg_net.net_peer_timeout;
3857         tun.lt_cmn.lct_peer_tx_credits =
3858           (!conf->cfg_config_u.cfg_net.net_peer_tx_credits) ? DEFAULT_PEER_CREDITS :
3859                 conf->cfg_config_u.cfg_net.net_peer_tx_credits;
3860         tun.lt_cmn.lct_peer_rtr_credits =
3861           conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
3862         tun.lt_cmn.lct_max_tx_credits =
3863           (!conf->cfg_config_u.cfg_net.net_max_tx_credits) ? DEFAULT_CREDITS :
3864                 conf->cfg_config_u.cfg_net.net_max_tx_credits;
3865
3866         rc = lnet_add_net_common(net, &tun);
3867
3868 out_unlock_clean:
3869         mutex_unlock(&the_lnet.ln_api_mutex);
3870         /* net_head list is empty in success case */
3871         while ((net = list_first_entry_or_null(&net_head,
3872                                                struct lnet_net,
3873                                                net_list)) != NULL) {
3874                 list_del_init(&net->net_list);
3875                 lnet_net_free(net);
3876         }
3877         return rc;
3878 }
3879
3880 int
3881 lnet_dyn_del_net(u32 net_id)
3882 {
3883         struct lnet_net *net;
3884         struct lnet_ping_buffer *pbuf;
3885         struct lnet_handle_md ping_mdh;
3886         int net_ni_bytes, rc;
3887
3888         /* don't allow userspace to shutdown the LOLND */
3889         if (LNET_NETTYP(net_id) == LOLND)
3890                 return -EINVAL;
3891
3892         mutex_lock(&the_lnet.ln_api_mutex);
3893         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3894                 rc = -ESHUTDOWN;
3895                 goto out;
3896         }
3897
3898         lnet_net_lock(0);
3899
3900         net = lnet_get_net_locked(net_id);
3901         if (net == NULL) {
3902                 lnet_net_unlock(0);
3903                 rc = -EINVAL;
3904                 goto out;
3905         }
3906
3907         net_ni_bytes = lnet_get_net_ni_bytes_locked(net);
3908
3909         lnet_net_unlock(0);
3910
3911         /* create and link a new ping info, before removing the old one */
3912         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3913                                     LNET_PING_INFO_HDR_SIZE +
3914                                     lnet_get_ni_bytes() - net_ni_bytes,
3915                                     false);
3916         if (rc != 0)
3917                 goto out;
3918
3919         lnet_shutdown_lndnet(net);
3920
3921         lnet_acceptor_stop();
3922
3923         lnet_ping_target_update(pbuf, ping_mdh);
3924
3925 out:
3926         mutex_unlock(&the_lnet.ln_api_mutex);
3927
3928         return rc;
3929 }
3930
3931 void lnet_mark_ping_buffer_for_update(void)
3932 {
3933         if (the_lnet.ln_routing)
3934                 return;
3935
3936         atomic_set(&the_lnet.ln_update_ping_buf, 1);
3937         complete(&the_lnet.ln_mt_wait_complete);
3938 }
3939 EXPORT_SYMBOL(lnet_mark_ping_buffer_for_update);
3940
3941 void lnet_update_ping_buffer(struct work_struct *work)
3942 {
3943         struct lnet_ping_buffer *pbuf;
3944         struct lnet_handle_md ping_mdh;
3945
3946         mutex_lock(&the_lnet.ln_api_mutex);
3947
3948         atomic_set(&the_lnet.ln_pb_update_ready, 1);
3949
3950         if ((the_lnet.ln_state == LNET_STATE_RUNNING) &&
3951             !lnet_ping_target_setup(&pbuf, &ping_mdh,
3952                                     LNET_PING_INFO_HDR_SIZE +
3953                                     lnet_get_ni_bytes(),
3954                                     false))
3955                 lnet_ping_target_update(pbuf, ping_mdh);
3956
3957
3958         mutex_unlock(&the_lnet.ln_api_mutex);
3959 }
3960
3961
3962 void lnet_queue_ping_buffer_update(void)
3963 {
3964         /* don't queue pb update if it is not needed */
3965         if (atomic_dec_if_positive(&the_lnet.ln_update_ping_buf) < 0)
3966                 return;
3967
3968         /* don't queue pb update if already queued and not processed */
3969         if (atomic_dec_if_positive(&the_lnet.ln_pb_update_ready) < 0)
3970                 return;
3971
3972         INIT_WORK(&the_lnet.ln_pb_update_work, lnet_update_ping_buffer);
3973         queue_work(the_lnet.ln_pb_update_wq, &the_lnet.ln_pb_update_work);
3974 }
3975
3976 void lnet_incr_dlc_seq(void)
3977 {
3978         atomic_inc(&lnet_dlc_seq_no);
3979 }
3980
3981 __u32 lnet_get_dlc_seq_locked(void)
3982 {
3983         return atomic_read(&lnet_dlc_seq_no);
3984 }
3985
3986 static void
3987 lnet_ni_set_healthv(lnet_nid_t nid, int value, bool all)
3988 {
3989         struct lnet_net *net;
3990         struct lnet_ni *ni;
3991
3992         lnet_net_lock(LNET_LOCK_EX);
3993         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3994                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3995                         if (all || (nid_is_nid4(&ni->ni_nid) &&
3996                                     lnet_nid_to_nid4(&ni->ni_nid) == nid)) {
3997                                 atomic_set(&ni->ni_healthv, value);
3998                                 if (list_empty(&ni->ni_recovery) &&
3999                                     value < LNET_MAX_HEALTH_VALUE) {
4000                                         CERROR("manually adding local NI %s to recovery\n",
4001                                                libcfs_nidstr(&ni->ni_nid));
4002                                         list_add_tail(&ni->ni_recovery,
4003                                                       &the_lnet.ln_mt_localNIRecovq);
4004                                         lnet_ni_addref_locked(ni, 0);
4005                                 }
4006                                 if (!all) {
4007                                         lnet_net_unlock(LNET_LOCK_EX);
4008                                         return;
4009                                 }
4010                         }
4011                 }
4012         }
4013         lnet_net_unlock(LNET_LOCK_EX);
4014 }
4015
4016 static void
4017 lnet_ni_set_conns_per_peer(lnet_nid_t nid, int value, bool all)
4018 {
4019         struct lnet_net *net;
4020         struct lnet_ni *ni;
4021
4022         lnet_net_lock(LNET_LOCK_EX);
4023         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
4024                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
4025                         if (lnet_nid_to_nid4(&ni->ni_nid) != nid && !all)
4026                                 continue;
4027                         if (LNET_NETTYP(net->net_id) == SOCKLND)
4028                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_sock.lnd_conns_per_peer = value;
4029                         else if (LNET_NETTYP(net->net_id) == O2IBLND)
4030                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer = value;
4031                         if (!all) {
4032                                 lnet_net_unlock(LNET_LOCK_EX);
4033                                 return;
4034                         }
4035                 }
4036         }
4037         lnet_net_unlock(LNET_LOCK_EX);
4038 }
4039
4040 static int
4041 lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats)
4042 {
4043         int cpt, rc = 0;
4044         struct lnet_ni *ni;
4045         struct lnet_nid nid;
4046
4047         lnet_nid4_to_nid(stats->hlni_nid, &nid);
4048         cpt = lnet_net_lock_current();
4049         ni = lnet_nid_to_ni_locked(&nid, cpt);
4050         if (!ni) {
4051                 rc = -ENOENT;
4052                 goto unlock;
4053         }
4054
4055         stats->hlni_local_interrupt = atomic_read(&ni->ni_hstats.hlt_local_interrupt);
4056         stats->hlni_local_dropped = atomic_read(&ni->ni_hstats.hlt_local_dropped);
4057         stats->hlni_local_aborted = atomic_read(&ni->ni_hstats.hlt_local_aborted);
4058         stats->hlni_local_no_route = atomic_read(&ni->ni_hstats.hlt_local_no_route);
4059         stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout);
4060         stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error);
4061         stats->hlni_fatal_error = atomic_read(&ni->ni_fatal_error_on);
4062         stats->hlni_health_value = atomic_read(&ni->ni_healthv);
4063         stats->hlni_ping_count = ni->ni_ping_count;
4064         stats->hlni_next_ping = ni->ni_next_ping;
4065
4066 unlock:
4067         lnet_net_unlock(cpt);
4068
4069         return rc;
4070 }
4071
4072 static int
4073 lnet_get_local_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4074 {
4075         struct lnet_ni *ni;
4076         int i = 0;
4077
4078         lnet_net_lock(LNET_LOCK_EX);
4079         list_for_each_entry(ni, &the_lnet.ln_mt_localNIRecovq, ni_recovery) {
4080                 if (!nid_is_nid4(&ni->ni_nid))
4081                         continue;
4082                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&ni->ni_nid);
4083                 i++;
4084                 if (i >= LNET_MAX_SHOW_NUM_NID)
4085                         break;
4086         }
4087         lnet_net_unlock(LNET_LOCK_EX);
4088         list->rlst_num_nids = i;
4089
4090         return 0;
4091 }
4092
4093 static int
4094 lnet_get_peer_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4095 {
4096         struct lnet_peer_ni *lpni;
4097         int i = 0;
4098
4099         lnet_net_lock(LNET_LOCK_EX);
4100         list_for_each_entry(lpni, &the_lnet.ln_mt_peerNIRecovq, lpni_recovery) {
4101                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&lpni->lpni_nid);
4102                 i++;
4103                 if (i >= LNET_MAX_SHOW_NUM_NID)
4104                         break;
4105         }
4106         lnet_net_unlock(LNET_LOCK_EX);
4107         list->rlst_num_nids = i;
4108
4109         return 0;
4110 }
4111
4112 /**
4113  * LNet ioctl handler.
4114  *
4115  */
4116 int
4117 LNetCtl(unsigned int cmd, void *arg)
4118 {
4119         struct libcfs_ioctl_data *data = arg;
4120         struct lnet_ioctl_config_data *config;
4121         struct lnet_ni           *ni;
4122         struct lnet_nid           nid;
4123         int                       rc;
4124
4125         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
4126                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
4127
4128         switch (cmd) {
4129         case IOC_LIBCFS_GET_NI: {
4130                 struct lnet_processid id = {};
4131
4132                 rc = LNetGetId(data->ioc_count, &id, false);
4133                 data->ioc_nid = lnet_nid_to_nid4(&id.nid);
4134                 return rc;
4135         }
4136         case IOC_LIBCFS_FAIL_NID:
4137                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
4138
4139         case IOC_LIBCFS_ADD_ROUTE: {
4140                 /* default router sensitivity to 1 */
4141                 unsigned int sensitivity = 1;
4142                 config = arg;
4143
4144                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4145                         return -EINVAL;
4146
4147                 if (config->cfg_config_u.cfg_route.rtr_sensitivity) {
4148                         sensitivity =
4149                           config->cfg_config_u.cfg_route.rtr_sensitivity;
4150                 }
4151
4152                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4153                 mutex_lock(&the_lnet.ln_api_mutex);
4154                 rc = lnet_add_route(config->cfg_net,
4155                                     config->cfg_config_u.cfg_route.rtr_hop,
4156                                     &nid,
4157                                     config->cfg_config_u.cfg_route.
4158                                         rtr_priority, sensitivity);
4159                 mutex_unlock(&the_lnet.ln_api_mutex);
4160                 return rc;
4161         }
4162
4163         case IOC_LIBCFS_DEL_ROUTE:
4164                 config = arg;
4165
4166                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4167                         return -EINVAL;
4168
4169                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4170                 mutex_lock(&the_lnet.ln_api_mutex);
4171                 rc = lnet_del_route(config->cfg_net, &nid);
4172                 mutex_unlock(&the_lnet.ln_api_mutex);
4173                 return rc;
4174
4175         case IOC_LIBCFS_GET_ROUTE:
4176                 config = arg;
4177
4178                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4179                         return -EINVAL;
4180
4181                 mutex_lock(&the_lnet.ln_api_mutex);
4182                 rc = lnet_get_route(config->cfg_count,
4183                                     &config->cfg_net,
4184                                     &config->cfg_config_u.cfg_route.rtr_hop,
4185                                     &config->cfg_nid,
4186                                     &config->cfg_config_u.cfg_route.rtr_flags,
4187                                     &config->cfg_config_u.cfg_route.
4188                                         rtr_priority,
4189                                     &config->cfg_config_u.cfg_route.
4190                                         rtr_sensitivity);
4191                 mutex_unlock(&the_lnet.ln_api_mutex);
4192                 return rc;
4193
4194         case IOC_LIBCFS_GET_LOCAL_NI: {
4195                 struct lnet_ioctl_config_ni *cfg_ni;
4196                 struct lnet_ioctl_config_lnd_tunables *tun = NULL;
4197                 struct lnet_ioctl_element_stats *stats;
4198                 __u32 tun_size;
4199
4200                 cfg_ni = arg;
4201
4202                 /* get the tunables if they are available */
4203                 if (cfg_ni->lic_cfg_hdr.ioc_len <
4204                     sizeof(*cfg_ni) + sizeof(*stats) + sizeof(*tun))
4205                         return -EINVAL;
4206
4207                 stats = (struct lnet_ioctl_element_stats *)
4208                         cfg_ni->lic_bulk;
4209                 tun = (struct lnet_ioctl_config_lnd_tunables *)
4210                                 (cfg_ni->lic_bulk + sizeof(*stats));
4211
4212                 tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
4213                         sizeof(*stats);
4214
4215                 mutex_lock(&the_lnet.ln_api_mutex);
4216                 rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
4217                 mutex_unlock(&the_lnet.ln_api_mutex);
4218                 return rc;
4219         }
4220
4221         case IOC_LIBCFS_GET_LOCAL_NI_MSG_STATS: {
4222                 struct lnet_ioctl_element_msg_stats *msg_stats = arg;
4223                 int cpt;
4224
4225                 if (msg_stats->im_hdr.ioc_len != sizeof(*msg_stats))
4226                         return -EINVAL;
4227
4228                 mutex_lock(&the_lnet.ln_api_mutex);
4229
4230                 cpt = lnet_net_lock_current();
4231                 rc = lnet_get_ni_stats(msg_stats);
4232                 lnet_net_unlock(cpt);
4233
4234                 mutex_unlock(&the_lnet.ln_api_mutex);
4235
4236                 return rc;
4237         }
4238
4239         case IOC_LIBCFS_GET_NET: {
4240                 size_t total = sizeof(*config) +
4241                                sizeof(struct lnet_ioctl_net_config);
4242                 config = arg;
4243
4244                 if (config->cfg_hdr.ioc_len < total)
4245                         return -EINVAL;
4246
4247                 mutex_lock(&the_lnet.ln_api_mutex);
4248                 rc = lnet_get_net_config(config);
4249                 mutex_unlock(&the_lnet.ln_api_mutex);
4250                 return rc;
4251         }
4252
4253         case IOC_LIBCFS_GET_LNET_STATS:
4254         {
4255                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
4256
4257                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
4258                         return -EINVAL;
4259
4260                 mutex_lock(&the_lnet.ln_api_mutex);
4261                 rc = lnet_counters_get(&lnet_stats->st_cntrs);
4262                 mutex_unlock(&the_lnet.ln_api_mutex);
4263                 return rc;
4264         }
4265
4266         case IOC_LIBCFS_RESET_LNET_STATS:
4267         {
4268                 mutex_lock(&the_lnet.ln_api_mutex);
4269                 lnet_counters_reset();
4270                 mutex_unlock(&the_lnet.ln_api_mutex);
4271                 return 0;
4272         }
4273
4274         case IOC_LIBCFS_CONFIG_RTR:
4275                 config = arg;
4276
4277                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4278                         return -EINVAL;
4279
4280                 mutex_lock(&the_lnet.ln_api_mutex);
4281                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
4282                         rc = lnet_rtrpools_enable();
4283                         mutex_unlock(&the_lnet.ln_api_mutex);
4284                         return rc;
4285                 }
4286                 lnet_rtrpools_disable();
4287                 mutex_unlock(&the_lnet.ln_api_mutex);
4288                 return 0;
4289
4290         case IOC_LIBCFS_ADD_BUF:
4291                 config = arg;
4292
4293                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4294                         return -EINVAL;
4295
4296                 mutex_lock(&the_lnet.ln_api_mutex);
4297                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
4298                                                 buf_tiny,
4299                                           config->cfg_config_u.cfg_buffers.
4300                                                 buf_small,
4301                                           config->cfg_config_u.cfg_buffers.
4302                                                 buf_large);
4303                 mutex_unlock(&the_lnet.ln_api_mutex);
4304                 return rc;
4305
4306         case IOC_LIBCFS_SET_NUMA_RANGE: {
4307                 struct lnet_ioctl_set_value *numa;
4308                 numa = arg;
4309                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4310                         return -EINVAL;
4311                 lnet_net_lock(LNET_LOCK_EX);
4312                 lnet_numa_range = numa->sv_value;
4313                 lnet_net_unlock(LNET_LOCK_EX);
4314                 return 0;
4315         }
4316
4317         case IOC_LIBCFS_GET_NUMA_RANGE: {
4318                 struct lnet_ioctl_set_value *numa;
4319                 numa = arg;
4320                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4321                         return -EINVAL;
4322                 numa->sv_value = lnet_numa_range;
4323                 return 0;
4324         }
4325
4326         case IOC_LIBCFS_GET_BUF: {
4327                 struct lnet_ioctl_pool_cfg *pool_cfg;
4328                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
4329
4330                 config = arg;
4331
4332                 if (config->cfg_hdr.ioc_len < total)
4333                         return -EINVAL;
4334
4335                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
4336
4337                 mutex_lock(&the_lnet.ln_api_mutex);
4338                 rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
4339                 mutex_unlock(&the_lnet.ln_api_mutex);
4340                 return rc;
4341         }
4342
4343         case IOC_LIBCFS_GET_LOCAL_HSTATS: {
4344                 struct lnet_ioctl_local_ni_hstats *stats = arg;
4345
4346                 if (stats->hlni_hdr.ioc_len < sizeof(*stats))
4347                         return -EINVAL;
4348
4349                 mutex_lock(&the_lnet.ln_api_mutex);
4350                 rc = lnet_get_local_ni_hstats(stats);
4351                 mutex_unlock(&the_lnet.ln_api_mutex);
4352
4353                 return rc;
4354         }
4355
4356         case IOC_LIBCFS_GET_RECOVERY_QUEUE: {
4357                 struct lnet_ioctl_recovery_list *list = arg;
4358                 if (list->rlst_hdr.ioc_len < sizeof(*list))
4359                         return -EINVAL;
4360
4361                 mutex_lock(&the_lnet.ln_api_mutex);
4362                 if (list->rlst_type == LNET_HEALTH_TYPE_LOCAL_NI)
4363                         rc = lnet_get_local_ni_recovery_list(list);
4364                 else
4365                         rc = lnet_get_peer_ni_recovery_list(list);
4366                 mutex_unlock(&the_lnet.ln_api_mutex);
4367                 return rc;
4368         }
4369
4370         case IOC_LIBCFS_ADD_PEER_NI: {
4371                 struct lnet_ioctl_peer_cfg *cfg = arg;
4372                 struct lnet_nid prim_nid;
4373
4374                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4375                         return -EINVAL;
4376
4377                 mutex_lock(&the_lnet.ln_api_mutex);
4378                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4379                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4380                 rc = lnet_user_add_peer_ni(&prim_nid, &nid, cfg->prcfg_mr,
4381                                            cfg->prcfg_count == 1);
4382                 mutex_unlock(&the_lnet.ln_api_mutex);
4383                 return rc;
4384         }
4385
4386         case IOC_LIBCFS_DEL_PEER_NI: {
4387                 struct lnet_ioctl_peer_cfg *cfg = arg;
4388                 struct lnet_nid prim_nid;
4389
4390                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4391                         return -EINVAL;
4392
4393                 mutex_lock(&the_lnet.ln_api_mutex);
4394                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4395                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4396                 rc = lnet_del_peer_ni(&prim_nid,
4397                                       &nid,
4398                                       cfg->prcfg_count);
4399                 mutex_unlock(&the_lnet.ln_api_mutex);
4400                 return rc;
4401         }
4402
4403         case IOC_LIBCFS_GET_PEER_INFO: {
4404                 struct lnet_ioctl_peer *peer_info = arg;
4405
4406                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
4407                         return -EINVAL;
4408
4409                 mutex_lock(&the_lnet.ln_api_mutex);
4410                 rc = lnet_get_peer_ni_info(
4411                    peer_info->pr_count,
4412                    &peer_info->pr_nid,
4413                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
4414                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
4415                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
4416                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
4417                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
4418                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
4419                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
4420                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
4421                 mutex_unlock(&the_lnet.ln_api_mutex);
4422                 return rc;
4423         }
4424
4425         case IOC_LIBCFS_GET_PEER_NI: {
4426                 struct lnet_ioctl_peer_cfg *cfg = arg;
4427
4428                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4429                         return -EINVAL;
4430
4431                 mutex_lock(&the_lnet.ln_api_mutex);
4432                 rc = lnet_get_peer_info(cfg,
4433                                         (void __user *)cfg->prcfg_bulk);
4434                 mutex_unlock(&the_lnet.ln_api_mutex);
4435                 return rc;
4436         }
4437
4438         case IOC_LIBCFS_GET_PEER_LIST: {
4439                 struct lnet_ioctl_peer_cfg *cfg = arg;
4440
4441                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4442                         return -EINVAL;
4443
4444                 mutex_lock(&the_lnet.ln_api_mutex);
4445                 rc = lnet_get_peer_list(&cfg->prcfg_count, &cfg->prcfg_size,
4446                                 (struct lnet_process_id __user *)cfg->prcfg_bulk);
4447                 mutex_unlock(&the_lnet.ln_api_mutex);
4448                 return rc;
4449         }
4450
4451         case IOC_LIBCFS_SET_HEALHV: {
4452                 struct lnet_ioctl_reset_health_cfg *cfg = arg;
4453                 int value;
4454
4455                 if (cfg->rh_hdr.ioc_len < sizeof(*cfg))
4456                         return -EINVAL;
4457                 if (cfg->rh_value < 0 ||
4458                     cfg->rh_value > LNET_MAX_HEALTH_VALUE)
4459                         value = LNET_MAX_HEALTH_VALUE;
4460                 else
4461                         value = cfg->rh_value;
4462                 CDEBUG(D_NET, "Manually setting healthv to %d for %s:%s. all = %d\n",
4463                        value, (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) ?
4464                        "local" : "peer", libcfs_nid2str(cfg->rh_nid), cfg->rh_all);
4465                 lnet_nid4_to_nid(cfg->rh_nid, &nid);
4466                 mutex_lock(&the_lnet.ln_api_mutex);
4467                 if (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI)
4468                         lnet_ni_set_healthv(cfg->rh_nid, value,
4469                                              cfg->rh_all);
4470                 else
4471                         lnet_peer_ni_set_healthv(&nid, value, cfg->rh_all);
4472                 mutex_unlock(&the_lnet.ln_api_mutex);
4473                 return 0;
4474         }
4475
4476         case IOC_LIBCFS_SET_PEER: {
4477                 struct lnet_ioctl_peer_cfg *cfg = arg;
4478                 struct lnet_peer *lp;
4479
4480                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4481                         return -EINVAL;
4482
4483                 mutex_lock(&the_lnet.ln_api_mutex);
4484                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &nid);
4485                 lp = lnet_find_peer(&nid);
4486                 if (!lp) {
4487                         mutex_unlock(&the_lnet.ln_api_mutex);
4488                         return -ENOENT;
4489                 }
4490                 spin_lock(&lp->lp_lock);
4491                 lp->lp_state = cfg->prcfg_state;
4492                 spin_unlock(&lp->lp_lock);
4493                 lnet_peer_decref_locked(lp);
4494                 mutex_unlock(&the_lnet.ln_api_mutex);
4495                 CDEBUG(D_NET, "Set peer %s state to %u\n",
4496                        libcfs_nid2str(cfg->prcfg_prim_nid), cfg->prcfg_state);
4497                 return 0;
4498         }
4499
4500         case IOC_LIBCFS_SET_CONNS_PER_PEER: {
4501                 struct lnet_ioctl_reset_conns_per_peer_cfg *cfg = arg;
4502                 int value;
4503
4504                 if (cfg->rcpp_hdr.ioc_len < sizeof(*cfg))
4505                         return -EINVAL;
4506                 if (cfg->rcpp_value < 0)
4507                         value = 1;
4508                 else
4509                         value = cfg->rcpp_value;
4510                 CDEBUG(D_NET,
4511                        "Setting conns_per_peer to %d for %s. all = %d\n",
4512                        value, libcfs_nid2str(cfg->rcpp_nid), cfg->rcpp_all);
4513                 mutex_lock(&the_lnet.ln_api_mutex);
4514                 lnet_ni_set_conns_per_peer(cfg->rcpp_nid, value, cfg->rcpp_all);
4515                 mutex_unlock(&the_lnet.ln_api_mutex);
4516                 return 0;
4517         }
4518
4519         case IOC_LIBCFS_NOTIFY_ROUTER: {
4520                 /* Convert the user-supplied real time to monotonic.
4521                  * NB: "when" is always in the past
4522                  */
4523                 time64_t when = ktime_get_seconds() -
4524                                 (ktime_get_real_seconds() - data->ioc_u64[0]);
4525
4526                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4527                 return lnet_notify(NULL, &nid, data->ioc_flags, false, when);
4528         }
4529
4530         case IOC_LIBCFS_LNET_DIST:
4531                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4532                 rc = LNetDist(&nid, &nid, &data->ioc_u32[1]);
4533                 if (rc < 0 && rc != -EHOSTUNREACH)
4534                         return rc;
4535
4536                 data->ioc_nid = lnet_nid_to_nid4(&nid);
4537                 data->ioc_u32[0] = rc;
4538                 return 0;
4539
4540         case IOC_LIBCFS_TESTPROTOCOMPAT:
4541                 the_lnet.ln_testprotocompat = data->ioc_flags;
4542                 return 0;
4543
4544         case IOC_LIBCFS_LNET_FAULT:
4545                 return lnet_fault_ctl(data->ioc_flags, data);
4546
4547         case IOC_LIBCFS_PING_PEER: {
4548                 struct lnet_ioctl_ping_data *ping = arg;
4549                 struct lnet_process_id __user *ids = ping->ping_buf;
4550                 struct lnet_nid src_nid = LNET_ANY_NID;
4551                 struct lnet_genl_ping_list plist;
4552                 struct lnet_processid id;
4553                 struct lnet_peer *lp;
4554                 signed long timeout;
4555                 int count, i;
4556
4557                 /* Check if the supplied ping data supports source nid
4558                  * NB: This check is sufficient if lnet_ioctl_ping_data has
4559                  * additional fields added, but if they are re-ordered or
4560                  * fields removed then this will break. It is expected that
4561                  * these ioctls will be replaced with netlink implementation, so
4562                  * it is probably not worth coming up with a more robust version
4563                  * compatibility scheme.
4564                  */
4565                 if (ping->ping_hdr.ioc_len >= sizeof(struct lnet_ioctl_ping_data))
4566                         lnet_nid4_to_nid(ping->ping_src, &src_nid);
4567
4568                 /* If timeout is negative then set default of 3 minutes */
4569                 if (((s32)ping->op_param) <= 0 ||
4570                     ping->op_param > (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
4571                         timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
4572                 else
4573                         timeout = nsecs_to_jiffies(ping->op_param * NSEC_PER_MSEC);
4574
4575                 id.pid = ping->ping_id.pid;
4576                 lnet_nid4_to_nid(ping->ping_id.nid, &id.nid);
4577                 rc = lnet_ping(&id, &src_nid, timeout, &plist,
4578                                ping->ping_count);
4579                 if (rc < 0)
4580                         goto report_ping_err;
4581                 count = rc;
4582                 rc = 0;
4583
4584                 for (i = 0; i < count; i++) {
4585                         struct lnet_processid *result;
4586                         struct lnet_process_id tmpid;
4587
4588                         result = genradix_ptr(&plist.lgpl_list, i);
4589                         memset(&tmpid, 0, sizeof(tmpid));
4590                         tmpid.pid = result->pid;
4591                         tmpid.nid = lnet_nid_to_nid4(&result->nid);
4592                         if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
4593                                 rc = -EFAULT;
4594                                 goto report_ping_err;
4595                         }
4596                 }
4597
4598                 mutex_lock(&the_lnet.ln_api_mutex);
4599                 lp = lnet_find_peer(&id.nid);
4600                 if (lp) {
4601                         ping->ping_id.nid =
4602                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4603                         ping->mr_info = lnet_peer_is_multi_rail(lp);
4604                         lnet_peer_decref_locked(lp);
4605                 }
4606                 mutex_unlock(&the_lnet.ln_api_mutex);
4607
4608                 ping->ping_count = count;
4609 report_ping_err:
4610                 genradix_free(&plist.lgpl_list);
4611                 return rc;
4612         }
4613
4614         case IOC_LIBCFS_DISCOVER: {
4615                 struct lnet_ioctl_ping_data *discover = arg;
4616                 struct lnet_process_id __user *ids;
4617                 struct lnet_genl_ping_list dlists;
4618                 struct lnet_processid id;
4619                 struct lnet_peer *lp;
4620                 int count, i;
4621
4622                 if (discover->ping_count <= 0)
4623                         return -EINVAL;
4624
4625                 genradix_init(&dlists.lgpl_list);
4626                 /* If the user buffer has more space than the lnet_interfaces_max,
4627                  * then only fill it up to lnet_interfaces_max.
4628                  */
4629                 if (discover->ping_count > lnet_interfaces_max)
4630                         discover->ping_count = lnet_interfaces_max;
4631
4632                 id.pid = discover->ping_id.pid;
4633                 lnet_nid4_to_nid(discover->ping_id.nid, &id.nid);
4634                 rc = lnet_discover(&id, discover->op_param, &dlists);
4635                 if (rc < 0)
4636                         goto report_discover_err;
4637                 count = rc;
4638
4639                 ids = discover->ping_buf;
4640                 for (i = 0; i < count; i++) {
4641                         struct lnet_processid *result;
4642                         struct lnet_process_id tmpid;
4643
4644                         result = genradix_ptr(&dlists.lgpl_list, i);
4645                         memset(&tmpid, 0, sizeof(tmpid));
4646                         tmpid.pid = result->pid;
4647                         tmpid.nid = lnet_nid_to_nid4(&result->nid);
4648                         if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
4649                                 rc = -EFAULT;
4650                                 goto report_discover_err;
4651                         }
4652
4653                         if (i >= discover->ping_count)
4654                                 break;
4655                 }
4656                 rc = 0;
4657
4658                 mutex_lock(&the_lnet.ln_api_mutex);
4659                 lp = lnet_find_peer(&id.nid);
4660                 if (lp) {
4661                         discover->ping_id.nid =
4662                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4663                         discover->mr_info = lnet_peer_is_multi_rail(lp);
4664                         lnet_peer_decref_locked(lp);
4665                 }
4666                 mutex_unlock(&the_lnet.ln_api_mutex);
4667
4668                 discover->ping_count = count;
4669 report_discover_err:
4670                 genradix_free(&dlists.lgpl_list);
4671                 return rc;
4672         }
4673
4674         case IOC_LIBCFS_ADD_UDSP: {
4675                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4676                 __u32 bulk_size = ioc_udsp->iou_hdr.ioc_len;
4677
4678                 mutex_lock(&the_lnet.ln_api_mutex);
4679                 rc = lnet_udsp_demarshal_add(arg, bulk_size);
4680                 if (!rc) {
4681                         rc = lnet_udsp_apply_policies(NULL, false);
4682                         CDEBUG(D_NET, "policy application returned %d\n", rc);
4683                         rc = 0;
4684                 }
4685                 mutex_unlock(&the_lnet.ln_api_mutex);
4686
4687                 return rc;
4688         }
4689
4690         case IOC_LIBCFS_DEL_UDSP: {
4691                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4692                 int idx = ioc_udsp->iou_idx;
4693
4694                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4695                         return -EINVAL;
4696
4697                 mutex_lock(&the_lnet.ln_api_mutex);
4698                 rc = lnet_udsp_del_policy(idx);
4699                 mutex_unlock(&the_lnet.ln_api_mutex);
4700
4701                 return rc;
4702         }
4703
4704         case IOC_LIBCFS_GET_UDSP_SIZE: {
4705                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4706                 struct lnet_udsp *udsp;
4707
4708                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4709                         return -EINVAL;
4710
4711                 rc = 0;
4712
4713                 mutex_lock(&the_lnet.ln_api_mutex);
4714                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4715                 if (!udsp) {
4716                         rc = -ENOENT;
4717                 } else {
4718                         /* coming in iou_idx will hold the idx of the udsp
4719                          * to get the size of. going out the iou_idx will
4720                          * hold the size of the UDSP found at the passed
4721                          * in index.
4722                          */
4723                         ioc_udsp->iou_idx = lnet_get_udsp_size(udsp);
4724                         if (ioc_udsp->iou_idx < 0)
4725                                 rc = -EINVAL;
4726                 }
4727                 mutex_unlock(&the_lnet.ln_api_mutex);
4728
4729                 return rc;
4730         }
4731
4732         case IOC_LIBCFS_GET_UDSP: {
4733                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4734                 struct lnet_udsp *udsp;
4735
4736                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4737                         return -EINVAL;
4738
4739                 rc = 0;
4740
4741                 mutex_lock(&the_lnet.ln_api_mutex);
4742                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4743                 if (!udsp)
4744                         rc = -ENOENT;
4745                 else
4746                         rc = lnet_udsp_marshal(udsp, ioc_udsp);
4747                 mutex_unlock(&the_lnet.ln_api_mutex);
4748
4749                 return rc;
4750         }
4751
4752         case IOC_LIBCFS_GET_CONST_UDSP_INFO: {
4753                 struct lnet_ioctl_construct_udsp_info *info = arg;
4754
4755                 if (info->cud_hdr.ioc_len < sizeof(*info))
4756                         return -EINVAL;
4757
4758                 CDEBUG(D_NET, "GET_UDSP_INFO for %s\n",
4759                        libcfs_nid2str(info->cud_nid));
4760
4761                 lnet_nid4_to_nid(info->cud_nid, &nid);
4762                 mutex_lock(&the_lnet.ln_api_mutex);
4763                 lnet_net_lock(0);
4764                 lnet_udsp_get_construct_info(info, &nid);
4765                 lnet_net_unlock(0);
4766                 mutex_unlock(&the_lnet.ln_api_mutex);
4767
4768                 return 0;
4769         }
4770
4771         default:
4772                 ni = lnet_net2ni_addref(data->ioc_net);
4773                 if (ni == NULL)
4774                         return -EINVAL;
4775
4776                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
4777                         rc = -EINVAL;
4778                 else
4779                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
4780
4781                 lnet_ni_decref(ni);
4782                 return rc <= 0 ? rc : 0;
4783         }
4784         /* not reached */
4785 }
4786 EXPORT_SYMBOL(LNetCtl);
4787
/* One entry of a cpt-of-nid request: a NID supplied by the caller. */
4788 struct lnet_nid_cpt {
4789         struct lnet_nid lnc_nid; /* filled by libcfs_strnid() in lnet_cpt_of_nid_show_start() */
4790         unsigned int lnc_cpt; /* NOTE(review): not read or written in the code visible here - confirm use */
4791 };
4792
/*
 * Per-dump context for the cpt-of-nid netlink command. It is allocated by
 * lnet_cpt_of_nid_show_start(), stored in cb->args[0] and released by
 * lnet_cpt_of_nid_show_done().
 */
4793 struct lnet_genl_nid_cpt_list {
4794         unsigned int lgncl_index; /* position the dump handler starts from; 0 also triggers sending the key table */
4795         unsigned int lgncl_list_count; /* number of slots handed out from lgncl_lnc_list */
4796         GENRADIX(struct lnet_nid_cpt) lgncl_lnc_list; /* requested NIDs, indexed 0..lgncl_list_count-1 */
4797 };
4798
4799 static inline struct lnet_genl_nid_cpt_list *
4800 lnet_cpt_of_nid_dump_ctx(struct netlink_callback *cb)
4801 {
4802         return (struct lnet_genl_nid_cpt_list *)cb->args[0];
4803 }
4804
4805 static int lnet_cpt_of_nid_show_done(struct netlink_callback *cb)
4806 {
4807         struct lnet_genl_nid_cpt_list *lgncl;
4808
4809         lgncl = lnet_cpt_of_nid_dump_ctx(cb);
4810
4811         if (lgncl) {
4812                 genradix_free(&lgncl->lgncl_lnc_list);
4813                 LIBCFS_FREE(lgncl, sizeof(*lgncl));
4814                 cb->args[0] = 0;
4815         }
4816
4817         return 0;
4818 }
4819
4820 static int lnet_cpt_of_nid_show_start(struct netlink_callback *cb)
4821 {
4822         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
4823 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4824         struct netlink_ext_ack *extack = NULL;
4825 #endif
4826         struct lnet_genl_nid_cpt_list *lgncl;
4827         int msg_len = genlmsg_len(gnlh);
4828         struct nlattr *params, *top;
4829         int rem, rc = 0;
4830
4831 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4832         extack = cb->extack;
4833 #endif
4834
4835         mutex_lock(&the_lnet.ln_api_mutex);
4836         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
4837                 NL_SET_ERR_MSG(extack, "Network is down");
4838                 mutex_unlock(&the_lnet.ln_api_mutex);
4839                 return -ENETDOWN;
4840         }
4841
4842         msg_len = genlmsg_len(gnlh);
4843         if (!msg_len) {
4844                 NL_SET_ERR_MSG(extack, "Missing NID argument(s)");
4845                 mutex_unlock(&the_lnet.ln_api_mutex);
4846                 return -ENOENT;
4847         }
4848
4849         LIBCFS_ALLOC(lgncl, sizeof(*lgncl));
4850         if (!lgncl) {
4851                 mutex_unlock(&the_lnet.ln_api_mutex);
4852                 return -ENOMEM;
4853         }
4854
4855         genradix_init(&lgncl->lgncl_lnc_list);
4856         lgncl->lgncl_list_count = 0;
4857         cb->args[0] = (long)lgncl;
4858
4859         params = genlmsg_data(gnlh);
4860         nla_for_each_attr(top, params, msg_len, rem) {
4861                 struct nlattr *nids;
4862                 int rem2;
4863
4864                 switch (nla_type(top)) {
4865                 case LN_SCALAR_ATTR_LIST:
4866                         nla_for_each_nested(nids, top, rem2) {
4867                                 char nidstr[LNET_NIDSTR_SIZE + 1];
4868                                 struct lnet_nid_cpt *lnc;
4869
4870                                 if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
4871                                         continue;
4872
4873                                 memset(nidstr, 0, sizeof(nidstr));
4874                                 rc = nla_strscpy(nidstr, nids, sizeof(nidstr));
4875                                 if (rc < 0) {
4876                                         NL_SET_ERR_MSG(extack,
4877                                                        "failed to get NID");
4878                                         GOTO(report_err, rc);
4879                                 }
4880
4881                                 lnc = genradix_ptr_alloc(&lgncl->lgncl_lnc_list,
4882                                                          lgncl->lgncl_list_count++,
4883                                                          GFP_ATOMIC);
4884                                 if (!lnc) {
4885                                         NL_SET_ERR_MSG(extack,
4886                                                        "failed to allocate NID");
4887                                         GOTO(report_err, rc = -ENOMEM);
4888                                 }
4889
4890                                 rc = libcfs_strnid(&lnc->lnc_nid, strim(nidstr));
4891                                 if (rc < 0) {
4892                                         NL_SET_ERR_MSG(extack, "invalid NID");
4893                                         GOTO(report_err, rc);
4894                                 }
4895                                 rc = 0;
4896                                 CDEBUG(D_NET, "nid: %s\n", libcfs_nidstr(&lnc->lnc_nid));
4897                         }
4898                         fallthrough;
4899                 default:
4900                         break;
4901                 }
4902         }
4903 report_err:
4904         mutex_unlock(&the_lnet.ln_api_mutex);
4905
4906         if (rc < 0)
4907                 lnet_cpt_of_nid_show_done(cb);
4908
4909         return rc;
4910 }
4911
4912 static const struct ln_key_list cpt_of_nid_props_list = {
4913         .lkl_maxattr                    = LNET_CPT_OF_NID_ATTR_MAX,
4914         .lkl_list                       = {
4915                 [LNET_CPT_OF_NID_ATTR_HDR]      = {
4916                         .lkp_value              = "cpt-of-nid",
4917                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
4918                         .lkp_data_type          = NLA_NUL_STRING,
4919                 },
4920                 [LNET_CPT_OF_NID_ATTR_NID]      = {
4921                         .lkp_value              = "nid",
4922                         .lkp_data_type          = NLA_STRING,
4923                 },
4924                 [LNET_CPT_OF_NID_ATTR_CPT]      = {
4925                         .lkp_value              = "cpt",
4926                         .lkp_data_type          = NLA_U32,
4927                 },
4928         },
4929 };
4930
4931 static int lnet_cpt_of_nid_show_dump(struct sk_buff *msg,
4932                                      struct netlink_callback *cb)
4933 {
4934         struct lnet_genl_nid_cpt_list *lgncl;
4935 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4936         struct netlink_ext_ack *extack = NULL;
4937 #endif
4938         int portid = NETLINK_CB(cb->skb).portid;
4939         int seq = cb->nlh->nlmsg_seq;
4940         int idx;
4941         int rc = 0;
4942         bool need_hdr = true;
4943
4944 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4945         extack = cb->extack;
4946 #endif
4947
4948         mutex_lock(&the_lnet.ln_api_mutex);
4949         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
4950                 NL_SET_ERR_MSG(extack, "Network is down");
4951                 GOTO(send_error, rc = -ENETDOWN);
4952         }
4953
4954         lgncl = lnet_cpt_of_nid_dump_ctx(cb);
4955         idx = lgncl->lgncl_index;
4956
4957         if (!lgncl->lgncl_index) {
4958                 const struct ln_key_list *all[] = {
4959                         &cpt_of_nid_props_list, NULL, NULL
4960                 };
4961
4962                 rc = lnet_genl_send_scalar_list(msg, portid, seq, &lnet_family,
4963                                                 NLM_F_CREATE | NLM_F_MULTI,
4964                                                 LNET_CMD_CPT_OF_NID, all);
4965                 if (rc < 0) {
4966                         NL_SET_ERR_MSG(extack, "failed to send key table");
4967                         GOTO(send_error, rc);
4968                 }
4969         }
4970
4971         while (idx < lgncl->lgncl_list_count) {
4972                 struct lnet_nid_cpt *lnc;
4973                 void *hdr;
4974                 int cpt;
4975
4976                 lnc = genradix_ptr(&lgncl->lgncl_lnc_list, idx++);
4977
4978                 cpt = lnet_nid_cpt_hash(&lnc->lnc_nid, LNET_CPT_NUMBER);
4979
4980                 CDEBUG(D_NET, "nid: %s cpt: %d\n", libcfs_nidstr(&lnc->lnc_nid), cpt);
4981                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
4982                                   NLM_F_MULTI, LNET_CMD_CPT_OF_NID);
4983                 if (!hdr) {
4984                         NL_SET_ERR_MSG(extack, "failed to send values");
4985                         genlmsg_cancel(msg, hdr);
4986                         GOTO(send_error, rc = -EMSGSIZE);
4987                 }
4988
4989                 if (need_hdr) {
4990                         nla_put_string(msg, LNET_CPT_OF_NID_ATTR_HDR, "");
4991                         need_hdr = false;
4992                 }
4993
4994                 nla_put_string(msg, LNET_CPT_OF_NID_ATTR_NID,
4995                                libcfs_nidstr(&lnc->lnc_nid));
4996                 nla_put_u32(msg, LNET_CPT_OF_NID_ATTR_CPT, cpt);
4997
4998                 genlmsg_end(msg, hdr);
4999         }
5000
5001         genradix_free(&lgncl->lgncl_lnc_list);
5002         rc = 0;
5003         lgncl->lgncl_index = idx;
5004
5005 send_error:
5006         mutex_unlock(&the_lnet.ln_api_mutex);
5007
5008         return lnet_nl_send_error(cb->skb, portid, seq, rc);
5009 }
5010
5011 #ifndef HAVE_NETLINK_CALLBACK_START
5012 static int lnet_old_cpt_of_nid_show_dump(struct sk_buff *msg,
5013                                          struct netlink_callback *cb)
5014 {
5015         if (!cb->args[0]) {
5016                 int rc = lnet_cpt_of_nid_show_start(cb);
5017
5018                 if (rc < 0)
5019                         return rc;
5020         }
5021
5022         return lnet_cpt_of_nid_show_dump(msg, cb);
5023 }
5024 #endif
5025
5026 /* This is the keys for the UDSP info which is used by many
5027  * Netlink commands.
5028  */
5029 static const struct ln_key_list udsp_info_list = {
5030         .lkl_maxattr                    = LNET_UDSP_INFO_ATTR_MAX,
5031         .lkl_list                       = {
5032                 [LNET_UDSP_INFO_ATTR_NET_PRIORITY]              = {
5033                         .lkp_value      = "net priority",
5034                         .lkp_data_type  = NLA_S32
5035                 },
5036                 [LNET_UDSP_INFO_ATTR_NID_PRIORITY]              = {
5037                         .lkp_value      = "nid priority",
5038                         .lkp_data_type  = NLA_S32
5039                 },
5040                 [LNET_UDSP_INFO_ATTR_PREF_RTR_NIDS_LIST]        = {
5041                         .lkp_value      = "Preferred gateway NIDs",
5042                         .lkp_key_format = LNKF_MAPPING,
5043                         .lkp_data_type  = NLA_NESTED,
5044                 },
5045                 [LNET_UDSP_INFO_ATTR_PREF_NIDS_LIST]            = {
5046                         .lkp_value      = "Preferred source NIDs",
5047                         .lkp_key_format = LNKF_MAPPING,
5048                         .lkp_data_type  = NLA_NESTED,
5049                 },
5050         },
5051 };
5052
5053 static const struct ln_key_list udsp_info_pref_nids_list = {
5054         .lkl_maxattr                    = LNET_UDSP_INFO_PREF_NIDS_ATTR_MAX,
5055         .lkl_list                       = {
5056                 [LNET_UDSP_INFO_PREF_NIDS_ATTR_INDEX]           = {
5057                         .lkp_value      = "NID-0",
5058                         .lkp_data_type  = NLA_NUL_STRING,
5059                 },
5060                 [LNET_UDSP_INFO_PREF_NIDS_ATTR_NID]             = {
5061                         .lkp_value      = "0@lo",
5062                         .lkp_data_type  = NLA_STRING,
5063                 },
5064         },
5065 };
5066
5067 static int lnet_udsp_info_send(struct sk_buff *msg, int attr,
5068                                struct lnet_nid *nid, bool remote)
5069 {
5070         struct lnet_ioctl_construct_udsp_info *udsp;
5071         struct nlattr *udsp_attr, *udsp_info;
5072         struct nlattr *udsp_list_attr;
5073         struct nlattr *udsp_list_info;
5074         int i;
5075
5076         CFS_ALLOC_PTR(udsp);
5077         if (!udsp)
5078                 return -ENOMEM;
5079
5080         udsp->cud_peer = remote;
5081         lnet_udsp_get_construct_info(udsp, nid);
5082
5083         udsp_info = nla_nest_start(msg, attr);
5084         udsp_attr = nla_nest_start(msg, 0);
5085         nla_put_s32(msg, LNET_UDSP_INFO_ATTR_NET_PRIORITY,
5086                     udsp->cud_net_priority);
5087         nla_put_s32(msg, LNET_UDSP_INFO_ATTR_NID_PRIORITY,
5088                     udsp->cud_nid_priority);
5089
5090         if (udsp->cud_pref_rtr_nid[0] == 0)
5091                 goto skip_list;
5092
5093         udsp_list_info = nla_nest_start(msg,
5094                                         LNET_UDSP_INFO_ATTR_PREF_RTR_NIDS_LIST);
5095         for (i = 0; i < LNET_MAX_SHOW_NUM_NID; i++) {
5096                 char tmp[8]; /* NID-"3 number"\0 */
5097
5098                 if (udsp->cud_pref_rtr_nid[i] == 0)
5099                         break;
5100
5101                 udsp_list_attr = nla_nest_start(msg, i);
5102                 snprintf(tmp, sizeof(tmp), "NID-%d", i);
5103                 nla_put_string(msg, LNET_UDSP_INFO_PREF_NIDS_ATTR_INDEX,
5104                                tmp);
5105                 nla_put_string(msg, LNET_UDSP_INFO_PREF_NIDS_ATTR_NID,
5106                                libcfs_nid2str(udsp->cud_pref_rtr_nid[i]));
5107                 nla_nest_end(msg, udsp_list_attr);
5108         }
5109         nla_nest_end(msg, udsp_list_info);
5110 skip_list:
5111         nla_nest_end(msg, udsp_attr);
5112         nla_nest_end(msg, udsp_info);
5113         LIBCFS_FREE(udsp, sizeof(*udsp));
5114
5115         return 0;
5116 }
5117
5118 /* LNet NI handling */
5119 static const struct ln_key_list net_props_list = {
5120         .lkl_maxattr                    = LNET_NET_ATTR_MAX,
5121         .lkl_list                       = {
5122                 [LNET_NET_ATTR_HDR]             = {
5123                         .lkp_value              = "net",
5124                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
5125                         .lkp_data_type          = NLA_NUL_STRING,
5126                 },
5127                 [LNET_NET_ATTR_TYPE]            = {
5128                         .lkp_value              = "net type",
5129                         .lkp_data_type          = NLA_STRING
5130                 },
5131                 [LNET_NET_ATTR_LOCAL]           = {
5132                         .lkp_value              = "local NI(s)",
5133                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
5134                         .lkp_data_type          = NLA_NESTED
5135                 },
5136         },
5137 };
5138
5139 static struct ln_key_list local_ni_list = {
5140         .lkl_maxattr                    = LNET_NET_LOCAL_NI_ATTR_MAX,
5141         .lkl_list                       = {
5142                 [LNET_NET_LOCAL_NI_ATTR_NID]            = {
5143                         .lkp_value              = "nid",
5144                         .lkp_data_type          = NLA_STRING
5145                 },
5146                 [LNET_NET_LOCAL_NI_ATTR_STATUS]         = {
5147                         .lkp_value              = "status",
5148                         .lkp_data_type          = NLA_STRING
5149                 },
5150                 [LNET_NET_LOCAL_NI_ATTR_INTERFACE]      = {
5151                         .lkp_value              = "interfaces",
5152                         .lkp_key_format         = LNKF_MAPPING,
5153                         .lkp_data_type          = NLA_NESTED
5154                 },
5155                 [LNET_NET_LOCAL_NI_ATTR_STATS]          = {
5156                         .lkp_value              = "statistics",
5157                         .lkp_key_format         = LNKF_MAPPING,
5158                         .lkp_data_type          = NLA_NESTED
5159                 },
5160                 [LNET_NET_LOCAL_NI_ATTR_UDSP_INFO]      = {
5161                         .lkp_value              = "udsp info",
5162                         .lkp_key_format         = LNKF_MAPPING,
5163                         .lkp_data_type          = NLA_NESTED
5164                 },
5165                 [LNET_NET_LOCAL_NI_ATTR_SEND_STATS]     = {
5166                         .lkp_value              = "sent_stats",
5167                         .lkp_key_format         = LNKF_MAPPING,
5168                         .lkp_data_type          = NLA_NESTED
5169                 },
5170                 [LNET_NET_LOCAL_NI_ATTR_RECV_STATS]     = {
5171                         .lkp_value              = "received_stats",
5172                         .lkp_key_format         = LNKF_MAPPING,
5173                         .lkp_data_type          = NLA_NESTED
5174                 },
5175                 [LNET_NET_LOCAL_NI_ATTR_DROPPED_STATS]  = {
5176                         .lkp_value              = "dropped_stats",
5177                         .lkp_key_format         = LNKF_MAPPING,
5178                         .lkp_data_type          = NLA_NESTED
5179
5180                 },
5181                 [LNET_NET_LOCAL_NI_ATTR_HEALTH_STATS]   = {
5182                         .lkp_value              = "health stats",
5183                         .lkp_key_format         = LNKF_MAPPING,
5184                         .lkp_data_type          = NLA_NESTED
5185                 },
5186                 [LNET_NET_LOCAL_NI_ATTR_TUNABLES]       = {
5187                         .lkp_value              = "tunables",
5188                         .lkp_key_format         = LNKF_MAPPING,
5189                         .lkp_data_type          = NLA_NESTED
5190                 },
5191                 [LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES]   = {
5192                         .lkp_value              = "lnd tunables",
5193                         .lkp_key_format         = LNKF_MAPPING,
5194                         .lkp_data_type          = NLA_NESTED
5195                 },
5196                 [LNET_NET_LOCAL_NI_DEV_CPT]             = {
5197                         .lkp_value              = "dev cpt",
5198                         .lkp_data_type          = NLA_S32,
5199                 },
5200                 [LNET_NET_LOCAL_NI_CPTS]                = {
5201                         .lkp_value              = "CPT",
5202                         .lkp_data_type          = NLA_STRING,
5203                 },
5204         },
5205 };
5206
5207 static const struct ln_key_list local_ni_interfaces_list = {
5208         .lkl_maxattr                    = LNET_NET_LOCAL_NI_INTF_ATTR_MAX,
5209         .lkl_list                       = {
5210                 [LNET_NET_LOCAL_NI_INTF_ATTR_TYPE] = {
5211                         .lkp_value      = "0",
5212                         .lkp_data_type  = NLA_STRING
5213                 },
5214         },
5215 };
5216
5217 static const struct ln_key_list local_ni_stats_list = {
5218         .lkl_maxattr                    = LNET_NET_LOCAL_NI_STATS_ATTR_MAX,
5219         .lkl_list                       = {
5220                 [LNET_NET_LOCAL_NI_STATS_ATTR_SEND_COUNT]       = {
5221                         .lkp_value      = "send_count",
5222                         .lkp_data_type  = NLA_U32
5223                 },
5224                 [LNET_NET_LOCAL_NI_STATS_ATTR_RECV_COUNT]       = {
5225                         .lkp_value      = "recv_count",
5226                         .lkp_data_type  = NLA_U32
5227                 },
5228                 [LNET_NET_LOCAL_NI_STATS_ATTR_DROP_COUNT]       = {
5229                         .lkp_value      = "drop_count",
5230                         .lkp_data_type  = NLA_U32
5231                 },
5232         },
5233 };
5234
5235 static const struct ln_key_list local_ni_msg_stats_list = {
5236         .lkl_maxattr                    = LNET_NET_LOCAL_NI_MSG_STATS_ATTR_MAX,
5237         .lkl_list                       = {
5238                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT]    = {
5239                         .lkp_value      = "put",
5240                         .lkp_data_type  = NLA_U32
5241                 },
5242                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT]    = {
5243                         .lkp_value      = "get",
5244                         .lkp_data_type  = NLA_U32
5245                 },
5246                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT]  = {
5247                         .lkp_value      = "reply",
5248                         .lkp_data_type  = NLA_U32
5249                 },
5250                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT]    = {
5251                         .lkp_value      = "ack",
5252                         .lkp_data_type  = NLA_U32
5253                 },
5254                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT]  = {
5255                         .lkp_value      = "hello",
5256                         .lkp_data_type  = NLA_U32
5257                 },
5258         },
5259 };
5260
5261 static const struct ln_key_list local_ni_health_stats_list = {
5262         .lkl_maxattr                    = LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_MAX,
5263         .lkl_list                       = {
5264                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_FATAL_ERRORS] = {
5265                         .lkp_value      = "fatal_error",
5266                         .lkp_data_type  = NLA_S32
5267                 },
5268                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_LEVEL] = {
5269                         .lkp_value      = "health value",
5270                         .lkp_data_type  = NLA_S32
5271                 },
5272                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_INTERRUPTS] = {
5273                         .lkp_value      = "interrupts",
5274                         .lkp_data_type  = NLA_U32
5275                 },
5276                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_DROPPED] = {
5277                         .lkp_value      = "dropped",
5278                         .lkp_data_type  = NLA_U32
5279                 },
5280                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ABORTED] = {
5281                         .lkp_value      = "aborted",
5282                         .lkp_data_type  = NLA_U32
5283                 },
5284                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NO_ROUTE] = {
5285                         .lkp_value      = "no route",
5286                         .lkp_data_type  = NLA_U32
5287                 },
5288                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_TIMEOUTS] = {
5289                         .lkp_value      = "timeouts",
5290                         .lkp_data_type  = NLA_U32
5291                 },
5292                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ERROR] = {
5293                         .lkp_value      = "error",
5294                         .lkp_data_type  = NLA_U32
5295                 },
5296                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PING_COUNT] = {
5297                         .lkp_value      = "ping_count",
5298                         .lkp_data_type  = NLA_U32,
5299                 },
5300                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NEXT_PING] = {
5301                         .lkp_value      = "next_ping",
5302                         .lkp_data_type  = NLA_U64
5303                 },
5304         },
5305 };
5306
5307 static const struct ln_key_list local_ni_tunables_list = {
5308         .lkl_maxattr                    = LNET_NET_LOCAL_NI_TUNABLES_ATTR_MAX,
5309         .lkl_list                       = {
5310                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT]  = {
5311                         .lkp_value      = "peer_timeout",
5312                         .lkp_data_type  = NLA_S32
5313                 },
5314                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS]  = {
5315                         .lkp_value      = "peer_credits",
5316                         .lkp_data_type  = NLA_S32
5317                 },
5318                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS] = {
5319                         .lkp_value      = "peer_buffer_credits",
5320                         .lkp_data_type  = NLA_S32
5321                 },
5322                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS] = {
5323                         .lkp_value      = "credits",
5324                         .lkp_data_type  = NLA_S32
5325                 },
5326         },
5327 };
5328
/* Dump context for LNET_CMD_NETS. An index is used because the
 * traversal runs across both the LNet nets and their NI collections.
 */
struct lnet_genl_net_list {
        /* net to report; LNET_NET_ANY when no filter was given */
        unsigned int lngl_net_id;
        /* NI index the dump resumes from */
        unsigned int lngl_idx;
};
5334
5335 static inline struct lnet_genl_net_list *
5336 lnet_net_dump_ctx(struct netlink_callback *cb)
5337 {
5338         return (struct lnet_genl_net_list *)cb->args[0];
5339 }
5340
5341 static int lnet_net_show_done(struct netlink_callback *cb)
5342 {
5343         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
5344
5345         if (nlist) {
5346                 LIBCFS_FREE(nlist, sizeof(*nlist));
5347                 cb->args[0] = 0;
5348         }
5349
5350         return 0;
5351 }
5352
5353 /* LNet net ->start() handler for GET requests */
5354 static int lnet_net_show_start(struct netlink_callback *cb)
5355 {
5356         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5357 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5358         struct netlink_ext_ack *extack = NULL;
5359 #endif
5360         struct lnet_genl_net_list *nlist;
5361         int msg_len = genlmsg_len(gnlh);
5362         struct nlattr *params, *top;
5363         int rem, rc = 0;
5364
5365 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5366         extack = cb->extack;
5367 #endif
5368         if (the_lnet.ln_refcount == 0) {
5369                 NL_SET_ERR_MSG(extack, "LNet stack down");
5370                 return -ENETDOWN;
5371         }
5372
5373         LIBCFS_ALLOC(nlist, sizeof(*nlist));
5374         if (!nlist)
5375                 return -ENOMEM;
5376
5377         nlist->lngl_net_id = LNET_NET_ANY;
5378         nlist->lngl_idx = 0;
5379         cb->args[0] = (long)nlist;
5380
5381         if (!msg_len)
5382                 return 0;
5383
5384         params = genlmsg_data(gnlh);
5385         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
5386                 NL_SET_ERR_MSG(extack, "invalid configuration");
5387                 return -EINVAL;
5388         }
5389
5390         nla_for_each_nested(top, params, rem) {
5391                 struct nlattr *net;
5392                 int rem2;
5393
5394                 nla_for_each_nested(net, top, rem2) {
5395                         char filter[LNET_NIDSTR_SIZE];
5396
5397                         if (nla_type(net) != LN_SCALAR_ATTR_VALUE ||
5398                             nla_strcmp(net, "net type") != 0)
5399                                 continue;
5400
5401                         net = nla_next(net, &rem2);
5402                         if (nla_type(net) != LN_SCALAR_ATTR_VALUE) {
5403                                 NL_SET_ERR_MSG(extack, "invalid config param");
5404                                 GOTO(report_err, rc = -EINVAL);
5405                         }
5406
5407                         rc = nla_strscpy(filter, net, sizeof(filter));
5408                         if (rc < 0) {
5409                                 NL_SET_ERR_MSG(extack, "failed to get param");
5410                                 GOTO(report_err, rc);
5411                         }
5412                         rc = 0;
5413
5414                         nlist->lngl_net_id = libcfs_str2net(filter);
5415                         if (nlist->lngl_net_id == LNET_NET_ANY) {
5416                                 NL_SET_ERR_MSG(extack, "cannot parse net");
5417                                 GOTO(report_err, rc = -ENOENT);
5418                         }
5419                 }
5420         }
5421 report_err:
5422         if (rc < 0)
5423                 lnet_net_show_done(cb);
5424
5425         return rc;
5426 }
5427
5428 static int lnet_net_show_dump(struct sk_buff *msg,
5429                               struct netlink_callback *cb)
5430 {
5431         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
5432 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5433         struct netlink_ext_ack *extack = NULL;
5434 #endif
5435         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5436         int portid = NETLINK_CB(cb->skb).portid;
5437         bool found = false, started = true;
5438         const struct lnet_lnd *lnd = NULL;
5439         int idx = nlist->lngl_idx, rc = 0;
5440         int seq = cb->nlh->nlmsg_seq;
5441         struct lnet_net *net;
5442         void *hdr = NULL;
5443
5444 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5445         extack = cb->extack;
5446 #endif
5447         lnet_net_lock(LNET_LOCK_EX);
5448
5449         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
5450                 struct nlattr *local_ni, *ni_attr;
5451                 struct lnet_ni *ni;
5452                 int dev = 0;
5453
5454                 if (nlist->lngl_net_id != LNET_NET_ANY &&
5455                     nlist->lngl_net_id != net->net_id)
5456                         continue;
5457
5458                 if (gnlh->version && LNET_NETTYP(net->net_id) != LOLND) {
5459                         if (!net->net_lnd) {
5460                                 NL_SET_ERR_MSG(extack,
5461                                                "LND not setup for NI");
5462                                 GOTO(net_unlock, rc = -ENODEV);
5463                         }
5464                         if (net->net_lnd != lnd)
5465                                 lnd = net->net_lnd;
5466                         else
5467                                 lnd = NULL;
5468                 }
5469
5470                 /* We need to resend the key table every time the base LND
5471                  * changed.
5472                  */
5473                 if (!idx || lnd) {
5474                         const struct ln_key_list *all[] = {
5475                                 &net_props_list, &local_ni_list,
5476                                 &local_ni_interfaces_list,
5477                                 &local_ni_stats_list,
5478                                 &udsp_info_list,
5479                                 &udsp_info_pref_nids_list,
5480                                 &udsp_info_pref_nids_list,
5481                                 &local_ni_msg_stats_list,
5482                                 &local_ni_msg_stats_list,
5483                                 &local_ni_msg_stats_list,
5484                                 &local_ni_health_stats_list,
5485                                 &local_ni_tunables_list,
5486                                 NULL, /* lnd tunables */
5487                                 NULL
5488                         };
5489                         int flags = NLM_F_CREATE | NLM_F_MULTI;
5490
5491                         if (lnd) {
5492                                 all[ARRAY_SIZE(all) - 2] = lnd->lnd_keys;
5493                                 if (idx)
5494                                         flags |= NLM_F_REPLACE;
5495                                 started = true;
5496                         }
5497
5498                         rc = lnet_genl_send_scalar_list(msg, portid, seq,
5499                                                         &lnet_family, flags,
5500                                                         LNET_CMD_NETS, all);
5501                         if (rc < 0) {
5502                                 NL_SET_ERR_MSG(extack, "failed to send key table");
5503                                 GOTO(net_unlock, rc);
5504                         }
5505                 }
5506
5507                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
5508                                   NLM_F_MULTI, LNET_CMD_NETS);
5509                 if (!hdr) {
5510                         NL_SET_ERR_MSG(extack, "failed to send values");
5511                         GOTO(net_unlock, rc = -EMSGSIZE);
5512                 }
5513
5514                 if (started) {
5515                         nla_put_string(msg, LNET_NET_ATTR_HDR, "");
5516                         started = false;
5517                 }
5518
5519                 nla_put_string(msg, LNET_NET_ATTR_TYPE,
5520                                libcfs_net2str(net->net_id));
5521
5522                 local_ni = nla_nest_start(msg, LNET_NET_ATTR_LOCAL);
5523                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
5524                         char *status = "up";
5525
5526                         if (idx++ < nlist->lngl_idx)
5527                                 continue;
5528
5529                         ni_attr = nla_nest_start(msg, dev++);
5530                         found = true;
5531                         lnet_ni_lock(ni);
5532                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_NID,
5533                                        libcfs_nidstr(&ni->ni_nid));
5534                         if (!nid_is_lo0(&ni->ni_nid) &&
5535                             lnet_ni_get_status_locked(ni) != LNET_NI_STATUS_UP)
5536                                 status = "down";
5537                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_STATUS,
5538                                        status);
5539
5540                         if (!nid_is_lo0(&ni->ni_nid) && ni->ni_interface) {
5541                                 struct nlattr *intf_nest, *intf_attr;
5542
5543                                 intf_nest = nla_nest_start(msg,
5544                                                            LNET_NET_LOCAL_NI_ATTR_INTERFACE);
5545                                 intf_attr = nla_nest_start(msg, 0);
5546                                 nla_put_string(msg,
5547                                                LNET_NET_LOCAL_NI_INTF_ATTR_TYPE,
5548                                                ni->ni_interface);
5549                                 nla_nest_end(msg, intf_attr);
5550                                 nla_nest_end(msg, intf_nest);
5551                         }
5552
5553                         if (gnlh->version) {
5554                                 char cpts[LNET_MAX_SHOW_NUM_CPT * 4 + 4], *cpt;
5555                                 struct lnet_ioctl_element_msg_stats msg_stats;
5556                                 struct lnet_ioctl_element_stats stats;
5557                                 size_t buf_len = sizeof(cpts), len;
5558                                 struct nlattr *health_attr, *health_stats;
5559                                 struct nlattr *send_attr, *send_stats;
5560                                 struct nlattr *recv_attr, *recv_stats;
5561                                 struct nlattr *drop_attr, *drop_stats;
5562                                 struct nlattr *stats_attr, *ni_stats;
5563                                 struct nlattr *tun_attr, *ni_tun;
5564                                 int j;
5565
5566                                 stats.iel_send_count = lnet_sum_stats(&ni->ni_stats,
5567                                                                       LNET_STATS_TYPE_SEND);
5568                                 stats.iel_recv_count = lnet_sum_stats(&ni->ni_stats,
5569                                                                       LNET_STATS_TYPE_RECV);
5570                                 stats.iel_drop_count = lnet_sum_stats(&ni->ni_stats,
5571                                                                       LNET_STATS_TYPE_DROP);
5572                                 lnet_ni_unlock(ni);
5573
5574                                 stats_attr = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_STATS);
5575                                 ni_stats = nla_nest_start(msg, 0);
5576                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_SEND_COUNT,
5577                                             stats.iel_send_count);
5578                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_RECV_COUNT,
5579                                             stats.iel_recv_count);
5580                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_DROP_COUNT,
5581                                             stats.iel_drop_count);
5582                                 nla_nest_end(msg, ni_stats);
5583                                 nla_nest_end(msg, stats_attr);
5584
5585                                 if (gnlh->version < 4)
5586                                         goto skip_udsp;
5587
5588                                 /* UDSP info */
5589                                 rc = lnet_udsp_info_send(msg, LNET_NET_LOCAL_NI_ATTR_UDSP_INFO,
5590                                                          &ni->ni_nid, false);
5591                                 if (rc < 0) {
5592                                         NL_SET_ERR_MSG(extack,
5593                                                        "Failed to get udsp info");
5594                                         genlmsg_cancel(msg, hdr);
5595                                         GOTO(net_unlock, rc = -ENOMEM);
5596                                 }
5597 skip_udsp:
5598                                 if (gnlh->version < 2)
5599                                         goto skip_msg_stats;
5600
5601                                 msg_stats.im_idx = idx - 1;
5602                                 rc = lnet_get_ni_stats(&msg_stats);
5603                                 if (rc < 0) {
5604                                         NL_SET_ERR_MSG(extack,
5605                                                        "failed to get msg stats");
5606                                         genlmsg_cancel(msg, hdr);
5607                                         GOTO(net_unlock, rc = -ENOMEM);
5608                                 }
5609
5610                                 send_stats = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_SEND_STATS);
5611                                 send_attr = nla_nest_start(msg, 0);
5612                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5613                                             msg_stats.im_send_stats.ico_get_count);
5614                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5615                                             msg_stats.im_send_stats.ico_put_count);
5616                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5617                                             msg_stats.im_send_stats.ico_reply_count);
5618                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5619                                             msg_stats.im_send_stats.ico_ack_count);
5620                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5621                                             msg_stats.im_send_stats.ico_hello_count);
5622                                 nla_nest_end(msg, send_attr);
5623                                 nla_nest_end(msg, send_stats);
5624
5625                                 recv_stats = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_RECV_STATS);
5626                                 recv_attr = nla_nest_start(msg, 0);
5627                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5628                                             msg_stats.im_recv_stats.ico_get_count);
5629                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5630                                             msg_stats.im_recv_stats.ico_put_count);
5631                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5632                                             msg_stats.im_recv_stats.ico_reply_count);
5633                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5634                                             msg_stats.im_recv_stats.ico_ack_count);
5635                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5636                                             msg_stats.im_recv_stats.ico_hello_count);
5637                                 nla_nest_end(msg, recv_attr);
5638                                 nla_nest_end(msg, recv_stats);
5639
5640                                 drop_stats = nla_nest_start(msg,
5641                                                             LNET_NET_LOCAL_NI_ATTR_DROPPED_STATS);
5642                                 drop_attr = nla_nest_start(msg, 0);
5643                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5644                                             msg_stats.im_drop_stats.ico_get_count);
5645                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5646                                             msg_stats.im_drop_stats.ico_put_count);
5647                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5648                                             msg_stats.im_drop_stats.ico_reply_count);
5649                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5650                                             msg_stats.im_drop_stats.ico_ack_count);
5651                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5652                                             msg_stats.im_drop_stats.ico_hello_count);
5653                                 nla_nest_end(msg, drop_attr);
5654                                 nla_nest_end(msg, drop_stats);
5655
5656                                 /* health stats */
5657                                 health_stats = nla_nest_start(msg,
5658                                                               LNET_NET_LOCAL_NI_ATTR_HEALTH_STATS);
5659                                 health_attr = nla_nest_start(msg, 0);
5660                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_FATAL_ERRORS,
5661                                             atomic_read(&ni->ni_fatal_error_on));
5662                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_LEVEL,
5663                                             atomic_read(&ni->ni_healthv));
5664                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_INTERRUPTS,
5665                                             atomic_read(&ni->ni_hstats.hlt_local_interrupt));
5666                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_DROPPED,
5667                                             atomic_read(&ni->ni_hstats.hlt_local_dropped));
5668                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ABORTED,
5669                                             atomic_read(&ni->ni_hstats.hlt_local_aborted));
5670                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NO_ROUTE,
5671                                             atomic_read(&ni->ni_hstats.hlt_local_no_route));
5672                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_TIMEOUTS,
5673                                             atomic_read(&ni->ni_hstats.hlt_local_timeout));
5674                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ERROR,
5675                                             atomic_read(&ni->ni_hstats.hlt_local_error));
5676                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PING_COUNT,
5677                                             ni->ni_ping_count);
5678                                 nla_put_u64_64bit(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NEXT_PING,
5679                                                   ni->ni_next_ping,
5680                                                   LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PAD);
5681                                 nla_nest_end(msg, health_attr);
5682                                 nla_nest_end(msg, health_stats);
5683 skip_msg_stats:
5684                                 /* Report net tunables */
5685                                 tun_attr = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_TUNABLES);
5686                                 ni_tun = nla_nest_start(msg, 0);
5687                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT,
5688                                             ni->ni_net->net_tunables.lct_peer_timeout);
5689                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS,
5690                                             ni->ni_net->net_tunables.lct_peer_tx_credits);
5691                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS,
5692                                             ni->ni_net->net_tunables.lct_peer_rtr_credits);
5693                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS,
5694                                             ni->ni_net->net_tunables.lct_max_tx_credits);
5695                                 nla_nest_end(msg, ni_tun);
5696
5697                                 nla_nest_end(msg, tun_attr);
5698
5699                                 if (lnd && lnd->lnd_nl_get && lnd->lnd_keys) {
5700                                         struct nlattr *lnd_tun_attr, *lnd_ni_tun;
5701
5702                                         lnd_tun_attr = nla_nest_start(msg,
5703                                                                       LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES);
5704                                         lnd_ni_tun = nla_nest_start(msg, 0);
5705                                         rc = lnd->lnd_nl_get(LNET_CMD_NETS, msg,
5706                                                              LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES,
5707                                                              ni);
5708                                         if (rc < 0) {
5709                                                 NL_SET_ERR_MSG(extack,
5710                                                                "failed to get lnd tunables");
5711                                                 genlmsg_cancel(msg, hdr);
5712                                                 GOTO(net_unlock, rc);
5713                                         }
5714                                         nla_nest_end(msg, lnd_ni_tun);
5715                                         nla_nest_end(msg, lnd_tun_attr);
5716                                 }
5717
5718                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_DEV_CPT, ni->ni_dev_cpt);
5719
5720                                 /* Report cpts. We could send this as a nested list
5721                                  * of integers but older versions of the tools
5722                                  * expect a string. The new versions can handle
5723                                  * both formats so in the future we can change
5724                                  * this to a nested list.
5725                                  */
5726                                 len = snprintf(cpts, buf_len, "\"[");
5727                                 cpt = cpts + len;
5728                                 buf_len -= len;
5729
5730                                 if (ni->ni_ncpts == LNET_CPT_NUMBER && !ni->ni_cpts)  {
5731                                         for (j = 0; j < ni->ni_ncpts; j++) {
5732                                                 len = snprintf(cpt, buf_len, "%d,", j);
5733                                                 buf_len -= len;
5734                                                 cpt += len;
5735                                         }
5736                                 } else {
5737                                         for (j = 0;
5738                                              ni->ni_cpts && j < ni->ni_ncpts &&
5739                                              j < LNET_MAX_SHOW_NUM_CPT; j++) {
5740                                                 len = snprintf(cpt, buf_len, "%d,",
5741                                                                ni->ni_cpts[j]);
5742                                                 buf_len -= len;
5743                                                 cpt += len;
5744                                         }
5745                                 }
5746                                 snprintf(cpt - 1, sizeof(cpts), "]\"");
5747
5748                                 nla_put_string(msg, LNET_NET_LOCAL_NI_CPTS, cpts);
5749                         } else {
5750                                 lnet_ni_unlock(ni);
5751                         }
5752                         nla_nest_end(msg, ni_attr);
5753                 }
5754                 nla_nest_end(msg, local_ni);
5755
5756                 genlmsg_end(msg, hdr);
5757         }
5758
5759         if (!found) {
5760                 struct nlmsghdr *nlh = nlmsg_hdr(msg);
5761
5762                 nlmsg_cancel(msg, nlh);
5763                 NL_SET_ERR_MSG(extack, "Network is down");
5764                 rc = -ESRCH;
5765         }
5766         nlist->lngl_idx = idx;
5767 net_unlock:
5768         lnet_net_unlock(LNET_LOCK_EX);
5769
5770         return lnet_nl_send_error(cb->skb, portid, seq, rc);
5771 }
5772
5773 #ifndef HAVE_NETLINK_CALLBACK_START
5774 static int lnet_old_net_show_dump(struct sk_buff *msg,
5775                                    struct netlink_callback *cb)
5776 {
5777         if (!cb->args[0]) {
5778                 int rc = lnet_net_show_start(cb);
5779
5780                 if (rc < 0)
5781                         return rc;
5782         }
5783
5784         return lnet_net_show_dump(msg, cb);
5785 }
5786 #endif
5787
5788 static int lnet_genl_parse_tunables(struct nlattr *settings,
5789                                     struct lnet_ioctl_config_lnd_tunables *tun)
5790 {
5791         struct nlattr *param;
5792         int rem, rc = 0;
5793
5794         nla_for_each_nested(param, settings, rem) {
5795                 int type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_UNSPEC;
5796                 s64 num;
5797
5798                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
5799                         continue;
5800
5801                 if (nla_strcmp(param, "peer_timeout") == 0)
5802                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT;
5803                 else if (nla_strcmp(param, "peer_credits") == 0)
5804                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS;
5805                 else if (nla_strcmp(param, "peer_buffer_credits") == 0)
5806                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS;
5807                 else if (nla_strcmp(param, "credits") == 0)
5808                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS;
5809
5810                 param = nla_next(param, &rem);
5811                 if (nla_type(param) != LN_SCALAR_ATTR_INT_VALUE)
5812                         return -EINVAL;
5813
5814                 num = nla_get_s64(param);
5815                 switch (type) {
5816                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT:
5817                         if (num >= 0)
5818                                 tun->lt_cmn.lct_peer_timeout = num;
5819                         break;
5820                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS:
5821                         if (num > 0)
5822                                 tun->lt_cmn.lct_peer_tx_credits = num;
5823                         break;
5824                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS:
5825                         if (num > 0)
5826                                 tun->lt_cmn.lct_peer_rtr_credits = num;
5827                         break;
5828                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS:
5829                         if (num > 0)
5830                                 tun->lt_cmn.lct_max_tx_credits = num;
5831                         break;
5832                 default:
5833                         rc = -EINVAL;
5834                         break;
5835                 }
5836         }
5837         return rc;
5838 }
5839
5840 static int lnet_genl_parse_lnd_tunables(struct nlattr *settings,
5841                                         struct lnet_lnd_tunables *tun,
5842                                         const struct lnet_lnd *lnd)
5843 {
5844         const struct ln_key_list *list = lnd->lnd_keys;
5845         struct nlattr *param;
5846         int rem, rc = 0;
5847         int i = 1;
5848
5849         /* silently ignore these setting if the LND driver doesn't
5850          * support any LND tunables
5851          */
5852         if (!list || !lnd->lnd_nl_set || !list->lkl_maxattr)
5853                 return 0;
5854
5855         nla_for_each_nested(param, settings, rem) {
5856                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
5857                         continue;
5858
5859                 for (i = 1; i <= list->lkl_maxattr; i++) {
5860                         if (!list->lkl_list[i].lkp_value ||
5861                             nla_strcmp(param, list->lkl_list[i].lkp_value) != 0)
5862                                 continue;
5863
5864                         param = nla_next(param, &rem);
5865                         rc = lnd->lnd_nl_set(LNET_CMD_NETS, param, i, tun);
5866                         if (rc < 0)
5867                                 return rc;
5868                 }
5869         }
5870
5871         return rc;
5872 }
5873
5874 static int
5875 lnet_genl_parse_local_ni(struct nlattr *entry, struct genl_info *info,
5876                          int net_id, struct lnet_ioctl_config_ni *conf,
5877                          bool *ni_list)
5878 {
5879         bool create = info->nlhdr->nlmsg_flags & NLM_F_CREATE;
5880         struct lnet_ioctl_config_lnd_tunables *tun;
5881         struct nlattr *settings;
5882         int rem3, rc = 0;
5883
5884         LIBCFS_ALLOC(tun, sizeof(struct lnet_ioctl_config_lnd_tunables));
5885         if (!tun) {
5886                 GENL_SET_ERR_MSG(info, "cannot allocate memory for tunables");
5887                 GOTO(out, rc = -ENOMEM);
5888         }
5889
5890         /* Use LND defaults */
5891         tun->lt_cmn.lct_peer_timeout = -1;
5892         tun->lt_cmn.lct_peer_tx_credits = -1;
5893         tun->lt_cmn.lct_peer_rtr_credits = -1;
5894         tun->lt_cmn.lct_max_tx_credits = -1;
5895         conf->lic_ncpts = 0;
5896
5897         nla_for_each_nested(settings, entry, rem3) {
5898                 if (nla_type(settings) != LN_SCALAR_ATTR_VALUE)
5899                         continue;
5900
5901                 if (nla_strcmp(settings, "interfaces") == 0) {
5902                         struct nlattr *intf;
5903                         int rem4;
5904
5905                         settings = nla_next(settings, &rem3);
5906                         if (nla_type(settings) !=
5907                             LN_SCALAR_ATTR_LIST) {
5908                                 GENL_SET_ERR_MSG(info,
5909                                                  "invalid interfaces");
5910                                 GOTO(out, rc = -EINVAL);
5911                         }
5912
5913                         nla_for_each_nested(intf, settings, rem4) {
5914                                 intf = nla_next(intf, &rem4);
5915                                 if (nla_type(intf) !=
5916                                     LN_SCALAR_ATTR_VALUE) {
5917                                         GENL_SET_ERR_MSG(info,
5918                                                          "cannot parse interface");
5919                                         GOTO(out, rc = -EINVAL);
5920                                 }
5921
5922                                 rc = nla_strscpy(conf->lic_ni_intf, intf,
5923                                                  sizeof(conf->lic_ni_intf));
5924                                 if (rc < 0) {
5925                                         GENL_SET_ERR_MSG(info,
5926                                                          "failed to parse interfaces");
5927                                         GOTO(out, rc);
5928                                 }
5929                         }
5930                         *ni_list = true;
5931                 } else if (nla_strcmp(settings, "tunables") == 0) {
5932                         settings = nla_next(settings, &rem3);
5933                         if (nla_type(settings) !=
5934                             LN_SCALAR_ATTR_LIST) {
5935                                 GENL_SET_ERR_MSG(info,
5936                                                  "invalid tunables");
5937                                 GOTO(out, rc = -EINVAL);
5938                         }
5939
5940                         rc = lnet_genl_parse_tunables(settings, tun);
5941                         if (rc < 0) {
5942                                 GENL_SET_ERR_MSG(info,
5943                                                  "failed to parse tunables");
5944                                 GOTO(out, rc);
5945                         }
5946                 } else if ((nla_strcmp(settings, "lnd tunables") == 0)) {
5947                         const struct lnet_lnd *lnd;
5948
5949                         lnd = lnet_load_lnd(LNET_NETTYP(net_id));
5950                         if (IS_ERR(lnd)) {
5951                                 GENL_SET_ERR_MSG(info,
5952                                                  "LND type not supported");
5953                                 GOTO(out, rc = PTR_ERR(lnd));
5954                         }
5955
5956                         settings = nla_next(settings, &rem3);
5957                         if (nla_type(settings) !=
5958                             LN_SCALAR_ATTR_LIST) {
5959                                 GENL_SET_ERR_MSG(info,
5960                                                  "lnd tunables should be list\n");
5961                                 GOTO(out, rc = -EINVAL);
5962                         }
5963
5964                         rc = lnet_genl_parse_lnd_tunables(settings,
5965                                                           &tun->lt_tun, lnd);
5966                         if (rc < 0) {
5967                                 GENL_SET_ERR_MSG(info,
5968                                                  "failed to parse lnd tunables");
5969                                 GOTO(out, rc);
5970                         }
5971                 } else if (nla_strcmp(settings, "CPT") == 0) {
5972                         struct nlattr *cpt;
5973                         int rem4;
5974
5975                         settings = nla_next(settings, &rem3);
5976                         if (nla_type(settings) != LN_SCALAR_ATTR_LIST) {
5977                                 GENL_SET_ERR_MSG(info,
5978                                                  "CPT should be list");
5979                                 GOTO(out, rc = -EINVAL);
5980                         }
5981
5982                         nla_for_each_nested(cpt, settings, rem4) {
5983                                 s64 core;
5984
5985                                 if (nla_type(cpt) !=
5986                                     LN_SCALAR_ATTR_INT_VALUE) {
5987                                         GENL_SET_ERR_MSG(info,
5988                                                          "invalid CPT config");
5989                                         GOTO(out, rc = -EINVAL);
5990                                 }
5991
5992                                 core = nla_get_s64(cpt);
5993                                 if (core >= LNET_CPT_NUMBER) {
5994                                         GENL_SET_ERR_MSG(info,
5995                                                          "invalid CPT value");
5996                                         GOTO(out, rc = -ERANGE);
5997                                 }
5998
5999                                 conf->lic_cpts[conf->lic_ncpts] = core;
6000                                 conf->lic_ncpts++;
6001                         }
6002                 }
6003         }
6004
6005         if (!create) {
6006                 struct lnet_net *net;
6007                 struct lnet_ni *ni;
6008
6009                 rc = -ENODEV;
6010                 if (!strlen(conf->lic_ni_intf)) {
6011                         GENL_SET_ERR_MSG(info,
6012                                          "interface is missing");
6013                         GOTO(out, rc);
6014                 }
6015
6016                 lnet_net_lock(LNET_LOCK_EX);
6017                 net = lnet_get_net_locked(net_id);
6018                 if (!net) {
6019                         GENL_SET_ERR_MSG(info,
6020                                          "LNet net doesn't exist");
6021                         lnet_net_unlock(LNET_LOCK_EX);
6022                         GOTO(out, rc);
6023                 }
6024
6025                 list_for_each_entry(ni, &net->net_ni_list,
6026                                     ni_netlist) {
6027                         if (!ni->ni_interface ||
6028                             strcmp(ni->ni_interface,
6029                                   conf->lic_ni_intf) != 0)
6030                                 continue;
6031
6032                         lnet_net_unlock(LNET_LOCK_EX);
6033                         rc = lnet_dyn_del_ni(&ni->ni_nid);
6034                         if (rc < 0) {
6035                                 GENL_SET_ERR_MSG(info,
6036                                                  "cannot del LNet NI");
6037                                 GOTO(out, rc);
6038                         }
6039                         break;
6040                 }
6041
6042                 if (rc < 0) { /* will be -ENODEV */
6043                         GENL_SET_ERR_MSG(info,
6044                                          "interface invalid for deleting LNet NI");
6045                         lnet_net_unlock(LNET_LOCK_EX);
6046                 }
6047         } else {
6048                 if (!strlen(conf->lic_ni_intf)) {
6049                         GENL_SET_ERR_MSG(info,
6050                                          "interface is missing");
6051                         GOTO(out, rc);
6052                 }
6053
6054                 rc = lnet_dyn_add_ni(conf, net_id, tun);
6055                 switch (rc) {
6056                 case -ENOENT:
6057                         GENL_SET_ERR_MSG(info,
6058                                          "cannot parse net");
6059                         break;
6060                 case -ERANGE:
6061                         GENL_SET_ERR_MSG(info,
6062                                          "invalid CPT set");
6063                         break;
6064                 default:
6065                         GENL_SET_ERR_MSG(info,
6066                                          "cannot add LNet NI");
6067                 case 0:
6068                         break;
6069                 }
6070         }
6071 out:
6072         if (tun)
6073                 LIBCFS_FREE(tun, sizeof(struct lnet_ioctl_config_lnd_tunables));
6074
6075         return rc;
6076 }
6077
6078 static int lnet_net_cmd(struct sk_buff *skb, struct genl_info *info)
6079 {
6080         struct nlmsghdr *nlh = nlmsg_hdr(skb);
6081         struct genlmsghdr *gnlh = nlmsg_data(nlh);
6082         struct nlattr *params = genlmsg_data(gnlh);
6083         int msg_len, rem, rc = 0;
6084         struct nlattr *attr;
6085
6086         msg_len = genlmsg_len(gnlh);
6087         if (!msg_len) {
6088                 GENL_SET_ERR_MSG(info, "no configuration");
6089                 return -ENOMSG;
6090         }
6091
6092         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
6093                 GENL_SET_ERR_MSG(info, "invalid configuration");
6094                 return -EINVAL;
6095         }
6096
6097         nla_for_each_nested(attr, params, rem) {
6098                 bool ni_list = false, ipnets = false;
6099                 struct lnet_ioctl_config_ni conf;
6100                 u32 net_id = LNET_NET_ANY;
6101                 struct nlattr *entry;
6102                 int rem2;
6103
6104                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6105                         continue;
6106
6107                 nla_for_each_nested(entry, attr, rem2) {
6108                         switch (nla_type(entry)) {
6109                         case LN_SCALAR_ATTR_VALUE: {
6110                                 ssize_t len;
6111
6112                                 memset(&conf, 0, sizeof(conf));
6113                                 if (nla_strcmp(entry, "ip2net") == 0) {
6114                                         entry = nla_next(entry, &rem2);
6115                                         if (nla_type(entry) !=
6116                                             LN_SCALAR_ATTR_VALUE) {
6117                                                 GENL_SET_ERR_MSG(info,
6118                                                                  "ip2net has invalid key");
6119                                                 GOTO(out, rc = -EINVAL);
6120                                         }
6121
6122                                         len = nla_strscpy(conf.lic_legacy_ip2nets,
6123                                                           entry,
6124                                                           sizeof(conf.lic_legacy_ip2nets));
6125                                         if (len < 0) {
6126                                                 GENL_SET_ERR_MSG(info,
6127                                                                  "ip2net key string is invalid");
6128                                                 GOTO(out, rc = len);
6129                                         }
6130                                         ni_list = true;
6131                                         ipnets = true;
6132                                 } else if (nla_strcmp(entry, "net type") == 0) {
6133                                         char tmp[LNET_NIDSTR_SIZE];
6134
6135                                         entry = nla_next(entry, &rem2);
6136                                         if (nla_type(entry) !=
6137                                             LN_SCALAR_ATTR_VALUE) {
6138                                                 GENL_SET_ERR_MSG(info,
6139                                                                  "net type has invalid key");
6140                                                 GOTO(out, rc = -EINVAL);
6141                                         }
6142
6143                                         len = nla_strscpy(tmp, entry,
6144                                                           sizeof(tmp));
6145                                         if (len < 0) {
6146                                                 GENL_SET_ERR_MSG(info,
6147                                                                  "net type key string is invalid");
6148                                                 GOTO(out, rc = len);
6149                                         }
6150
6151                                         net_id = libcfs_str2net(tmp);
6152                                         if (!net_id) {
6153                                                 GENL_SET_ERR_MSG(info,
6154                                                                  "cannot parse net");
6155                                                 GOTO(out, rc = -ENODEV);
6156                                         }
6157                                         if (LNET_NETTYP(net_id) == LOLND) {
6158                                                 GENL_SET_ERR_MSG(info,
6159                                                                  "setting @lo not allowed");
6160                                                 GOTO(out, rc = -ENODEV);
6161                                         }
6162                                         conf.lic_legacy_ip2nets[0] = '\0';
6163                                         conf.lic_ni_intf[0] = '\0';
6164                                         ni_list = false;
6165                                 }
6166                                 if (rc < 0)
6167                                         GOTO(out, rc);
6168                                 break;
6169                         }
6170                         case LN_SCALAR_ATTR_LIST: {
6171                                 struct nlattr *interface;
6172                                 int rem3;
6173
6174                                 ipnets = false;
6175                                 nla_for_each_nested(interface, entry, rem3) {
6176                                         rc = lnet_genl_parse_local_ni(interface, info,
6177                                                                       net_id, &conf,
6178                                                                       &ni_list);
6179                                         if (rc < 0)
6180                                                 GOTO(out, rc);
6181                                 }
6182                                 break;
6183                         }
6184                         /* it is possible a newer version of the user land send
6185                          * values older kernels doesn't handle. So silently
6186                          * ignore these values
6187                          */
6188                         default:
6189                                 break;
6190                         }
6191                 }
6192
6193                 /* Handle case of just sent NET with no list of NIDs */
6194                 if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && !ni_list) {
6195                         rc = lnet_dyn_del_net(net_id);
6196                         if (rc < 0) {
6197                                 GENL_SET_ERR_MSG(info,
6198                                                  "cannot del network");
6199                         }
6200                 } else if ((info->nlhdr->nlmsg_flags & NLM_F_CREATE) &&
6201                            ipnets && ni_list) {
6202                         rc = lnet_handle_legacy_ip2nets(conf.lic_legacy_ip2nets,
6203                                                         NULL);
6204                         if (rc < 0)
6205                                 GENL_SET_ERR_MSG(info,
6206                                                  "cannot setup ip2nets");
6207                 }
6208         }
6209 out:
6210         return rc;
6211 }
6212
6213 /* Called with ln_api_mutex */
6214 static int lnet_parse_peer_nis(struct nlattr *rlist, struct lnet_nid *pnid,
6215                                bool mr, struct genl_info *info)
6216 {
6217         struct lnet_nid snid = LNET_ANY_NID;
6218         struct nlattr *props;
6219         bool all = false;
6220         int rem, rc = 0;
6221         s64 num = -1;
6222
6223         nla_for_each_nested(props, rlist, rem) {
6224                 if (nla_type(props) != LN_SCALAR_ATTR_VALUE)
6225                         continue;
6226
6227                 if (nla_strcmp(props, "nid") == 0) {
6228                         char nidstr[LNET_NIDSTR_SIZE];
6229
6230                         props = nla_next(props, &rem);
6231                         if (nla_type(props) != LN_SCALAR_ATTR_VALUE) {
6232                                 GENL_SET_ERR_MSG(info,
6233                                                  "invalid secondary NID");
6234                                 GOTO(report_err, rc = -EINVAL);
6235                         }
6236
6237                         rc = nla_strscpy(nidstr, props, sizeof(nidstr));
6238                         if (rc < 0) {
6239                                 GENL_SET_ERR_MSG(info,
6240                                                  "failed to get secondary NID");
6241                                 GOTO(report_err, rc);
6242                         }
6243
6244                         rc = libcfs_strnid(&snid, strim(nidstr));
6245                         if (rc < 0) {
6246                                 GENL_SET_ERR_MSG(info, "unsupported secondary NID");
6247                                 GOTO(report_err, rc);
6248                         }
6249
6250                         if (LNET_NID_IS_ANY(&snid))
6251                                 all = true;
6252                 } else if (nla_strcmp(props, "health stats") == 0) {
6253                         struct nlattr *health;
6254                         int rem2;
6255
6256                         props = nla_next(props, &rem);
6257                         if (nla_type(props) !=
6258                               LN_SCALAR_ATTR_LIST) {
6259                                 GENL_SET_ERR_MSG(info,
6260                                                  "invalid health configuration");
6261                                 GOTO(report_err, rc = -EINVAL);
6262                         }
6263
6264                         nla_for_each_nested(health, props, rem2) {
6265                                 if (nla_type(health) != LN_SCALAR_ATTR_VALUE ||
6266                                     nla_strcmp(health, "health value") != 0) {
6267                                         GENL_SET_ERR_MSG(info,
6268                                                          "wrong health config format");
6269                                         GOTO(report_err, rc = -EINVAL);
6270                                 }
6271
6272                                 health = nla_next(health, &rem2);
6273                                 if (nla_type(health) !=
6274                                     LN_SCALAR_ATTR_INT_VALUE) {
6275                                         GENL_SET_ERR_MSG(info,
6276                                                          "invalid health config format");
6277                                         GOTO(report_err, rc = -EINVAL);
6278                                 }
6279
6280                                 num = nla_get_s64(health);
6281                                 clamp_t(s64, num, 0, LNET_MAX_HEALTH_VALUE);
6282                         }
6283                 }
6284         }
6285
6286         if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE && num != -1) {
6287                 lnet_peer_ni_set_healthv(pnid, num, all);
6288         } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
6289                 bool lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6290
6291                 rc = lnet_user_add_peer_ni(pnid, &snid, mr, lock_prim);
6292                 if (rc < 0)
6293                         GENL_SET_ERR_MSG(info,
6294                                          "failed to add peer");
6295         } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
6296                 bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6297
6298                 rc = lnet_del_peer_ni(pnid, &snid, force);
6299                 if (rc < 0)
6300                         GENL_SET_ERR_MSG(info,
6301                                          "failed to del peer");
6302         }
6303 report_err:
6304         return rc;
6305 }
6306
6307 static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info)
6308 {
6309         struct nlmsghdr *nlh = nlmsg_hdr(skb);
6310         struct genlmsghdr *gnlh = nlmsg_data(nlh);
6311         struct nlattr *params = genlmsg_data(gnlh);
6312         int msg_len, rem, rc = 0;
6313         struct nlattr *attr;
6314
6315         mutex_lock(&the_lnet.ln_api_mutex);
6316         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
6317                 GENL_SET_ERR_MSG(info, "Network is down");
6318                 mutex_unlock(&the_lnet.ln_api_mutex);
6319                 return -ENETDOWN;
6320         }
6321
6322         msg_len = genlmsg_len(gnlh);
6323         if (!msg_len) {
6324                 GENL_SET_ERR_MSG(info, "no configuration");
6325                 mutex_unlock(&the_lnet.ln_api_mutex);
6326                 return -ENOMSG;
6327         }
6328
6329         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
6330                 GENL_SET_ERR_MSG(info, "invalid configuration");
6331                 mutex_unlock(&the_lnet.ln_api_mutex);
6332                 return -EINVAL;
6333         }
6334
6335         nla_for_each_nested(attr, params, rem) {
6336                 struct lnet_nid pnid = LNET_ANY_NID;
6337                 bool parse_peer_nis = false;
6338                 struct nlattr *pnid_prop;
6339                 int rem2;
6340
6341                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6342                         continue;
6343
6344                 nla_for_each_nested(pnid_prop, attr, rem2) {
6345                         bool mr = true;
6346
6347                         if (nla_type(pnid_prop) != LN_SCALAR_ATTR_VALUE)
6348                                 continue;
6349
6350                         if (nla_strcmp(pnid_prop, "primary nid") == 0) {
6351                                 char nidstr[LNET_NIDSTR_SIZE];
6352
6353                                 pnid_prop = nla_next(pnid_prop, &rem2);
6354                                 if (nla_type(pnid_prop) !=
6355                                     LN_SCALAR_ATTR_VALUE) {
6356                                         GENL_SET_ERR_MSG(info,
6357                                                           "invalid primary NID type");
6358                                         GOTO(report_err, rc = -EINVAL);
6359                                 }
6360
6361                                 rc = nla_strscpy(nidstr, pnid_prop,
6362                                                  sizeof(nidstr));
6363                                 if (rc < 0) {
6364                                         GENL_SET_ERR_MSG(info,
6365                                                          "failed to get primary NID");
6366                                         GOTO(report_err, rc);
6367                                 }
6368
6369                                 rc = libcfs_strnid(&pnid, strim(nidstr));
6370                                 if (rc < 0) {
6371                                         GENL_SET_ERR_MSG(info,
6372                                                          "unsupported primary NID");
6373                                         GOTO(report_err, rc);
6374                                 }
6375
6376                                 /* we must create primary NID for peer ni
6377                                  * creation
6378                                  */
6379                                 if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
6380                                         bool lock_prim;
6381
6382                                         lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6383                                         rc = lnet_user_add_peer_ni(&pnid,
6384                                                                    &LNET_ANY_NID,
6385                                                                    true, lock_prim);
6386                                         if (rc < 0) {
6387                                                 GENL_SET_ERR_MSG(info,
6388                                                                  "failed to add primary peer");
6389                                                 GOTO(report_err, rc);
6390                                         }
6391                                 }
6392                         } else if (nla_strcmp(pnid_prop, "Multi-Rail") == 0) {
6393                                 pnid_prop = nla_next(pnid_prop, &rem2);
6394                                 if (nla_type(pnid_prop) !=
6395                                     LN_SCALAR_ATTR_INT_VALUE) {
6396                                         GENL_SET_ERR_MSG(info,
6397                                                           "invalid MR flag param");
6398                                         GOTO(report_err, rc = -EINVAL);
6399                                 }
6400
6401                                 if (nla_get_s64(pnid_prop) == 0)
6402                                         mr = false;
6403                         } else if (nla_strcmp(pnid_prop, "peer state") == 0) {
6404                                 struct lnet_peer_ni *lpni;
6405                                 struct lnet_peer *lp;
6406
6407                                 pnid_prop = nla_next(pnid_prop, &rem2);
6408                                 if (nla_type(pnid_prop) !=
6409                                     LN_SCALAR_ATTR_INT_VALUE) {
6410                                         GENL_SET_ERR_MSG(info,
6411                                                           "invalid peer state param");
6412                                         GOTO(report_err, rc = -EINVAL);
6413                                 }
6414
6415                                 lpni = lnet_peer_ni_find_locked(&pnid);
6416                                 if (!lpni) {
6417                                         GENL_SET_ERR_MSG(info,
6418                                                           "invalid peer state param");
6419                                         GOTO(report_err, rc = -ENOENT);
6420                                 }
6421                                 lnet_peer_ni_decref_locked(lpni);
6422                                 lp = lpni->lpni_peer_net->lpn_peer;
6423                                 lp->lp_state = nla_get_s64(pnid_prop);
6424                         } else if (nla_strcmp(pnid_prop, "peer ni") == 0) {
6425                                 struct nlattr *rlist;
6426                                 int rem3;
6427
6428                                 if (LNET_NID_IS_ANY(&pnid)) {
6429                                         GENL_SET_ERR_MSG(info,
6430                                                          "missing required primary NID");
6431                                         GOTO(report_err, rc);
6432                                 }
6433
6434                                 pnid_prop = nla_next(pnid_prop, &rem2);
6435                                 if (nla_type(pnid_prop) !=
6436                                     LN_SCALAR_ATTR_LIST) {
6437                                         GENL_SET_ERR_MSG(info,
6438                                                           "invalid NIDs list");
6439                                         GOTO(report_err, rc = -EINVAL);
6440                                 }
6441
6442                                 nla_for_each_nested(rlist, pnid_prop, rem3) {
6443                                         rc = lnet_parse_peer_nis(rlist, &pnid,
6444                                                                  mr, info);
6445                                         if (rc < 0)
6446                                                 GOTO(report_err, rc);
6447                                 }
6448                                 parse_peer_nis = true;
6449                         }
6450                 }
6451
6452                 /* If we have remote peer ni's we already add /del peers */
6453                 if (parse_peer_nis)
6454                         continue;
6455
6456                 if (LNET_NID_IS_ANY(&pnid)) {
6457                         GENL_SET_ERR_MSG(info, "missing primary NID");
6458                         GOTO(report_err, rc);
6459                 }
6460
6461                 if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
6462                         bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6463
6464                         rc = lnet_del_peer_ni(&pnid, &LNET_ANY_NID,
6465                                               force);
6466                         if (rc < 0) {
6467                                 GENL_SET_ERR_MSG(info,
6468                                                  "failed to del peer");
6469                                 GOTO(report_err, rc);
6470                         }
6471                 }
6472         }
6473 report_err:
6474         mutex_unlock(&the_lnet.ln_api_mutex);
6475
6476         return rc;
6477 }
6478
6479 /** LNet route handling */
6480
6481 /* We can't use struct lnet_ioctl_config_data since it lacks
6482  * support for large NIDS
6483  */
6484 struct lnet_route_properties {
6485         struct lnet_nid         lrp_gateway;
6486         u32                     lrp_net;
6487         s32                     lrp_hop;
6488         u32                     lrp_flags;
6489         u32                     lrp_priority;
6490         u32                     lrp_sensitivity;
6491 };
6492
6493 struct lnet_genl_route_list {
6494         unsigned int                            lgrl_index;
6495         unsigned int                            lgrl_count;
6496         GENRADIX(struct lnet_route_properties)  lgrl_list;
6497 };
6498
6499 static inline struct lnet_genl_route_list *
6500 lnet_route_dump_ctx(struct netlink_callback *cb)
6501 {
6502         return (struct lnet_genl_route_list *)cb->args[0];
6503 }
6504
6505 static int lnet_route_show_done(struct netlink_callback *cb)
6506 {
6507         struct lnet_genl_route_list *rlist = lnet_route_dump_ctx(cb);
6508
6509         if (rlist) {
6510                 genradix_free(&rlist->lgrl_list);
6511                 CFS_FREE_PTR(rlist);
6512         }
6513         cb->args[0] = 0;
6514
6515         return 0;
6516 }
6517
6518 static int lnet_scan_route(struct lnet_genl_route_list *rlist,
6519                     struct lnet_route_properties *settings)
6520 {
6521         struct lnet_remotenet *rnet;
6522         struct list_head *rn_list;
6523         struct lnet_route *route;
6524         int cpt, i, rc = 0;
6525
6526         cpt = lnet_net_lock_current();
6527
6528         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
6529                 rn_list = &the_lnet.ln_remote_nets_hash[i];
6530                 list_for_each_entry(rnet, rn_list, lrn_list) {
6531                         if (settings->lrp_net != LNET_NET_ANY &&
6532                             settings->lrp_net != rnet->lrn_net)
6533                                 continue;
6534
6535                         list_for_each_entry(route, &rnet->lrn_routes,
6536                                             lr_list) {
6537                                 struct lnet_route_properties *prop;
6538
6539                                 if (!LNET_NID_IS_ANY(&settings->lrp_gateway) &&
6540                                     !nid_same(&settings->lrp_gateway,
6541                                               &route->lr_nid)) {
6542                                         continue;
6543                                 }
6544
6545                                 if (settings->lrp_hop != -1 &&
6546                                     settings->lrp_hop != route->lr_hops)
6547                                         continue;
6548
6549                                 if (settings->lrp_priority != -1 &&
6550                                     settings->lrp_priority != route->lr_priority)
6551                                         continue;
6552
6553                                 if (settings->lrp_sensitivity != -1 &&
6554                                     settings->lrp_sensitivity !=
6555                                     route->lr_gateway->lp_health_sensitivity)
6556                                         continue;
6557
6558                                 prop = genradix_ptr_alloc(&rlist->lgrl_list,
6559                                                           rlist->lgrl_count++,
6560                                                           GFP_KERNEL);
6561                                 if (!prop)
6562                                         GOTO(failed_alloc, rc = -ENOMEM);
6563
6564                                 prop->lrp_net = rnet->lrn_net;
6565                                 prop->lrp_gateway = route->lr_nid;
6566                                 prop->lrp_hop = route->lr_hops;
6567                                 prop->lrp_priority = route->lr_priority;
6568                                 prop->lrp_sensitivity =
6569                                         route->lr_gateway->lp_health_sensitivity;
6570                                 if (lnet_is_route_alive(route))
6571                                         prop->lrp_flags |= LNET_RT_ALIVE;
6572                                 else
6573                                         prop->lrp_flags &= ~LNET_RT_ALIVE;
6574                                 if (route->lr_single_hop)
6575                                         prop->lrp_flags &= ~LNET_RT_MULTI_HOP;
6576                                 else
6577                                         prop->lrp_flags |= LNET_RT_MULTI_HOP;
6578                         }
6579                 }
6580         }
6581
6582 failed_alloc:
6583         lnet_net_unlock(cpt);
6584         return rc;
6585 }
6586
6587 /* LNet route ->start() handler for GET requests */
6588 static int lnet_route_show_start(struct netlink_callback *cb)
6589 {
6590         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6591 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6592         struct netlink_ext_ack *extack = NULL;
6593 #endif
6594         struct lnet_genl_route_list *rlist;
6595         int msg_len = genlmsg_len(gnlh);
6596         int rc = 0;
6597
6598 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6599         extack = cb->extack;
6600 #endif
6601         if (the_lnet.ln_refcount == 0 ||
6602             the_lnet.ln_state != LNET_STATE_RUNNING) {
6603                 NL_SET_ERR_MSG(extack, "Network is down");
6604                 return -ENETDOWN;
6605         }
6606
6607         CFS_ALLOC_PTR(rlist);
6608         if (!rlist) {
6609                 NL_SET_ERR_MSG(extack, "No memory for route list");
6610                 return -ENOMEM;
6611         }
6612
6613         genradix_init(&rlist->lgrl_list);
6614         rlist->lgrl_count = 0;
6615         rlist->lgrl_index = 0;
6616         cb->args[0] = (long)rlist;
6617
6618         mutex_lock(&the_lnet.ln_api_mutex);
6619         if (!msg_len) {
6620                 struct lnet_route_properties tmp = {
6621                         .lrp_gateway            = LNET_ANY_NID,
6622                         .lrp_net                = LNET_NET_ANY,
6623                         .lrp_hop                = -1,
6624                         .lrp_priority           = -1,
6625                         .lrp_sensitivity        = -1,
6626                 };
6627
6628                 rc = lnet_scan_route(rlist, &tmp);
6629                 if (rc < 0) {
6630                         NL_SET_ERR_MSG(extack,
6631                                        "failed to allocate router data");
6632                         GOTO(report_err, rc);
6633                 }
6634         } else {
6635                 struct nlattr *params = genlmsg_data(gnlh);
6636                 struct nlattr *attr;
6637                 int rem;
6638
6639                 nla_for_each_nested(attr, params, rem) {
6640                         struct lnet_route_properties tmp = {
6641                                 .lrp_gateway            = LNET_ANY_NID,
6642                                 .lrp_net                = LNET_NET_ANY,
6643                                 .lrp_hop                = -1,
6644                                 .lrp_priority           = -1,
6645                                 .lrp_sensitivity        = -1,
6646                         };
6647                         struct nlattr *route;
6648                         int rem2;
6649
6650                         if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6651                                 continue;
6652
6653                         nla_for_each_nested(route, attr, rem2) {
6654                                 if (nla_type(route) != LN_SCALAR_ATTR_VALUE)
6655                                         continue;
6656
6657                                 if (nla_strcmp(route, "net") == 0) {
6658                                         char nw[LNET_NIDSTR_SIZE];
6659
6660                                         route = nla_next(route, &rem2);
6661                                         if (nla_type(route) !=
6662                                             LN_SCALAR_ATTR_VALUE) {
6663                                                 NL_SET_ERR_MSG(extack,
6664                                                                "invalid net param");
6665                                                 GOTO(report_err, rc = -EINVAL);
6666                                         }
6667
6668                                         rc = nla_strscpy(nw, route, sizeof(nw));
6669                                         if (rc < 0) {
6670                                                 NL_SET_ERR_MSG(extack,
6671                                                                "failed to get route param");
6672                                                 GOTO(report_err, rc);
6673                                         }
6674                                         rc = 0;
6675                                         tmp.lrp_net = libcfs_str2net(strim(nw));
6676                                 } else if (nla_strcmp(route, "gateway") == 0) {
6677                                         char gw[LNET_NIDSTR_SIZE];
6678
6679                                         route = nla_next(route, &rem2);
6680                                         if (nla_type(route) !=
6681                                             LN_SCALAR_ATTR_VALUE) {
6682                                                 NL_SET_ERR_MSG(extack,
6683                                                                "invalid gateway param");
6684                                                 GOTO(report_err, rc = -EINVAL);
6685                                         }
6686
6687                                         rc = nla_strscpy(gw, route, sizeof(gw));
6688                                         if (rc < 0) {
6689                                                 NL_SET_ERR_MSG(extack,
6690                                                                "failed to get route param");
6691                                                 GOTO(report_err, rc);
6692                                         }
6693
6694                                         rc = libcfs_strnid(&tmp.lrp_gateway, strim(gw));
6695                                         if (rc < 0) {
6696                                                 NL_SET_ERR_MSG(extack,
6697                                                                "cannot parse gateway");
6698                                                 GOTO(report_err, rc = -ENODEV);
6699                                         }
6700                                         rc = 0;
6701                                 } else if (nla_strcmp(route, "hop") == 0) {
6702                                         route = nla_next(route, &rem2);
6703                                         if (nla_type(route) !=
6704                                             LN_SCALAR_ATTR_INT_VALUE) {
6705                                                 NL_SET_ERR_MSG(extack,
6706                                                                "invalid hop param");
6707                                                 GOTO(report_err, rc = -EINVAL);
6708                                         }
6709
6710                                         tmp.lrp_hop = nla_get_s64(route);
6711                                         if (tmp.lrp_hop != -1)
6712                                                 clamp_t(s32, tmp.lrp_hop, 1, 127);
6713                                 } else if (nla_strcmp(route, "priority") == 0) {
6714                                         route = nla_next(route, &rem2);
6715                                         if (nla_type(route) !=
6716                                             LN_SCALAR_ATTR_INT_VALUE) {
6717                                                 NL_SET_ERR_MSG(extack,
6718                                                                "invalid priority param");
6719                                                 GOTO(report_err, rc = -EINVAL);
6720                                         }
6721
6722                                         tmp.lrp_priority = nla_get_s64(route);
6723                                 }
6724                         }
6725
6726                         rc = lnet_scan_route(rlist, &tmp);
6727                         if (rc < 0) {
6728                                 NL_SET_ERR_MSG(extack,
6729                                                "failed to allocate router data");
6730                                 GOTO(report_err, rc);
6731                         }
6732                 }
6733         }
6734 report_err:
6735         mutex_unlock(&the_lnet.ln_api_mutex);
6736
6737         if (rc < 0)
6738                 lnet_route_show_done(cb);
6739
6740         return rc;
6741 }
6742
6743 static const struct ln_key_list route_props_list = {
6744         .lkl_maxattr                    = LNET_ROUTE_ATTR_MAX,
6745         .lkl_list                       = {
6746                 [LNET_ROUTE_ATTR_HDR]                   = {
6747                         .lkp_value                      = "route",
6748                         .lkp_key_format                 = LNKF_SEQUENCE | LNKF_MAPPING,
6749                         .lkp_data_type                  = NLA_NUL_STRING,
6750                 },
6751                 [LNET_ROUTE_ATTR_NET]                   = {
6752                         .lkp_value                      = "net",
6753                         .lkp_data_type                  = NLA_STRING
6754                 },
6755                 [LNET_ROUTE_ATTR_GATEWAY]               = {
6756                         .lkp_value                      = "gateway",
6757                         .lkp_data_type                  = NLA_STRING
6758                 },
6759                 [LNET_ROUTE_ATTR_HOP]                   = {
6760                         .lkp_value                      = "hop",
6761                         .lkp_data_type                  = NLA_S32
6762                 },
6763                 [LNET_ROUTE_ATTR_PRIORITY]              = {
6764                         .lkp_value                      = "priority",
6765                         .lkp_data_type                  = NLA_U32
6766                 },
6767                 [LNET_ROUTE_ATTR_HEALTH_SENSITIVITY]    = {
6768                         .lkp_value                      = "health_sensitivity",
6769                         .lkp_data_type                  = NLA_U32
6770                 },
6771                 [LNET_ROUTE_ATTR_STATE] = {
6772                         .lkp_value                      = "state",
6773                         .lkp_data_type                  = NLA_STRING,
6774                 },
6775                 [LNET_ROUTE_ATTR_TYPE]  = {
6776                         .lkp_value                      = "type",
6777                         .lkp_data_type                  = NLA_STRING,
6778                 },
6779         },
6780 };
6781
6782
6783 static int lnet_route_show_dump(struct sk_buff *msg,
6784                                 struct netlink_callback *cb)
6785 {
6786         struct lnet_genl_route_list *rlist = lnet_route_dump_ctx(cb);
6787         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6788 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6789         struct netlink_ext_ack *extack = NULL;
6790 #endif
6791         int portid = NETLINK_CB(cb->skb).portid;
6792         int seq = cb->nlh->nlmsg_seq;
6793         int idx = rlist->lgrl_index;
6794         int rc = 0;
6795
6796 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6797         extack = cb->extack;
6798 #endif
6799         if (!rlist->lgrl_count) {
6800                 NL_SET_ERR_MSG(extack, "No routes found");
6801                 GOTO(send_error, rc = -ENOENT);
6802         }
6803
6804         if (!idx) {
6805                 const struct ln_key_list *all[] = {
6806                         &route_props_list, NULL
6807                 };
6808
6809                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
6810                                                 &lnet_family,
6811                                                 NLM_F_CREATE | NLM_F_MULTI,
6812                                                 LNET_CMD_ROUTES, all);
6813                 if (rc < 0) {
6814                         NL_SET_ERR_MSG(extack, "failed to send key table");
6815                         GOTO(send_error, rc);
6816                 }
6817         }
6818
6819         /* If not routes found send an empty message and not an error */
6820         if (!rlist->lgrl_count) {
6821                 void *hdr;
6822
6823                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
6824                                   NLM_F_MULTI, LNET_CMD_ROUTES);
6825                 if (!hdr) {
6826                         NL_SET_ERR_MSG(extack, "failed to send values");
6827                         genlmsg_cancel(msg, hdr);
6828                         GOTO(send_error, rc = -EMSGSIZE);
6829                 }
6830                 genlmsg_end(msg, hdr);
6831
6832                 goto send_error;
6833         }
6834
6835         while (idx < rlist->lgrl_count) {
6836                 struct lnet_route_properties *prop;
6837                 void *hdr;
6838
6839                 prop = genradix_ptr(&rlist->lgrl_list, idx++);
6840
6841                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
6842                                   NLM_F_MULTI, LNET_CMD_ROUTES);
6843                 if (!hdr) {
6844                         NL_SET_ERR_MSG(extack, "failed to send values");
6845                         genlmsg_cancel(msg, hdr);
6846                         GOTO(send_error, rc = -EMSGSIZE);
6847                 }
6848
6849                 if (idx == 1)
6850                         nla_put_string(msg, LNET_ROUTE_ATTR_HDR, "");
6851
6852                 nla_put_string(msg, LNET_ROUTE_ATTR_NET,
6853                                libcfs_net2str(prop->lrp_net));
6854                 nla_put_string(msg, LNET_ROUTE_ATTR_GATEWAY,
6855                                libcfs_nidstr(&prop->lrp_gateway));
6856                 if (gnlh->version) {
6857                         nla_put_s32(msg, LNET_ROUTE_ATTR_HOP, prop->lrp_hop);
6858                         nla_put_u32(msg, LNET_ROUTE_ATTR_PRIORITY, prop->lrp_priority);
6859                         nla_put_u32(msg, LNET_ROUTE_ATTR_HEALTH_SENSITIVITY,
6860                                     prop->lrp_sensitivity);
6861
6862                         nla_put_string(msg, LNET_ROUTE_ATTR_STATE,
6863                                        prop->lrp_flags & LNET_RT_ALIVE ?
6864                                        "up" : "down");
6865                         nla_put_string(msg, LNET_ROUTE_ATTR_TYPE,
6866                                        prop->lrp_flags & LNET_RT_MULTI_HOP ?
6867                                        "multi-hop" : "single-hop");
6868                 }
6869                 genlmsg_end(msg, hdr);
6870         }
6871         rlist->lgrl_index = idx;
6872 send_error:
6873         return lnet_nl_send_error(cb->skb, portid, seq, rc);
6874 };
6875
6876 #ifndef HAVE_NETLINK_CALLBACK_START
6877 static int lnet_old_route_show_dump(struct sk_buff *msg,
6878                                     struct netlink_callback *cb)
6879 {
6880         if (!cb->args[0]) {
6881                 int rc = lnet_route_show_start(cb);
6882
6883                 if (rc < 0)
6884                         return rc;
6885         }
6886
6887         return lnet_route_show_dump(msg, cb);
6888 }
6889 #endif /* !HAVE_NETLINK_CALLBACK_START */
6890
6891 /** LNet peer handling */
6892 struct lnet_genl_processid_list {
6893         unsigned int                    lgpl_index;
6894         unsigned int                    lgpl_count;
6895         GENRADIX(struct lnet_processid) lgpl_list;
6896 };
6897
6898 static inline struct lnet_genl_processid_list *
6899 lnet_peer_dump_ctx(struct netlink_callback *cb)
6900 {
6901         return (struct lnet_genl_processid_list *)cb->args[0];
6902 }
6903
6904 static int lnet_peer_ni_show_done(struct netlink_callback *cb)
6905 {
6906         struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb);
6907
6908         if (plist) {
6909                 genradix_free(&plist->lgpl_list);
6910                 CFS_FREE_PTR(plist);
6911         }
6912         cb->args[0] = 0;
6913
6914         return 0;
6915 }
6916
6917 /* LNet peer ->start() handler for GET requests */
6918 static int lnet_peer_ni_show_start(struct netlink_callback *cb)
6919 {
6920         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6921 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6922         struct netlink_ext_ack *extack = NULL;
6923 #endif
6924         struct lnet_genl_processid_list *plist;
6925         int msg_len = genlmsg_len(gnlh);
6926         int rc = 0;
6927
6928 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6929         extack = cb->extack;
6930 #endif
6931         mutex_lock(&the_lnet.ln_api_mutex);
6932         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
6933                 NL_SET_ERR_MSG(extack, "Network is down");
6934                 mutex_unlock(&the_lnet.ln_api_mutex);
6935                 return -ENETDOWN;
6936         }
6937
6938         CFS_ALLOC_PTR(plist);
6939         if (!plist) {
6940                 NL_SET_ERR_MSG(extack, "No memory for peer list");
6941                 mutex_unlock(&the_lnet.ln_api_mutex);
6942                 return -ENOMEM;
6943         }
6944
6945         genradix_init(&plist->lgpl_list);
6946         plist->lgpl_count = 0;
6947         plist->lgpl_index = 0;
6948         cb->args[0] = (long)plist;
6949
6950         if (!msg_len) {
6951                 struct lnet_peer_table *ptable;
6952                 int cpt;
6953
6954                 cfs_percpt_for_each(ptable, cpt, the_lnet.ln_peer_tables) {
6955                         struct lnet_peer *lp;
6956
6957                         list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
6958                                 struct lnet_processid *lpi;
6959
6960                                 lpi = genradix_ptr_alloc(&plist->lgpl_list,
6961                                                          plist->lgpl_count++,
6962                                                          GFP_KERNEL);
6963                                 if (!lpi) {
6964                                         NL_SET_ERR_MSG(extack, "failed to allocate NID");
6965                                         GOTO(report_err, rc = -ENOMEM);
6966                                 }
6967
6968                                 lpi->pid = LNET_PID_LUSTRE;
6969                                 lpi->nid = lp->lp_primary_nid;
6970                         }
6971                 }
6972         } else {
6973                 struct nlattr *params = genlmsg_data(gnlh);
6974                 struct nlattr *attr;
6975                 int rem;
6976
6977                 nla_for_each_nested(attr, params, rem) {
6978                         struct nlattr *nid;
6979                         int rem2;
6980
6981                         if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6982                                 continue;
6983
6984                         nla_for_each_nested(nid, attr, rem2) {
6985                                 char addr[LNET_NIDSTR_SIZE];
6986                                 struct lnet_processid *id;
6987
6988                                 if (nla_type(nid) != LN_SCALAR_ATTR_VALUE ||
6989                                     nla_strcmp(nid, "primary nid") != 0)
6990                                         continue;
6991
6992                                 nid = nla_next(nid, &rem2);
6993                                 if (nla_type(nid) != LN_SCALAR_ATTR_VALUE) {
6994                                         NL_SET_ERR_MSG(extack,
6995                                                        "invalid primary nid param");
6996                                         GOTO(report_err, rc = -EINVAL);
6997                                 }
6998
6999                                 rc = nla_strscpy(addr, nid, sizeof(addr));
7000                                 if (rc < 0) {
7001                                         NL_SET_ERR_MSG(extack,
7002                                                        "failed to get primary nid param");
7003                                         GOTO(report_err, rc);
7004                                 }
7005
7006                                 id = genradix_ptr_alloc(&plist->lgpl_list,
7007                                                         plist->lgpl_count++,
7008                                                         GFP_KERNEL);
7009                                 if (!id) {
7010                                         NL_SET_ERR_MSG(extack, "failed to allocate NID");
7011                                         GOTO(report_err, rc = -ENOMEM);
7012                                 }
7013
7014                                 rc = libcfs_strid(id, strim(addr));
7015                                 if (rc < 0) {
7016                                         NL_SET_ERR_MSG(extack, "invalid NID");
7017                                         GOTO(report_err, rc);
7018                                 }
7019                                 rc = 0;
7020                         }
7021                 }
7022         }
7023 report_err:
7024         mutex_unlock(&the_lnet.ln_api_mutex);
7025
7026         if (rc < 0)
7027                 lnet_peer_ni_show_done(cb);
7028
7029         return rc;
7030 }
7031
7032 static const struct ln_key_list lnet_peer_ni_keys = {
7033         .lkl_maxattr                    = LNET_PEER_NI_ATTR_MAX,
7034         .lkl_list                       = {
7035                 [LNET_PEER_NI_ATTR_HDR]  = {
7036                         .lkp_value              = "peer",
7037                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7038                         .lkp_data_type          = NLA_NUL_STRING,
7039                 },
7040                 [LNET_PEER_NI_ATTR_PRIMARY_NID] = {
7041                         .lkp_value              = "primary nid",
7042                         .lkp_data_type          = NLA_STRING,
7043                 },
7044                 [LNET_PEER_NI_ATTR_MULTIRAIL]   = {
7045                         .lkp_value              = "Multi-Rail",
7046                         .lkp_data_type          = NLA_FLAG
7047                 },
7048                 [LNET_PEER_NI_ATTR_STATE]       = {
7049                         .lkp_value              = "peer state",
7050                         .lkp_data_type          = NLA_U32
7051                 },
7052                 [LNET_PEER_NI_ATTR_PEER_NI_LIST] = {
7053                         .lkp_value              = "peer ni",
7054                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7055                         .lkp_data_type          = NLA_NESTED,
7056                 },
7057         },
7058 };
7059
7060 static const struct ln_key_list lnet_peer_ni_list = {
7061         .lkl_maxattr                    = LNET_PEER_NI_LIST_ATTR_MAX,
7062         .lkl_list                       = {
7063                 [LNET_PEER_NI_LIST_ATTR_NID]            = {
7064                         .lkp_value                      = "nid",
7065                         .lkp_data_type                  = NLA_STRING,
7066                 },
7067                 [LNET_PEER_NI_LIST_ATTR_UDSP_INFO]      = {
7068                         .lkp_value                      = "udsp info",
7069                         .lkp_key_format                 = LNKF_MAPPING,
7070                         .lkp_data_type                  = NLA_NESTED,
7071                 },
7072                 [LNET_PEER_NI_LIST_ATTR_STATE]          = {
7073                         .lkp_value                      = "state",
7074                         .lkp_data_type                  = NLA_STRING,
7075                 },
7076                 [LNET_PEER_NI_LIST_ATTR_MAX_TX_CREDITS] = {
7077                         .lkp_value                      = "max_ni_tx_credits",
7078                         .lkp_data_type                  = NLA_U32,
7079                 },
7080                 [LNET_PEER_NI_LIST_ATTR_CUR_TX_CREDITS] = {
7081                         .lkp_value                      = "available_tx_credits",
7082                         .lkp_data_type                  = NLA_U32,
7083                 },
7084                 [LNET_PEER_NI_LIST_ATTR_MIN_TX_CREDITS] = {
7085                         .lkp_value                      = "min_tx_credits",
7086                         .lkp_data_type                  = NLA_U32,
7087                 },
7088                 [LNET_PEER_NI_LIST_ATTR_QUEUE_BUF_COUNT] = {
7089                         .lkp_value                      = "tx_q_num_of_buf",
7090                         .lkp_data_type                  = NLA_U32,
7091                 },
7092                 [LNET_PEER_NI_LIST_ATTR_CUR_RTR_CREDITS] = {
7093                         .lkp_value                      = "available_rtr_credits",
7094                         .lkp_data_type                  = NLA_U32,
7095                 },
7096                 [LNET_PEER_NI_LIST_ATTR_MIN_RTR_CREDITS] = {
7097                         .lkp_value                      = "min_rtr_credits",
7098                         .lkp_data_type                  = NLA_U32,
7099                 },
7100                 [LNET_PEER_NI_LIST_ATTR_REFCOUNT]       = {
7101                         .lkp_value                      = "refcount",
7102                         .lkp_data_type                  = NLA_U32,
7103                 },
7104                 [LNET_PEER_NI_LIST_ATTR_STATS_COUNT]    = {
7105                         .lkp_value                      = "statistics",
7106                         .lkp_key_format                 = LNKF_MAPPING,
7107                         .lkp_data_type                  = NLA_NESTED
7108                 },
7109                 [LNET_PEER_NI_LIST_ATTR_SENT_STATS]     = {
7110                         .lkp_value                      = "sent_stats",
7111                         .lkp_key_format                 = LNKF_MAPPING,
7112                         .lkp_data_type                  = NLA_NESTED
7113                 },
7114                 [LNET_PEER_NI_LIST_ATTR_RECV_STATS]     = {
7115                         .lkp_value                      = "received_stats",
7116                         .lkp_key_format                 = LNKF_MAPPING,
7117                         .lkp_data_type                  = NLA_NESTED
7118                 },
7119                 [LNET_PEER_NI_LIST_ATTR_DROP_STATS]     = {
7120                         .lkp_value                      = "dropped_stats",
7121                         .lkp_key_format                 = LNKF_MAPPING,
7122                         .lkp_data_type                  = NLA_NESTED
7123                 },
7124                 [LNET_PEER_NI_LIST_ATTR_HEALTH_STATS]   = {
7125                         .lkp_value                      = "health stats",
7126                         .lkp_key_format                 = LNKF_MAPPING,
7127                         .lkp_data_type                  = NLA_NESTED
7128                 },
7129         },
7130 };
7131
7132 static const struct ln_key_list lnet_peer_ni_list_stats_count = {
7133         .lkl_maxattr                    = LNET_PEER_NI_LIST_STATS_COUNT_ATTR_MAX,
7134         .lkl_list                       = {
7135                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_SEND_COUNT] = {
7136                         .lkp_value                              = "send_count",
7137                         .lkp_data_type                          = NLA_U32,
7138                 },
7139                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_RECV_COUNT] = {
7140                         .lkp_value                              = "recv_count",
7141                         .lkp_data_type                          = NLA_U32,
7142                 },
7143                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_DROP_COUNT] = {
7144                         .lkp_value                              = "drop_count",
7145                         .lkp_data_type                          = NLA_U32,
7146                 },
7147         },
7148 };
7149
7150 static const struct ln_key_list lnet_peer_ni_list_stats = {
7151         .lkl_maxattr                    = LNET_PEER_NI_LIST_STATS_ATTR_MAX,
7152         .lkl_list                       = {
7153                 [LNET_PEER_NI_LIST_STATS_ATTR_PUT]      = {
7154                         .lkp_value                      = "put",
7155                         .lkp_data_type                  = NLA_U32,
7156                 },
7157                 [LNET_PEER_NI_LIST_STATS_ATTR_GET]      = {
7158                         .lkp_value                      = "get",
7159                         .lkp_data_type                  = NLA_U32,
7160                 },
7161                 [LNET_PEER_NI_LIST_STATS_ATTR_REPLY]    = {
7162                         .lkp_value                      = "reply",
7163                         .lkp_data_type                  = NLA_U32,
7164                 },
7165                 [LNET_PEER_NI_LIST_STATS_ATTR_ACK]      = {
7166                         .lkp_value                      = "ack",
7167                         .lkp_data_type                  = NLA_U32,
7168                 },
7169                 [LNET_PEER_NI_LIST_STATS_ATTR_HELLO]    = {
7170                         .lkp_value                      = "hello",
7171                         .lkp_data_type                  = NLA_U32,
7172                 },
7173         },
7174 };
7175
7176 static const struct ln_key_list lnet_peer_ni_list_health = {
7177         .lkl_maxattr                    = LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_MAX,
7178         .lkl_list                       = {
7179                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_VALUE]     = {
7180                         .lkp_value                      = "health value",
7181                         .lkp_data_type                  = NLA_S32,
7182                 },
7183                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_DROPPED]   = {
7184                         .lkp_value                      = "dropped",
7185                         .lkp_data_type                  = NLA_U32,
7186                 },
7187                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_TIMEOUT]   = {
7188                         .lkp_value                      = "timeout",
7189                         .lkp_data_type                  = NLA_U32,
7190                 },
7191                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_ERROR]     = {
7192                         .lkp_value                      = "error",
7193                         .lkp_data_type                  = NLA_U32,
7194                 },
7195                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NETWORK_TIMEOUT] = {
7196                         .lkp_value                      = "network timeout",
7197                         .lkp_data_type                  = NLA_U32,
7198                 },
7199                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PING_COUNT] = {
7200                         .lkp_value                      = "ping_count",
7201                         .lkp_data_type                  = NLA_U32,
7202                 },
7203                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NEXT_PING] = {
7204                         .lkp_value                      = "next_ping",
7205                         .lkp_data_type                  = NLA_S64,
7206                 },
7207         },
7208 };
7209
7210 static int lnet_peer_ni_show_dump(struct sk_buff *msg,
7211                                   struct netlink_callback *cb)
7212 {
7213         struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb);
7214         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
7215 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7216         struct netlink_ext_ack *extack = NULL;
7217 #endif
7218         int portid = NETLINK_CB(cb->skb).portid;
7219         int seq = cb->nlh->nlmsg_seq;
7220         int idx = plist->lgpl_index;
7221         int rc = 0;
7222
7223 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
7224         extack = cb->extack;
7225 #endif
7226         if (!plist->lgpl_count) {
7227                 NL_SET_ERR_MSG(extack, "No peers found");
7228                 GOTO(send_error, rc = -ENOENT);
7229         }
7230
7231         if (!idx) {
7232                 const struct ln_key_list *all[] = {
7233                         &lnet_peer_ni_keys, &lnet_peer_ni_list,
7234                         &udsp_info_list, &udsp_info_pref_nids_list,
7235                         &udsp_info_pref_nids_list,
7236                         &lnet_peer_ni_list_stats_count,
7237                         &lnet_peer_ni_list_stats, /* send_stats */
7238                         &lnet_peer_ni_list_stats, /* recv_stats */
7239                         &lnet_peer_ni_list_stats, /* drop stats */
7240                         &lnet_peer_ni_list_health,
7241                         NULL
7242                 };
7243
7244                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
7245                                                 &lnet_family,
7246                                                 NLM_F_CREATE | NLM_F_MULTI,
7247                                                 LNET_CMD_PEERS, all);
7248                 if (rc < 0) {
7249                         NL_SET_ERR_MSG(extack, "failed to send key table");
7250                         GOTO(send_error, rc);
7251                 }
7252         }
7253
7254         mutex_lock(&the_lnet.ln_api_mutex);
7255         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
7256                 NL_SET_ERR_MSG(extack, "Network is down");
7257                 GOTO(unlock_api_mutex, rc = -ENETDOWN);
7258         }
7259
7260         while (idx < plist->lgpl_count) {
7261                 struct lnet_processid *id;
7262                 struct lnet_peer_ni *lpni;
7263                 struct nlattr *nid_list;
7264                 struct lnet_peer *lp;
7265                 int count = 1;
7266                 void *hdr;
7267
7268                 id = genradix_ptr(&plist->lgpl_list, idx++);
7269                 if (nid_is_lo0(&id->nid))
7270                         continue;
7271
7272                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
7273                                   NLM_F_MULTI, LNET_CMD_PEERS);
7274                 if (!hdr) {
7275                         NL_SET_ERR_MSG(extack, "failed to send values");
7276                         genlmsg_cancel(msg, hdr);
7277                         GOTO(unlock_api_mutex, rc = -EMSGSIZE);
7278                 }
7279
7280                 lp = lnet_find_peer(&id->nid);
7281                 if (!lp) {
7282                         NL_SET_ERR_MSG(extack, "cannot find peer");
7283                         GOTO(unlock_api_mutex, rc = -ENOENT);
7284                 }
7285
7286                 if (idx == 1)
7287                         nla_put_string(msg, LNET_PEER_NI_ATTR_HDR, "");
7288
7289                 nla_put_string(msg, LNET_PEER_NI_ATTR_PRIMARY_NID,
7290                                libcfs_nidstr(&lp->lp_primary_nid));
7291                 if (lnet_peer_is_multi_rail(lp))
7292                         nla_put_flag(msg, LNET_PEER_NI_ATTR_MULTIRAIL);
7293
7294                 if (gnlh->version >= 3)
7295                         nla_put_u32(msg, LNET_PEER_NI_ATTR_STATE, lp->lp_state);
7296
7297                 nid_list = nla_nest_start(msg, LNET_PEER_NI_ATTR_PEER_NI_LIST);
7298                 while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
7299                         struct nlattr *peer_nid = nla_nest_start(msg, count++);
7300
7301                         nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_NID,
7302                                        libcfs_nidstr(&lpni->lpni_nid));
7303
7304                         if (gnlh->version >= 4) {
7305                                 rc = lnet_udsp_info_send(msg,
7306                                                          LNET_PEER_NI_LIST_ATTR_UDSP_INFO,
7307                                                          &lpni->lpni_nid, true);
7308                                 if (rc < 0) {
7309                                         lnet_peer_decref_locked(lp);
7310                                         NL_SET_ERR_MSG(extack,
7311                                                        "failed to get UDSP info");
7312                                         GOTO(unlock_api_mutex, rc);
7313                                 }
7314                         }
7315
7316                         if (lnet_isrouter(lpni) ||
7317                             lnet_peer_aliveness_enabled(lpni)) {
7318                                 nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_STATE,
7319                                                lnet_is_peer_ni_alive(lpni) ?
7320                                                "up" : "down");
7321                         } else {
7322                                 nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_STATE,
7323                                                "NA");
7324                         }
7325
7326                         if (gnlh->version) {
7327                                 struct lnet_ioctl_element_msg_stats lpni_msg_stats;
7328                                 struct nlattr *send_stats_list, *send_stats;
7329                                 struct nlattr *recv_stats_list, *recv_stats;
7330                                 struct nlattr *drop_stats_list, *drop_stats;
7331                                 struct nlattr *health_list, *health_stats;
7332                                 struct lnet_ioctl_element_stats stats;
7333                                 struct nlattr *stats_attr, *ni_stats;
7334
7335                                 nla_put_u32(msg,
7336                                             LNET_PEER_NI_LIST_ATTR_MAX_TX_CREDITS,
7337                                             lpni->lpni_net ?
7338                                                 lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0);
7339                                 nla_put_u32(msg,
7340                                             LNET_PEER_NI_LIST_ATTR_CUR_TX_CREDITS,
7341                                             lpni->lpni_txcredits);
7342                                 nla_put_u32(msg,
7343                                             LNET_PEER_NI_LIST_ATTR_MIN_TX_CREDITS,
7344                                             lpni->lpni_mintxcredits);
7345                                 nla_put_u32(msg,
7346                                             LNET_PEER_NI_LIST_ATTR_QUEUE_BUF_COUNT,
7347                                             lpni->lpni_txqnob);
7348                                 nla_put_u32(msg,
7349                                             LNET_PEER_NI_LIST_ATTR_CUR_RTR_CREDITS,
7350                                             lpni->lpni_rtrcredits);
7351                                 nla_put_u32(msg,
7352                                             LNET_PEER_NI_LIST_ATTR_MIN_RTR_CREDITS,
7353                                             lpni->lpni_minrtrcredits);
7354                                 nla_put_u32(msg,
7355                                             LNET_PEER_NI_LIST_ATTR_REFCOUNT,
7356                                             kref_read(&lpni->lpni_kref));
7357
7358                                 memset(&stats, 0, sizeof(stats));
7359                                 stats.iel_send_count = lnet_sum_stats(&lpni->lpni_stats,
7360                                                                       LNET_STATS_TYPE_SEND);
7361                                 stats.iel_recv_count = lnet_sum_stats(&lpni->lpni_stats,
7362                                                                       LNET_STATS_TYPE_RECV);
7363                                 stats.iel_drop_count = lnet_sum_stats(&lpni->lpni_stats,
7364                                                                       LNET_STATS_TYPE_DROP);
7365
7366                                 stats_attr = nla_nest_start(msg,
7367                                                             LNET_PEER_NI_LIST_ATTR_STATS_COUNT);
7368                                 ni_stats = nla_nest_start(msg, 0);
7369                                 nla_put_u32(msg,
7370                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_SEND_COUNT,
7371                                             stats.iel_send_count);
7372                                 nla_put_u32(msg,
7373                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_RECV_COUNT,
7374                                             stats.iel_recv_count);
7375                                 nla_put_u32(msg,
7376                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_DROP_COUNT,
7377                                             stats.iel_drop_count);
7378                                 nla_nest_end(msg, ni_stats);
7379                                 nla_nest_end(msg, stats_attr);
7380
7381                                 if (gnlh->version < 2)
7382                                         goto skip_msg_stats;
7383
7384                                 lnet_usr_translate_stats(&lpni_msg_stats, &lpni->lpni_stats);
7385
7386                                 send_stats_list = nla_nest_start(msg,
7387                                                                  LNET_PEER_NI_LIST_ATTR_SENT_STATS);
7388                                 send_stats = nla_nest_start(msg, 0);
7389                                 nla_put_u32(msg,
7390                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7391                                             lpni_msg_stats.im_send_stats.ico_put_count);
7392                                 nla_put_u32(msg,
7393                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7394                                             lpni_msg_stats.im_send_stats.ico_get_count);
7395                                 nla_put_u32(msg,
7396                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7397                                             lpni_msg_stats.im_send_stats.ico_reply_count);
7398                                 nla_put_u32(msg,
7399                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7400                                             lpni_msg_stats.im_send_stats.ico_ack_count);
7401                                 nla_put_u32(msg,
7402                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7403                                             lpni_msg_stats.im_send_stats.ico_hello_count);
7404                                 nla_nest_end(msg, send_stats);
7405                                 nla_nest_end(msg, send_stats_list);
7406
7407                                 recv_stats_list = nla_nest_start(msg,
7408                                                                  LNET_PEER_NI_LIST_ATTR_RECV_STATS);
7409                                 recv_stats = nla_nest_start(msg, 0);
7410                                 nla_put_u32(msg,
7411                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7412                                             lpni_msg_stats.im_recv_stats.ico_put_count);
7413                                 nla_put_u32(msg,
7414                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7415                                             lpni_msg_stats.im_recv_stats.ico_get_count);
7416                                 nla_put_u32(msg,
7417                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7418                                             lpni_msg_stats.im_recv_stats.ico_reply_count);
7419                                 nla_put_u32(msg,
7420                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7421                                             lpni_msg_stats.im_recv_stats.ico_ack_count);
7422                                 nla_put_u32(msg,
7423                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7424                                             lpni_msg_stats.im_recv_stats.ico_hello_count);
7425                                 nla_nest_end(msg, recv_stats);
7426                                 nla_nest_end(msg, recv_stats_list);
7427
7428                                 drop_stats_list = nla_nest_start(msg,
7429                                                                  LNET_PEER_NI_LIST_ATTR_DROP_STATS);
7430                                 drop_stats = nla_nest_start(msg, 0);
7431                                 nla_put_u32(msg,
7432                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7433                                             lpni_msg_stats.im_drop_stats.ico_put_count);
7434                                 nla_put_u32(msg,
7435                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7436                                             lpni_msg_stats.im_drop_stats.ico_get_count);
7437                                 nla_put_u32(msg,
7438                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7439                                             lpni_msg_stats.im_drop_stats.ico_reply_count);
7440                                 nla_put_u32(msg,
7441                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7442                                             lpni_msg_stats.im_drop_stats.ico_ack_count);
7443                                 nla_put_u32(msg,
7444                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7445                                             lpni_msg_stats.im_drop_stats.ico_hello_count);
7446                                 nla_nest_end(msg, drop_stats);
7447                                 nla_nest_end(msg, drop_stats_list);
7448
7449                                 health_list = nla_nest_start(msg,
7450                                                              LNET_PEER_NI_LIST_ATTR_HEALTH_STATS);
7451                                 health_stats = nla_nest_start(msg, 0);
7452                                 nla_put_s32(msg,
7453                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_VALUE,
7454                                             atomic_read(&lpni->lpni_healthv));
7455                                 nla_put_u32(msg,
7456                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_DROPPED,
7457                                             atomic_read(&lpni->lpni_hstats.hlt_remote_dropped));
7458                                 nla_put_u32(msg,
7459                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_TIMEOUT,
7460                                             atomic_read(&lpni->lpni_hstats.hlt_remote_timeout));
7461                                 nla_put_u32(msg,
7462                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_ERROR,
7463                                             atomic_read(&lpni->lpni_hstats.hlt_remote_error));
7464                                 nla_put_u32(msg,
7465                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NETWORK_TIMEOUT,
7466                                             atomic_read(&lpni->lpni_hstats.hlt_network_timeout));
7467                                 nla_put_u32(msg,
7468                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PING_COUNT,
7469                                             lpni->lpni_ping_count);
7470                                 nla_put_s64(msg,
7471                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NEXT_PING,
7472                                             lpni->lpni_next_ping,
7473                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PAD);
7474                                 nla_nest_end(msg, health_stats);
7475                                 nla_nest_end(msg, health_list);
7476                         }
7477 skip_msg_stats:
7478                         nla_nest_end(msg, peer_nid);
7479                 }
7480                 nla_nest_end(msg, nid_list);
7481
7482                 genlmsg_end(msg, hdr);
7483                 lnet_peer_decref_locked(lp);
7484         }
7485         plist->lgpl_index = idx;
7486 unlock_api_mutex:
7487         mutex_unlock(&the_lnet.ln_api_mutex);
7488 send_error:
7489         return lnet_nl_send_error(cb->skb, portid, seq, rc);
7490 };
7491
7492 #ifndef HAVE_NETLINK_CALLBACK_START
7493 static int lnet_old_peer_ni_show_dump(struct sk_buff *msg,
7494                                       struct netlink_callback *cb)
7495 {
7496         if (!cb->args[0]) {
7497                 int rc = lnet_peer_ni_show_start(cb);
7498
7499                 if (rc < 0)
7500                         return rc;
7501         }
7502
7503         return lnet_peer_ni_show_dump(msg, cb);
7504 }
7505 #endif
7506
7507 static int lnet_route_cmd(struct sk_buff *skb, struct genl_info *info)
7508 {
7509         struct nlmsghdr *nlh = nlmsg_hdr(skb);
7510         struct genlmsghdr *gnlh = nlmsg_data(nlh);
7511         struct nlattr *params = genlmsg_data(gnlh);
7512         int msg_len, rem, rc = 0;
7513         struct nlattr *attr;
7514
7515         mutex_lock(&the_lnet.ln_api_mutex);
7516         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
7517                 GENL_SET_ERR_MSG(info, "Network is down");
7518                 mutex_unlock(&the_lnet.ln_api_mutex);
7519                 return -ENETDOWN;
7520         }
7521
7522         msg_len = genlmsg_len(gnlh);
7523         if (!msg_len) {
7524                 GENL_SET_ERR_MSG(info, "no configuration");
7525                 mutex_unlock(&the_lnet.ln_api_mutex);
7526                 return -ENOMSG;
7527         }
7528
7529         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
7530                 GENL_SET_ERR_MSG(info, "invalid configuration");
7531                 mutex_unlock(&the_lnet.ln_api_mutex);
7532                 return -EINVAL;
7533         }
7534
7535         nla_for_each_nested(attr, params, rem) {
7536                 u32 net_id = LNET_NET_ANY, hops = LNET_UNDEFINED_HOPS;
7537                 u32 priority = 0, sensitivity = 1;
7538                 struct lnet_nid gw_nid = LNET_ANY_NID;
7539                 struct nlattr *route_prop;
7540                 bool alive = true;
7541                 s64 when = 0;
7542                 int rem2;
7543
7544                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
7545                         continue;
7546
7547                 nla_for_each_nested(route_prop, attr, rem2) {
7548                         char tmp[LNET_NIDSTR_SIZE];
7549                         ssize_t len;
7550                         s64 num;
7551
7552                         if (nla_type(route_prop) != LN_SCALAR_ATTR_VALUE)
7553                                 continue;
7554
7555                         if (nla_strcmp(route_prop, "net") == 0) {
7556                                 route_prop = nla_next(route_prop, &rem2);
7557                                 if (nla_type(route_prop) !=
7558                                     LN_SCALAR_ATTR_VALUE) {
7559                                         GENL_SET_ERR_MSG(info,
7560                                                          "net is invalid key");
7561                                         GOTO(report_err, rc = -EINVAL);
7562                                 }
7563
7564                                 len = nla_strscpy(tmp, route_prop, sizeof(tmp));
7565                                 if (len < 0) {
7566                                         GENL_SET_ERR_MSG(info,
7567                                                          "net key string is invalid");
7568                                         GOTO(report_err, rc = len);
7569                                 }
7570
7571                                 net_id = libcfs_str2net(tmp);
7572                                 if (!net_id) {
7573                                         GENL_SET_ERR_MSG(info,
7574                                                          "cannot parse remote net");
7575                                         GOTO(report_err, rc = -ENODEV);
7576                                 }
7577
7578                                 if (LNET_NETTYP(net_id) == LOLND) {
7579                                         GENL_SET_ERR_MSG(info,
7580                                                          "setting @lo not allowed");
7581                                         GOTO(report_err, rc = -EACCES);
7582                                 }
7583
7584                                 if (net_id == LNET_NET_ANY) {
7585                                         GENL_SET_ERR_MSG(info,
7586                                                          "setting LNET_NET_ANY not allowed");
7587                                         GOTO(report_err, rc = -ENXIO);
7588                                 }
7589                         } else if (nla_strcmp(route_prop, "gateway") == 0) {
7590                                 route_prop = nla_next(route_prop, &rem2);
7591                                 if (nla_type(route_prop) !=
7592                                     LN_SCALAR_ATTR_VALUE) {
7593                                         GENL_SET_ERR_MSG(info,
7594                                                          "gateway is invalid key");
7595                                         GOTO(report_err, rc = -EINVAL);
7596                                 }
7597
7598                                 len = nla_strscpy(tmp, route_prop, sizeof(tmp));
7599                                 if (len < 0) {
7600                                         GENL_SET_ERR_MSG(info,
7601                                                          "gateway string is invalid");
7602                                         GOTO(report_err, rc = len);
7603                                 }
7604
7605                                 rc = libcfs_strnid(&gw_nid, strim(tmp));
7606                                 if (rc < 0) {
7607                                         GENL_SET_ERR_MSG(info,
7608                                                          "cannot parse gateway");
7609                                         GOTO(report_err, rc = -ENODEV);
7610                                 }
7611                         } else if (nla_strcmp(route_prop, "state") == 0) {
7612                                 route_prop = nla_next(route_prop, &rem2);
7613                                 if (nla_type(route_prop) !=
7614                                     LN_SCALAR_ATTR_VALUE) {
7615                                         GENL_SET_ERR_MSG(info,
7616                                                          "state is invalid key");
7617                                         GOTO(report_err, rc = -EINVAL);
7618                                 }
7619
7620                                 if (nla_strcmp(route_prop, "down") == 0) {
7621                                         alive = false;
7622                                 } else if (nla_strcmp(route_prop, "up") == 0) {
7623                                         alive = true;
7624                                 } else {
7625                                         GENL_SET_ERR_MSG(info,
7626                                                          "status string bad value");
7627                                         GOTO(report_err, rc = -EINVAL);
7628                                 }
7629                         } else if (nla_strcmp(route_prop, "notify_time") == 0) {
7630                                 route_prop = nla_next(route_prop, &rem2);
7631                                 if (nla_type(route_prop) !=
7632                                     LN_SCALAR_ATTR_INT_VALUE) {
7633                                         GENL_SET_ERR_MSG(info,
7634                                                          "notify_time is invalid key");
7635                                         GOTO(report_err, rc = -EINVAL);
7636                                 }
7637
7638                                 when = nla_get_s64(route_prop);
7639                                 if (ktime_get_real_seconds() < when) {
7640                                         GENL_SET_ERR_MSG(info,
7641                                                          "notify_time is in the future");
7642                                         GOTO(report_err, rc = -EINVAL);
7643                                 }
7644                         } else if (nla_strcmp(route_prop, "hop") == 0) {
7645                                 route_prop = nla_next(route_prop, &rem2);
7646                                 if (nla_type(route_prop) !=
7647                                     LN_SCALAR_ATTR_INT_VALUE) {
7648                                         GENL_SET_ERR_MSG(info,
7649                                                          "hop has invalid key");
7650                                         GOTO(report_err, rc = -EINVAL);
7651                                 }
7652
7653                                 hops = nla_get_s64(route_prop);
7654                                 if ((hops < 1 || hops > 255) && hops != -1) {
7655                                         GENL_SET_ERR_MSG(info,
7656                                                          "invalid hop count must be between 1 and 255");
7657                                         GOTO(report_err, rc = -EINVAL);
7658                                 }
7659                         } else if (nla_strcmp(route_prop, "priority") == 0) {
7660                                 route_prop = nla_next(route_prop, &rem2);
7661                                 if (nla_type(route_prop) !=
7662                                     LN_SCALAR_ATTR_INT_VALUE) {
7663                                         GENL_SET_ERR_MSG(info,
7664                                                          "priority has invalid key");
7665                                         GOTO(report_err, rc = -EINVAL);
7666                                 }
7667
7668                                 num = nla_get_s64(route_prop);
7669                                 if (num < 0) {
7670                                         GENL_SET_ERR_MSG(info,
7671                                                          "invalid priority, must not be negative");
7672                                         GOTO(report_err, rc = -EINVAL);
7673                                 }
7674                                 priority = num;
7675                         } else if (nla_strcmp(route_prop,
7676                                               "health_sensitivity") == 0) {
7677                                 route_prop = nla_next(route_prop, &rem2);
7678                                 if (nla_type(route_prop) !=
7679                                     LN_SCALAR_ATTR_INT_VALUE) {
7680                                         GENL_SET_ERR_MSG(info,
7681                                                          "sensitivity has invalid key");
7682                                         GOTO(report_err, rc = -EINVAL);
7683                                 }
7684
7685                                 num = nla_get_s64(route_prop);
7686                                 if (num < 1) {
7687                                         GENL_SET_ERR_MSG(info,
7688                                                          "invalid health sensitivity, must be 1 or greater");
7689                                         GOTO(report_err, rc = -EINVAL);
7690                                 }
7691                                 sensitivity = num;
7692                         }
7693                 }
7694
7695                 if (net_id == LNET_NET_ANY) {
7696                         GENL_SET_ERR_MSG(info,
7697                                          "missing mandatory parameter: network");
7698                         GOTO(report_err, rc = -ENODEV);
7699                 }
7700
7701                 if (LNET_NID_IS_ANY(&gw_nid)) {
7702                         GENL_SET_ERR_MSG(info,
7703                                          "missing mandatory parameter: gateway");
7704                         GOTO(report_err, rc = -ENODEV);
7705                 }
7706
7707                 if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) {
7708                         /* Convert the user-supplied real time to monotonic.
7709                          * NB: "when" is always in the past
7710                          */
7711                         when = ktime_get_seconds() -
7712                                 (ktime_get_real_seconds() - when);
7713
7714                         mutex_unlock(&the_lnet.ln_api_mutex);
7715                         rc = lnet_notify(NULL, &gw_nid, alive, false, when);
7716                         mutex_lock(&the_lnet.ln_api_mutex);
7717                         if (rc < 0)
7718                                 GOTO(report_err, rc);
7719                         else if (the_lnet.ln_state != LNET_STATE_RUNNING)
7720                                 GOTO(report_err, rc = -ENETDOWN);
7721                 } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
7722                         rc = lnet_add_route(net_id, hops, &gw_nid, priority,
7723                                             sensitivity);
7724                         if (rc < 0) {
7725                                 switch (rc) {
7726                                 case -EINVAL:
7727                                         GENL_SET_ERR_MSG(info,
7728                                                          "invalid settings for route creation");
7729                                         break;
7730                                 case -EHOSTUNREACH:
7731                                         GENL_SET_ERR_MSG(info,
7732                                                          "No interface configured on the same net as gateway");
7733                                         break;
7734                                 case -ESHUTDOWN:
7735                                         GENL_SET_ERR_MSG(info,
7736                                                          "Network is down");
7737                                         break;
7738                                 case -EEXIST:
7739                                         GENL_SET_ERR_MSG(info,
7740                                                          "Route already exists or the specified network is local");
7741                                         break;
7742                                 default:
7743                                         GENL_SET_ERR_MSG(info,
7744                                                          "failed to create route");
7745                                         break;
7746                                 }
7747                                 GOTO(report_err, rc);
7748                         }
7749                 } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
7750                         rc = lnet_del_route(net_id, &gw_nid);
7751                         if (rc < 0) {
7752                                 GENL_SET_ERR_MSG(info,
7753                                                  "failed to delete route");
7754                                 GOTO(report_err, rc);
7755                         }
7756                 }
7757         }
7758 report_err:
7759         mutex_unlock(&the_lnet.ln_api_mutex);
7760
7761         return rc;
7762 }
7763
7764 static inline struct lnet_genl_ping_list *
7765 lnet_ping_dump_ctx(struct netlink_callback *cb)
7766 {
7767         return (struct lnet_genl_ping_list *)cb->args[0];
7768 }
7769
7770 static int lnet_ping_show_done(struct netlink_callback *cb)
7771 {
7772         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
7773
7774         if (plist) {
7775                 genradix_free(&plist->lgpl_failed);
7776                 genradix_free(&plist->lgpl_list);
7777                 LIBCFS_FREE(plist, sizeof(*plist));
7778                 cb->args[0] = 0;
7779         }
7780
7781         return 0;
7782 }
7783
7784 /* LNet ping ->start() handler for GET requests */
7785 static int lnet_ping_show_start(struct netlink_callback *cb)
7786 {
7787         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
7788 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7789         struct netlink_ext_ack *extack = NULL;
7790 #endif
7791         struct lnet_genl_ping_list *plist;
7792         int msg_len = genlmsg_len(gnlh);
7793         struct nlattr *params, *top;
7794         int rem, rc = 0;
7795
7796 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
7797         extack = cb->extack;
7798 #endif
7799         if (the_lnet.ln_refcount == 0) {
7800                 NL_SET_ERR_MSG(extack, "Network is down");
7801                 return -ENETDOWN;
7802         }
7803
7804         if (!msg_len) {
7805                 NL_SET_ERR_MSG(extack, "Ping needs NID targets");
7806                 return -ENOENT;
7807         }
7808
7809         LIBCFS_ALLOC(plist, sizeof(*plist));
7810         if (!plist) {
7811                 NL_SET_ERR_MSG(extack, "failed to setup ping list");
7812                 return -ENOMEM;
7813         }
7814         genradix_init(&plist->lgpl_list);
7815         plist->lgpl_timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
7816         plist->lgpl_src_nid = LNET_ANY_NID;
7817         plist->lgpl_index = 0;
7818         plist->lgpl_list_count = 0;
7819         cb->args[0] = (long)plist;
7820
7821         params = genlmsg_data(gnlh);
7822         nla_for_each_attr(top, params, msg_len, rem) {
7823                 struct nlattr *nids;
7824                 int rem2;
7825
7826                 switch (nla_type(top)) {
7827                 case LN_SCALAR_ATTR_VALUE:
7828                         if (nla_strcmp(top, "timeout") == 0) {
7829                                 s64 timeout;
7830
7831                                 top = nla_next(top, &rem);
7832                                 if (nla_type(top) != LN_SCALAR_ATTR_INT_VALUE) {
7833                                         NL_SET_ERR_MSG(extack,
7834                                                        "invalid timeout param");
7835                                         GOTO(report_err, rc = -EINVAL);
7836                                 }
7837
7838                                 /* If timeout is negative then set default of
7839                                  * 3 minutes
7840                                  */
7841                                 timeout = nla_get_s64(top);
7842                                 if (timeout > 0 &&
7843                                     timeout < (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
7844                                         plist->lgpl_timeout =
7845                                                 nsecs_to_jiffies(timeout * NSEC_PER_MSEC);
7846                         } else if (nla_strcmp(top, "source") == 0) {
7847                                 char nidstr[LNET_NIDSTR_SIZE + 1];
7848
7849                                 top = nla_next(top, &rem);
7850                                 if (nla_type(top) != LN_SCALAR_ATTR_VALUE) {
7851                                         NL_SET_ERR_MSG(extack,
7852                                                        "invalid source param");
7853                                         GOTO(report_err, rc = -EINVAL);
7854                                 }
7855
7856                                 rc = nla_strscpy(nidstr, top, sizeof(nidstr));
7857                                 if (rc < 0) {
7858                                         NL_SET_ERR_MSG(extack,
7859                                                        "failed to parse source nid");
7860                                         GOTO(report_err, rc);
7861                                 }
7862
7863                                 rc = libcfs_strnid(&plist->lgpl_src_nid,
7864                                                    strim(nidstr));
7865                                 if (rc < 0) {
7866                                         NL_SET_ERR_MSG(extack,
7867                                                        "invalid source nid");
7868                                         GOTO(report_err, rc);
7869                                 }
7870                                 rc = 0;
7871                         }
7872                         break;
7873                 case LN_SCALAR_ATTR_LIST:
7874                         nla_for_each_nested(nids, top, rem2) {
7875                                 char nid[LNET_NIDSTR_SIZE + 1];
7876                                 struct lnet_processid *id;
7877
7878                                 if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
7879                                         continue;
7880
7881                                 memset(nid, 0, sizeof(nid));
7882                                 rc = nla_strscpy(nid, nids, sizeof(nid));
7883                                 if (rc < 0) {
7884                                         NL_SET_ERR_MSG(extack,
7885                                                        "failed to get NID");
7886                                         GOTO(report_err, rc);
7887                                 }
7888
7889                                 id = genradix_ptr_alloc(&plist->lgpl_list,
7890                                                         plist->lgpl_list_count++,
7891                                                         GFP_ATOMIC);
7892                                 if (!id) {
7893                                         NL_SET_ERR_MSG(extack,
7894                                                        "failed to allocate NID");
7895                                         GOTO(report_err, rc = -ENOMEM);
7896                                 }
7897
7898                                 rc = libcfs_strid(id, strim(nid));
7899                                 if (rc < 0) {
7900                                         NL_SET_ERR_MSG(extack, "invalid NID");
7901                                         GOTO(report_err, rc);
7902                                 }
7903                                 rc = 0;
7904                         }
7905                         fallthrough;
7906                 default:
7907                         break;
7908                 }
7909         }
7910 report_err:
7911         if (rc < 0)
7912                 lnet_ping_show_done(cb);
7913
7914         return rc;
7915 }
7916
7917 static const struct ln_key_list ping_props_list = {
7918         .lkl_maxattr                    = LNET_PING_ATTR_MAX,
7919         .lkl_list                       = {
7920                 [LNET_PING_ATTR_HDR]            = {
7921                         .lkp_value              = "ping",
7922                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7923                         .lkp_data_type          = NLA_NUL_STRING,
7924                 },
7925                 [LNET_PING_ATTR_PRIMARY_NID]    = {
7926                         .lkp_value              = "primary nid",
7927                         .lkp_data_type          = NLA_STRING
7928                 },
7929                 [LNET_PING_ATTR_ERRNO]          = {
7930                         .lkp_value              = "errno",
7931                         .lkp_data_type          = NLA_S16
7932                 },
7933                 [LNET_PING_ATTR_MULTIRAIL]      = {
7934                         .lkp_value              = "Multi-Rail",
7935                         .lkp_data_type          = NLA_FLAG
7936                 },
7937                 [LNET_PING_ATTR_PEER_NI_LIST]   = {
7938                         .lkp_value              = "peer_ni",
7939                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7940                         .lkp_data_type          = NLA_NESTED
7941                 },
7942         },
7943 };
7944
7945 static struct ln_key_list ping_peer_ni_list = {
7946         .lkl_maxattr                    = LNET_PING_PEER_NI_ATTR_MAX,
7947         .lkl_list                       = {
7948                 [LNET_PING_PEER_NI_ATTR_NID]    = {
7949                         .lkp_value              = "nid",
7950                         .lkp_data_type          = NLA_STRING
7951                 },
7952         },
7953 };
7954
7955 static int lnet_ping_show_dump(struct sk_buff *msg,
7956                                struct netlink_callback *cb)
7957 {
7958         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
7959         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
7960 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7961         struct netlink_ext_ack *extack = NULL;
7962 #endif
7963         int portid = NETLINK_CB(cb->skb).portid;
7964         int seq = cb->nlh->nlmsg_seq;
7965         int idx = plist->lgpl_index;
7966         int rc = 0, i = 0;
7967
7968 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
7969         extack = cb->extack;
7970 #endif
7971         if (!plist->lgpl_index) {
7972                 const struct ln_key_list *all[] = {
7973                         &ping_props_list, &ping_peer_ni_list, NULL
7974                 };
7975
7976                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
7977                                                 &lnet_family,
7978                                                 NLM_F_CREATE | NLM_F_MULTI,
7979                                                 LNET_CMD_PING, all);
7980                 if (rc < 0) {
7981                         NL_SET_ERR_MSG(extack, "failed to send key table");
7982                         GOTO(send_error, rc);
7983                 }
7984
7985                 genradix_init(&plist->lgpl_failed);
7986         }
7987
7988         while (idx < plist->lgpl_list_count) {
7989                 struct lnet_nid primary_nid = LNET_ANY_NID;
7990                 struct lnet_genl_ping_list peers;
7991                 struct lnet_processid *id;
7992                 struct nlattr *nid_list;
7993                 struct lnet_peer *lp;
7994                 bool mr_flag = false;
7995                 unsigned int count;
7996                 void *hdr = NULL;
7997
7998                 id = genradix_ptr(&plist->lgpl_list, idx++);
7999                 if (nid_is_lo0(&id->nid))
8000                         continue;
8001
8002                 rc = lnet_ping(id, &plist->lgpl_src_nid, plist->lgpl_timeout,
8003                                &peers, lnet_interfaces_max);
8004                 if (rc < 0) {
8005                         struct lnet_fail_ping *fail;
8006
8007                         fail = genradix_ptr_alloc(&plist->lgpl_failed,
8008                                                   plist->lgpl_failed_count++,
8009                                                   GFP_ATOMIC);
8010                         if (!fail) {
8011                                 NL_SET_ERR_MSG(extack,
8012                                                "failed to allocate failed NID");
8013                                 GOTO(send_error, rc);
8014                         }
8015                         fail->lfp_id = *id;
8016                         fail->lfp_errno = rc;
8017                         goto cant_reach;
8018                 }
8019
8020                 mutex_lock(&the_lnet.ln_api_mutex);
8021                 lp = lnet_find_peer(&id->nid);
8022                 if (lp) {
8023                         primary_nid = lp->lp_primary_nid;
8024                         mr_flag = lnet_peer_is_multi_rail(lp);
8025                         lnet_peer_decref_locked(lp);
8026                 }
8027                 mutex_unlock(&the_lnet.ln_api_mutex);
8028
8029                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
8030                                   NLM_F_MULTI, LNET_CMD_PING);
8031                 if (!hdr) {
8032                         NL_SET_ERR_MSG(extack, "failed to send values");
8033                         genlmsg_cancel(msg, hdr);
8034                         GOTO(send_error, rc = -EMSGSIZE);
8035                 }
8036
8037                 if (i++ == 0)
8038                         nla_put_string(msg, LNET_PING_ATTR_HDR, "");
8039
8040                 nla_put_string(msg, LNET_PING_ATTR_PRIMARY_NID,
8041                                libcfs_nidstr(&primary_nid));
8042                 if (mr_flag)
8043                         nla_put_flag(msg, LNET_PING_ATTR_MULTIRAIL);
8044
8045                 nid_list = nla_nest_start(msg, LNET_PING_ATTR_PEER_NI_LIST);
8046                 for (count = 0; count < rc; count++) {
8047                         struct lnet_processid *result;
8048                         struct nlattr *nid_attr;
8049                         char *idstr;
8050
8051                         result = genradix_ptr(&peers.lgpl_list, count);
8052                         if (nid_is_lo0(&result->nid))
8053                                 continue;
8054
8055                         nid_attr = nla_nest_start(msg, count + 1);
8056                         if (gnlh->version == 1)
8057                                 idstr = libcfs_nidstr(&result->nid);
8058                         else
8059                                 idstr = libcfs_idstr(result);
8060                         nla_put_string(msg, LNET_PING_PEER_NI_ATTR_NID, idstr);
8061                         nla_nest_end(msg, nid_attr);
8062                 }
8063                 nla_nest_end(msg, nid_list);
8064                 genlmsg_end(msg, hdr);
8065 cant_reach:
8066                 genradix_free(&peers.lgpl_list);
8067         }
8068
8069         for (i = 0; i < plist->lgpl_failed_count; i++) {
8070                 struct lnet_fail_ping *fail;
8071                 void *hdr;
8072
8073                 fail = genradix_ptr(&plist->lgpl_failed, i);
8074
8075                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
8076                                   NLM_F_MULTI, LNET_CMD_PING);
8077                 if (!hdr) {
8078                         NL_SET_ERR_MSG(extack, "failed to send failed values");
8079                         genlmsg_cancel(msg, hdr);
8080                         GOTO(send_error, rc = -EMSGSIZE);
8081                 }
8082
8083                 if (i == 0)
8084                         nla_put_string(msg, LNET_PING_ATTR_HDR, "");
8085
8086                 nla_put_string(msg, LNET_PING_ATTR_PRIMARY_NID,
8087                                libcfs_nidstr(&fail->lfp_id.nid));
8088                 nla_put_s16(msg, LNET_PING_ATTR_ERRNO, fail->lfp_errno);
8089                 genlmsg_end(msg, hdr);
8090         }
8091         rc = 0; /* don't treat it as an error */
8092
8093         plist->lgpl_index = idx;
8094 send_error:
8095         return lnet_nl_send_error(cb->skb, portid, seq, rc);
8096 }
8097
8098 #ifndef HAVE_NETLINK_CALLBACK_START
8099 static int lnet_old_ping_show_dump(struct sk_buff *msg,
8100                                    struct netlink_callback *cb)
8101 {
8102         if (!cb->args[0]) {
8103                 int rc = lnet_ping_show_start(cb);
8104
8105                 if (rc < 0)
8106                         return rc;
8107         }
8108
8109         return lnet_ping_show_dump(msg, cb);
8110 }
8111 #endif
8112
/*
 * Key table describing the error records of a discover reply.
 *
 * lnet_ping_cmd() sends this table (via lnet_genl_send_scalar_list()) only
 * when at least one NID failed to parse or to discover, and then emits one
 * record per failure using the LNET_ERR_ATTR_* attributes listed here:
 * a header, a type string, a signed 16-bit errno and a description string.
 */
8113 static const struct ln_key_list discover_err_props_list = {
8114         .lkl_maxattr                    = LNET_ERR_ATTR_MAX,
8115         .lkl_list                       = {
8116                 [LNET_ERR_ATTR_HDR]             = {
8117                         .lkp_value              = "manage",
8118                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8119                         .lkp_data_type          = NLA_NUL_STRING,
8120                 },
8121                 [LNET_ERR_ATTR_TYPE]            = {
8122                         .lkp_value              = "discover",
8123                         .lkp_data_type          = NLA_STRING,
8124                 },
8125                 [LNET_ERR_ATTR_ERRNO]           = {
8126                         .lkp_value              = "errno",
8127                         .lkp_data_type          = NLA_S16,
8128                 },
8129                 [LNET_ERR_ATTR_DESCR]           = {
8130                         .lkp_value              = "descr",
8131                         .lkp_data_type          = NLA_STRING,
8132                 },
8133         },
8134 };
8135
/*
 * Key table describing the per-peer records of a discover reply.
 *
 * lnet_ping_cmd() sends this table first (together with ping_peer_ni_list)
 * and then fills one record per discovered peer with the LNET_PING_ATTR_*
 * attributes listed here: header, primary NID string, errno, a Multi-Rail
 * flag and a nested list of peer NIs.
 */
8136 static const struct ln_key_list discover_props_list = {
8137         .lkl_maxattr                    = LNET_PING_ATTR_MAX,
8138         .lkl_list                       = {
8139                 [LNET_PING_ATTR_HDR]            = {
8140                         .lkp_value              = "discover",
8141                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8142                         .lkp_data_type          = NLA_NUL_STRING,
8143                 },
8144                 [LNET_PING_ATTR_PRIMARY_NID]    = {
8145                         .lkp_value              = "primary nid",
8146                         .lkp_data_type          = NLA_STRING
8147                 },
8148                 [LNET_PING_ATTR_ERRNO]          = {
8149                         .lkp_value              = "errno",
8150                         .lkp_data_type          = NLA_S16
8151                 },
8152                 [LNET_PING_ATTR_MULTIRAIL]      = {
8153                         .lkp_value              = "Multi-Rail",
8154                         .lkp_data_type          = NLA_FLAG
8155                 },
8156                 [LNET_PING_ATTR_PEER_NI_LIST]   = {
8157                         .lkp_value              = "peer_ni",
8158                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8159                         .lkp_data_type          = NLA_NESTED
8160                 },
8161         },
8162 };
8163
8164 static int lnet_ping_cmd(struct sk_buff *skb, struct genl_info *info)
8165 {
8166         const struct ln_key_list *all[] = {
8167                 &discover_props_list, &ping_peer_ni_list, NULL
8168         };
8169         struct nlmsghdr *nlh = nlmsg_hdr(skb);
8170         struct genlmsghdr *gnlh = nlmsg_data(nlh);
8171         struct nlattr *params = genlmsg_data(gnlh);
8172         struct lnet_genl_ping_list dlists;
8173         int msg_len, rem, rc = 0, i;
8174         bool clear_hdr = false;
8175         struct sk_buff *reply;
8176         struct nlattr *attr;
8177         void *hdr = NULL;
8178
8179         msg_len = genlmsg_len(gnlh);
8180         if (!msg_len) {
8181                 GENL_SET_ERR_MSG(info, "no configuration");
8182                 return -ENOMSG;
8183         }
8184
8185         if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
8186                 GENL_SET_ERR_MSG(info, "only NLM_F_CREATE setting is allowed");
8187                 return -EINVAL;
8188         }
8189
8190         reply = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
8191         if (!reply) {
8192                 GENL_SET_ERR_MSG(info,
8193                                  "fail to allocate reply");
8194                 return -ENOMEM;
8195         }
8196
8197         genradix_init(&dlists.lgpl_failed);
8198         dlists.lgpl_failed_count = 0;
8199         genradix_init(&dlists.lgpl_list);
8200         dlists.lgpl_list_count = 0;
8201
8202         rc = lnet_genl_send_scalar_list(reply, info->snd_portid,
8203                                         info->snd_seq, &lnet_family,
8204                                         NLM_F_CREATE | NLM_F_MULTI,
8205                                         LNET_CMD_PING, all);
8206         if (rc < 0) {
8207                 GENL_SET_ERR_MSG(info,
8208                                  "failed to send key table");
8209                 GOTO(report_err, rc);
8210         }
8211
8212         nla_for_each_attr(attr, params, msg_len, rem) {
8213                 struct nlattr *nids;
8214                 int rem2;
8215
8216                 /* We only care about the NID list to discover with */
8217                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
8218                         continue;
8219
8220                 nla_for_each_nested(nids, attr, rem2) {
8221                         char nid[LNET_NIDSTR_SIZE + 1];
8222                         struct lnet_processid id;
8223                         struct nlattr *nid_list;
8224                         struct lnet_peer *lp;
8225                         ssize_t len;
8226
8227                         if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
8228                                 continue;
8229
8230                         memset(nid, 0, sizeof(nid));
8231                         rc = nla_strscpy(nid, nids, sizeof(nid));
8232                         if (rc < 0) {
8233                                 GENL_SET_ERR_MSG(info,
8234                                                  "failed to get NID");
8235                                 GOTO(report_err, rc);
8236                         }
8237
8238                         len = libcfs_strid(&id, strim(nid));
8239                         if (len < 0) {
8240                                 struct lnet_fail_ping *fail;
8241
8242                                 fail = genradix_ptr_alloc(&dlists.lgpl_failed,
8243                                                           dlists.lgpl_failed_count++,
8244                                                           GFP_KERNEL);
8245                                 if (!fail) {
8246                                         GENL_SET_ERR_MSG(info,
8247                                                          "failed to allocate improper NID");
8248                                         GOTO(report_err, rc = -ENOMEM);
8249                                 }
8250                                 memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
8251                                 snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
8252                                          "cannot parse NID '%s'", strim(nid));
8253                                 fail->lfp_id = id;
8254                                 fail->lfp_errno = len;
8255                                 continue;
8256                         }
8257
8258                         if (LNET_NID_IS_ANY(&id.nid))
8259                                 continue;
8260
8261                         rc = lnet_discover(&id,
8262                                            info->nlhdr->nlmsg_flags & NLM_F_EXCL,
8263                                            &dlists);
8264                         if (rc < 0) {
8265                                 struct lnet_fail_ping *fail;
8266
8267                                 fail = genradix_ptr_alloc(&dlists.lgpl_failed,
8268                                                           dlists.lgpl_failed_count++,
8269                                                           GFP_KERNEL);
8270                                 if (!fail) {
8271                                         GENL_SET_ERR_MSG(info,
8272                                                          "failed to allocate failed NID");
8273                                         GOTO(report_err, rc = -ENOMEM);
8274                                 }
8275                                 memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
8276                                 snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
8277                                          "failed to discover %s",
8278                                          libcfs_nidstr(&id.nid));
8279                                 fail->lfp_id = id;
8280                                 fail->lfp_errno = rc;
8281                                 continue;
8282                         }
8283
8284                         /* create the genetlink message header */
8285                         hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
8286                                           &lnet_family, NLM_F_MULTI, LNET_CMD_PING);
8287                         if (!hdr) {
8288                                 GENL_SET_ERR_MSG(info,
8289                                                  "failed to allocate hdr");
8290                                 GOTO(report_err, rc = -ENOMEM);
8291                         }
8292
8293                         if (!clear_hdr) {
8294                                 nla_put_string(reply, LNET_PING_ATTR_HDR, "");
8295                                 clear_hdr = true;
8296                         }
8297
8298                         lp = lnet_find_peer(&id.nid);
8299                         if (lp) {
8300                                 nla_put_string(reply, LNET_PING_ATTR_PRIMARY_NID,
8301                                                libcfs_nidstr(&lp->lp_primary_nid));
8302                                 if (lnet_peer_is_multi_rail(lp))
8303                                         nla_put_flag(reply, LNET_PING_ATTR_MULTIRAIL);
8304                                 lnet_peer_decref_locked(lp);
8305                         }
8306
8307                         nid_list = nla_nest_start(reply, LNET_PING_ATTR_PEER_NI_LIST);
8308                         for (i = 0; i < dlists.lgpl_list_count; i++) {
8309                                 struct lnet_processid *found;
8310                                 struct nlattr *nid_attr;
8311                                 char *idstr;
8312
8313                                 found = genradix_ptr(&dlists.lgpl_list, i);
8314                                 if (nid_is_lo0(&found->nid))
8315                                         continue;
8316
8317                                 nid_attr = nla_nest_start(reply, i + 1);
8318                                 if (id.pid == LNET_PID_LUSTRE)
8319                                         idstr = libcfs_nidstr(&found->nid);
8320                                 else
8321                                         idstr = libcfs_idstr(found);
8322                                 nla_put_string(reply, LNET_PING_PEER_NI_ATTR_NID, idstr);
8323                                 nla_nest_end(reply, nid_attr);
8324                         }
8325                         nla_nest_end(reply, nid_list);
8326
8327                         genlmsg_end(reply, hdr);
8328                 }
8329         }
8330
8331         if (dlists.lgpl_failed_count) {
8332                 int flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
8333                 const struct ln_key_list *fail[] = {
8334                         &discover_err_props_list, NULL
8335                 };
8336
8337                 rc = lnet_genl_send_scalar_list(reply, info->snd_portid,
8338                                                 info->snd_seq, &lnet_family,
8339                                                 flags, LNET_CMD_PING, fail);
8340                 if (rc < 0) {
8341                         GENL_SET_ERR_MSG(info,
8342                                          "failed to send new key table");
8343                         GOTO(report_err, rc);
8344                 }
8345
8346                 for (i = 0; i < dlists.lgpl_failed_count; i++) {
8347                         struct lnet_fail_ping *fail;
8348
8349                         hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
8350                                           &lnet_family, NLM_F_MULTI, LNET_CMD_PING);
8351                         if (!hdr) {
8352                                 GENL_SET_ERR_MSG(info,
8353                                                  "failed to send failed values");
8354                                 GOTO(report_err, rc = -ENOMSG);
8355                         }
8356
8357                         fail = genradix_ptr(&dlists.lgpl_failed, i);
8358                         if (i == 0)
8359                                 nla_put_string(reply, LNET_ERR_ATTR_HDR, "");
8360
8361                         nla_put_string(reply, LNET_ERR_ATTR_TYPE, "\n");
8362                         nla_put_s16(reply, LNET_ERR_ATTR_ERRNO,
8363                                     fail->lfp_errno);
8364                         nla_put_string(reply, LNET_ERR_ATTR_DESCR,
8365                                        fail->lfp_msg);
8366                         genlmsg_end(reply, hdr);
8367                 }
8368         }
8369
8370         nlh = nlmsg_put(reply, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
8371                         NLM_F_MULTI);
8372         if (!nlh) {
8373                 genlmsg_cancel(reply, hdr);
8374                 GENL_SET_ERR_MSG(info,
8375                                  "failed to finish message");
8376                 GOTO(report_err, rc = -EMSGSIZE);
8377         }
8378
8379 report_err:
8380         genradix_free(&dlists.lgpl_failed);
8381         genradix_free(&dlists.lgpl_list);
8382
8383         if (rc < 0) {
8384                 genlmsg_cancel(reply, hdr);
8385                 nlmsg_free(reply);
8386         } else {
8387                 rc = genlmsg_reply(reply, info);
8388         }
8389
8390         return rc;
8391 }
8392
/*
 * Multicast group names registered with the LNet generic netlink family
 * (see the .mcgrps member of lnet_family).  The position of each entry in
 * this array is significant, so new groups should be appended.
 */
8393 static const struct genl_multicast_group lnet_mcast_grps[] = {
8394         { .name =       "ip2net",       },
8395         { .name =       "net",          },
8396         { .name =       "peer",         },
8397         { .name =       "route",        },
8398         { .name =       "ping",         },
8399         { .name =       "discover",     },
8400         { .name =       "cpt-of-nid",   },
8401 };
8402
/*
 * Command table of the LNet generic netlink family.
 *
 * Each entry maps an LNET_CMD_* command to its dump callbacks (.dumpit,
 * .done and, when available, .start) and, where present, its .doit
 * handler.  When HAVE_NETLINK_CALLBACK_START is defined a dedicated
 * .start callback is registered; otherwise the lnet_old_*_dump wrapper is
 * used as .dumpit (lnet_old_ping_show_dump() runs the start step itself;
 * the other wrappers presumably do the same - confirm).
 *
 * All commands except LNET_CMD_CPT_OF_NID carry GENL_ADMIN_PERM, and
 * LNET_CMD_CPT_OF_NID has no .doit handler.
 */
8403 static const struct genl_ops lnet_genl_ops[] = {
8404         {
8405                 .cmd            = LNET_CMD_NETS,
8406                 .flags          = GENL_ADMIN_PERM,
8407 #ifdef HAVE_NETLINK_CALLBACK_START
8408                 .start          = lnet_net_show_start,
8409                 .dumpit         = lnet_net_show_dump,
8410 #else
8411                 .dumpit         = lnet_old_net_show_dump,
8412 #endif
8413                 .done           = lnet_net_show_done,
8414                 .doit           = lnet_net_cmd,
8415         },
8416         {
8417                 .cmd            = LNET_CMD_PEERS,
8418                 .flags          = GENL_ADMIN_PERM,
8419 #ifdef HAVE_NETLINK_CALLBACK_START
8420                 .start          = lnet_peer_ni_show_start,
8421                 .dumpit         = lnet_peer_ni_show_dump,
8422 #else
8423                 .dumpit         = lnet_old_peer_ni_show_dump,
8424 #endif
8425                 .done           = lnet_peer_ni_show_done,
8426                 .doit           = lnet_peer_ni_cmd,
8427         },
8428         {
8429                 .cmd            = LNET_CMD_ROUTES,
8430                 .flags          = GENL_ADMIN_PERM,
8431 #ifdef HAVE_NETLINK_CALLBACK_START
8432                 .start          = lnet_route_show_start,
8433                 .dumpit         = lnet_route_show_dump,
8434 #else
8435                 .dumpit         = lnet_old_route_show_dump,
8436 #endif
8437                 .done           = lnet_route_show_done,
8438                 .doit           = lnet_route_cmd,
8439         },
8440         {
8441                 .cmd            = LNET_CMD_PING,
8442                 .flags          = GENL_ADMIN_PERM,
8443 #ifdef HAVE_NETLINK_CALLBACK_START
8444                 .start          = lnet_ping_show_start,
8445                 .dumpit         = lnet_ping_show_dump,
8446 #else
8447                 .dumpit         = lnet_old_ping_show_dump,
8448 #endif
8449                 .done           = lnet_ping_show_done,
8450                 .doit           = lnet_ping_cmd,
8451         },
8452         {
8453                 .cmd            = LNET_CMD_CPT_OF_NID,
8454 #ifdef HAVE_NETLINK_CALLBACK_START
8455                 .start          = lnet_cpt_of_nid_show_start,
8456                 .dumpit         = lnet_cpt_of_nid_show_dump,
8457 #else
8458                 .dumpit         = lnet_old_cpt_of_nid_show_dump,
8459 #endif
8460                 .done           = lnet_cpt_of_nid_show_done,
8461         },
8462 };
8463
/*
 * The LNet generic netlink family: ties the family name and version to the
 * command table (lnet_genl_ops) and the multicast groups (lnet_mcast_grps).
 * .resv_start_op is only set when the kernel's struct genl_family has that
 * member (GENL_FAMILY_HAS_RESV_START_OP).
 */
8464 static struct genl_family lnet_family = {
8465         .name           = LNET_GENL_NAME,
8466         .version        = LNET_GENL_VERSION,
8467         .module         = THIS_MODULE,
8468         .netnsok        = true,
8469         .ops            = lnet_genl_ops,
8470         .n_ops          = ARRAY_SIZE(lnet_genl_ops),
8471         .mcgrps         = lnet_mcast_grps,
8472         .n_mcgrps       = ARRAY_SIZE(lnet_mcast_grps),
8473 #ifdef GENL_FAMILY_HAS_RESV_START_OP
8474         .resv_start_op  = __LNET_CMD_MAX_PLUS_ONE,
8475 #endif
8476 };
8477
8478 void LNetDebugPeer(struct lnet_processid *id)
8479 {
8480         lnet_debug_peer(&id->nid);
8481 }
8482 EXPORT_SYMBOL(LNetDebugPeer);
8483
8484 /**
8485  * Determine if the specified peer \a nid is on the local node.
8486  *
8487  * \param nid   peer nid to check
8488  *
8489  * \retval true         If peer NID is on the local node.
8490  * \retval false        If peer NID is not on the local node.
8491  */
8492 bool LNetIsPeerLocal(struct lnet_nid *nid)
8493 {
8494         struct lnet_net *net;
8495         struct lnet_ni *ni;
8496         int cpt;
8497
8498         cpt = lnet_net_lock_current();
8499         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
8500                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
8501                         if (nid_same(&ni->ni_nid, nid)) {
8502                                 lnet_net_unlock(cpt);
8503                                 return true;
8504                         }
8505                 }
8506         }
8507         lnet_net_unlock(cpt);
8508
8509         return false;
8510 }
8511 EXPORT_SYMBOL(LNetIsPeerLocal);
8512
8513 /**
8514  * Retrieve the struct lnet_process_id ID of LNet interface at \a index.
8515  * Note that all interfaces share a same PID, as requested by LNetNIInit().
8516  *
8517  * @index       Index of the interface to look up.
8518  * @id          On successful return, this location will hold the
8519  *              struct lnet_process_id ID of the interface.
8520  * @large_nids  Report large NIDs if this is true.
8521  *
8522  * RETURN       0 If an interface exists at \a index.
8523  *              -ENOENT If no interface has been found.
8524  */
8525 int
8526 LNetGetId(unsigned int index, struct lnet_processid *id, bool large_nids)
8527 {
8528         struct lnet_ni   *ni;
8529         struct lnet_net  *net;
8530         int               cpt;
8531         int               rc = -ENOENT;
8532
8533         LASSERT(the_lnet.ln_refcount > 0);
8534
8535         cpt = lnet_net_lock_current();
8536
8537         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
8538                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
8539                         if (!large_nids && !nid_is_nid4(&ni->ni_nid))
8540                                 continue;
8541
8542                         if (index-- != 0)
8543                                 continue;
8544
8545                         id->nid = ni->ni_nid;
8546                         id->pid = the_lnet.ln_pid;
8547                         rc = 0;
8548                         break;
8549                 }
8550         }
8551
8552         lnet_net_unlock(cpt);
8553         return rc;
8554 }
8555 EXPORT_SYMBOL(LNetGetId);
8556
/*
 * State shared between lnet_ping() and lnet_ping_event_handler(); the
 * handler reaches it through the MD user pointer.
 *
 *  rc           first non-zero event status seen, or the reply length
 *               (event->mlength) once a successful REPLY arrived
 *  replied      set to 1 when a successful LNET_EVENT_REPLY was handled
 *  pd_unlinked  set to 1 when an event reported the MD as unlinked
 *  mdh          handle of the MD bound by lnet_ping()
 *  completion   completed by the handler on unlink or on a failed SEND
 */
8557 struct ping_data {
8558         int rc;
8559         int replied;
8560         int pd_unlinked;
8561         struct lnet_handle_md mdh;
8562         struct completion completion;
8563 };
8564
8565 static void
8566 lnet_ping_event_handler(struct lnet_event *event)
8567 {
8568         struct ping_data *pd = event->md_user_ptr;
8569
8570         CDEBUG(D_NET, "ping event (%d %d)%s\n",
8571                event->type, event->status,
8572                event->unlinked ? " unlinked" : "");
8573
8574         if (event->status) {
8575                 if (!pd->rc)
8576                         pd->rc = event->status;
8577         } else if (event->type == LNET_EVENT_REPLY) {
8578                 pd->replied = 1;
8579                 pd->rc = event->mlength;
8580         }
8581
8582         if (event->unlinked)
8583                 pd->pd_unlinked = 1;
8584
8585         if (event->unlinked ||
8586             (event->type == LNET_EVENT_SEND && event->status))
8587                 complete(&pd->completion);
8588 }
8589
8590 static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
8591                      signed long timeout, struct lnet_genl_ping_list *plist,
8592                      int n_ids)
8593 {
8594         int id_bytes = sizeof(struct lnet_ni_status); /* For 0@lo */
8595         struct lnet_md md = { NULL };
8596         struct ping_data pd = { 0 };
8597         struct lnet_ping_buffer *pbuf;
8598         struct lnet_processid pid;
8599         struct lnet_ping_iter pi;
8600         int i = 0;
8601         u32 *st;
8602         int nob;
8603         int rc;
8604         int rc2;
8605
8606         genradix_init(&plist->lgpl_list);
8607
8608         /* n_ids limit is arbitrary */
8609         if (n_ids <= 0 || LNET_NID_IS_ANY(&id->nid))
8610                 return -EINVAL;
8611
8612         /* if the user buffer has more space than the lnet_interfaces_max
8613          * then only fill it up to lnet_interfaces_max
8614          */
8615         if (n_ids > lnet_interfaces_max)
8616                 n_ids = lnet_interfaces_max;
8617
8618         if (id->pid == LNET_PID_ANY)
8619                 id->pid = LNET_PID_LUSTRE;
8620
8621         id_bytes += lnet_ping_sts_size(&id->nid) * n_ids;
8622         pbuf = lnet_ping_buffer_alloc(id_bytes, GFP_NOFS);
8623         if (!pbuf)
8624                 return -ENOMEM;
8625
8626         /* initialize md content */
8627         md.start     = &pbuf->pb_info;
8628         md.length    = id_bytes;
8629         md.threshold = 2; /* GET/REPLY */
8630         md.max_size  = 0;
8631         md.options   = LNET_MD_TRUNCATE;
8632         md.user_ptr  = &pd;
8633         md.handler   = lnet_ping_event_handler;
8634
8635         init_completion(&pd.completion);
8636
8637         rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
8638         if (rc != 0) {
8639                 CERROR("Can't bind MD: %d\n", rc);
8640                 goto fail_ping_buffer_decref;
8641         }
8642
8643         rc = LNetGet(src_nid, pd.mdh, id, LNET_RESERVED_PORTAL,
8644                      LNET_PROTO_PING_MATCHBITS, 0, false);
8645         if (rc != 0) {
8646                 /* Don't CERROR; this could be deliberate! */
8647                 rc2 = LNetMDUnlink(pd.mdh);
8648                 LASSERT(rc2 == 0);
8649
8650                 /* NB must wait for the UNLINK event below... */
8651         }
8652
8653         /* Ensure completion in finite time... */
8654         wait_for_completion_timeout(&pd.completion, timeout);
8655         if (!pd.pd_unlinked) {
8656                 LNetMDUnlink(pd.mdh);
8657                 wait_for_completion(&pd.completion);
8658         }
8659
8660         if (!pd.replied) {
8661                 rc = pd.rc ?: -EIO;
8662                 goto fail_ping_buffer_decref;
8663         }
8664
8665         nob = pd.rc;
8666         LASSERT(nob >= 0 && nob <= id_bytes);
8667
8668         rc = -EPROTO;           /* if I can't parse... */
8669
8670         if (nob < LNET_PING_INFO_HDR_SIZE) {
8671                 CERROR("%s: ping info too short %d\n",
8672                        libcfs_idstr(id), nob);
8673                 goto fail_ping_buffer_decref;
8674         }
8675
8676         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
8677                 lnet_swap_pinginfo(pbuf);
8678         } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
8679                 CERROR("%s: Unexpected magic %08x\n",
8680                        libcfs_idstr(id), pbuf->pb_info.pi_magic);
8681                 goto fail_ping_buffer_decref;
8682         }
8683
8684         if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
8685                 CERROR("%s: ping w/o NI status: 0x%x\n",
8686                        libcfs_idstr(id), pbuf->pb_info.pi_features);
8687                 goto fail_ping_buffer_decref;
8688         }
8689
8690         /* Test if smaller than lnet_pinginfo with just one pi_ni status info.
8691          * That one might contain size when large nids are used.
8692          */
8693         if (nob < offsetof(struct lnet_ping_info, pi_ni[1])) {
8694                 CERROR("%s: Short reply %d(%lu min)\n",
8695                        libcfs_idstr(id), nob,
8696                        offsetof(struct lnet_ping_info, pi_ni[1]));
8697                 goto fail_ping_buffer_decref;
8698         }
8699
8700         if (ping_info_count_entries(pbuf) < n_ids) {
8701                 n_ids = ping_info_count_entries(pbuf);
8702                 id_bytes = lnet_ping_info_size(&pbuf->pb_info);
8703         }
8704
8705         if (nob < id_bytes) {
8706                 CERROR("%s: Short reply %d(%d expected)\n",
8707                        libcfs_idstr(id), nob, id_bytes);
8708                 goto fail_ping_buffer_decref;
8709         }
8710
8711         for (st = ping_iter_first(&pi, pbuf, &pid.nid);
8712              st;
8713              st = ping_iter_next(&pi, &pid.nid)) {
8714                 id = genradix_ptr_alloc(&plist->lgpl_list, i++, GFP_ATOMIC);
8715                 if (!id) {
8716                         rc = -ENOMEM;
8717                         goto fail_ping_buffer_decref;
8718                 }
8719
8720                 id->pid = pbuf->pb_info.pi_pid;
8721                 id->nid = pid.nid;
8722         }
8723         rc = i;
8724 fail_ping_buffer_decref:
8725         lnet_ping_buffer_decref(pbuf);
8726         return rc;
8727 }
8728
8729 static int
8730 lnet_discover(struct lnet_processid *pid, u32 force,
8731               struct lnet_genl_ping_list *dlist)
8732 {
8733         struct lnet_peer_ni *lpni;
8734         struct lnet_peer_ni *p;
8735         struct lnet_peer *lp;
8736         int cpt;
8737         int rc;
8738
8739         if (LNET_NID_IS_ANY(&pid->nid))
8740                 return -EINVAL;
8741
8742         if (pid->pid == LNET_PID_ANY)
8743                 pid->pid = LNET_PID_LUSTRE;
8744
8745         cpt = lnet_net_lock_current();
8746         lpni = lnet_peerni_by_nid_locked(&pid->nid, NULL, cpt);
8747         if (IS_ERR(lpni)) {
8748                 rc = PTR_ERR(lpni);
8749                 goto out;
8750         }
8751
8752         /*
8753          * Clearing the NIDS_UPTODATE flag ensures the peer will
8754          * be discovered, provided discovery has not been disabled.
8755          */
8756         lp = lpni->lpni_peer_net->lpn_peer;
8757         spin_lock(&lp->lp_lock);
8758         lp->lp_state &= ~LNET_PEER_NIDS_UPTODATE;
8759         /* If the force flag is set, force a PING and PUSH as well. */
8760         if (force)
8761                 lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH;
8762         spin_unlock(&lp->lp_lock);
8763         rc = lnet_discover_peer_locked(lpni, cpt, true);
8764         if (rc)
8765                 goto out_decref;
8766
8767         /* The lpni (or lp) for this NID may have changed and our ref is
8768          * the only thing keeping the old one around. Release the ref
8769          * and lookup the lpni again
8770          */
8771         lnet_peer_ni_decref_locked(lpni);
8772         lpni = lnet_peer_ni_find_locked(&pid->nid);
8773         if (!lpni) {
8774                 rc = -ENOENT;
8775                 goto out;
8776         }
8777         lp = lpni->lpni_peer_net->lpn_peer;
8778
8779         dlist->lgpl_list_count = 0;
8780         p = NULL;
8781         while ((p = lnet_get_next_peer_ni_locked(lp, NULL, p)) != NULL) {
8782                 struct lnet_processid *id;
8783
8784                 id = genradix_ptr_alloc(&dlist->lgpl_list,
8785                                         dlist->lgpl_list_count++, GFP_KERNEL);
8786                 if (!id) {
8787                         rc = -ENOMEM;
8788                         goto out_decref;
8789                 }
8790                 id->pid = pid->pid;
8791                 id->nid = p->lpni_nid;
8792         }
8793         rc = dlist->lgpl_list_count;
8794
8795 out_decref:
8796         lnet_peer_ni_decref_locked(lpni);
8797 out:
8798         lnet_net_unlock(cpt);
8799
8800         return rc;
8801 }
8802
8803 /**
8804  * Retrieve peer discovery status.
8805  *
8806  * \retval 1 if lnet_peer_discovery_disabled is 0
8807  * \retval 0 if lnet_peer_discovery_disabled is 1
8808  */
8809 int
8810 LNetGetPeerDiscoveryStatus(void)
8811 {
8812         return !lnet_peer_discovery_disabled;
8813 }
8814 EXPORT_SYMBOL(LNetGetPeerDiscoveryStatus);