Whamcloud - gitweb
LU-10391 lnet: update ping to handle multiple NIDs
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  */
31
32 #define DEBUG_SUBSYSTEM S_LNET
33
34 #include <linux/ctype.h>
35 #include <linux/generic-radix-tree.h>
36 #include <linux/log2.h>
37 #include <linux/ktime.h>
38 #include <linux/moduleparam.h>
39 #include <linux/uaccess.h>
40 #ifdef HAVE_SCHED_HEADERS
41 #include <linux/sched/signal.h>
42 #endif
43 #include <net/genetlink.h>
44
45 #include <libcfs/linux/linux-net.h>
46 #include <lnet/udsp.h>
47 #include <lnet/lib-lnet.h>
48
49 #define D_LNI D_CONSOLE
50
51 /*
52  * initialize ln_api_mutex statically, since it needs to be used in
53  * discovery_set callback. That module parameter callback can be called
54  * before module init completes. The mutex needs to be ready for use then.
55  */
56 struct lnet the_lnet = {
57         .ln_api_mutex = __MUTEX_INITIALIZER(the_lnet.ln_api_mutex),
58 };              /* THE state of the network */
59 EXPORT_SYMBOL(the_lnet);
60
61 static char *ip2nets = "";
62 module_param(ip2nets, charp, 0444);
63 MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
64
65 static char *networks = "";
66 module_param(networks, charp, 0444);
67 MODULE_PARM_DESC(networks, "local networks");
68
69 static char *routes = "";
70 module_param(routes, charp, 0444);
71 MODULE_PARM_DESC(routes, "routes to non-local networks");
72
73 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
74 module_param(rnet_htable_size, int, 0444);
75 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
76
77 static int use_tcp_bonding;
78 module_param(use_tcp_bonding, int, 0444);
79 MODULE_PARM_DESC(use_tcp_bonding,
80                  "use_tcp_bonding parameter has been removed");
81
82 unsigned int lnet_numa_range = 0;
83 module_param(lnet_numa_range, uint, 0444);
84 MODULE_PARM_DESC(lnet_numa_range,
85                 "NUMA range to consider during Multi-Rail selection");
86
87 /*
88  * lnet_health_sensitivity determines by how much we decrement the health
89  * value on sending error. The value defaults to 100, which means health
90  * interface health is decremented by 100 points every failure.
91  */
92 unsigned int lnet_health_sensitivity = 100;
93 static int sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp);
94 #ifdef HAVE_KERNEL_PARAM_OPS
95 static struct kernel_param_ops param_ops_health_sensitivity = {
96         .set = sensitivity_set,
97         .get = param_get_int,
98 };
99 #define param_check_health_sensitivity(name, p) \
100                 __param_check(name, p, int)
101 module_param(lnet_health_sensitivity, health_sensitivity, S_IRUGO|S_IWUSR);
102 #else
103 module_param_call(lnet_health_sensitivity, sensitivity_set, param_get_int,
104                   &lnet_health_sensitivity, S_IRUGO|S_IWUSR);
105 #endif
106 MODULE_PARM_DESC(lnet_health_sensitivity,
107                 "Value to decrement the health value by on error");
108
109 /*
110  * lnet_recovery_interval determines how often we should perform recovery
111  * on unhealthy interfaces.
112  */
113 unsigned int lnet_recovery_interval = 1;
114 static int recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp);
115 #ifdef HAVE_KERNEL_PARAM_OPS
116 static struct kernel_param_ops param_ops_recovery_interval = {
117         .set = recovery_interval_set,
118         .get = param_get_int,
119 };
120 #define param_check_recovery_interval(name, p) \
121                 __param_check(name, p, int)
122 module_param(lnet_recovery_interval, recovery_interval, S_IRUGO|S_IWUSR);
123 #else
124 module_param_call(lnet_recovery_interval, recovery_interval_set, param_get_int,
125                   &lnet_recovery_interval, S_IRUGO|S_IWUSR);
126 #endif
127 MODULE_PARM_DESC(lnet_recovery_interval,
128                 "DEPRECATED - Interval to recover unhealthy interfaces in seconds");
129
130 unsigned int lnet_recovery_limit;
131 module_param(lnet_recovery_limit, uint, 0644);
132 MODULE_PARM_DESC(lnet_recovery_limit,
133                  "How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery");
134
135 unsigned int lnet_max_recovery_ping_interval = 900;
136 unsigned int lnet_max_recovery_ping_count = 9;
137 static int max_recovery_ping_interval_set(const char *val,
138                                           cfs_kernel_param_arg_t *kp);
139
140 #define param_check_max_recovery_ping_interval(name, p) \
141                 __param_check(name, p, int)
142
143 #ifdef HAVE_KERNEL_PARAM_OPS
144 static struct kernel_param_ops param_ops_max_recovery_ping_interval = {
145         .set = max_recovery_ping_interval_set,
146         .get = param_get_int,
147 };
148 module_param(lnet_max_recovery_ping_interval, max_recovery_ping_interval, 0644);
149 #else
150 module_param_call(lnet_max_recovery_ping_interval, max_recovery_ping_interval,
151                   param_get_int, &lnet_max_recovery_ping_interval, 0644);
152 #endif
153 MODULE_PARM_DESC(lnet_max_recovery_ping_interval,
154                  "The max interval between LNet recovery pings, in seconds");
155
156 static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
157 static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp);
158
159 static struct kernel_param_ops param_ops_interfaces_max = {
160         .set = intf_max_set,
161         .get = param_get_int,
162 };
163
164 #define param_check_interfaces_max(name, p) \
165                 __param_check(name, p, int)
166
167 #ifdef HAVE_KERNEL_PARAM_OPS
168 module_param(lnet_interfaces_max, interfaces_max, 0644);
169 #else
170 module_param_call(lnet_interfaces_max, intf_max_set, param_get_int,
171                   &param_ops_interfaces_max, 0644);
172 #endif
173 MODULE_PARM_DESC(lnet_interfaces_max,
174                 "Maximum number of interfaces in a node.");
175
176 unsigned lnet_peer_discovery_disabled = 0;
177 static int discovery_set(const char *val, cfs_kernel_param_arg_t *kp);
178
179 static struct kernel_param_ops param_ops_discovery_disabled = {
180         .set = discovery_set,
181         .get = param_get_int,
182 };
183
184 #define param_check_discovery_disabled(name, p) \
185                 __param_check(name, p, int)
186 #ifdef HAVE_KERNEL_PARAM_OPS
187 module_param(lnet_peer_discovery_disabled, discovery_disabled, 0644);
188 #else
189 module_param_call(lnet_peer_discovery_disabled, discovery_set, param_get_int,
190                   &param_ops_discovery_disabled, 0644);
191 #endif
192 MODULE_PARM_DESC(lnet_peer_discovery_disabled,
193                 "Set to 1 to disable peer discovery on this node.");
194
195 unsigned int lnet_drop_asym_route;
196 static int drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp);
197
198 static struct kernel_param_ops param_ops_drop_asym_route = {
199         .set = drop_asym_route_set,
200         .get = param_get_int,
201 };
202
203 #define param_check_drop_asym_route(name, p)    \
204         __param_check(name, p, int)
205 #ifdef HAVE_KERNEL_PARAM_OPS
206 module_param(lnet_drop_asym_route, drop_asym_route, 0644);
207 #else
208 module_param_call(lnet_drop_asym_route, drop_asym_route_set, param_get_int,
209                   &param_ops_drop_asym_route, 0644);
210 #endif
211 MODULE_PARM_DESC(lnet_drop_asym_route,
212                  "Set to 1 to drop asymmetrical route messages.");
213
214 #define LNET_TRANSACTION_TIMEOUT_DEFAULT 150
215 unsigned int lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_DEFAULT;
216 static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
217 #ifdef HAVE_KERNEL_PARAM_OPS
218 static struct kernel_param_ops param_ops_transaction_timeout = {
219         .set = transaction_to_set,
220         .get = param_get_int,
221 };
222
223 #define param_check_transaction_timeout(name, p) \
224                 __param_check(name, p, int)
225 module_param(lnet_transaction_timeout, transaction_timeout, S_IRUGO|S_IWUSR);
226 #else
227 module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
228                   &lnet_transaction_timeout, S_IRUGO|S_IWUSR);
229 #endif
230 MODULE_PARM_DESC(lnet_transaction_timeout,
231                 "Maximum number of seconds to wait for a peer response.");
232
233 #define LNET_RETRY_COUNT_DEFAULT 2
234 unsigned int lnet_retry_count = LNET_RETRY_COUNT_DEFAULT;
235 static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
236 #ifdef HAVE_KERNEL_PARAM_OPS
237 static struct kernel_param_ops param_ops_retry_count = {
238         .set = retry_count_set,
239         .get = param_get_int,
240 };
241
242 #define param_check_retry_count(name, p) \
243                 __param_check(name, p, int)
244 module_param(lnet_retry_count, retry_count, S_IRUGO|S_IWUSR);
245 #else
246 module_param_call(lnet_retry_count, retry_count_set, param_get_int,
247                   &lnet_retry_count, S_IRUGO|S_IWUSR);
248 #endif
249 MODULE_PARM_DESC(lnet_retry_count,
250                  "Maximum number of times to retry transmitting a message");
251
252 unsigned int lnet_response_tracking = 3;
253 static int response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp);
254
255 #ifdef HAVE_KERNEL_PARAM_OPS
256 static struct kernel_param_ops param_ops_response_tracking = {
257         .set = response_tracking_set,
258         .get = param_get_int,
259 };
260
261 #define param_check_response_tracking(name, p)  \
262         __param_check(name, p, int)
263 module_param(lnet_response_tracking, response_tracking, 0644);
264 #else
265 module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
266                   &lnet_response_tracking, 0644);
267 #endif
268 MODULE_PARM_DESC(lnet_response_tracking,
269                  "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
270
271 int lock_prim_nid = 1;
272 module_param(lock_prim_nid, int, 0444);
273 MODULE_PARM_DESC(lock_prim_nid,
274                  "Whether nid passed down by Lustre is locked as primary");
275
276 #define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
277                                   (LNET_RETRY_COUNT_DEFAULT + 1))
278 unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
279 static void lnet_set_lnd_timeout(void)
280 {
281         lnet_lnd_timeout = max((lnet_transaction_timeout - 1) /
282                                (lnet_retry_count + 1), 1U);
283 }
284
285 /*
286  * This sequence number keeps track of how many times DLC was used to
287  * update the local NIs. It is incremented when a NI is added or
288  * removed and checked when sending a message to determine if there is
289  * a need to re-run the selection algorithm. See lnet_select_pathway()
290  * for more details on its usage.
291  */
292 static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
293
294 struct lnet_fail_ping {
295         struct lnet_processid           lfp_id;
296         int                             lfp_errno;
297         char                            lfp_msg[256];
298 };
299
300 struct lnet_genl_ping_list {
301         unsigned int                    lgpl_index;
302         unsigned int                    lgpl_list_count;
303         unsigned int                    lgpl_failed_count;
304         signed long                     lgpl_timeout;
305         struct lnet_nid                 lgpl_src_nid;
306         GENRADIX(struct lnet_fail_ping) lgpl_failed;
307         GENRADIX(struct lnet_processid) lgpl_list;
308 };
309
310 static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
311                      signed long timeout, struct lnet_genl_ping_list *plist,
312                      int n_ids);
313
314 static int lnet_discover(struct lnet_processid *id, u32 force,
315                          struct lnet_genl_ping_list *dlists);
316
317 static int
318 sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
319 {
320         int rc;
321         unsigned *sensitivity = (unsigned *)kp->arg;
322         unsigned long value;
323
324         rc = kstrtoul(val, 0, &value);
325         if (rc) {
326                 CERROR("Invalid module parameter value for 'lnet_health_sensitivity'\n");
327                 return rc;
328         }
329
330         /*
331          * The purpose of locking the api_mutex here is to ensure that
332          * the correct value ends up stored properly.
333          */
334         mutex_lock(&the_lnet.ln_api_mutex);
335
336         if (value > LNET_MAX_HEALTH_VALUE) {
337                 mutex_unlock(&the_lnet.ln_api_mutex);
338                 CERROR("Invalid health value. Maximum: %d value = %lu\n",
339                        LNET_MAX_HEALTH_VALUE, value);
340                 return -EINVAL;
341         }
342
343         if (*sensitivity != 0 && value == 0 && lnet_retry_count != 0) {
344                 lnet_retry_count = 0;
345                 lnet_set_lnd_timeout();
346         }
347
348         *sensitivity = value;
349
350         mutex_unlock(&the_lnet.ln_api_mutex);
351
352         return 0;
353 }
354
355 static int
356 recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
357 {
358         CWARN("'lnet_recovery_interval' has been deprecated\n");
359
360         return 0;
361 }
362
363 static int
364 max_recovery_ping_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
365 {
366         int rc;
367         unsigned long value;
368
369         rc = kstrtoul(val, 0, &value);
370         if (rc) {
371                 CERROR("Invalid module parameter value for 'lnet_max_recovery_ping_interval'\n");
372                 return rc;
373         }
374
375         if (!value) {
376                 CERROR("Invalid max ping timeout. Must be strictly positive\n");
377                 return -EINVAL;
378         }
379
380         /* The purpose of locking the api_mutex here is to ensure that
381          * the correct value ends up stored properly.
382          */
383         mutex_lock(&the_lnet.ln_api_mutex);
384         lnet_max_recovery_ping_interval = value;
385         lnet_max_recovery_ping_count = 0;
386         value >>= 1;
387         while (value) {
388                 lnet_max_recovery_ping_count++;
389                 value >>= 1;
390         }
391         mutex_unlock(&the_lnet.ln_api_mutex);
392
393         return 0;
394 }
395
396 static int
397 discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
398 {
399         int rc;
400         unsigned *discovery_off = (unsigned *)kp->arg;
401         unsigned long value;
402         struct lnet_ping_buffer *pbuf;
403
404         rc = kstrtoul(val, 0, &value);
405         if (rc) {
406                 CERROR("Invalid module parameter value for 'lnet_peer_discovery_disabled'\n");
407                 return rc;
408         }
409
410         value = (value) ? 1 : 0;
411
412         /*
413          * The purpose of locking the api_mutex here is to ensure that
414          * the correct value ends up stored properly.
415          */
416         mutex_lock(&the_lnet.ln_api_mutex);
417
418         if (value == *discovery_off) {
419                 mutex_unlock(&the_lnet.ln_api_mutex);
420                 return 0;
421         }
422
423         /*
424          * We still want to set the discovery value even when LNet is not
425          * running. This is the case when LNet is being loaded and we want
426          * the module parameters to take effect. Otherwise if we're
427          * changing the value dynamically, we want to set it after
428          * updating the peers
429          */
430         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
431                 *discovery_off = value;
432                 mutex_unlock(&the_lnet.ln_api_mutex);
433                 return 0;
434         }
435
436         /* tell peers that discovery setting has changed */
437         lnet_net_lock(LNET_LOCK_EX);
438         pbuf = the_lnet.ln_ping_target;
439         if (value)
440                 pbuf->pb_info.pi_features &= ~LNET_PING_FEAT_DISCOVERY;
441         else
442                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
443         lnet_net_unlock(LNET_LOCK_EX);
444
445         /* only send a push when we're turning off discovery */
446         if (*discovery_off <= 0 && value > 0)
447                 lnet_push_update_to_peers(1);
448         *discovery_off = value;
449
450         mutex_unlock(&the_lnet.ln_api_mutex);
451
452         return 0;
453 }
454
455 static int
456 drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp)
457 {
458         int rc;
459         unsigned int *drop_asym_route = (unsigned int *)kp->arg;
460         unsigned long value;
461
462         rc = kstrtoul(val, 0, &value);
463         if (rc) {
464                 CERROR("Invalid module parameter value for "
465                        "'lnet_drop_asym_route'\n");
466                 return rc;
467         }
468
469         /*
470          * The purpose of locking the api_mutex here is to ensure that
471          * the correct value ends up stored properly.
472          */
473         mutex_lock(&the_lnet.ln_api_mutex);
474
475         if (value == *drop_asym_route) {
476                 mutex_unlock(&the_lnet.ln_api_mutex);
477                 return 0;
478         }
479
480         *drop_asym_route = value;
481
482         mutex_unlock(&the_lnet.ln_api_mutex);
483
484         return 0;
485 }
486
487 static int
488 transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
489 {
490         int rc;
491         unsigned *transaction_to = (unsigned *)kp->arg;
492         unsigned long value;
493
494         rc = kstrtoul(val, 0, &value);
495         if (rc) {
496                 CERROR("Invalid module parameter value for 'lnet_transaction_timeout'\n");
497                 return rc;
498         }
499
500         /*
501          * The purpose of locking the api_mutex here is to ensure that
502          * the correct value ends up stored properly.
503          */
504         mutex_lock(&the_lnet.ln_api_mutex);
505
506         if (value <= lnet_retry_count || value == 0) {
507                 mutex_unlock(&the_lnet.ln_api_mutex);
508                 CERROR("Invalid value for lnet_transaction_timeout (%lu). "
509                        "Has to be greater than lnet_retry_count (%u)\n",
510                        value, lnet_retry_count);
511                 return -EINVAL;
512         }
513
514         if (value == *transaction_to) {
515                 mutex_unlock(&the_lnet.ln_api_mutex);
516                 return 0;
517         }
518
519         *transaction_to = value;
520         /* Update the lnet_lnd_timeout now that we've modified the
521          * transaction timeout
522          */
523         lnet_set_lnd_timeout();
524
525         mutex_unlock(&the_lnet.ln_api_mutex);
526
527         return 0;
528 }
529
530 static int
531 retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
532 {
533         int rc;
534         unsigned *retry_count = (unsigned *)kp->arg;
535         unsigned long value;
536
537         rc = kstrtoul(val, 0, &value);
538         if (rc) {
539                 CERROR("Invalid module parameter value for 'lnet_retry_count'\n");
540                 return rc;
541         }
542
543         /*
544          * The purpose of locking the api_mutex here is to ensure that
545          * the correct value ends up stored properly.
546          */
547         mutex_lock(&the_lnet.ln_api_mutex);
548
549         if (lnet_health_sensitivity == 0 && value > 0) {
550                 mutex_unlock(&the_lnet.ln_api_mutex);
551                 CERROR("Can not set lnet_retry_count when health feature is turned off\n");
552                 return -EINVAL;
553         }
554
555         if (value > lnet_transaction_timeout) {
556                 mutex_unlock(&the_lnet.ln_api_mutex);
557                 CERROR("Invalid value for lnet_retry_count (%lu). "
558                        "Has to be smaller than lnet_transaction_timeout (%u)\n",
559                        value, lnet_transaction_timeout);
560                 return -EINVAL;
561         }
562
563         *retry_count = value;
564
565         /* Update the lnet_lnd_timeout now that we've modified the
566          * retry count
567          */
568         lnet_set_lnd_timeout();
569
570         mutex_unlock(&the_lnet.ln_api_mutex);
571
572         return 0;
573 }
574
575 static int
576 intf_max_set(const char *val, cfs_kernel_param_arg_t *kp)
577 {
578         int value, rc;
579
580         rc = kstrtoint(val, 0, &value);
581         if (rc) {
582                 CERROR("Invalid module parameter value for 'lnet_interfaces_max'\n");
583                 return rc;
584         }
585
586         if (value < LNET_INTERFACES_MIN) {
587                 CWARN("max interfaces provided are too small, setting to %d\n",
588                       LNET_INTERFACES_MAX_DEFAULT);
589                 value = LNET_INTERFACES_MAX_DEFAULT;
590         }
591
592         *(int *)kp->arg = value;
593
594         return 0;
595 }
596
597 static int
598 response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp)
599 {
600         int rc;
601         unsigned long new_value;
602
603         rc = kstrtoul(val, 0, &new_value);
604         if (rc) {
605                 CERROR("Invalid value for 'lnet_response_tracking'\n");
606                 return -EINVAL;
607         }
608
609         if (new_value < 0 || new_value > 3) {
610                 CWARN("Invalid value (%lu) for 'lnet_response_tracking'\n",
611                       new_value);
612                 return -EINVAL;
613         }
614
615         lnet_response_tracking = new_value;
616
617         return 0;
618 }
619
620 static const char *
621 lnet_get_routes(void)
622 {
623         return routes;
624 }
625
626 static const char *
627 lnet_get_networks(void)
628 {
629         const char *nets;
630         int rc;
631
632         if (*networks != 0 && *ip2nets != 0) {
633                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
634                                    "'ip2nets' but not both at once\n");
635                 return NULL;
636         }
637
638         if (*ip2nets != 0) {
639                 rc = lnet_parse_ip2nets(&nets, ip2nets);
640                 return (rc == 0) ? nets : NULL;
641         }
642
643         if (*networks != 0)
644                 return networks;
645
646         return "tcp";
647 }
648
649 static void
650 lnet_init_locks(void)
651 {
652         spin_lock_init(&the_lnet.ln_eq_wait_lock);
653         spin_lock_init(&the_lnet.ln_msg_resend_lock);
654         init_completion(&the_lnet.ln_mt_wait_complete);
655         mutex_init(&the_lnet.ln_lnd_mutex);
656 }
657
/* Slab caches created by lnet_slab_setup(), destroyed by lnet_slab_cleanup() */
struct kmem_cache *lnet_mes_cachep;        /* MEs */
struct kmem_cache *lnet_small_mds_cachep;  /* MDs of <= LNET_SMALL_MD_SIZE
                                            * bytes */
struct kmem_cache *lnet_udsp_cachep;       /* udsp */
struct kmem_cache *lnet_rspt_cachep;       /* response trackers */
struct kmem_cache *lnet_msg_cachep;        /* struct lnet_msg */
664
665 static int
666 lnet_slab_setup(void)
667 {
668         /* create specific kmem_cache for MEs and small MDs (i.e., originally
669          * allocated in <size-xxx> kmem_cache).
670          */
671         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(struct lnet_me),
672                                             0, 0, NULL);
673         if (!lnet_mes_cachep)
674                 return -ENOMEM;
675
676         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
677                                                   LNET_SMALL_MD_SIZE, 0, 0,
678                                                   NULL);
679         if (!lnet_small_mds_cachep)
680                 return -ENOMEM;
681
682         lnet_udsp_cachep = kmem_cache_create("lnet_udsp",
683                                              sizeof(struct lnet_udsp),
684                                              0, 0, NULL);
685         if (!lnet_udsp_cachep)
686                 return -ENOMEM;
687
688         lnet_rspt_cachep = kmem_cache_create("lnet_rspt", sizeof(struct lnet_rsp_tracker),
689                                             0, 0, NULL);
690         if (!lnet_rspt_cachep)
691                 return -ENOMEM;
692
693         lnet_msg_cachep = kmem_cache_create("lnet_msg", sizeof(struct lnet_msg),
694                                             0, 0, NULL);
695         if (!lnet_msg_cachep)
696                 return -ENOMEM;
697
698         return 0;
699 }
700
701 static void
702 lnet_slab_cleanup(void)
703 {
704         if (lnet_msg_cachep) {
705                 kmem_cache_destroy(lnet_msg_cachep);
706                 lnet_msg_cachep = NULL;
707         }
708
709         if (lnet_rspt_cachep) {
710                 kmem_cache_destroy(lnet_rspt_cachep);
711                 lnet_rspt_cachep = NULL;
712         }
713
714         if (lnet_udsp_cachep) {
715                 kmem_cache_destroy(lnet_udsp_cachep);
716                 lnet_udsp_cachep = NULL;
717         }
718
719         if (lnet_small_mds_cachep) {
720                 kmem_cache_destroy(lnet_small_mds_cachep);
721                 lnet_small_mds_cachep = NULL;
722         }
723
724         if (lnet_mes_cachep) {
725                 kmem_cache_destroy(lnet_mes_cachep);
726                 lnet_mes_cachep = NULL;
727         }
728 }
729
730 static int
731 lnet_create_remote_nets_table(void)
732 {
733         int               i;
734         struct list_head *hash;
735
736         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
737         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
738         CFS_ALLOC_PTR_ARRAY(hash, LNET_REMOTE_NETS_HASH_SIZE);
739         if (hash == NULL) {
740                 CERROR("Failed to create remote nets hash table\n");
741                 return -ENOMEM;
742         }
743
744         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
745                 INIT_LIST_HEAD(&hash[i]);
746         the_lnet.ln_remote_nets_hash = hash;
747         return 0;
748 }
749
750 static void
751 lnet_destroy_remote_nets_table(void)
752 {
753         int i;
754
755         if (the_lnet.ln_remote_nets_hash == NULL)
756                 return;
757
758         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
759                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
760
761         CFS_FREE_PTR_ARRAY(the_lnet.ln_remote_nets_hash,
762                            LNET_REMOTE_NETS_HASH_SIZE);
763         the_lnet.ln_remote_nets_hash = NULL;
764 }
765
766 static void
767 lnet_destroy_locks(void)
768 {
769         if (the_lnet.ln_res_lock != NULL) {
770                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
771                 the_lnet.ln_res_lock = NULL;
772         }
773
774         if (the_lnet.ln_net_lock != NULL) {
775                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
776                 the_lnet.ln_net_lock = NULL;
777         }
778 }
779
780 static int
781 lnet_create_locks(void)
782 {
783         lnet_init_locks();
784
785         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
786         if (the_lnet.ln_res_lock == NULL)
787                 goto failed;
788
789         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
790         if (the_lnet.ln_net_lock == NULL)
791                 goto failed;
792
793         return 0;
794
795  failed:
796         lnet_destroy_locks();
797         return -ENOMEM;
798 }
799
800 static void lnet_assert_wire_constants(void)
801 {
802         /* Wire protocol assertions generated by 'wirecheck'
803          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
804          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
805          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
806          */
807
808         /* Constants... */
809         BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
810         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
811         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
812         BUILD_BUG_ON(LNET_MSG_ACK != 0);
813         BUILD_BUG_ON(LNET_MSG_PUT != 1);
814         BUILD_BUG_ON(LNET_MSG_GET != 2);
815         BUILD_BUG_ON(LNET_MSG_REPLY != 3);
816         BUILD_BUG_ON(LNET_MSG_HELLO != 4);
817
818         BUILD_BUG_ON((int)sizeof(lnet_nid_t) != 8);
819         BUILD_BUG_ON((int)sizeof(lnet_pid_t) != 4);
820
821         /* Checks for struct lnet_nid */
822         BUILD_BUG_ON((int)sizeof(struct lnet_nid) != 20);
823         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_size) != 0);
824         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_size) != 1);
825         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_type) != 1);
826         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_type) != 1);
827         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_num) != 2);
828         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_num) != 2);
829         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_addr) != 4);
830         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_addr) != 16);
831
832         /* Checks for struct lnet_process_id_packed */
833         BUILD_BUG_ON((int)sizeof(struct lnet_process_id_packed) != 12);
834         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, nid) != 0);
835         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->nid) != 8);
836         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, pid) != 8);
837         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->pid) != 4);
838
839         /* Checks for struct lnet_handle_wire */
840         BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
841         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
842                                    wh_interface_cookie) != 0);
843         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
844         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
845                                    wh_object_cookie) != 8);
846         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
847
848         /* Checks for struct struct lnet_magicversion */
849         BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
850         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
851         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
852         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
853         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
854         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion,
855                                    version_minor) != 6);
856         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
857
858         /* Checks for struct _lnet_hdr_nid4 */
859         BUILD_BUG_ON((int)sizeof(struct _lnet_hdr_nid4) != 72);
860         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_nid) != 0);
861         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_nid) != 8);
862         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_nid) != 8);
863         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_nid) != 8);
864         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_pid) != 16);
865         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_pid) != 4);
866         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_pid) != 20);
867         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_pid) != 4);
868         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, type) != 24);
869         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->type) != 4);
870         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, payload_length) != 28);
871         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->payload_length) != 4);
872         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg) != 32);
873         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg) != 40);
874
875         /* Ack */
876         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.dst_wmd) != 32);
877         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.dst_wmd) != 16);
878         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.match_bits) != 48);
879         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.match_bits) != 8);
880         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.mlength) != 56);
881         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.mlength) != 4);
882
883         /* Put */
884         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ack_wmd) != 32);
885         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ack_wmd) != 16);
886         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.match_bits) != 48);
887         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.match_bits) != 8);
888         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.hdr_data) != 56);
889         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.hdr_data) != 8);
890         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ptl_index) != 64);
891         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ptl_index) != 4);
892         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.offset) != 68);
893         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.offset) != 4);
894
895         /* Get */
896         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.return_wmd) != 32);
897         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.return_wmd) != 16);
898         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.match_bits) != 48);
899         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.match_bits) != 8);
900         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.ptl_index) != 56);
901         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.ptl_index) != 4);
902         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.src_offset) != 60);
903         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.src_offset) != 4);
904         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.sink_length) != 64);
905         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.sink_length) != 4);
906
907         /* Reply */
908         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.reply.dst_wmd) != 32);
909         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.reply.dst_wmd) != 16);
910
911         /* Hello */
912         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.incarnation) != 32);
913         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.incarnation) != 8);
914         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.type) != 40);
915         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.type) != 4);
916
917         /* Checks for struct lnet_ni_status and related constants */
918         BUILD_BUG_ON(LNET_NI_STATUS_INVALID != 0x00000000);
919         BUILD_BUG_ON(LNET_NI_STATUS_UP != 0x15aac0de);
920         BUILD_BUG_ON(LNET_NI_STATUS_DOWN != 0xdeadface);
921
922         /* Checks for struct lnet_ni_status */
923         BUILD_BUG_ON((int)sizeof(struct lnet_ni_status) != 16);
924         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_nid) != 0);
925         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) != 8);
926         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_status) != 8);
927         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_status) != 4);
928         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_msg_size) != 12);
929         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_msg_size) != 4);
930
931         /* Checks for struct lnet_ni_large_status */
932         BUILD_BUG_ON((int)sizeof(struct lnet_ni_large_status) != 24);
933         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_status) != 0);
934         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_status) != 4);
935         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_nid) != 4);
936         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_nid) != 20);
937
938         /* Checks for struct lnet_ping_info and related constants */
939         BUILD_BUG_ON(LNET_PROTO_PING_MAGIC != 0x70696E67);
940         BUILD_BUG_ON(LNET_PING_FEAT_INVAL != 0);
941         BUILD_BUG_ON(LNET_PING_FEAT_BASE != 1);
942         BUILD_BUG_ON(LNET_PING_FEAT_NI_STATUS != 2);
943         BUILD_BUG_ON(LNET_PING_FEAT_RTE_DISABLED != 4);
944         BUILD_BUG_ON(LNET_PING_FEAT_MULTI_RAIL != 8);
945         BUILD_BUG_ON(LNET_PING_FEAT_DISCOVERY != 16);
946         BUILD_BUG_ON(LNET_PING_FEAT_LARGE_ADDR != 32);
947         BUILD_BUG_ON(LNET_PING_FEAT_PRIMARY_LARGE != 64);
948         BUILD_BUG_ON(LNET_PING_FEAT_BITS != 127);
949
950         /* Checks for struct lnet_ping_info */
951         BUILD_BUG_ON((int)sizeof(struct lnet_ping_info) != 16);
952         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_magic) != 0);
953         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) != 4);
954         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_features) != 4);
955         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_features) != 4);
956         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_pid) != 8);
957         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) != 4);
958         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_nnis) != 12);
959         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) != 4);
960         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_ni) != 16);
961         BUILD_BUG_ON(offsetof(struct lnet_ping_info, pi_ni) != sizeof(struct lnet_ping_info));
962
963         /* Acceptor connection request */
964         BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
965
966         /* Checks for struct lnet_acceptor_connreq */
967         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq) != 16);
968         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_magic) != 0);
969         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_magic) != 4);
970         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_version) != 4);
971         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_version) != 4);
972         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_nid) != 8);
973         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_nid) != 8);
974
975         /* Checks for struct lnet_acceptor_connreq_v2 */
976         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq_v2) != 28);
977         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_magic) != 0);
978         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_magic) != 4);
979         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_version) != 4);
980         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_version) != 4);
981         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_nid) != 8);
982         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_nid) != 20);
983
984         /* Checks for struct lnet_counters_common */
985         BUILD_BUG_ON((int)sizeof(struct lnet_counters_common) != 60);
986         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_alloc) != 0);
987         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_alloc) != 4);
988         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_max) != 4);
989         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_max) != 4);
990         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_errors) != 8);
991         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_errors) != 4);
992         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_count) != 12);
993         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_count) != 4);
994         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_count) != 16);
995         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_count) != 4);
996         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_count) != 20);
997         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_count) != 4);
998         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_count) != 24);
999         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_count) != 4);
1000         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_length) != 28);
1001         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_length) != 8);
1002         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_length) != 36);
1003         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_length) != 8);
1004         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_length) != 44);
1005         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_length) != 8);
1006         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_length) != 52);
1007         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_length) != 8);
1008 }
1009
1010 static const struct lnet_lnd *lnet_find_lnd_by_type(__u32 type)
1011 {
1012         const struct lnet_lnd *lnd;
1013
1014         /* holding lnd mutex */
1015         if (type >= NUM_LNDS)
1016                 return NULL;
1017         lnd = the_lnet.ln_lnds[type];
1018         LASSERT(!lnd || lnd->lnd_type == type);
1019
1020         return lnd;
1021 }
1022
1023 unsigned int
1024 lnet_get_lnd_timeout(void)
1025 {
1026         return lnet_lnd_timeout;
1027 }
1028 EXPORT_SYMBOL(lnet_get_lnd_timeout);
1029
1030 void
1031 lnet_register_lnd(const struct lnet_lnd *lnd)
1032 {
1033         mutex_lock(&the_lnet.ln_lnd_mutex);
1034
1035         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
1036         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
1037
1038         the_lnet.ln_lnds[lnd->lnd_type] = lnd;
1039
1040         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
1041
1042         mutex_unlock(&the_lnet.ln_lnd_mutex);
1043 }
1044 EXPORT_SYMBOL(lnet_register_lnd);
1045
1046 void
1047 lnet_unregister_lnd(const struct lnet_lnd *lnd)
1048 {
1049         mutex_lock(&the_lnet.ln_lnd_mutex);
1050
1051         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
1052
1053         the_lnet.ln_lnds[lnd->lnd_type] = NULL;
1054         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
1055
1056         mutex_unlock(&the_lnet.ln_lnd_mutex);
1057 }
1058 EXPORT_SYMBOL(lnet_unregister_lnd);
1059
1060 static void
1061 lnet_counters_get_common_locked(struct lnet_counters_common *common)
1062 {
1063         struct lnet_counters *ctr;
1064         int i;
1065
1066         /* FIXME !!! Their is no assert_lnet_net_locked() to ensure this
1067          * actually called under the protection of the lnet_net_lock.
1068          */
1069         memset(common, 0, sizeof(*common));
1070
1071         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1072                 common->lcc_msgs_max     += ctr->lct_common.lcc_msgs_max;
1073                 common->lcc_msgs_alloc   += ctr->lct_common.lcc_msgs_alloc;
1074                 common->lcc_errors       += ctr->lct_common.lcc_errors;
1075                 common->lcc_send_count   += ctr->lct_common.lcc_send_count;
1076                 common->lcc_recv_count   += ctr->lct_common.lcc_recv_count;
1077                 common->lcc_route_count  += ctr->lct_common.lcc_route_count;
1078                 common->lcc_drop_count   += ctr->lct_common.lcc_drop_count;
1079                 common->lcc_send_length  += ctr->lct_common.lcc_send_length;
1080                 common->lcc_recv_length  += ctr->lct_common.lcc_recv_length;
1081                 common->lcc_route_length += ctr->lct_common.lcc_route_length;
1082                 common->lcc_drop_length  += ctr->lct_common.lcc_drop_length;
1083         }
1084 }
1085
1086 void
1087 lnet_counters_get_common(struct lnet_counters_common *common)
1088 {
1089         lnet_net_lock(LNET_LOCK_EX);
1090         lnet_counters_get_common_locked(common);
1091         lnet_net_unlock(LNET_LOCK_EX);
1092 }
1093 EXPORT_SYMBOL(lnet_counters_get_common);
1094
1095 int
1096 lnet_counters_get(struct lnet_counters *counters)
1097 {
1098         struct lnet_counters *ctr;
1099         struct lnet_counters_health *health = &counters->lct_health;
1100         int i, rc = 0;
1101
1102         memset(counters, 0, sizeof(*counters));
1103
1104         lnet_net_lock(LNET_LOCK_EX);
1105
1106         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1107                 GOTO(out_unlock, rc = -ENODEV);
1108
1109         lnet_counters_get_common_locked(&counters->lct_common);
1110
1111         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1112                 health->lch_rst_alloc    += ctr->lct_health.lch_rst_alloc;
1113                 health->lch_resend_count += ctr->lct_health.lch_resend_count;
1114                 health->lch_response_timeout_count +=
1115                                 ctr->lct_health.lch_response_timeout_count;
1116                 health->lch_local_interrupt_count +=
1117                                 ctr->lct_health.lch_local_interrupt_count;
1118                 health->lch_local_dropped_count +=
1119                                 ctr->lct_health.lch_local_dropped_count;
1120                 health->lch_local_aborted_count +=
1121                                 ctr->lct_health.lch_local_aborted_count;
1122                 health->lch_local_no_route_count +=
1123                                 ctr->lct_health.lch_local_no_route_count;
1124                 health->lch_local_timeout_count +=
1125                                 ctr->lct_health.lch_local_timeout_count;
1126                 health->lch_local_error_count +=
1127                                 ctr->lct_health.lch_local_error_count;
1128                 health->lch_remote_dropped_count +=
1129                                 ctr->lct_health.lch_remote_dropped_count;
1130                 health->lch_remote_error_count +=
1131                                 ctr->lct_health.lch_remote_error_count;
1132                 health->lch_remote_timeout_count +=
1133                                 ctr->lct_health.lch_remote_timeout_count;
1134                 health->lch_network_timeout_count +=
1135                                 ctr->lct_health.lch_network_timeout_count;
1136         }
1137 out_unlock:
1138         lnet_net_unlock(LNET_LOCK_EX);
1139         return rc;
1140 }
1141 EXPORT_SYMBOL(lnet_counters_get);
1142
1143 void
1144 lnet_counters_reset(void)
1145 {
1146         struct lnet_counters *counters;
1147         int             i;
1148
1149         lnet_net_lock(LNET_LOCK_EX);
1150
1151         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1152                 goto avoid_reset;
1153
1154         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
1155                 memset(counters, 0, sizeof(struct lnet_counters));
1156 avoid_reset:
1157         lnet_net_unlock(LNET_LOCK_EX);
1158 }
1159
1160 static char *
1161 lnet_res_type2str(int type)
1162 {
1163         switch (type) {
1164         default:
1165                 LBUG();
1166         case LNET_COOKIE_TYPE_MD:
1167                 return "MD";
1168         case LNET_COOKIE_TYPE_ME:
1169                 return "ME";
1170         case LNET_COOKIE_TYPE_EQ:
1171                 return "EQ";
1172         }
1173 }
1174
1175 static void
1176 lnet_res_container_cleanup(struct lnet_res_container *rec)
1177 {
1178         int     count = 0;
1179
1180         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
1181                 return;
1182
1183         while (!list_empty(&rec->rec_active)) {
1184                 struct list_head *e = rec->rec_active.next;
1185
1186                 list_del_init(e);
1187                 if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
1188                         lnet_md_free(list_entry(e, struct lnet_libmd, md_list));
1189
1190                 } else { /* NB: Active MEs should be attached on portals */
1191                         LBUG();
1192                 }
1193                 count++;
1194         }
1195
1196         if (count > 0) {
1197                 /* Found alive MD/ME/EQ, user really should unlink/free
1198                  * all of them before finalize LNet, but if someone didn't,
1199                  * we have to recycle garbage for him */
1200                 CERROR("%d active elements on exit of %s container\n",
1201                        count, lnet_res_type2str(rec->rec_type));
1202         }
1203
1204         if (rec->rec_lh_hash != NULL) {
1205                 CFS_FREE_PTR_ARRAY(rec->rec_lh_hash, LNET_LH_HASH_SIZE);
1206                 rec->rec_lh_hash = NULL;
1207         }
1208
1209         rec->rec_type = 0; /* mark it as finalized */
1210 }
1211
1212 static int
1213 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
1214 {
1215         int     rc = 0;
1216         int     i;
1217
1218         LASSERT(rec->rec_type == 0);
1219
1220         rec->rec_type = type;
1221         INIT_LIST_HEAD(&rec->rec_active);
1222
1223         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
1224
1225         /* Arbitrary choice of hash table size */
1226         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
1227                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
1228         if (rec->rec_lh_hash == NULL) {
1229                 rc = -ENOMEM;
1230                 goto out;
1231         }
1232
1233         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
1234                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
1235
1236         return 0;
1237
1238 out:
1239         CERROR("Failed to setup %s resource container\n",
1240                lnet_res_type2str(type));
1241         lnet_res_container_cleanup(rec);
1242         return rc;
1243 }
1244
1245 static void
1246 lnet_res_containers_destroy(struct lnet_res_container **recs)
1247 {
1248         struct lnet_res_container       *rec;
1249         int                             i;
1250
1251         cfs_percpt_for_each(rec, i, recs)
1252                 lnet_res_container_cleanup(rec);
1253
1254         cfs_percpt_free(recs);
1255 }
1256
1257 static struct lnet_res_container **
1258 lnet_res_containers_create(int type)
1259 {
1260         struct lnet_res_container       **recs;
1261         struct lnet_res_container       *rec;
1262         int                             rc;
1263         int                             i;
1264
1265         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
1266         if (recs == NULL) {
1267                 CERROR("Failed to allocate %s resource containers\n",
1268                        lnet_res_type2str(type));
1269                 return NULL;
1270         }
1271
1272         cfs_percpt_for_each(rec, i, recs) {
1273                 rc = lnet_res_container_setup(rec, i, type);
1274                 if (rc != 0) {
1275                         lnet_res_containers_destroy(recs);
1276                         return NULL;
1277                 }
1278         }
1279
1280         return recs;
1281 }
1282
1283 struct lnet_libhandle *
1284 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
1285 {
1286         /* ALWAYS called with lnet_res_lock held */
1287         struct list_head        *head;
1288         struct lnet_libhandle   *lh;
1289         unsigned int            hash;
1290
1291         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
1292                 return NULL;
1293
1294         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
1295         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
1296
1297         list_for_each_entry(lh, head, lh_hash_chain) {
1298                 if (lh->lh_cookie == cookie)
1299                         return lh;
1300         }
1301
1302         return NULL;
1303 }
1304
1305 void
1306 lnet_res_lh_initialize(struct lnet_res_container *rec,
1307                        struct lnet_libhandle *lh)
1308 {
1309         /* ALWAYS called with lnet_res_lock held */
1310         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
1311         unsigned int    hash;
1312
1313         lh->lh_cookie = rec->rec_lh_cookie;
1314         rec->rec_lh_cookie += 1 << ibits;
1315
1316         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
1317
1318         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
1319 }
1320
1321 struct list_head **
1322 lnet_create_array_of_queues(void)
1323 {
1324         struct list_head **qs;
1325         struct list_head *q;
1326         int i;
1327
1328         qs = cfs_percpt_alloc(lnet_cpt_table(),
1329                               sizeof(struct list_head));
1330         if (!qs) {
1331                 CERROR("Failed to allocate queues\n");
1332                 return NULL;
1333         }
1334
1335         cfs_percpt_for_each(q, i, qs)
1336                 INIT_LIST_HEAD(q);
1337
1338         return qs;
1339 }
1340
1341 static int lnet_unprepare(void);
1342
1343 static int
1344 lnet_prepare(lnet_pid_t requested_pid)
1345 {
1346         /* Prepare to bring up the network */
1347         struct lnet_res_container **recs;
1348         int                       rc = 0;
1349
1350         if (requested_pid == LNET_PID_ANY) {
1351                 /* Don't instantiate LNET just for me */
1352                 return -ENETDOWN;
1353         }
1354
1355         LASSERT(the_lnet.ln_refcount == 0);
1356
1357         the_lnet.ln_routing = 0;
1358
1359         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
1360         the_lnet.ln_pid = requested_pid;
1361
1362         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
1363         INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
1364         INIT_LIST_HEAD(&the_lnet.ln_nets);
1365         INIT_LIST_HEAD(&the_lnet.ln_routers);
1366         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
1367         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
1368         INIT_LIST_HEAD(&the_lnet.ln_dc_request);
1369         INIT_LIST_HEAD(&the_lnet.ln_dc_working);
1370         INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
1371         INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
1372         INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
1373         INIT_LIST_HEAD(&the_lnet.ln_udsp_list);
1374         init_waitqueue_head(&the_lnet.ln_dc_waitq);
1375         the_lnet.ln_mt_handler = NULL;
1376         init_completion(&the_lnet.ln_started);
1377         atomic_set(&the_lnet.ln_late_msg_count, 0);
1378         atomic64_set(&the_lnet.ln_late_msg_nsecs, 0);
1379
1380         rc = lnet_slab_setup();
1381         if (rc != 0)
1382                 goto failed;
1383
1384         rc = lnet_create_remote_nets_table();
1385         if (rc != 0)
1386                 goto failed;
1387
1388         /*
1389          * NB the interface cookie in wire handles guards against delayed
1390          * replies and ACKs appearing valid after reboot.
1391          */
1392         the_lnet.ln_interface_cookie = ktime_get_real_ns();
1393
1394         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
1395                                                 sizeof(struct lnet_counters));
1396         if (the_lnet.ln_counters == NULL) {
1397                 CERROR("Failed to allocate counters for LNet\n");
1398                 rc = -ENOMEM;
1399                 goto failed;
1400         }
1401
1402         rc = lnet_peer_tables_create();
1403         if (rc != 0)
1404                 goto failed;
1405
1406         rc = lnet_msg_containers_create();
1407         if (rc != 0)
1408                 goto failed;
1409
1410         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
1411                                       LNET_COOKIE_TYPE_EQ);
1412         if (rc != 0)
1413                 goto failed;
1414
1415         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
1416         if (recs == NULL) {
1417                 rc = -ENOMEM;
1418                 goto failed;
1419         }
1420
1421         the_lnet.ln_md_containers = recs;
1422
1423         rc = lnet_portals_create();
1424         if (rc != 0) {
1425                 CERROR("Failed to create portals for LNet: %d\n", rc);
1426                 goto failed;
1427         }
1428
1429         the_lnet.ln_mt_zombie_rstqs = lnet_create_array_of_queues();
1430         if (!the_lnet.ln_mt_zombie_rstqs) {
1431                 rc = -ENOMEM;
1432                 goto failed;
1433         }
1434
1435         return 0;
1436
1437  failed:
1438         lnet_unprepare();
1439         return rc;
1440 }
1441
1442 static int
1443 lnet_unprepare(void)
1444 {
1445         /* NB no LNET_LOCK since this is the last reference.  All LND instances
1446          * have shut down already, so it is safe to unlink and free all
1447          * descriptors, even those that appear committed to a network op (eg MD
1448          * with non-zero pending count) */
1449
1450         lnet_fail_nid(LNET_NID_ANY, 0);
1451
1452         LASSERT(the_lnet.ln_refcount == 0);
1453         LASSERT(list_empty(&the_lnet.ln_test_peers));
1454         LASSERT(list_empty(&the_lnet.ln_nets));
1455
1456         if (the_lnet.ln_mt_zombie_rstqs) {
1457                 lnet_clean_zombie_rstqs();
1458                 the_lnet.ln_mt_zombie_rstqs = NULL;
1459         }
1460
1461         lnet_assert_handler_unused(the_lnet.ln_mt_handler);
1462         the_lnet.ln_mt_handler = NULL;
1463
1464         lnet_portals_destroy();
1465
1466         if (the_lnet.ln_md_containers != NULL) {
1467                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
1468                 the_lnet.ln_md_containers = NULL;
1469         }
1470
1471         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
1472
1473         lnet_msg_containers_destroy();
1474         lnet_peer_uninit();
1475         lnet_rtrpools_free(0);
1476
1477         if (the_lnet.ln_counters != NULL) {
1478                 cfs_percpt_free(the_lnet.ln_counters);
1479                 the_lnet.ln_counters = NULL;
1480         }
1481         lnet_destroy_remote_nets_table();
1482         lnet_udsp_destroy(true);
1483         lnet_slab_cleanup();
1484
1485         return 0;
1486 }
1487
1488 struct lnet_ni  *
1489 lnet_net2ni_locked(__u32 net_id, int cpt)
1490 {
1491         struct lnet_ni   *ni;
1492         struct lnet_net  *net;
1493
1494         LASSERT(cpt != LNET_LOCK_EX);
1495
1496         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1497                 if (net->net_id == net_id) {
1498                         ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
1499                                               ni_netlist);
1500                         return ni;
1501                 }
1502         }
1503
1504         return NULL;
1505 }
1506
1507 struct lnet_ni *
1508 lnet_net2ni_addref(__u32 net)
1509 {
1510         struct lnet_ni *ni;
1511
1512         lnet_net_lock(0);
1513         ni = lnet_net2ni_locked(net, 0);
1514         if (ni)
1515                 lnet_ni_addref_locked(ni, 0);
1516         lnet_net_unlock(0);
1517
1518         return ni;
1519 }
1520 EXPORT_SYMBOL(lnet_net2ni_addref);
1521
1522 struct lnet_net *
1523 lnet_get_net_locked(__u32 net_id)
1524 {
1525         struct lnet_net  *net;
1526
1527         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1528                 if (net->net_id == net_id)
1529                         return net;
1530         }
1531
1532         return NULL;
1533 }
1534
1535 void
1536 lnet_net_clr_pref_rtrs(struct lnet_net *net)
1537 {
1538         struct list_head zombies;
1539         struct lnet_nid_list *ne;
1540         struct lnet_nid_list *tmp;
1541
1542         INIT_LIST_HEAD(&zombies);
1543
1544         lnet_net_lock(LNET_LOCK_EX);
1545         list_splice_init(&net->net_rtr_pref_nids, &zombies);
1546         lnet_net_unlock(LNET_LOCK_EX);
1547
1548         list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
1549                 list_del_init(&ne->nl_list);
1550                 LIBCFS_FREE(ne, sizeof(*ne));
1551         }
1552 }
1553
1554 int
1555 lnet_net_add_pref_rtr(struct lnet_net *net,
1556                       struct lnet_nid *gw_nid)
1557 __must_hold(&the_lnet.ln_api_mutex)
1558 {
1559         struct lnet_nid_list *ne;
1560
1561         /* This function is called with api_mutex held. When the api_mutex
1562          * is held the list can not be modified, as it is only modified as
1563          * a result of applying a UDSP and that happens under api_mutex
1564          * lock.
1565          */
1566         list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
1567                 if (nid_same(&ne->nl_nid, gw_nid))
1568                         return -EEXIST;
1569         }
1570
1571         LIBCFS_ALLOC(ne, sizeof(*ne));
1572         if (!ne)
1573                 return -ENOMEM;
1574
1575         ne->nl_nid = *gw_nid;
1576
1577         /* Lock the cpt to protect against addition and checks in the
1578          * selection algorithm
1579          */
1580         lnet_net_lock(LNET_LOCK_EX);
1581         list_add(&ne->nl_list, &net->net_rtr_pref_nids);
1582         lnet_net_unlock(LNET_LOCK_EX);
1583
1584         return 0;
1585 }
1586
1587 static unsigned int
1588 lnet_nid4_cpt_hash(lnet_nid_t nid, unsigned int number)
1589 {
1590         __u64 key = nid;
1591         __u16 lnd = LNET_NETTYP(LNET_NIDNET(nid));
1592         unsigned int cpt;
1593
1594         if (lnd == KFILND || lnd == GNILND) {
1595                 cpt = hash_long(key, LNET_CPT_BITS);
1596
1597                 /* NB: The number of CPTs needn't be a power of 2 */
1598                 if (cpt >= number)
1599                         cpt = (key + cpt + (cpt >> 1)) % number;
1600         } else {
1601                 __u64 pair_bits = 0x0001000100010001LLU;
1602                 __u64 mask = pair_bits * 0xFF;
1603                 __u64 pair_sum;
1604                 /* For ipv4 NIDs, use (sum-by-multiplication of nid bytes) mod
1605                  * (number of CPTs) to match nid to a CPT.
1606                  */
1607                 pair_sum = (key & mask) + ((key >> 8) & mask);
1608                 pair_sum = (pair_sum * pair_bits) >> 48;
1609                 cpt = (unsigned int)(pair_sum) % number;
1610         }
1611
1612         CDEBUG(D_NET, "Match nid %s to cpt %u\n",
1613                libcfs_nid2str(nid), cpt);
1614
1615         return cpt;
1616 }
1617
1618 unsigned int
1619 lnet_nid_cpt_hash(struct lnet_nid *nid, unsigned int number)
1620 {
1621         unsigned int val;
1622         u32 h = 0;
1623         int i;
1624
1625         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
1626
1627         if (number == 1)
1628                 return 0;
1629
1630         if (nid_is_nid4(nid))
1631                 return lnet_nid4_cpt_hash(lnet_nid_to_nid4(nid), number);
1632
1633         for (i = 0; i < 4; i++)
1634                 h = cfs_hash_32(nid->nid_addr[i]^h, 32);
1635         val = cfs_hash_32(LNET_NID_NET(nid) ^ h, LNET_CPT_BITS);
1636         if (val < number)
1637                 return val;
1638         return (unsigned int)(h + val + (val >> 1)) % number;
1639 }
1640
1641 int
1642 lnet_cpt_of_nid_locked(struct lnet_nid *nid, struct lnet_ni *ni)
1643 {
1644         struct lnet_net *net;
1645
1646         /* must called with hold of lnet_net_lock */
1647         if (LNET_CPT_NUMBER == 1)
1648                 return 0; /* the only one */
1649
1650         /*
1651          * If NI is provided then use the CPT identified in the NI cpt
1652          * list if one exists. If one doesn't exist, then that NI is
1653          * associated with all CPTs and it follows that the net it belongs
1654          * to is implicitly associated with all CPTs, so just hash the nid
1655          * and return that.
1656          */
1657         if (ni != NULL) {
1658                 if (ni->ni_cpts != NULL)
1659                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
1660                                                              ni->ni_ncpts)];
1661                 else
1662                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1663         }
1664
1665         /* no NI provided so look at the net */
1666         net = lnet_get_net_locked(LNET_NID_NET(nid));
1667
1668         if (net != NULL && net->net_cpts != NULL) {
1669                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
1670         }
1671
1672         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1673 }
1674
1675 int
1676 lnet_nid2cpt(struct lnet_nid *nid, struct lnet_ni *ni)
1677 {
1678         int     cpt;
1679         int     cpt2;
1680
1681         if (LNET_CPT_NUMBER == 1)
1682                 return 0; /* the only one */
1683
1684         cpt = lnet_net_lock_current();
1685
1686         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
1687
1688         lnet_net_unlock(cpt);
1689
1690         return cpt2;
1691 }
1692 EXPORT_SYMBOL(lnet_nid2cpt);
1693
1694 int
1695 lnet_cpt_of_nid(lnet_nid_t nid4, struct lnet_ni *ni)
1696 {
1697         struct lnet_nid nid;
1698
1699         if (LNET_CPT_NUMBER == 1)
1700                 return 0; /* the only one */
1701
1702         lnet_nid4_to_nid(nid4, &nid);
1703         return lnet_nid2cpt(&nid, ni);
1704 }
1705 EXPORT_SYMBOL(lnet_cpt_of_nid);
1706
1707 int
1708 lnet_islocalnet_locked(__u32 net_id)
1709 {
1710         struct lnet_net *net;
1711         bool local;
1712
1713         net = lnet_get_net_locked(net_id);
1714
1715         local = net != NULL;
1716
1717         return local;
1718 }
1719
1720 int
1721 lnet_islocalnet(__u32 net_id)
1722 {
1723         int cpt;
1724         bool local;
1725
1726         cpt = lnet_net_lock_current();
1727
1728         local = lnet_islocalnet_locked(net_id);
1729
1730         lnet_net_unlock(cpt);
1731
1732         return local;
1733 }
1734
1735 struct lnet_ni  *
1736 lnet_nid_to_ni_locked(struct lnet_nid *nid, int cpt)
1737 {
1738         struct lnet_net  *net;
1739         struct lnet_ni *ni;
1740
1741         LASSERT(cpt != LNET_LOCK_EX);
1742
1743         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1744                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1745                         if (nid_same(&ni->ni_nid, nid))
1746                                 return ni;
1747                 }
1748         }
1749
1750         return NULL;
1751 }
1752
1753 struct lnet_ni *
1754 lnet_nid_to_ni_addref(struct lnet_nid *nid)
1755 {
1756         struct lnet_ni *ni;
1757
1758         lnet_net_lock(0);
1759         ni = lnet_nid_to_ni_locked(nid, 0);
1760         if (ni)
1761                 lnet_ni_addref_locked(ni, 0);
1762         lnet_net_unlock(0);
1763
1764         return ni;
1765 }
1766 EXPORT_SYMBOL(lnet_nid_to_ni_addref);
1767
1768 int
1769 lnet_islocalnid(struct lnet_nid *nid)
1770 {
1771         struct lnet_ni  *ni;
1772         int             cpt;
1773
1774         cpt = lnet_net_lock_current();
1775         ni = lnet_nid_to_ni_locked(nid, cpt);
1776         lnet_net_unlock(cpt);
1777
1778         return ni != NULL;
1779 }
1780
1781 int
1782 lnet_count_acceptor_nets(void)
1783 {
1784         /* Return the # of NIs that need the acceptor. */
1785         int              count = 0;
1786         struct lnet_net  *net;
1787         int              cpt;
1788
1789         cpt = lnet_net_lock_current();
1790         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1791                 /* all socklnd type networks should have the acceptor
1792                  * thread started */
1793                 if (net->net_lnd->lnd_accept != NULL)
1794                         count++;
1795         }
1796
1797         lnet_net_unlock(cpt);
1798
1799         return count;
1800 }
1801
1802 struct lnet_ping_buffer *
1803 lnet_ping_buffer_alloc(int nbytes, gfp_t gfp)
1804 {
1805         struct lnet_ping_buffer *pbuf;
1806
1807         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nbytes), gfp);
1808         if (pbuf) {
1809                 pbuf->pb_nbytes = nbytes;       /* sizeof of pb_info */
1810                 pbuf->pb_needs_post = false;
1811                 atomic_set(&pbuf->pb_refcnt, 1);
1812         }
1813
1814         return pbuf;
1815 }
1816
1817 void
1818 lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
1819 {
1820         LASSERT(atomic_read(&pbuf->pb_refcnt) == 0);
1821         LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nbytes));
1822 }
1823
1824 static struct lnet_ping_buffer *
1825 lnet_ping_target_create(int nbytes)
1826 {
1827         struct lnet_ping_buffer *pbuf;
1828
1829         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
1830         if (pbuf == NULL) {
1831                 CERROR("Can't allocate ping source [%d]\n", nbytes);
1832                 return NULL;
1833         }
1834
1835         pbuf->pb_info.pi_nnis = 0;
1836         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1837         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1838         pbuf->pb_info.pi_features =
1839                 LNET_PING_FEAT_NI_STATUS | LNET_PING_FEAT_MULTI_RAIL;
1840
1841         return pbuf;
1842 }
1843
1844 static inline int
1845 lnet_get_net_ni_bytes_locked(struct lnet_net *net)
1846 {
1847         struct lnet_ni *ni;
1848         int bytes = 0;
1849
1850         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1851                 bytes += lnet_ping_sts_size(&ni->ni_nid);
1852
1853         return bytes;
1854 }
1855
1856 static inline int
1857 lnet_get_ni_bytes(void)
1858 {
1859         struct lnet_ni *ni;
1860         struct lnet_net *net;
1861         int bytes = 0;
1862
1863         lnet_net_lock(0);
1864
1865         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1866                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1867                         bytes += lnet_ping_sts_size(&ni->ni_nid);
1868         }
1869
1870         lnet_net_unlock(0);
1871
1872         return bytes;
1873 }
1874
1875 void
1876 lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf)
1877 {
1878         struct lnet_ni_large_status *lstat, *lend;
1879         struct lnet_ni_status *stat, *end;
1880         int nnis;
1881         int i;
1882
1883         __swab32s(&pbuf->pb_info.pi_magic);
1884         __swab32s(&pbuf->pb_info.pi_features);
1885         __swab32s(&pbuf->pb_info.pi_pid);
1886         __swab32s(&pbuf->pb_info.pi_nnis);
1887         nnis = pbuf->pb_info.pi_nnis;
1888         stat = &pbuf->pb_info.pi_ni[0];
1889         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
1890         for (i = 0; i < nnis && stat + 1 <= end; i++, stat++) {
1891                 __swab64s(&stat->ns_nid);
1892                 __swab32s(&stat->ns_status);
1893                 if (i == 0)
1894                         /* Might be total size */
1895                         __swab32s(&stat->ns_msg_size);
1896         }
1897         if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_LARGE_ADDR))
1898                 return;
1899
1900         lstat = (struct lnet_ni_large_status *)stat;
1901         lend = (void *)end;
1902         while (lstat + 1 <= lend) {
1903                 __swab32s(&lstat->ns_status);
1904                 /* struct lnet_nid never needs to be swabed */
1905                 lstat = lnet_ping_sts_next(lstat);
1906         }
1907 }
1908
1909 int
1910 lnet_ping_info_validate(struct lnet_ping_info *pinfo)
1911 {
1912         if (!pinfo)
1913                 return -EINVAL;
1914         if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
1915                 return -EPROTO;
1916         if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
1917                 return -EPROTO;
1918         /* Loopback is guaranteed to be present */
1919         if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
1920                 return -ERANGE;
1921         if (LNET_PING_INFO_LONI(pinfo) != LNET_NID_LO_0)
1922                 return -EPROTO;
1923         return 0;
1924 }
1925
1926 static void
1927 lnet_ping_target_destroy(void)
1928 {
1929         struct lnet_net *net;
1930         struct lnet_ni  *ni;
1931
1932         lnet_net_lock(LNET_LOCK_EX);
1933
1934         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1935                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1936                         lnet_ni_lock(ni);
1937                         ni->ni_status = NULL;
1938                         lnet_ni_unlock(ni);
1939                 }
1940         }
1941
1942         lnet_ping_buffer_decref(the_lnet.ln_ping_target);
1943         the_lnet.ln_ping_target = NULL;
1944
1945         lnet_net_unlock(LNET_LOCK_EX);
1946 }
1947
1948 static void
1949 lnet_ping_target_event_handler(struct lnet_event *event)
1950 {
1951         struct lnet_ping_buffer *pbuf = event->md_user_ptr;
1952
1953         if (event->unlinked)
1954                 lnet_ping_buffer_decref(pbuf);
1955 }
1956
1957 static int
1958 lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
1959                        struct lnet_handle_md *ping_mdh,
1960                        int ni_bytes, bool set_eq)
1961 {
1962         struct lnet_processid id = {
1963                 .nid = LNET_ANY_NID,
1964                 .pid = LNET_PID_ANY
1965         };
1966         struct lnet_me *me;
1967         struct lnet_md md = { NULL };
1968         int rc;
1969
1970         if (set_eq)
1971                 the_lnet.ln_ping_target_handler =
1972                         lnet_ping_target_event_handler;
1973
1974         *ppbuf = lnet_ping_target_create(ni_bytes);
1975         if (*ppbuf == NULL) {
1976                 rc = -ENOMEM;
1977                 goto fail_free_eq;
1978         }
1979
1980         /* Ping target ME/MD */
1981         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
1982                           LNET_PROTO_PING_MATCHBITS, 0,
1983                           LNET_UNLINK, LNET_INS_AFTER);
1984         if (IS_ERR(me)) {
1985                 rc = PTR_ERR(me);
1986                 CERROR("Can't create ping target ME: %d\n", rc);
1987                 goto fail_decref_ping_buffer;
1988         }
1989
1990         /* initialize md content */
1991         md.start     = &(*ppbuf)->pb_info;
1992         md.length    = (*ppbuf)->pb_nbytes;
1993         md.threshold = LNET_MD_THRESH_INF;
1994         md.max_size  = 0;
1995         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1996                        LNET_MD_MANAGE_REMOTE;
1997         md.handler   = the_lnet.ln_ping_target_handler;
1998         md.user_ptr  = *ppbuf;
1999
2000         rc = LNetMDAttach(me, &md, LNET_RETAIN, ping_mdh);
2001         if (rc != 0) {
2002                 CERROR("Can't attach ping target MD: %d\n", rc);
2003                 goto fail_decref_ping_buffer;
2004         }
2005         lnet_ping_buffer_addref(*ppbuf);
2006
2007         return 0;
2008
2009 fail_decref_ping_buffer:
2010         LASSERT(atomic_read(&(*ppbuf)->pb_refcnt) == 1);
2011         lnet_ping_buffer_decref(*ppbuf);
2012         *ppbuf = NULL;
2013 fail_free_eq:
2014         return rc;
2015 }
2016
2017 static void
2018 lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
2019                     struct lnet_handle_md *ping_mdh)
2020 {
2021         LNetMDUnlink(*ping_mdh);
2022         LNetInvalidateMDHandle(ping_mdh);
2023
2024         /* NB the MD could be busy; this just starts the unlink */
2025         wait_var_event_warning(&pbuf->pb_refcnt,
2026                                atomic_read(&pbuf->pb_refcnt) <= 1,
2027                                "Still waiting for ping data MD to unlink\n");
2028 }
2029
     /*
      * Fill pbuf's ping info with the status of every local NI and make
      * each NI's ni_status point at its slot inside pbuf.
      *
      * Layout produced: one struct lnet_ni_status per NI with a nid4 NID
      * (counted in pi_nnis), followed by one variable-length large status
      * entry per NI whose NID is not nid4 (not counted in pi_nnis).
      *
      * The only caller visible here, lnet_ping_target_update(), invokes
      * this with lnet_net_lock(LNET_LOCK_EX) held.
      */
2030 static void
2031 lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
2032 {
2033         struct lnet_ni *ni;
2034         struct lnet_net *net;
2035         struct lnet_ni_status *ns, *end;
2036         struct lnet_ni_large_status *lns, *lend;
2037         int rc;
2038
2039         pbuf->pb_info.pi_nnis = 0;
2040         ns = &pbuf->pb_info.pi_ni[0];
             /* 'end' is one byte past the pb_nbytes-long info area */
2041         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
             /* Pass 1: small (nid4) entries; large NIDs are skipped */
2042         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2043                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2044                         if (!nid_is_nid4(&ni->ni_nid)) {
                                     /* ns still at slot 1 means only one
                                      * small entry has been written so far
                                      */
2045                                 if (ns == &pbuf->pb_info.pi_ni[1]) {
2046                                         /* This is primary, and it is long */
2047                                         pbuf->pb_info.pi_features |=
2048                                                 LNET_PING_FEAT_PRIMARY_LARGE;
2049                                 }
2050                                 continue;
2051                         }
2052                         LASSERT(ns + 1 <= end);
2053                         ns->ns_nid = lnet_nid_to_nid4(&ni->ni_nid);
2054
                             /* publish the status slot under the NI lock */
2055                         lnet_ni_lock(ni);
2056                         ns->ns_status = lnet_ni_get_status_locked(ni);
2057                         ni->ni_status = &ns->ns_status;
2058                         lnet_ni_unlock(ni);
2059
2060                         pbuf->pb_info.pi_nnis++;
2061                         ns++;
2062                 }
2063         }
2064
             /* Pass 2: large entries start right after the last small one */
2065         lns = (void *)ns;
2066         lend = (void *)end;
2067         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2068                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2069                         if (nid_is_nid4(&ni->ni_nid))
2070                                 continue;
2071                         LASSERT(lns + 1 <= lend);
2072
2073                         lns->ns_nid = ni->ni_nid;
2074
2075                         lnet_ni_lock(ni);
2076                         lns->ns_status = lnet_ni_get_status_locked(ni);
2077                         ni->ni_status = &lns->ns_status;
2078                         lnet_ni_unlock(ni);
2079
                             /* entries are variable length; let the helper
                              * compute where the next one begins
                              */
2080                         lns = lnet_ping_sts_next(lns);
2081                 }
2082         }
             /* lns moved past ns only if a large entry was written */
2083         if ((void *)lns > (void *)ns) {
2084                 /* Record total info size */
2085                 pbuf->pb_info.pi_ni[0].ns_msg_size =
2086                         (void *)lns - (void *)&pbuf->pb_info;
2087                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_LARGE_ADDR;
2088         }
2089
2090         /* We (ab)use the ns_status of the loopback interface to
2091          * transmit the sequence number. The first interface listed
2092          * must be the loopback interface.
2093          */
2094         rc = lnet_ping_info_validate(&pbuf->pb_info);
2095         if (rc) {
2096                 LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
2097                 LBUG();
2098         }
             /* every install gets a fresh sequence number */
2099         LNET_PING_BUFFER_SEQNO(pbuf) =
2100                 atomic_inc_return(&the_lnet.ln_ping_target_seqno);
2101 }
2102
2103 static void
2104 lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
2105                         struct lnet_handle_md ping_mdh)
2106 __must_hold(&the_lnet.ln_api_mutex)
2107 {
2108         struct lnet_ping_buffer *old_pbuf = NULL;
2109         struct lnet_handle_md old_ping_md;
2110
2111         /* switch the NIs to point to the new ping info created */
2112         lnet_net_lock(LNET_LOCK_EX);
2113
2114         if (!the_lnet.ln_routing)
2115                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;
2116         if (!lnet_peer_discovery_disabled)
2117                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
2118
2119         /* Ensure only known feature bits have been set. */
2120         LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
2121         LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));
2122
2123         lnet_ping_target_install_locked(pbuf);
2124
2125         if (the_lnet.ln_ping_target) {
2126                 old_pbuf = the_lnet.ln_ping_target;
2127                 old_ping_md = the_lnet.ln_ping_target_md;
2128         }
2129         the_lnet.ln_ping_target_md = ping_mdh;
2130         the_lnet.ln_ping_target = pbuf;
2131
2132         lnet_net_unlock(LNET_LOCK_EX);
2133
2134         if (old_pbuf) {
2135                 /* unlink and free the old ping info.
2136                  * There may be outstanding traffic on this MD, and
2137                  * ln_api_mutex may be required to finalize that
2138                  * traffic. Release ln_api_mutex while we wait for
2139                  * refs on this ping buffer to drop
2140                  */
2141                 mutex_unlock(&the_lnet.ln_api_mutex);
2142                 lnet_ping_md_unlink(old_pbuf, &old_ping_md);
2143                 mutex_lock(&the_lnet.ln_api_mutex);
2144                 lnet_ping_buffer_decref(old_pbuf);
2145         }
2146
2147         lnet_push_update_to_peers(0);
2148 }
2149
2150 static void
2151 lnet_ping_target_fini(void)
2152 {
2153         lnet_ping_md_unlink(the_lnet.ln_ping_target,
2154                             &the_lnet.ln_ping_target_md);
2155
2156         lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
2157         lnet_ping_target_destroy();
2158 }
2159
2160 /* Resize the push target. */
2161 int lnet_push_target_resize(void)
2162 {
2163         struct lnet_handle_md mdh;
2164         struct lnet_handle_md old_mdh;
2165         struct lnet_ping_buffer *pbuf;
2166         struct lnet_ping_buffer *old_pbuf;
2167         int nbytes;
2168         int rc;
2169
2170 again:
2171         nbytes = the_lnet.ln_push_target_nbytes;
2172         if (nbytes <= 0) {
2173                 CDEBUG(D_NET, "Invalid nbytes %d\n", nbytes);
2174                 return -EINVAL;
2175         }
2176
2177         /* NB: lnet_ping_buffer_alloc() sets pbuf refcount to 1. That ref is
2178          * dropped when we need to resize again (see "old_pbuf" below) or when
2179          * LNet is shutdown (see lnet_push_target_fini())
2180          */
2181         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
2182         if (!pbuf) {
2183                 CDEBUG(D_NET, "Can't allocate pbuf for nbytes %d\n", nbytes);
2184                 return -ENOMEM;
2185         }
2186
2187         rc = lnet_push_target_post(pbuf, &mdh);
2188         if (rc) {
2189                 CDEBUG(D_NET, "Failed to post push target: %d\n", rc);
2190                 lnet_ping_buffer_decref(pbuf);
2191                 return rc;
2192         }
2193
2194         lnet_net_lock(LNET_LOCK_EX);
2195         old_pbuf = the_lnet.ln_push_target;
2196         old_mdh = the_lnet.ln_push_target_md;
2197         the_lnet.ln_push_target = pbuf;
2198         the_lnet.ln_push_target_md = mdh;
2199         lnet_net_unlock(LNET_LOCK_EX);
2200
2201         if (old_pbuf) {
2202                 LNetMDUnlink(old_mdh);
2203                 /* Drop ref set by lnet_ping_buffer_alloc() */
2204                 lnet_ping_buffer_decref(old_pbuf);
2205         }
2206
2207         /* Received another push or reply that requires a larger buffer */
2208         if (nbytes < the_lnet.ln_push_target_nbytes)
2209                 goto again;
2210
2211         CDEBUG(D_NET, "nbytes %d success\n", nbytes);
2212         return 0;
2213 }
2214
2215 int lnet_push_target_post(struct lnet_ping_buffer *pbuf,
2216                           struct lnet_handle_md *mdhp)
2217 {
2218         struct lnet_processid id = { LNET_ANY_NID, LNET_PID_ANY };
2219         struct lnet_md md = { NULL };
2220         struct lnet_me *me;
2221         int rc;
2222
2223         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
2224                           LNET_PROTO_PING_MATCHBITS, 0,
2225                           LNET_UNLINK, LNET_INS_AFTER);
2226         if (IS_ERR(me)) {
2227                 rc = PTR_ERR(me);
2228                 CERROR("Can't create push target ME: %d\n", rc);
2229                 return rc;
2230         }
2231
2232         pbuf->pb_needs_post = false;
2233
2234         /* This reference is dropped by lnet_push_target_event_handler() */
2235         lnet_ping_buffer_addref(pbuf);
2236
2237         /* initialize md content */
2238         md.start     = &pbuf->pb_info;
2239         md.length    = pbuf->pb_nbytes;
2240         md.threshold = 1;
2241         md.max_size  = 0;
2242         md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE;
2243         md.user_ptr  = pbuf;
2244         md.handler   = the_lnet.ln_push_target_handler;
2245
2246         rc = LNetMDAttach(me, &md, LNET_UNLINK, mdhp);
2247         if (rc) {
2248                 CERROR("Can't attach push MD: %d\n", rc);
2249                 lnet_ping_buffer_decref(pbuf);
2250                 pbuf->pb_needs_post = true;
2251                 return rc;
2252         }
2253
2254         CDEBUG(D_NET, "posted push target %p\n", pbuf);
2255
2256         return 0;
2257 }
2258
2259 static void lnet_push_target_event_handler(struct lnet_event *ev)
2260 {
2261         struct lnet_ping_buffer *pbuf = ev->md_user_ptr;
2262
2263         CDEBUG(D_NET, "type %d status %d unlinked %d\n", ev->type, ev->status,
2264                ev->unlinked);
2265
2266         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
2267                 lnet_swap_pinginfo(pbuf);
2268
2269         if (ev->type == LNET_EVENT_UNLINK) {
2270                 /* Drop ref added by lnet_push_target_post() */
2271                 lnet_ping_buffer_decref(pbuf);
2272                 return;
2273         }
2274
2275         lnet_peer_push_event(ev);
2276         if (ev->unlinked)
2277                 /* Drop ref added by lnet_push_target_post */
2278                 lnet_ping_buffer_decref(pbuf);
2279 }
2280
2281 /* Initialize the push target. */
2282 static int lnet_push_target_init(void)
2283 {
2284         int rc;
2285
2286         if (the_lnet.ln_push_target)
2287                 return -EALREADY;
2288
2289         the_lnet.ln_push_target_handler =
2290                 lnet_push_target_event_handler;
2291
2292         rc = LNetSetLazyPortal(LNET_RESERVED_PORTAL);
2293         LASSERT(rc == 0);
2294
2295         /* Start at the required minimum, we'll enlarge if required. */
2296         the_lnet.ln_push_target_nbytes = LNET_PING_INFO_MIN_SIZE;
2297
2298         rc = lnet_push_target_resize();
2299         if (rc) {
2300                 LNetClearLazyPortal(LNET_RESERVED_PORTAL);
2301                 the_lnet.ln_push_target_handler = NULL;
2302         }
2303
2304         return rc;
2305 }
2306
2307 /* Clean up the push target. */
2308 static void lnet_push_target_fini(void)
2309 {
2310         if (!the_lnet.ln_push_target)
2311                 return;
2312
2313         /* Unlink and invalidate to prevent new references. */
2314         LNetMDUnlink(the_lnet.ln_push_target_md);
2315         LNetInvalidateMDHandle(&the_lnet.ln_push_target_md);
2316
2317         /* Wait for the unlink to complete. */
2318         wait_var_event_warning(&the_lnet.ln_push_target->pb_refcnt,
2319                                atomic_read(&the_lnet.ln_push_target->pb_refcnt) <= 1,
2320                                "Still waiting for ping data MD to unlink\n");
2321
2322         /* Drop ref set by lnet_ping_buffer_alloc() */
2323         lnet_ping_buffer_decref(the_lnet.ln_push_target);
2324         the_lnet.ln_push_target = NULL;
2325         the_lnet.ln_push_target_nbytes = 0;
2326
2327         LNetClearLazyPortal(LNET_RESERVED_PORTAL);
2328         lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
2329         the_lnet.ln_push_target_handler = NULL;
2330 }
2331
2332 static int
2333 lnet_ni_tq_credits(struct lnet_ni *ni)
2334 {
2335         int     credits;
2336
2337         LASSERT(ni->ni_ncpts >= 1);
2338
2339         if (ni->ni_ncpts == 1)
2340                 return ni->ni_net->net_tunables.lct_max_tx_credits;
2341
2342         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
2343         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
2344         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
2345
2346         return credits;
2347 }
2348
2349 static void
2350 lnet_ni_unlink_locked(struct lnet_ni *ni)
2351 {
2352         /* move it to zombie list and nobody can find it anymore */
2353         LASSERT(!list_empty(&ni->ni_netlist));
2354         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
2355         lnet_ni_decref_locked(ni, 0);
2356 }
2357
     /*
      * Shut down and free every NI on net's zombie list.
      *
      * The function both starts and ends with lnet_net_lock(LNET_LOCK_EX)
      * and the_lnet.ln_api_mutex held, but releases and re-takes both
      * internally: while sleeping on a busy NI, and around the LND's
      * lnd_shutdown call.
      */
2358 static void
2359 lnet_clear_zombies_nis_locked(struct lnet_net *net)
2360 {
2361         int             i;
2362         int             islo;
2363         struct lnet_ni  *ni;
2364         struct list_head *zombie_list = &net->net_ni_zombie;
2365
2366         /*
2367          * Now wait for the NIs I just nuked to show up on the zombie
2368          * list and shut them down in guaranteed thread context
2369          */
             /* i counts the waits on the current NI; it only paces the
              * warning below
              */
2370         i = 2;
2371         while ((ni = list_first_entry_or_null(zombie_list,
2372                                               struct lnet_ni,
2373                                               ni_netlist)) != NULL) {
2374                 int *ref;
2375                 int j;
2376
2377                 list_del_init(&ni->ni_netlist);
2378                 /* the ni should be in deleting state. If it's not it's
2379                  * a bug */
2380                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
                     /* any non-zero per-CPT refcount means the NI is busy */
2381                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
2382                         if (*ref == 0)
2383                                 continue;
2384                         /* still busy, add it back to zombie list */
2385                         list_add(&ni->ni_netlist, zombie_list);
2386                         break;
2387                 }
2388
                     /* ni_netlist is non-empty only if it was re-added above */
2389                 if (!list_empty(&ni->ni_netlist)) {
2390                         /* Unlock mutex while waiting to allow other
2391                          * threads to read the LNet state and fall through
2392                          * to avoid deadlock
2393                          */
2394                         lnet_net_unlock(LNET_LOCK_EX);
2395                         mutex_unlock(&the_lnet.ln_api_mutex);
2396
2397                         ++i;
                             /* true only when i is a power of two, so the
                              * warning is logged at doubling intervals
                              */
2398                         if ((i & (-i)) == i) {
2399                                 CDEBUG(D_WARNING,
2400                                        "Waiting for zombie LNI %s\n",
2401                                        libcfs_nidstr(&ni->ni_nid));
2402                         }
2403                         schedule_timeout_uninterruptible(cfs_time_seconds(1));
2404
2405                         mutex_lock(&the_lnet.ln_api_mutex);
2406                         lnet_net_lock(LNET_LOCK_EX);
2407                         continue;
2408                 }
2409
                     /* NI is idle and off every list: shut it down without
                      * the net lock
                      */
2410                 lnet_net_unlock(LNET_LOCK_EX);
2411
2412                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
2413
2414                 LASSERT(!in_interrupt());
2415                 /* Holding the LND mutex makes it safe for lnd_shutdown
2416                  * to call module_put(). Module unload cannot finish
2417                  * until lnet_unregister_lnd() completes, and that
2418                  * requires the LND mutex.
2419                  */
2420                 mutex_unlock(&the_lnet.ln_api_mutex);
2421                 mutex_lock(&the_lnet.ln_lnd_mutex);
2422                 (net->net_lnd->lnd_shutdown)(ni);
2423                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2424                 mutex_lock(&the_lnet.ln_api_mutex);
2425
                     /* removal of the loopback NI is not logged */
2426                 if (!islo)
2427                         CDEBUG(D_LNI, "Removed LNI %s\n",
2428                               libcfs_nidstr(&ni->ni_nid));
2429
2430                 lnet_ni_free(ni);
                     /* restart the warning pacing for the next NI */
2431                 i = 2;
2432                 lnet_net_lock(LNET_LOCK_EX);
2433         }
2434 }
2435
2436 /* shutdown down the NI and release refcount */
2437 static void
2438 lnet_shutdown_lndni(struct lnet_ni *ni)
2439 {
2440         int i;
2441         struct lnet_net *net = ni->ni_net;
2442
2443         lnet_net_lock(LNET_LOCK_EX);
2444         lnet_ni_lock(ni);
2445         ni->ni_state = LNET_NI_STATE_DELETING;
2446         lnet_ni_unlock(ni);
2447         lnet_ni_unlink_locked(ni);
2448         lnet_incr_dlc_seq();
2449         lnet_net_unlock(LNET_LOCK_EX);
2450
2451         /* clear messages for this NI on the lazy portal */
2452         for (i = 0; i < the_lnet.ln_nportals; i++)
2453                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
2454
2455         lnet_net_lock(LNET_LOCK_EX);
2456         lnet_clear_zombies_nis_locked(net);
2457         lnet_net_unlock(LNET_LOCK_EX);
2458 }
2459
2460 static void
2461 lnet_shutdown_lndnet(struct lnet_net *net)
2462 {
2463         struct lnet_ni *ni;
2464
2465         lnet_net_lock(LNET_LOCK_EX);
2466
2467         list_del_init(&net->net_list);
2468
2469         while ((ni = list_first_entry_or_null(&net->net_ni_list,
2470                                               struct lnet_ni,
2471                                               ni_netlist)) != NULL) {
2472                 lnet_net_unlock(LNET_LOCK_EX);
2473                 lnet_shutdown_lndni(ni);
2474                 lnet_net_lock(LNET_LOCK_EX);
2475         }
2476
2477         lnet_net_unlock(LNET_LOCK_EX);
2478
2479         /* Do peer table cleanup for this net */
2480         lnet_peer_tables_cleanup(net);
2481
2482         lnet_net_free(net);
2483 }
2484
2485 static void
2486 lnet_shutdown_lndnets(void)
2487 {
2488         struct lnet_net *net;
2489         LIST_HEAD(resend);
2490         struct lnet_msg *msg, *tmp;
2491
2492         /* NB called holding the global mutex */
2493
2494         /* All quiet on the API front */
2495         LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING ||
2496                 the_lnet.ln_state == LNET_STATE_STOPPING);
2497         LASSERT(the_lnet.ln_refcount == 0);
2498
2499         lnet_net_lock(LNET_LOCK_EX);
2500         the_lnet.ln_state = LNET_STATE_STOPPING;
2501
2502         /*
2503          * move the nets to the zombie list to avoid them being
2504          * picked up for new work. LONET is also included in the
2505          * Nets that will be moved to the zombie list
2506          */
2507         list_splice_init(&the_lnet.ln_nets, &the_lnet.ln_net_zombie);
2508
2509         /* Drop the cached loopback Net. */
2510         if (the_lnet.ln_loni != NULL) {
2511                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
2512                 the_lnet.ln_loni = NULL;
2513         }
2514         lnet_net_unlock(LNET_LOCK_EX);
2515
2516         /* iterate through the net zombie list and delete each net */
2517         while ((net = list_first_entry_or_null(&the_lnet.ln_net_zombie,
2518                                                struct lnet_net,
2519                                                net_list)) != NULL)
2520                 lnet_shutdown_lndnet(net);
2521
2522         spin_lock(&the_lnet.ln_msg_resend_lock);
2523         list_splice(&the_lnet.ln_msg_resend, &resend);
2524         spin_unlock(&the_lnet.ln_msg_resend_lock);
2525
2526         list_for_each_entry_safe(msg, tmp, &resend, msg_list) {
2527                 list_del_init(&msg->msg_list);
2528                 msg->msg_no_resend = true;
2529                 lnet_finalize(msg, -ECANCELED);
2530         }
2531
2532         lnet_net_lock(LNET_LOCK_EX);
2533         the_lnet.ln_state = LNET_STATE_SHUTDOWN;
2534         lnet_net_unlock(LNET_LOCK_EX);
2535 }
2536
2537 static int
2538 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
2539 {
2540         int                     rc = -EINVAL;
2541         struct lnet_tx_queue    *tq;
2542         int                     i;
2543         struct lnet_net         *net = ni->ni_net;
2544
2545         mutex_lock(&the_lnet.ln_lnd_mutex);
2546
2547         if (tun) {
2548                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
2549                 ni->ni_lnd_tunables_set = true;
2550         }
2551
2552         rc = (net->net_lnd->lnd_startup)(ni);
2553
2554         mutex_unlock(&the_lnet.ln_lnd_mutex);
2555
2556         if (rc != 0) {
2557                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
2558                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
2559                 goto failed0;
2560         }
2561
2562         /* We keep a reference on the loopback net through the loopback NI */
2563         if (net->net_lnd->lnd_type == LOLND) {
2564                 lnet_ni_addref(ni);
2565                 LASSERT(the_lnet.ln_loni == NULL);
2566                 the_lnet.ln_loni = ni;
2567                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
2568                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
2569                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
2570                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
2571                 return 0;
2572         }
2573
2574         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
2575             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
2576                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
2577                                    libcfs_lnd2str(net->net_lnd->lnd_type),
2578                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
2579                                         "" : "per-peer ");
2580                 /* shutdown the NI since if we get here then it must've already
2581                  * been started
2582                  */
2583                 lnet_shutdown_lndni(ni);
2584                 return -EINVAL;
2585         }
2586
2587         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
2588                 tq->tq_credits_min =
2589                 tq->tq_credits_max =
2590                 tq->tq_credits = lnet_ni_tq_credits(ni);
2591         }
2592
2593         atomic_set(&ni->ni_tx_credits,
2594                    lnet_ni_tq_credits(ni) * ni->ni_ncpts);
2595         atomic_set(&ni->ni_healthv, LNET_MAX_HEALTH_VALUE);
2596
2597         /* Nodes with small feet have little entropy. The NID for this
2598          * node gives the most entropy in the low bits.
2599          */
2600         add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid));
2601
2602         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
2603                 libcfs_nidstr(&ni->ni_nid),
2604                 ni->ni_net->net_tunables.lct_peer_tx_credits,
2605                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
2606                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
2607                 ni->ni_net->net_tunables.lct_peer_timeout);
2608
2609         return 0;
2610 failed0:
2611         lnet_ni_free(ni);
2612         return rc;
2613 }
2614
2615 static const struct lnet_lnd *lnet_load_lnd(u32 lnd_type)
2616 {
2617         const struct lnet_lnd *lnd;
2618         int rc = 0;
2619
2620         mutex_lock(&the_lnet.ln_lnd_mutex);
2621         lnd = lnet_find_lnd_by_type(lnd_type);
2622         if (!lnd) {
2623                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2624                 rc = request_module("%s", libcfs_lnd2modname(lnd_type));
2625                 mutex_lock(&the_lnet.ln_lnd_mutex);
2626
2627                 lnd = lnet_find_lnd_by_type(lnd_type);
2628                 if (!lnd) {
2629                         mutex_unlock(&the_lnet.ln_lnd_mutex);
2630                         CERROR("Can't load LND %s, module %s, rc=%d\n",
2631                         libcfs_lnd2str(lnd_type),
2632                         libcfs_lnd2modname(lnd_type), rc);
2633 #ifndef HAVE_MODULE_LOADING_SUPPORT
2634                         LCONSOLE_ERROR_MSG(0x104,
2635                                            "Your kernel must be compiled with kernel module loading support.");
2636 #endif
2637                         return ERR_PTR(-EINVAL);
2638                 }
2639         }
2640         mutex_unlock(&the_lnet.ln_lnd_mutex);
2641
2642         return lnd;
2643 }
2644
2645 static int
2646 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
2647 {
2648         struct lnet_ni *ni;
2649         struct lnet_net *net_l = NULL;
2650         LIST_HEAD(local_ni_list);
2651         int rc;
2652         int ni_count = 0;
2653         __u32 lnd_type;
2654         const struct lnet_lnd  *lnd;
2655         int peer_timeout =
2656                 net->net_tunables.lct_peer_timeout;
2657         int maxtxcredits =
2658                 net->net_tunables.lct_max_tx_credits;
2659         int peerrtrcredits =
2660                 net->net_tunables.lct_peer_rtr_credits;
2661
2662         /*
2663          * make sure that this net is unique. If it isn't then
2664          * we are adding interfaces to an already existing network, and
2665          * 'net' is just a convenient way to pass in the list.
2666          * if it is unique we need to find the LND and load it if
2667          * necessary.
2668          */
2669         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
2670                 lnd_type = LNET_NETTYP(net->net_id);
2671
2672                 lnd = lnet_load_lnd(lnd_type);
2673                 if (IS_ERR(lnd)) {
2674                         rc = PTR_ERR(lnd);
2675                         goto failed0;
2676                 }
2677
2678                 mutex_lock(&the_lnet.ln_lnd_mutex);
2679                 net->net_lnd = lnd;
2680                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2681
2682                 net_l = net;
2683         }
2684
2685         /*
2686          * net_l: if the network being added is unique then net_l
2687          *        will point to that network
2688          *        if the network being added is not unique then
2689          *        net_l points to the existing network.
2690          *
2691          * When we enter the loop below, we'll pick NIs off he
2692          * network beign added and start them up, then add them to
2693          * a local ni list. Once we've successfully started all
2694          * the NIs then we join the local NI list (of started up
2695          * networks) with the net_l->net_ni_list, which should
2696          * point to the correct network to add the new ni list to
2697          *
2698          * If any of the new NIs fail to start up, then we want to
2699          * iterate through the local ni list, which should include
2700          * any NIs which were successfully started up, and shut
2701          * them down.
2702          *
2703          * After than we want to delete the network being added,
2704          * to avoid a memory leak.
2705          */
2706         while ((ni = list_first_entry_or_null(&net->net_ni_added,
2707                                               struct lnet_ni,
2708                                               ni_netlist)) != NULL) {
2709                 list_del_init(&ni->ni_netlist);
2710
2711                 /* make sure that the the NI we're about to start
2712                  * up is actually unique. if it's not fail. */
2713                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
2714                                         ni->ni_interface)) {
2715                         rc = -EEXIST;
2716                         goto failed1;
2717                 }
2718
2719                 /* adjust the pointer the parent network, just in case it
2720                  * the net is a duplicate */
2721                 ni->ni_net = net_l;
2722
2723                 rc = lnet_startup_lndni(ni, tun);
2724
2725                 if (rc != 0)
2726                         goto failed1;
2727
2728                 lnet_ni_addref(ni);
2729                 list_add_tail(&ni->ni_netlist, &local_ni_list);
2730
2731                 ni_count++;
2732         }
2733
2734         lnet_net_lock(LNET_LOCK_EX);
2735         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
2736         lnet_incr_dlc_seq();
2737
2738         list_for_each_entry(ni, &net_l->net_ni_list, ni_netlist) {
2739                 if (!ni)
2740                         break;
2741                 lnet_ni_lock(ni);
2742                 ni->ni_state = LNET_NI_STATE_ACTIVE;
2743                 lnet_ni_unlock(ni);
2744         }
2745         lnet_net_unlock(LNET_LOCK_EX);
2746
2747         /* if the network is not unique then we don't want to keep
2748          * it around after we're done. Free it. Otherwise add that
2749          * net to the global the_lnet.ln_nets */
2750         if (net_l != net && net_l != NULL) {
2751                 /*
2752                  * TODO - note. currently the tunables can not be updated
2753                  * once added
2754                  */
2755                 lnet_net_free(net);
2756         } else {
2757                 /*
2758                  * restore tunables after it has been overwitten by the
2759                  * lnd
2760                  */
2761                 if (peer_timeout != -1)
2762                         net->net_tunables.lct_peer_timeout = peer_timeout;
2763                 if (maxtxcredits != -1)
2764                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
2765                 if (peerrtrcredits != -1)
2766                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
2767
2768                 lnet_net_lock(LNET_LOCK_EX);
2769                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
2770                 lnet_net_unlock(LNET_LOCK_EX);
2771         }
2772
2773         return ni_count;
2774
2775 failed1:
2776         /*
2777          * shutdown the new NIs that are being started up
2778          * free the NET being started
2779          */
2780         while ((ni = list_first_entry_or_null(&local_ni_list,
2781                                               struct lnet_ni,
2782                                               ni_netlist)) != NULL)
2783                 lnet_shutdown_lndni(ni);
2784
2785 failed0:
2786         lnet_net_free(net);
2787
2788         return rc;
2789 }
2790
2791 static int
2792 lnet_startup_lndnets(struct list_head *netlist)
2793 {
2794         struct lnet_net         *net;
2795         int                     rc;
2796         int                     ni_count = 0;
2797
2798         /*
2799          * Change to running state before bringing up the LNDs. This
2800          * allows lnet_shutdown_lndnets() to assert that we've passed
2801          * through here.
2802          */
2803         lnet_net_lock(LNET_LOCK_EX);
2804         the_lnet.ln_state = LNET_STATE_RUNNING;
2805         lnet_net_unlock(LNET_LOCK_EX);
2806
2807         while ((net = list_first_entry_or_null(netlist,
2808                                                struct lnet_net,
2809                                                net_list)) != NULL) {
2810                 list_del_init(&net->net_list);
2811
2812                 rc = lnet_startup_lndnet(net, NULL);
2813
2814                 if (rc < 0)
2815                         goto failed;
2816
2817                 ni_count += rc;
2818         }
2819
2820         return ni_count;
2821 failed:
2822         lnet_shutdown_lndnets();
2823
2824         return rc;
2825 }
2826
2827 static int lnet_genl_parse_list(struct sk_buff *msg,
2828                                 const struct ln_key_list *data[], u16 idx)
2829 {
2830         const struct ln_key_list *list = data[idx];
2831         const struct ln_key_props *props;
2832         struct nlattr *node;
2833         u16 count;
2834
2835         if (!list)
2836                 return 0;
2837
2838         if (!list->lkl_maxattr)
2839                 return -ERANGE;
2840
2841         props = list->lkl_list;
2842         if (!props)
2843                 return -EINVAL;
2844
2845         node = nla_nest_start(msg, LN_SCALAR_ATTR_LIST);
2846         if (!node)
2847                 return -ENOBUFS;
2848
2849         for (count = 1; count <= list->lkl_maxattr; count++) {
2850                 struct nlattr *key = nla_nest_start(msg, count);
2851
2852                 if (count == 1)
2853                         nla_put_u16(msg, LN_SCALAR_ATTR_LIST_SIZE,
2854                                     list->lkl_maxattr);
2855
2856                 nla_put_u16(msg, LN_SCALAR_ATTR_INDEX, count);
2857                 if (props[count].lkp_value)
2858                         nla_put_string(msg, LN_SCALAR_ATTR_VALUE,
2859                                        props[count].lkp_value);
2860                 if (props[count].lkp_key_format)
2861                         nla_put_u16(msg, LN_SCALAR_ATTR_KEY_FORMAT,
2862                                     props[count].lkp_key_format);
2863                 nla_put_u16(msg, LN_SCALAR_ATTR_NLA_TYPE,
2864                             props[count].lkp_data_type);
2865                 if (props[count].lkp_data_type == NLA_NESTED) {
2866                         int rc;
2867
2868                         rc = lnet_genl_parse_list(msg, data, ++idx);
2869                         if (rc < 0)
2870                                 return rc;
2871                         idx = rc;
2872                 }
2873
2874                 nla_nest_end(msg, key);
2875         }
2876
2877         nla_nest_end(msg, node);
2878         return idx;
2879 }
2880
2881 int lnet_genl_send_scalar_list(struct sk_buff *msg, u32 portid, u32 seq,
2882                                const struct genl_family *family, int flags,
2883                                u8 cmd, const struct ln_key_list *data[])
2884 {
2885         int rc = 0;
2886         void *hdr;
2887
2888         if (!data[0])
2889                 return -EINVAL;
2890
2891         hdr = genlmsg_put(msg, portid, seq, family, flags, cmd);
2892         if (!hdr)
2893                 GOTO(canceled, rc = -EMSGSIZE);
2894
2895         rc = lnet_genl_parse_list(msg, data, 0);
2896         if (rc < 0)
2897                 GOTO(canceled, rc);
2898
2899         genlmsg_end(msg, hdr);
2900 canceled:
2901         if (rc < 0)
2902                 genlmsg_cancel(msg, hdr);
2903         return rc > 0 ? 0 : rc;
2904 }
2905 EXPORT_SYMBOL(lnet_genl_send_scalar_list);
2906
2907 static struct genl_family lnet_family;
2908
2909 /**
2910  * Initialize LNet library.
2911  *
2912  * Automatically called at module loading time. Caller has to call
2913  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
2914  * latter returned 0. It must be called exactly once.
2915  *
2916  * \retval 0 on success
2917  * \retval -ve on failures.
2918  */
2919 int lnet_lib_init(void)
2920 {
2921         int rc;
2922
2923         lnet_assert_wire_constants();
2924
2925         /* refer to global cfs_cpt_table for now */
2926         the_lnet.ln_cpt_table = cfs_cpt_tab;
2927         the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab);
2928
2929         LASSERT(the_lnet.ln_cpt_number > 0);
2930         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
2931                 /* we are under risk of consuming all lh_cookie */
2932                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
2933                        "please change setting of CPT-table and retry\n",
2934                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
2935                 return -E2BIG;
2936         }
2937
2938         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
2939                 the_lnet.ln_cpt_bits++;
2940
2941         rc = lnet_create_locks();
2942         if (rc != 0) {
2943                 CERROR("Can't create LNet global locks: %d\n", rc);
2944                 return rc;
2945         }
2946
2947         rc = genl_register_family(&lnet_family);
2948         if (rc != 0) {
2949                 lnet_destroy_locks();
2950                 CERROR("Can't register LNet netlink family: %d\n", rc);
2951                 return rc;
2952         }
2953
2954         the_lnet.ln_refcount = 0;
2955         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
2956         INIT_LIST_HEAD(&the_lnet.ln_msg_resend);
2957
2958         /* The hash table size is the number of bits it takes to express the set
2959          * ln_num_routes, minus 1 (better to under estimate than over so we
2960          * don't waste memory). */
2961         if (rnet_htable_size <= 0)
2962                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
2963         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
2964                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
2965         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
2966                                            order_base_2(rnet_htable_size) - 1);
2967
2968         /* All LNDs apart from the LOLND are in separate modules.  They
2969          * register themselves when their module loads, and unregister
2970          * themselves when their module is unloaded. */
2971         lnet_register_lnd(&the_lolnd);
2972         return 0;
2973 }
2974
2975 /**
2976  * Finalize LNet library.
2977  *
2978  * \pre lnet_lib_init() called with success.
2979  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
2980  *
2981  * As this happens at module-unload, all lnds must already be unloaded,
2982  * so they must already be unregistered.
2983  */
2984 void lnet_lib_exit(void)
2985 {
2986         int i;
2987
2988         LASSERT(the_lnet.ln_refcount == 0);
2989         lnet_unregister_lnd(&the_lolnd);
2990         for (i = 0; i < NUM_LNDS; i++)
2991                 LASSERT(!the_lnet.ln_lnds[i]);
2992         lnet_destroy_locks();
2993         genl_unregister_family(&lnet_family);
2994 }
2995
2996 /**
2997  * Set LNet PID and start LNet interfaces, routing, and forwarding.
2998  *
2999  * Users must call this function at least once before any other functions.
3000  * For each successful call there must be a corresponding call to
3001  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
3002  * ignored.
3003  *
3004  * The PID used by LNet may be different from the one requested.
3005  * See LNetGetId().
3006  *
3007  * \param requested_pid PID requested by the caller.
3008  *
3009  * \return >= 0 on success, and < 0 error code on failures.
3010  */
3011 int
3012 LNetNIInit(lnet_pid_t requested_pid)
3013 {
3014         int im_a_router = 0;
3015         int rc;
3016         int ni_bytes;
3017         struct lnet_ping_buffer *pbuf;
3018         struct lnet_handle_md ping_mdh;
3019         LIST_HEAD(net_head);
3020         struct lnet_net *net;
3021
3022         mutex_lock(&the_lnet.ln_api_mutex);
3023
3024         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
3025
3026         if (the_lnet.ln_state == LNET_STATE_STOPPING) {
3027                 mutex_unlock(&the_lnet.ln_api_mutex);
3028                 return -ESHUTDOWN;
3029         }
3030
3031         if (the_lnet.ln_refcount > 0) {
3032                 rc = the_lnet.ln_refcount++;
3033                 mutex_unlock(&the_lnet.ln_api_mutex);
3034                 return rc;
3035         }
3036
3037         rc = lnet_prepare(requested_pid);
3038         if (rc != 0) {
3039                 mutex_unlock(&the_lnet.ln_api_mutex);
3040                 return rc;
3041         }
3042
3043         /* create a network for Loopback network */
3044         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
3045         if (net == NULL) {
3046                 rc = -ENOMEM;
3047                 goto err_empty_list;
3048         }
3049
3050         /* Add in the loopback NI */
3051         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
3052                 rc = -ENOMEM;
3053                 goto err_empty_list;
3054         }
3055
3056         if (use_tcp_bonding)
3057                 CWARN("use_tcp_bonding has been removed. Use Multi-Rail and Dynamic Discovery instead, see LU-13641\n");
3058
3059         /* If LNet is being initialized via DLC it is possible
3060          * that the user requests not to load module parameters (ones which
3061          * are supported by DLC) on initialization.  Therefore, make sure not
3062          * to load networks, routes and forwarding from module parameters
3063          * in this case.  On cleanup in case of failure only clean up
3064          * routes if it has been loaded */
3065         if (!the_lnet.ln_nis_from_mod_params) {
3066                 rc = lnet_parse_networks(&net_head, lnet_get_networks());
3067                 if (rc < 0)
3068                         goto err_empty_list;
3069         }
3070
3071         rc = lnet_startup_lndnets(&net_head);
3072         if (rc < 0)
3073                 goto err_empty_list;
3074
3075         if (!the_lnet.ln_nis_from_mod_params) {
3076                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
3077                 if (rc != 0)
3078                         goto err_shutdown_lndnis;
3079
3080                 rc = lnet_rtrpools_alloc(im_a_router);
3081                 if (rc != 0)
3082                         goto err_destroy_routes;
3083         }
3084
3085         rc = lnet_acceptor_start();
3086         if (rc != 0)
3087                 goto err_destroy_routes;
3088
3089         the_lnet.ln_refcount = 1;
3090         /* Now I may use my own API functions... */
3091
3092         ni_bytes = LNET_PING_INFO_HDR_SIZE;
3093         list_for_each_entry(net, &the_lnet.ln_nets, net_list)
3094                 ni_bytes += lnet_get_net_ni_bytes_locked(net);
3095
3096         rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_bytes, true);
3097         if (rc != 0)
3098                 goto err_acceptor_stop;
3099
3100         lnet_ping_target_update(pbuf, ping_mdh);
3101
3102         the_lnet.ln_mt_handler = lnet_mt_event_handler;
3103
3104         rc = lnet_push_target_init();
3105         if (rc != 0)
3106                 goto err_stop_ping;
3107
3108         rc = lnet_monitor_thr_start();
3109         if (rc != 0)
3110                 goto err_destroy_push_target;
3111
3112         rc = lnet_peer_discovery_start();
3113         if (rc != 0)
3114                 goto err_stop_monitor_thr;
3115
3116         lnet_fault_init();
3117         lnet_router_debugfs_init();
3118
3119         mutex_unlock(&the_lnet.ln_api_mutex);
3120
3121         complete_all(&the_lnet.ln_started);
3122
3123         /* wait for all routers to start */
3124         lnet_wait_router_start();
3125
3126         return 0;
3127
3128 err_stop_monitor_thr:
3129         lnet_monitor_thr_stop();
3130 err_destroy_push_target:
3131         lnet_push_target_fini();
3132 err_stop_ping:
3133         lnet_ping_target_fini();
3134 err_acceptor_stop:
3135         the_lnet.ln_refcount = 0;
3136         lnet_acceptor_stop();
3137 err_destroy_routes:
3138         if (!the_lnet.ln_nis_from_mod_params)
3139                 lnet_destroy_routes();
3140 err_shutdown_lndnis:
3141         lnet_shutdown_lndnets();
3142 err_empty_list:
3143         lnet_unprepare();
3144         LASSERT(rc < 0);
3145         mutex_unlock(&the_lnet.ln_api_mutex);
3146         while ((net = list_first_entry_or_null(&net_head,
3147                                                struct lnet_net,
3148                                                net_list)) != NULL) {
3149                 list_del_init(&net->net_list);
3150                 lnet_net_free(net);
3151         }
3152         return rc;
3153 }
3154 EXPORT_SYMBOL(LNetNIInit);
3155
3156 /**
3157  * Stop LNet interfaces, routing, and forwarding.
3158  *
3159  * Users must call this function once for each successful call to LNetNIInit().
3160  * Once the LNetNIFini() operation has been started, the results of pending
3161  * API operations are undefined.
3162  *
3163  * \return always 0 for current implementation.
3164  */
3165 int
3166 LNetNIFini(void)
3167 {
3168         mutex_lock(&the_lnet.ln_api_mutex);
3169
3170         LASSERT(the_lnet.ln_refcount > 0);
3171
3172         if (the_lnet.ln_refcount != 1) {
3173                 the_lnet.ln_refcount--;
3174         } else {
3175                 LASSERT(!the_lnet.ln_niinit_self);
3176
3177                 lnet_net_lock(LNET_LOCK_EX);
3178                 the_lnet.ln_state = LNET_STATE_STOPPING;
3179                 lnet_net_unlock(LNET_LOCK_EX);
3180
3181                 lnet_fault_fini();
3182
3183                 lnet_router_debugfs_fini();
3184                 lnet_peer_discovery_stop();
3185                 lnet_monitor_thr_stop();
3186                 lnet_push_target_fini();
3187                 lnet_ping_target_fini();
3188
3189                 /* Teardown fns that use my own API functions BEFORE here */
3190                 the_lnet.ln_refcount = 0;
3191
3192                 lnet_acceptor_stop();
3193                 lnet_destroy_routes();
3194                 lnet_shutdown_lndnets();
3195                 lnet_unprepare();
3196         }
3197
3198         mutex_unlock(&the_lnet.ln_api_mutex);
3199         return 0;
3200 }
3201 EXPORT_SYMBOL(LNetNIFini);
3202
3203 /**
3204  * Grabs the ni data from the ni structure and fills the out
3205  * parameters
3206  *
3207  * \param[in] ni network        interface structure
3208  * \param[out] cfg_ni           NI config information
3209  * \param[out] tun              network and LND tunables
3210  */
3211 static void
3212 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
3213                    struct lnet_ioctl_config_lnd_tunables *tun,
3214                    struct lnet_ioctl_element_stats *stats,
3215                    __u32 tun_size)
3216 {
3217         size_t min_size = 0;
3218         int i;
3219
3220         if (!ni || !cfg_ni || !tun || !nid_is_nid4(&ni->ni_nid))
3221                 return;
3222
3223         if (ni->ni_interface != NULL) {
3224                 strncpy(cfg_ni->lic_ni_intf,
3225                         ni->ni_interface,
3226                         sizeof(cfg_ni->lic_ni_intf));
3227         }
3228
3229         cfg_ni->lic_nid = lnet_nid_to_nid4(&ni->ni_nid);
3230         cfg_ni->lic_status = lnet_ni_get_status_locked(ni);
3231         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
3232
3233         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
3234
3235         if (stats) {
3236                 stats->iel_send_count = lnet_sum_stats(&ni->ni_stats,
3237                                                        LNET_STATS_TYPE_SEND);
3238                 stats->iel_recv_count = lnet_sum_stats(&ni->ni_stats,
3239                                                        LNET_STATS_TYPE_RECV);
3240                 stats->iel_drop_count = lnet_sum_stats(&ni->ni_stats,
3241                                                        LNET_STATS_TYPE_DROP);
3242         }
3243
3244         /*
3245          * tun->lt_tun will always be present, but in order to be
3246          * backwards compatible, we need to deal with the cases when
3247          * tun->lt_tun is smaller than what the kernel has, because it
3248          * comes from an older version of a userspace program, then we'll
3249          * need to copy as much information as we have available space.
3250          */
3251         min_size = tun_size - sizeof(tun->lt_cmn);
3252         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
3253
3254         /* copy over the cpts */
3255         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
3256             ni->ni_cpts == NULL)  {
3257                 for (i = 0; i < ni->ni_ncpts; i++)
3258                         cfg_ni->lic_cpts[i] = i;
3259         } else {
3260                 for (i = 0;
3261                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
3262                      i < LNET_MAX_SHOW_NUM_CPT;
3263                      i++)
3264                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
3265         }
3266         cfg_ni->lic_ncpts = ni->ni_ncpts;
3267 }
3268
3269 /**
3270  * NOTE: This is a legacy function left in the code to be backwards
3271  * compatible with older userspace programs. It should eventually be
3272  * removed.
3273  *
3274  * Grabs the ni data from the ni structure and fills the out
3275  * parameters
3276  *
3277  * \param[in] ni network        interface structure
3278  * \param[out] config           config information
3279  */
3280 static void
3281 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
3282                          struct lnet_ioctl_config_data *config)
3283 {
3284         struct lnet_ioctl_net_config *net_config;
3285         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
3286         size_t min_size, tunable_size = 0;
3287         int i;
3288
3289         if (!ni || !config || !nid_is_nid4(&ni->ni_nid))
3290                 return;
3291
3292         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
3293         if (!net_config)
3294                 return;
3295
3296         if (!ni->ni_interface)
3297                 return;
3298
3299         strncpy(net_config->ni_interface,
3300                 ni->ni_interface,
3301                 sizeof(net_config->ni_interface));
3302
3303         config->cfg_nid = lnet_nid_to_nid4(&ni->ni_nid);
3304         config->cfg_config_u.cfg_net.net_peer_timeout =
3305                 ni->ni_net->net_tunables.lct_peer_timeout;
3306         config->cfg_config_u.cfg_net.net_max_tx_credits =
3307                 ni->ni_net->net_tunables.lct_max_tx_credits;
3308         config->cfg_config_u.cfg_net.net_peer_tx_credits =
3309                 ni->ni_net->net_tunables.lct_peer_tx_credits;
3310         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
3311                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
3312
3313         net_config->ni_status = lnet_ni_get_status_locked(ni);
3314
3315         if (ni->ni_cpts) {
3316                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
3317
3318                 for (i = 0; i < num_cpts; i++)
3319                         net_config->ni_cpts[i] = ni->ni_cpts[i];
3320
3321                 config->cfg_ncpts = num_cpts;
3322         }
3323
3324         /*
3325          * See if user land tools sent in a newer and larger version
3326          * of struct lnet_tunables than what the kernel uses.
3327          */
3328         min_size = sizeof(*config) + sizeof(*net_config);
3329
3330         if (config->cfg_hdr.ioc_len > min_size)
3331                 tunable_size = config->cfg_hdr.ioc_len - min_size;
3332
3333         /* Don't copy too much data to user space */
3334         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
3335         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
3336
3337         if (lnd_cfg && min_size) {
3338                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
3339                 config->cfg_config_u.cfg_net.net_interface_count = 1;
3340
3341                 /* Tell user land that kernel side has less data */
3342                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
3343                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
3344                         config->cfg_hdr.ioc_len -= min_size;
3345                 }
3346         }
3347 }
3348
3349 struct lnet_ni *
3350 lnet_get_ni_idx_locked(int idx)
3351 {
3352         struct lnet_ni          *ni;
3353         struct lnet_net         *net;
3354
3355         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3356                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3357                         if (idx-- == 0)
3358                                 return ni;
3359                 }
3360         }
3361
3362         return NULL;
3363 }
3364
3365 int lnet_get_net_healthv_locked(struct lnet_net *net)
3366 {
3367         struct lnet_ni *ni;
3368         int best_healthv = 0;
3369         int healthv, ni_fatal;
3370
3371         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3372                 healthv = atomic_read(&ni->ni_healthv);
3373                 ni_fatal = atomic_read(&ni->ni_fatal_error_on);
3374                 if (!ni_fatal && healthv > best_healthv)
3375                         best_healthv = healthv;
3376         }
3377
3378         return best_healthv;
3379 }
3380
3381 struct lnet_ni *
3382 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
3383 {
3384         struct lnet_ni          *ni;
3385         struct lnet_net         *net = mynet;
3386
3387         /*
3388          * It is possible that the net has been cleaned out while there is
3389          * a message being sent. This function accessed the net without
3390          * checking if the list is empty
3391          */
3392         if (!prev) {
3393                 if (!net)
3394                         net = list_first_entry(&the_lnet.ln_nets,
3395                                                struct lnet_net,
3396                                                net_list);
3397                 if (list_empty(&net->net_ni_list))
3398                         return NULL;
3399                 ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
3400                                       ni_netlist);
3401
3402                 return ni;
3403         }
3404
3405         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
3406                 /* if you reached the end of the ni list and the net is
3407                  * specified, then there are no more nis in that net */
3408                 if (net != NULL)
3409                         return NULL;
3410
3411                 /* we reached the end of this net ni list. move to the
3412                  * next net */
3413                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
3414                         /* no more nets and no more NIs. */
3415                         return NULL;
3416
3417                 /* get the next net */
3418                 net = list_first_entry(&prev->ni_net->net_list, struct lnet_net,
3419                                        net_list);
3420                 if (list_empty(&net->net_ni_list))
3421                         return NULL;
3422                 /* get the ni on it */
3423                 ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
3424                                       ni_netlist);
3425
3426                 return ni;
3427         }
3428
3429         if (list_empty(&prev->ni_netlist))
3430                 return NULL;
3431
3432         /* there are more nis left */
3433         ni = list_first_entry(&prev->ni_netlist, struct lnet_ni, ni_netlist);
3434
3435         return ni;
3436 }
3437
3438 static int
3439 lnet_get_net_config(struct lnet_ioctl_config_data *config)
3440 {
3441         struct lnet_ni *ni;
3442         int cpt;
3443         int rc = -ENOENT;
3444         int idx = config->cfg_count;
3445
3446         cpt = lnet_net_lock_current();
3447
3448         ni = lnet_get_ni_idx_locked(idx);
3449
3450         if (ni != NULL) {
3451                 rc = 0;
3452                 lnet_ni_lock(ni);
3453                 lnet_fill_ni_info_legacy(ni, config);
3454                 lnet_ni_unlock(ni);
3455         }
3456
3457         lnet_net_unlock(cpt);
3458         return rc;
3459 }
3460
3461 static int
3462 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
3463                    struct lnet_ioctl_config_lnd_tunables *tun,
3464                    struct lnet_ioctl_element_stats *stats,
3465                    __u32 tun_size)
3466 {
3467         struct lnet_ni          *ni;
3468         int                     cpt;
3469         int                     rc = -ENOENT;
3470
3471         if (!cfg_ni || !tun || !stats)
3472                 return -EINVAL;
3473
3474         cpt = lnet_net_lock_current();
3475
3476         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
3477
3478         if (ni) {
3479                 rc = 0;
3480                 lnet_ni_lock(ni);
3481                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
3482                 lnet_ni_unlock(ni);
3483         }
3484
3485         lnet_net_unlock(cpt);
3486         return rc;
3487 }
3488
3489 static int lnet_get_ni_stats(struct lnet_ioctl_element_msg_stats *msg_stats)
3490 {
3491         struct lnet_ni *ni;
3492         int rc = -ENOENT;
3493
3494         if (!msg_stats)
3495                 return -EINVAL;
3496
3497         ni = lnet_get_ni_idx_locked(msg_stats->im_idx);
3498
3499         if (ni) {
3500                 lnet_usr_translate_stats(msg_stats, &ni->ni_stats);
3501                 rc = 0;
3502         }
3503
3504         return rc;
3505 }
3506
3507 static int lnet_add_net_common(struct lnet_net *net,
3508                                struct lnet_ioctl_config_lnd_tunables *tun)
3509 {
3510         struct lnet_handle_md ping_mdh;
3511         struct lnet_ping_buffer *pbuf;
3512         struct lnet_remotenet *rnet;
3513         struct lnet_ni *ni;
3514         u32 net_id;
3515         int rc;
3516
3517         lnet_net_lock(LNET_LOCK_EX);
3518         rnet = lnet_find_rnet_locked(net->net_id);
3519         lnet_net_unlock(LNET_LOCK_EX);
3520         /*
3521          * make sure that the net added doesn't invalidate the current
3522          * configuration LNet is keeping
3523          */
3524         if (rnet) {
3525                 CERROR("Adding net %s will invalidate routing configuration\n",
3526                        libcfs_net2str(net->net_id));
3527                 lnet_net_free(net);
3528                 return -EUSERS;
3529         }
3530
3531         if (tun)
3532                 memcpy(&net->net_tunables,
3533                        &tun->lt_cmn, sizeof(net->net_tunables));
3534         else
3535                 memset(&net->net_tunables, -1, sizeof(net->net_tunables));
3536
3537         net_id = net->net_id;
3538
3539         rc = lnet_startup_lndnet(net,
3540                                  (tun) ? &tun->lt_tun : NULL);
3541         if (rc < 0)
3542                 return rc;
3543
3544         /* make sure you calculate the correct number of slots in the ping
3545          * buffer. Since the ping info is a flattened list of all the NIs,
3546          * we should allocate enough slots to accomodate the number of NIs
3547          * which will be added.
3548          */
3549         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3550                                     LNET_PING_INFO_HDR_SIZE +
3551                                     lnet_get_ni_bytes(),
3552                                     false);
3553         if (rc < 0) {
3554                 lnet_shutdown_lndnet(net);
3555                 return rc;
3556         }
3557
3558         lnet_net_lock(LNET_LOCK_EX);
3559         net = lnet_get_net_locked(net_id);
3560         LASSERT(net);
3561
3562         /* apply the UDSPs */
3563         rc = lnet_udsp_apply_policies_on_net(net);
3564         if (rc)
3565                 CERROR("Failed to apply UDSPs on local net %s\n",
3566                        libcfs_net2str(net->net_id));
3567
3568         /* At this point we lost track of which NI was just added, so we
3569          * just re-apply the policies on all of the NIs on this net
3570          */
3571         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3572                 rc = lnet_udsp_apply_policies_on_ni(ni);
3573                 if (rc)
3574                         CERROR("Failed to apply UDSPs on ni %s\n",
3575                                libcfs_nidstr(&ni->ni_nid));
3576         }
3577         lnet_net_unlock(LNET_LOCK_EX);
3578
3579         /*
3580          * Start the acceptor thread if this is the first network
3581          * being added that requires the thread.
3582          */
3583         if (net->net_lnd->lnd_accept) {
3584                 rc = lnet_acceptor_start();
3585                 if (rc < 0) {
3586                         /* shutdown the net that we just started */
3587                         CERROR("Failed to start up acceptor thread\n");
3588                         lnet_shutdown_lndnet(net);
3589                         goto failed;
3590                 }
3591         }
3592
3593         lnet_net_lock(LNET_LOCK_EX);
3594         lnet_peer_net_added(net);
3595         lnet_net_unlock(LNET_LOCK_EX);
3596
3597         lnet_ping_target_update(pbuf, ping_mdh);
3598
3599         return 0;
3600
3601 failed:
3602         lnet_ping_md_unlink(pbuf, &ping_mdh);
3603         lnet_ping_buffer_decref(pbuf);
3604         return rc;
3605 }
3606
3607 static void
3608 lnet_set_tune_defaults(struct lnet_ioctl_config_lnd_tunables *tun)
3609 {
3610         if (tun) {
3611                 if (tun->lt_cmn.lct_peer_timeout < 0)
3612                         tun->lt_cmn.lct_peer_timeout = DEFAULT_PEER_TIMEOUT;
3613                 if (!tun->lt_cmn.lct_peer_tx_credits)
3614                         tun->lt_cmn.lct_peer_tx_credits = DEFAULT_PEER_CREDITS;
3615                 if (!tun->lt_cmn.lct_max_tx_credits)
3616                         tun->lt_cmn.lct_max_tx_credits = DEFAULT_CREDITS;
3617         }
3618 }
3619
3620 static int lnet_handle_legacy_ip2nets(char *ip2nets,
3621                                       struct lnet_ioctl_config_lnd_tunables *tun)
3622 {
3623         struct lnet_net *net;
3624         const char *nets;
3625         int rc;
3626         LIST_HEAD(net_head);
3627
3628         rc = lnet_parse_ip2nets(&nets, ip2nets);
3629         if (rc < 0)
3630                 return rc;
3631
3632         rc = lnet_parse_networks(&net_head, nets);
3633         if (rc < 0)
3634                 return rc;
3635
3636         lnet_set_tune_defaults(tun);
3637
3638         mutex_lock(&the_lnet.ln_api_mutex);
3639         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3640                 rc = -ESHUTDOWN;
3641                 goto out;
3642         }
3643
3644         while ((net = list_first_entry_or_null(&net_head,
3645                                                struct lnet_net,
3646                                                net_list)) != NULL) {
3647                 list_del_init(&net->net_list);
3648                 rc = lnet_add_net_common(net, tun);
3649                 if (rc < 0)
3650                         goto out;
3651         }
3652
3653 out:
3654         mutex_unlock(&the_lnet.ln_api_mutex);
3655
3656         while ((net = list_first_entry_or_null(&net_head,
3657                                                struct lnet_net,
3658                                                net_list)) != NULL) {
3659                 list_del_init(&net->net_list);
3660                 lnet_net_free(net);
3661         }
3662         return rc;
3663 }
3664
3665 int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf, u32 net_id,
3666                     struct lnet_ioctl_config_lnd_tunables *tun)
3667 {
3668         struct lnet_net *net;
3669         struct lnet_ni *ni;
3670         int rc, i;
3671         u32 lnd_type;
3672
3673         /* handle legacy ip2nets from DLC */
3674         if (conf->lic_legacy_ip2nets[0] != '\0')
3675                 return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
3676                                                   tun);
3677
3678         lnd_type = LNET_NETTYP(net_id);
3679
3680         if (!libcfs_isknown_lnd(lnd_type)) {
3681                 CERROR("No valid net and lnd information provided\n");
3682                 return -ENOENT;
3683         }
3684
3685         net = lnet_net_alloc(net_id, NULL);
3686         if (!net)
3687                 return -ENOMEM;
3688
3689         for (i = 0; i < conf->lic_ncpts; i++) {
3690                 if (conf->lic_cpts[i] >= LNET_CPT_NUMBER) {
3691                         lnet_net_free(net);
3692                         return -ERANGE;
3693                 }
3694         }
3695
3696         ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
3697                                        conf->lic_ni_intf);
3698         if (!ni) {
3699                 lnet_net_free(net);
3700                 return -ENOMEM;
3701         }
3702
3703         lnet_set_tune_defaults(tun);
3704
3705         mutex_lock(&the_lnet.ln_api_mutex);
3706         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3707                 lnet_net_free(net);
3708                 rc = -ESHUTDOWN;
3709         } else {
3710                 rc = lnet_add_net_common(net, tun);
3711         }
3712
3713         mutex_unlock(&the_lnet.ln_api_mutex);
3714
3715         /* If NI already exist delete this new unused copy */
3716         if (rc == -EEXIST)
3717                 lnet_ni_free(ni);
3718
3719         return rc;
3720 }
3721
3722 int lnet_dyn_del_ni(struct lnet_nid *nid)
3723 {
3724         struct lnet_net *net;
3725         struct lnet_ni *ni;
3726         u32 net_id = LNET_NID_NET(nid);
3727         struct lnet_ping_buffer *pbuf;
3728         struct lnet_handle_md ping_mdh;
3729         int net_bytes, rc;
3730         bool net_empty;
3731
3732         /* don't allow userspace to shutdown the LOLND */
3733         if (LNET_NETTYP(net_id) == LOLND)
3734                 return -EINVAL;
3735
3736         mutex_lock(&the_lnet.ln_api_mutex);
3737         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3738                 rc = -ESHUTDOWN;
3739                 goto unlock_api_mutex;
3740         }
3741
3742         lnet_net_lock(0);
3743
3744         net = lnet_get_net_locked(net_id);
3745         if (!net) {
3746                 CERROR("net %s not found\n",
3747                        libcfs_net2str(net_id));
3748                 rc = -ENOENT;
3749                 goto unlock_net;
3750         }
3751
3752         if (!nid_addr_is_set(nid)) {
3753                 /* remove the entire net */
3754                 net_bytes = lnet_get_net_ni_bytes_locked(net);
3755
3756                 lnet_net_unlock(0);
3757
3758                 /* create and link a new ping info, before removing the old one */
3759                 rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3760                                             LNET_PING_INFO_HDR_SIZE +
3761                                             lnet_get_ni_bytes() - net_bytes,
3762                                             false);
3763                 if (rc != 0)
3764                         goto unlock_api_mutex;
3765
3766                 lnet_shutdown_lndnet(net);
3767
3768                 lnet_acceptor_stop();
3769
3770                 lnet_ping_target_update(pbuf, ping_mdh);
3771
3772                 goto unlock_api_mutex;
3773         }
3774
3775         ni = lnet_nid_to_ni_locked(nid, 0);
3776         if (!ni) {
3777                 CERROR("nid %s not found\n", libcfs_nidstr(nid));
3778                 rc = -ENOENT;
3779                 goto unlock_net;
3780         }
3781
3782         net_bytes = lnet_get_net_ni_bytes_locked(net);
3783         net_empty = list_is_singular(&net->net_ni_list);
3784
3785         lnet_net_unlock(0);
3786
3787         /* create and link a new ping info, before removing the old one */
3788         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3789                                     (LNET_PING_INFO_HDR_SIZE +
3790                                      lnet_get_ni_bytes() -
3791                                      lnet_ping_sts_size(&ni->ni_nid)),
3792                                     false);
3793         if (rc != 0)
3794                 goto unlock_api_mutex;
3795
3796         lnet_shutdown_lndni(ni);
3797
3798         lnet_acceptor_stop();
3799
3800         lnet_ping_target_update(pbuf, ping_mdh);
3801
3802         /* check if the net is empty and remove it if it is */
3803         if (net_empty)
3804                 lnet_shutdown_lndnet(net);
3805
3806         goto unlock_api_mutex;
3807
3808 unlock_net:
3809         lnet_net_unlock(0);
3810 unlock_api_mutex:
3811         mutex_unlock(&the_lnet.ln_api_mutex);
3812
3813         return rc;
3814 }
3815
3816 /*
3817  * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
3818  * They are only expected to be called for unique networks.
3819  * That can be as a result of older DLC library
3820  * calls. Multi-Rail DLC and beyond no longer uses these APIs.
3821  */
3822 int
3823 lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
3824 {
3825         struct lnet_net *net;
3826         LIST_HEAD(net_head);
3827         int rc;
3828         struct lnet_ioctl_config_lnd_tunables tun;
3829         const char *nets = conf->cfg_config_u.cfg_net.net_intf;
3830
3831         /* Create a net/ni structures for the network string */
3832         rc = lnet_parse_networks(&net_head, nets);
3833         if (rc <= 0)
3834                 return rc == 0 ? -EINVAL : rc;
3835
3836         mutex_lock(&the_lnet.ln_api_mutex);
3837         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3838                 rc = -ESHUTDOWN;
3839                 goto out_unlock_clean;
3840         }
3841
3842         if (rc > 1) {
3843                 rc = -EINVAL; /* only add one network per call */
3844                 goto out_unlock_clean;
3845         }
3846
3847         net = list_first_entry(&net_head, struct lnet_net, net_list);
3848         list_del_init(&net->net_list);
3849
3850         LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));
3851
3852         memset(&tun, 0, sizeof(tun));
3853
3854         tun.lt_cmn.lct_peer_timeout =
3855           (!conf->cfg_config_u.cfg_net.net_peer_timeout) ? DEFAULT_PEER_TIMEOUT :
3856                 conf->cfg_config_u.cfg_net.net_peer_timeout;
3857         tun.lt_cmn.lct_peer_tx_credits =
3858           (!conf->cfg_config_u.cfg_net.net_peer_tx_credits) ? DEFAULT_PEER_CREDITS :
3859                 conf->cfg_config_u.cfg_net.net_peer_tx_credits;
3860         tun.lt_cmn.lct_peer_rtr_credits =
3861           conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
3862         tun.lt_cmn.lct_max_tx_credits =
3863           (!conf->cfg_config_u.cfg_net.net_max_tx_credits) ? DEFAULT_CREDITS :
3864                 conf->cfg_config_u.cfg_net.net_max_tx_credits;
3865
3866         rc = lnet_add_net_common(net, &tun);
3867
3868 out_unlock_clean:
3869         mutex_unlock(&the_lnet.ln_api_mutex);
3870         /* net_head list is empty in success case */
3871         while ((net = list_first_entry_or_null(&net_head,
3872                                                struct lnet_net,
3873                                                net_list)) != NULL) {
3874                 list_del_init(&net->net_list);
3875                 lnet_net_free(net);
3876         }
3877         return rc;
3878 }
3879
3880 int
3881 lnet_dyn_del_net(u32 net_id)
3882 {
3883         struct lnet_net *net;
3884         struct lnet_ping_buffer *pbuf;
3885         struct lnet_handle_md ping_mdh;
3886         int net_ni_bytes, rc;
3887
3888         /* don't allow userspace to shutdown the LOLND */
3889         if (LNET_NETTYP(net_id) == LOLND)
3890                 return -EINVAL;
3891
3892         mutex_lock(&the_lnet.ln_api_mutex);
3893         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3894                 rc = -ESHUTDOWN;
3895                 goto out;
3896         }
3897
3898         lnet_net_lock(0);
3899
3900         net = lnet_get_net_locked(net_id);
3901         if (net == NULL) {
3902                 lnet_net_unlock(0);
3903                 rc = -EINVAL;
3904                 goto out;
3905         }
3906
3907         net_ni_bytes = lnet_get_net_ni_bytes_locked(net);
3908
3909         lnet_net_unlock(0);
3910
3911         /* create and link a new ping info, before removing the old one */
3912         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3913                                     LNET_PING_INFO_HDR_SIZE +
3914                                     lnet_get_ni_bytes() - net_ni_bytes,
3915                                     false);
3916         if (rc != 0)
3917                 goto out;
3918
3919         lnet_shutdown_lndnet(net);
3920
3921         lnet_acceptor_stop();
3922
3923         lnet_ping_target_update(pbuf, ping_mdh);
3924
3925 out:
3926         mutex_unlock(&the_lnet.ln_api_mutex);
3927
3928         return rc;
3929 }
3930
3931 void lnet_mark_ping_buffer_for_update(void)
3932 {
3933         if (the_lnet.ln_routing)
3934                 return;
3935
3936         atomic_set(&the_lnet.ln_update_ping_buf, 1);
3937         complete(&the_lnet.ln_mt_wait_complete);
3938 }
3939 EXPORT_SYMBOL(lnet_mark_ping_buffer_for_update);
3940
3941 void lnet_update_ping_buffer(struct work_struct *work)
3942 {
3943         struct lnet_ping_buffer *pbuf;
3944         struct lnet_handle_md ping_mdh;
3945
3946         mutex_lock(&the_lnet.ln_api_mutex);
3947
3948         atomic_set(&the_lnet.ln_pb_update_ready, 1);
3949
3950         if ((the_lnet.ln_state == LNET_STATE_RUNNING) &&
3951             !lnet_ping_target_setup(&pbuf, &ping_mdh,
3952                                     LNET_PING_INFO_HDR_SIZE +
3953                                     lnet_get_ni_bytes(),
3954                                     false))
3955                 lnet_ping_target_update(pbuf, ping_mdh);
3956
3957
3958         mutex_unlock(&the_lnet.ln_api_mutex);
3959 }
3960
3961
3962 void lnet_queue_ping_buffer_update(void)
3963 {
3964         /* don't queue pb update if it is not needed */
3965         if (atomic_dec_if_positive(&the_lnet.ln_update_ping_buf) < 0)
3966                 return;
3967
3968         /* don't queue pb update if already queued and not processed */
3969         if (atomic_dec_if_positive(&the_lnet.ln_pb_update_ready) < 0)
3970                 return;
3971
3972         INIT_WORK(&the_lnet.ln_pb_update_work, lnet_update_ping_buffer);
3973         queue_work(the_lnet.ln_pb_update_wq, &the_lnet.ln_pb_update_work);
3974 }
3975
3976 void lnet_incr_dlc_seq(void)
3977 {
3978         atomic_inc(&lnet_dlc_seq_no);
3979 }
3980
3981 __u32 lnet_get_dlc_seq_locked(void)
3982 {
3983         return atomic_read(&lnet_dlc_seq_no);
3984 }
3985
3986 static void
3987 lnet_ni_set_healthv(lnet_nid_t nid, int value, bool all)
3988 {
3989         struct lnet_net *net;
3990         struct lnet_ni *ni;
3991
3992         lnet_net_lock(LNET_LOCK_EX);
3993         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3994                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3995                         if (all || (nid_is_nid4(&ni->ni_nid) &&
3996                                     lnet_nid_to_nid4(&ni->ni_nid) == nid)) {
3997                                 atomic_set(&ni->ni_healthv, value);
3998                                 if (list_empty(&ni->ni_recovery) &&
3999                                     value < LNET_MAX_HEALTH_VALUE) {
4000                                         CERROR("manually adding local NI %s to recovery\n",
4001                                                libcfs_nidstr(&ni->ni_nid));
4002                                         list_add_tail(&ni->ni_recovery,
4003                                                       &the_lnet.ln_mt_localNIRecovq);
4004                                         lnet_ni_addref_locked(ni, 0);
4005                                 }
4006                                 if (!all) {
4007                                         lnet_net_unlock(LNET_LOCK_EX);
4008                                         return;
4009                                 }
4010                         }
4011                 }
4012         }
4013         lnet_net_unlock(LNET_LOCK_EX);
4014 }
4015
4016 static void
4017 lnet_ni_set_conns_per_peer(lnet_nid_t nid, int value, bool all)
4018 {
4019         struct lnet_net *net;
4020         struct lnet_ni *ni;
4021
4022         lnet_net_lock(LNET_LOCK_EX);
4023         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
4024                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
4025                         if (lnet_nid_to_nid4(&ni->ni_nid) != nid && !all)
4026                                 continue;
4027                         if (LNET_NETTYP(net->net_id) == SOCKLND)
4028                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_sock.lnd_conns_per_peer = value;
4029                         else if (LNET_NETTYP(net->net_id) == O2IBLND)
4030                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer = value;
4031                         if (!all) {
4032                                 lnet_net_unlock(LNET_LOCK_EX);
4033                                 return;
4034                         }
4035                 }
4036         }
4037         lnet_net_unlock(LNET_LOCK_EX);
4038 }
4039
4040 static int
4041 lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats)
4042 {
4043         int cpt, rc = 0;
4044         struct lnet_ni *ni;
4045         struct lnet_nid nid;
4046
4047         lnet_nid4_to_nid(stats->hlni_nid, &nid);
4048         cpt = lnet_net_lock_current();
4049         ni = lnet_nid_to_ni_locked(&nid, cpt);
4050         if (!ni) {
4051                 rc = -ENOENT;
4052                 goto unlock;
4053         }
4054
4055         stats->hlni_local_interrupt = atomic_read(&ni->ni_hstats.hlt_local_interrupt);
4056         stats->hlni_local_dropped = atomic_read(&ni->ni_hstats.hlt_local_dropped);
4057         stats->hlni_local_aborted = atomic_read(&ni->ni_hstats.hlt_local_aborted);
4058         stats->hlni_local_no_route = atomic_read(&ni->ni_hstats.hlt_local_no_route);
4059         stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout);
4060         stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error);
4061         stats->hlni_fatal_error = atomic_read(&ni->ni_fatal_error_on);
4062         stats->hlni_health_value = atomic_read(&ni->ni_healthv);
4063         stats->hlni_ping_count = ni->ni_ping_count;
4064         stats->hlni_next_ping = ni->ni_next_ping;
4065
4066 unlock:
4067         lnet_net_unlock(cpt);
4068
4069         return rc;
4070 }
4071
4072 static int
4073 lnet_get_local_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4074 {
4075         struct lnet_ni *ni;
4076         int i = 0;
4077
4078         lnet_net_lock(LNET_LOCK_EX);
4079         list_for_each_entry(ni, &the_lnet.ln_mt_localNIRecovq, ni_recovery) {
4080                 if (!nid_is_nid4(&ni->ni_nid))
4081                         continue;
4082                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&ni->ni_nid);
4083                 i++;
4084                 if (i >= LNET_MAX_SHOW_NUM_NID)
4085                         break;
4086         }
4087         lnet_net_unlock(LNET_LOCK_EX);
4088         list->rlst_num_nids = i;
4089
4090         return 0;
4091 }
4092
4093 static int
4094 lnet_get_peer_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4095 {
4096         struct lnet_peer_ni *lpni;
4097         int i = 0;
4098
4099         lnet_net_lock(LNET_LOCK_EX);
4100         list_for_each_entry(lpni, &the_lnet.ln_mt_peerNIRecovq, lpni_recovery) {
4101                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&lpni->lpni_nid);
4102                 i++;
4103                 if (i >= LNET_MAX_SHOW_NUM_NID)
4104                         break;
4105         }
4106         lnet_net_unlock(LNET_LOCK_EX);
4107         list->rlst_num_nids = i;
4108
4109         return 0;
4110 }
4111
4112 /**
4113  * LNet ioctl handler.
4114  *
4115  */
4116 int
4117 LNetCtl(unsigned int cmd, void *arg)
4118 {
4119         struct libcfs_ioctl_data *data = arg;
4120         struct lnet_ioctl_config_data *config;
4121         struct lnet_ni           *ni;
4122         struct lnet_nid           nid;
4123         int                       rc;
4124
4125         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
4126                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
4127
4128         switch (cmd) {
4129         case IOC_LIBCFS_GET_NI: {
4130                 struct lnet_processid id = {};
4131
4132                 rc = LNetGetId(data->ioc_count, &id, false);
4133                 data->ioc_nid = lnet_nid_to_nid4(&id.nid);
4134                 return rc;
4135         }
4136         case IOC_LIBCFS_FAIL_NID:
4137                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
4138
4139         case IOC_LIBCFS_ADD_ROUTE: {
4140                 /* default router sensitivity to 1 */
4141                 unsigned int sensitivity = 1;
4142                 config = arg;
4143
4144                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4145                         return -EINVAL;
4146
4147                 if (config->cfg_config_u.cfg_route.rtr_sensitivity) {
4148                         sensitivity =
4149                           config->cfg_config_u.cfg_route.rtr_sensitivity;
4150                 }
4151
4152                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4153                 mutex_lock(&the_lnet.ln_api_mutex);
4154                 rc = lnet_add_route(config->cfg_net,
4155                                     config->cfg_config_u.cfg_route.rtr_hop,
4156                                     &nid,
4157                                     config->cfg_config_u.cfg_route.
4158                                         rtr_priority, sensitivity);
4159                 mutex_unlock(&the_lnet.ln_api_mutex);
4160                 return rc;
4161         }
4162
4163         case IOC_LIBCFS_DEL_ROUTE:
4164                 config = arg;
4165
4166                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4167                         return -EINVAL;
4168
4169                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4170                 mutex_lock(&the_lnet.ln_api_mutex);
4171                 rc = lnet_del_route(config->cfg_net, &nid);
4172                 mutex_unlock(&the_lnet.ln_api_mutex);
4173                 return rc;
4174
4175         case IOC_LIBCFS_GET_ROUTE:
4176                 config = arg;
4177
4178                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4179                         return -EINVAL;
4180
4181                 mutex_lock(&the_lnet.ln_api_mutex);
4182                 rc = lnet_get_route(config->cfg_count,
4183                                     &config->cfg_net,
4184                                     &config->cfg_config_u.cfg_route.rtr_hop,
4185                                     &config->cfg_nid,
4186                                     &config->cfg_config_u.cfg_route.rtr_flags,
4187                                     &config->cfg_config_u.cfg_route.
4188                                         rtr_priority,
4189                                     &config->cfg_config_u.cfg_route.
4190                                         rtr_sensitivity);
4191                 mutex_unlock(&the_lnet.ln_api_mutex);
4192                 return rc;
4193
4194         case IOC_LIBCFS_GET_LOCAL_NI: {
4195                 struct lnet_ioctl_config_ni *cfg_ni;
4196                 struct lnet_ioctl_config_lnd_tunables *tun = NULL;
4197                 struct lnet_ioctl_element_stats *stats;
4198                 __u32 tun_size;
4199
4200                 cfg_ni = arg;
4201
4202                 /* get the tunables if they are available */
4203                 if (cfg_ni->lic_cfg_hdr.ioc_len <
4204                     sizeof(*cfg_ni) + sizeof(*stats) + sizeof(*tun))
4205                         return -EINVAL;
4206
4207                 stats = (struct lnet_ioctl_element_stats *)
4208                         cfg_ni->lic_bulk;
4209                 tun = (struct lnet_ioctl_config_lnd_tunables *)
4210                                 (cfg_ni->lic_bulk + sizeof(*stats));
4211
4212                 tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
4213                         sizeof(*stats);
4214
4215                 mutex_lock(&the_lnet.ln_api_mutex);
4216                 rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
4217                 mutex_unlock(&the_lnet.ln_api_mutex);
4218                 return rc;
4219         }
4220
4221         case IOC_LIBCFS_GET_LOCAL_NI_MSG_STATS: {
4222                 struct lnet_ioctl_element_msg_stats *msg_stats = arg;
4223                 int cpt;
4224
4225                 if (msg_stats->im_hdr.ioc_len != sizeof(*msg_stats))
4226                         return -EINVAL;
4227
4228                 mutex_lock(&the_lnet.ln_api_mutex);
4229
4230                 cpt = lnet_net_lock_current();
4231                 rc = lnet_get_ni_stats(msg_stats);
4232                 lnet_net_unlock(cpt);
4233
4234                 mutex_unlock(&the_lnet.ln_api_mutex);
4235
4236                 return rc;
4237         }
4238
4239         case IOC_LIBCFS_GET_NET: {
4240                 size_t total = sizeof(*config) +
4241                                sizeof(struct lnet_ioctl_net_config);
4242                 config = arg;
4243
4244                 if (config->cfg_hdr.ioc_len < total)
4245                         return -EINVAL;
4246
4247                 mutex_lock(&the_lnet.ln_api_mutex);
4248                 rc = lnet_get_net_config(config);
4249                 mutex_unlock(&the_lnet.ln_api_mutex);
4250                 return rc;
4251         }
4252
4253         case IOC_LIBCFS_GET_LNET_STATS:
4254         {
4255                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
4256
4257                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
4258                         return -EINVAL;
4259
4260                 mutex_lock(&the_lnet.ln_api_mutex);
4261                 rc = lnet_counters_get(&lnet_stats->st_cntrs);
4262                 mutex_unlock(&the_lnet.ln_api_mutex);
4263                 return rc;
4264         }
4265
4266         case IOC_LIBCFS_RESET_LNET_STATS:
4267         {
4268                 mutex_lock(&the_lnet.ln_api_mutex);
4269                 lnet_counters_reset();
4270                 mutex_unlock(&the_lnet.ln_api_mutex);
4271                 return 0;
4272         }
4273
4274         case IOC_LIBCFS_CONFIG_RTR:
4275                 config = arg;
4276
4277                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4278                         return -EINVAL;
4279
4280                 mutex_lock(&the_lnet.ln_api_mutex);
4281                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
4282                         rc = lnet_rtrpools_enable();
4283                         mutex_unlock(&the_lnet.ln_api_mutex);
4284                         return rc;
4285                 }
4286                 lnet_rtrpools_disable();
4287                 mutex_unlock(&the_lnet.ln_api_mutex);
4288                 return 0;
4289
4290         case IOC_LIBCFS_ADD_BUF:
4291                 config = arg;
4292
4293                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4294                         return -EINVAL;
4295
4296                 mutex_lock(&the_lnet.ln_api_mutex);
4297                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
4298                                                 buf_tiny,
4299                                           config->cfg_config_u.cfg_buffers.
4300                                                 buf_small,
4301                                           config->cfg_config_u.cfg_buffers.
4302                                                 buf_large);
4303                 mutex_unlock(&the_lnet.ln_api_mutex);
4304                 return rc;
4305
4306         case IOC_LIBCFS_SET_NUMA_RANGE: {
4307                 struct lnet_ioctl_set_value *numa;
4308                 numa = arg;
4309                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4310                         return -EINVAL;
4311                 lnet_net_lock(LNET_LOCK_EX);
4312                 lnet_numa_range = numa->sv_value;
4313                 lnet_net_unlock(LNET_LOCK_EX);
4314                 return 0;
4315         }
4316
4317         case IOC_LIBCFS_GET_NUMA_RANGE: {
4318                 struct lnet_ioctl_set_value *numa;
4319                 numa = arg;
4320                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4321                         return -EINVAL;
4322                 numa->sv_value = lnet_numa_range;
4323                 return 0;
4324         }
4325
4326         case IOC_LIBCFS_GET_BUF: {
4327                 struct lnet_ioctl_pool_cfg *pool_cfg;
4328                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
4329
4330                 config = arg;
4331
4332                 if (config->cfg_hdr.ioc_len < total)
4333                         return -EINVAL;
4334
4335                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
4336
4337                 mutex_lock(&the_lnet.ln_api_mutex);
4338                 rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
4339                 mutex_unlock(&the_lnet.ln_api_mutex);
4340                 return rc;
4341         }
4342
4343         case IOC_LIBCFS_GET_LOCAL_HSTATS: {
4344                 struct lnet_ioctl_local_ni_hstats *stats = arg;
4345
4346                 if (stats->hlni_hdr.ioc_len < sizeof(*stats))
4347                         return -EINVAL;
4348
4349                 mutex_lock(&the_lnet.ln_api_mutex);
4350                 rc = lnet_get_local_ni_hstats(stats);
4351                 mutex_unlock(&the_lnet.ln_api_mutex);
4352
4353                 return rc;
4354         }
4355
4356         case IOC_LIBCFS_GET_RECOVERY_QUEUE: {
4357                 struct lnet_ioctl_recovery_list *list = arg;
4358                 if (list->rlst_hdr.ioc_len < sizeof(*list))
4359                         return -EINVAL;
4360
4361                 mutex_lock(&the_lnet.ln_api_mutex);
4362                 if (list->rlst_type == LNET_HEALTH_TYPE_LOCAL_NI)
4363                         rc = lnet_get_local_ni_recovery_list(list);
4364                 else
4365                         rc = lnet_get_peer_ni_recovery_list(list);
4366                 mutex_unlock(&the_lnet.ln_api_mutex);
4367                 return rc;
4368         }
4369
4370         case IOC_LIBCFS_ADD_PEER_NI: {
4371                 struct lnet_ioctl_peer_cfg *cfg = arg;
4372                 struct lnet_nid prim_nid;
4373
4374                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4375                         return -EINVAL;
4376
4377                 mutex_lock(&the_lnet.ln_api_mutex);
4378                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4379                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4380                 rc = lnet_user_add_peer_ni(&prim_nid, &nid, cfg->prcfg_mr,
4381                                            cfg->prcfg_count == 1);
4382                 mutex_unlock(&the_lnet.ln_api_mutex);
4383                 return rc;
4384         }
4385
4386         case IOC_LIBCFS_DEL_PEER_NI: {
4387                 struct lnet_ioctl_peer_cfg *cfg = arg;
4388                 struct lnet_nid prim_nid;
4389
4390                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4391                         return -EINVAL;
4392
4393                 mutex_lock(&the_lnet.ln_api_mutex);
4394                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4395                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4396                 rc = lnet_del_peer_ni(&prim_nid,
4397                                       &nid,
4398                                       cfg->prcfg_count);
4399                 mutex_unlock(&the_lnet.ln_api_mutex);
4400                 return rc;
4401         }
4402
4403         case IOC_LIBCFS_GET_PEER_INFO: {
4404                 struct lnet_ioctl_peer *peer_info = arg;
4405
4406                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
4407                         return -EINVAL;
4408
4409                 mutex_lock(&the_lnet.ln_api_mutex);
4410                 rc = lnet_get_peer_ni_info(
4411                    peer_info->pr_count,
4412                    &peer_info->pr_nid,
4413                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
4414                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
4415                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
4416                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
4417                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
4418                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
4419                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
4420                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
4421                 mutex_unlock(&the_lnet.ln_api_mutex);
4422                 return rc;
4423         }
4424
4425         case IOC_LIBCFS_GET_PEER_NI: {
4426                 struct lnet_ioctl_peer_cfg *cfg = arg;
4427
4428                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4429                         return -EINVAL;
4430
4431                 mutex_lock(&the_lnet.ln_api_mutex);
4432                 rc = lnet_get_peer_info(cfg,
4433                                         (void __user *)cfg->prcfg_bulk);
4434                 mutex_unlock(&the_lnet.ln_api_mutex);
4435                 return rc;
4436         }
4437
4438         case IOC_LIBCFS_GET_PEER_LIST: {
4439                 struct lnet_ioctl_peer_cfg *cfg = arg;
4440
4441                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4442                         return -EINVAL;
4443
4444                 mutex_lock(&the_lnet.ln_api_mutex);
4445                 rc = lnet_get_peer_list(&cfg->prcfg_count, &cfg->prcfg_size,
4446                                 (struct lnet_process_id __user *)cfg->prcfg_bulk);
4447                 mutex_unlock(&the_lnet.ln_api_mutex);
4448                 return rc;
4449         }
4450
4451         case IOC_LIBCFS_SET_HEALHV: {
4452                 struct lnet_ioctl_reset_health_cfg *cfg = arg;
4453                 int value;
4454
4455                 if (cfg->rh_hdr.ioc_len < sizeof(*cfg))
4456                         return -EINVAL;
4457                 if (cfg->rh_value < 0 ||
4458                     cfg->rh_value > LNET_MAX_HEALTH_VALUE)
4459                         value = LNET_MAX_HEALTH_VALUE;
4460                 else
4461                         value = cfg->rh_value;
4462                 CDEBUG(D_NET, "Manually setting healthv to %d for %s:%s. all = %d\n",
4463                        value, (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) ?
4464                        "local" : "peer", libcfs_nid2str(cfg->rh_nid), cfg->rh_all);
4465                 lnet_nid4_to_nid(cfg->rh_nid, &nid);
4466                 mutex_lock(&the_lnet.ln_api_mutex);
4467                 if (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI)
4468                         lnet_ni_set_healthv(cfg->rh_nid, value,
4469                                              cfg->rh_all);
4470                 else
4471                         lnet_peer_ni_set_healthv(&nid, value, cfg->rh_all);
4472                 mutex_unlock(&the_lnet.ln_api_mutex);
4473                 return 0;
4474         }
4475
4476         case IOC_LIBCFS_SET_PEER: {
4477                 struct lnet_ioctl_peer_cfg *cfg = arg;
4478                 struct lnet_peer *lp;
4479
4480                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4481                         return -EINVAL;
4482
4483                 mutex_lock(&the_lnet.ln_api_mutex);
4484                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &nid);
4485                 lp = lnet_find_peer(&nid);
4486                 if (!lp) {
4487                         mutex_unlock(&the_lnet.ln_api_mutex);
4488                         return -ENOENT;
4489                 }
4490                 spin_lock(&lp->lp_lock);
4491                 lp->lp_state = cfg->prcfg_state;
4492                 spin_unlock(&lp->lp_lock);
4493                 lnet_peer_decref_locked(lp);
4494                 mutex_unlock(&the_lnet.ln_api_mutex);
4495                 CDEBUG(D_NET, "Set peer %s state to %u\n",
4496                        libcfs_nid2str(cfg->prcfg_prim_nid), cfg->prcfg_state);
4497                 return 0;
4498         }
4499
4500         case IOC_LIBCFS_SET_CONNS_PER_PEER: {
4501                 struct lnet_ioctl_reset_conns_per_peer_cfg *cfg = arg;
4502                 int value;
4503
4504                 if (cfg->rcpp_hdr.ioc_len < sizeof(*cfg))
4505                         return -EINVAL;
4506                 if (cfg->rcpp_value < 0)
4507                         value = 1;
4508                 else
4509                         value = cfg->rcpp_value;
4510                 CDEBUG(D_NET,
4511                        "Setting conns_per_peer to %d for %s. all = %d\n",
4512                        value, libcfs_nid2str(cfg->rcpp_nid), cfg->rcpp_all);
4513                 mutex_lock(&the_lnet.ln_api_mutex);
4514                 lnet_ni_set_conns_per_peer(cfg->rcpp_nid, value, cfg->rcpp_all);
4515                 mutex_unlock(&the_lnet.ln_api_mutex);
4516                 return 0;
4517         }
4518
4519         case IOC_LIBCFS_NOTIFY_ROUTER: {
4520                 /* Convert the user-supplied real time to monotonic.
4521                  * NB: "when" is always in the past
4522                  */
4523                 time64_t when = ktime_get_seconds() -
4524                                 (ktime_get_real_seconds() - data->ioc_u64[0]);
4525
4526                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4527                 return lnet_notify(NULL, &nid, data->ioc_flags, false, when);
4528         }
4529
4530         case IOC_LIBCFS_LNET_DIST:
4531                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4532                 rc = LNetDist(&nid, &nid, &data->ioc_u32[1]);
4533                 if (rc < 0 && rc != -EHOSTUNREACH)
4534                         return rc;
4535
4536                 data->ioc_nid = lnet_nid_to_nid4(&nid);
4537                 data->ioc_u32[0] = rc;
4538                 return 0;
4539
4540         case IOC_LIBCFS_TESTPROTOCOMPAT:
4541                 the_lnet.ln_testprotocompat = data->ioc_flags;
4542                 return 0;
4543
4544         case IOC_LIBCFS_LNET_FAULT:
4545                 return lnet_fault_ctl(data->ioc_flags, data);
4546
4547         case IOC_LIBCFS_PING_PEER: {
4548                 struct lnet_ioctl_ping_data *ping = arg;
4549                 struct lnet_process_id __user *ids = ping->ping_buf;
4550                 struct lnet_nid src_nid = LNET_ANY_NID;
4551                 struct lnet_genl_ping_list plist;
4552                 struct lnet_processid id;
4553                 struct lnet_peer *lp;
4554                 signed long timeout;
4555                 int count, i;
4556
4557                 /* Check if the supplied ping data supports source nid
4558                  * NB: This check is sufficient if lnet_ioctl_ping_data has
4559                  * additional fields added, but if they are re-ordered or
4560                  * fields removed then this will break. It is expected that
4561                  * these ioctls will be replaced with netlink implementation, so
4562                  * it is probably not worth coming up with a more robust version
4563                  * compatibility scheme.
4564                  */
4565                 if (ping->ping_hdr.ioc_len >= sizeof(struct lnet_ioctl_ping_data))
4566                         lnet_nid4_to_nid(ping->ping_src, &src_nid);
4567
4568                 /* If timeout is negative then set default of 3 minutes */
4569                 if (((s32)ping->op_param) <= 0 ||
4570                     ping->op_param > (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
4571                         timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
4572                 else
4573                         timeout = nsecs_to_jiffies(ping->op_param * NSEC_PER_MSEC);
4574
4575                 id.pid = ping->ping_id.pid;
4576                 lnet_nid4_to_nid(ping->ping_id.nid, &id.nid);
4577                 rc = lnet_ping(&id, &src_nid, timeout, &plist,
4578                                ping->ping_count);
4579                 if (rc < 0)
4580                         goto report_ping_err;
4581                 count = rc;
4582                 rc = 0;
4583
4584                 for (i = 0; i < count; i++) {
4585                         struct lnet_processid *result;
4586                         struct lnet_process_id tmpid;
4587
4588                         result = genradix_ptr(&plist.lgpl_list, i);
4589                         memset(&tmpid, 0, sizeof(tmpid));
4590                         tmpid.pid = result->pid;
4591                         tmpid.nid = lnet_nid_to_nid4(&result->nid);
4592                         if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
4593                                 rc = -EFAULT;
4594                                 goto report_ping_err;
4595                         }
4596                 }
4597
4598                 mutex_lock(&the_lnet.ln_api_mutex);
4599                 lp = lnet_find_peer(&id.nid);
4600                 if (lp) {
4601                         ping->ping_id.nid =
4602                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4603                         ping->mr_info = lnet_peer_is_multi_rail(lp);
4604                         lnet_peer_decref_locked(lp);
4605                 }
4606                 mutex_unlock(&the_lnet.ln_api_mutex);
4607
4608                 ping->ping_count = count;
4609 report_ping_err:
4610                 genradix_free(&plist.lgpl_list);
4611                 return rc;
4612         }
4613
4614         case IOC_LIBCFS_DISCOVER: {
4615                 struct lnet_ioctl_ping_data *discover = arg;
4616                 struct lnet_process_id __user *ids;
4617                 struct lnet_genl_ping_list dlists;
4618                 struct lnet_processid id;
4619                 struct lnet_peer *lp;
4620                 int count, i;
4621
4622                 if (discover->ping_count <= 0)
4623                         return -EINVAL;
4624
4625                 genradix_init(&dlists.lgpl_list);
4626                 /* If the user buffer has more space than the lnet_interfaces_max,
4627                  * then only fill it up to lnet_interfaces_max.
4628                  */
4629                 if (discover->ping_count > lnet_interfaces_max)
4630                         discover->ping_count = lnet_interfaces_max;
4631
4632                 id.pid = discover->ping_id.pid;
4633                 lnet_nid4_to_nid(discover->ping_id.nid, &id.nid);
4634                 rc = lnet_discover(&id, discover->op_param, &dlists);
4635                 if (rc < 0)
4636                         goto report_discover_err;
4637                 count = rc;
4638
4639                 ids = discover->ping_buf;
4640                 for (i = 0; i < count; i++) {
4641                         struct lnet_processid *result;
4642                         struct lnet_process_id tmpid;
4643
4644                         result = genradix_ptr(&dlists.lgpl_list, i);
4645                         memset(&tmpid, 0, sizeof(tmpid));
4646                         tmpid.pid = result->pid;
4647                         tmpid.nid = lnet_nid_to_nid4(&result->nid);
4648                         if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
4649                                 rc = -EFAULT;
4650                                 goto report_discover_err;
4651                         }
4652
4653                         if (i >= discover->ping_count)
4654                                 break;
4655                 }
4656                 rc = 0;
4657
4658                 mutex_lock(&the_lnet.ln_api_mutex);
4659                 lp = lnet_find_peer(&id.nid);
4660                 if (lp) {
4661                         discover->ping_id.nid =
4662                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4663                         discover->mr_info = lnet_peer_is_multi_rail(lp);
4664                         lnet_peer_decref_locked(lp);
4665                 }
4666                 mutex_unlock(&the_lnet.ln_api_mutex);
4667
4668                 discover->ping_count = count;
4669 report_discover_err:
4670                 genradix_free(&dlists.lgpl_list);
4671                 return rc;
4672         }
4673
4674         case IOC_LIBCFS_ADD_UDSP: {
4675                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4676                 __u32 bulk_size = ioc_udsp->iou_hdr.ioc_len;
4677
4678                 mutex_lock(&the_lnet.ln_api_mutex);
4679                 rc = lnet_udsp_demarshal_add(arg, bulk_size);
4680                 if (!rc) {
4681                         rc = lnet_udsp_apply_policies(NULL, false);
4682                         CDEBUG(D_NET, "policy application returned %d\n", rc);
4683                         rc = 0;
4684                 }
4685                 mutex_unlock(&the_lnet.ln_api_mutex);
4686
4687                 return rc;
4688         }
4689
4690         case IOC_LIBCFS_DEL_UDSP: {
4691                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4692                 int idx = ioc_udsp->iou_idx;
4693
4694                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4695                         return -EINVAL;
4696
4697                 mutex_lock(&the_lnet.ln_api_mutex);
4698                 rc = lnet_udsp_del_policy(idx);
4699                 mutex_unlock(&the_lnet.ln_api_mutex);
4700
4701                 return rc;
4702         }
4703
4704         case IOC_LIBCFS_GET_UDSP_SIZE: {
4705                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4706                 struct lnet_udsp *udsp;
4707
4708                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4709                         return -EINVAL;
4710
4711                 rc = 0;
4712
4713                 mutex_lock(&the_lnet.ln_api_mutex);
4714                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4715                 if (!udsp) {
4716                         rc = -ENOENT;
4717                 } else {
4718                         /* coming in iou_idx will hold the idx of the udsp
4719                          * to get the size of. going out the iou_idx will
4720                          * hold the size of the UDSP found at the passed
4721                          * in index.
4722                          */
4723                         ioc_udsp->iou_idx = lnet_get_udsp_size(udsp);
4724                         if (ioc_udsp->iou_idx < 0)
4725                                 rc = -EINVAL;
4726                 }
4727                 mutex_unlock(&the_lnet.ln_api_mutex);
4728
4729                 return rc;
4730         }
4731
4732         case IOC_LIBCFS_GET_UDSP: {
4733                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4734                 struct lnet_udsp *udsp;
4735
4736                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4737                         return -EINVAL;
4738
4739                 rc = 0;
4740
4741                 mutex_lock(&the_lnet.ln_api_mutex);
4742                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4743                 if (!udsp)
4744                         rc = -ENOENT;
4745                 else
4746                         rc = lnet_udsp_marshal(udsp, ioc_udsp);
4747                 mutex_unlock(&the_lnet.ln_api_mutex);
4748
4749                 return rc;
4750         }
4751
4752         case IOC_LIBCFS_GET_CONST_UDSP_INFO: {
4753                 struct lnet_ioctl_construct_udsp_info *info = arg;
4754
4755                 if (info->cud_hdr.ioc_len < sizeof(*info))
4756                         return -EINVAL;
4757
4758                 CDEBUG(D_NET, "GET_UDSP_INFO for %s\n",
4759                        libcfs_nid2str(info->cud_nid));
4760
4761                 lnet_nid4_to_nid(info->cud_nid, &nid);
4762                 mutex_lock(&the_lnet.ln_api_mutex);
4763                 lnet_net_lock(0);
4764                 lnet_udsp_get_construct_info(info, &nid);
4765                 lnet_net_unlock(0);
4766                 mutex_unlock(&the_lnet.ln_api_mutex);
4767
4768                 return 0;
4769         }
4770
4771         default:
4772                 ni = lnet_net2ni_addref(data->ioc_net);
4773                 if (ni == NULL)
4774                         return -EINVAL;
4775
4776                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
4777                         rc = -EINVAL;
4778                 else
4779                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
4780
4781                 lnet_ni_decref(ni);
4782                 return rc <= 0 ? rc : 0;
4783         }
4784         /* not reached */
4785 }
4786 EXPORT_SYMBOL(LNetCtl);
4787
4788 struct lnet_nid_cpt {
4789         struct lnet_nid lnc_nid;
4790         unsigned int lnc_cpt;
4791 };
4792
4793 struct lnet_genl_nid_cpt_list {
4794         unsigned int lgncl_index;
4795         unsigned int lgncl_list_count;
4796         GENRADIX(struct lnet_nid_cpt) lgncl_lnc_list;
4797 };
4798
4799 static inline struct lnet_genl_nid_cpt_list *
4800 lnet_cpt_of_nid_dump_ctx(struct netlink_callback *cb)
4801 {
4802         return (struct lnet_genl_nid_cpt_list *)cb->args[0];
4803 }
4804
4805 static int lnet_cpt_of_nid_show_done(struct netlink_callback *cb)
4806 {
4807         struct lnet_genl_nid_cpt_list *lgncl;
4808
4809         lgncl = lnet_cpt_of_nid_dump_ctx(cb);
4810
4811         if (lgncl) {
4812                 genradix_free(&lgncl->lgncl_lnc_list);
4813                 LIBCFS_FREE(lgncl, sizeof(*lgncl));
4814                 cb->args[0] = 0;
4815         }
4816
4817         return 0;
4818 }
4819
4820 static int lnet_cpt_of_nid_show_start(struct netlink_callback *cb)
4821 {
4822         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
4823 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4824         struct netlink_ext_ack *extack = NULL;
4825 #endif
4826         struct lnet_genl_nid_cpt_list *lgncl;
4827         int msg_len = genlmsg_len(gnlh);
4828         struct nlattr *params, *top;
4829         int rem, rc = 0;
4830
4831 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4832         extack = cb->extack;
4833 #endif
4834
4835         mutex_lock(&the_lnet.ln_api_mutex);
4836         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
4837                 NL_SET_ERR_MSG(extack, "Network is down");
4838                 mutex_unlock(&the_lnet.ln_api_mutex);
4839                 return -ENETDOWN;
4840         }
4841
4842         msg_len = genlmsg_len(gnlh);
4843         if (!msg_len) {
4844                 NL_SET_ERR_MSG(extack, "Missing NID argument(s)");
4845                 mutex_unlock(&the_lnet.ln_api_mutex);
4846                 return -ENOENT;
4847         }
4848
4849         LIBCFS_ALLOC(lgncl, sizeof(*lgncl));
4850         if (!lgncl) {
4851                 mutex_unlock(&the_lnet.ln_api_mutex);
4852                 return -ENOMEM;
4853         }
4854
4855         genradix_init(&lgncl->lgncl_lnc_list);
4856         lgncl->lgncl_list_count = 0;
4857         cb->args[0] = (long)lgncl;
4858
4859         params = genlmsg_data(gnlh);
4860         nla_for_each_attr(top, params, msg_len, rem) {
4861                 struct nlattr *nids;
4862                 int rem2;
4863
4864                 switch (nla_type(top)) {
4865                 case LN_SCALAR_ATTR_LIST:
4866                         nla_for_each_nested(nids, top, rem2) {
4867                                 char nidstr[LNET_NIDSTR_SIZE + 1];
4868                                 struct lnet_nid_cpt *lnc;
4869
4870                                 if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
4871                                         continue;
4872
4873                                 memset(nidstr, 0, sizeof(nidstr));
4874                                 rc = nla_strscpy(nidstr, nids, sizeof(nidstr));
4875                                 if (rc < 0) {
4876                                         NL_SET_ERR_MSG(extack,
4877                                                        "failed to get NID");
4878                                         GOTO(report_err, rc);
4879                                 }
4880
4881                                 lnc = genradix_ptr_alloc(&lgncl->lgncl_lnc_list,
4882                                                          lgncl->lgncl_list_count++,
4883                                                          GFP_ATOMIC);
4884                                 if (!lnc) {
4885                                         NL_SET_ERR_MSG(extack,
4886                                                        "failed to allocate NID");
4887                                         GOTO(report_err, rc = -ENOMEM);
4888                                 }
4889
4890                                 rc = libcfs_strnid(&lnc->lnc_nid, strim(nidstr));
4891                                 if (rc < 0) {
4892                                         NL_SET_ERR_MSG(extack, "invalid NID");
4893                                         GOTO(report_err, rc);
4894                                 }
4895                                 rc = 0;
4896                                 CDEBUG(D_NET, "nid: %s\n", libcfs_nidstr(&lnc->lnc_nid));
4897                         }
4898                         fallthrough;
4899                 default:
4900                         break;
4901                 }
4902         }
4903 report_err:
4904         mutex_unlock(&the_lnet.ln_api_mutex);
4905
4906         if (rc < 0)
4907                 lnet_cpt_of_nid_show_done(cb);
4908
4909         return rc;
4910 }
4911
4912 static const struct ln_key_list cpt_of_nid_props_list = {
4913         .lkl_maxattr                    = LNET_CPT_OF_NID_ATTR_MAX,
4914         .lkl_list                       = {
4915                 [LNET_CPT_OF_NID_ATTR_HDR]      = {
4916                         .lkp_value              = "cpt-of-nid",
4917                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
4918                         .lkp_data_type          = NLA_NUL_STRING,
4919                 },
4920                 [LNET_CPT_OF_NID_ATTR_NID]      = {
4921                         .lkp_value              = "nid",
4922                         .lkp_data_type          = NLA_STRING,
4923                 },
4924                 [LNET_CPT_OF_NID_ATTR_CPT]      = {
4925                         .lkp_value              = "cpt",
4926                         .lkp_data_type          = NLA_U32,
4927                 },
4928         },
4929 };
4930
4931 static int lnet_cpt_of_nid_show_dump(struct sk_buff *msg,
4932                                      struct netlink_callback *cb)
4933 {
4934         struct lnet_genl_nid_cpt_list *lgncl;
4935 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4936         struct netlink_ext_ack *extack = NULL;
4937 #endif
4938         int portid = NETLINK_CB(cb->skb).portid;
4939         int seq = cb->nlh->nlmsg_seq;
4940         int idx;
4941         int rc = 0;
4942         bool need_hdr = true;
4943
4944 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4945         extack = cb->extack;
4946 #endif
4947
4948         mutex_lock(&the_lnet.ln_api_mutex);
4949         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
4950                 NL_SET_ERR_MSG(extack, "Network is down");
4951                 GOTO(send_error, rc = -ENETDOWN);
4952         }
4953
4954         lgncl = lnet_cpt_of_nid_dump_ctx(cb);
4955         idx = lgncl->lgncl_index;
4956
4957         if (!lgncl->lgncl_index) {
4958                 const struct ln_key_list *all[] = {
4959                         &cpt_of_nid_props_list, NULL, NULL
4960                 };
4961
4962                 rc = lnet_genl_send_scalar_list(msg, portid, seq, &lnet_family,
4963                                                 NLM_F_CREATE | NLM_F_MULTI,
4964                                                 LNET_CMD_CPT_OF_NID, all);
4965                 if (rc < 0) {
4966                         NL_SET_ERR_MSG(extack, "failed to send key table");
4967                         GOTO(send_error, rc);
4968                 }
4969         }
4970
4971         while (idx < lgncl->lgncl_list_count) {
4972                 struct lnet_nid_cpt *lnc;
4973                 void *hdr;
4974                 int cpt;
4975
4976                 lnc = genradix_ptr(&lgncl->lgncl_lnc_list, idx++);
4977
4978                 cpt = lnet_nid_cpt_hash(&lnc->lnc_nid, LNET_CPT_NUMBER);
4979
4980                 CDEBUG(D_NET, "nid: %s cpt: %d\n", libcfs_nidstr(&lnc->lnc_nid), cpt);
4981                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
4982                                   NLM_F_MULTI, LNET_CMD_CPT_OF_NID);
4983                 if (!hdr) {
4984                         NL_SET_ERR_MSG(extack, "failed to send values");
4985                         genlmsg_cancel(msg, hdr);
4986                         GOTO(send_error, rc = -EMSGSIZE);
4987                 }
4988
4989                 if (need_hdr) {
4990                         nla_put_string(msg, LNET_CPT_OF_NID_ATTR_HDR, "");
4991                         need_hdr = false;
4992                 }
4993
4994                 nla_put_string(msg, LNET_CPT_OF_NID_ATTR_NID,
4995                                libcfs_nidstr(&lnc->lnc_nid));
4996                 nla_put_u32(msg, LNET_CPT_OF_NID_ATTR_CPT, cpt);
4997
4998                 genlmsg_end(msg, hdr);
4999         }
5000
5001         genradix_free(&lgncl->lgncl_lnc_list);
5002         rc = 0;
5003         lgncl->lgncl_index = idx;
5004
5005 send_error:
5006         mutex_unlock(&the_lnet.ln_api_mutex);
5007
5008         return lnet_nl_send_error(cb->skb, portid, seq, rc);
5009 }
5010
5011 #ifndef HAVE_NETLINK_CALLBACK_START
5012 static int lnet_old_cpt_of_nid_show_dump(struct sk_buff *msg,
5013                                          struct netlink_callback *cb)
5014 {
5015         if (!cb->args[0]) {
5016                 int rc = lnet_cpt_of_nid_show_start(cb);
5017
5018                 if (rc < 0)
5019                         return rc;
5020         }
5021
5022         return lnet_cpt_of_nid_show_dump(msg, cb);
5023 }
5024 #endif
5025
5026 /* This is the keys for the UDSP info which is used by many
5027  * Netlink commands.
5028  */
5029 static const struct ln_key_list udsp_info_list = {
5030         .lkl_maxattr                    = LNET_UDSP_INFO_ATTR_MAX,
5031         .lkl_list                       = {
5032                 [LNET_UDSP_INFO_ATTR_NET_PRIORITY]              = {
5033                         .lkp_value      = "net priority",
5034                         .lkp_data_type  = NLA_S32
5035                 },
5036                 [LNET_UDSP_INFO_ATTR_NID_PRIORITY]              = {
5037                         .lkp_value      = "nid priority",
5038                         .lkp_data_type  = NLA_S32
5039                 },
5040                 [LNET_UDSP_INFO_ATTR_PREF_RTR_NIDS_LIST]        = {
5041                         .lkp_value      = "Preferred gateway NIDs",
5042                         .lkp_key_format = LNKF_MAPPING,
5043                         .lkp_data_type  = NLA_NESTED,
5044                 },
5045                 [LNET_UDSP_INFO_ATTR_PREF_NIDS_LIST]            = {
5046                         .lkp_value      = "Preferred source NIDs",
5047                         .lkp_key_format = LNKF_MAPPING,
5048                         .lkp_data_type  = NLA_NESTED,
5049                 },
5050         },
5051 };
5052
5053 static const struct ln_key_list udsp_info_pref_nids_list = {
5054         .lkl_maxattr                    = LNET_UDSP_INFO_PREF_NIDS_ATTR_MAX,
5055         .lkl_list                       = {
5056                 [LNET_UDSP_INFO_PREF_NIDS_ATTR_INDEX]           = {
5057                         .lkp_value      = "NID-0",
5058                         .lkp_data_type  = NLA_NUL_STRING,
5059                 },
5060                 [LNET_UDSP_INFO_PREF_NIDS_ATTR_NID]             = {
5061                         .lkp_value      = "0@lo",
5062                         .lkp_data_type  = NLA_STRING,
5063                 },
5064         },
5065 };
5066
5067 static int lnet_udsp_info_send(struct sk_buff *msg, int attr,
5068                                struct lnet_nid *nid, bool remote)
5069 {
5070         struct lnet_ioctl_construct_udsp_info *udsp;
5071         struct nlattr *udsp_attr, *udsp_info;
5072         struct nlattr *udsp_list_attr;
5073         struct nlattr *udsp_list_info;
5074         int i;
5075
5076         CFS_ALLOC_PTR(udsp);
5077         if (!udsp)
5078                 return -ENOMEM;
5079
5080         udsp->cud_peer = remote;
5081         lnet_udsp_get_construct_info(udsp, nid);
5082
5083         udsp_info = nla_nest_start(msg, attr);
5084         udsp_attr = nla_nest_start(msg, 0);
5085         nla_put_s32(msg, LNET_UDSP_INFO_ATTR_NET_PRIORITY,
5086                     udsp->cud_net_priority);
5087         nla_put_s32(msg, LNET_UDSP_INFO_ATTR_NID_PRIORITY,
5088                     udsp->cud_nid_priority);
5089
5090         if (udsp->cud_pref_rtr_nid[0] == 0)
5091                 goto skip_list;
5092
5093         udsp_list_info = nla_nest_start(msg,
5094                                         LNET_UDSP_INFO_ATTR_PREF_RTR_NIDS_LIST);
5095         for (i = 0; i < LNET_MAX_SHOW_NUM_NID; i++) {
5096                 char tmp[8]; /* NID-"3 number"\0 */
5097
5098                 if (udsp->cud_pref_rtr_nid[i] == 0)
5099                         break;
5100
5101                 udsp_list_attr = nla_nest_start(msg, i);
5102                 snprintf(tmp, sizeof(tmp), "NID-%d", i);
5103                 nla_put_string(msg, LNET_UDSP_INFO_PREF_NIDS_ATTR_INDEX,
5104                                tmp);
5105                 nla_put_string(msg, LNET_UDSP_INFO_PREF_NIDS_ATTR_NID,
5106                                libcfs_nid2str(udsp->cud_pref_rtr_nid[i]));
5107                 nla_nest_end(msg, udsp_list_attr);
5108         }
5109         nla_nest_end(msg, udsp_list_info);
5110 skip_list:
5111         nla_nest_end(msg, udsp_attr);
5112         nla_nest_end(msg, udsp_info);
5113         LIBCFS_FREE(udsp, sizeof(*udsp));
5114
5115         return 0;
5116 }
5117
5118 /* LNet NI handling */
5119 static const struct ln_key_list net_props_list = {
5120         .lkl_maxattr                    = LNET_NET_ATTR_MAX,
5121         .lkl_list                       = {
5122                 [LNET_NET_ATTR_HDR]             = {
5123                         .lkp_value              = "net",
5124                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
5125                         .lkp_data_type          = NLA_NUL_STRING,
5126                 },
5127                 [LNET_NET_ATTR_TYPE]            = {
5128                         .lkp_value              = "net type",
5129                         .lkp_data_type          = NLA_STRING
5130                 },
5131                 [LNET_NET_ATTR_LOCAL]           = {
5132                         .lkp_value              = "local NI(s)",
5133                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
5134                         .lkp_data_type          = NLA_NESTED
5135                 },
5136         },
5137 };
5138
5139 static struct ln_key_list local_ni_list = {
5140         .lkl_maxattr                    = LNET_NET_LOCAL_NI_ATTR_MAX,
5141         .lkl_list                       = {
5142                 [LNET_NET_LOCAL_NI_ATTR_NID]            = {
5143                         .lkp_value              = "nid",
5144                         .lkp_data_type          = NLA_STRING
5145                 },
5146                 [LNET_NET_LOCAL_NI_ATTR_STATUS]         = {
5147                         .lkp_value              = "status",
5148                         .lkp_data_type          = NLA_STRING
5149                 },
5150                 [LNET_NET_LOCAL_NI_ATTR_INTERFACE]      = {
5151                         .lkp_value              = "interfaces",
5152                         .lkp_key_format         = LNKF_MAPPING,
5153                         .lkp_data_type          = NLA_NESTED
5154                 },
5155                 [LNET_NET_LOCAL_NI_ATTR_STATS]          = {
5156                         .lkp_value              = "statistics",
5157                         .lkp_key_format         = LNKF_MAPPING,
5158                         .lkp_data_type          = NLA_NESTED
5159                 },
5160                 [LNET_NET_LOCAL_NI_ATTR_UDSP_INFO]      = {
5161                         .lkp_value              = "udsp info",
5162                         .lkp_key_format         = LNKF_MAPPING,
5163                         .lkp_data_type          = NLA_NESTED
5164                 },
5165                 [LNET_NET_LOCAL_NI_ATTR_SEND_STATS]     = {
5166                         .lkp_value              = "sent_stats",
5167                         .lkp_key_format         = LNKF_MAPPING,
5168                         .lkp_data_type          = NLA_NESTED
5169                 },
5170                 [LNET_NET_LOCAL_NI_ATTR_RECV_STATS]     = {
5171                         .lkp_value              = "received_stats",
5172                         .lkp_key_format         = LNKF_MAPPING,
5173                         .lkp_data_type          = NLA_NESTED
5174                 },
5175                 [LNET_NET_LOCAL_NI_ATTR_DROPPED_STATS]  = {
5176                         .lkp_value              = "dropped_stats",
5177                         .lkp_key_format         = LNKF_MAPPING,
5178                         .lkp_data_type          = NLA_NESTED
5179
5180                 },
5181                 [LNET_NET_LOCAL_NI_ATTR_HEALTH_STATS]   = {
5182                         .lkp_value              = "health stats",
5183                         .lkp_key_format         = LNKF_MAPPING,
5184                         .lkp_data_type          = NLA_NESTED
5185                 },
5186                 [LNET_NET_LOCAL_NI_ATTR_TUNABLES]       = {
5187                         .lkp_value              = "tunables",
5188                         .lkp_key_format         = LNKF_MAPPING,
5189                         .lkp_data_type          = NLA_NESTED
5190                 },
5191                 [LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES]   = {
5192                         .lkp_value              = "lnd tunables",
5193                         .lkp_key_format         = LNKF_MAPPING,
5194                         .lkp_data_type          = NLA_NESTED
5195                 },
5196                 [LNET_NET_LOCAL_NI_DEV_CPT]             = {
5197                         .lkp_value              = "dev cpt",
5198                         .lkp_data_type          = NLA_S32,
5199                 },
5200                 [LNET_NET_LOCAL_NI_CPTS]                = {
5201                         .lkp_value              = "CPT",
5202                         .lkp_data_type          = NLA_STRING,
5203                 },
5204         },
5205 };
5206
5207 static const struct ln_key_list local_ni_interfaces_list = {
5208         .lkl_maxattr                    = LNET_NET_LOCAL_NI_INTF_ATTR_MAX,
5209         .lkl_list                       = {
5210                 [LNET_NET_LOCAL_NI_INTF_ATTR_TYPE] = {
5211                         .lkp_value      = "0",
5212                         .lkp_data_type  = NLA_STRING
5213                 },
5214         },
5215 };
5216
5217 static const struct ln_key_list local_ni_stats_list = {
5218         .lkl_maxattr                    = LNET_NET_LOCAL_NI_STATS_ATTR_MAX,
5219         .lkl_list                       = {
5220                 [LNET_NET_LOCAL_NI_STATS_ATTR_SEND_COUNT]       = {
5221                         .lkp_value      = "send_count",
5222                         .lkp_data_type  = NLA_U32
5223                 },
5224                 [LNET_NET_LOCAL_NI_STATS_ATTR_RECV_COUNT]       = {
5225                         .lkp_value      = "recv_count",
5226                         .lkp_data_type  = NLA_U32
5227                 },
5228                 [LNET_NET_LOCAL_NI_STATS_ATTR_DROP_COUNT]       = {
5229                         .lkp_value      = "drop_count",
5230                         .lkp_data_type  = NLA_U32
5231                 },
5232         },
5233 };
5234
5235 static const struct ln_key_list local_ni_msg_stats_list = {
5236         .lkl_maxattr                    = LNET_NET_LOCAL_NI_MSG_STATS_ATTR_MAX,
5237         .lkl_list                       = {
5238                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT]    = {
5239                         .lkp_value      = "put",
5240                         .lkp_data_type  = NLA_U32
5241                 },
5242                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT]    = {
5243                         .lkp_value      = "get",
5244                         .lkp_data_type  = NLA_U32
5245                 },
5246                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT]  = {
5247                         .lkp_value      = "reply",
5248                         .lkp_data_type  = NLA_U32
5249                 },
5250                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT]    = {
5251                         .lkp_value      = "ack",
5252                         .lkp_data_type  = NLA_U32
5253                 },
5254                 [LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT]  = {
5255                         .lkp_value      = "hello",
5256                         .lkp_data_type  = NLA_U32
5257                 },
5258         },
5259 };
5260
5261 static const struct ln_key_list local_ni_health_stats_list = {
5262         .lkl_maxattr                    = LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_MAX,
5263         .lkl_list                       = {
5264                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_FATAL_ERRORS] = {
5265                         .lkp_value      = "fatal_error",
5266                         .lkp_data_type  = NLA_S32
5267                 },
5268                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_LEVEL] = {
5269                         .lkp_value      = "health value",
5270                         .lkp_data_type  = NLA_S32
5271                 },
5272                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_INTERRUPTS] = {
5273                         .lkp_value      = "interrupts",
5274                         .lkp_data_type  = NLA_U32
5275                 },
5276                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_DROPPED] = {
5277                         .lkp_value      = "dropped",
5278                         .lkp_data_type  = NLA_U32
5279                 },
5280                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ABORTED] = {
5281                         .lkp_value      = "aborted",
5282                         .lkp_data_type  = NLA_U32
5283                 },
5284                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NO_ROUTE] = {
5285                         .lkp_value      = "no route",
5286                         .lkp_data_type  = NLA_U32
5287                 },
5288                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_TIMEOUTS] = {
5289                         .lkp_value      = "timeouts",
5290                         .lkp_data_type  = NLA_U32
5291                 },
5292                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ERROR] = {
5293                         .lkp_value      = "error",
5294                         .lkp_data_type  = NLA_U32
5295                 },
5296                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PING_COUNT] = {
5297                         .lkp_value      = "ping_count",
5298                         .lkp_data_type  = NLA_U32,
5299                 },
5300                 [LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NEXT_PING] = {
5301                         .lkp_value      = "next_ping",
5302                         .lkp_data_type  = NLA_U64
5303                 },
5304         },
5305 };
5306
5307 static const struct ln_key_list local_ni_tunables_list = {
5308         .lkl_maxattr                    = LNET_NET_LOCAL_NI_TUNABLES_ATTR_MAX,
5309         .lkl_list                       = {
5310                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT]  = {
5311                         .lkp_value      = "peer_timeout",
5312                         .lkp_data_type  = NLA_S32
5313                 },
5314                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS]  = {
5315                         .lkp_value      = "peer_credits",
5316                         .lkp_data_type  = NLA_S32
5317                 },
5318                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS] = {
5319                         .lkp_value      = "peer_buffer_credits",
5320                         .lkp_data_type  = NLA_S32
5321                 },
5322                 [LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS] = {
5323                         .lkp_value      = "credits",
5324                         .lkp_data_type  = NLA_S32
5325                 },
5326         },
5327 };
5328
/* Use an index since the traversal is across LNet nets and ni collections.
 *
 * One instance is allocated per dump by lnet_net_show_start(), kept in
 * cb->args[0] and released by lnet_net_show_done().
 */
struct lnet_genl_net_list {
	/* net to report; LNET_NET_ANY (the initial value) selects all nets */
	unsigned int	lngl_net_id;
	/* NI index the dump compares against; starts at 0 */
	unsigned int	lngl_idx;
};
5334
5335 static inline struct lnet_genl_net_list *
5336 lnet_net_dump_ctx(struct netlink_callback *cb)
5337 {
5338         return (struct lnet_genl_net_list *)cb->args[0];
5339 }
5340
5341 static int lnet_net_show_done(struct netlink_callback *cb)
5342 {
5343         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
5344
5345         if (nlist) {
5346                 LIBCFS_FREE(nlist, sizeof(*nlist));
5347                 cb->args[0] = 0;
5348         }
5349
5350         return 0;
5351 }
5352
5353 /* LNet net ->start() handler for GET requests */
5354 static int lnet_net_show_start(struct netlink_callback *cb)
5355 {
5356         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5357 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5358         struct netlink_ext_ack *extack = NULL;
5359 #endif
5360         struct lnet_genl_net_list *nlist;
5361         int msg_len = genlmsg_len(gnlh);
5362         struct nlattr *params, *top;
5363         int rem, rc = 0;
5364
5365 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5366         extack = cb->extack;
5367 #endif
5368         if (the_lnet.ln_refcount == 0) {
5369                 NL_SET_ERR_MSG(extack, "LNet stack down");
5370                 return -ENETDOWN;
5371         }
5372
5373         LIBCFS_ALLOC(nlist, sizeof(*nlist));
5374         if (!nlist)
5375                 return -ENOMEM;
5376
5377         nlist->lngl_net_id = LNET_NET_ANY;
5378         nlist->lngl_idx = 0;
5379         cb->args[0] = (long)nlist;
5380
5381         if (!msg_len)
5382                 return 0;
5383
5384         params = genlmsg_data(gnlh);
5385         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
5386                 NL_SET_ERR_MSG(extack, "invalid configuration");
5387                 return -EINVAL;
5388         }
5389
5390         nla_for_each_nested(top, params, rem) {
5391                 struct nlattr *net;
5392                 int rem2;
5393
5394                 nla_for_each_nested(net, top, rem2) {
5395                         char filter[LNET_NIDSTR_SIZE];
5396
5397                         if (nla_type(net) != LN_SCALAR_ATTR_VALUE ||
5398                             nla_strcmp(net, "net type") != 0)
5399                                 continue;
5400
5401                         net = nla_next(net, &rem2);
5402                         if (nla_type(net) != LN_SCALAR_ATTR_VALUE) {
5403                                 NL_SET_ERR_MSG(extack, "invalid config param");
5404                                 GOTO(report_err, rc = -EINVAL);
5405                         }
5406
5407                         rc = nla_strscpy(filter, net, sizeof(filter));
5408                         if (rc < 0) {
5409                                 NL_SET_ERR_MSG(extack, "failed to get param");
5410                                 GOTO(report_err, rc);
5411                         }
5412                         rc = 0;
5413
5414                         nlist->lngl_net_id = libcfs_str2net(filter);
5415                         if (nlist->lngl_net_id == LNET_NET_ANY) {
5416                                 NL_SET_ERR_MSG(extack, "cannot parse net");
5417                                 GOTO(report_err, rc = -ENOENT);
5418                         }
5419                 }
5420         }
5421 report_err:
5422         if (rc < 0)
5423                 lnet_net_show_done(cb);
5424
5425         return rc;
5426 }
5427
5428 static int lnet_net_show_dump(struct sk_buff *msg,
5429                               struct netlink_callback *cb)
5430 {
5431         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
5432 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5433         struct netlink_ext_ack *extack = NULL;
5434 #endif
5435         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5436         int portid = NETLINK_CB(cb->skb).portid;
5437         bool found = false, started = true;
5438         const struct lnet_lnd *lnd = NULL;
5439         int idx = nlist->lngl_idx, rc = 0;
5440         int seq = cb->nlh->nlmsg_seq;
5441         struct lnet_net *net;
5442         void *hdr = NULL;
5443
5444 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5445         extack = cb->extack;
5446 #endif
5447         lnet_net_lock(LNET_LOCK_EX);
5448
5449         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
5450                 struct nlattr *local_ni, *ni_attr;
5451                 struct lnet_ni *ni;
5452                 int dev = 0;
5453
5454                 if (nlist->lngl_net_id != LNET_NET_ANY &&
5455                     nlist->lngl_net_id != net->net_id)
5456                         continue;
5457
5458                 if (gnlh->version && LNET_NETTYP(net->net_id) != LOLND) {
5459                         if (!net->net_lnd) {
5460                                 NL_SET_ERR_MSG(extack,
5461                                                "LND not setup for NI");
5462                                 GOTO(net_unlock, rc = -ENODEV);
5463                         }
5464                         if (net->net_lnd != lnd)
5465                                 lnd = net->net_lnd;
5466                         else
5467                                 lnd = NULL;
5468                 }
5469
5470                 /* We need to resend the key table every time the base LND
5471                  * changed.
5472                  */
5473                 if (!idx || lnd) {
5474                         const struct ln_key_list *all[] = {
5475                                 &net_props_list, &local_ni_list,
5476                                 &local_ni_interfaces_list,
5477                                 &local_ni_stats_list,
5478                                 &udsp_info_list,
5479                                 &udsp_info_pref_nids_list,
5480                                 &udsp_info_pref_nids_list,
5481                                 &local_ni_msg_stats_list,
5482                                 &local_ni_msg_stats_list,
5483                                 &local_ni_msg_stats_list,
5484                                 &local_ni_health_stats_list,
5485                                 &local_ni_tunables_list,
5486                                 NULL, /* lnd tunables */
5487                                 NULL
5488                         };
5489                         int flags = NLM_F_CREATE | NLM_F_MULTI;
5490
5491                         if (lnd) {
5492                                 all[ARRAY_SIZE(all) - 2] = lnd->lnd_keys;
5493                                 if (idx)
5494                                         flags |= NLM_F_REPLACE;
5495                                 started = true;
5496                         }
5497
5498                         rc = lnet_genl_send_scalar_list(msg, portid, seq,
5499                                                         &lnet_family, flags,
5500                                                         LNET_CMD_NETS, all);
5501                         if (rc < 0) {
5502                                 NL_SET_ERR_MSG(extack, "failed to send key table");
5503                                 GOTO(net_unlock, rc);
5504                         }
5505                 }
5506
5507                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
5508                                   NLM_F_MULTI, LNET_CMD_NETS);
5509                 if (!hdr) {
5510                         NL_SET_ERR_MSG(extack, "failed to send values");
5511                         GOTO(net_unlock, rc = -EMSGSIZE);
5512                 }
5513
5514                 if (started) {
5515                         nla_put_string(msg, LNET_NET_ATTR_HDR, "");
5516                         started = false;
5517                 }
5518
5519                 nla_put_string(msg, LNET_NET_ATTR_TYPE,
5520                                libcfs_net2str(net->net_id));
5521
5522                 local_ni = nla_nest_start(msg, LNET_NET_ATTR_LOCAL);
5523                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
5524                         char *status = "up";
5525
5526                         if (idx++ < nlist->lngl_idx)
5527                                 continue;
5528
5529                         ni_attr = nla_nest_start(msg, dev++);
5530                         found = true;
5531                         lnet_ni_lock(ni);
5532                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_NID,
5533                                        libcfs_nidstr(&ni->ni_nid));
5534                         if (!nid_is_lo0(&ni->ni_nid) &&
5535                             lnet_ni_get_status_locked(ni) != LNET_NI_STATUS_UP)
5536                                 status = "down";
5537                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_STATUS,
5538                                        status);
5539
5540                         if (!nid_is_lo0(&ni->ni_nid) && ni->ni_interface) {
5541                                 struct nlattr *intf_nest, *intf_attr;
5542
5543                                 intf_nest = nla_nest_start(msg,
5544                                                            LNET_NET_LOCAL_NI_ATTR_INTERFACE);
5545                                 intf_attr = nla_nest_start(msg, 0);
5546                                 nla_put_string(msg,
5547                                                LNET_NET_LOCAL_NI_INTF_ATTR_TYPE,
5548                                                ni->ni_interface);
5549                                 nla_nest_end(msg, intf_attr);
5550                                 nla_nest_end(msg, intf_nest);
5551                         }
5552
5553                         if (gnlh->version) {
5554                                 char cpts[LNET_MAX_SHOW_NUM_CPT * 4 + 4], *cpt;
5555                                 struct lnet_ioctl_element_msg_stats msg_stats;
5556                                 struct lnet_ioctl_element_stats stats;
5557                                 size_t buf_len = sizeof(cpts), len;
5558                                 struct nlattr *health_attr, *health_stats;
5559                                 struct nlattr *send_attr, *send_stats;
5560                                 struct nlattr *recv_attr, *recv_stats;
5561                                 struct nlattr *drop_attr, *drop_stats;
5562                                 struct nlattr *stats_attr, *ni_stats;
5563                                 struct nlattr *tun_attr, *ni_tun;
5564                                 int j;
5565
5566                                 stats.iel_send_count = lnet_sum_stats(&ni->ni_stats,
5567                                                                       LNET_STATS_TYPE_SEND);
5568                                 stats.iel_recv_count = lnet_sum_stats(&ni->ni_stats,
5569                                                                       LNET_STATS_TYPE_RECV);
5570                                 stats.iel_drop_count = lnet_sum_stats(&ni->ni_stats,
5571                                                                       LNET_STATS_TYPE_DROP);
5572                                 lnet_ni_unlock(ni);
5573
5574                                 stats_attr = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_STATS);
5575                                 ni_stats = nla_nest_start(msg, 0);
5576                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_SEND_COUNT,
5577                                             stats.iel_send_count);
5578                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_RECV_COUNT,
5579                                             stats.iel_recv_count);
5580                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_STATS_ATTR_DROP_COUNT,
5581                                             stats.iel_drop_count);
5582                                 nla_nest_end(msg, ni_stats);
5583                                 nla_nest_end(msg, stats_attr);
5584
5585                                 if (gnlh->version < 4)
5586                                         goto skip_udsp;
5587
5588                                 /* UDSP info */
5589                                 rc = lnet_udsp_info_send(msg, LNET_NET_LOCAL_NI_ATTR_UDSP_INFO,
5590                                                          &ni->ni_nid, false);
5591                                 if (rc < 0) {
5592                                         NL_SET_ERR_MSG(extack,
5593                                                        "Failed to get udsp info");
5594                                         genlmsg_cancel(msg, hdr);
5595                                         GOTO(net_unlock, rc = -ENOMEM);
5596                                 }
5597 skip_udsp:
5598                                 if (gnlh->version < 2)
5599                                         goto skip_msg_stats;
5600
5601                                 msg_stats.im_idx = idx - 1;
5602                                 rc = lnet_get_ni_stats(&msg_stats);
5603                                 if (rc < 0) {
5604                                         NL_SET_ERR_MSG(extack,
5605                                                        "failed to get msg stats");
5606                                         genlmsg_cancel(msg, hdr);
5607                                         GOTO(net_unlock, rc = -ENOMEM);
5608                                 }
5609
5610                                 send_stats = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_SEND_STATS);
5611                                 send_attr = nla_nest_start(msg, 0);
5612                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5613                                             msg_stats.im_send_stats.ico_get_count);
5614                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5615                                             msg_stats.im_send_stats.ico_put_count);
5616                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5617                                             msg_stats.im_send_stats.ico_reply_count);
5618                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5619                                             msg_stats.im_send_stats.ico_ack_count);
5620                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5621                                             msg_stats.im_send_stats.ico_hello_count);
5622                                 nla_nest_end(msg, send_attr);
5623                                 nla_nest_end(msg, send_stats);
5624
5625                                 recv_stats = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_RECV_STATS);
5626                                 recv_attr = nla_nest_start(msg, 0);
5627                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5628                                             msg_stats.im_recv_stats.ico_get_count);
5629                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5630                                             msg_stats.im_recv_stats.ico_put_count);
5631                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5632                                             msg_stats.im_recv_stats.ico_reply_count);
5633                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5634                                             msg_stats.im_recv_stats.ico_ack_count);
5635                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5636                                             msg_stats.im_recv_stats.ico_hello_count);
5637                                 nla_nest_end(msg, recv_attr);
5638                                 nla_nest_end(msg, recv_stats);
5639
5640                                 drop_stats = nla_nest_start(msg,
5641                                                             LNET_NET_LOCAL_NI_ATTR_DROPPED_STATS);
5642                                 drop_attr = nla_nest_start(msg, 0);
5643                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_PUT_COUNT,
5644                                             msg_stats.im_drop_stats.ico_get_count);
5645                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_GET_COUNT,
5646                                             msg_stats.im_drop_stats.ico_put_count);
5647                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_REPLY_COUNT,
5648                                             msg_stats.im_drop_stats.ico_reply_count);
5649                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_ACK_COUNT,
5650                                             msg_stats.im_drop_stats.ico_ack_count);
5651                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_MSG_STATS_ATTR_HELLO_COUNT,
5652                                             msg_stats.im_drop_stats.ico_hello_count);
5653                                 nla_nest_end(msg, drop_attr);
5654                                 nla_nest_end(msg, drop_stats);
5655
5656                                 /* health stats */
5657                                 health_stats = nla_nest_start(msg,
5658                                                               LNET_NET_LOCAL_NI_ATTR_HEALTH_STATS);
5659                                 health_attr = nla_nest_start(msg, 0);
5660                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_FATAL_ERRORS,
5661                                             atomic_read(&ni->ni_fatal_error_on));
5662                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_LEVEL,
5663                                             atomic_read(&ni->ni_healthv));
5664                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_INTERRUPTS,
5665                                             atomic_read(&ni->ni_hstats.hlt_local_interrupt));
5666                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_DROPPED,
5667                                             atomic_read(&ni->ni_hstats.hlt_local_dropped));
5668                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ABORTED,
5669                                             atomic_read(&ni->ni_hstats.hlt_local_aborted));
5670                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NO_ROUTE,
5671                                             atomic_read(&ni->ni_hstats.hlt_local_no_route));
5672                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_TIMEOUTS,
5673                                             atomic_read(&ni->ni_hstats.hlt_local_timeout));
5674                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_ERROR,
5675                                             atomic_read(&ni->ni_hstats.hlt_local_error));
5676                                 nla_put_u32(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PING_COUNT,
5677                                             ni->ni_ping_count);
5678                                 nla_put_u64_64bit(msg, LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_NEXT_PING,
5679                                                   ni->ni_next_ping,
5680                                                   LNET_NET_LOCAL_NI_HEALTH_STATS_ATTR_PAD);
5681                                 nla_nest_end(msg, health_attr);
5682                                 nla_nest_end(msg, health_stats);
5683 skip_msg_stats:
5684                                 /* Report net tunables */
5685                                 tun_attr = nla_nest_start(msg, LNET_NET_LOCAL_NI_ATTR_TUNABLES);
5686                                 ni_tun = nla_nest_start(msg, 0);
5687                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT,
5688                                             ni->ni_net->net_tunables.lct_peer_timeout);
5689                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS,
5690                                             ni->ni_net->net_tunables.lct_peer_tx_credits);
5691                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS,
5692                                             ni->ni_net->net_tunables.lct_peer_rtr_credits);
5693                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS,
5694                                             ni->ni_net->net_tunables.lct_max_tx_credits);
5695                                 nla_nest_end(msg, ni_tun);
5696
5697                                 nla_nest_end(msg, tun_attr);
5698
5699                                 if (lnd && lnd->lnd_nl_get && lnd->lnd_keys) {
5700                                         struct nlattr *lnd_tun_attr, *lnd_ni_tun;
5701
5702                                         lnd_tun_attr = nla_nest_start(msg,
5703                                                                       LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES);
5704                                         lnd_ni_tun = nla_nest_start(msg, 0);
5705                                         rc = lnd->lnd_nl_get(LNET_CMD_NETS, msg,
5706                                                              LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES,
5707                                                              ni);
5708                                         if (rc < 0) {
5709                                                 NL_SET_ERR_MSG(extack,
5710                                                                "failed to get lnd tunables");
5711                                                 genlmsg_cancel(msg, hdr);
5712                                                 GOTO(net_unlock, rc);
5713                                         }
5714                                         nla_nest_end(msg, lnd_ni_tun);
5715                                         nla_nest_end(msg, lnd_tun_attr);
5716                                 }
5717
5718                                 nla_put_s32(msg, LNET_NET_LOCAL_NI_DEV_CPT, ni->ni_dev_cpt);
5719
5720                                 /* Report cpts. We could send this as a nested list
5721                                  * of integers but older versions of the tools
5722                                  * expect a string. The new versions can handle
5723                                  * both formats so in the future we can change
5724                                  * this to a nested list.
5725                                  */
5726                                 len = snprintf(cpts, buf_len, "\"[");
5727                                 cpt = cpts + len;
5728                                 buf_len -= len;
5729
5730                                 if (ni->ni_ncpts == LNET_CPT_NUMBER && !ni->ni_cpts)  {
5731                                         for (j = 0; j < ni->ni_ncpts; j++) {
5732                                                 len = snprintf(cpt, buf_len, "%d,", j);
5733                                                 buf_len -= len;
5734                                                 cpt += len;
5735                                         }
5736                                 } else {
5737                                         for (j = 0;
5738                                              ni->ni_cpts && j < ni->ni_ncpts &&
5739                                              j < LNET_MAX_SHOW_NUM_CPT; j++) {
5740                                                 len = snprintf(cpt, buf_len, "%d,",
5741                                                                ni->ni_cpts[j]);
5742                                                 buf_len -= len;
5743                                                 cpt += len;
5744                                         }
5745                                 }
5746                                 snprintf(cpt - 1, sizeof(cpts), "]\"");
5747
5748                                 nla_put_string(msg, LNET_NET_LOCAL_NI_CPTS, cpts);
5749                         } else {
5750                                 lnet_ni_unlock(ni);
5751                         }
5752                         nla_nest_end(msg, ni_attr);
5753                 }
5754                 nla_nest_end(msg, local_ni);
5755
5756                 genlmsg_end(msg, hdr);
5757         }
5758
5759         if (!found) {
5760                 struct nlmsghdr *nlh = nlmsg_hdr(msg);
5761
5762                 nlmsg_cancel(msg, nlh);
5763                 NL_SET_ERR_MSG(extack, "Network is down");
5764                 rc = -ESRCH;
5765         }
5766         nlist->lngl_idx = idx;
5767 net_unlock:
5768         lnet_net_unlock(LNET_LOCK_EX);
5769
5770         return lnet_nl_send_error(cb->skb, portid, seq, rc);
5771 }
5772
5773 #ifndef HAVE_NETLINK_CALLBACK_START
5774 static int lnet_old_net_show_dump(struct sk_buff *msg,
5775                                    struct netlink_callback *cb)
5776 {
5777         if (!cb->args[0]) {
5778                 int rc = lnet_net_show_start(cb);
5779
5780                 if (rc < 0)
5781                         return rc;
5782         }
5783
5784         return lnet_net_show_dump(msg, cb);
5785 }
5786 #endif
5787
5788 static int lnet_genl_parse_tunables(struct nlattr *settings,
5789                                     struct lnet_ioctl_config_lnd_tunables *tun)
5790 {
5791         struct nlattr *param;
5792         int rem, rc = 0;
5793
5794         nla_for_each_nested(param, settings, rem) {
5795                 int type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_UNSPEC;
5796                 s64 num;
5797
5798                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
5799                         continue;
5800
5801                 if (nla_strcmp(param, "peer_timeout") == 0)
5802                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT;
5803                 else if (nla_strcmp(param, "peer_credits") == 0)
5804                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS;
5805                 else if (nla_strcmp(param, "peer_buffer_credits") == 0)
5806                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS;
5807                 else if (nla_strcmp(param, "credits") == 0)
5808                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS;
5809
5810                 param = nla_next(param, &rem);
5811                 if (nla_type(param) != LN_SCALAR_ATTR_INT_VALUE)
5812                         return -EINVAL;
5813
5814                 num = nla_get_s64(param);
5815                 switch (type) {
5816                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT:
5817                         if (num >= 0)
5818                                 tun->lt_cmn.lct_peer_timeout = num;
5819                         break;
5820                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS:
5821                         if (num > 0)
5822                                 tun->lt_cmn.lct_peer_tx_credits = num;
5823                         break;
5824                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS:
5825                         if (num > 0)
5826                                 tun->lt_cmn.lct_peer_rtr_credits = num;
5827                         break;
5828                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS:
5829                         if (num > 0)
5830                                 tun->lt_cmn.lct_max_tx_credits = num;
5831                         break;
5832                 default:
5833                         rc = -EINVAL;
5834                         break;
5835                 }
5836         }
5837         return rc;
5838 }
5839
5840 static int lnet_genl_parse_lnd_tunables(struct nlattr *settings,
5841                                         struct lnet_lnd_tunables *tun,
5842                                         const struct lnet_lnd *lnd)
5843 {
5844         const struct ln_key_list *list = lnd->lnd_keys;
5845         struct nlattr *param;
5846         int rem, rc = 0;
5847         int i = 1;
5848
5849         /* silently ignore these setting if the LND driver doesn't
5850          * support any LND tunables
5851          */
5852         if (!list || !lnd->lnd_nl_set || !list->lkl_maxattr)
5853                 return 0;
5854
5855         nla_for_each_nested(param, settings, rem) {
5856                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
5857                         continue;
5858
5859                 for (i = 1; i <= list->lkl_maxattr; i++) {
5860                         if (!list->lkl_list[i].lkp_value ||
5861                             nla_strcmp(param, list->lkl_list[i].lkp_value) != 0)
5862                                 continue;
5863
5864                         param = nla_next(param, &rem);
5865                         rc = lnd->lnd_nl_set(LNET_CMD_NETS, param, i, tun);
5866                         if (rc < 0)
5867                                 return rc;
5868                 }
5869         }
5870
5871         return rc;
5872 }
5873
5874 static int
5875 lnet_genl_parse_local_ni(struct nlattr *entry, struct genl_info *info,
5876                          int net_id, struct lnet_ioctl_config_ni *conf,
5877                          bool *ni_list)
5878 {
5879         bool create = info->nlhdr->nlmsg_flags & NLM_F_CREATE;
5880         struct lnet_ioctl_config_lnd_tunables *tun;
5881         struct nlattr *settings;
5882         int rem3, rc = 0;
5883
5884         LIBCFS_ALLOC(tun, sizeof(struct lnet_ioctl_config_lnd_tunables));
5885         if (!tun) {
5886                 GENL_SET_ERR_MSG(info, "cannot allocate memory for tunables");
5887                 GOTO(out, rc = -ENOMEM);
5888         }
5889
5890         /* Use LND defaults */
5891         tun->lt_cmn.lct_peer_timeout = -1;
5892         tun->lt_cmn.lct_peer_tx_credits = -1;
5893         tun->lt_cmn.lct_peer_rtr_credits = -1;
5894         tun->lt_cmn.lct_max_tx_credits = -1;
5895         conf->lic_ncpts = 0;
5896
5897         nla_for_each_nested(settings, entry, rem3) {
5898                 if (nla_type(settings) != LN_SCALAR_ATTR_VALUE)
5899                         continue;
5900
5901                 if (nla_strcmp(settings, "interfaces") == 0) {
5902                         struct nlattr *intf;
5903                         int rem4;
5904
5905                         settings = nla_next(settings, &rem3);
5906                         if (nla_type(settings) !=
5907                             LN_SCALAR_ATTR_LIST) {
5908                                 GENL_SET_ERR_MSG(info,
5909                                                  "invalid interfaces");
5910                                 GOTO(out, rc = -EINVAL);
5911                         }
5912
5913                         nla_for_each_nested(intf, settings, rem4) {
5914                                 intf = nla_next(intf, &rem4);
5915                                 if (nla_type(intf) !=
5916                                     LN_SCALAR_ATTR_VALUE) {
5917                                         GENL_SET_ERR_MSG(info,
5918                                                          "cannot parse interface");
5919                                         GOTO(out, rc = -EINVAL);
5920                                 }
5921
5922                                 rc = nla_strscpy(conf->lic_ni_intf, intf,
5923                                                  sizeof(conf->lic_ni_intf));
5924                                 if (rc < 0) {
5925                                         GENL_SET_ERR_MSG(info,
5926                                                          "failed to parse interfaces");
5927                                         GOTO(out, rc);
5928                                 }
5929                         }
5930                         *ni_list = true;
5931                 } else if (nla_strcmp(settings, "tunables") == 0) {
5932                         settings = nla_next(settings, &rem3);
5933                         if (nla_type(settings) !=
5934                             LN_SCALAR_ATTR_LIST) {
5935                                 GENL_SET_ERR_MSG(info,
5936                                                  "invalid tunables");
5937                                 GOTO(out, rc = -EINVAL);
5938                         }
5939
5940                         rc = lnet_genl_parse_tunables(settings, tun);
5941                         if (rc < 0) {
5942                                 GENL_SET_ERR_MSG(info,
5943                                                  "failed to parse tunables");
5944                                 GOTO(out, rc);
5945                         }
5946                 } else if ((nla_strcmp(settings, "lnd tunables") == 0)) {
5947                         const struct lnet_lnd *lnd;
5948
5949                         lnd = lnet_load_lnd(LNET_NETTYP(net_id));
5950                         if (IS_ERR(lnd)) {
5951                                 GENL_SET_ERR_MSG(info,
5952                                                  "LND type not supported");
5953                                 GOTO(out, rc = PTR_ERR(lnd));
5954                         }
5955
5956                         settings = nla_next(settings, &rem3);
5957                         if (nla_type(settings) !=
5958                             LN_SCALAR_ATTR_LIST) {
5959                                 GENL_SET_ERR_MSG(info,
5960                                                  "lnd tunables should be list\n");
5961                                 GOTO(out, rc = -EINVAL);
5962                         }
5963
5964                         rc = lnet_genl_parse_lnd_tunables(settings,
5965                                                           &tun->lt_tun, lnd);
5966                         if (rc < 0) {
5967                                 GENL_SET_ERR_MSG(info,
5968                                                  "failed to parse lnd tunables");
5969                                 GOTO(out, rc);
5970                         }
5971                 } else if (nla_strcmp(settings, "CPT") == 0) {
5972                         struct nlattr *cpt;
5973                         int rem4;
5974
5975                         settings = nla_next(settings, &rem3);
5976                         if (nla_type(settings) != LN_SCALAR_ATTR_LIST) {
5977                                 GENL_SET_ERR_MSG(info,
5978                                                  "CPT should be list");
5979                                 GOTO(out, rc = -EINVAL);
5980                         }
5981
5982                         nla_for_each_nested(cpt, settings, rem4) {
5983                                 s64 core;
5984
5985                                 if (nla_type(cpt) !=
5986                                     LN_SCALAR_ATTR_INT_VALUE) {
5987                                         GENL_SET_ERR_MSG(info,
5988                                                          "invalid CPT config");
5989                                         GOTO(out, rc = -EINVAL);
5990                                 }
5991
5992                                 core = nla_get_s64(cpt);
5993                                 if (core >= LNET_CPT_NUMBER) {
5994                                         GENL_SET_ERR_MSG(info,
5995                                                          "invalid CPT value");
5996                                         GOTO(out, rc = -ERANGE);
5997                                 }
5998
5999                                 conf->lic_cpts[conf->lic_ncpts] = core;
6000                                 conf->lic_ncpts++;
6001                         }
6002                 }
6003         }
6004
6005         if (!create) {
6006                 struct lnet_net *net;
6007                 struct lnet_ni *ni;
6008
6009                 rc = -ENODEV;
6010                 if (!strlen(conf->lic_ni_intf)) {
6011                         GENL_SET_ERR_MSG(info,
6012                                          "interface is missing");
6013                         GOTO(out, rc);
6014                 }
6015
6016                 lnet_net_lock(LNET_LOCK_EX);
6017                 net = lnet_get_net_locked(net_id);
6018                 if (!net) {
6019                         GENL_SET_ERR_MSG(info,
6020                                          "LNet net doesn't exist");
6021                         lnet_net_unlock(LNET_LOCK_EX);
6022                         GOTO(out, rc);
6023                 }
6024
6025                 list_for_each_entry(ni, &net->net_ni_list,
6026                                     ni_netlist) {
6027                         if (!ni->ni_interface ||
6028                             strcmp(ni->ni_interface,
6029                                   conf->lic_ni_intf) != 0)
6030                                 continue;
6031
6032                         lnet_net_unlock(LNET_LOCK_EX);
6033                         rc = lnet_dyn_del_ni(&ni->ni_nid);
6034                         if (rc < 0) {
6035                                 GENL_SET_ERR_MSG(info,
6036                                                  "cannot del LNet NI");
6037                                 GOTO(out, rc);
6038                         }
6039                         break;
6040                 }
6041
6042                 if (rc < 0) { /* will be -ENODEV */
6043                         GENL_SET_ERR_MSG(info,
6044                                          "interface invalid for deleting LNet NI");
6045                         lnet_net_unlock(LNET_LOCK_EX);
6046                 }
6047         } else {
6048                 if (!strlen(conf->lic_ni_intf)) {
6049                         GENL_SET_ERR_MSG(info,
6050                                          "interface is missing");
6051                         GOTO(out, rc);
6052                 }
6053
6054                 rc = lnet_dyn_add_ni(conf, net_id, tun);
6055                 switch (rc) {
6056                 case -ENOENT:
6057                         GENL_SET_ERR_MSG(info,
6058                                          "cannot parse net");
6059                         break;
6060                 case -ERANGE:
6061                         GENL_SET_ERR_MSG(info,
6062                                          "invalid CPT set");
6063                         break;
6064                 default:
6065                         GENL_SET_ERR_MSG(info,
6066                                          "cannot add LNet NI");
6067                 case 0:
6068                         break;
6069                 }
6070         }
6071 out:
6072         if (tun)
6073                 LIBCFS_FREE(tun, sizeof(struct lnet_ioctl_config_lnd_tunables));
6074
6075         return rc;
6076 }
6077
6078 static int lnet_net_cmd(struct sk_buff *skb, struct genl_info *info)
6079 {
6080         struct nlmsghdr *nlh = nlmsg_hdr(skb);
6081         struct genlmsghdr *gnlh = nlmsg_data(nlh);
6082         struct nlattr *params = genlmsg_data(gnlh);
6083         int msg_len, rem, rc = 0;
6084         struct nlattr *attr;
6085
6086         msg_len = genlmsg_len(gnlh);
6087         if (!msg_len) {
6088                 GENL_SET_ERR_MSG(info, "no configuration");
6089                 return -ENOMSG;
6090         }
6091
6092         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
6093                 GENL_SET_ERR_MSG(info, "invalid configuration");
6094                 return -EINVAL;
6095         }
6096
6097         nla_for_each_nested(attr, params, rem) {
6098                 bool ni_list = false, ipnets = false;
6099                 struct lnet_ioctl_config_ni conf;
6100                 u32 net_id = LNET_NET_ANY;
6101                 struct nlattr *entry;
6102                 int rem2;
6103
6104                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6105                         continue;
6106
6107                 nla_for_each_nested(entry, attr, rem2) {
6108                         switch (nla_type(entry)) {
6109                         case LN_SCALAR_ATTR_VALUE: {
6110                                 ssize_t len;
6111
6112                                 memset(&conf, 0, sizeof(conf));
6113                                 if (nla_strcmp(entry, "ip2net") == 0) {
6114                                         entry = nla_next(entry, &rem2);
6115                                         if (nla_type(entry) !=
6116                                             LN_SCALAR_ATTR_VALUE) {
6117                                                 GENL_SET_ERR_MSG(info,
6118                                                                  "ip2net has invalid key");
6119                                                 GOTO(out, rc = -EINVAL);
6120                                         }
6121
6122                                         len = nla_strscpy(conf.lic_legacy_ip2nets,
6123                                                           entry,
6124                                                           sizeof(conf.lic_legacy_ip2nets));
6125                                         if (len < 0) {
6126                                                 GENL_SET_ERR_MSG(info,
6127                                                                  "ip2net key string is invalid");
6128                                                 GOTO(out, rc = len);
6129                                         }
6130                                         ni_list = true;
6131                                         ipnets = true;
6132                                 } else if (nla_strcmp(entry, "net type") == 0) {
6133                                         char tmp[LNET_NIDSTR_SIZE];
6134
6135                                         entry = nla_next(entry, &rem2);
6136                                         if (nla_type(entry) !=
6137                                             LN_SCALAR_ATTR_VALUE) {
6138                                                 GENL_SET_ERR_MSG(info,
6139                                                                  "net type has invalid key");
6140                                                 GOTO(out, rc = -EINVAL);
6141                                         }
6142
6143                                         len = nla_strscpy(tmp, entry,
6144                                                           sizeof(tmp));
6145                                         if (len < 0) {
6146                                                 GENL_SET_ERR_MSG(info,
6147                                                                  "net type key string is invalid");
6148                                                 GOTO(out, rc = len);
6149                                         }
6150
6151                                         net_id = libcfs_str2net(tmp);
6152                                         if (!net_id) {
6153                                                 GENL_SET_ERR_MSG(info,
6154                                                                  "cannot parse net");
6155                                                 GOTO(out, rc = -ENODEV);
6156                                         }
6157                                         if (LNET_NETTYP(net_id) == LOLND) {
6158                                                 GENL_SET_ERR_MSG(info,
6159                                                                  "setting @lo not allowed");
6160                                                 GOTO(out, rc = -ENODEV);
6161                                         }
6162                                         conf.lic_legacy_ip2nets[0] = '\0';
6163                                         conf.lic_ni_intf[0] = '\0';
6164                                         ni_list = false;
6165                                 }
6166                                 if (rc < 0)
6167                                         GOTO(out, rc);
6168                                 break;
6169                         }
6170                         case LN_SCALAR_ATTR_LIST: {
6171                                 struct nlattr *interface;
6172                                 int rem3;
6173
6174                                 ipnets = false;
6175                                 nla_for_each_nested(interface, entry, rem3) {
6176                                         rc = lnet_genl_parse_local_ni(interface, info,
6177                                                                       net_id, &conf,
6178                                                                       &ni_list);
6179                                         if (rc < 0)
6180                                                 GOTO(out, rc);
6181                                 }
6182                                 break;
6183                         }
6184                         /* it is possible a newer version of the user land send
6185                          * values older kernels doesn't handle. So silently
6186                          * ignore these values
6187                          */
6188                         default:
6189                                 break;
6190                         }
6191                 }
6192
6193                 /* Handle case of just sent NET with no list of NIDs */
6194                 if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && !ni_list) {
6195                         rc = lnet_dyn_del_net(net_id);
6196                         if (rc < 0) {
6197                                 GENL_SET_ERR_MSG(info,
6198                                                  "cannot del network");
6199                         }
6200                 } else if ((info->nlhdr->nlmsg_flags & NLM_F_CREATE) &&
6201                            ipnets && ni_list) {
6202                         rc = lnet_handle_legacy_ip2nets(conf.lic_legacy_ip2nets,
6203                                                         NULL);
6204                         if (rc < 0)
6205                                 GENL_SET_ERR_MSG(info,
6206                                                  "cannot setup ip2nets");
6207                 }
6208         }
6209 out:
6210         return rc;
6211 }
6212
6213 /* Called with ln_api_mutex */
6214 static int lnet_parse_peer_nis(struct nlattr *rlist, struct genl_info *info,
6215                                struct lnet_nid *pnid, bool mr,
6216                                bool *create_some)
6217 {
6218         struct lnet_nid snid = LNET_ANY_NID;
6219         struct nlattr *props;
6220         int rem, rc = 0;
6221         s64 num = -1;
6222
6223         nla_for_each_nested(props, rlist, rem) {
6224                 if (nla_type(props) != LN_SCALAR_ATTR_VALUE)
6225                         continue;
6226
6227                 if (nla_strcmp(props, "nid") == 0) {
6228                         char nidstr[LNET_NIDSTR_SIZE];
6229
6230                         props = nla_next(props, &rem);
6231                         if (nla_type(props) != LN_SCALAR_ATTR_VALUE) {
6232                                 GENL_SET_ERR_MSG(info,
6233                                                  "invalid secondary NID");
6234                                 GOTO(report_err, rc = -EINVAL);
6235                         }
6236
6237                         rc = nla_strscpy(nidstr, props, sizeof(nidstr));
6238                         if (rc < 0) {
6239                                 GENL_SET_ERR_MSG(info,
6240                                                  "failed to get secondary NID");
6241                                 GOTO(report_err, rc);
6242                         }
6243
6244                         rc = libcfs_strnid(&snid, strim(nidstr));
6245                         if (rc < 0) {
6246                                 GENL_SET_ERR_MSG(info, "unsupported secondary NID");
6247                                 GOTO(report_err, rc);
6248                         }
6249
6250                         if (LNET_NID_IS_ANY(&snid) || nid_same(&snid, pnid))
6251                                 *create_some = false;
6252                 } else if (nla_strcmp(props, "health stats") == 0) {
6253                         struct nlattr *health;
6254                         int rem2;
6255
6256                         props = nla_next(props, &rem);
6257                         if (nla_type(props) !=
6258                               LN_SCALAR_ATTR_LIST) {
6259                                 GENL_SET_ERR_MSG(info,
6260                                                  "invalid health configuration");
6261                                 GOTO(report_err, rc = -EINVAL);
6262                         }
6263
6264                         nla_for_each_nested(health, props, rem2) {
6265                                 if (nla_type(health) != LN_SCALAR_ATTR_VALUE ||
6266                                     nla_strcmp(health, "health value") != 0) {
6267                                         GENL_SET_ERR_MSG(info,
6268                                                          "wrong health config format");
6269                                         GOTO(report_err, rc = -EINVAL);
6270                                 }
6271
6272                                 health = nla_next(health, &rem2);
6273                                 if (nla_type(health) !=
6274                                     LN_SCALAR_ATTR_INT_VALUE) {
6275                                         GENL_SET_ERR_MSG(info,
6276                                                          "invalid health config format");
6277                                         GOTO(report_err, rc = -EINVAL);
6278                                 }
6279
6280                                 num = nla_get_s64(health);
6281                                 clamp_t(s64, num, 0, LNET_MAX_HEALTH_VALUE);
6282                         }
6283                 }
6284         }
6285
6286         if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE && num != -1) {
6287                 lnet_peer_ni_set_healthv(pnid, num, !*create_some);
6288         } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
6289                 bool lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6290
6291                 rc = lnet_user_add_peer_ni(pnid, &snid, mr, lock_prim);
6292                 if (rc < 0)
6293                         GENL_SET_ERR_MSG(info,
6294                                          "failed to add peer");
6295         } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && *create_some) {
6296                 bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6297
6298                 rc = lnet_del_peer_ni(pnid, &snid, force);
6299                 if (rc < 0)
6300                         GENL_SET_ERR_MSG(info,
6301                                          "failed to del peer");
6302         }
6303 report_err:
6304         return rc;
6305 }
6306
6307 static int lnet_peer_ni_cmd(struct sk_buff *skb, struct genl_info *info)
6308 {
6309         struct nlmsghdr *nlh = nlmsg_hdr(skb);
6310         struct genlmsghdr *gnlh = nlmsg_data(nlh);
6311         struct nlattr *params = genlmsg_data(gnlh);
6312         int msg_len, rem, rc = 0;
6313         struct lnet_nid pnid;
6314         struct nlattr *attr;
6315
6316         mutex_lock(&the_lnet.ln_api_mutex);
6317         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
6318                 GENL_SET_ERR_MSG(info, "Network is down");
6319                 mutex_unlock(&the_lnet.ln_api_mutex);
6320                 return -ENETDOWN;
6321         }
6322
6323         msg_len = genlmsg_len(gnlh);
6324         if (!msg_len) {
6325                 GENL_SET_ERR_MSG(info, "no configuration");
6326                 mutex_unlock(&the_lnet.ln_api_mutex);
6327                 return -ENOMSG;
6328         }
6329
6330         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
6331                 GENL_SET_ERR_MSG(info, "invalid configuration");
6332                 mutex_unlock(&the_lnet.ln_api_mutex);
6333                 return -EINVAL;
6334         }
6335
6336         nla_for_each_nested(attr, params, rem) {
6337                 bool parse_peer_nis = false;
6338                 struct nlattr *pnid_prop;
6339                 int rem2;
6340
6341                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6342                         continue;
6343
6344                 pnid = LNET_ANY_NID;
6345                 nla_for_each_nested(pnid_prop, attr, rem2) {
6346                         bool mr = true;
6347
6348                         if (nla_type(pnid_prop) != LN_SCALAR_ATTR_VALUE)
6349                                 continue;
6350
6351                         if (nla_strcmp(pnid_prop, "primary nid") == 0) {
6352                                 char nidstr[LNET_NIDSTR_SIZE];
6353
6354                                 pnid_prop = nla_next(pnid_prop, &rem2);
6355                                 if (nla_type(pnid_prop) !=
6356                                     LN_SCALAR_ATTR_VALUE) {
6357                                         GENL_SET_ERR_MSG(info,
6358                                                           "invalid primary NID type");
6359                                         GOTO(report_err, rc = -EINVAL);
6360                                 }
6361
6362                                 rc = nla_strscpy(nidstr, pnid_prop,
6363                                                  sizeof(nidstr));
6364                                 if (rc < 0) {
6365                                         GENL_SET_ERR_MSG(info,
6366                                                          "failed to get primary NID");
6367                                         GOTO(report_err, rc);
6368                                 }
6369
6370                                 rc = libcfs_strnid(&pnid, strim(nidstr));
6371                                 if (rc < 0) {
6372                                         GENL_SET_ERR_MSG(info,
6373                                                          "unsupported primary NID");
6374                                         GOTO(report_err, rc);
6375                                 }
6376
6377                                 /* we must create primary NID for peer ni
6378                                  * creation
6379                                  */
6380                                 if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
6381                                         bool lock_prim;
6382
6383                                         lock_prim = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6384                                         rc = lnet_user_add_peer_ni(&pnid,
6385                                                                    &LNET_ANY_NID,
6386                                                                    true, lock_prim);
6387                                         if (rc < 0) {
6388                                                 GENL_SET_ERR_MSG(info,
6389                                                                  "failed to add primary peer");
6390                                                 GOTO(report_err, rc);
6391                                         }
6392                                 }
6393                         } else if (nla_strcmp(pnid_prop, "Multi-Rail") == 0) {
6394                                 pnid_prop = nla_next(pnid_prop, &rem2);
6395                                 if (nla_type(pnid_prop) !=
6396                                     LN_SCALAR_ATTR_INT_VALUE) {
6397                                         GENL_SET_ERR_MSG(info,
6398                                                           "invalid MR flag param");
6399                                         GOTO(report_err, rc = -EINVAL);
6400                                 }
6401
6402                                 if (nla_get_s64(pnid_prop) == 0)
6403                                         mr = false;
6404                         } else if (nla_strcmp(pnid_prop, "peer state") == 0) {
6405                                 struct lnet_peer_ni *lpni;
6406                                 struct lnet_peer *lp;
6407
6408                                 pnid_prop = nla_next(pnid_prop, &rem2);
6409                                 if (nla_type(pnid_prop) !=
6410                                     LN_SCALAR_ATTR_INT_VALUE) {
6411                                         GENL_SET_ERR_MSG(info,
6412                                                           "invalid peer state param");
6413                                         GOTO(report_err, rc = -EINVAL);
6414                                 }
6415
6416                                 lpni = lnet_peer_ni_find_locked(&pnid);
6417                                 if (!lpni) {
6418                                         GENL_SET_ERR_MSG(info,
6419                                                           "invalid peer state param");
6420                                         GOTO(report_err, rc = -ENOENT);
6421                                 }
6422                                 lnet_peer_ni_decref_locked(lpni);
6423                                 lp = lpni->lpni_peer_net->lpn_peer;
6424                                 lp->lp_state = nla_get_s64(pnid_prop);
6425                         } else if (nla_strcmp(pnid_prop, "peer ni") == 0) {
6426                                 struct nlattr *rlist;
6427                                 int rem3;
6428
6429                                 if (LNET_NID_IS_ANY(&pnid)) {
6430                                         GENL_SET_ERR_MSG(info,
6431                                                          "missing required primary NID");
6432                                         GOTO(report_err, rc);
6433                                 }
6434
6435                                 pnid_prop = nla_next(pnid_prop, &rem2);
6436                                 if (nla_type(pnid_prop) !=
6437                                     LN_SCALAR_ATTR_LIST) {
6438                                         GENL_SET_ERR_MSG(info,
6439                                                           "invalid NIDs list");
6440                                         GOTO(report_err, rc = -EINVAL);
6441                                 }
6442
6443                                 parse_peer_nis = true;
6444                                 nla_for_each_nested(rlist, pnid_prop, rem3) {
6445                                         rc = lnet_parse_peer_nis(rlist, info,
6446                                                                  &pnid, mr,
6447                                                                  &parse_peer_nis);
6448                                         if (rc < 0)
6449                                                 GOTO(report_err, rc);
6450                                 }
6451                         }
6452                 }
6453
6454                 /* If we have remote peer ni's we already add /del peers */
6455                 if (parse_peer_nis)
6456                         continue;
6457
6458                 if (LNET_NID_IS_ANY(&pnid)) {
6459                         GENL_SET_ERR_MSG(info, "missing primary NID");
6460                         GOTO(report_err, rc);
6461                 }
6462
6463                 if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
6464                         bool force = info->nlhdr->nlmsg_flags & NLM_F_EXCL;
6465
6466                         rc = lnet_del_peer_ni(&pnid, &LNET_ANY_NID,
6467                                               force);
6468                         if (rc < 0) {
6469                                 GENL_SET_ERR_MSG(info,
6470                                                  "failed to del primary peer");
6471                                 GOTO(report_err, rc);
6472                         }
6473                 }
6474         }
6475 report_err:
6476         /* If we failed on creation and encounter a latter error then
6477          * delete the primary nid.
6478          */
6479         if (rc < 0 && info->nlhdr->nlmsg_flags & NLM_F_CREATE &&
6480             !LNET_NID_IS_ANY(&pnid))
6481                 lnet_del_peer_ni(&pnid, &LNET_ANY_NID,
6482                                  info->nlhdr->nlmsg_flags & NLM_F_EXCL);
6483         mutex_unlock(&the_lnet.ln_api_mutex);
6484
6485         return rc;
6486 }
6487
6488 /** LNet route handling */
6489
6490 /* We can't use struct lnet_ioctl_config_data since it lacks
6491  * support for large NIDS
6492  */
6493 struct lnet_route_properties {
6494         struct lnet_nid         lrp_gateway;
6495         u32                     lrp_net;
6496         s32                     lrp_hop;
6497         u32                     lrp_flags;
6498         u32                     lrp_priority;
6499         u32                     lrp_sensitivity;
6500 };
6501
6502 struct lnet_genl_route_list {
6503         unsigned int                            lgrl_index;
6504         unsigned int                            lgrl_count;
6505         GENRADIX(struct lnet_route_properties)  lgrl_list;
6506 };
6507
6508 static inline struct lnet_genl_route_list *
6509 lnet_route_dump_ctx(struct netlink_callback *cb)
6510 {
6511         return (struct lnet_genl_route_list *)cb->args[0];
6512 }
6513
6514 static int lnet_route_show_done(struct netlink_callback *cb)
6515 {
6516         struct lnet_genl_route_list *rlist = lnet_route_dump_ctx(cb);
6517
6518         if (rlist) {
6519                 genradix_free(&rlist->lgrl_list);
6520                 CFS_FREE_PTR(rlist);
6521         }
6522         cb->args[0] = 0;
6523
6524         return 0;
6525 }
6526
6527 static int lnet_scan_route(struct lnet_genl_route_list *rlist,
6528                     struct lnet_route_properties *settings)
6529 {
6530         struct lnet_remotenet *rnet;
6531         struct list_head *rn_list;
6532         struct lnet_route *route;
6533         int cpt, i, rc = 0;
6534
6535         cpt = lnet_net_lock_current();
6536
6537         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
6538                 rn_list = &the_lnet.ln_remote_nets_hash[i];
6539                 list_for_each_entry(rnet, rn_list, lrn_list) {
6540                         if (settings->lrp_net != LNET_NET_ANY &&
6541                             settings->lrp_net != rnet->lrn_net)
6542                                 continue;
6543
6544                         list_for_each_entry(route, &rnet->lrn_routes,
6545                                             lr_list) {
6546                                 struct lnet_route_properties *prop;
6547
6548                                 if (!LNET_NID_IS_ANY(&settings->lrp_gateway) &&
6549                                     !nid_same(&settings->lrp_gateway,
6550                                               &route->lr_nid)) {
6551                                         continue;
6552                                 }
6553
6554                                 if (settings->lrp_hop != -1 &&
6555                                     settings->lrp_hop != route->lr_hops)
6556                                         continue;
6557
6558                                 if (settings->lrp_priority != -1 &&
6559                                     settings->lrp_priority != route->lr_priority)
6560                                         continue;
6561
6562                                 if (settings->lrp_sensitivity != -1 &&
6563                                     settings->lrp_sensitivity !=
6564                                     route->lr_gateway->lp_health_sensitivity)
6565                                         continue;
6566
6567                                 prop = genradix_ptr_alloc(&rlist->lgrl_list,
6568                                                           rlist->lgrl_count++,
6569                                                           GFP_KERNEL);
6570                                 if (!prop)
6571                                         GOTO(failed_alloc, rc = -ENOMEM);
6572
6573                                 prop->lrp_net = rnet->lrn_net;
6574                                 prop->lrp_gateway = route->lr_nid;
6575                                 prop->lrp_hop = route->lr_hops;
6576                                 prop->lrp_priority = route->lr_priority;
6577                                 prop->lrp_sensitivity =
6578                                         route->lr_gateway->lp_health_sensitivity;
6579                                 if (lnet_is_route_alive(route))
6580                                         prop->lrp_flags |= LNET_RT_ALIVE;
6581                                 else
6582                                         prop->lrp_flags &= ~LNET_RT_ALIVE;
6583                                 if (route->lr_single_hop)
6584                                         prop->lrp_flags &= ~LNET_RT_MULTI_HOP;
6585                                 else
6586                                         prop->lrp_flags |= LNET_RT_MULTI_HOP;
6587                         }
6588                 }
6589         }
6590
6591 failed_alloc:
6592         lnet_net_unlock(cpt);
6593         return rc;
6594 }
6595
6596 /* LNet route ->start() handler for GET requests */
6597 static int lnet_route_show_start(struct netlink_callback *cb)
6598 {
6599         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6600 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6601         struct netlink_ext_ack *extack = NULL;
6602 #endif
6603         struct lnet_genl_route_list *rlist;
6604         int msg_len = genlmsg_len(gnlh);
6605         int rc = 0;
6606
6607 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6608         extack = cb->extack;
6609 #endif
6610         if (the_lnet.ln_refcount == 0 ||
6611             the_lnet.ln_state != LNET_STATE_RUNNING) {
6612                 NL_SET_ERR_MSG(extack, "Network is down");
6613                 return -ENETDOWN;
6614         }
6615
6616         CFS_ALLOC_PTR(rlist);
6617         if (!rlist) {
6618                 NL_SET_ERR_MSG(extack, "No memory for route list");
6619                 return -ENOMEM;
6620         }
6621
6622         genradix_init(&rlist->lgrl_list);
6623         rlist->lgrl_count = 0;
6624         rlist->lgrl_index = 0;
6625         cb->args[0] = (long)rlist;
6626
6627         mutex_lock(&the_lnet.ln_api_mutex);
6628         if (!msg_len) {
6629                 struct lnet_route_properties tmp = {
6630                         .lrp_gateway            = LNET_ANY_NID,
6631                         .lrp_net                = LNET_NET_ANY,
6632                         .lrp_hop                = -1,
6633                         .lrp_priority           = -1,
6634                         .lrp_sensitivity        = -1,
6635                 };
6636
6637                 rc = lnet_scan_route(rlist, &tmp);
6638                 if (rc < 0) {
6639                         NL_SET_ERR_MSG(extack,
6640                                        "failed to allocate router data");
6641                         GOTO(report_err, rc);
6642                 }
6643         } else {
6644                 struct nlattr *params = genlmsg_data(gnlh);
6645                 struct nlattr *attr;
6646                 int rem;
6647
6648                 nla_for_each_nested(attr, params, rem) {
6649                         struct lnet_route_properties tmp = {
6650                                 .lrp_gateway            = LNET_ANY_NID,
6651                                 .lrp_net                = LNET_NET_ANY,
6652                                 .lrp_hop                = -1,
6653                                 .lrp_priority           = -1,
6654                                 .lrp_sensitivity        = -1,
6655                         };
6656                         struct nlattr *route;
6657                         int rem2;
6658
6659                         if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6660                                 continue;
6661
6662                         nla_for_each_nested(route, attr, rem2) {
6663                                 if (nla_type(route) != LN_SCALAR_ATTR_VALUE)
6664                                         continue;
6665
6666                                 if (nla_strcmp(route, "net") == 0) {
6667                                         char nw[LNET_NIDSTR_SIZE];
6668
6669                                         route = nla_next(route, &rem2);
6670                                         if (nla_type(route) !=
6671                                             LN_SCALAR_ATTR_VALUE) {
6672                                                 NL_SET_ERR_MSG(extack,
6673                                                                "invalid net param");
6674                                                 GOTO(report_err, rc = -EINVAL);
6675                                         }
6676
6677                                         rc = nla_strscpy(nw, route, sizeof(nw));
6678                                         if (rc < 0) {
6679                                                 NL_SET_ERR_MSG(extack,
6680                                                                "failed to get route param");
6681                                                 GOTO(report_err, rc);
6682                                         }
6683                                         rc = 0;
6684                                         tmp.lrp_net = libcfs_str2net(strim(nw));
6685                                 } else if (nla_strcmp(route, "gateway") == 0) {
6686                                         char gw[LNET_NIDSTR_SIZE];
6687
6688                                         route = nla_next(route, &rem2);
6689                                         if (nla_type(route) !=
6690                                             LN_SCALAR_ATTR_VALUE) {
6691                                                 NL_SET_ERR_MSG(extack,
6692                                                                "invalid gateway param");
6693                                                 GOTO(report_err, rc = -EINVAL);
6694                                         }
6695
6696                                         rc = nla_strscpy(gw, route, sizeof(gw));
6697                                         if (rc < 0) {
6698                                                 NL_SET_ERR_MSG(extack,
6699                                                                "failed to get route param");
6700                                                 GOTO(report_err, rc);
6701                                         }
6702
6703                                         rc = libcfs_strnid(&tmp.lrp_gateway, strim(gw));
6704                                         if (rc < 0) {
6705                                                 NL_SET_ERR_MSG(extack,
6706                                                                "cannot parse gateway");
6707                                                 GOTO(report_err, rc = -ENODEV);
6708                                         }
6709                                         rc = 0;
6710                                 } else if (nla_strcmp(route, "hop") == 0) {
6711                                         route = nla_next(route, &rem2);
6712                                         if (nla_type(route) !=
6713                                             LN_SCALAR_ATTR_INT_VALUE) {
6714                                                 NL_SET_ERR_MSG(extack,
6715                                                                "invalid hop param");
6716                                                 GOTO(report_err, rc = -EINVAL);
6717                                         }
6718
6719                                         tmp.lrp_hop = nla_get_s64(route);
6720                                         if (tmp.lrp_hop != -1)
6721                                                 clamp_t(s32, tmp.lrp_hop, 1, 127);
6722                                 } else if (nla_strcmp(route, "priority") == 0) {
6723                                         route = nla_next(route, &rem2);
6724                                         if (nla_type(route) !=
6725                                             LN_SCALAR_ATTR_INT_VALUE) {
6726                                                 NL_SET_ERR_MSG(extack,
6727                                                                "invalid priority param");
6728                                                 GOTO(report_err, rc = -EINVAL);
6729                                         }
6730
6731                                         tmp.lrp_priority = nla_get_s64(route);
6732                                 }
6733                         }
6734
6735                         rc = lnet_scan_route(rlist, &tmp);
6736                         if (rc < 0) {
6737                                 NL_SET_ERR_MSG(extack,
6738                                                "failed to allocate router data");
6739                                 GOTO(report_err, rc);
6740                         }
6741                 }
6742         }
6743 report_err:
6744         mutex_unlock(&the_lnet.ln_api_mutex);
6745
6746         if (rc < 0)
6747                 lnet_route_show_done(cb);
6748
6749         return rc;
6750 }
6751
6752 static const struct ln_key_list route_props_list = {
6753         .lkl_maxattr                    = LNET_ROUTE_ATTR_MAX,
6754         .lkl_list                       = {
6755                 [LNET_ROUTE_ATTR_HDR]                   = {
6756                         .lkp_value                      = "route",
6757                         .lkp_key_format                 = LNKF_SEQUENCE | LNKF_MAPPING,
6758                         .lkp_data_type                  = NLA_NUL_STRING,
6759                 },
6760                 [LNET_ROUTE_ATTR_NET]                   = {
6761                         .lkp_value                      = "net",
6762                         .lkp_data_type                  = NLA_STRING
6763                 },
6764                 [LNET_ROUTE_ATTR_GATEWAY]               = {
6765                         .lkp_value                      = "gateway",
6766                         .lkp_data_type                  = NLA_STRING
6767                 },
6768                 [LNET_ROUTE_ATTR_HOP]                   = {
6769                         .lkp_value                      = "hop",
6770                         .lkp_data_type                  = NLA_S32
6771                 },
6772                 [LNET_ROUTE_ATTR_PRIORITY]              = {
6773                         .lkp_value                      = "priority",
6774                         .lkp_data_type                  = NLA_U32
6775                 },
6776                 [LNET_ROUTE_ATTR_HEALTH_SENSITIVITY]    = {
6777                         .lkp_value                      = "health_sensitivity",
6778                         .lkp_data_type                  = NLA_U32
6779                 },
6780                 [LNET_ROUTE_ATTR_STATE] = {
6781                         .lkp_value                      = "state",
6782                         .lkp_data_type                  = NLA_STRING,
6783                 },
6784                 [LNET_ROUTE_ATTR_TYPE]  = {
6785                         .lkp_value                      = "type",
6786                         .lkp_data_type                  = NLA_STRING,
6787                 },
6788         },
6789 };
6790
6791
6792 static int lnet_route_show_dump(struct sk_buff *msg,
6793                                 struct netlink_callback *cb)
6794 {
6795         struct lnet_genl_route_list *rlist = lnet_route_dump_ctx(cb);
6796         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6797 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6798         struct netlink_ext_ack *extack = NULL;
6799 #endif
6800         int portid = NETLINK_CB(cb->skb).portid;
6801         int seq = cb->nlh->nlmsg_seq;
6802         int idx = rlist->lgrl_index;
6803         int rc = 0;
6804
6805 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6806         extack = cb->extack;
6807 #endif
6808         if (!rlist->lgrl_count) {
6809                 NL_SET_ERR_MSG(extack, "No routes found");
6810                 GOTO(send_error, rc = -ENOENT);
6811         }
6812
6813         if (!idx) {
6814                 const struct ln_key_list *all[] = {
6815                         &route_props_list, NULL
6816                 };
6817
6818                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
6819                                                 &lnet_family,
6820                                                 NLM_F_CREATE | NLM_F_MULTI,
6821                                                 LNET_CMD_ROUTES, all);
6822                 if (rc < 0) {
6823                         NL_SET_ERR_MSG(extack, "failed to send key table");
6824                         GOTO(send_error, rc);
6825                 }
6826         }
6827
6828         /* If not routes found send an empty message and not an error */
6829         if (!rlist->lgrl_count) {
6830                 void *hdr;
6831
6832                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
6833                                   NLM_F_MULTI, LNET_CMD_ROUTES);
6834                 if (!hdr) {
6835                         NL_SET_ERR_MSG(extack, "failed to send values");
6836                         genlmsg_cancel(msg, hdr);
6837                         GOTO(send_error, rc = -EMSGSIZE);
6838                 }
6839                 genlmsg_end(msg, hdr);
6840
6841                 goto send_error;
6842         }
6843
6844         while (idx < rlist->lgrl_count) {
6845                 struct lnet_route_properties *prop;
6846                 void *hdr;
6847
6848                 prop = genradix_ptr(&rlist->lgrl_list, idx++);
6849
6850                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
6851                                   NLM_F_MULTI, LNET_CMD_ROUTES);
6852                 if (!hdr) {
6853                         NL_SET_ERR_MSG(extack, "failed to send values");
6854                         genlmsg_cancel(msg, hdr);
6855                         GOTO(send_error, rc = -EMSGSIZE);
6856                 }
6857
6858                 if (idx == 1)
6859                         nla_put_string(msg, LNET_ROUTE_ATTR_HDR, "");
6860
6861                 nla_put_string(msg, LNET_ROUTE_ATTR_NET,
6862                                libcfs_net2str(prop->lrp_net));
6863                 nla_put_string(msg, LNET_ROUTE_ATTR_GATEWAY,
6864                                libcfs_nidstr(&prop->lrp_gateway));
6865                 if (gnlh->version) {
6866                         nla_put_s32(msg, LNET_ROUTE_ATTR_HOP, prop->lrp_hop);
6867                         nla_put_u32(msg, LNET_ROUTE_ATTR_PRIORITY, prop->lrp_priority);
6868                         nla_put_u32(msg, LNET_ROUTE_ATTR_HEALTH_SENSITIVITY,
6869                                     prop->lrp_sensitivity);
6870
6871                         nla_put_string(msg, LNET_ROUTE_ATTR_STATE,
6872                                        prop->lrp_flags & LNET_RT_ALIVE ?
6873                                        "up" : "down");
6874                         nla_put_string(msg, LNET_ROUTE_ATTR_TYPE,
6875                                        prop->lrp_flags & LNET_RT_MULTI_HOP ?
6876                                        "multi-hop" : "single-hop");
6877                 }
6878                 genlmsg_end(msg, hdr);
6879         }
6880         rlist->lgrl_index = idx;
6881 send_error:
6882         return lnet_nl_send_error(cb->skb, portid, seq, rc);
6883 };
6884
6885 #ifndef HAVE_NETLINK_CALLBACK_START
6886 static int lnet_old_route_show_dump(struct sk_buff *msg,
6887                                     struct netlink_callback *cb)
6888 {
6889         if (!cb->args[0]) {
6890                 int rc = lnet_route_show_start(cb);
6891
6892                 if (rc < 0)
6893                         return rc;
6894         }
6895
6896         return lnet_route_show_dump(msg, cb);
6897 }
6898 #endif /* !HAVE_NETLINK_CALLBACK_START */
6899
6900 /** LNet peer handling */
6901 struct lnet_genl_processid_list {
6902         unsigned int                    lgpl_index;
6903         unsigned int                    lgpl_count;
6904         GENRADIX(struct lnet_processid) lgpl_list;
6905 };
6906
6907 static inline struct lnet_genl_processid_list *
6908 lnet_peer_dump_ctx(struct netlink_callback *cb)
6909 {
6910         return (struct lnet_genl_processid_list *)cb->args[0];
6911 }
6912
6913 static int lnet_peer_ni_show_done(struct netlink_callback *cb)
6914 {
6915         struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb);
6916
6917         if (plist) {
6918                 genradix_free(&plist->lgpl_list);
6919                 CFS_FREE_PTR(plist);
6920         }
6921         cb->args[0] = 0;
6922
6923         return 0;
6924 }
6925
6926 /* LNet peer ->start() handler for GET requests */
6927 static int lnet_peer_ni_show_start(struct netlink_callback *cb)
6928 {
6929         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
6930 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
6931         struct netlink_ext_ack *extack = NULL;
6932 #endif
6933         struct lnet_genl_processid_list *plist;
6934         int msg_len = genlmsg_len(gnlh);
6935         int rc = 0;
6936
6937 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
6938         extack = cb->extack;
6939 #endif
6940         mutex_lock(&the_lnet.ln_api_mutex);
6941         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
6942                 NL_SET_ERR_MSG(extack, "Network is down");
6943                 mutex_unlock(&the_lnet.ln_api_mutex);
6944                 return -ENETDOWN;
6945         }
6946
6947         CFS_ALLOC_PTR(plist);
6948         if (!plist) {
6949                 NL_SET_ERR_MSG(extack, "No memory for peer list");
6950                 mutex_unlock(&the_lnet.ln_api_mutex);
6951                 return -ENOMEM;
6952         }
6953
6954         genradix_init(&plist->lgpl_list);
6955         plist->lgpl_count = 0;
6956         plist->lgpl_index = 0;
6957         cb->args[0] = (long)plist;
6958
6959         if (!msg_len) {
6960                 struct lnet_peer_table *ptable;
6961                 int cpt;
6962
6963                 cfs_percpt_for_each(ptable, cpt, the_lnet.ln_peer_tables) {
6964                         struct lnet_peer *lp;
6965
6966                         list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
6967                                 struct lnet_processid *lpi;
6968
6969                                 lpi = genradix_ptr_alloc(&plist->lgpl_list,
6970                                                          plist->lgpl_count++,
6971                                                          GFP_KERNEL);
6972                                 if (!lpi) {
6973                                         NL_SET_ERR_MSG(extack, "failed to allocate NID");
6974                                         GOTO(report_err, rc = -ENOMEM);
6975                                 }
6976
6977                                 lpi->pid = LNET_PID_LUSTRE;
6978                                 lpi->nid = lp->lp_primary_nid;
6979                         }
6980                 }
6981         } else {
6982                 struct nlattr *params = genlmsg_data(gnlh);
6983                 struct nlattr *attr;
6984                 int rem;
6985
6986                 nla_for_each_nested(attr, params, rem) {
6987                         struct nlattr *nid;
6988                         int rem2;
6989
6990                         if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
6991                                 continue;
6992
6993                         nla_for_each_nested(nid, attr, rem2) {
6994                                 char addr[LNET_NIDSTR_SIZE];
6995                                 struct lnet_processid *id;
6996
6997                                 if (nla_type(nid) != LN_SCALAR_ATTR_VALUE ||
6998                                     nla_strcmp(nid, "primary nid") != 0)
6999                                         continue;
7000
7001                                 nid = nla_next(nid, &rem2);
7002                                 if (nla_type(nid) != LN_SCALAR_ATTR_VALUE) {
7003                                         NL_SET_ERR_MSG(extack,
7004                                                        "invalid primary nid param");
7005                                         GOTO(report_err, rc = -EINVAL);
7006                                 }
7007
7008                                 rc = nla_strscpy(addr, nid, sizeof(addr));
7009                                 if (rc < 0) {
7010                                         NL_SET_ERR_MSG(extack,
7011                                                        "failed to get primary nid param");
7012                                         GOTO(report_err, rc);
7013                                 }
7014
7015                                 id = genradix_ptr_alloc(&plist->lgpl_list,
7016                                                         plist->lgpl_count++,
7017                                                         GFP_KERNEL);
7018                                 if (!id) {
7019                                         NL_SET_ERR_MSG(extack, "failed to allocate NID");
7020                                         GOTO(report_err, rc = -ENOMEM);
7021                                 }
7022
7023                                 rc = libcfs_strid(id, strim(addr));
7024                                 if (rc < 0) {
7025                                         NL_SET_ERR_MSG(extack, "invalid NID");
7026                                         GOTO(report_err, rc);
7027                                 }
7028                                 rc = 0;
7029                         }
7030                 }
7031         }
7032 report_err:
7033         mutex_unlock(&the_lnet.ln_api_mutex);
7034
7035         if (rc < 0)
7036                 lnet_peer_ni_show_done(cb);
7037
7038         return rc;
7039 }
7040
7041 static const struct ln_key_list lnet_peer_ni_keys = {
7042         .lkl_maxattr                    = LNET_PEER_NI_ATTR_MAX,
7043         .lkl_list                       = {
7044                 [LNET_PEER_NI_ATTR_HDR]  = {
7045                         .lkp_value              = "peer",
7046                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7047                         .lkp_data_type          = NLA_NUL_STRING,
7048                 },
7049                 [LNET_PEER_NI_ATTR_PRIMARY_NID] = {
7050                         .lkp_value              = "primary nid",
7051                         .lkp_data_type          = NLA_STRING,
7052                 },
7053                 [LNET_PEER_NI_ATTR_MULTIRAIL]   = {
7054                         .lkp_value              = "Multi-Rail",
7055                         .lkp_data_type          = NLA_FLAG
7056                 },
7057                 [LNET_PEER_NI_ATTR_STATE]       = {
7058                         .lkp_value              = "peer state",
7059                         .lkp_data_type          = NLA_U32
7060                 },
7061                 [LNET_PEER_NI_ATTR_PEER_NI_LIST] = {
7062                         .lkp_value              = "peer ni",
7063                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7064                         .lkp_data_type          = NLA_NESTED,
7065                 },
7066         },
7067 };
7068
7069 static const struct ln_key_list lnet_peer_ni_list = {
7070         .lkl_maxattr                    = LNET_PEER_NI_LIST_ATTR_MAX,
7071         .lkl_list                       = {
7072                 [LNET_PEER_NI_LIST_ATTR_NID]            = {
7073                         .lkp_value                      = "nid",
7074                         .lkp_data_type                  = NLA_STRING,
7075                 },
7076                 [LNET_PEER_NI_LIST_ATTR_UDSP_INFO]      = {
7077                         .lkp_value                      = "udsp info",
7078                         .lkp_key_format                 = LNKF_MAPPING,
7079                         .lkp_data_type                  = NLA_NESTED,
7080                 },
7081                 [LNET_PEER_NI_LIST_ATTR_STATE]          = {
7082                         .lkp_value                      = "state",
7083                         .lkp_data_type                  = NLA_STRING,
7084                 },
7085                 [LNET_PEER_NI_LIST_ATTR_MAX_TX_CREDITS] = {
7086                         .lkp_value                      = "max_ni_tx_credits",
7087                         .lkp_data_type                  = NLA_U32,
7088                 },
7089                 [LNET_PEER_NI_LIST_ATTR_CUR_TX_CREDITS] = {
7090                         .lkp_value                      = "available_tx_credits",
7091                         .lkp_data_type                  = NLA_U32,
7092                 },
7093                 [LNET_PEER_NI_LIST_ATTR_MIN_TX_CREDITS] = {
7094                         .lkp_value                      = "min_tx_credits",
7095                         .lkp_data_type                  = NLA_U32,
7096                 },
7097                 [LNET_PEER_NI_LIST_ATTR_QUEUE_BUF_COUNT] = {
7098                         .lkp_value                      = "tx_q_num_of_buf",
7099                         .lkp_data_type                  = NLA_U32,
7100                 },
7101                 [LNET_PEER_NI_LIST_ATTR_CUR_RTR_CREDITS] = {
7102                         .lkp_value                      = "available_rtr_credits",
7103                         .lkp_data_type                  = NLA_U32,
7104                 },
7105                 [LNET_PEER_NI_LIST_ATTR_MIN_RTR_CREDITS] = {
7106                         .lkp_value                      = "min_rtr_credits",
7107                         .lkp_data_type                  = NLA_U32,
7108                 },
7109                 [LNET_PEER_NI_LIST_ATTR_REFCOUNT]       = {
7110                         .lkp_value                      = "refcount",
7111                         .lkp_data_type                  = NLA_U32,
7112                 },
7113                 [LNET_PEER_NI_LIST_ATTR_STATS_COUNT]    = {
7114                         .lkp_value                      = "statistics",
7115                         .lkp_key_format                 = LNKF_MAPPING,
7116                         .lkp_data_type                  = NLA_NESTED
7117                 },
7118                 [LNET_PEER_NI_LIST_ATTR_SENT_STATS]     = {
7119                         .lkp_value                      = "sent_stats",
7120                         .lkp_key_format                 = LNKF_MAPPING,
7121                         .lkp_data_type                  = NLA_NESTED
7122                 },
7123                 [LNET_PEER_NI_LIST_ATTR_RECV_STATS]     = {
7124                         .lkp_value                      = "received_stats",
7125                         .lkp_key_format                 = LNKF_MAPPING,
7126                         .lkp_data_type                  = NLA_NESTED
7127                 },
7128                 [LNET_PEER_NI_LIST_ATTR_DROP_STATS]     = {
7129                         .lkp_value                      = "dropped_stats",
7130                         .lkp_key_format                 = LNKF_MAPPING,
7131                         .lkp_data_type                  = NLA_NESTED
7132                 },
7133                 [LNET_PEER_NI_LIST_ATTR_HEALTH_STATS]   = {
7134                         .lkp_value                      = "health stats",
7135                         .lkp_key_format                 = LNKF_MAPPING,
7136                         .lkp_data_type                  = NLA_NESTED
7137                 },
7138         },
7139 };
7140
7141 static const struct ln_key_list lnet_peer_ni_list_stats_count = {
7142         .lkl_maxattr                    = LNET_PEER_NI_LIST_STATS_COUNT_ATTR_MAX,
7143         .lkl_list                       = {
7144                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_SEND_COUNT] = {
7145                         .lkp_value                              = "send_count",
7146                         .lkp_data_type                          = NLA_U32,
7147                 },
7148                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_RECV_COUNT] = {
7149                         .lkp_value                              = "recv_count",
7150                         .lkp_data_type                          = NLA_U32,
7151                 },
7152                 [LNET_PEER_NI_LIST_STATS_COUNT_ATTR_DROP_COUNT] = {
7153                         .lkp_value                              = "drop_count",
7154                         .lkp_data_type                          = NLA_U32,
7155                 },
7156         },
7157 };
7158
7159 static const struct ln_key_list lnet_peer_ni_list_stats = {
7160         .lkl_maxattr                    = LNET_PEER_NI_LIST_STATS_ATTR_MAX,
7161         .lkl_list                       = {
7162                 [LNET_PEER_NI_LIST_STATS_ATTR_PUT]      = {
7163                         .lkp_value                      = "put",
7164                         .lkp_data_type                  = NLA_U32,
7165                 },
7166                 [LNET_PEER_NI_LIST_STATS_ATTR_GET]      = {
7167                         .lkp_value                      = "get",
7168                         .lkp_data_type                  = NLA_U32,
7169                 },
7170                 [LNET_PEER_NI_LIST_STATS_ATTR_REPLY]    = {
7171                         .lkp_value                      = "reply",
7172                         .lkp_data_type                  = NLA_U32,
7173                 },
7174                 [LNET_PEER_NI_LIST_STATS_ATTR_ACK]      = {
7175                         .lkp_value                      = "ack",
7176                         .lkp_data_type                  = NLA_U32,
7177                 },
7178                 [LNET_PEER_NI_LIST_STATS_ATTR_HELLO]    = {
7179                         .lkp_value                      = "hello",
7180                         .lkp_data_type                  = NLA_U32,
7181                 },
7182         },
7183 };
7184
7185 static const struct ln_key_list lnet_peer_ni_list_health = {
7186         .lkl_maxattr                    = LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_MAX,
7187         .lkl_list                       = {
7188                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_VALUE]     = {
7189                         .lkp_value                      = "health value",
7190                         .lkp_data_type                  = NLA_S32,
7191                 },
7192                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_DROPPED]   = {
7193                         .lkp_value                      = "dropped",
7194                         .lkp_data_type                  = NLA_U32,
7195                 },
7196                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_TIMEOUT]   = {
7197                         .lkp_value                      = "timeout",
7198                         .lkp_data_type                  = NLA_U32,
7199                 },
7200                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_ERROR]     = {
7201                         .lkp_value                      = "error",
7202                         .lkp_data_type                  = NLA_U32,
7203                 },
7204                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NETWORK_TIMEOUT] = {
7205                         .lkp_value                      = "network timeout",
7206                         .lkp_data_type                  = NLA_U32,
7207                 },
7208                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PING_COUNT] = {
7209                         .lkp_value                      = "ping_count",
7210                         .lkp_data_type                  = NLA_U32,
7211                 },
7212                 [LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NEXT_PING] = {
7213                         .lkp_value                      = "next_ping",
7214                         .lkp_data_type                  = NLA_S64,
7215                 },
7216         },
7217 };
7218
7219 static int lnet_peer_ni_show_dump(struct sk_buff *msg,
7220                                   struct netlink_callback *cb)
7221 {
7222         struct lnet_genl_processid_list *plist = lnet_peer_dump_ctx(cb);
7223         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
7224 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7225         struct netlink_ext_ack *extack = NULL;
7226 #endif
7227         int portid = NETLINK_CB(cb->skb).portid;
7228         int seq = cb->nlh->nlmsg_seq;
7229         int idx = plist->lgpl_index;
7230         int rc = 0;
7231
7232 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
7233         extack = cb->extack;
7234 #endif
7235         if (!plist->lgpl_count) {
7236                 NL_SET_ERR_MSG(extack, "No peers found");
7237                 GOTO(send_error, rc = -ENOENT);
7238         }
7239
7240         if (!idx) {
7241                 const struct ln_key_list *all[] = {
7242                         &lnet_peer_ni_keys, &lnet_peer_ni_list,
7243                         &udsp_info_list, &udsp_info_pref_nids_list,
7244                         &udsp_info_pref_nids_list,
7245                         &lnet_peer_ni_list_stats_count,
7246                         &lnet_peer_ni_list_stats, /* send_stats */
7247                         &lnet_peer_ni_list_stats, /* recv_stats */
7248                         &lnet_peer_ni_list_stats, /* drop stats */
7249                         &lnet_peer_ni_list_health,
7250                         NULL
7251                 };
7252
7253                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
7254                                                 &lnet_family,
7255                                                 NLM_F_CREATE | NLM_F_MULTI,
7256                                                 LNET_CMD_PEERS, all);
7257                 if (rc < 0) {
7258                         NL_SET_ERR_MSG(extack, "failed to send key table");
7259                         GOTO(send_error, rc);
7260                 }
7261         }
7262
7263         mutex_lock(&the_lnet.ln_api_mutex);
7264         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
7265                 NL_SET_ERR_MSG(extack, "Network is down");
7266                 GOTO(unlock_api_mutex, rc = -ENETDOWN);
7267         }
7268
7269         while (idx < plist->lgpl_count) {
7270                 struct lnet_processid *id;
7271                 struct lnet_peer_ni *lpni = NULL;
7272                 struct nlattr *nid_list;
7273                 struct lnet_peer *lp;
7274                 int count = 1;
7275                 void *hdr;
7276
7277                 id = genradix_ptr(&plist->lgpl_list, idx++);
7278                 if (nid_is_lo0(&id->nid))
7279                         continue;
7280
7281                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
7282                                   NLM_F_MULTI, LNET_CMD_PEERS);
7283                 if (!hdr) {
7284                         NL_SET_ERR_MSG(extack, "failed to send values");
7285                         genlmsg_cancel(msg, hdr);
7286                         GOTO(unlock_api_mutex, rc = -EMSGSIZE);
7287                 }
7288
7289                 lp = lnet_find_peer(&id->nid);
7290                 if (!lp) {
7291                         NL_SET_ERR_MSG(extack, "cannot find peer");
7292                         GOTO(unlock_api_mutex, rc = -ENOENT);
7293                 }
7294
7295                 if (idx == 1)
7296                         nla_put_string(msg, LNET_PEER_NI_ATTR_HDR, "");
7297
7298                 nla_put_string(msg, LNET_PEER_NI_ATTR_PRIMARY_NID,
7299                                libcfs_nidstr(&lp->lp_primary_nid));
7300                 if (lnet_peer_is_multi_rail(lp))
7301                         nla_put_flag(msg, LNET_PEER_NI_ATTR_MULTIRAIL);
7302
7303                 if (gnlh->version >= 3)
7304                         nla_put_u32(msg, LNET_PEER_NI_ATTR_STATE, lp->lp_state);
7305
7306                 nid_list = nla_nest_start(msg, LNET_PEER_NI_ATTR_PEER_NI_LIST);
7307                 while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
7308                         struct nlattr *peer_nid = nla_nest_start(msg, count++);
7309
7310                         nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_NID,
7311                                        libcfs_nidstr(&lpni->lpni_nid));
7312
7313                         if (gnlh->version >= 4) {
7314                                 rc = lnet_udsp_info_send(msg,
7315                                                          LNET_PEER_NI_LIST_ATTR_UDSP_INFO,
7316                                                          &lpni->lpni_nid, true);
7317                                 if (rc < 0) {
7318                                         lnet_peer_decref_locked(lp);
7319                                         NL_SET_ERR_MSG(extack,
7320                                                        "failed to get UDSP info");
7321                                         GOTO(unlock_api_mutex, rc);
7322                                 }
7323                         }
7324
7325                         if (lnet_isrouter(lpni) ||
7326                             lnet_peer_aliveness_enabled(lpni)) {
7327                                 nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_STATE,
7328                                                lnet_is_peer_ni_alive(lpni) ?
7329                                                "up" : "down");
7330                         } else {
7331                                 nla_put_string(msg, LNET_PEER_NI_LIST_ATTR_STATE,
7332                                                "NA");
7333                         }
7334
7335                         if (gnlh->version) {
7336                                 struct lnet_ioctl_element_msg_stats lpni_msg_stats;
7337                                 struct nlattr *send_stats_list, *send_stats;
7338                                 struct nlattr *recv_stats_list, *recv_stats;
7339                                 struct nlattr *drop_stats_list, *drop_stats;
7340                                 struct nlattr *health_list, *health_stats;
7341                                 struct lnet_ioctl_element_stats stats;
7342                                 struct nlattr *stats_attr, *ni_stats;
7343
7344                                 nla_put_u32(msg,
7345                                             LNET_PEER_NI_LIST_ATTR_MAX_TX_CREDITS,
7346                                             lpni->lpni_net ?
7347                                                 lpni->lpni_net->net_tunables.lct_peer_tx_credits : 0);
7348                                 nla_put_u32(msg,
7349                                             LNET_PEER_NI_LIST_ATTR_CUR_TX_CREDITS,
7350                                             lpni->lpni_txcredits);
7351                                 nla_put_u32(msg,
7352                                             LNET_PEER_NI_LIST_ATTR_MIN_TX_CREDITS,
7353                                             lpni->lpni_mintxcredits);
7354                                 nla_put_u32(msg,
7355                                             LNET_PEER_NI_LIST_ATTR_QUEUE_BUF_COUNT,
7356                                             lpni->lpni_txqnob);
7357                                 nla_put_u32(msg,
7358                                             LNET_PEER_NI_LIST_ATTR_CUR_RTR_CREDITS,
7359                                             lpni->lpni_rtrcredits);
7360                                 nla_put_u32(msg,
7361                                             LNET_PEER_NI_LIST_ATTR_MIN_RTR_CREDITS,
7362                                             lpni->lpni_minrtrcredits);
7363                                 nla_put_u32(msg,
7364                                             LNET_PEER_NI_LIST_ATTR_REFCOUNT,
7365                                             kref_read(&lpni->lpni_kref));
7366
7367                                 memset(&stats, 0, sizeof(stats));
7368                                 stats.iel_send_count = lnet_sum_stats(&lpni->lpni_stats,
7369                                                                       LNET_STATS_TYPE_SEND);
7370                                 stats.iel_recv_count = lnet_sum_stats(&lpni->lpni_stats,
7371                                                                       LNET_STATS_TYPE_RECV);
7372                                 stats.iel_drop_count = lnet_sum_stats(&lpni->lpni_stats,
7373                                                                       LNET_STATS_TYPE_DROP);
7374
7375                                 stats_attr = nla_nest_start(msg,
7376                                                             LNET_PEER_NI_LIST_ATTR_STATS_COUNT);
7377                                 ni_stats = nla_nest_start(msg, 0);
7378                                 nla_put_u32(msg,
7379                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_SEND_COUNT,
7380                                             stats.iel_send_count);
7381                                 nla_put_u32(msg,
7382                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_RECV_COUNT,
7383                                             stats.iel_recv_count);
7384                                 nla_put_u32(msg,
7385                                             LNET_PEER_NI_LIST_STATS_COUNT_ATTR_DROP_COUNT,
7386                                             stats.iel_drop_count);
7387                                 nla_nest_end(msg, ni_stats);
7388                                 nla_nest_end(msg, stats_attr);
7389
7390                                 if (gnlh->version < 2)
7391                                         goto skip_msg_stats;
7392
7393                                 lnet_usr_translate_stats(&lpni_msg_stats, &lpni->lpni_stats);
7394
7395                                 send_stats_list = nla_nest_start(msg,
7396                                                                  LNET_PEER_NI_LIST_ATTR_SENT_STATS);
7397                                 send_stats = nla_nest_start(msg, 0);
7398                                 nla_put_u32(msg,
7399                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7400                                             lpni_msg_stats.im_send_stats.ico_put_count);
7401                                 nla_put_u32(msg,
7402                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7403                                             lpni_msg_stats.im_send_stats.ico_get_count);
7404                                 nla_put_u32(msg,
7405                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7406                                             lpni_msg_stats.im_send_stats.ico_reply_count);
7407                                 nla_put_u32(msg,
7408                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7409                                             lpni_msg_stats.im_send_stats.ico_ack_count);
7410                                 nla_put_u32(msg,
7411                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7412                                             lpni_msg_stats.im_send_stats.ico_hello_count);
7413                                 nla_nest_end(msg, send_stats);
7414                                 nla_nest_end(msg, send_stats_list);
7415
7416                                 recv_stats_list = nla_nest_start(msg,
7417                                                                  LNET_PEER_NI_LIST_ATTR_RECV_STATS);
7418                                 recv_stats = nla_nest_start(msg, 0);
7419                                 nla_put_u32(msg,
7420                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7421                                             lpni_msg_stats.im_recv_stats.ico_put_count);
7422                                 nla_put_u32(msg,
7423                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7424                                             lpni_msg_stats.im_recv_stats.ico_get_count);
7425                                 nla_put_u32(msg,
7426                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7427                                             lpni_msg_stats.im_recv_stats.ico_reply_count);
7428                                 nla_put_u32(msg,
7429                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7430                                             lpni_msg_stats.im_recv_stats.ico_ack_count);
7431                                 nla_put_u32(msg,
7432                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7433                                             lpni_msg_stats.im_recv_stats.ico_hello_count);
7434                                 nla_nest_end(msg, recv_stats);
7435                                 nla_nest_end(msg, recv_stats_list);
7436
7437                                 drop_stats_list = nla_nest_start(msg,
7438                                                                  LNET_PEER_NI_LIST_ATTR_DROP_STATS);
7439                                 drop_stats = nla_nest_start(msg, 0);
7440                                 nla_put_u32(msg,
7441                                             LNET_PEER_NI_LIST_STATS_ATTR_PUT,
7442                                             lpni_msg_stats.im_drop_stats.ico_put_count);
7443                                 nla_put_u32(msg,
7444                                             LNET_PEER_NI_LIST_STATS_ATTR_GET,
7445                                             lpni_msg_stats.im_drop_stats.ico_get_count);
7446                                 nla_put_u32(msg,
7447                                             LNET_PEER_NI_LIST_STATS_ATTR_REPLY,
7448                                             lpni_msg_stats.im_drop_stats.ico_reply_count);
7449                                 nla_put_u32(msg,
7450                                             LNET_PEER_NI_LIST_STATS_ATTR_ACK,
7451                                             lpni_msg_stats.im_drop_stats.ico_ack_count);
7452                                 nla_put_u32(msg,
7453                                             LNET_PEER_NI_LIST_STATS_ATTR_HELLO,
7454                                             lpni_msg_stats.im_drop_stats.ico_hello_count);
7455                                 nla_nest_end(msg, drop_stats);
7456                                 nla_nest_end(msg, drop_stats_list);
7457
7458                                 health_list = nla_nest_start(msg,
7459                                                              LNET_PEER_NI_LIST_ATTR_HEALTH_STATS);
7460                                 health_stats = nla_nest_start(msg, 0);
7461                                 nla_put_s32(msg,
7462                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_VALUE,
7463                                             atomic_read(&lpni->lpni_healthv));
7464                                 nla_put_u32(msg,
7465                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_DROPPED,
7466                                             atomic_read(&lpni->lpni_hstats.hlt_remote_dropped));
7467                                 nla_put_u32(msg,
7468                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_TIMEOUT,
7469                                             atomic_read(&lpni->lpni_hstats.hlt_remote_timeout));
7470                                 nla_put_u32(msg,
7471                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_ERROR,
7472                                             atomic_read(&lpni->lpni_hstats.hlt_remote_error));
7473                                 nla_put_u32(msg,
7474                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NETWORK_TIMEOUT,
7475                                             atomic_read(&lpni->lpni_hstats.hlt_network_timeout));
7476                                 nla_put_u32(msg,
7477                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PING_COUNT,
7478                                             lpni->lpni_ping_count);
7479                                 nla_put_s64(msg,
7480                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_NEXT_PING,
7481                                             lpni->lpni_next_ping,
7482                                             LNET_PEER_NI_LIST_HEALTH_STATS_ATTR_PAD);
7483                                 nla_nest_end(msg, health_stats);
7484                                 nla_nest_end(msg, health_list);
7485                         }
7486 skip_msg_stats:
7487                         nla_nest_end(msg, peer_nid);
7488                 }
7489                 nla_nest_end(msg, nid_list);
7490
7491                 genlmsg_end(msg, hdr);
7492                 lnet_peer_decref_locked(lp);
7493         }
7494         plist->lgpl_index = idx;
7495 unlock_api_mutex:
7496         mutex_unlock(&the_lnet.ln_api_mutex);
7497 send_error:
7498         return lnet_nl_send_error(cb->skb, portid, seq, rc);
7499 };
7500
7501 #ifndef HAVE_NETLINK_CALLBACK_START
7502 static int lnet_old_peer_ni_show_dump(struct sk_buff *msg,
7503                                       struct netlink_callback *cb)
7504 {
7505         if (!cb->args[0]) {
7506                 int rc = lnet_peer_ni_show_start(cb);
7507
7508                 if (rc < 0)
7509                         return rc;
7510         }
7511
7512         return lnet_peer_ni_show_dump(msg, cb);
7513 }
7514 #endif
7515
7516 static int lnet_route_cmd(struct sk_buff *skb, struct genl_info *info)
7517 {
7518         struct nlmsghdr *nlh = nlmsg_hdr(skb);
7519         struct genlmsghdr *gnlh = nlmsg_data(nlh);
7520         struct nlattr *params = genlmsg_data(gnlh);
7521         int msg_len, rem, rc = 0;
7522         struct nlattr *attr;
7523
7524         mutex_lock(&the_lnet.ln_api_mutex);
7525         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
7526                 GENL_SET_ERR_MSG(info, "Network is down");
7527                 mutex_unlock(&the_lnet.ln_api_mutex);
7528                 return -ENETDOWN;
7529         }
7530
7531         msg_len = genlmsg_len(gnlh);
7532         if (!msg_len) {
7533                 GENL_SET_ERR_MSG(info, "no configuration");
7534                 mutex_unlock(&the_lnet.ln_api_mutex);
7535                 return -ENOMSG;
7536         }
7537
7538         if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
7539                 GENL_SET_ERR_MSG(info, "invalid configuration");
7540                 mutex_unlock(&the_lnet.ln_api_mutex);
7541                 return -EINVAL;
7542         }
7543
7544         nla_for_each_nested(attr, params, rem) {
7545                 u32 net_id = LNET_NET_ANY, hops = LNET_UNDEFINED_HOPS;
7546                 u32 priority = 0, sensitivity = 1;
7547                 struct lnet_nid gw_nid = LNET_ANY_NID;
7548                 struct nlattr *route_prop;
7549                 bool alive = true;
7550                 s64 when = 0;
7551                 int rem2;
7552
7553                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
7554                         continue;
7555
7556                 nla_for_each_nested(route_prop, attr, rem2) {
7557                         char tmp[LNET_NIDSTR_SIZE];
7558                         ssize_t len;
7559                         s64 num;
7560
7561                         if (nla_type(route_prop) != LN_SCALAR_ATTR_VALUE)
7562                                 continue;
7563
7564                         if (nla_strcmp(route_prop, "net") == 0) {
7565                                 route_prop = nla_next(route_prop, &rem2);
7566                                 if (nla_type(route_prop) !=
7567                                     LN_SCALAR_ATTR_VALUE) {
7568                                         GENL_SET_ERR_MSG(info,
7569                                                          "net is invalid key");
7570                                         GOTO(report_err, rc = -EINVAL);
7571                                 }
7572
7573                                 len = nla_strscpy(tmp, route_prop, sizeof(tmp));
7574                                 if (len < 0) {
7575                                         GENL_SET_ERR_MSG(info,
7576                                                          "net key string is invalid");
7577                                         GOTO(report_err, rc = len);
7578                                 }
7579
7580                                 net_id = libcfs_str2net(tmp);
7581                                 if (!net_id) {
7582                                         GENL_SET_ERR_MSG(info,
7583                                                          "cannot parse remote net");
7584                                         GOTO(report_err, rc = -ENODEV);
7585                                 }
7586
7587                                 if (LNET_NETTYP(net_id) == LOLND) {
7588                                         GENL_SET_ERR_MSG(info,
7589                                                          "setting @lo not allowed");
7590                                         GOTO(report_err, rc = -EACCES);
7591                                 }
7592
7593                                 if (net_id == LNET_NET_ANY) {
7594                                         GENL_SET_ERR_MSG(info,
7595                                                          "setting LNET_NET_ANY not allowed");
7596                                         GOTO(report_err, rc = -ENXIO);
7597                                 }
7598                         } else if (nla_strcmp(route_prop, "gateway") == 0) {
7599                                 route_prop = nla_next(route_prop, &rem2);
7600                                 if (nla_type(route_prop) !=
7601                                     LN_SCALAR_ATTR_VALUE) {
7602                                         GENL_SET_ERR_MSG(info,
7603                                                          "gateway is invalid key");
7604                                         GOTO(report_err, rc = -EINVAL);
7605                                 }
7606
7607                                 len = nla_strscpy(tmp, route_prop, sizeof(tmp));
7608                                 if (len < 0) {
7609                                         GENL_SET_ERR_MSG(info,
7610                                                          "gateway string is invalid");
7611                                         GOTO(report_err, rc = len);
7612                                 }
7613
7614                                 rc = libcfs_strnid(&gw_nid, strim(tmp));
7615                                 if (rc < 0) {
7616                                         GENL_SET_ERR_MSG(info,
7617                                                          "cannot parse gateway");
7618                                         GOTO(report_err, rc = -ENODEV);
7619                                 }
7620                         } else if (nla_strcmp(route_prop, "state") == 0) {
7621                                 route_prop = nla_next(route_prop, &rem2);
7622                                 if (nla_type(route_prop) !=
7623                                     LN_SCALAR_ATTR_VALUE) {
7624                                         GENL_SET_ERR_MSG(info,
7625                                                          "state is invalid key");
7626                                         GOTO(report_err, rc = -EINVAL);
7627                                 }
7628
7629                                 if (nla_strcmp(route_prop, "down") == 0) {
7630                                         alive = false;
7631                                 } else if (nla_strcmp(route_prop, "up") == 0) {
7632                                         alive = true;
7633                                 } else {
7634                                         GENL_SET_ERR_MSG(info,
7635                                                          "status string bad value");
7636                                         GOTO(report_err, rc = -EINVAL);
7637                                 }
7638                         } else if (nla_strcmp(route_prop, "notify_time") == 0) {
7639                                 route_prop = nla_next(route_prop, &rem2);
7640                                 if (nla_type(route_prop) !=
7641                                     LN_SCALAR_ATTR_INT_VALUE) {
7642                                         GENL_SET_ERR_MSG(info,
7643                                                          "notify_time is invalid key");
7644                                         GOTO(report_err, rc = -EINVAL);
7645                                 }
7646
7647                                 when = nla_get_s64(route_prop);
7648                                 if (ktime_get_real_seconds() < when) {
7649                                         GENL_SET_ERR_MSG(info,
7650                                                          "notify_time is in the future");
7651                                         GOTO(report_err, rc = -EINVAL);
7652                                 }
7653                         } else if (nla_strcmp(route_prop, "hop") == 0) {
7654                                 route_prop = nla_next(route_prop, &rem2);
7655                                 if (nla_type(route_prop) !=
7656                                     LN_SCALAR_ATTR_INT_VALUE) {
7657                                         GENL_SET_ERR_MSG(info,
7658                                                          "hop has invalid key");
7659                                         GOTO(report_err, rc = -EINVAL);
7660                                 }
7661
7662                                 hops = nla_get_s64(route_prop);
7663                                 if ((hops < 1 || hops > 255) && hops != -1) {
7664                                         GENL_SET_ERR_MSG(info,
7665                                                          "invalid hop count must be between 1 and 255");
7666                                         GOTO(report_err, rc = -EINVAL);
7667                                 }
7668                         } else if (nla_strcmp(route_prop, "priority") == 0) {
7669                                 route_prop = nla_next(route_prop, &rem2);
7670                                 if (nla_type(route_prop) !=
7671                                     LN_SCALAR_ATTR_INT_VALUE) {
7672                                         GENL_SET_ERR_MSG(info,
7673                                                          "priority has invalid key");
7674                                         GOTO(report_err, rc = -EINVAL);
7675                                 }
7676
7677                                 num = nla_get_s64(route_prop);
7678                                 if (num < 0) {
7679                                         GENL_SET_ERR_MSG(info,
7680                                                          "invalid priority, must not be negative");
7681                                         GOTO(report_err, rc = -EINVAL);
7682                                 }
7683                                 priority = num;
7684                         } else if (nla_strcmp(route_prop,
7685                                               "health_sensitivity") == 0) {
7686                                 route_prop = nla_next(route_prop, &rem2);
7687                                 if (nla_type(route_prop) !=
7688                                     LN_SCALAR_ATTR_INT_VALUE) {
7689                                         GENL_SET_ERR_MSG(info,
7690                                                          "sensitivity has invalid key");
7691                                         GOTO(report_err, rc = -EINVAL);
7692                                 }
7693
7694                                 num = nla_get_s64(route_prop);
7695                                 if (num < 1) {
7696                                         GENL_SET_ERR_MSG(info,
7697                                                          "invalid health sensitivity, must be 1 or greater");
7698                                         GOTO(report_err, rc = -EINVAL);
7699                                 }
7700                                 sensitivity = num;
7701                         }
7702                 }
7703
7704                 if (net_id == LNET_NET_ANY) {
7705                         GENL_SET_ERR_MSG(info,
7706                                          "missing mandatory parameter: network");
7707                         GOTO(report_err, rc = -ENODEV);
7708                 }
7709
7710                 if (LNET_NID_IS_ANY(&gw_nid)) {
7711                         GENL_SET_ERR_MSG(info,
7712                                          "missing mandatory parameter: gateway");
7713                         GOTO(report_err, rc = -ENODEV);
7714                 }
7715
7716                 if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) {
7717                         /* Convert the user-supplied real time to monotonic.
7718                          * NB: "when" is always in the past
7719                          */
7720                         when = ktime_get_seconds() -
7721                                 (ktime_get_real_seconds() - when);
7722
7723                         mutex_unlock(&the_lnet.ln_api_mutex);
7724                         rc = lnet_notify(NULL, &gw_nid, alive, false, when);
7725                         mutex_lock(&the_lnet.ln_api_mutex);
7726                         if (rc < 0)
7727                                 GOTO(report_err, rc);
7728                         else if (the_lnet.ln_state != LNET_STATE_RUNNING)
7729                                 GOTO(report_err, rc = -ENETDOWN);
7730                 } else if (info->nlhdr->nlmsg_flags & NLM_F_CREATE) {
7731                         rc = lnet_add_route(net_id, hops, &gw_nid, priority,
7732                                             sensitivity);
7733                         if (rc < 0) {
7734                                 switch (rc) {
7735                                 case -EINVAL:
7736                                         GENL_SET_ERR_MSG(info,
7737                                                          "invalid settings for route creation");
7738                                         break;
7739                                 case -EHOSTUNREACH:
7740                                         GENL_SET_ERR_MSG(info,
7741                                                          "No interface configured on the same net as gateway");
7742                                         break;
7743                                 case -ESHUTDOWN:
7744                                         GENL_SET_ERR_MSG(info,
7745                                                          "Network is down");
7746                                         break;
7747                                 case -EEXIST:
7748                                         GENL_SET_ERR_MSG(info,
7749                                                          "Route already exists or the specified network is local");
7750                                         break;
7751                                 default:
7752                                         GENL_SET_ERR_MSG(info,
7753                                                          "failed to create route");
7754                                         break;
7755                                 }
7756                                 GOTO(report_err, rc);
7757                         }
7758                 } else if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
7759                         rc = lnet_del_route(net_id, &gw_nid);
7760                         if (rc < 0) {
7761                                 GENL_SET_ERR_MSG(info,
7762                                                  "failed to delete route");
7763                                 GOTO(report_err, rc);
7764                         }
7765                 }
7766         }
7767 report_err:
7768         mutex_unlock(&the_lnet.ln_api_mutex);
7769
7770         return rc;
7771 }
7772
7773 static inline struct lnet_genl_ping_list *
7774 lnet_ping_dump_ctx(struct netlink_callback *cb)
7775 {
7776         return (struct lnet_genl_ping_list *)cb->args[0];
7777 }
7778
7779 static int lnet_ping_show_done(struct netlink_callback *cb)
7780 {
7781         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
7782
7783         if (plist) {
7784                 genradix_free(&plist->lgpl_failed);
7785                 genradix_free(&plist->lgpl_list);
7786                 LIBCFS_FREE(plist, sizeof(*plist));
7787                 cb->args[0] = 0;
7788         }
7789
7790         return 0;
7791 }
7792
7793 /* LNet ping ->start() handler for GET requests */
7794 static int lnet_ping_show_start(struct netlink_callback *cb)
7795 {
7796         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
7797 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7798         struct netlink_ext_ack *extack = NULL;
7799 #endif
7800         struct lnet_genl_ping_list *plist;
7801         int msg_len = genlmsg_len(gnlh);
7802         struct nlattr *params, *top;
7803         int rem, rc = 0;
7804
7805 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
7806         extack = cb->extack;
7807 #endif
7808         if (the_lnet.ln_refcount == 0) {
7809                 NL_SET_ERR_MSG(extack, "Network is down");
7810                 return -ENETDOWN;
7811         }
7812
7813         if (!msg_len) {
7814                 NL_SET_ERR_MSG(extack, "Ping needs NID targets");
7815                 return -ENOENT;
7816         }
7817
7818         LIBCFS_ALLOC(plist, sizeof(*plist));
7819         if (!plist) {
7820                 NL_SET_ERR_MSG(extack, "failed to setup ping list");
7821                 return -ENOMEM;
7822         }
7823         genradix_init(&plist->lgpl_list);
7824         plist->lgpl_timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
7825         plist->lgpl_src_nid = LNET_ANY_NID;
7826         plist->lgpl_index = 0;
7827         plist->lgpl_list_count = 0;
7828         cb->args[0] = (long)plist;
7829
7830         params = genlmsg_data(gnlh);
7831         nla_for_each_attr(top, params, msg_len, rem) {
7832                 struct nlattr *nids;
7833                 int rem2;
7834
7835                 switch (nla_type(top)) {
7836                 case LN_SCALAR_ATTR_VALUE:
7837                         if (nla_strcmp(top, "timeout") == 0) {
7838                                 s64 timeout;
7839
7840                                 top = nla_next(top, &rem);
7841                                 if (nla_type(top) != LN_SCALAR_ATTR_INT_VALUE) {
7842                                         NL_SET_ERR_MSG(extack,
7843                                                        "invalid timeout param");
7844                                         GOTO(report_err, rc = -EINVAL);
7845                                 }
7846
7847                                 /* If timeout is negative then set default of
7848                                  * 3 minutes
7849                                  */
7850                                 timeout = nla_get_s64(top);
7851                                 if (timeout > 0 &&
7852                                     timeout < (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
7853                                         plist->lgpl_timeout =
7854                                                 nsecs_to_jiffies(timeout * NSEC_PER_MSEC);
7855                         } else if (nla_strcmp(top, "source") == 0) {
7856                                 char nidstr[LNET_NIDSTR_SIZE + 1];
7857
7858                                 top = nla_next(top, &rem);
7859                                 if (nla_type(top) != LN_SCALAR_ATTR_VALUE) {
7860                                         NL_SET_ERR_MSG(extack,
7861                                                        "invalid source param");
7862                                         GOTO(report_err, rc = -EINVAL);
7863                                 }
7864
7865                                 rc = nla_strscpy(nidstr, top, sizeof(nidstr));
7866                                 if (rc < 0) {
7867                                         NL_SET_ERR_MSG(extack,
7868                                                        "failed to parse source nid");
7869                                         GOTO(report_err, rc);
7870                                 }
7871
7872                                 rc = libcfs_strnid(&plist->lgpl_src_nid,
7873                                                    strim(nidstr));
7874                                 if (rc < 0) {
7875                                         NL_SET_ERR_MSG(extack,
7876                                                        "invalid source nid");
7877                                         GOTO(report_err, rc);
7878                                 }
7879                                 rc = 0;
7880                         }
7881                         break;
7882                 case LN_SCALAR_ATTR_LIST:
7883                         nla_for_each_nested(nids, top, rem2) {
7884                                 char nid[LNET_NIDSTR_SIZE + 1];
7885                                 struct lnet_processid *id;
7886
7887                                 if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
7888                                         continue;
7889
7890                                 memset(nid, 0, sizeof(nid));
7891                                 rc = nla_strscpy(nid, nids, sizeof(nid));
7892                                 if (rc < 0) {
7893                                         NL_SET_ERR_MSG(extack,
7894                                                        "failed to get NID");
7895                                         GOTO(report_err, rc);
7896                                 }
7897
7898                                 id = genradix_ptr_alloc(&plist->lgpl_list,
7899                                                         plist->lgpl_list_count++,
7900                                                         GFP_KERNEL);
7901                                 if (!id) {
7902                                         NL_SET_ERR_MSG(extack,
7903                                                        "failed to allocate NID");
7904                                         GOTO(report_err, rc = -ENOMEM);
7905                                 }
7906
7907                                 rc = libcfs_strid(id, strim(nid));
7908                                 if (rc < 0) {
7909                                         NL_SET_ERR_MSG(extack, "cannot parse NID");
7910                                         GOTO(report_err, rc);
7911                                 }
7912                                 rc = 0;
7913                         }
7914                         fallthrough;
7915                 default:
7916                         break;
7917                 }
7918         }
7919 report_err:
7920         if (rc < 0)
7921                 lnet_ping_show_done(cb);
7922
7923         return rc;
7924 }
7925
7926 static const struct ln_key_list ping_err_props_list = {
7927         .lkl_maxattr                    = LNET_ERR_ATTR_MAX,
7928         .lkl_list                       = {
7929                 [LNET_ERR_ATTR_HDR]             = {
7930                         .lkp_value              = "manage",
7931                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7932                         .lkp_data_type          = NLA_NUL_STRING,
7933                 },
7934                 [LNET_ERR_ATTR_TYPE]            = {
7935                         .lkp_value              = "ping",
7936                         .lkp_data_type          = NLA_STRING,
7937                 },
7938                 [LNET_ERR_ATTR_ERRNO]           = {
7939                         .lkp_value              = "errno",
7940                         .lkp_data_type          = NLA_S16,
7941                 },
7942                 [LNET_ERR_ATTR_DESCR]           = {
7943                         .lkp_value              = "descr",
7944                         .lkp_data_type          = NLA_STRING,
7945                 },
7946         },
7947 };
7948
7949 static const struct ln_key_list ping_props_list = {
7950         .lkl_maxattr                    = LNET_PING_ATTR_MAX,
7951         .lkl_list                       = {
7952                 [LNET_PING_ATTR_HDR]            = {
7953                         .lkp_value              = "ping",
7954                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7955                         .lkp_data_type          = NLA_NUL_STRING,
7956                 },
7957                 [LNET_PING_ATTR_PRIMARY_NID]    = {
7958                         .lkp_value              = "primary nid",
7959                         .lkp_data_type          = NLA_STRING
7960                 },
7961                 [LNET_PING_ATTR_ERRNO]          = {
7962                         .lkp_value              = "errno",
7963                         .lkp_data_type          = NLA_S16
7964                 },
7965                 [LNET_PING_ATTR_MULTIRAIL]      = {
7966                         .lkp_value              = "Multi-Rail",
7967                         .lkp_data_type          = NLA_FLAG
7968                 },
7969                 [LNET_PING_ATTR_PEER_NI_LIST]   = {
7970                         .lkp_value              = "peer_ni",
7971                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
7972                         .lkp_data_type          = NLA_NESTED
7973                 },
7974         },
7975 };
7976
7977 static const struct ln_key_list ping_peer_ni_list = {
7978         .lkl_maxattr                    = LNET_PING_PEER_NI_ATTR_MAX,
7979         .lkl_list                       = {
7980                 [LNET_PING_PEER_NI_ATTR_NID]    = {
7981                         .lkp_value              = "nid",
7982                         .lkp_data_type          = NLA_STRING
7983                 },
7984         },
7985 };
7986
7987 static int lnet_ping_show_dump(struct sk_buff *msg,
7988                                struct netlink_callback *cb)
7989 {
7990         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
7991 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
7992         struct netlink_ext_ack *extack = NULL;
7993 #endif
7994         int portid = NETLINK_CB(cb->skb).portid;
7995         int seq = cb->nlh->nlmsg_seq;
7996         int idx = plist->lgpl_index;
7997         int rc = 0, i = 0;
7998
7999 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
8000         extack = cb->extack;
8001 #endif
8002         if (!plist->lgpl_index) {
8003                 const struct ln_key_list *all[] = {
8004                         &ping_props_list, &ping_peer_ni_list, NULL
8005                 };
8006
8007                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
8008                                                 &lnet_family,
8009                                                 NLM_F_CREATE | NLM_F_MULTI,
8010                                                 LNET_CMD_PING, all);
8011                 if (rc < 0) {
8012                         NL_SET_ERR_MSG(extack, "failed to send key table");
8013                         GOTO(send_error, rc);
8014                 }
8015
8016                 genradix_init(&plist->lgpl_failed);
8017         }
8018
8019         while (idx < plist->lgpl_list_count) {
8020                 struct lnet_nid primary_nid = LNET_ANY_NID;
8021                 struct lnet_genl_ping_list peers;
8022                 struct lnet_processid *id;
8023                 struct nlattr *nid_list;
8024                 struct lnet_peer *lp;
8025                 bool mr_flag = false;
8026                 unsigned int count;
8027                 void *hdr = NULL;
8028
8029                 id = genradix_ptr(&plist->lgpl_list, idx++);
8030
8031                 rc = lnet_ping(id, &plist->lgpl_src_nid, plist->lgpl_timeout,
8032                                &peers, lnet_interfaces_max);
8033                 if (rc < 0) {
8034                         struct lnet_fail_ping *fail;
8035
8036                         fail = genradix_ptr_alloc(&plist->lgpl_failed,
8037                                                   plist->lgpl_failed_count++,
8038                                                   GFP_KERNEL);
8039                         if (!fail) {
8040                                 NL_SET_ERR_MSG(extack,
8041                                                "failed to allocate failed NID");
8042                                 GOTO(send_error, rc);
8043                         }
8044                         memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
8045                         snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
8046                                  "failed to ping %s",
8047                                  libcfs_nidstr(&id->nid));
8048                         fail->lfp_id = *id;
8049                         fail->lfp_errno = rc;
8050                         goto cant_reach;
8051                 }
8052
8053                 mutex_lock(&the_lnet.ln_api_mutex);
8054                 lp = lnet_find_peer(&id->nid);
8055                 if (lp) {
8056                         primary_nid = lp->lp_primary_nid;
8057                         mr_flag = lnet_peer_is_multi_rail(lp);
8058                         lnet_peer_decref_locked(lp);
8059                 }
8060                 mutex_unlock(&the_lnet.ln_api_mutex);
8061
8062                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
8063                                   NLM_F_MULTI, LNET_CMD_PING);
8064                 if (!hdr) {
8065                         NL_SET_ERR_MSG(extack, "failed to send values");
8066                         genlmsg_cancel(msg, hdr);
8067                         GOTO(send_error, rc = -EMSGSIZE);
8068                 }
8069
8070                 if (i++ == 0)
8071                         nla_put_string(msg, LNET_PING_ATTR_HDR, "");
8072
8073                 nla_put_string(msg, LNET_PING_ATTR_PRIMARY_NID,
8074                                libcfs_nidstr(&primary_nid));
8075                 if (mr_flag)
8076                         nla_put_flag(msg, LNET_PING_ATTR_MULTIRAIL);
8077
8078                 nid_list = nla_nest_start(msg, LNET_PING_ATTR_PEER_NI_LIST);
8079                 for (count = 0; count < rc; count++) {
8080                         struct lnet_processid *result;
8081                         struct nlattr *nid_attr;
8082                         char *idstr;
8083
8084                         result = genradix_ptr(&peers.lgpl_list, count);
8085                         if (nid_is_lo0(&result->nid))
8086                                 continue;
8087
8088                         nid_attr = nla_nest_start(msg, count + 1);
8089                         if (id->pid == LNET_PID_LUSTRE)
8090                                 idstr = libcfs_nidstr(&result->nid);
8091                         else
8092                                 idstr = libcfs_idstr(result);
8093                         nla_put_string(msg, LNET_PING_PEER_NI_ATTR_NID, idstr);
8094                         nla_nest_end(msg, nid_attr);
8095                 }
8096                 nla_nest_end(msg, nid_list);
8097                 genlmsg_end(msg, hdr);
8098 cant_reach:
8099                 genradix_free(&peers.lgpl_list);
8100         }
8101
8102         if (plist->lgpl_failed_count) {
8103                 int flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
8104                 const struct ln_key_list *fail[] = {
8105                         &ping_err_props_list, NULL
8106                 };
8107
8108                 rc = lnet_genl_send_scalar_list(msg, portid, seq, &lnet_family,
8109                                                 flags, LNET_CMD_PING, fail);
8110                 if (rc < 0) {
8111                         NL_SET_ERR_MSG(extack,
8112                                        "failed to send new key table");
8113                         GOTO(send_error, rc);
8114                 }
8115
8116                 for (i = 0; i < plist->lgpl_failed_count; i++) {
8117                         struct lnet_fail_ping *fail;
8118                         void *hdr;
8119
8120                         fail = genradix_ptr(&plist->lgpl_failed, i);
8121
8122                         hdr = genlmsg_put(msg, portid, seq, &lnet_family,
8123                                           NLM_F_MULTI, LNET_CMD_PING);
8124                         if (!hdr) {
8125                                 NL_SET_ERR_MSG(extack,
8126                                                "failed to send failed values");
8127                                 genlmsg_cancel(msg, hdr);
8128                                 GOTO(send_error, rc = -EMSGSIZE);
8129                         }
8130
8131                         if (i == 0)
8132                                 nla_put_string(msg, LNET_ERR_ATTR_HDR, "");
8133
8134                         nla_put_string(msg, LNET_ERR_ATTR_TYPE, "\n");
8135                         nla_put_s16(msg, LNET_ERR_ATTR_ERRNO,
8136                                     fail->lfp_errno);
8137                         nla_put_string(msg, LNET_ERR_ATTR_DESCR,
8138                                        fail->lfp_msg);
8139                         genlmsg_end(msg, hdr);
8140                 }
8141         }
8142         genradix_free(&plist->lgpl_list);
8143         rc = 0; /* don't treat it as an error */
8144
8145         plist->lgpl_index = idx;
8146 send_error:
8147         return lnet_nl_send_error(cb->skb, portid, seq, rc);
8148 }
8149
8150 #ifndef HAVE_NETLINK_CALLBACK_START
8151 static int lnet_old_ping_show_dump(struct sk_buff *msg,
8152                                    struct netlink_callback *cb)
8153 {
8154         if (!cb->args[0]) {
8155                 int rc = lnet_ping_show_start(cb);
8156
8157                 if (rc < 0)
8158                         return rc;
8159         }
8160
8161         return lnet_ping_show_dump(msg, cb);
8162 }
8163 #endif
8164
8165 static const struct ln_key_list discover_err_props_list = {
8166         .lkl_maxattr                    = LNET_ERR_ATTR_MAX,
8167         .lkl_list                       = {
8168                 [LNET_ERR_ATTR_HDR]             = {
8169                         .lkp_value              = "manage",
8170                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8171                         .lkp_data_type          = NLA_NUL_STRING,
8172                 },
8173                 [LNET_ERR_ATTR_TYPE]            = {
8174                         .lkp_value              = "discover",
8175                         .lkp_data_type          = NLA_STRING,
8176                 },
8177                 [LNET_ERR_ATTR_ERRNO]           = {
8178                         .lkp_value              = "errno",
8179                         .lkp_data_type          = NLA_S16,
8180                 },
8181                 [LNET_ERR_ATTR_DESCR]           = {
8182                         .lkp_value              = "descr",
8183                         .lkp_data_type          = NLA_STRING,
8184                 },
8185         },
8186 };
8187
8188 static const struct ln_key_list discover_props_list = {
8189         .lkl_maxattr                    = LNET_PING_ATTR_MAX,
8190         .lkl_list                       = {
8191                 [LNET_PING_ATTR_HDR]            = {
8192                         .lkp_value              = "discover",
8193                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8194                         .lkp_data_type          = NLA_NUL_STRING,
8195                 },
8196                 [LNET_PING_ATTR_PRIMARY_NID]    = {
8197                         .lkp_value              = "primary nid",
8198                         .lkp_data_type          = NLA_STRING
8199                 },
8200                 [LNET_PING_ATTR_ERRNO]          = {
8201                         .lkp_value              = "errno",
8202                         .lkp_data_type          = NLA_S16
8203                 },
8204                 [LNET_PING_ATTR_MULTIRAIL]      = {
8205                         .lkp_value              = "Multi-Rail",
8206                         .lkp_data_type          = NLA_FLAG
8207                 },
8208                 [LNET_PING_ATTR_PEER_NI_LIST]   = {
8209                         .lkp_value              = "peer_ni",
8210                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
8211                         .lkp_data_type          = NLA_NESTED
8212                 },
8213         },
8214 };
8215
8216 static int lnet_ping_cmd(struct sk_buff *skb, struct genl_info *info)
8217 {
8218         const struct ln_key_list *all[] = {
8219                 &discover_props_list, &ping_peer_ni_list, NULL
8220         };
8221         struct nlmsghdr *nlh = nlmsg_hdr(skb);
8222         struct genlmsghdr *gnlh = nlmsg_data(nlh);
8223         struct nlattr *params = genlmsg_data(gnlh);
8224         struct lnet_genl_ping_list dlists;
8225         int msg_len, rem, rc = 0, i;
8226         bool clear_hdr = false;
8227         struct sk_buff *reply;
8228         struct nlattr *attr;
8229         void *hdr = NULL;
8230
8231         msg_len = genlmsg_len(gnlh);
8232         if (!msg_len) {
8233                 GENL_SET_ERR_MSG(info, "no configuration");
8234                 return -ENOMSG;
8235         }
8236
8237         if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE)) {
8238                 GENL_SET_ERR_MSG(info, "only NLM_F_CREATE setting is allowed");
8239                 return -EINVAL;
8240         }
8241
8242         reply = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
8243         if (!reply) {
8244                 GENL_SET_ERR_MSG(info,
8245                                  "fail to allocate reply");
8246                 return -ENOMEM;
8247         }
8248
8249         genradix_init(&dlists.lgpl_failed);
8250         dlists.lgpl_failed_count = 0;
8251         genradix_init(&dlists.lgpl_list);
8252         dlists.lgpl_list_count = 0;
8253
8254         rc = lnet_genl_send_scalar_list(reply, info->snd_portid,
8255                                         info->snd_seq, &lnet_family,
8256                                         NLM_F_CREATE | NLM_F_MULTI,
8257                                         LNET_CMD_PING, all);
8258         if (rc < 0) {
8259                 GENL_SET_ERR_MSG(info,
8260                                  "failed to send key table");
8261                 GOTO(report_err, rc);
8262         }
8263
8264         nla_for_each_attr(attr, params, msg_len, rem) {
8265                 struct nlattr *nids;
8266                 int rem2;
8267
8268                 /* We only care about the NID list to discover with */
8269                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
8270                         continue;
8271
8272                 nla_for_each_nested(nids, attr, rem2) {
8273                         char nid[LNET_NIDSTR_SIZE + 1];
8274                         struct lnet_processid id;
8275                         struct nlattr *nid_list;
8276                         struct lnet_peer *lp;
8277                         ssize_t len;
8278
8279                         if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
8280                                 continue;
8281
8282                         memset(nid, 0, sizeof(nid));
8283                         rc = nla_strscpy(nid, nids, sizeof(nid));
8284                         if (rc < 0) {
8285                                 GENL_SET_ERR_MSG(info,
8286                                                  "failed to get NID");
8287                                 GOTO(report_err, rc);
8288                         }
8289
8290                         len = libcfs_strid(&id, strim(nid));
8291                         if (len < 0) {
8292                                 struct lnet_fail_ping *fail;
8293
8294                                 fail = genradix_ptr_alloc(&dlists.lgpl_failed,
8295                                                           dlists.lgpl_failed_count++,
8296                                                           GFP_KERNEL);
8297                                 if (!fail) {
8298                                         GENL_SET_ERR_MSG(info,
8299                                                          "failed to allocate improper NID");
8300                                         GOTO(report_err, rc = -ENOMEM);
8301                                 }
8302                                 memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
8303                                 snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
8304                                          "cannot parse NID '%s'", strim(nid));
8305                                 fail->lfp_id = id;
8306                                 fail->lfp_errno = len;
8307                                 continue;
8308                         }
8309
8310                         if (LNET_NID_IS_ANY(&id.nid))
8311                                 continue;
8312
8313                         rc = lnet_discover(&id,
8314                                            info->nlhdr->nlmsg_flags & NLM_F_EXCL,
8315                                            &dlists);
8316                         if (rc < 0) {
8317                                 struct lnet_fail_ping *fail;
8318
8319                                 fail = genradix_ptr_alloc(&dlists.lgpl_failed,
8320                                                           dlists.lgpl_failed_count++,
8321                                                           GFP_KERNEL);
8322                                 if (!fail) {
8323                                         GENL_SET_ERR_MSG(info,
8324                                                          "failed to allocate failed NID");
8325                                         GOTO(report_err, rc = -ENOMEM);
8326                                 }
8327                                 memset(fail->lfp_msg, '\0', sizeof(fail->lfp_msg));
8328                                 snprintf(fail->lfp_msg, sizeof(fail->lfp_msg),
8329                                          "failed to discover %s",
8330                                          libcfs_nidstr(&id.nid));
8331                                 fail->lfp_id = id;
8332                                 fail->lfp_errno = rc;
8333                                 continue;
8334                         }
8335
8336                         /* create the genetlink message header */
8337                         hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
8338                                           &lnet_family, NLM_F_MULTI, LNET_CMD_PING);
8339                         if (!hdr) {
8340                                 GENL_SET_ERR_MSG(info,
8341                                                  "failed to allocate hdr");
8342                                 GOTO(report_err, rc = -ENOMEM);
8343                         }
8344
8345                         if (!clear_hdr) {
8346                                 nla_put_string(reply, LNET_PING_ATTR_HDR, "");
8347                                 clear_hdr = true;
8348                         }
8349
8350                         lp = lnet_find_peer(&id.nid);
8351                         if (lp) {
8352                                 nla_put_string(reply, LNET_PING_ATTR_PRIMARY_NID,
8353                                                libcfs_nidstr(&lp->lp_primary_nid));
8354                                 if (lnet_peer_is_multi_rail(lp))
8355                                         nla_put_flag(reply, LNET_PING_ATTR_MULTIRAIL);
8356                                 lnet_peer_decref_locked(lp);
8357                         }
8358
8359                         nid_list = nla_nest_start(reply, LNET_PING_ATTR_PEER_NI_LIST);
8360                         for (i = 0; i < dlists.lgpl_list_count; i++) {
8361                                 struct lnet_processid *found;
8362                                 struct nlattr *nid_attr;
8363                                 char *idstr;
8364
8365                                 found = genradix_ptr(&dlists.lgpl_list, i);
8366                                 if (nid_is_lo0(&found->nid))
8367                                         continue;
8368
8369                                 nid_attr = nla_nest_start(reply, i + 1);
8370                                 if (id.pid == LNET_PID_LUSTRE)
8371                                         idstr = libcfs_nidstr(&found->nid);
8372                                 else
8373                                         idstr = libcfs_idstr(found);
8374                                 nla_put_string(reply, LNET_PING_PEER_NI_ATTR_NID, idstr);
8375                                 nla_nest_end(reply, nid_attr);
8376                         }
8377                         nla_nest_end(reply, nid_list);
8378
8379                         genlmsg_end(reply, hdr);
8380                 }
8381         }
8382
8383         if (dlists.lgpl_failed_count) {
8384                 int flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
8385                 const struct ln_key_list *fail[] = {
8386                         &discover_err_props_list, NULL
8387                 };
8388
8389                 rc = lnet_genl_send_scalar_list(reply, info->snd_portid,
8390                                                 info->snd_seq, &lnet_family,
8391                                                 flags, LNET_CMD_PING, fail);
8392                 if (rc < 0) {
8393                         GENL_SET_ERR_MSG(info,
8394                                          "failed to send new key table");
8395                         GOTO(report_err, rc);
8396                 }
8397
8398                 for (i = 0; i < dlists.lgpl_failed_count; i++) {
8399                         struct lnet_fail_ping *fail;
8400
8401                         hdr = genlmsg_put(reply, info->snd_portid, info->snd_seq,
8402                                           &lnet_family, NLM_F_MULTI, LNET_CMD_PING);
8403                         if (!hdr) {
8404                                 GENL_SET_ERR_MSG(info,
8405                                                  "failed to send failed values");
8406                                 GOTO(report_err, rc = -ENOMSG);
8407                         }
8408
8409                         fail = genradix_ptr(&dlists.lgpl_failed, i);
8410                         if (i == 0)
8411                                 nla_put_string(reply, LNET_ERR_ATTR_HDR, "");
8412
8413                         nla_put_string(reply, LNET_ERR_ATTR_TYPE, "\n");
8414                         nla_put_s16(reply, LNET_ERR_ATTR_ERRNO,
8415                                     fail->lfp_errno);
8416                         nla_put_string(reply, LNET_ERR_ATTR_DESCR,
8417                                        fail->lfp_msg);
8418                         genlmsg_end(reply, hdr);
8419                 }
8420         }
8421
8422         nlh = nlmsg_put(reply, info->snd_portid, info->snd_seq, NLMSG_DONE, 0,
8423                         NLM_F_MULTI);
8424         if (!nlh) {
8425                 genlmsg_cancel(reply, hdr);
8426                 GENL_SET_ERR_MSG(info,
8427                                  "failed to finish message");
8428                 GOTO(report_err, rc = -EMSGSIZE);
8429         }
8430
8431 report_err:
8432         genradix_free(&dlists.lgpl_failed);
8433         genradix_free(&dlists.lgpl_list);
8434
8435         if (rc < 0) {
8436                 genlmsg_cancel(reply, hdr);
8437                 nlmsg_free(reply);
8438         } else {
8439                 rc = genlmsg_reply(reply, info);
8440         }
8441
8442         return rc;
8443 }
8444
8445 static const struct genl_multicast_group lnet_mcast_grps[] = {
8446         { .name =       "ip2net",       },
8447         { .name =       "net",          },
8448         { .name =       "peer",         },
8449         { .name =       "route",        },
8450         { .name =       "ping",         },
8451         { .name =       "discover",     },
8452         { .name =       "cpt-of-nid",   },
8453 };
8454
8455 static const struct genl_ops lnet_genl_ops[] = {
8456         {
8457                 .cmd            = LNET_CMD_NETS,
8458                 .flags          = GENL_ADMIN_PERM,
8459 #ifdef HAVE_NETLINK_CALLBACK_START
8460                 .start          = lnet_net_show_start,
8461                 .dumpit         = lnet_net_show_dump,
8462 #else
8463                 .dumpit         = lnet_old_net_show_dump,
8464 #endif
8465                 .done           = lnet_net_show_done,
8466                 .doit           = lnet_net_cmd,
8467         },
8468         {
8469                 .cmd            = LNET_CMD_PEERS,
8470                 .flags          = GENL_ADMIN_PERM,
8471 #ifdef HAVE_NETLINK_CALLBACK_START
8472                 .start          = lnet_peer_ni_show_start,
8473                 .dumpit         = lnet_peer_ni_show_dump,
8474 #else
8475                 .dumpit         = lnet_old_peer_ni_show_dump,
8476 #endif
8477                 .done           = lnet_peer_ni_show_done,
8478                 .doit           = lnet_peer_ni_cmd,
8479         },
8480         {
8481                 .cmd            = LNET_CMD_ROUTES,
8482                 .flags          = GENL_ADMIN_PERM,
8483 #ifdef HAVE_NETLINK_CALLBACK_START
8484                 .start          = lnet_route_show_start,
8485                 .dumpit         = lnet_route_show_dump,
8486 #else
8487                 .dumpit         = lnet_old_route_show_dump,
8488 #endif
8489                 .done           = lnet_route_show_done,
8490                 .doit           = lnet_route_cmd,
8491         },
8492         {
8493                 .cmd            = LNET_CMD_PING,
8494                 .flags          = GENL_ADMIN_PERM,
8495 #ifdef HAVE_NETLINK_CALLBACK_START
8496                 .start          = lnet_ping_show_start,
8497                 .dumpit         = lnet_ping_show_dump,
8498 #else
8499                 .dumpit         = lnet_old_ping_show_dump,
8500 #endif
8501                 .done           = lnet_ping_show_done,
8502                 .doit           = lnet_ping_cmd,
8503         },
8504         {
8505                 .cmd            = LNET_CMD_CPT_OF_NID,
8506 #ifdef HAVE_NETLINK_CALLBACK_START
8507                 .start          = lnet_cpt_of_nid_show_start,
8508                 .dumpit         = lnet_cpt_of_nid_show_dump,
8509 #else
8510                 .dumpit         = lnet_old_cpt_of_nid_show_dump,
8511 #endif
8512                 .done           = lnet_cpt_of_nid_show_done,
8513         },
8514 };
8515
8516 static struct genl_family lnet_family = {
8517         .name           = LNET_GENL_NAME,
8518         .version        = LNET_GENL_VERSION,
8519         .module         = THIS_MODULE,
8520         .parallel_ops   = true,
8521         .netnsok        = true,
8522         .ops            = lnet_genl_ops,
8523         .n_ops          = ARRAY_SIZE(lnet_genl_ops),
8524         .mcgrps         = lnet_mcast_grps,
8525         .n_mcgrps       = ARRAY_SIZE(lnet_mcast_grps),
8526 #ifdef GENL_FAMILY_HAS_RESV_START_OP
8527         .resv_start_op  = __LNET_CMD_MAX_PLUS_ONE,
8528 #endif
8529 };
8530
8531 void LNetDebugPeer(struct lnet_processid *id)
8532 {
8533         lnet_debug_peer(&id->nid);
8534 }
8535 EXPORT_SYMBOL(LNetDebugPeer);
8536
8537 /**
8538  * Determine if the specified peer \a nid is on the local node.
8539  *
8540  * \param nid   peer nid to check
8541  *
8542  * \retval true         If peer NID is on the local node.
8543  * \retval false        If peer NID is not on the local node.
8544  */
8545 bool LNetIsPeerLocal(struct lnet_nid *nid)
8546 {
8547         struct lnet_net *net;
8548         struct lnet_ni *ni;
8549         int cpt;
8550
8551         cpt = lnet_net_lock_current();
8552         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
8553                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
8554                         if (nid_same(&ni->ni_nid, nid)) {
8555                                 lnet_net_unlock(cpt);
8556                                 return true;
8557                         }
8558                 }
8559         }
8560         lnet_net_unlock(cpt);
8561
8562         return false;
8563 }
8564 EXPORT_SYMBOL(LNetIsPeerLocal);
8565
8566 /**
8567  * Retrieve the struct lnet_process_id ID of LNet interface at \a index.
8568  * Note that all interfaces share a same PID, as requested by LNetNIInit().
8569  *
8570  * @index       Index of the interface to look up.
8571  * @id          On successful return, this location will hold the
8572  *              struct lnet_process_id ID of the interface.
8573  * @large_nids  Report large NIDs if this is true.
8574  *
8575  * RETURN       0 If an interface exists at \a index.
8576  *              -ENOENT If no interface has been found.
8577  */
8578 int
8579 LNetGetId(unsigned int index, struct lnet_processid *id, bool large_nids)
8580 {
8581         struct lnet_ni   *ni;
8582         struct lnet_net  *net;
8583         int               cpt;
8584         int               rc = -ENOENT;
8585
8586         LASSERT(the_lnet.ln_refcount > 0);
8587
8588         cpt = lnet_net_lock_current();
8589
8590         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
8591                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
8592                         if (!large_nids && !nid_is_nid4(&ni->ni_nid))
8593                                 continue;
8594
8595                         if (index-- != 0)
8596                                 continue;
8597
8598                         id->nid = ni->ni_nid;
8599                         id->pid = the_lnet.ln_pid;
8600                         rc = 0;
8601                         break;
8602                 }
8603         }
8604
8605         lnet_net_unlock(cpt);
8606         return rc;
8607 }
8608 EXPORT_SYMBOL(LNetGetId);
8609
8610 struct ping_data {
8611         int rc;
8612         int replied;
8613         int pd_unlinked;
8614         struct lnet_handle_md mdh;
8615         struct completion completion;
8616 };
8617
8618 static void
8619 lnet_ping_event_handler(struct lnet_event *event)
8620 {
8621         struct ping_data *pd = event->md_user_ptr;
8622
8623         CDEBUG(D_NET, "ping event (%d %d)%s\n",
8624                event->type, event->status,
8625                event->unlinked ? " unlinked" : "");
8626
8627         if (event->status) {
8628                 if (!pd->rc)
8629                         pd->rc = event->status;
8630         } else if (event->type == LNET_EVENT_REPLY) {
8631                 pd->replied = 1;
8632                 pd->rc = event->mlength;
8633         }
8634
8635         if (event->unlinked)
8636                 pd->pd_unlinked = 1;
8637
8638         if (event->unlinked ||
8639             (event->type == LNET_EVENT_SEND && event->status))
8640                 complete(&pd->completion);
8641 }
8642
8643 static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
8644                      signed long timeout, struct lnet_genl_ping_list *plist,
8645                      int n_ids)
8646 {
8647         int id_bytes = sizeof(struct lnet_ni_status); /* For 0@lo */
8648         struct lnet_md md = { NULL };
8649         struct ping_data pd = { 0 };
8650         struct lnet_ping_buffer *pbuf;
8651         struct lnet_processid pid;
8652         struct lnet_ping_iter pi;
8653         int i = 0;
8654         u32 *st;
8655         int nob;
8656         int rc;
8657         int rc2;
8658
8659         genradix_init(&plist->lgpl_list);
8660
8661         /* n_ids limit is arbitrary */
8662         if (n_ids <= 0 || LNET_NID_IS_ANY(&id->nid))
8663                 return -EINVAL;
8664
8665         /* if the user buffer has more space than the lnet_interfaces_max
8666          * then only fill it up to lnet_interfaces_max
8667          */
8668         if (n_ids > lnet_interfaces_max)
8669                 n_ids = lnet_interfaces_max;
8670
8671         if (id->pid == LNET_PID_ANY)
8672                 id->pid = LNET_PID_LUSTRE;
8673
8674         id_bytes += n_ids * sizeof(struct lnet_nid);
8675         pbuf = lnet_ping_buffer_alloc(id_bytes, GFP_NOFS);
8676         if (!pbuf)
8677                 return -ENOMEM;
8678
8679         /* initialize md content */
8680         md.start     = &pbuf->pb_info;
8681         md.length    = id_bytes;
8682         md.threshold = 2; /* GET/REPLY */
8683         md.max_size  = 0;
8684         md.options   = LNET_MD_TRUNCATE;
8685         md.user_ptr  = &pd;
8686         md.handler   = lnet_ping_event_handler;
8687
8688         init_completion(&pd.completion);
8689
8690         rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
8691         if (rc != 0) {
8692                 CERROR("Can't bind MD: %d\n", rc);
8693                 goto fail_ping_buffer_decref;
8694         }
8695
8696         rc = LNetGet(src_nid, pd.mdh, id, LNET_RESERVED_PORTAL,
8697                      LNET_PROTO_PING_MATCHBITS, 0, false);
8698         if (rc != 0) {
8699                 /* Don't CERROR; this could be deliberate! */
8700                 rc2 = LNetMDUnlink(pd.mdh);
8701                 LASSERT(rc2 == 0);
8702
8703                 /* NB must wait for the UNLINK event below... */
8704         }
8705
8706         /* Ensure completion in finite time... */
8707         wait_for_completion_timeout(&pd.completion, timeout);
8708         if (!pd.pd_unlinked) {
8709                 LNetMDUnlink(pd.mdh);
8710                 wait_for_completion(&pd.completion);
8711         }
8712
8713         if (!pd.replied) {
8714                 rc = pd.rc ?: -EIO;
8715                 goto fail_ping_buffer_decref;
8716         }
8717
8718         nob = pd.rc;
8719         LASSERT(nob >= 0 && nob <= id_bytes);
8720
8721         rc = -EPROTO;           /* if I can't parse... */
8722
8723         if (nob < LNET_PING_INFO_HDR_SIZE) {
8724                 CERROR("%s: ping info too short %d\n",
8725                        libcfs_idstr(id), nob);
8726                 goto fail_ping_buffer_decref;
8727         }
8728
8729         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
8730                 lnet_swap_pinginfo(pbuf);
8731         } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
8732                 CERROR("%s: Unexpected magic %08x\n",
8733                        libcfs_idstr(id), pbuf->pb_info.pi_magic);
8734                 goto fail_ping_buffer_decref;
8735         }
8736
8737         if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
8738                 CERROR("%s: ping w/o NI status: 0x%x\n",
8739                        libcfs_idstr(id), pbuf->pb_info.pi_features);
8740                 goto fail_ping_buffer_decref;
8741         }
8742
8743         /* Test if smaller than lnet_pinginfo with just one pi_ni status info.
8744          * That one might contain size when large nids are used.
8745          */
8746         if (nob < offsetof(struct lnet_ping_info, pi_ni[1])) {
8747                 CERROR("%s: Short reply %d(%lu min)\n",
8748                        libcfs_idstr(id), nob,
8749                        offsetof(struct lnet_ping_info, pi_ni[1]));
8750                 goto fail_ping_buffer_decref;
8751         }
8752
8753         if (ping_info_count_entries(pbuf) < n_ids) {
8754                 n_ids = ping_info_count_entries(pbuf);
8755                 id_bytes = lnet_ping_info_size(&pbuf->pb_info);
8756         }
8757
8758         if (nob < id_bytes) {
8759                 CERROR("%s: Short reply %d(%d expected)\n",
8760                        libcfs_idstr(id), nob, id_bytes);
8761                 goto fail_ping_buffer_decref;
8762         }
8763
8764         for (st = ping_iter_first(&pi, pbuf, &pid.nid);
8765              st;
8766              st = ping_iter_next(&pi, &pid.nid)) {
8767                 id = genradix_ptr_alloc(&plist->lgpl_list, i++, GFP_KERNEL);
8768                 if (!id) {
8769                         rc = -ENOMEM;
8770                         goto fail_ping_buffer_decref;
8771                 }
8772
8773                 id->pid = pbuf->pb_info.pi_pid;
8774                 id->nid = pid.nid;
8775         }
8776         rc = i;
8777 fail_ping_buffer_decref:
8778         lnet_ping_buffer_decref(pbuf);
8779         return rc;
8780 }
8781
8782 static int
8783 lnet_discover(struct lnet_processid *pid, u32 force,
8784               struct lnet_genl_ping_list *dlist)
8785 {
8786         struct lnet_peer_ni *lpni;
8787         struct lnet_peer_ni *p;
8788         struct lnet_peer *lp;
8789         int cpt;
8790         int rc;
8791
8792         if (LNET_NID_IS_ANY(&pid->nid))
8793                 return -EINVAL;
8794
8795         if (pid->pid == LNET_PID_ANY)
8796                 pid->pid = LNET_PID_LUSTRE;
8797
8798         cpt = lnet_net_lock_current();
8799         lpni = lnet_peerni_by_nid_locked(&pid->nid, NULL, cpt);
8800         if (IS_ERR(lpni)) {
8801                 rc = PTR_ERR(lpni);
8802                 goto out;
8803         }
8804
8805         /*
8806          * Clearing the NIDS_UPTODATE flag ensures the peer will
8807          * be discovered, provided discovery has not been disabled.
8808          */
8809         lp = lpni->lpni_peer_net->lpn_peer;
8810         spin_lock(&lp->lp_lock);
8811         lp->lp_state &= ~LNET_PEER_NIDS_UPTODATE;
8812         /* If the force flag is set, force a PING and PUSH as well. */
8813         if (force)
8814                 lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH;
8815         spin_unlock(&lp->lp_lock);
8816         rc = lnet_discover_peer_locked(lpni, cpt, true);
8817         if (rc)
8818                 goto out_decref;
8819
8820         /* The lpni (or lp) for this NID may have changed and our ref is
8821          * the only thing keeping the old one around. Release the ref
8822          * and lookup the lpni again
8823          */
8824         lnet_peer_ni_decref_locked(lpni);
8825         lpni = lnet_peer_ni_find_locked(&pid->nid);
8826         if (!lpni) {
8827                 rc = -ENOENT;
8828                 goto out;
8829         }
8830         lp = lpni->lpni_peer_net->lpn_peer;
8831
8832         dlist->lgpl_list_count = 0;
8833         p = NULL;
8834         while ((p = lnet_get_next_peer_ni_locked(lp, NULL, p)) != NULL) {
8835                 struct lnet_processid *id;
8836
8837                 id = genradix_ptr_alloc(&dlist->lgpl_list,
8838                                         dlist->lgpl_list_count++, GFP_KERNEL);
8839                 if (!id) {
8840                         rc = -ENOMEM;
8841                         goto out_decref;
8842                 }
8843                 id->pid = pid->pid;
8844                 id->nid = p->lpni_nid;
8845         }
8846         rc = dlist->lgpl_list_count;
8847
8848 out_decref:
8849         lnet_peer_ni_decref_locked(lpni);
8850 out:
8851         lnet_net_unlock(cpt);
8852
8853         return rc;
8854 }
8855
8856 /**
8857  * Retrieve peer discovery status.
8858  *
8859  * \retval 1 if lnet_peer_discovery_disabled is 0
8860  * \retval 0 if lnet_peer_discovery_disabled is 1
8861  */
8862 int
8863 LNetGetPeerDiscoveryStatus(void)
8864 {
8865         return !lnet_peer_discovery_disabled;
8866 }
8867 EXPORT_SYMBOL(LNetGetPeerDiscoveryStatus);