Whamcloud - gitweb
LU-14668 lnet: add 'lock_prim_nid' lnet module parameter
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  */
31
32 #define DEBUG_SUBSYSTEM S_LNET
33
34 #include <linux/ctype.h>
35 #include <linux/generic-radix-tree.h>
36 #include <linux/log2.h>
37 #include <linux/ktime.h>
38 #include <linux/moduleparam.h>
39 #include <linux/uaccess.h>
40 #ifdef HAVE_SCHED_HEADERS
41 #include <linux/sched/signal.h>
42 #endif
43 #include <net/genetlink.h>
44
45 #include <libcfs/linux/linux-net.h>
46 #include <lnet/udsp.h>
47 #include <lnet/lib-lnet.h>
48
49 #define D_LNI D_CONSOLE
50
/*
 * initialize ln_api_mutex statically, since it needs to be used in
 * discovery_set callback. That module parameter callback can be called
 * before module init completes. The mutex needs to be ready for use then.
 */
struct lnet the_lnet = {
        .ln_api_mutex = __MUTEX_INITIALIZER(the_lnet.ln_api_mutex),
};              /* THE state of the network */
EXPORT_SYMBOL(the_lnet);
60
/* Network configuration strings, parsed during startup.
 * Read-only after load (0444): changing them requires a module reload.
 */
static char *ip2nets = "";
module_param(ip2nets, charp, 0444);
MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");

static char *networks = "";
module_param(networks, charp, 0444);
MODULE_PARM_DESC(networks, "local networks");

static char *routes = "";
module_param(routes, charp, 0444);
MODULE_PARM_DESC(routes, "routes to non-local networks");

/* size of the remote-network hash table allocated at startup */
static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
module_param(rnet_htable_size, int, 0444);
MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");

/* obsolete knob kept only so loading with it set does not fail */
static int use_tcp_bonding;
module_param(use_tcp_bonding, int, 0444);
MODULE_PARM_DESC(use_tcp_bonding,
                 "use_tcp_bonding parameter has been removed");

unsigned int lnet_numa_range = 0;
module_param(lnet_numa_range, uint, 0444);
MODULE_PARM_DESC(lnet_numa_range,
                "NUMA range to consider during Multi-Rail selection");
86
/*
 * lnet_health_sensitivity determines by how much we decrement the health
 * value on sending error. The value defaults to 100, which means health
 * interface health is decremented by 100 points every failure.
 */
unsigned int lnet_health_sensitivity = 100;
static int sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_health_sensitivity = {
        .set = sensitivity_set,
        .get = param_get_int,
};
#define param_check_health_sensitivity(name, p) \
                __param_check(name, p, int)
/* runtime-writable; each write is validated by sensitivity_set() */
module_param(lnet_health_sensitivity, health_sensitivity, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_health_sensitivity, sensitivity_set, param_get_int,
                  &lnet_health_sensitivity, S_IRUGO|S_IWUSR);
#endif
MODULE_PARM_DESC(lnet_health_sensitivity,
                "Value to decrement the health value by on error");

/*
 * lnet_recovery_interval determines how often we should perform recovery
 * on unhealthy interfaces.  DEPRECATED: the setter only emits a warning.
 */
unsigned int lnet_recovery_interval = 1;
static int recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_recovery_interval = {
        .set = recovery_interval_set,
        .get = param_get_int,
};
#define param_check_recovery_interval(name, p) \
                __param_check(name, p, int)
module_param(lnet_recovery_interval, recovery_interval, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_recovery_interval, recovery_interval_set, param_get_int,
                  &lnet_recovery_interval, S_IRUGO|S_IWUSR);
#endif
MODULE_PARM_DESC(lnet_recovery_interval,
                "DEPRECATED - Interval to recover unhealthy interfaces in seconds");
129
130 unsigned int lnet_recovery_limit;
131 module_param(lnet_recovery_limit, uint, 0644);
132 MODULE_PARM_DESC(lnet_recovery_limit,
133                  "How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery");
134
135 unsigned int lnet_max_recovery_ping_interval = 900;
136 unsigned int lnet_max_recovery_ping_count = 9;
137 static int max_recovery_ping_interval_set(const char *val,
138                                           cfs_kernel_param_arg_t *kp);
139
140 #define param_check_max_recovery_ping_interval(name, p) \
141                 __param_check(name, p, int)
142
143 #ifdef HAVE_KERNEL_PARAM_OPS
144 static struct kernel_param_ops param_ops_max_recovery_ping_interval = {
145         .set = max_recovery_ping_interval_set,
146         .get = param_get_int,
147 };
148 module_param(lnet_max_recovery_ping_interval, max_recovery_ping_interval, 0644);
149 #else
150 module_param_call(lnet_max_recovery_ping_interval, max_recovery_ping_interval,
151                   param_get_int, &lnet_max_recovery_ping_interval, 0644);
152 #endif
153 MODULE_PARM_DESC(lnet_max_recovery_ping_interval,
154                  "The max interval between LNet recovery pings, in seconds");
155
156 static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
157 static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp);
158
159 static struct kernel_param_ops param_ops_interfaces_max = {
160         .set = intf_max_set,
161         .get = param_get_int,
162 };
163
164 #define param_check_interfaces_max(name, p) \
165                 __param_check(name, p, int)
166
167 #ifdef HAVE_KERNEL_PARAM_OPS
168 module_param(lnet_interfaces_max, interfaces_max, 0644);
169 #else
170 module_param_call(lnet_interfaces_max, intf_max_set, param_get_int,
171                   &param_ops_interfaces_max, 0644);
172 #endif
173 MODULE_PARM_DESC(lnet_interfaces_max,
174                 "Maximum number of interfaces in a node.");
175
176 unsigned lnet_peer_discovery_disabled = 0;
177 static int discovery_set(const char *val, cfs_kernel_param_arg_t *kp);
178
179 static struct kernel_param_ops param_ops_discovery_disabled = {
180         .set = discovery_set,
181         .get = param_get_int,
182 };
183
184 #define param_check_discovery_disabled(name, p) \
185                 __param_check(name, p, int)
186 #ifdef HAVE_KERNEL_PARAM_OPS
187 module_param(lnet_peer_discovery_disabled, discovery_disabled, 0644);
188 #else
189 module_param_call(lnet_peer_discovery_disabled, discovery_set, param_get_int,
190                   &param_ops_discovery_disabled, 0644);
191 #endif
192 MODULE_PARM_DESC(lnet_peer_discovery_disabled,
193                 "Set to 1 to disable peer discovery on this node.");
194
195 unsigned int lnet_drop_asym_route;
196 static int drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp);
197
198 static struct kernel_param_ops param_ops_drop_asym_route = {
199         .set = drop_asym_route_set,
200         .get = param_get_int,
201 };
202
203 #define param_check_drop_asym_route(name, p)    \
204         __param_check(name, p, int)
205 #ifdef HAVE_KERNEL_PARAM_OPS
206 module_param(lnet_drop_asym_route, drop_asym_route, 0644);
207 #else
208 module_param_call(lnet_drop_asym_route, drop_asym_route_set, param_get_int,
209                   &param_ops_drop_asym_route, 0644);
210 #endif
211 MODULE_PARM_DESC(lnet_drop_asym_route,
212                  "Set to 1 to drop asymmetrical route messages.");
213
/* end-to-end wait for a peer response; also bounds the derived LND
 * timeout (see lnet_set_lnd_timeout())
 */
#define LNET_TRANSACTION_TIMEOUT_DEFAULT 150
unsigned int lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_DEFAULT;
static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_transaction_timeout = {
        .set = transaction_to_set,
        .get = param_get_int,
};

#define param_check_transaction_timeout(name, p) \
                __param_check(name, p, int)
module_param(lnet_transaction_timeout, transaction_timeout, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
                  &lnet_transaction_timeout, S_IRUGO|S_IWUSR);
#endif
MODULE_PARM_DESC(lnet_transaction_timeout,
                "Maximum number of seconds to wait for a peer response.");

/* resend budget per message; must stay below lnet_transaction_timeout
 * (enforced by retry_count_set())
 */
#define LNET_RETRY_COUNT_DEFAULT 2
unsigned int lnet_retry_count = LNET_RETRY_COUNT_DEFAULT;
static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_retry_count = {
        .set = retry_count_set,
        .get = param_get_int,
};

#define param_check_retry_count(name, p) \
                __param_check(name, p, int)
module_param(lnet_retry_count, retry_count, S_IRUGO|S_IWUSR);
#else
module_param_call(lnet_retry_count, retry_count_set, param_get_int,
                  &lnet_retry_count, S_IRUGO|S_IWUSR);
#endif
MODULE_PARM_DESC(lnet_retry_count,
                 "Maximum number of times to retry transmitting a message");
251
/* see MODULE_PARM_DESC below for the meaning of values 0-3 */
unsigned int lnet_response_tracking = 3;
static int response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp);

#ifdef HAVE_KERNEL_PARAM_OPS
static struct kernel_param_ops param_ops_response_tracking = {
        .set = response_tracking_set,
        .get = param_get_int,
};

#define param_check_response_tracking(name, p)  \
        __param_check(name, p, int)
module_param(lnet_response_tracking, response_tracking, 0644);
#else
module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
                  &lnet_response_tracking, 0644);
#endif
MODULE_PARM_DESC(lnet_response_tracking,
                 "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");

/* read-only after load (0444); per the description, controls whether a
 * NID handed down by Lustre is treated as the locked primary NID
 */
int lock_prim_nid = 1;
module_param(lock_prim_nid, int, 0444);
MODULE_PARM_DESC(lock_prim_nid,
                 "Whether nid passed down by Lustre is locked as primary");
275
#define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
                                  (LNET_RETRY_COUNT_DEFAULT + 1))
unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
/* Recompute lnet_lnd_timeout so all lnet_retry_count + 1 send attempts
 * fit within lnet_transaction_timeout.  Called from the setters with
 * ln_api_mutex held so both inputs are read consistently.
 */
static void lnet_set_lnd_timeout(void)
{
        lnet_lnd_timeout = (lnet_transaction_timeout - 1) /
                           (lnet_retry_count + 1);
}
284
/*
 * This sequence number keeps track of how many times DLC was used to
 * update the local NIs. It is incremented when a NI is added or
 * removed and checked when sending a message to determine if there is
 * a need to re-run the selection algorithm. See lnet_select_pathway()
 * for more details on its usage.
 */
static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);

/* record of one ping target that failed, kept in lgpl_failed below */
struct lnet_fail_ping {
        struct lnet_processid           lfp_id;         /* peer that failed */
        int                             lfp_errno;      /* failure errno */
};

/* accumulated state of a generic-netlink ping operation */
struct lnet_genl_ping_list {
        unsigned int                    lgpl_index;
        unsigned int                    lgpl_list_count;
        unsigned int                    lgpl_failed_count;
        signed long                     lgpl_timeout;
        struct lnet_nid                 lgpl_src_nid;
        GENRADIX(struct lnet_fail_ping) lgpl_failed;
        GENRADIX(struct lnet_processid) lgpl_list;
};
308
309 static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
310                      signed long timeout, struct lnet_genl_ping_list *plist,
311                      int n_ids);
312
313 static int lnet_discover(struct lnet_process_id id, __u32 force,
314                          struct lnet_process_id __user *ids, int n_ids);
315
316 static int
317 sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
318 {
319         int rc;
320         unsigned *sensitivity = (unsigned *)kp->arg;
321         unsigned long value;
322
323         rc = kstrtoul(val, 0, &value);
324         if (rc) {
325                 CERROR("Invalid module parameter value for 'lnet_health_sensitivity'\n");
326                 return rc;
327         }
328
329         /*
330          * The purpose of locking the api_mutex here is to ensure that
331          * the correct value ends up stored properly.
332          */
333         mutex_lock(&the_lnet.ln_api_mutex);
334
335         if (value > LNET_MAX_HEALTH_VALUE) {
336                 mutex_unlock(&the_lnet.ln_api_mutex);
337                 CERROR("Invalid health value. Maximum: %d value = %lu\n",
338                        LNET_MAX_HEALTH_VALUE, value);
339                 return -EINVAL;
340         }
341
342         if (*sensitivity != 0 && value == 0 && lnet_retry_count != 0) {
343                 lnet_retry_count = 0;
344                 lnet_set_lnd_timeout();
345         }
346
347         *sensitivity = value;
348
349         mutex_unlock(&the_lnet.ln_api_mutex);
350
351         return 0;
352 }
353
354 static int
355 recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
356 {
357         CWARN("'lnet_recovery_interval' has been deprecated\n");
358
359         return 0;
360 }
361
362 static int
363 max_recovery_ping_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
364 {
365         int rc;
366         unsigned long value;
367
368         rc = kstrtoul(val, 0, &value);
369         if (rc) {
370                 CERROR("Invalid module parameter value for 'lnet_max_recovery_ping_interval'\n");
371                 return rc;
372         }
373
374         if (!value) {
375                 CERROR("Invalid max ping timeout. Must be strictly positive\n");
376                 return -EINVAL;
377         }
378
379         /* The purpose of locking the api_mutex here is to ensure that
380          * the correct value ends up stored properly.
381          */
382         mutex_lock(&the_lnet.ln_api_mutex);
383         lnet_max_recovery_ping_interval = value;
384         lnet_max_recovery_ping_count = 0;
385         value >>= 1;
386         while (value) {
387                 lnet_max_recovery_ping_count++;
388                 value >>= 1;
389         }
390         mutex_unlock(&the_lnet.ln_api_mutex);
391
392         return 0;
393 }
394
395 static int
396 discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
397 {
398         int rc;
399         unsigned *discovery_off = (unsigned *)kp->arg;
400         unsigned long value;
401         struct lnet_ping_buffer *pbuf;
402
403         rc = kstrtoul(val, 0, &value);
404         if (rc) {
405                 CERROR("Invalid module parameter value for 'lnet_peer_discovery_disabled'\n");
406                 return rc;
407         }
408
409         value = (value) ? 1 : 0;
410
411         /*
412          * The purpose of locking the api_mutex here is to ensure that
413          * the correct value ends up stored properly.
414          */
415         mutex_lock(&the_lnet.ln_api_mutex);
416
417         if (value == *discovery_off) {
418                 mutex_unlock(&the_lnet.ln_api_mutex);
419                 return 0;
420         }
421
422         /*
423          * We still want to set the discovery value even when LNet is not
424          * running. This is the case when LNet is being loaded and we want
425          * the module parameters to take effect. Otherwise if we're
426          * changing the value dynamically, we want to set it after
427          * updating the peers
428          */
429         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
430                 *discovery_off = value;
431                 mutex_unlock(&the_lnet.ln_api_mutex);
432                 return 0;
433         }
434
435         /* tell peers that discovery setting has changed */
436         lnet_net_lock(LNET_LOCK_EX);
437         pbuf = the_lnet.ln_ping_target;
438         if (value)
439                 pbuf->pb_info.pi_features &= ~LNET_PING_FEAT_DISCOVERY;
440         else
441                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
442         lnet_net_unlock(LNET_LOCK_EX);
443
444         /* only send a push when we're turning off discovery */
445         if (*discovery_off <= 0 && value > 0)
446                 lnet_push_update_to_peers(1);
447         *discovery_off = value;
448
449         mutex_unlock(&the_lnet.ln_api_mutex);
450
451         return 0;
452 }
453
454 static int
455 drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp)
456 {
457         int rc;
458         unsigned int *drop_asym_route = (unsigned int *)kp->arg;
459         unsigned long value;
460
461         rc = kstrtoul(val, 0, &value);
462         if (rc) {
463                 CERROR("Invalid module parameter value for "
464                        "'lnet_drop_asym_route'\n");
465                 return rc;
466         }
467
468         /*
469          * The purpose of locking the api_mutex here is to ensure that
470          * the correct value ends up stored properly.
471          */
472         mutex_lock(&the_lnet.ln_api_mutex);
473
474         if (value == *drop_asym_route) {
475                 mutex_unlock(&the_lnet.ln_api_mutex);
476                 return 0;
477         }
478
479         *drop_asym_route = value;
480
481         mutex_unlock(&the_lnet.ln_api_mutex);
482
483         return 0;
484 }
485
486 static int
487 transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
488 {
489         int rc;
490         unsigned *transaction_to = (unsigned *)kp->arg;
491         unsigned long value;
492
493         rc = kstrtoul(val, 0, &value);
494         if (rc) {
495                 CERROR("Invalid module parameter value for 'lnet_transaction_timeout'\n");
496                 return rc;
497         }
498
499         /*
500          * The purpose of locking the api_mutex here is to ensure that
501          * the correct value ends up stored properly.
502          */
503         mutex_lock(&the_lnet.ln_api_mutex);
504
505         if (value <= lnet_retry_count || value == 0) {
506                 mutex_unlock(&the_lnet.ln_api_mutex);
507                 CERROR("Invalid value for lnet_transaction_timeout (%lu). "
508                        "Has to be greater than lnet_retry_count (%u)\n",
509                        value, lnet_retry_count);
510                 return -EINVAL;
511         }
512
513         if (value == *transaction_to) {
514                 mutex_unlock(&the_lnet.ln_api_mutex);
515                 return 0;
516         }
517
518         *transaction_to = value;
519         /* Update the lnet_lnd_timeout now that we've modified the
520          * transaction timeout
521          */
522         lnet_set_lnd_timeout();
523
524         mutex_unlock(&the_lnet.ln_api_mutex);
525
526         return 0;
527 }
528
529 static int
530 retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
531 {
532         int rc;
533         unsigned *retry_count = (unsigned *)kp->arg;
534         unsigned long value;
535
536         rc = kstrtoul(val, 0, &value);
537         if (rc) {
538                 CERROR("Invalid module parameter value for 'lnet_retry_count'\n");
539                 return rc;
540         }
541
542         /*
543          * The purpose of locking the api_mutex here is to ensure that
544          * the correct value ends up stored properly.
545          */
546         mutex_lock(&the_lnet.ln_api_mutex);
547
548         if (lnet_health_sensitivity == 0 && value > 0) {
549                 mutex_unlock(&the_lnet.ln_api_mutex);
550                 CERROR("Can not set lnet_retry_count when health feature is turned off\n");
551                 return -EINVAL;
552         }
553
554         if (value > lnet_transaction_timeout) {
555                 mutex_unlock(&the_lnet.ln_api_mutex);
556                 CERROR("Invalid value for lnet_retry_count (%lu). "
557                        "Has to be smaller than lnet_transaction_timeout (%u)\n",
558                        value, lnet_transaction_timeout);
559                 return -EINVAL;
560         }
561
562         *retry_count = value;
563
564         /* Update the lnet_lnd_timeout now that we've modified the
565          * retry count
566          */
567         lnet_set_lnd_timeout();
568
569         mutex_unlock(&the_lnet.ln_api_mutex);
570
571         return 0;
572 }
573
574 static int
575 intf_max_set(const char *val, cfs_kernel_param_arg_t *kp)
576 {
577         int value, rc;
578
579         rc = kstrtoint(val, 0, &value);
580         if (rc) {
581                 CERROR("Invalid module parameter value for 'lnet_interfaces_max'\n");
582                 return rc;
583         }
584
585         if (value < LNET_INTERFACES_MIN) {
586                 CWARN("max interfaces provided are too small, setting to %d\n",
587                       LNET_INTERFACES_MAX_DEFAULT);
588                 value = LNET_INTERFACES_MAX_DEFAULT;
589         }
590
591         *(int *)kp->arg = value;
592
593         return 0;
594 }
595
596 static int
597 response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp)
598 {
599         int rc;
600         unsigned long new_value;
601
602         rc = kstrtoul(val, 0, &new_value);
603         if (rc) {
604                 CERROR("Invalid value for 'lnet_response_tracking'\n");
605                 return -EINVAL;
606         }
607
608         if (new_value < 0 || new_value > 3) {
609                 CWARN("Invalid value (%lu) for 'lnet_response_tracking'\n",
610                       new_value);
611                 return -EINVAL;
612         }
613
614         lnet_response_tracking = new_value;
615
616         return 0;
617 }
618
/* Return the raw 'routes' module-parameter string */
static const char *
lnet_get_routes(void)
{
        return routes;
}
624
625 static const char *
626 lnet_get_networks(void)
627 {
628         const char *nets;
629         int rc;
630
631         if (*networks != 0 && *ip2nets != 0) {
632                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
633                                    "'ip2nets' but not both at once\n");
634                 return NULL;
635         }
636
637         if (*ip2nets != 0) {
638                 rc = lnet_parse_ip2nets(&nets, ip2nets);
639                 return (rc == 0) ? nets : NULL;
640         }
641
642         if (*networks != 0)
643                 return networks;
644
645         return "tcp";
646 }
647
/* Initialize the statically embedded locks/completions of the_lnet
 * (the percpt locks are allocated separately in lnet_create_locks())
 */
static void
lnet_init_locks(void)
{
        spin_lock_init(&the_lnet.ln_eq_wait_lock);
        spin_lock_init(&the_lnet.ln_msg_resend_lock);
        init_completion(&the_lnet.ln_mt_wait_complete);
        mutex_init(&the_lnet.ln_lnd_mutex);
}
656
/* slab caches created by lnet_slab_setup(), freed by lnet_slab_cleanup() */
struct kmem_cache *lnet_mes_cachep;        /* MEs kmem_cache */
struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
                                            *  MDs kmem_cache */
struct kmem_cache *lnet_udsp_cachep;       /* udsp cache */
struct kmem_cache *lnet_rspt_cachep;       /* response tracker cache */
struct kmem_cache *lnet_msg_cachep;        /* lnet_msg allocations */
663
static int
lnet_slab_setup(void)
{
        /* create specific kmem_cache for MEs and small MDs (i.e., originally
         * allocated in <size-xxx> kmem_cache).
         * NOTE(review): on failure, caches created so far are left in
         * place — presumably the caller runs lnet_slab_cleanup(), which
         * tolerates partial setup; confirm against the caller.
         */
        lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(struct lnet_me),
                                            0, 0, NULL);
        if (!lnet_mes_cachep)
                return -ENOMEM;

        lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
                                                  LNET_SMALL_MD_SIZE, 0, 0,
                                                  NULL);
        if (!lnet_small_mds_cachep)
                return -ENOMEM;

        lnet_udsp_cachep = kmem_cache_create("lnet_udsp",
                                             sizeof(struct lnet_udsp),
                                             0, 0, NULL);
        if (!lnet_udsp_cachep)
                return -ENOMEM;

        lnet_rspt_cachep = kmem_cache_create("lnet_rspt", sizeof(struct lnet_rsp_tracker),
                                            0, 0, NULL);
        if (!lnet_rspt_cachep)
                return -ENOMEM;

        lnet_msg_cachep = kmem_cache_create("lnet_msg", sizeof(struct lnet_msg),
                                            0, 0, NULL);
        if (!lnet_msg_cachep)
                return -ENOMEM;

        return 0;
}
699
/* Destroy the slab caches in reverse order of creation.  Each pointer
 * is NULLed afterwards, so the function is safe to call after a partial
 * lnet_slab_setup() and is idempotent.
 */
static void
lnet_slab_cleanup(void)
{
        if (lnet_msg_cachep) {
                kmem_cache_destroy(lnet_msg_cachep);
                lnet_msg_cachep = NULL;
        }

        if (lnet_rspt_cachep) {
                kmem_cache_destroy(lnet_rspt_cachep);
                lnet_rspt_cachep = NULL;
        }

        if (lnet_udsp_cachep) {
                kmem_cache_destroy(lnet_udsp_cachep);
                lnet_udsp_cachep = NULL;
        }

        if (lnet_small_mds_cachep) {
                kmem_cache_destroy(lnet_small_mds_cachep);
                lnet_small_mds_cachep = NULL;
        }

        if (lnet_mes_cachep) {
                kmem_cache_destroy(lnet_mes_cachep);
                lnet_mes_cachep = NULL;
        }
}
728
729 static int
730 lnet_create_remote_nets_table(void)
731 {
732         int               i;
733         struct list_head *hash;
734
735         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
736         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
737         CFS_ALLOC_PTR_ARRAY(hash, LNET_REMOTE_NETS_HASH_SIZE);
738         if (hash == NULL) {
739                 CERROR("Failed to create remote nets hash table\n");
740                 return -ENOMEM;
741         }
742
743         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
744                 INIT_LIST_HEAD(&hash[i]);
745         the_lnet.ln_remote_nets_hash = hash;
746         return 0;
747 }
748
749 static void
750 lnet_destroy_remote_nets_table(void)
751 {
752         int i;
753
754         if (the_lnet.ln_remote_nets_hash == NULL)
755                 return;
756
757         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
758                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
759
760         CFS_FREE_PTR_ARRAY(the_lnet.ln_remote_nets_hash,
761                            LNET_REMOTE_NETS_HASH_SIZE);
762         the_lnet.ln_remote_nets_hash = NULL;
763 }
764
765 static void
766 lnet_destroy_locks(void)
767 {
768         if (the_lnet.ln_res_lock != NULL) {
769                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
770                 the_lnet.ln_res_lock = NULL;
771         }
772
773         if (the_lnet.ln_net_lock != NULL) {
774                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
775                 the_lnet.ln_net_lock = NULL;
776         }
777 }
778
779 static int
780 lnet_create_locks(void)
781 {
782         lnet_init_locks();
783
784         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
785         if (the_lnet.ln_res_lock == NULL)
786                 goto failed;
787
788         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
789         if (the_lnet.ln_net_lock == NULL)
790                 goto failed;
791
792         return 0;
793
794  failed:
795         lnet_destroy_locks();
796         return -ENOMEM;
797 }
798
799 static void lnet_assert_wire_constants(void)
800 {
801         /* Wire protocol assertions generated by 'wirecheck'
802          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
803          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
804          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
805          */
806
807         /* Constants... */
808         BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
809         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
810         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
811         BUILD_BUG_ON(LNET_MSG_ACK != 0);
812         BUILD_BUG_ON(LNET_MSG_PUT != 1);
813         BUILD_BUG_ON(LNET_MSG_GET != 2);
814         BUILD_BUG_ON(LNET_MSG_REPLY != 3);
815         BUILD_BUG_ON(LNET_MSG_HELLO != 4);
816
817         BUILD_BUG_ON((int)sizeof(lnet_nid_t) != 8);
818         BUILD_BUG_ON((int)sizeof(lnet_pid_t) != 4);
819
820         /* Checks for struct lnet_nid */
821         BUILD_BUG_ON((int)sizeof(struct lnet_nid) != 20);
822         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_size) != 0);
823         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_size) != 1);
824         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_type) != 1);
825         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_type) != 1);
826         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_num) != 2);
827         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_num) != 2);
828         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_addr) != 4);
829         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_addr) != 16);
830
831         /* Checks for struct lnet_process_id_packed */
832         BUILD_BUG_ON((int)sizeof(struct lnet_process_id_packed) != 12);
833         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, nid) != 0);
834         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->nid) != 8);
835         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, pid) != 8);
836         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->pid) != 4);
837
838         /* Checks for struct lnet_handle_wire */
839         BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
840         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
841                                    wh_interface_cookie) != 0);
842         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
843         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
844                                    wh_object_cookie) != 8);
845         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
846
847         /* Checks for struct struct lnet_magicversion */
848         BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
849         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
850         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
851         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
852         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
853         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion,
854                                    version_minor) != 6);
855         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
856
857         /* Checks for struct _lnet_hdr_nid4 */
858         BUILD_BUG_ON((int)sizeof(struct _lnet_hdr_nid4) != 72);
859         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_nid) != 0);
860         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_nid) != 8);
861         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_nid) != 8);
862         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_nid) != 8);
863         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_pid) != 16);
864         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_pid) != 4);
865         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_pid) != 20);
866         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_pid) != 4);
867         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, type) != 24);
868         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->type) != 4);
869         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, payload_length) != 28);
870         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->payload_length) != 4);
871         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg) != 32);
872         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg) != 40);
873
874         /* Ack */
875         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.dst_wmd) != 32);
876         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.dst_wmd) != 16);
877         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.match_bits) != 48);
878         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.match_bits) != 8);
879         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.mlength) != 56);
880         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.mlength) != 4);
881
882         /* Put */
883         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ack_wmd) != 32);
884         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ack_wmd) != 16);
885         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.match_bits) != 48);
886         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.match_bits) != 8);
887         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.hdr_data) != 56);
888         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.hdr_data) != 8);
889         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ptl_index) != 64);
890         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ptl_index) != 4);
891         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.offset) != 68);
892         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.offset) != 4);
893
894         /* Get */
895         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.return_wmd) != 32);
896         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.return_wmd) != 16);
897         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.match_bits) != 48);
898         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.match_bits) != 8);
899         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.ptl_index) != 56);
900         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.ptl_index) != 4);
901         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.src_offset) != 60);
902         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.src_offset) != 4);
903         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.sink_length) != 64);
904         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.sink_length) != 4);
905
906         /* Reply */
907         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.reply.dst_wmd) != 32);
908         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.reply.dst_wmd) != 16);
909
910         /* Hello */
911         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.incarnation) != 32);
912         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.incarnation) != 8);
913         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.type) != 40);
914         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.type) != 4);
915
916         /* Checks for struct lnet_ni_status and related constants */
917         BUILD_BUG_ON(LNET_NI_STATUS_INVALID != 0x00000000);
918         BUILD_BUG_ON(LNET_NI_STATUS_UP != 0x15aac0de);
919         BUILD_BUG_ON(LNET_NI_STATUS_DOWN != 0xdeadface);
920
921         /* Checks for struct lnet_ni_status */
922         BUILD_BUG_ON((int)sizeof(struct lnet_ni_status) != 16);
923         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_nid) != 0);
924         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) != 8);
925         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_status) != 8);
926         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_status) != 4);
927         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_msg_size) != 12);
928         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_msg_size) != 4);
929
930         /* Checks for struct lnet_ni_large_status */
931         BUILD_BUG_ON((int)sizeof(struct lnet_ni_large_status) != 24);
932         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_status) != 0);
933         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_status) != 4);
934         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_nid) != 4);
935         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_nid) != 20);
936
937         /* Checks for struct lnet_ping_info and related constants */
938         BUILD_BUG_ON(LNET_PROTO_PING_MAGIC != 0x70696E67);
939         BUILD_BUG_ON(LNET_PING_FEAT_INVAL != 0);
940         BUILD_BUG_ON(LNET_PING_FEAT_BASE != 1);
941         BUILD_BUG_ON(LNET_PING_FEAT_NI_STATUS != 2);
942         BUILD_BUG_ON(LNET_PING_FEAT_RTE_DISABLED != 4);
943         BUILD_BUG_ON(LNET_PING_FEAT_MULTI_RAIL != 8);
944         BUILD_BUG_ON(LNET_PING_FEAT_DISCOVERY != 16);
945         BUILD_BUG_ON(LNET_PING_FEAT_LARGE_ADDR != 32);
946         BUILD_BUG_ON(LNET_PING_FEAT_PRIMARY_LARGE != 64);
947         BUILD_BUG_ON(LNET_PING_FEAT_BITS != 127);
948
949         /* Checks for struct lnet_ping_info */
950         BUILD_BUG_ON((int)sizeof(struct lnet_ping_info) != 16);
951         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_magic) != 0);
952         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) != 4);
953         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_features) != 4);
954         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_features) != 4);
955         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_pid) != 8);
956         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) != 4);
957         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_nnis) != 12);
958         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) != 4);
959         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_ni) != 16);
960         BUILD_BUG_ON(offsetof(struct lnet_ping_info, pi_ni) != sizeof(struct lnet_ping_info));
961
962         /* Acceptor connection request */
963         BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
964
965         /* Checks for struct lnet_acceptor_connreq */
966         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq) != 16);
967         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_magic) != 0);
968         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_magic) != 4);
969         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_version) != 4);
970         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_version) != 4);
971         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_nid) != 8);
972         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_nid) != 8);
973
974         /* Checks for struct lnet_acceptor_connreq_v2 */
975         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq_v2) != 28);
976         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_magic) != 0);
977         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_magic) != 4);
978         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_version) != 4);
979         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_version) != 4);
980         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_nid) != 8);
981         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_nid) != 20);
982
983         /* Checks for struct lnet_counters_common */
984         BUILD_BUG_ON((int)sizeof(struct lnet_counters_common) != 60);
985         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_alloc) != 0);
986         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_alloc) != 4);
987         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_max) != 4);
988         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_max) != 4);
989         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_errors) != 8);
990         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_errors) != 4);
991         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_count) != 12);
992         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_count) != 4);
993         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_count) != 16);
994         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_count) != 4);
995         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_count) != 20);
996         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_count) != 4);
997         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_count) != 24);
998         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_count) != 4);
999         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_length) != 28);
1000         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_length) != 8);
1001         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_length) != 36);
1002         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_length) != 8);
1003         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_length) != 44);
1004         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_length) != 8);
1005         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_length) != 52);
1006         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_length) != 8);
1007 }
1008
1009 static const struct lnet_lnd *lnet_find_lnd_by_type(__u32 type)
1010 {
1011         const struct lnet_lnd *lnd;
1012
1013         /* holding lnd mutex */
1014         if (type >= NUM_LNDS)
1015                 return NULL;
1016         lnd = the_lnet.ln_lnds[type];
1017         LASSERT(!lnd || lnd->lnd_type == type);
1018
1019         return lnd;
1020 }
1021
/* Return the current global LND timeout value.  Exported so LND
 * modules can query it. */
unsigned int
lnet_get_lnd_timeout(void)
{
        return lnet_lnd_timeout;
}
EXPORT_SYMBOL(lnet_get_lnd_timeout);
1028
1029 void
1030 lnet_register_lnd(const struct lnet_lnd *lnd)
1031 {
1032         mutex_lock(&the_lnet.ln_lnd_mutex);
1033
1034         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
1035         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
1036
1037         the_lnet.ln_lnds[lnd->lnd_type] = lnd;
1038
1039         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
1040
1041         mutex_unlock(&the_lnet.ln_lnd_mutex);
1042 }
1043 EXPORT_SYMBOL(lnet_register_lnd);
1044
1045 void
1046 lnet_unregister_lnd(const struct lnet_lnd *lnd)
1047 {
1048         mutex_lock(&the_lnet.ln_lnd_mutex);
1049
1050         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
1051
1052         the_lnet.ln_lnds[lnd->lnd_type] = NULL;
1053         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
1054
1055         mutex_unlock(&the_lnet.ln_lnd_mutex);
1056 }
1057 EXPORT_SYMBOL(lnet_unregister_lnd);
1058
1059 static void
1060 lnet_counters_get_common_locked(struct lnet_counters_common *common)
1061 {
1062         struct lnet_counters *ctr;
1063         int i;
1064
1065         /* FIXME !!! Their is no assert_lnet_net_locked() to ensure this
1066          * actually called under the protection of the lnet_net_lock.
1067          */
1068         memset(common, 0, sizeof(*common));
1069
1070         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1071                 common->lcc_msgs_max     += ctr->lct_common.lcc_msgs_max;
1072                 common->lcc_msgs_alloc   += ctr->lct_common.lcc_msgs_alloc;
1073                 common->lcc_errors       += ctr->lct_common.lcc_errors;
1074                 common->lcc_send_count   += ctr->lct_common.lcc_send_count;
1075                 common->lcc_recv_count   += ctr->lct_common.lcc_recv_count;
1076                 common->lcc_route_count  += ctr->lct_common.lcc_route_count;
1077                 common->lcc_drop_count   += ctr->lct_common.lcc_drop_count;
1078                 common->lcc_send_length  += ctr->lct_common.lcc_send_length;
1079                 common->lcc_recv_length  += ctr->lct_common.lcc_recv_length;
1080                 common->lcc_route_length += ctr->lct_common.lcc_route_length;
1081                 common->lcc_drop_length  += ctr->lct_common.lcc_drop_length;
1082         }
1083 }
1084
/* Public wrapper: gather the summed common counters into *common while
 * holding the exclusive net lock. */
void
lnet_counters_get_common(struct lnet_counters_common *common)
{
        lnet_net_lock(LNET_LOCK_EX);
        lnet_counters_get_common_locked(common);
        lnet_net_unlock(LNET_LOCK_EX);
}
EXPORT_SYMBOL(lnet_counters_get_common);
1093
/* Snapshot the aggregate LNet statistics (common plus health counters)
 * summed across all CPT partitions into *counters.
 *
 * Returns 0 on success, or -ENODEV if LNet is not in the RUNNING state
 * (in which case *counters has been zeroed). */
int
lnet_counters_get(struct lnet_counters *counters)
{
        struct lnet_counters *ctr;
        struct lnet_counters_health *health = &counters->lct_health;
        int i, rc = 0;

        memset(counters, 0, sizeof(*counters));

        lnet_net_lock(LNET_LOCK_EX);

        /* per-CPT counters only exist while LNet is running */
        if (the_lnet.ln_state != LNET_STATE_RUNNING)
                GOTO(out_unlock, rc = -ENODEV);

        lnet_counters_get_common_locked(&counters->lct_common);

        /* accumulate the per-CPT health counters into the caller's copy */
        cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
                health->lch_rst_alloc    += ctr->lct_health.lch_rst_alloc;
                health->lch_resend_count += ctr->lct_health.lch_resend_count;
                health->lch_response_timeout_count +=
                                ctr->lct_health.lch_response_timeout_count;
                health->lch_local_interrupt_count +=
                                ctr->lct_health.lch_local_interrupt_count;
                health->lch_local_dropped_count +=
                                ctr->lct_health.lch_local_dropped_count;
                health->lch_local_aborted_count +=
                                ctr->lct_health.lch_local_aborted_count;
                health->lch_local_no_route_count +=
                                ctr->lct_health.lch_local_no_route_count;
                health->lch_local_timeout_count +=
                                ctr->lct_health.lch_local_timeout_count;
                health->lch_local_error_count +=
                                ctr->lct_health.lch_local_error_count;
                health->lch_remote_dropped_count +=
                                ctr->lct_health.lch_remote_dropped_count;
                health->lch_remote_error_count +=
                                ctr->lct_health.lch_remote_error_count;
                health->lch_remote_timeout_count +=
                                ctr->lct_health.lch_remote_timeout_count;
                health->lch_network_timeout_count +=
                                ctr->lct_health.lch_network_timeout_count;
        }
out_unlock:
        lnet_net_unlock(LNET_LOCK_EX);
        return rc;
}
EXPORT_SYMBOL(lnet_counters_get);
1141
1142 void
1143 lnet_counters_reset(void)
1144 {
1145         struct lnet_counters *counters;
1146         int             i;
1147
1148         lnet_net_lock(LNET_LOCK_EX);
1149
1150         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1151                 goto avoid_reset;
1152
1153         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
1154                 memset(counters, 0, sizeof(struct lnet_counters));
1155 avoid_reset:
1156         lnet_net_unlock(LNET_LOCK_EX);
1157 }
1158
1159 static char *
1160 lnet_res_type2str(int type)
1161 {
1162         switch (type) {
1163         default:
1164                 LBUG();
1165         case LNET_COOKIE_TYPE_MD:
1166                 return "MD";
1167         case LNET_COOKIE_TYPE_ME:
1168                 return "ME";
1169         case LNET_COOKIE_TYPE_EQ:
1170                 return "EQ";
1171         }
1172 }
1173
1174 static void
1175 lnet_res_container_cleanup(struct lnet_res_container *rec)
1176 {
1177         int     count = 0;
1178
1179         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
1180                 return;
1181
1182         while (!list_empty(&rec->rec_active)) {
1183                 struct list_head *e = rec->rec_active.next;
1184
1185                 list_del_init(e);
1186                 if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
1187                         lnet_md_free(list_entry(e, struct lnet_libmd, md_list));
1188
1189                 } else { /* NB: Active MEs should be attached on portals */
1190                         LBUG();
1191                 }
1192                 count++;
1193         }
1194
1195         if (count > 0) {
1196                 /* Found alive MD/ME/EQ, user really should unlink/free
1197                  * all of them before finalize LNet, but if someone didn't,
1198                  * we have to recycle garbage for him */
1199                 CERROR("%d active elements on exit of %s container\n",
1200                        count, lnet_res_type2str(rec->rec_type));
1201         }
1202
1203         if (rec->rec_lh_hash != NULL) {
1204                 CFS_FREE_PTR_ARRAY(rec->rec_lh_hash, LNET_LH_HASH_SIZE);
1205                 rec->rec_lh_hash = NULL;
1206         }
1207
1208         rec->rec_type = 0; /* mark it as finalized */
1209 }
1210
1211 static int
1212 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
1213 {
1214         int     rc = 0;
1215         int     i;
1216
1217         LASSERT(rec->rec_type == 0);
1218
1219         rec->rec_type = type;
1220         INIT_LIST_HEAD(&rec->rec_active);
1221
1222         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
1223
1224         /* Arbitrary choice of hash table size */
1225         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
1226                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
1227         if (rec->rec_lh_hash == NULL) {
1228                 rc = -ENOMEM;
1229                 goto out;
1230         }
1231
1232         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
1233                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
1234
1235         return 0;
1236
1237 out:
1238         CERROR("Failed to setup %s resource container\n",
1239                lnet_res_type2str(type));
1240         lnet_res_container_cleanup(rec);
1241         return rc;
1242 }
1243
1244 static void
1245 lnet_res_containers_destroy(struct lnet_res_container **recs)
1246 {
1247         struct lnet_res_container       *rec;
1248         int                             i;
1249
1250         cfs_percpt_for_each(rec, i, recs)
1251                 lnet_res_container_cleanup(rec);
1252
1253         cfs_percpt_free(recs);
1254 }
1255
1256 static struct lnet_res_container **
1257 lnet_res_containers_create(int type)
1258 {
1259         struct lnet_res_container       **recs;
1260         struct lnet_res_container       *rec;
1261         int                             rc;
1262         int                             i;
1263
1264         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
1265         if (recs == NULL) {
1266                 CERROR("Failed to allocate %s resource containers\n",
1267                        lnet_res_type2str(type));
1268                 return NULL;
1269         }
1270
1271         cfs_percpt_for_each(rec, i, recs) {
1272                 rc = lnet_res_container_setup(rec, i, type);
1273                 if (rc != 0) {
1274                         lnet_res_containers_destroy(recs);
1275                         return NULL;
1276                 }
1277         }
1278
1279         return recs;
1280 }
1281
1282 struct lnet_libhandle *
1283 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
1284 {
1285         /* ALWAYS called with lnet_res_lock held */
1286         struct list_head        *head;
1287         struct lnet_libhandle   *lh;
1288         unsigned int            hash;
1289
1290         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
1291                 return NULL;
1292
1293         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
1294         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
1295
1296         list_for_each_entry(lh, head, lh_hash_chain) {
1297                 if (lh->lh_cookie == cookie)
1298                         return lh;
1299         }
1300
1301         return NULL;
1302 }
1303
1304 void
1305 lnet_res_lh_initialize(struct lnet_res_container *rec,
1306                        struct lnet_libhandle *lh)
1307 {
1308         /* ALWAYS called with lnet_res_lock held */
1309         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
1310         unsigned int    hash;
1311
1312         lh->lh_cookie = rec->rec_lh_cookie;
1313         rec->rec_lh_cookie += 1 << ibits;
1314
1315         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
1316
1317         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
1318 }
1319
1320 struct list_head **
1321 lnet_create_array_of_queues(void)
1322 {
1323         struct list_head **qs;
1324         struct list_head *q;
1325         int i;
1326
1327         qs = cfs_percpt_alloc(lnet_cpt_table(),
1328                               sizeof(struct list_head));
1329         if (!qs) {
1330                 CERROR("Failed to allocate queues\n");
1331                 return NULL;
1332         }
1333
1334         cfs_percpt_for_each(q, i, qs)
1335                 INIT_LIST_HEAD(q);
1336
1337         return qs;
1338 }
1339
/* Forward declaration: lnet_prepare()'s failure path calls
 * lnet_unprepare(), which is defined further below. */
static int lnet_unprepare(void);
1341
/* Allocate and initialize all global LNet state needed before any
 * network is brought up: peer/message/resource containers, counters,
 * portals and the monitor-thread queues.
 *
 * Returns 0 on success, -ENETDOWN if requested_pid is LNET_PID_ANY,
 * or a negative errno on allocation failure, in which case everything
 * set up so far is torn down again via lnet_unprepare(). */
static int
lnet_prepare(lnet_pid_t requested_pid)
{
        /* Prepare to bring up the network */
        struct lnet_res_container **recs;
        int                       rc = 0;

        if (requested_pid == LNET_PID_ANY) {
                /* Don't instantiate LNET just for me */
                return -ENETDOWN;
        }

        LASSERT(the_lnet.ln_refcount == 0);

        the_lnet.ln_routing = 0;

        LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
        the_lnet.ln_pid = requested_pid;

        /* empty-initialize every global list before anything can fail,
         * so lnet_unprepare() can safely walk them on the error path */
        INIT_LIST_HEAD(&the_lnet.ln_test_peers);
        INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
        INIT_LIST_HEAD(&the_lnet.ln_nets);
        INIT_LIST_HEAD(&the_lnet.ln_routers);
        INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
        INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
        INIT_LIST_HEAD(&the_lnet.ln_dc_request);
        INIT_LIST_HEAD(&the_lnet.ln_dc_working);
        INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
        INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
        INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
        INIT_LIST_HEAD(&the_lnet.ln_udsp_list);
        init_waitqueue_head(&the_lnet.ln_dc_waitq);
        the_lnet.ln_mt_handler = NULL;
        init_completion(&the_lnet.ln_started);

        rc = lnet_slab_setup();
        if (rc != 0)
                goto failed;

        rc = lnet_create_remote_nets_table();
        if (rc != 0)
                goto failed;

        /*
         * NB the interface cookie in wire handles guards against delayed
         * replies and ACKs appearing valid after reboot.
         */
        the_lnet.ln_interface_cookie = ktime_get_real_ns();

        the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
                                                sizeof(struct lnet_counters));
        if (the_lnet.ln_counters == NULL) {
                CERROR("Failed to allocate counters for LNet\n");
                rc = -ENOMEM;
                goto failed;
        }

        rc = lnet_peer_tables_create();
        if (rc != 0)
                goto failed;

        rc = lnet_msg_containers_create();
        if (rc != 0)
                goto failed;

        rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
                                      LNET_COOKIE_TYPE_EQ);
        if (rc != 0)
                goto failed;

        recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
        if (recs == NULL) {
                rc = -ENOMEM;
                goto failed;
        }

        the_lnet.ln_md_containers = recs;

        rc = lnet_portals_create();
        if (rc != 0) {
                CERROR("Failed to create portals for LNet: %d\n", rc);
                goto failed;
        }

        the_lnet.ln_mt_zombie_rstqs = lnet_create_array_of_queues();
        if (!the_lnet.ln_mt_zombie_rstqs) {
                rc = -ENOMEM;
                goto failed;
        }

        return 0;

 failed:
        /* unwind whatever subset of the above succeeded */
        lnet_unprepare();
        return rc;
}
1438
/* Release everything allocated by lnet_prepare(); also serves as the
 * error-unwind path, so every step must tolerate its target never
 * having been set up.  Always returns 0. */
static int
lnet_unprepare(void)
{
        /* NB no LNET_LOCK since this is the last reference.  All LND instances
         * have shut down already, so it is safe to unlink and free all
         * descriptors, even those that appear committed to a network op (eg MD
         * with non-zero pending count) */

        lnet_fail_nid(LNET_NID_ANY, 0);

        LASSERT(the_lnet.ln_refcount == 0);
        LASSERT(list_empty(&the_lnet.ln_test_peers));
        LASSERT(list_empty(&the_lnet.ln_nets));

        if (the_lnet.ln_mt_zombie_rstqs) {
                lnet_clean_zombie_rstqs();
                the_lnet.ln_mt_zombie_rstqs = NULL;
        }

        /* detach the monitor-thread handler after its queues are drained */
        lnet_assert_handler_unused(the_lnet.ln_mt_handler);
        the_lnet.ln_mt_handler = NULL;

        lnet_portals_destroy();

        if (the_lnet.ln_md_containers != NULL) {
                lnet_res_containers_destroy(the_lnet.ln_md_containers);
                the_lnet.ln_md_containers = NULL;
        }

        lnet_res_container_cleanup(&the_lnet.ln_eq_container);

        lnet_msg_containers_destroy();
        lnet_peer_uninit();
        lnet_rtrpools_free(0);

        if (the_lnet.ln_counters != NULL) {
                cfs_percpt_free(the_lnet.ln_counters);
                the_lnet.ln_counters = NULL;
        }
        lnet_destroy_remote_nets_table();
        lnet_udsp_destroy(true);
        lnet_slab_cleanup();

        return 0;
}
1484
1485 struct lnet_ni  *
1486 lnet_net2ni_locked(__u32 net_id, int cpt)
1487 {
1488         struct lnet_ni   *ni;
1489         struct lnet_net  *net;
1490
1491         LASSERT(cpt != LNET_LOCK_EX);
1492
1493         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1494                 if (net->net_id == net_id) {
1495                         ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
1496                                               ni_netlist);
1497                         return ni;
1498                 }
1499         }
1500
1501         return NULL;
1502 }
1503
1504 struct lnet_ni *
1505 lnet_net2ni_addref(__u32 net)
1506 {
1507         struct lnet_ni *ni;
1508
1509         lnet_net_lock(0);
1510         ni = lnet_net2ni_locked(net, 0);
1511         if (ni)
1512                 lnet_ni_addref_locked(ni, 0);
1513         lnet_net_unlock(0);
1514
1515         return ni;
1516 }
1517 EXPORT_SYMBOL(lnet_net2ni_addref);
1518
1519 struct lnet_net *
1520 lnet_get_net_locked(__u32 net_id)
1521 {
1522         struct lnet_net  *net;
1523
1524         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1525                 if (net->net_id == net_id)
1526                         return net;
1527         }
1528
1529         return NULL;
1530 }
1531
1532 void
1533 lnet_net_clr_pref_rtrs(struct lnet_net *net)
1534 {
1535         struct list_head zombies;
1536         struct lnet_nid_list *ne;
1537         struct lnet_nid_list *tmp;
1538
1539         INIT_LIST_HEAD(&zombies);
1540
1541         lnet_net_lock(LNET_LOCK_EX);
1542         list_splice_init(&net->net_rtr_pref_nids, &zombies);
1543         lnet_net_unlock(LNET_LOCK_EX);
1544
1545         list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
1546                 list_del_init(&ne->nl_list);
1547                 LIBCFS_FREE(ne, sizeof(*ne));
1548         }
1549 }
1550
1551 int
1552 lnet_net_add_pref_rtr(struct lnet_net *net,
1553                       struct lnet_nid *gw_nid)
1554 __must_hold(&the_lnet.ln_api_mutex)
1555 {
1556         struct lnet_nid_list *ne;
1557
1558         /* This function is called with api_mutex held. When the api_mutex
1559          * is held the list can not be modified, as it is only modified as
1560          * a result of applying a UDSP and that happens under api_mutex
1561          * lock.
1562          */
1563         list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
1564                 if (nid_same(&ne->nl_nid, gw_nid))
1565                         return -EEXIST;
1566         }
1567
1568         LIBCFS_ALLOC(ne, sizeof(*ne));
1569         if (!ne)
1570                 return -ENOMEM;
1571
1572         ne->nl_nid = *gw_nid;
1573
1574         /* Lock the cpt to protect against addition and checks in the
1575          * selection algorithm
1576          */
1577         lnet_net_lock(LNET_LOCK_EX);
1578         list_add(&ne->nl_list, &net->net_rtr_pref_nids);
1579         lnet_net_unlock(LNET_LOCK_EX);
1580
1581         return 0;
1582 }
1583
1584 bool
1585 lnet_net_is_pref_rtr_locked(struct lnet_net *net, struct lnet_nid *rtr_nid)
1586 {
1587         struct lnet_nid_list *ne;
1588
1589         CDEBUG(D_NET, "%s: rtr pref empty: %d\n",
1590                libcfs_net2str(net->net_id),
1591                list_empty(&net->net_rtr_pref_nids));
1592
1593         if (list_empty(&net->net_rtr_pref_nids))
1594                 return false;
1595
1596         list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
1597                 CDEBUG(D_NET, "Comparing pref %s with gw %s\n",
1598                        libcfs_nidstr(&ne->nl_nid),
1599                        libcfs_nidstr(rtr_nid));
1600                 if (nid_same(rtr_nid, &ne->nl_nid))
1601                         return true;
1602         }
1603
1604         return false;
1605 }
1606
1607 static unsigned int
1608 lnet_nid4_cpt_hash(lnet_nid_t nid, unsigned int number)
1609 {
1610         __u64 key = nid;
1611         __u64 pair_bits = 0x0001000100010001LLU;
1612         __u64 mask = pair_bits * 0xFF;
1613         __u64 pair_sum;
1614
1615         /* Use (sum-by-multiplication of nid bytes) mod (number of CPTs)
1616          * to match nid to a CPT.
1617          */
1618         pair_sum = (key & mask) + ((key >> 8) & mask);
1619         pair_sum = (pair_sum * pair_bits) >> 48;
1620
1621         CDEBUG(D_NET, "Match nid %s to cpt %u\n",
1622                libcfs_nid2str(nid), (unsigned int)(pair_sum) % number);
1623
1624         return (unsigned int)(pair_sum) % number;
1625 }
1626
1627 unsigned int
1628 lnet_nid_cpt_hash(struct lnet_nid *nid, unsigned int number)
1629 {
1630         unsigned int val;
1631         u32 h = 0;
1632         int i;
1633
1634         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
1635
1636         if (number == 1)
1637                 return 0;
1638
1639         if (nid_is_nid4(nid))
1640                 return lnet_nid4_cpt_hash(lnet_nid_to_nid4(nid), number);
1641
1642         for (i = 0; i < 4; i++)
1643                 h = cfs_hash_32(nid->nid_addr[i]^h, 32);
1644         val = cfs_hash_32(LNET_NID_NET(nid) ^ h, LNET_CPT_BITS);
1645         if (val < number)
1646                 return val;
1647         return (unsigned int)(h + val + (val >> 1)) % number;
1648 }
1649
1650 int
1651 lnet_cpt_of_nid_locked(struct lnet_nid *nid, struct lnet_ni *ni)
1652 {
1653         struct lnet_net *net;
1654
1655         /* must called with hold of lnet_net_lock */
1656         if (LNET_CPT_NUMBER == 1)
1657                 return 0; /* the only one */
1658
1659         /*
1660          * If NI is provided then use the CPT identified in the NI cpt
1661          * list if one exists. If one doesn't exist, then that NI is
1662          * associated with all CPTs and it follows that the net it belongs
1663          * to is implicitly associated with all CPTs, so just hash the nid
1664          * and return that.
1665          */
1666         if (ni != NULL) {
1667                 if (ni->ni_cpts != NULL)
1668                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
1669                                                              ni->ni_ncpts)];
1670                 else
1671                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1672         }
1673
1674         /* no NI provided so look at the net */
1675         net = lnet_get_net_locked(LNET_NID_NET(nid));
1676
1677         if (net != NULL && net->net_cpts != NULL) {
1678                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
1679         }
1680
1681         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1682 }
1683
1684 int
1685 lnet_nid2cpt(struct lnet_nid *nid, struct lnet_ni *ni)
1686 {
1687         int     cpt;
1688         int     cpt2;
1689
1690         if (LNET_CPT_NUMBER == 1)
1691                 return 0; /* the only one */
1692
1693         cpt = lnet_net_lock_current();
1694
1695         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
1696
1697         lnet_net_unlock(cpt);
1698
1699         return cpt2;
1700 }
1701 EXPORT_SYMBOL(lnet_nid2cpt);
1702
/* Map a legacy 4-byte NID (and optional NI) to a CPT number. */
int
lnet_cpt_of_nid(lnet_nid_t nid4, struct lnet_ni *ni)
{
	struct lnet_nid nid;

	if (LNET_CPT_NUMBER == 1)
		return 0; /* the only one */

	/* widen to struct lnet_nid and use the common path */
	lnet_nid4_to_nid(nid4, &nid);
	return lnet_nid2cpt(&nid, ni);
}
EXPORT_SYMBOL(lnet_cpt_of_nid);
1715
1716 int
1717 lnet_islocalnet_locked(__u32 net_id)
1718 {
1719         struct lnet_net *net;
1720         bool local;
1721
1722         net = lnet_get_net_locked(net_id);
1723
1724         local = net != NULL;
1725
1726         return local;
1727 }
1728
1729 int
1730 lnet_islocalnet(__u32 net_id)
1731 {
1732         int cpt;
1733         bool local;
1734
1735         cpt = lnet_net_lock_current();
1736
1737         local = lnet_islocalnet_locked(net_id);
1738
1739         lnet_net_unlock(cpt);
1740
1741         return local;
1742 }
1743
/* Find the local NI whose NID exactly matches @nid.
 * Caller must hold lnet_net_lock on @cpt (shared mode, never
 * LNET_LOCK_EX). Returns the NI without taking a reference, or NULL
 * when no local NI matches.
 */
struct lnet_ni  *
lnet_nid_to_ni_locked(struct lnet_nid *nid, int cpt)
{
	struct lnet_net  *net;
	struct lnet_ni *ni;

	LASSERT(cpt != LNET_LOCK_EX);

	/* linear scan over every NI of every local net */
	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
			if (nid_same(&ni->ni_nid, nid))
				return ni;
		}
	}

	return NULL;
}
1761
/* Like lnet_nid_to_ni_locked() but takes the net lock itself and, on
 * success, returns the NI with a reference held — the caller must
 * drop that reference when done.
 */
struct lnet_ni *
lnet_nid_to_ni_addref(struct lnet_nid *nid)
{
	struct lnet_ni *ni;

	lnet_net_lock(0);
	ni = lnet_nid_to_ni_locked(nid, 0);
	if (ni)
		lnet_ni_addref_locked(ni, 0);
	lnet_net_unlock(0);

	return ni;
}
EXPORT_SYMBOL(lnet_nid_to_ni_addref);
1776
/* Return non-zero if @nid belongs to one of this node's NIs. */
int
lnet_islocalnid(struct lnet_nid *nid)
{
	struct lnet_ni	*ni;
	int		cpt;

	cpt = lnet_net_lock_current();
	ni = lnet_nid_to_ni_locked(nid, cpt);
	lnet_net_unlock(cpt);

	return ni != NULL;
}
1789
1790 int
1791 lnet_count_acceptor_nets(void)
1792 {
1793         /* Return the # of NIs that need the acceptor. */
1794         int              count = 0;
1795         struct lnet_net  *net;
1796         int              cpt;
1797
1798         cpt = lnet_net_lock_current();
1799         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1800                 /* all socklnd type networks should have the acceptor
1801                  * thread started */
1802                 if (net->net_lnd->lnd_accept != NULL)
1803                         count++;
1804         }
1805
1806         lnet_net_unlock(cpt);
1807
1808         return count;
1809 }
1810
1811 struct lnet_ping_buffer *
1812 lnet_ping_buffer_alloc(int nbytes, gfp_t gfp)
1813 {
1814         struct lnet_ping_buffer *pbuf;
1815
1816         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nbytes), gfp);
1817         if (pbuf) {
1818                 pbuf->pb_nbytes = nbytes;       /* sizeof of pb_info */
1819                 pbuf->pb_needs_post = false;
1820                 atomic_set(&pbuf->pb_refcnt, 1);
1821         }
1822
1823         return pbuf;
1824 }
1825
/* Free a ping buffer. All references must have been dropped first. */
void
lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
{
	LASSERT(atomic_read(&pbuf->pb_refcnt) == 0);
	LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nbytes));
}
1832
1833 static struct lnet_ping_buffer *
1834 lnet_ping_target_create(int nbytes)
1835 {
1836         struct lnet_ping_buffer *pbuf;
1837
1838         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
1839         if (pbuf == NULL) {
1840                 CERROR("Can't allocate ping source [%d]\n", nbytes);
1841                 return NULL;
1842         }
1843
1844         pbuf->pb_info.pi_nnis = 0;
1845         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1846         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1847         pbuf->pb_info.pi_features =
1848                 LNET_PING_FEAT_NI_STATUS | LNET_PING_FEAT_MULTI_RAIL;
1849
1850         return pbuf;
1851 }
1852
/* Sum the ping-status entry sizes of every NI on @net.
 * Caller must hold the net lock.
 */
static inline int
lnet_get_net_ni_bytes_locked(struct lnet_net *net)
{
	struct lnet_ni *ni;
	int bytes = 0;

	list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
		bytes += lnet_ping_sts_size(&ni->ni_nid);

	return bytes;
}
1864
1865 static inline int
1866 lnet_get_ni_bytes(void)
1867 {
1868         struct lnet_ni *ni;
1869         struct lnet_net *net;
1870         int bytes = 0;
1871
1872         lnet_net_lock(0);
1873
1874         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1875                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1876                         bytes += lnet_ping_sts_size(&ni->ni_nid);
1877         }
1878
1879         lnet_net_unlock(0);
1880
1881         return bytes;
1882 }
1883
/* Byte-swap a ping buffer received from a peer of opposite
 * endianness: the fixed header first, then each legacy
 * lnet_ni_status entry, and finally any trailing large-address
 * entries when the peer set LNET_PING_FEAT_LARGE_ADDR.
 */
void
lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf)
{
	struct lnet_ni_large_status *lstat, *lend;
	struct lnet_ni_status *stat, *end;
	int nnis;
	int i;

	__swab32s(&pbuf->pb_info.pi_magic);
	__swab32s(&pbuf->pb_info.pi_features);
	__swab32s(&pbuf->pb_info.pi_pid);
	__swab32s(&pbuf->pb_info.pi_nnis);
	nnis = pbuf->pb_info.pi_nnis;
	stat = &pbuf->pb_info.pi_ni[0];
	end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
	/* stop at the buffer end even if pi_nnis claims more entries */
	for (i = 0; i < nnis && stat + 1 <= end; i++, stat++) {
		__swab64s(&stat->ns_nid);
		__swab32s(&stat->ns_status);
		if (i == 0)
			/* Might be total size */
			__swab32s(&stat->ns_msg_size);
	}
	if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_LARGE_ADDR))
		return;

	/* large-address entries follow immediately after the nid4 ones */
	lstat = (struct lnet_ni_large_status *)stat;
	lend = (void *)end;
	while (lstat + 1 <= lend) {
		__swab32s(&lstat->ns_status);
		/* struct lnet_nid never needs to be swabed */
		lstat = lnet_ping_sts_next(lstat);
	}
}
1917
/* Sanity-check a ping info block.
 * Returns 0 when valid, or -EINVAL (NULL), -EPROTO (bad magic /
 * missing NI-status feature / first NI not loopback), or -ERANGE
 * (NI count out of bounds) on the first violated constraint.
 */
int
lnet_ping_info_validate(struct lnet_ping_info *pinfo)
{
	if (!pinfo)
		return -EINVAL;
	if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
		return -EPROTO;
	if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
		return -EPROTO;
	/* Loopback is guaranteed to be present */
	if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
		return -ERANGE;
	if (LNET_PING_INFO_LONI(pinfo) != LNET_NID_LO_0)
		return -EPROTO;
	return 0;
}
1934
/* Tear down the ping target buffer: detach every NI's ni_status
 * pointer (which points into the buffer) and drop the buffer's
 * reference.
 */
static void
lnet_ping_target_destroy(void)
{
	struct lnet_net *net;
	struct lnet_ni	*ni;

	lnet_net_lock(LNET_LOCK_EX);

	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
			lnet_ni_lock(ni);
			/* ni_status pointed into the ping buffer */
			ni->ni_status = NULL;
			lnet_ni_unlock(ni);
		}
	}

	lnet_ping_buffer_decref(the_lnet.ln_ping_target);
	the_lnet.ln_ping_target = NULL;

	lnet_net_unlock(LNET_LOCK_EX);
}
1956
/* Event handler for the ping target MD: drop the MD's buffer
 * reference once the MD has been unlinked.
 */
static void
lnet_ping_target_event_handler(struct lnet_event *event)
{
	struct lnet_ping_buffer *pbuf = event->md_user_ptr;

	if (event->unlinked)
		lnet_ping_buffer_decref(pbuf);
}
1965
/* Create the ping target buffer and attach the ME/MD that lets peers
 * GET it from the reserved portal.
 *
 * \param ppbuf    on success, set to the new buffer (refcount 2: one
 *		   for the caller and one held until the MD unlinks)
 * \param ping_mdh receives the handle of the attached MD
 * \param ni_bytes size of the NI status payload to allocate
 * \param set_eq   install the ping target event handler (initial setup)
 *
 * \retval 0 on success, negative errno on failure
 */
static int
lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
		       struct lnet_handle_md *ping_mdh,
		       int ni_bytes, bool set_eq)
{
	struct lnet_processid id = {
		.nid = LNET_ANY_NID,
		.pid = LNET_PID_ANY
	};
	struct lnet_me *me;
	struct lnet_md md = { NULL };
	int rc;

	if (set_eq)
		the_lnet.ln_ping_target_handler =
			lnet_ping_target_event_handler;

	*ppbuf = lnet_ping_target_create(ni_bytes);
	if (*ppbuf == NULL) {
		rc = -ENOMEM;
		goto fail_free_eq;
	}

	/* Ping target ME/MD */
	me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
			  LNET_PROTO_PING_MATCHBITS, 0,
			  LNET_UNLINK, LNET_INS_AFTER);
	if (IS_ERR(me)) {
		rc = PTR_ERR(me);
		CERROR("Can't create ping target ME: %d\n", rc);
		goto fail_decref_ping_buffer;
	}

	/* initialize md content */
	md.start     = &(*ppbuf)->pb_info;
	md.length    = (*ppbuf)->pb_nbytes;
	md.threshold = LNET_MD_THRESH_INF;
	md.max_size  = 0;
	md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
		       LNET_MD_MANAGE_REMOTE;
	md.handler   = the_lnet.ln_ping_target_handler;
	md.user_ptr  = *ppbuf;

	rc = LNetMDAttach(me, &md, LNET_RETAIN, ping_mdh);
	if (rc != 0) {
		CERROR("Can't attach ping target MD: %d\n", rc);
		goto fail_decref_ping_buffer;
	}
	/* ref for the MD; dropped by the event handler on unlink */
	lnet_ping_buffer_addref(*ppbuf);

	return 0;

fail_decref_ping_buffer:
	LASSERT(atomic_read(&(*ppbuf)->pb_refcnt) == 1);
	lnet_ping_buffer_decref(*ppbuf);
	*ppbuf = NULL;
fail_free_eq:
	return rc;
}
2025
/* Start unlinking the ping MD and wait until the event handler has
 * dropped the MD's reference — refcount <= 1 means only the caller's
 * reference remains.
 */
static void
lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
		    struct lnet_handle_md *ping_mdh)
{
	LNetMDUnlink(*ping_mdh);
	LNetInvalidateMDHandle(ping_mdh);

	/* NB the MD could be busy; this just starts the unlink */
	wait_var_event_warning(&pbuf->pb_refcnt,
			       atomic_read(&pbuf->pb_refcnt) <= 1,
			       "Still waiting for ping data MD to unlink\n");
}
2038
2039 static void
2040 lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
2041 {
2042         struct lnet_ni *ni;
2043         struct lnet_net *net;
2044         struct lnet_ni_status *ns, *end;
2045         struct lnet_ni_large_status *lns, *lend;
2046         int rc;
2047
2048         pbuf->pb_info.pi_nnis = 0;
2049         ns = &pbuf->pb_info.pi_ni[0];
2050         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
2051         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2052                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2053                         if (!nid_is_nid4(&ni->ni_nid)) {
2054                                 if (ns == &pbuf->pb_info.pi_ni[1]) {
2055                                         /* This is primary, and it is long */
2056                                         pbuf->pb_info.pi_features |=
2057                                                 LNET_PING_FEAT_PRIMARY_LARGE;
2058                                 }
2059                                 continue;
2060                         }
2061                         LASSERT(ns + 1 <= end);
2062                         ns->ns_nid = lnet_nid_to_nid4(&ni->ni_nid);
2063
2064                         lnet_ni_lock(ni);
2065                         ns->ns_status = lnet_ni_get_status_locked(ni);
2066                         ni->ni_status = &ns->ns_status;
2067                         lnet_ni_unlock(ni);
2068
2069                         pbuf->pb_info.pi_nnis++;
2070                         ns++;
2071                 }
2072         }
2073
2074         lns = (void *)ns;
2075         lend = (void *)end;
2076         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2077                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2078                         if (nid_is_nid4(&ni->ni_nid))
2079                                 continue;
2080                         LASSERT(lns + 1 <= lend);
2081
2082                         lns->ns_nid = ni->ni_nid;
2083
2084                         lnet_ni_lock(ni);
2085                         ns->ns_status = lnet_ni_get_status_locked(ni);
2086                         ni->ni_status = &lns->ns_status;
2087                         lnet_ni_unlock(ni);
2088
2089                         lns = lnet_ping_sts_next(lns);
2090                 }
2091         }
2092         if ((void *)lns > (void *)ns) {
2093                 /* Record total info size */
2094                 pbuf->pb_info.pi_ni[0].ns_msg_size =
2095                         (void *)lns - (void *)&pbuf->pb_info;
2096                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_LARGE_ADDR;
2097         }
2098
2099         /* We (ab)use the ns_status of the loopback interface to
2100          * transmit the sequence number. The first interface listed
2101          * must be the loopback interface.
2102          */
2103         rc = lnet_ping_info_validate(&pbuf->pb_info);
2104         if (rc) {
2105                 LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
2106                 LBUG();
2107         }
2108         LNET_PING_BUFFER_SEQNO(pbuf) =
2109                 atomic_inc_return(&the_lnet.ln_ping_target_seqno);
2110 }
2111
/* Install @pbuf as the current ping target, unlink and free the
 * previous one (if any), and push the updated ping info to peers.
 */
static void
lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
			struct lnet_handle_md ping_mdh)
{
	struct lnet_ping_buffer *old_pbuf = NULL;
	struct lnet_handle_md old_ping_md;

	/* switch the NIs to point to the new ping info created */
	lnet_net_lock(LNET_LOCK_EX);

	if (!the_lnet.ln_routing)
		pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;
	if (!lnet_peer_discovery_disabled)
		pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;

	/* Ensure only known feature bits have been set. */
	LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
	LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));

	lnet_ping_target_install_locked(pbuf);

	if (the_lnet.ln_ping_target) {
		old_pbuf = the_lnet.ln_ping_target;
		old_ping_md = the_lnet.ln_ping_target_md;
	}
	the_lnet.ln_ping_target_md = ping_mdh;
	the_lnet.ln_ping_target = pbuf;

	lnet_net_unlock(LNET_LOCK_EX);

	if (old_pbuf) {
		/* unlink and free the old ping info */
		lnet_ping_md_unlink(old_pbuf, &old_ping_md);
		lnet_ping_buffer_decref(old_pbuf);
	}

	lnet_push_update_to_peers(0);
}
2150
/* Shut down the ping target: unlink its MD, then destroy the buffer
 * once the event handler has no in-flight events.
 */
static void
lnet_ping_target_fini(void)
{
	lnet_ping_md_unlink(the_lnet.ln_ping_target,
			    &the_lnet.ln_ping_target_md);

	lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
	lnet_ping_target_destroy();
}
2160
/* Resize the push target.
 * Allocates a buffer sized to ln_push_target_nbytes, posts it, then
 * swaps it in for the old one. If ln_push_target_nbytes grew while we
 * were posting (another push/reply arrived needing a larger buffer),
 * loop and do it again. Returns 0 or a negative errno.
 */
int lnet_push_target_resize(void)
{
	struct lnet_handle_md mdh;
	struct lnet_handle_md old_mdh;
	struct lnet_ping_buffer *pbuf;
	struct lnet_ping_buffer *old_pbuf;
	int nbytes;
	int rc;

again:
	nbytes = the_lnet.ln_push_target_nbytes;
	if (nbytes <= 0) {
		CDEBUG(D_NET, "Invalid nbytes %d\n", nbytes);
		return -EINVAL;
	}

	/* NB: lnet_ping_buffer_alloc() sets pbuf refcount to 1. That ref is
	 * dropped when we need to resize again (see "old_pbuf" below) or when
	 * LNet is shutdown (see lnet_push_target_fini())
	 */
	pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
	if (!pbuf) {
		CDEBUG(D_NET, "Can't allocate pbuf for nbytes %d\n", nbytes);
		return -ENOMEM;
	}

	rc = lnet_push_target_post(pbuf, &mdh);
	if (rc) {
		CDEBUG(D_NET, "Failed to post push target: %d\n", rc);
		lnet_ping_buffer_decref(pbuf);
		return rc;
	}

	/* swap the new buffer in under the exclusive net lock */
	lnet_net_lock(LNET_LOCK_EX);
	old_pbuf = the_lnet.ln_push_target;
	old_mdh = the_lnet.ln_push_target_md;
	the_lnet.ln_push_target = pbuf;
	the_lnet.ln_push_target_md = mdh;
	lnet_net_unlock(LNET_LOCK_EX);

	if (old_pbuf) {
		LNetMDUnlink(old_mdh);
		/* Drop ref set by lnet_ping_buffer_alloc() */
		lnet_ping_buffer_decref(old_pbuf);
	}

	/* Received another push or reply that requires a larger buffer */
	if (nbytes < the_lnet.ln_push_target_nbytes)
		goto again;

	CDEBUG(D_NET, "nbytes %d success\n", nbytes);
	return 0;
}
2215
2216 int lnet_push_target_post(struct lnet_ping_buffer *pbuf,
2217                           struct lnet_handle_md *mdhp)
2218 {
2219         struct lnet_processid id = { LNET_ANY_NID, LNET_PID_ANY };
2220         struct lnet_md md = { NULL };
2221         struct lnet_me *me;
2222         int rc;
2223
2224         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
2225                           LNET_PROTO_PING_MATCHBITS, 0,
2226                           LNET_UNLINK, LNET_INS_AFTER);
2227         if (IS_ERR(me)) {
2228                 rc = PTR_ERR(me);
2229                 CERROR("Can't create push target ME: %d\n", rc);
2230                 return rc;
2231         }
2232
2233         pbuf->pb_needs_post = false;
2234
2235         /* This reference is dropped by lnet_push_target_event_handler() */
2236         lnet_ping_buffer_addref(pbuf);
2237
2238         /* initialize md content */
2239         md.start     = &pbuf->pb_info;
2240         md.length    = pbuf->pb_nbytes;
2241         md.threshold = 1;
2242         md.max_size  = 0;
2243         md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE;
2244         md.user_ptr  = pbuf;
2245         md.handler   = the_lnet.ln_push_target_handler;
2246
2247         rc = LNetMDAttach(me, &md, LNET_UNLINK, mdhp);
2248         if (rc) {
2249                 CERROR("Can't attach push MD: %d\n", rc);
2250                 lnet_ping_buffer_decref(pbuf);
2251                 pbuf->pb_needs_post = true;
2252                 return rc;
2253         }
2254
2255         CDEBUG(D_NET, "posted push target %p\n", pbuf);
2256
2257         return 0;
2258 }
2259
/* Event handler for the push target MD: byte-swap the received ping
 * info if it came from a peer of opposite endianness, hand the event
 * to peer discovery, and drop the post's buffer reference once the
 * MD is unlinked.
 */
static void lnet_push_target_event_handler(struct lnet_event *ev)
{
	struct lnet_ping_buffer *pbuf = ev->md_user_ptr;

	CDEBUG(D_NET, "type %d status %d unlinked %d\n", ev->type, ev->status,
	       ev->unlinked);

	/* byte-swapped magic means an opposite-endian sender */
	if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
		lnet_swap_pinginfo(pbuf);

	if (ev->type == LNET_EVENT_UNLINK) {
		/* Drop ref added by lnet_push_target_post() */
		lnet_ping_buffer_decref(pbuf);
		return;
	}

	lnet_peer_push_event(ev);
	if (ev->unlinked)
		/* Drop ref added by lnet_push_target_post */
		lnet_ping_buffer_decref(pbuf);
}
2281
/* Initialize the push target.
 * Installs the event handler, marks the reserved portal lazy, and
 * posts an initial minimum-size buffer. Returns 0, -EALREADY if
 * already initialized, or the error from lnet_push_target_resize().
 */
static int lnet_push_target_init(void)
{
	int rc;

	if (the_lnet.ln_push_target)
		return -EALREADY;

	/* handler must be in place before the first post */
	the_lnet.ln_push_target_handler =
		lnet_push_target_event_handler;

	rc = LNetSetLazyPortal(LNET_RESERVED_PORTAL);
	LASSERT(rc == 0);

	/* Start at the required minimum, we'll enlarge if required. */
	the_lnet.ln_push_target_nbytes = LNET_PING_INFO_MIN_SIZE;

	rc = lnet_push_target_resize();
	if (rc) {
		/* undo the lazy-portal setup on failure */
		LNetClearLazyPortal(LNET_RESERVED_PORTAL);
		the_lnet.ln_push_target_handler = NULL;
	}

	return rc;
}
2307
/* Clean up the push target.
 * Unlinks the MD, waits for the event handler to release its
 * reference, then drops the allocation reference and clears all
 * push-target state. No-op if never initialized.
 */
static void lnet_push_target_fini(void)
{
	if (!the_lnet.ln_push_target)
		return;

	/* Unlink and invalidate to prevent new references. */
	LNetMDUnlink(the_lnet.ln_push_target_md);
	LNetInvalidateMDHandle(&the_lnet.ln_push_target_md);

	/* Wait for the unlink to complete. */
	wait_var_event_warning(&the_lnet.ln_push_target->pb_refcnt,
			       atomic_read(&the_lnet.ln_push_target->pb_refcnt) <= 1,
			       "Still waiting for ping data MD to unlink\n");

	/* Drop ref set by lnet_ping_buffer_alloc() */
	lnet_ping_buffer_decref(the_lnet.ln_push_target);
	the_lnet.ln_push_target = NULL;
	the_lnet.ln_push_target_nbytes = 0;

	LNetClearLazyPortal(LNET_RESERVED_PORTAL);
	lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
	the_lnet.ln_push_target_handler = NULL;
}
2332
2333 static int
2334 lnet_ni_tq_credits(struct lnet_ni *ni)
2335 {
2336         int     credits;
2337
2338         LASSERT(ni->ni_ncpts >= 1);
2339
2340         if (ni->ni_ncpts == 1)
2341                 return ni->ni_net->net_tunables.lct_max_tx_credits;
2342
2343         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
2344         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
2345         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
2346
2347         return credits;
2348 }
2349
/* Move @ni to its net's zombie list so it can no longer be looked
 * up, and drop the list's reference. Caller holds lnet_net_lock/EX.
 */
static void
lnet_ni_unlink_locked(struct lnet_ni *ni)
{
	/* move it to zombie list and nobody can find it anymore */
	LASSERT(!list_empty(&ni->ni_netlist));
	list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
	lnet_ni_decref_locked(ni, 0);
}
2358
/* Drain @net's zombie NI list: wait until each NI's per-CPT
 * refcounts drop to zero, then run the LND shutdown hook and free
 * the NI. Called with ln_api_mutex and lnet_net_lock/EX held; both
 * are temporarily dropped while waiting for references and while
 * calling into the LND.
 */
static void
lnet_clear_zombies_nis_locked(struct lnet_net *net)
{
	int		i;
	int		islo;
	struct lnet_ni	*ni;
	struct list_head *zombie_list = &net->net_ni_zombie;

	/*
	 * Now wait for the NIs I just nuked to show up on the zombie
	 * list and shut them down in guaranteed thread context
	 */
	i = 2;
	while ((ni = list_first_entry_or_null(zombie_list,
					      struct lnet_ni,
					      ni_netlist)) != NULL) {
		int *ref;
		int j;

		list_del_init(&ni->ni_netlist);
		/* the ni should be in deleting state. If it's not it's
		 * a bug */
		LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
		/* re-queue the NI if any CPT still holds a reference */
		cfs_percpt_for_each(ref, j, ni->ni_refs) {
			if (*ref == 0)
				continue;
			/* still busy, add it back to zombie list */
			list_add(&ni->ni_netlist, zombie_list);
			break;
		}

		if (!list_empty(&ni->ni_netlist)) {
			/* Unlock mutex while waiting to allow other
			 * threads to read the LNet state and fall through
			 * to avoid deadlock
			 */
			lnet_net_unlock(LNET_LOCK_EX);
			mutex_unlock(&the_lnet.ln_api_mutex);

			/* log at power-of-two intervals only:
			 * (i & -i) == i iff i is a power of two */
			++i;
			if ((i & (-i)) == i) {
				CDEBUG(D_WARNING,
				       "Waiting for zombie LNI %s\n",
				       libcfs_nidstr(&ni->ni_nid));
			}
			schedule_timeout_uninterruptible(cfs_time_seconds(1));

			mutex_lock(&the_lnet.ln_api_mutex);
			lnet_net_lock(LNET_LOCK_EX);
			continue;
		}

		lnet_net_unlock(LNET_LOCK_EX);

		islo = ni->ni_net->net_lnd->lnd_type == LOLND;

		LASSERT(!in_interrupt());
		/* Holding the LND mutex makes it safe for lnd_shutdown
		 * to call module_put(). Module unload cannot finish
		 * until lnet_unregister_lnd() completes, and that
		 * requires the LND mutex.
		 */
		mutex_unlock(&the_lnet.ln_api_mutex);
		mutex_lock(&the_lnet.ln_lnd_mutex);
		(net->net_lnd->lnd_shutdown)(ni);
		mutex_unlock(&the_lnet.ln_lnd_mutex);
		mutex_lock(&the_lnet.ln_api_mutex);

		/* don't log removal of the loopback NI */
		if (!islo)
			CDEBUG(D_LNI, "Removed LNI %s\n",
			      libcfs_nidstr(&ni->ni_nid));

		lnet_ni_free(ni);
		i = 2;
		lnet_net_lock(LNET_LOCK_EX);
	}
}
2436
/* shutdown down the NI and release refcount */
static void
lnet_shutdown_lndni(struct lnet_ni *ni)
{
	int i;
	struct lnet_net *net = ni->ni_net;

	/* mark the NI deleting and move it to the net's zombie list */
	lnet_net_lock(LNET_LOCK_EX);
	lnet_ni_lock(ni);
	ni->ni_state = LNET_NI_STATE_DELETING;
	lnet_ni_unlock(ni);
	lnet_ni_unlink_locked(ni);
	lnet_incr_dlc_seq();
	lnet_net_unlock(LNET_LOCK_EX);

	/* clear messages for this NI on the lazy portal */
	for (i = 0; i < the_lnet.ln_nportals; i++)
		lnet_clear_lazy_portal(ni, i, "Shutting down NI");

	/* wait out remaining references and run the LND shutdown */
	lnet_net_lock(LNET_LOCK_EX);
	lnet_clear_zombies_nis_locked(net);
	lnet_net_unlock(LNET_LOCK_EX);
}
2460
/* Shut down every NI on @net, clean up its peer tables, and free
 * the net structure.
 */
static void
lnet_shutdown_lndnet(struct lnet_net *net)
{
	struct lnet_ni *ni;

	lnet_net_lock(LNET_LOCK_EX);

	/* detach the net from whichever list it is on */
	list_del_init(&net->net_list);

	while ((ni = list_first_entry_or_null(&net->net_ni_list,
					      struct lnet_ni,
					      ni_netlist)) != NULL) {
		/* lnet_shutdown_lndni() takes the lock itself */
		lnet_net_unlock(LNET_LOCK_EX);
		lnet_shutdown_lndni(ni);
		lnet_net_lock(LNET_LOCK_EX);
	}

	lnet_net_unlock(LNET_LOCK_EX);

	/* Do peer table cleanup for this net */
	lnet_peer_tables_cleanup(net);

	lnet_net_free(net);
}
2485
/* Shut down all local nets (loopback included), cancel any pending
 * message resends, and move the global state to SHUTDOWN.
 */
static void
lnet_shutdown_lndnets(void)
{
	struct lnet_net *net;
	LIST_HEAD(resend);
	struct lnet_msg *msg, *tmp;

	/* NB called holding the global mutex */

	/* All quiet on the API front */
	LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING ||
		the_lnet.ln_state == LNET_STATE_STOPPING);
	LASSERT(the_lnet.ln_refcount == 0);

	lnet_net_lock(LNET_LOCK_EX);
	the_lnet.ln_state = LNET_STATE_STOPPING;

	/*
	 * move the nets to the zombie list to avoid them being
	 * picked up for new work. LONET is also included in the
	 * Nets that will be moved to the zombie list
	 */
	list_splice_init(&the_lnet.ln_nets, &the_lnet.ln_net_zombie);

	/* Drop the cached loopback Net. */
	if (the_lnet.ln_loni != NULL) {
		lnet_ni_decref_locked(the_lnet.ln_loni, 0);
		the_lnet.ln_loni = NULL;
	}
	lnet_net_unlock(LNET_LOCK_EX);

	/* iterate through the net zombie list and delete each net */
	while ((net = list_first_entry_or_null(&the_lnet.ln_net_zombie,
					       struct lnet_net,
					       net_list)) != NULL)
		lnet_shutdown_lndnet(net);

	/* steal the resend queue and cancel everything on it */
	spin_lock(&the_lnet.ln_msg_resend_lock);
	list_splice(&the_lnet.ln_msg_resend, &resend);
	spin_unlock(&the_lnet.ln_msg_resend_lock);

	list_for_each_entry_safe(msg, tmp, &resend, msg_list) {
		list_del_init(&msg->msg_list);
		msg->msg_no_resend = true;
		lnet_finalize(msg, -ECANCELED);
	}

	lnet_net_lock(LNET_LOCK_EX);
	the_lnet.ln_state = LNET_STATE_SHUTDOWN;
	lnet_net_unlock(LNET_LOCK_EX);
}
2537
/* Start up a single NI via its LND.
 *
 * \param ni	the NI to start; on the failed0 path it is freed here.
 * \param tun	optional LND tunables copied into the NI before startup.
 *
 * \return 0 on success, negative errno on failure.  Note the two distinct
 * failure paths: if lnd_startup() itself fails the NI is freed (failed0);
 * if startup succeeded but the credit configuration is invalid, the NI is
 * shut down via lnet_shutdown_lndni() instead.
 */
static int
lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
{
	int                     rc = -EINVAL;
	struct lnet_tx_queue    *tq;
	int                     i;
	struct lnet_net         *net = ni->ni_net;

	mutex_lock(&the_lnet.ln_lnd_mutex);

	if (tun) {
		memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
		ni->ni_lnd_tunables_set = true;
	}

	rc = (net->net_lnd->lnd_startup)(ni);

	mutex_unlock(&the_lnet.ln_lnd_mutex);

	if (rc != 0) {
		LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
				   rc, libcfs_lnd2str(net->net_lnd->lnd_type));
		goto failed0;
	}

	lnet_ni_lock(ni);
	ni->ni_state = LNET_NI_STATE_ACTIVE;
	lnet_ni_unlock(ni);

	/* We keep a reference on the loopback net through the loopback NI */
	if (net->net_lnd->lnd_type == LOLND) {
		lnet_ni_addref(ni);
		LASSERT(the_lnet.ln_loni == NULL);
		the_lnet.ln_loni = ni;
		/* loopback takes no part in credit accounting */
		ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
		ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
		ni->ni_net->net_tunables.lct_max_tx_credits = 0;
		ni->ni_net->net_tunables.lct_peer_timeout = 0;
		return 0;
	}

	if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
	    ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
		LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
				   libcfs_lnd2str(net->net_lnd->lnd_type),
				   ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
					"" : "per-peer ");
		/* shutdown the NI since if we get here then it must've already
		 * been started
		 */
		lnet_shutdown_lndni(ni);
		return -EINVAL;
	}

	/* seed every per-CPT tx queue with the per-queue credit quota */
	cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
		tq->tq_credits_min =
		tq->tq_credits_max =
		tq->tq_credits = lnet_ni_tq_credits(ni);
	}

	atomic_set(&ni->ni_tx_credits,
		   lnet_ni_tq_credits(ni) * ni->ni_ncpts);
	atomic_set(&ni->ni_healthv, LNET_MAX_HEALTH_VALUE);

	/* Nodes with small feet have little entropy. The NID for this
	 * node gives the most entropy in the low bits.
	 */
	add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid));

	CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
		libcfs_nidstr(&ni->ni_nid),
		ni->ni_net->net_tunables.lct_peer_tx_credits,
		lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
		ni->ni_net->net_tunables.lct_peer_rtr_credits,
		ni->ni_net->net_tunables.lct_peer_timeout);

	return 0;
failed0:
	lnet_ni_free(ni);
	return rc;
}
2619
2620 static const struct lnet_lnd *lnet_load_lnd(u32 lnd_type)
2621 {
2622         const struct lnet_lnd *lnd;
2623         int rc = 0;
2624
2625         mutex_lock(&the_lnet.ln_lnd_mutex);
2626         lnd = lnet_find_lnd_by_type(lnd_type);
2627         if (!lnd) {
2628                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2629                 rc = request_module("%s", libcfs_lnd2modname(lnd_type));
2630                 mutex_lock(&the_lnet.ln_lnd_mutex);
2631
2632                 lnd = lnet_find_lnd_by_type(lnd_type);
2633                 if (!lnd) {
2634                         mutex_unlock(&the_lnet.ln_lnd_mutex);
2635                         CERROR("Can't load LND %s, module %s, rc=%d\n",
2636                         libcfs_lnd2str(lnd_type),
2637                         libcfs_lnd2modname(lnd_type), rc);
2638 #ifndef HAVE_MODULE_LOADING_SUPPORT
2639                         LCONSOLE_ERROR_MSG(0x104,
2640                                            "Your kernel must be compiled with kernel module loading support.");
2641 #endif
2642                         return ERR_PTR(-EINVAL);
2643                 }
2644         }
2645         mutex_unlock(&the_lnet.ln_lnd_mutex);
2646
2647         return lnd;
2648 }
2649
/* Start up all NIs queued on @net->net_ni_added.
 *
 * \return number of NIs successfully started (>= 0), or negative errno;
 * on failure all NIs started here are shut down again and @net is freed.
 */
static int
lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
{
	struct lnet_ni *ni;
	struct lnet_net *net_l = NULL;
	LIST_HEAD(local_ni_list);
	int rc;
	int ni_count = 0;
	__u32 lnd_type;
	const struct lnet_lnd  *lnd;
	/* snapshot the caller-supplied tunables; the LND startup may
	 * overwrite them and they are restored below (-1 == unset) */
	int peer_timeout =
		net->net_tunables.lct_peer_timeout;
	int maxtxcredits =
		net->net_tunables.lct_max_tx_credits;
	int peerrtrcredits =
		net->net_tunables.lct_peer_rtr_credits;

	/*
	 * make sure that this net is unique. If it isn't then
	 * we are adding interfaces to an already existing network, and
	 * 'net' is just a convenient way to pass in the list.
	 * if it is unique we need to find the LND and load it if
	 * necessary.
	 */
	if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
		lnd_type = LNET_NETTYP(net->net_id);

		lnd = lnet_load_lnd(lnd_type);
		if (IS_ERR(lnd)) {
			rc = PTR_ERR(lnd);
			goto failed0;
		}

		mutex_lock(&the_lnet.ln_lnd_mutex);
		net->net_lnd = lnd;
		mutex_unlock(&the_lnet.ln_lnd_mutex);

		net_l = net;
	}

	/*
	 * net_l: if the network being added is unique then net_l
	 *        will point to that network
	 *        if the network being added is not unique then
	 *        net_l points to the existing network.
	 *
	 * When we enter the loop below, we'll pick NIs off the
	 * network being added and start them up, then add them to
	 * a local ni list. Once we've successfully started all
	 * the NIs then we join the local NI list (of started up
	 * networks) with the net_l->net_ni_list, which should
	 * point to the correct network to add the new ni list to
	 *
	 * If any of the new NIs fail to start up, then we want to
	 * iterate through the local ni list, which should include
	 * any NIs which were successfully started up, and shut
	 * them down.
	 *
	 * After that we want to delete the network being added,
	 * to avoid a memory leak.
	 */
	while ((ni = list_first_entry_or_null(&net->net_ni_added,
					      struct lnet_ni,
					      ni_netlist)) != NULL) {
		list_del_init(&ni->ni_netlist);

		/* make sure that the NI we're about to start
		 * up is actually unique. if it's not fail. */
		if (!lnet_ni_unique_net(&net_l->net_ni_list,
					ni->ni_interface)) {
			rc = -EEXIST;
			goto failed1;
		}

		/* adjust the pointer to the parent network, just in case
		 * the net is a duplicate */
		ni->ni_net = net_l;

		rc = lnet_startup_lndni(ni, tun);

		if (rc < 0)
			goto failed1;

		lnet_ni_addref(ni);
		list_add_tail(&ni->ni_netlist, &local_ni_list);

		ni_count++;
	}

	/* publish all successfully started NIs at once */
	lnet_net_lock(LNET_LOCK_EX);
	list_splice_tail(&local_ni_list, &net_l->net_ni_list);
	lnet_incr_dlc_seq();
	lnet_net_unlock(LNET_LOCK_EX);

	/* if the network is not unique then we don't want to keep
	 * it around after we're done. Free it. Otherwise add that
	 * net to the global the_lnet.ln_nets */
	if (net_l != net && net_l != NULL) {
		/*
		 * TODO - note. currently the tunables can not be updated
		 * once added
		 */
		lnet_net_free(net);
	} else {
		/*
		 * restore tunables after they have been overwritten by the
		 * lnd
		 */
		if (peer_timeout != -1)
			net->net_tunables.lct_peer_timeout = peer_timeout;
		if (maxtxcredits != -1)
			net->net_tunables.lct_max_tx_credits = maxtxcredits;
		if (peerrtrcredits != -1)
			net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;

		lnet_net_lock(LNET_LOCK_EX);
		list_add_tail(&net->net_list, &the_lnet.ln_nets);
		lnet_net_unlock(LNET_LOCK_EX);
	}

	return ni_count;

failed1:
	/*
	 * shutdown the new NIs that are being started up
	 * free the NET being started
	 */
	while ((ni = list_first_entry_or_null(&local_ni_list,
					      struct lnet_ni,
					      ni_netlist)) != NULL)
		lnet_shutdown_lndni(ni);

failed0:
	lnet_net_free(net);

	return rc;
}
2787
2788 static int
2789 lnet_startup_lndnets(struct list_head *netlist)
2790 {
2791         struct lnet_net         *net;
2792         int                     rc;
2793         int                     ni_count = 0;
2794
2795         /*
2796          * Change to running state before bringing up the LNDs. This
2797          * allows lnet_shutdown_lndnets() to assert that we've passed
2798          * through here.
2799          */
2800         lnet_net_lock(LNET_LOCK_EX);
2801         the_lnet.ln_state = LNET_STATE_RUNNING;
2802         lnet_net_unlock(LNET_LOCK_EX);
2803
2804         while ((net = list_first_entry_or_null(netlist,
2805                                                struct lnet_net,
2806                                                net_list)) != NULL) {
2807                 list_del_init(&net->net_list);
2808
2809                 rc = lnet_startup_lndnet(net, NULL);
2810
2811                 if (rc < 0)
2812                         goto failed;
2813
2814                 ni_count += rc;
2815         }
2816
2817         return ni_count;
2818 failed:
2819         lnet_shutdown_lndnets();
2820
2821         return rc;
2822 }
2823
2824 static int lnet_genl_parse_list(struct sk_buff *msg,
2825                                 const struct ln_key_list *data[], u16 idx)
2826 {
2827         const struct ln_key_list *list = data[idx];
2828         const struct ln_key_props *props;
2829         struct nlattr *node;
2830         u16 count;
2831
2832         if (!list)
2833                 return 0;
2834
2835         if (!list->lkl_maxattr)
2836                 return -ERANGE;
2837
2838         props = list->lkl_list;
2839         if (!props)
2840                 return -EINVAL;
2841
2842         node = nla_nest_start(msg, LN_SCALAR_ATTR_LIST);
2843         if (!node)
2844                 return -ENOBUFS;
2845
2846         for (count = 1; count <= list->lkl_maxattr; count++) {
2847                 struct nlattr *key = nla_nest_start(msg, count);
2848
2849                 if (count == 1)
2850                         nla_put_u16(msg, LN_SCALAR_ATTR_LIST_SIZE,
2851                                     list->lkl_maxattr);
2852
2853                 nla_put_u16(msg, LN_SCALAR_ATTR_INDEX, count);
2854                 if (props[count].lkp_value)
2855                         nla_put_string(msg, LN_SCALAR_ATTR_VALUE,
2856                                        props[count].lkp_value);
2857                 if (props[count].lkp_key_format)
2858                         nla_put_u16(msg, LN_SCALAR_ATTR_KEY_FORMAT,
2859                                     props[count].lkp_key_format);
2860                 nla_put_u16(msg, LN_SCALAR_ATTR_NLA_TYPE,
2861                             props[count].lkp_data_type);
2862                 if (props[count].lkp_data_type == NLA_NESTED) {
2863                         int rc;
2864
2865                         rc = lnet_genl_parse_list(msg, data, ++idx);
2866                         if (rc < 0)
2867                                 return rc;
2868                         idx = rc;
2869                 }
2870
2871                 nla_nest_end(msg, key);
2872         }
2873
2874         nla_nest_end(msg, node);
2875         return idx;
2876 }
2877
/* Build and finalize a generic-netlink message describing the scalar
 * key lists in @data (terminated/rooted at data[0]).
 *
 * \return 0 on success, negative errno on failure; any partially built
 * message is cancelled.  NB GOTO() is the Lustre macro (adds debugging),
 * not a plain goto.
 */
int lnet_genl_send_scalar_list(struct sk_buff *msg, u32 portid, u32 seq,
			       const struct genl_family *family, int flags,
			       u8 cmd, const struct ln_key_list *data[])
{
	int rc = 0;
	void *hdr;

	if (!data[0])
		return -EINVAL;

	hdr = genlmsg_put(msg, portid, seq, family, flags, cmd);
	if (!hdr)
		GOTO(canceled, rc = -EMSGSIZE);

	rc = lnet_genl_parse_list(msg, data, 0);
	if (rc < 0)
		GOTO(canceled, rc);

	genlmsg_end(msg, hdr);
canceled:
	/* genlmsg_cancel() tolerates hdr == NULL (genlmsg_put failure) */
	if (rc < 0)
		genlmsg_cancel(msg, hdr);
	/* parse_list returns a positive index on success; callers expect 0 */
	return rc > 0 ? 0 : rc;
}
EXPORT_SYMBOL(lnet_genl_send_scalar_list);
2903
2904 static struct genl_family lnet_family;
2905
/**
 * Initialize LNet library.
 *
 * Automatically called at module loading time. Caller has to call
 * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
 * latter returned 0. It must be called exactly once.
 *
 * \retval 0 on success
 * \retval -ve on failures.
 */
int lnet_lib_init(void)
{
	int rc;

	lnet_assert_wire_constants();

	/* refer to global cfs_cpt_table for now */
	the_lnet.ln_cpt_table = cfs_cpt_tab;
	the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab);

	LASSERT(the_lnet.ln_cpt_number > 0);
	if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
		/* we are under risk of consuming all lh_cookie */
		CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
		       "please change setting of CPT-table and retry\n",
		       the_lnet.ln_cpt_number, LNET_CPT_MAX);
		return -E2BIG;
	}

	/* smallest number of bits that can index every CPT */
	while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
		the_lnet.ln_cpt_bits++;

	rc = lnet_create_locks();
	if (rc != 0) {
		CERROR("Can't create LNet global locks: %d\n", rc);
		return rc;
	}

	/* locks must exist before the netlink family can service requests */
	rc = genl_register_family(&lnet_family);
	if (rc != 0) {
		lnet_destroy_locks();
		CERROR("Can't register LNet netlink family: %d\n", rc);
		return rc;
	}

	the_lnet.ln_refcount = 0;
	INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
	INIT_LIST_HEAD(&the_lnet.ln_msg_resend);

	/* The hash table size is the number of bits it takes to express the set
	 * ln_num_routes, minus 1 (better to under estimate than over so we
	 * don't waste memory). */
	if (rnet_htable_size <= 0)
		rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
	else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
		rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
	the_lnet.ln_remote_nets_hbits = max_t(int, 1,
					   order_base_2(rnet_htable_size) - 1);

	/* All LNDs apart from the LOLND are in separate modules.  They
	 * register themselves when their module loads, and unregister
	 * themselves when their module is unloaded. */
	lnet_register_lnd(&the_lolnd);
	return 0;
}
2971
2972 /**
2973  * Finalize LNet library.
2974  *
2975  * \pre lnet_lib_init() called with success.
2976  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
2977  *
2978  * As this happens at module-unload, all lnds must already be unloaded,
2979  * so they must already be unregistered.
2980  */
2981 void lnet_lib_exit(void)
2982 {
2983         int i;
2984
2985         LASSERT(the_lnet.ln_refcount == 0);
2986         lnet_unregister_lnd(&the_lolnd);
2987         for (i = 0; i < NUM_LNDS; i++)
2988                 LASSERT(!the_lnet.ln_lnds[i]);
2989         lnet_destroy_locks();
2990         genl_unregister_family(&lnet_family);
2991 }
2992
/**
 * Set LNet PID and start LNet interfaces, routing, and forwarding.
 *
 * Users must call this function at least once before any other functions.
 * For each successful call there must be a corresponding call to
 * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
 * ignored.
 *
 * The PID used by LNet may be different from the one requested.
 * See LNetGetId().
 *
 * \param requested_pid PID requested by the caller.
 *
 * \return >= 0 on success, and < 0 error code on failures.
 */
int
LNetNIInit(lnet_pid_t requested_pid)
{
	int im_a_router = 0;
	int rc;
	int ni_bytes;
	struct lnet_ping_buffer *pbuf;
	struct lnet_handle_md ping_mdh;
	LIST_HEAD(net_head);
	struct lnet_net *net;

	mutex_lock(&the_lnet.ln_api_mutex);

	CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);

	/* refuse to start while a shutdown is in flight */
	if (the_lnet.ln_state == LNET_STATE_STOPPING) {
		mutex_unlock(&the_lnet.ln_api_mutex);
		return -ESHUTDOWN;
	}

	/* already initialized: just take another reference */
	if (the_lnet.ln_refcount > 0) {
		rc = the_lnet.ln_refcount++;
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	rc = lnet_prepare(requested_pid);
	if (rc != 0) {
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	/* create a network for Loopback network */
	net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
	if (net == NULL) {
		rc = -ENOMEM;
		goto err_empty_list;
	}

	/* Add in the loopback NI */
	if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
		rc = -ENOMEM;
		goto err_empty_list;
	}

	if (use_tcp_bonding)
		CWARN("use_tcp_bonding has been removed. Use Multi-Rail and Dynamic Discovery instead, see LU-13641\n");

	/* If LNet is being initialized via DLC it is possible
	 * that the user requests not to load module parameters (ones which
	 * are supported by DLC) on initialization.  Therefore, make sure not
	 * to load networks, routes and forwarding from module parameters
	 * in this case.  On cleanup in case of failure only clean up
	 * routes if it has been loaded */
	if (!the_lnet.ln_nis_from_mod_params) {
		rc = lnet_parse_networks(&net_head, lnet_get_networks());
		if (rc < 0)
			goto err_empty_list;
	}

	/* consumes net_head: each started net is moved to the_lnet.ln_nets */
	rc = lnet_startup_lndnets(&net_head);
	if (rc < 0)
		goto err_empty_list;

	if (!the_lnet.ln_nis_from_mod_params) {
		rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
		if (rc != 0)
			goto err_shutdown_lndnis;

		rc = lnet_rtrpools_alloc(im_a_router);
		if (rc != 0)
			goto err_destroy_routes;
	}

	rc = lnet_acceptor_start();
	if (rc != 0)
		goto err_destroy_routes;

	the_lnet.ln_refcount = 1;
	/* Now I may use my own API functions... */

	/* size the ping target: header plus the NI info of every net */
	ni_bytes = LNET_PING_INFO_HDR_SIZE;
	list_for_each_entry(net, &the_lnet.ln_nets, net_list)
		ni_bytes += lnet_get_net_ni_bytes_locked(net);

	rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_bytes, true);
	if (rc != 0)
		goto err_acceptor_stop;

	lnet_ping_target_update(pbuf, ping_mdh);

	the_lnet.ln_mt_handler = lnet_mt_event_handler;

	rc = lnet_push_target_init();
	if (rc != 0)
		goto err_stop_ping;

	rc = lnet_peer_discovery_start();
	if (rc != 0)
		goto err_destroy_push_target;

	rc = lnet_monitor_thr_start();
	if (rc != 0)
		goto err_stop_discovery_thr;

	lnet_fault_init();
	lnet_router_debugfs_init();

	mutex_unlock(&the_lnet.ln_api_mutex);

	complete_all(&the_lnet.ln_started);

	/* wait for all routers to start */
	lnet_wait_router_start();

	return 0;

	/* error unwinding: each label undoes everything set up after the
	 * previous one, in strict reverse order of initialization */
err_stop_discovery_thr:
	lnet_peer_discovery_stop();
err_destroy_push_target:
	lnet_push_target_fini();
err_stop_ping:
	lnet_ping_target_fini();
err_acceptor_stop:
	the_lnet.ln_refcount = 0;
	lnet_acceptor_stop();
err_destroy_routes:
	if (!the_lnet.ln_nis_from_mod_params)
		lnet_destroy_routes();
err_shutdown_lndnis:
	lnet_shutdown_lndnets();
err_empty_list:
	lnet_unprepare();
	LASSERT(rc < 0);
	mutex_unlock(&the_lnet.ln_api_mutex);
	/* free any nets still parked on net_head (i.e. never started) */
	while ((net = list_first_entry_or_null(&net_head,
					       struct lnet_net,
					       net_list)) != NULL) {
		list_del_init(&net->net_list);
		lnet_net_free(net);
	}
	return rc;
}
EXPORT_SYMBOL(LNetNIInit);
3152
/**
 * Stop LNet interfaces, routing, and forwarding.
 *
 * Users must call this function once for each successful call to LNetNIInit().
 * Once the LNetNIFini() operation has been started, the results of pending
 * API operations are undefined.
 *
 * \return always 0 for current implementation.
 */
int
LNetNIFini(void)
{
	mutex_lock(&the_lnet.ln_api_mutex);

	LASSERT(the_lnet.ln_refcount > 0);

	if (the_lnet.ln_refcount != 1) {
		/* other users remain: just drop our reference */
		the_lnet.ln_refcount--;
	} else {
		LASSERT(!the_lnet.ln_niinit_self);

		/* flag shutdown early so new LNetNIInit() calls bail out */
		lnet_net_lock(LNET_LOCK_EX);
		the_lnet.ln_state = LNET_STATE_STOPPING;
		lnet_net_unlock(LNET_LOCK_EX);

		lnet_fault_fini();

		/* stop subsystems in reverse order of LNetNIInit() startup */
		lnet_router_debugfs_fini();
		lnet_monitor_thr_stop();
		lnet_peer_discovery_stop();
		lnet_push_target_fini();
		lnet_ping_target_fini();

		/* Teardown fns that use my own API functions BEFORE here */
		the_lnet.ln_refcount = 0;

		lnet_acceptor_stop();
		lnet_destroy_routes();
		lnet_shutdown_lndnets();
		lnet_unprepare();
	}

	mutex_unlock(&the_lnet.ln_api_mutex);
	return 0;
}
EXPORT_SYMBOL(LNetNIFini);
3199
3200 /**
3201  * Grabs the ni data from the ni structure and fills the out
3202  * parameters
3203  *
3204  * \param[in] ni network        interface structure
3205  * \param[out] cfg_ni           NI config information
3206  * \param[out] tun              network and LND tunables
3207  */
3208 static void
3209 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
3210                    struct lnet_ioctl_config_lnd_tunables *tun,
3211                    struct lnet_ioctl_element_stats *stats,
3212                    __u32 tun_size)
3213 {
3214         size_t min_size = 0;
3215         int i;
3216
3217         if (!ni || !cfg_ni || !tun || !nid_is_nid4(&ni->ni_nid))
3218                 return;
3219
3220         if (ni->ni_interface != NULL) {
3221                 strncpy(cfg_ni->lic_ni_intf,
3222                         ni->ni_interface,
3223                         sizeof(cfg_ni->lic_ni_intf));
3224         }
3225
3226         cfg_ni->lic_nid = lnet_nid_to_nid4(&ni->ni_nid);
3227         cfg_ni->lic_status = lnet_ni_get_status_locked(ni);
3228         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
3229
3230         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
3231
3232         if (stats) {
3233                 stats->iel_send_count = lnet_sum_stats(&ni->ni_stats,
3234                                                        LNET_STATS_TYPE_SEND);
3235                 stats->iel_recv_count = lnet_sum_stats(&ni->ni_stats,
3236                                                        LNET_STATS_TYPE_RECV);
3237                 stats->iel_drop_count = lnet_sum_stats(&ni->ni_stats,
3238                                                        LNET_STATS_TYPE_DROP);
3239         }
3240
3241         /*
3242          * tun->lt_tun will always be present, but in order to be
3243          * backwards compatible, we need to deal with the cases when
3244          * tun->lt_tun is smaller than what the kernel has, because it
3245          * comes from an older version of a userspace program, then we'll
3246          * need to copy as much information as we have available space.
3247          */
3248         min_size = tun_size - sizeof(tun->lt_cmn);
3249         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
3250
3251         /* copy over the cpts */
3252         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
3253             ni->ni_cpts == NULL)  {
3254                 for (i = 0; i < ni->ni_ncpts; i++)
3255                         cfg_ni->lic_cpts[i] = i;
3256         } else {
3257                 for (i = 0;
3258                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
3259                      i < LNET_MAX_SHOW_NUM_CPT;
3260                      i++)
3261                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
3262         }
3263         cfg_ni->lic_ncpts = ni->ni_ncpts;
3264 }
3265
3266 /**
3267  * NOTE: This is a legacy function left in the code to be backwards
3268  * compatible with older userspace programs. It should eventually be
3269  * removed.
3270  *
3271  * Grabs the ni data from the ni structure and fills the out
3272  * parameters
3273  *
3274  * \param[in] ni network        interface structure
3275  * \param[out] config           config information
3276  */
3277 static void
3278 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
3279                          struct lnet_ioctl_config_data *config)
3280 {
3281         struct lnet_ioctl_net_config *net_config;
3282         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
3283         size_t min_size, tunable_size = 0;
3284         int i;
3285
3286         if (!ni || !config || !nid_is_nid4(&ni->ni_nid))
3287                 return;
3288
3289         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
3290         if (!net_config)
3291                 return;
3292
3293         if (!ni->ni_interface)
3294                 return;
3295
3296         strncpy(net_config->ni_interface,
3297                 ni->ni_interface,
3298                 sizeof(net_config->ni_interface));
3299
3300         config->cfg_nid = lnet_nid_to_nid4(&ni->ni_nid);
3301         config->cfg_config_u.cfg_net.net_peer_timeout =
3302                 ni->ni_net->net_tunables.lct_peer_timeout;
3303         config->cfg_config_u.cfg_net.net_max_tx_credits =
3304                 ni->ni_net->net_tunables.lct_max_tx_credits;
3305         config->cfg_config_u.cfg_net.net_peer_tx_credits =
3306                 ni->ni_net->net_tunables.lct_peer_tx_credits;
3307         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
3308                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
3309
3310         net_config->ni_status = lnet_ni_get_status_locked(ni);
3311
3312         if (ni->ni_cpts) {
3313                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
3314
3315                 for (i = 0; i < num_cpts; i++)
3316                         net_config->ni_cpts[i] = ni->ni_cpts[i];
3317
3318                 config->cfg_ncpts = num_cpts;
3319         }
3320
3321         /*
3322          * See if user land tools sent in a newer and larger version
3323          * of struct lnet_tunables than what the kernel uses.
3324          */
3325         min_size = sizeof(*config) + sizeof(*net_config);
3326
3327         if (config->cfg_hdr.ioc_len > min_size)
3328                 tunable_size = config->cfg_hdr.ioc_len - min_size;
3329
3330         /* Don't copy too much data to user space */
3331         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
3332         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
3333
3334         if (lnd_cfg && min_size) {
3335                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
3336                 config->cfg_config_u.cfg_net.net_interface_count = 1;
3337
3338                 /* Tell user land that kernel side has less data */
3339                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
3340                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
3341                         config->cfg_hdr.ioc_len -= min_size;
3342                 }
3343         }
3344 }
3345
3346 struct lnet_ni *
3347 lnet_get_ni_idx_locked(int idx)
3348 {
3349         struct lnet_ni          *ni;
3350         struct lnet_net         *net;
3351
3352         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3353                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3354                         if (idx-- == 0)
3355                                 return ni;
3356                 }
3357         }
3358
3359         return NULL;
3360 }
3361
3362 int lnet_get_net_healthv_locked(struct lnet_net *net)
3363 {
3364         struct lnet_ni *ni;
3365         int best_healthv = 0;
3366         int healthv, ni_fatal;
3367
3368         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3369                 healthv = atomic_read(&ni->ni_healthv);
3370                 ni_fatal = atomic_read(&ni->ni_fatal_error_on);
3371                 if (!ni_fatal && healthv > best_healthv)
3372                         best_healthv = healthv;
3373         }
3374
3375         return best_healthv;
3376 }
3377
/* NI iterator.  With @prev == NULL, return the first NI of @mynet (or of
 * the first net when @mynet is NULL too).  Otherwise return the NI that
 * follows @prev, advancing to the next net only when @mynet is NULL.
 * Returns NULL once the iteration is exhausted.  Must be called with the
 * lnet_net_lock held.
 */
struct lnet_ni *
lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
{
	struct lnet_ni		*ni;
	struct lnet_net		*net = mynet;

	/*
	 * It is possible that the net has been cleaned out while there is
	 * a message being sent. This function accessed the net without
	 * checking if the list is empty
	 */
	if (!prev) {
		if (!net)
			net = list_first_entry(&the_lnet.ln_nets,
					       struct lnet_net,
					       net_list);
		if (list_empty(&net->net_ni_list))
			return NULL;
		ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
				      ni_netlist);

		return ni;
	}

	if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
		/* if you reached the end of the ni list and the net is
		 * specified, then there are no more nis in that net */
		if (net != NULL)
			return NULL;

		/* we reached the end of this net ni list. move to the
		 * next net */
		if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
			/* no more nets and no more NIs. */
			return NULL;

		/* get the next net */
		/* NB: treating prev's net_list member as the list "head"
		 * makes list_first_entry() yield the net that follows it */
		net = list_first_entry(&prev->ni_net->net_list, struct lnet_net,
				       net_list);
		if (list_empty(&net->net_ni_list))
			return NULL;
		/* get the ni on it */
		ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
				      ni_netlist);

		return ni;
	}

	if (list_empty(&prev->ni_netlist))
		return NULL;

	/* there are more nis left */
	/* NB: same trick as above - list_first_entry() relative to prev's
	 * ni_netlist member returns the NI right after prev */
	ni = list_first_entry(&prev->ni_netlist, struct lnet_ni, ni_netlist);

	return ni;
}
3434
3435 int
3436 lnet_get_net_config(struct lnet_ioctl_config_data *config)
3437 {
3438         struct lnet_ni *ni;
3439         int cpt;
3440         int rc = -ENOENT;
3441         int idx = config->cfg_count;
3442
3443         cpt = lnet_net_lock_current();
3444
3445         ni = lnet_get_ni_idx_locked(idx);
3446
3447         if (ni != NULL) {
3448                 rc = 0;
3449                 lnet_ni_lock(ni);
3450                 lnet_fill_ni_info_legacy(ni, config);
3451                 lnet_ni_unlock(ni);
3452         }
3453
3454         lnet_net_unlock(cpt);
3455         return rc;
3456 }
3457
3458 int
3459 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
3460                    struct lnet_ioctl_config_lnd_tunables *tun,
3461                    struct lnet_ioctl_element_stats *stats,
3462                    __u32 tun_size)
3463 {
3464         struct lnet_ni          *ni;
3465         int                     cpt;
3466         int                     rc = -ENOENT;
3467
3468         if (!cfg_ni || !tun || !stats)
3469                 return -EINVAL;
3470
3471         cpt = lnet_net_lock_current();
3472
3473         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
3474
3475         if (ni) {
3476                 rc = 0;
3477                 lnet_ni_lock(ni);
3478                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
3479                 lnet_ni_unlock(ni);
3480         }
3481
3482         lnet_net_unlock(cpt);
3483         return rc;
3484 }
3485
3486 int lnet_get_ni_stats(struct lnet_ioctl_element_msg_stats *msg_stats)
3487 {
3488         struct lnet_ni *ni;
3489         int cpt;
3490         int rc = -ENOENT;
3491
3492         if (!msg_stats)
3493                 return -EINVAL;
3494
3495         cpt = lnet_net_lock_current();
3496
3497         ni = lnet_get_ni_idx_locked(msg_stats->im_idx);
3498
3499         if (ni) {
3500                 lnet_usr_translate_stats(msg_stats, &ni->ni_stats);
3501                 rc = 0;
3502         }
3503
3504         lnet_net_unlock(cpt);
3505
3506         return rc;
3507 }
3508
/* Common tail of all "add a network" paths (ioctl, ip2nets, legacy DLC):
 * start the net with its LND, grow the ping target to cover the new
 * NI(s), apply matching UDSP rules and start the acceptor thread if the
 * LND needs one.  Returns 0 on success or a negative errno; on the
 * failure paths visible here @net has already been freed or shut down
 * (NOTE(review): assumed also true inside lnet_startup_lndnet() - the
 * caller must not touch @net after a failure).
 */
static int lnet_add_net_common(struct lnet_net *net,
			       struct lnet_ioctl_config_lnd_tunables *tun)
{
	struct lnet_handle_md ping_mdh;
	struct lnet_ping_buffer *pbuf;
	struct lnet_remotenet *rnet;
	struct lnet_ni *ni;
	u32 net_id;
	int rc;

	lnet_net_lock(LNET_LOCK_EX);
	rnet = lnet_find_rnet_locked(net->net_id);
	lnet_net_unlock(LNET_LOCK_EX);
	/*
	 * make sure that the net added doesn't invalidate the current
	 * configuration LNet is keeping
	 */
	if (rnet) {
		CERROR("Adding net %s will invalidate routing configuration\n",
		       libcfs_net2str(net->net_id));
		lnet_net_free(net);
		return -EUSERS;
	}

	if (tun)
		memcpy(&net->net_tunables,
		       &tun->lt_cmn, sizeof(net->net_tunables));
	else
		/* all bytes 0xff: mark every common tunable as "unset" */
		memset(&net->net_tunables, -1, sizeof(net->net_tunables));

	/* remember the ID: the net is re-looked-up by ID after startup */
	net_id = net->net_id;

	rc = lnet_startup_lndnet(net,
				 (tun) ? &tun->lt_tun : NULL);
	if (rc < 0)
		return rc;

	/* make sure you calculate the correct number of slots in the ping
	 * buffer. Since the ping info is a flattened list of all the NIs,
	 * we should allocate enough slots to accomodate the number of NIs
	 * which will be added.
	 */
	rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
				    LNET_PING_INFO_HDR_SIZE +
				    lnet_get_ni_bytes(),
				    false);
	if (rc < 0) {
		lnet_shutdown_lndnet(net);
		return rc;
	}

	lnet_net_lock(LNET_LOCK_EX);
	net = lnet_get_net_locked(net_id);
	LASSERT(net);

	/* apply the UDSPs */
	rc = lnet_udsp_apply_policies_on_net(net);
	if (rc)
		CERROR("Failed to apply UDSPs on local net %s\n",
		       libcfs_net2str(net->net_id));

	/* At this point we lost track of which NI was just added, so we
	 * just re-apply the policies on all of the NIs on this net
	 */
	list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
		rc = lnet_udsp_apply_policies_on_ni(ni);
		if (rc)
			CERROR("Failed to apply UDSPs on ni %s\n",
			       libcfs_nidstr(&ni->ni_nid));
	}
	lnet_net_unlock(LNET_LOCK_EX);

	/*
	 * Start the acceptor thread if this is the first network
	 * being added that requires the thread.
	 */
	if (net->net_lnd->lnd_accept) {
		rc = lnet_acceptor_start();
		if (rc < 0) {
			/* shutdown the net that we just started */
			CERROR("Failed to start up acceptor thread\n");
			lnet_shutdown_lndnet(net);
			goto failed;
		}
	}

	lnet_net_lock(LNET_LOCK_EX);
	lnet_peer_net_added(net);
	lnet_net_unlock(LNET_LOCK_EX);

	/* publish the resized ping buffer */
	lnet_ping_target_update(pbuf, ping_mdh);

	return 0;

failed:
	/* undo lnet_ping_target_setup(): unlink the staged MD and drop
	 * our reference on the new ping buffer */
	lnet_ping_md_unlink(pbuf, &ping_mdh);
	lnet_ping_buffer_decref(pbuf);
	return rc;
}
3608
3609 static void
3610 lnet_set_tune_defaults(struct lnet_ioctl_config_lnd_tunables *tun)
3611 {
3612         if (tun) {
3613                 if (tun->lt_cmn.lct_peer_timeout < 0)
3614                         tun->lt_cmn.lct_peer_timeout = DEFAULT_PEER_TIMEOUT;
3615                 if (!tun->lt_cmn.lct_peer_tx_credits)
3616                         tun->lt_cmn.lct_peer_tx_credits = DEFAULT_PEER_CREDITS;
3617                 if (!tun->lt_cmn.lct_max_tx_credits)
3618                         tun->lt_cmn.lct_max_tx_credits = DEFAULT_CREDITS;
3619         }
3620 }
3621
/* Configure net(s) from a legacy "ip2nets" expression: resolve the
 * expression against the local interfaces, parse the resulting network
 * string, then add each parsed net.  Any net left on the local list
 * after an error is freed before returning.
 */
static int lnet_handle_legacy_ip2nets(char *ip2nets,
				      struct lnet_ioctl_config_lnd_tunables *tun)
{
	struct lnet_net *net;
	const char *nets;
	int rc;
	LIST_HEAD(net_head);

	rc = lnet_parse_ip2nets(&nets, ip2nets);
	if (rc < 0)
		return rc;

	rc = lnet_parse_networks(&net_head, nets);
	if (rc < 0)
		return rc;

	/* fill in defaults for any common tunables left unset */
	lnet_set_tune_defaults(tun);

	mutex_lock(&the_lnet.ln_api_mutex);
	if (the_lnet.ln_state != LNET_STATE_RUNNING) {
		rc = -ESHUTDOWN;
		goto out;
	}

	while ((net = list_first_entry_or_null(&net_head,
					       struct lnet_net,
					       net_list)) != NULL) {
		list_del_init(&net->net_list);
		/* NB: lnet_add_net_common() takes over (or frees) @net */
		rc = lnet_add_net_common(net, tun);
		if (rc < 0)
			goto out;
	}

out:
	mutex_unlock(&the_lnet.ln_api_mutex);

	/* free any nets not consumed above (error / shutdown cases) */
	while ((net = list_first_entry_or_null(&net_head,
					       struct lnet_net,
					       net_list)) != NULL) {
		list_del_init(&net->net_list);
		lnet_net_free(net);
	}
	return rc;
}
3666
/* Dynamically add one NI, described by @conf, to net @net_id.  A legacy
 * ip2nets expression embedded in @conf is handled separately.  Returns 0
 * on success or a negative errno.
 */
int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf, u32 net_id,
		    struct lnet_ioctl_config_lnd_tunables *tun)
{
	struct lnet_net *net;
	struct lnet_ni *ni;
	int rc, i;
	u32 lnd_type;

	/* handle legacy ip2nets from DLC */
	if (conf->lic_legacy_ip2nets[0] != '\0')
		return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
						  tun);

	lnd_type = LNET_NETTYP(net_id);

	if (!libcfs_isknown_lnd(lnd_type)) {
		CERROR("No valid net and lnd information provided\n");
		return -ENOENT;
	}

	net = lnet_net_alloc(net_id, NULL);
	if (!net)
		return -ENOMEM;

	/* reject any CPT outside the configured partition range */
	for (i = 0; i < conf->lic_ncpts; i++) {
		if (conf->lic_cpts[i] >= LNET_CPT_NUMBER) {
			lnet_net_free(net);
			return -ERANGE;
		}
	}

	/* the new NI is attached to @net by the allocation below */
	ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
				       conf->lic_ni_intf);
	if (!ni) {
		lnet_net_free(net);
		return -ENOMEM;
	}

	/* fill in defaults for any common tunables left unset */
	lnet_set_tune_defaults(tun);

	mutex_lock(&the_lnet.ln_api_mutex);
	if (the_lnet.ln_state != LNET_STATE_RUNNING) {
		lnet_net_free(net);
		rc = -ESHUTDOWN;
	} else {
		rc = lnet_add_net_common(net, tun);
	}

	mutex_unlock(&the_lnet.ln_api_mutex);

	/* If NI already exist delete this new unused copy */
	if (rc == -EEXIST)
		lnet_ni_free(ni);

	return rc;
}
3723
/* Dynamically delete the NI identified by @nid.  When @nid carries no
 * address part, the entire net is torn down instead.  In both cases a
 * new, smaller ping target is set up before the shutdown.  Returns 0 on
 * success or a negative errno.
 */
int lnet_dyn_del_ni(struct lnet_nid *nid)
{
	struct lnet_net *net;
	struct lnet_ni *ni;
	u32 net_id = LNET_NID_NET(nid);
	struct lnet_ping_buffer *pbuf;
	struct lnet_handle_md ping_mdh;
	int net_bytes, rc;
	bool net_empty;

	/* don't allow userspace to shutdown the LOLND */
	if (LNET_NETTYP(net_id) == LOLND)
		return -EINVAL;

	mutex_lock(&the_lnet.ln_api_mutex);
	if (the_lnet.ln_state != LNET_STATE_RUNNING) {
		rc = -ESHUTDOWN;
		goto unlock_api_mutex;
	}

	lnet_net_lock(0);

	net = lnet_get_net_locked(net_id);
	if (!net) {
		CERROR("net %s not found\n",
		       libcfs_net2str(net_id));
		rc = -ENOENT;
		goto unlock_net;
	}

	if (!nid_addr_is_set(nid)) {
		/* remove the entire net */
		net_bytes = lnet_get_net_ni_bytes_locked(net);

		lnet_net_unlock(0);

		/* create and link a new ping info, before removing the old one */
		rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
					    LNET_PING_INFO_HDR_SIZE +
					    lnet_get_ni_bytes() - net_bytes,
					    false);
		if (rc != 0)
			goto unlock_api_mutex;

		lnet_shutdown_lndnet(net);

		lnet_acceptor_stop();

		lnet_ping_target_update(pbuf, ping_mdh);

		/* rc is 0 here: whole-net removal succeeded */
		goto unlock_api_mutex;
	}

	ni = lnet_nid_to_ni_locked(nid, 0);
	if (!ni) {
		CERROR("nid %s not found\n", libcfs_nidstr(nid));
		rc = -ENOENT;
		goto unlock_net;
	}

	net_bytes = lnet_get_net_ni_bytes_locked(net);
	/* singular list => removing this NI empties the net */
	net_empty = list_is_singular(&net->net_ni_list);

	lnet_net_unlock(0);

	/* create and link a new ping info, before removing the old one */
	rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
				    (LNET_PING_INFO_HDR_SIZE +
				     lnet_get_ni_bytes() -
				     lnet_ping_sts_size(&ni->ni_nid)),
				    false);
	if (rc != 0)
		goto unlock_api_mutex;

	lnet_shutdown_lndni(ni);

	lnet_acceptor_stop();

	lnet_ping_target_update(pbuf, ping_mdh);

	/* check if the net is empty and remove it if it is */
	if (net_empty)
		lnet_shutdown_lndnet(net);

	goto unlock_api_mutex;

unlock_net:
	lnet_net_unlock(0);
unlock_api_mutex:
	mutex_unlock(&the_lnet.ln_api_mutex);

	return rc;
}
3817
/*
 * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
 * They are only expected to be called for unique networks.
 * That can happen as a result of older DLC library
 * calls. Multi-Rail DLC and beyond no longer use these APIs.
 */
/* Deprecated single-network add path (pre-Multi-Rail DLC).  Parses the
 * network string in @conf, builds the common tunables from the legacy
 * per-net fields and hands the single resulting net to
 * lnet_add_net_common().  Returns 0 on success or a negative errno.
 */
int
lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
{
	struct lnet_net *net;
	LIST_HEAD(net_head);
	int rc;
	struct lnet_ioctl_config_lnd_tunables tun;
	const char *nets = conf->cfg_config_u.cfg_net.net_intf;

	/* Create a net/ni structures for the network string */
	rc = lnet_parse_networks(&net_head, nets);
	if (rc <= 0)
		return rc == 0 ? -EINVAL : rc;

	mutex_lock(&the_lnet.ln_api_mutex);
	if (the_lnet.ln_state != LNET_STATE_RUNNING) {
		rc = -ESHUTDOWN;
		goto out_unlock_clean;
	}

	/* NB: at this point rc is the number of networks parsed */
	if (rc > 1) {
		rc = -EINVAL; /* only add one network per call */
		goto out_unlock_clean;
	}

	net = list_first_entry(&net_head, struct lnet_net, net_list);
	list_del_init(&net->net_list);

	LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));

	/* translate the legacy per-net fields into common tunables,
	 * substituting defaults for zero values */
	memset(&tun, 0, sizeof(tun));

	tun.lt_cmn.lct_peer_timeout =
	  (!conf->cfg_config_u.cfg_net.net_peer_timeout) ? DEFAULT_PEER_TIMEOUT :
		conf->cfg_config_u.cfg_net.net_peer_timeout;
	tun.lt_cmn.lct_peer_tx_credits =
	  (!conf->cfg_config_u.cfg_net.net_peer_tx_credits) ? DEFAULT_PEER_CREDITS :
		conf->cfg_config_u.cfg_net.net_peer_tx_credits;
	tun.lt_cmn.lct_peer_rtr_credits =
	  conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
	tun.lt_cmn.lct_max_tx_credits =
	  (!conf->cfg_config_u.cfg_net.net_max_tx_credits) ? DEFAULT_CREDITS :
		conf->cfg_config_u.cfg_net.net_max_tx_credits;

	rc = lnet_add_net_common(net, &tun);

out_unlock_clean:
	mutex_unlock(&the_lnet.ln_api_mutex);
	/* net_head list is empty in success case */
	while ((net = list_first_entry_or_null(&net_head,
					       struct lnet_net,
					       net_list)) != NULL) {
		list_del_init(&net->net_list);
		lnet_net_free(net);
	}
	return rc;
}
3881
/* Deprecated single-network delete path (pre-Multi-Rail DLC).  Tears
 * down the whole net @net_id after setting up a correspondingly smaller
 * ping target.  Returns 0 on success or a negative errno.
 */
int
lnet_dyn_del_net(u32 net_id)
{
	struct lnet_net *net;
	struct lnet_ping_buffer *pbuf;
	struct lnet_handle_md ping_mdh;
	int net_ni_bytes, rc;

	/* don't allow userspace to shutdown the LOLND */
	if (LNET_NETTYP(net_id) == LOLND)
		return -EINVAL;

	mutex_lock(&the_lnet.ln_api_mutex);
	if (the_lnet.ln_state != LNET_STATE_RUNNING) {
		rc = -ESHUTDOWN;
		goto out;
	}

	lnet_net_lock(0);

	net = lnet_get_net_locked(net_id);
	if (net == NULL) {
		lnet_net_unlock(0);
		rc = -EINVAL;
		goto out;
	}

	net_ni_bytes = lnet_get_net_ni_bytes_locked(net);

	lnet_net_unlock(0);

	/* create and link a new ping info, before removing the old one */
	rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
				    LNET_PING_INFO_HDR_SIZE +
				    lnet_get_ni_bytes() - net_ni_bytes,
				    false);
	if (rc != 0)
		goto out;

	lnet_shutdown_lndnet(net);

	lnet_acceptor_stop();

	lnet_ping_target_update(pbuf, ping_mdh);

out:
	mutex_unlock(&the_lnet.ln_api_mutex);

	return rc;
}
3932
3933 void lnet_update_ping_buffer(void)
3934 {
3935         struct lnet_ping_buffer *pbuf;
3936         struct lnet_handle_md ping_mdh;
3937
3938         if (the_lnet.ln_routing)
3939                 return;
3940
3941         mutex_lock(&the_lnet.ln_api_mutex);
3942
3943         if (!lnet_ping_target_setup(&pbuf, &ping_mdh,
3944                                     LNET_PING_INFO_HDR_SIZE +
3945                                     lnet_get_ni_bytes(),
3946                                     false))
3947                 lnet_ping_target_update(pbuf, ping_mdh);
3948
3949         mutex_unlock(&the_lnet.ln_api_mutex);
3950 }
3951 EXPORT_SYMBOL(lnet_update_ping_buffer);
3952
/* Atomically increment the global DLC sequence counter. */
void lnet_incr_dlc_seq(void)
{
	atomic_inc(&lnet_dlc_seq_no);
}
3957
/* Return the current value of the global DLC sequence counter. */
__u32 lnet_get_dlc_seq_locked(void)
{
	return atomic_read(&lnet_dlc_seq_no);
}
3962
/* Set the health value of the NI whose nid4 address is @nid (or of every
 * NI when @all) to @value.  Any NI set below LNET_MAX_HEALTH_VALUE that
 * is not already queued is added to the local NI recovery queue with a
 * reference held.
 */
static void
lnet_ni_set_healthv(lnet_nid_t nid, int value, bool all)
{
	struct lnet_net *net;
	struct lnet_ni *ni;

	lnet_net_lock(LNET_LOCK_EX);
	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
			/* only nid4-addressed NIs can match a lnet_nid_t */
			if (all || (nid_is_nid4(&ni->ni_nid) &&
				    lnet_nid_to_nid4(&ni->ni_nid) == nid)) {
				atomic_set(&ni->ni_healthv, value);
				if (list_empty(&ni->ni_recovery) &&
				    value < LNET_MAX_HEALTH_VALUE) {
					CERROR("manually adding local NI %s to recovery\n",
					       libcfs_nidstr(&ni->ni_nid));
					list_add_tail(&ni->ni_recovery,
						      &the_lnet.ln_mt_localNIRecovq);
					/* ref balances removal from recovery */
					lnet_ni_addref_locked(ni, 0);
				}
				if (!all) {
					/* single-NI update: done */
					lnet_net_unlock(LNET_LOCK_EX);
					return;
				}
			}
		}
	}
	lnet_net_unlock(LNET_LOCK_EX);
}
3992
3993 static void
3994 lnet_ni_set_conns_per_peer(lnet_nid_t nid, int value, bool all)
3995 {
3996         struct lnet_net *net;
3997         struct lnet_ni *ni;
3998
3999         lnet_net_lock(LNET_LOCK_EX);
4000         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
4001                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
4002                         if (lnet_nid_to_nid4(&ni->ni_nid) != nid && !all)
4003                                 continue;
4004                         if (LNET_NETTYP(net->net_id) == SOCKLND)
4005                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_sock.lnd_conns_per_peer = value;
4006                         else if (LNET_NETTYP(net->net_id) == O2IBLND)
4007                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer = value;
4008                         if (!all) {
4009                                 lnet_net_unlock(LNET_LOCK_EX);
4010                                 return;
4011                         }
4012                 }
4013         }
4014         lnet_net_unlock(LNET_LOCK_EX);
4015 }
4016
4017 static int
4018 lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats)
4019 {
4020         int cpt, rc = 0;
4021         struct lnet_ni *ni;
4022         struct lnet_nid nid;
4023
4024         lnet_nid4_to_nid(stats->hlni_nid, &nid);
4025         cpt = lnet_net_lock_current();
4026         ni = lnet_nid_to_ni_locked(&nid, cpt);
4027         if (!ni) {
4028                 rc = -ENOENT;
4029                 goto unlock;
4030         }
4031
4032         stats->hlni_local_interrupt = atomic_read(&ni->ni_hstats.hlt_local_interrupt);
4033         stats->hlni_local_dropped = atomic_read(&ni->ni_hstats.hlt_local_dropped);
4034         stats->hlni_local_aborted = atomic_read(&ni->ni_hstats.hlt_local_aborted);
4035         stats->hlni_local_no_route = atomic_read(&ni->ni_hstats.hlt_local_no_route);
4036         stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout);
4037         stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error);
4038         stats->hlni_fatal_error = atomic_read(&ni->ni_fatal_error_on);
4039         stats->hlni_health_value = atomic_read(&ni->ni_healthv);
4040         stats->hlni_ping_count = ni->ni_ping_count;
4041         stats->hlni_next_ping = ni->ni_next_ping;
4042
4043 unlock:
4044         lnet_net_unlock(cpt);
4045
4046         return rc;
4047 }
4048
4049 static int
4050 lnet_get_local_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4051 {
4052         struct lnet_ni *ni;
4053         int i = 0;
4054
4055         lnet_net_lock(LNET_LOCK_EX);
4056         list_for_each_entry(ni, &the_lnet.ln_mt_localNIRecovq, ni_recovery) {
4057                 if (!nid_is_nid4(&ni->ni_nid))
4058                         continue;
4059                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&ni->ni_nid);
4060                 i++;
4061                 if (i >= LNET_MAX_SHOW_NUM_NID)
4062                         break;
4063         }
4064         lnet_net_unlock(LNET_LOCK_EX);
4065         list->rlst_num_nids = i;
4066
4067         return 0;
4068 }
4069
4070 static int
4071 lnet_get_peer_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4072 {
4073         struct lnet_peer_ni *lpni;
4074         int i = 0;
4075
4076         lnet_net_lock(LNET_LOCK_EX);
4077         list_for_each_entry(lpni, &the_lnet.ln_mt_peerNIRecovq, lpni_recovery) {
4078                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&lpni->lpni_nid);
4079                 i++;
4080                 if (i >= LNET_MAX_SHOW_NUM_NID)
4081                         break;
4082         }
4083         lnet_net_unlock(LNET_LOCK_EX);
4084         list->rlst_num_nids = i;
4085
4086         return 0;
4087 }
4088
4089 /**
4090  * LNet ioctl handler.
4091  *
4092  */
4093 int
4094 LNetCtl(unsigned int cmd, void *arg)
4095 {
4096         struct libcfs_ioctl_data *data = arg;
4097         struct lnet_ioctl_config_data *config;
4098         struct lnet_ni           *ni;
4099         struct lnet_nid           nid;
4100         int                       rc;
4101
4102         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
4103                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
4104
4105         switch (cmd) {
4106         case IOC_LIBCFS_FAIL_NID:
4107                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
4108
4109         case IOC_LIBCFS_ADD_ROUTE: {
4110                 /* default router sensitivity to 1 */
4111                 unsigned int sensitivity = 1;
4112                 config = arg;
4113
4114                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4115                         return -EINVAL;
4116
4117                 if (config->cfg_config_u.cfg_route.rtr_sensitivity) {
4118                         sensitivity =
4119                           config->cfg_config_u.cfg_route.rtr_sensitivity;
4120                 }
4121
4122                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4123                 mutex_lock(&the_lnet.ln_api_mutex);
4124                 rc = lnet_add_route(config->cfg_net,
4125                                     config->cfg_config_u.cfg_route.rtr_hop,
4126                                     &nid,
4127                                     config->cfg_config_u.cfg_route.
4128                                         rtr_priority, sensitivity);
4129                 mutex_unlock(&the_lnet.ln_api_mutex);
4130                 return rc;
4131         }
4132
4133         case IOC_LIBCFS_DEL_ROUTE:
4134                 config = arg;
4135
4136                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4137                         return -EINVAL;
4138
4139                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4140                 mutex_lock(&the_lnet.ln_api_mutex);
4141                 rc = lnet_del_route(config->cfg_net, &nid);
4142                 mutex_unlock(&the_lnet.ln_api_mutex);
4143                 return rc;
4144
4145         case IOC_LIBCFS_GET_ROUTE:
4146                 config = arg;
4147
4148                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4149                         return -EINVAL;
4150
4151                 mutex_lock(&the_lnet.ln_api_mutex);
4152                 rc = lnet_get_route(config->cfg_count,
4153                                     &config->cfg_net,
4154                                     &config->cfg_config_u.cfg_route.rtr_hop,
4155                                     &config->cfg_nid,
4156                                     &config->cfg_config_u.cfg_route.rtr_flags,
4157                                     &config->cfg_config_u.cfg_route.
4158                                         rtr_priority,
4159                                     &config->cfg_config_u.cfg_route.
4160                                         rtr_sensitivity);
4161                 mutex_unlock(&the_lnet.ln_api_mutex);
4162                 return rc;
4163
4164         case IOC_LIBCFS_GET_LOCAL_NI: {
4165                 struct lnet_ioctl_config_ni *cfg_ni;
4166                 struct lnet_ioctl_config_lnd_tunables *tun = NULL;
4167                 struct lnet_ioctl_element_stats *stats;
4168                 __u32 tun_size;
4169
4170                 cfg_ni = arg;
4171
4172                 /* get the tunables if they are available */
4173                 if (cfg_ni->lic_cfg_hdr.ioc_len <
4174                     sizeof(*cfg_ni) + sizeof(*stats) + sizeof(*tun))
4175                         return -EINVAL;
4176
4177                 stats = (struct lnet_ioctl_element_stats *)
4178                         cfg_ni->lic_bulk;
4179                 tun = (struct lnet_ioctl_config_lnd_tunables *)
4180                                 (cfg_ni->lic_bulk + sizeof(*stats));
4181
4182                 tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
4183                         sizeof(*stats);
4184
4185                 mutex_lock(&the_lnet.ln_api_mutex);
4186                 rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
4187                 mutex_unlock(&the_lnet.ln_api_mutex);
4188                 return rc;
4189         }
4190
4191         case IOC_LIBCFS_GET_LOCAL_NI_MSG_STATS: {
4192                 struct lnet_ioctl_element_msg_stats *msg_stats = arg;
4193
4194                 if (msg_stats->im_hdr.ioc_len != sizeof(*msg_stats))
4195                         return -EINVAL;
4196
4197                 mutex_lock(&the_lnet.ln_api_mutex);
4198                 rc = lnet_get_ni_stats(msg_stats);
4199                 mutex_unlock(&the_lnet.ln_api_mutex);
4200
4201                 return rc;
4202         }
4203
4204         case IOC_LIBCFS_GET_NET: {
4205                 size_t total = sizeof(*config) +
4206                                sizeof(struct lnet_ioctl_net_config);
4207                 config = arg;
4208
4209                 if (config->cfg_hdr.ioc_len < total)
4210                         return -EINVAL;
4211
4212                 mutex_lock(&the_lnet.ln_api_mutex);
4213                 rc = lnet_get_net_config(config);
4214                 mutex_unlock(&the_lnet.ln_api_mutex);
4215                 return rc;
4216         }
4217
4218         case IOC_LIBCFS_GET_LNET_STATS:
4219         {
4220                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
4221
4222                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
4223                         return -EINVAL;
4224
4225                 mutex_lock(&the_lnet.ln_api_mutex);
4226                 rc = lnet_counters_get(&lnet_stats->st_cntrs);
4227                 mutex_unlock(&the_lnet.ln_api_mutex);
4228                 return rc;
4229         }
4230
4231         case IOC_LIBCFS_RESET_LNET_STATS:
4232         {
4233                 mutex_lock(&the_lnet.ln_api_mutex);
4234                 lnet_counters_reset();
4235                 mutex_unlock(&the_lnet.ln_api_mutex);
4236                 return 0;
4237         }
4238
4239         case IOC_LIBCFS_CONFIG_RTR:
4240                 config = arg;
4241
4242                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4243                         return -EINVAL;
4244
4245                 mutex_lock(&the_lnet.ln_api_mutex);
4246                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
4247                         rc = lnet_rtrpools_enable();
4248                         mutex_unlock(&the_lnet.ln_api_mutex);
4249                         return rc;
4250                 }
4251                 lnet_rtrpools_disable();
4252                 mutex_unlock(&the_lnet.ln_api_mutex);
4253                 return 0;
4254
4255         case IOC_LIBCFS_ADD_BUF:
4256                 config = arg;
4257
4258                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4259                         return -EINVAL;
4260
4261                 mutex_lock(&the_lnet.ln_api_mutex);
4262                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
4263                                                 buf_tiny,
4264                                           config->cfg_config_u.cfg_buffers.
4265                                                 buf_small,
4266                                           config->cfg_config_u.cfg_buffers.
4267                                                 buf_large);
4268                 mutex_unlock(&the_lnet.ln_api_mutex);
4269                 return rc;
4270
4271         case IOC_LIBCFS_SET_NUMA_RANGE: {
4272                 struct lnet_ioctl_set_value *numa;
4273                 numa = arg;
4274                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4275                         return -EINVAL;
4276                 lnet_net_lock(LNET_LOCK_EX);
4277                 lnet_numa_range = numa->sv_value;
4278                 lnet_net_unlock(LNET_LOCK_EX);
4279                 return 0;
4280         }
4281
4282         case IOC_LIBCFS_GET_NUMA_RANGE: {
4283                 struct lnet_ioctl_set_value *numa;
4284                 numa = arg;
4285                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4286                         return -EINVAL;
4287                 numa->sv_value = lnet_numa_range;
4288                 return 0;
4289         }
4290
4291         case IOC_LIBCFS_GET_BUF: {
4292                 struct lnet_ioctl_pool_cfg *pool_cfg;
4293                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
4294
4295                 config = arg;
4296
4297                 if (config->cfg_hdr.ioc_len < total)
4298                         return -EINVAL;
4299
4300                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
4301
4302                 mutex_lock(&the_lnet.ln_api_mutex);
4303                 rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
4304                 mutex_unlock(&the_lnet.ln_api_mutex);
4305                 return rc;
4306         }
4307
4308         case IOC_LIBCFS_GET_LOCAL_HSTATS: {
4309                 struct lnet_ioctl_local_ni_hstats *stats = arg;
4310
4311                 if (stats->hlni_hdr.ioc_len < sizeof(*stats))
4312                         return -EINVAL;
4313
4314                 mutex_lock(&the_lnet.ln_api_mutex);
4315                 rc = lnet_get_local_ni_hstats(stats);
4316                 mutex_unlock(&the_lnet.ln_api_mutex);
4317
4318                 return rc;
4319         }
4320
4321         case IOC_LIBCFS_GET_RECOVERY_QUEUE: {
4322                 struct lnet_ioctl_recovery_list *list = arg;
4323                 if (list->rlst_hdr.ioc_len < sizeof(*list))
4324                         return -EINVAL;
4325
4326                 mutex_lock(&the_lnet.ln_api_mutex);
4327                 if (list->rlst_type == LNET_HEALTH_TYPE_LOCAL_NI)
4328                         rc = lnet_get_local_ni_recovery_list(list);
4329                 else
4330                         rc = lnet_get_peer_ni_recovery_list(list);
4331                 mutex_unlock(&the_lnet.ln_api_mutex);
4332                 return rc;
4333         }
4334
4335         case IOC_LIBCFS_ADD_PEER_NI: {
4336                 struct lnet_ioctl_peer_cfg *cfg = arg;
4337                 struct lnet_nid prim_nid;
4338
4339                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4340                         return -EINVAL;
4341
4342                 mutex_lock(&the_lnet.ln_api_mutex);
4343                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4344                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4345                 rc = lnet_user_add_peer_ni(&prim_nid, &nid, cfg->prcfg_mr,
4346                                            cfg->prcfg_count == 1);
4347                 mutex_unlock(&the_lnet.ln_api_mutex);
4348                 return rc;
4349         }
4350
4351         case IOC_LIBCFS_DEL_PEER_NI: {
4352                 struct lnet_ioctl_peer_cfg *cfg = arg;
4353                 struct lnet_nid prim_nid;
4354
4355                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4356                         return -EINVAL;
4357
4358                 mutex_lock(&the_lnet.ln_api_mutex);
4359                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4360                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4361                 rc = lnet_del_peer_ni(&prim_nid,
4362                                       &nid,
4363                                       cfg->prcfg_count);
4364                 mutex_unlock(&the_lnet.ln_api_mutex);
4365                 return rc;
4366         }
4367
4368         case IOC_LIBCFS_GET_PEER_INFO: {
4369                 struct lnet_ioctl_peer *peer_info = arg;
4370
4371                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
4372                         return -EINVAL;
4373
4374                 mutex_lock(&the_lnet.ln_api_mutex);
4375                 rc = lnet_get_peer_ni_info(
4376                    peer_info->pr_count,
4377                    &peer_info->pr_nid,
4378                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
4379                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
4380                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
4381                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
4382                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
4383                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
4384                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
4385                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
4386                 mutex_unlock(&the_lnet.ln_api_mutex);
4387                 return rc;
4388         }
4389
4390         case IOC_LIBCFS_GET_PEER_NI: {
4391                 struct lnet_ioctl_peer_cfg *cfg = arg;
4392
4393                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4394                         return -EINVAL;
4395
4396                 mutex_lock(&the_lnet.ln_api_mutex);
4397                 rc = lnet_get_peer_info(cfg,
4398                                         (void __user *)cfg->prcfg_bulk);
4399                 mutex_unlock(&the_lnet.ln_api_mutex);
4400                 return rc;
4401         }
4402
4403         case IOC_LIBCFS_GET_PEER_LIST: {
4404                 struct lnet_ioctl_peer_cfg *cfg = arg;
4405
4406                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4407                         return -EINVAL;
4408
4409                 mutex_lock(&the_lnet.ln_api_mutex);
4410                 rc = lnet_get_peer_list(&cfg->prcfg_count, &cfg->prcfg_size,
4411                                 (struct lnet_process_id __user *)cfg->prcfg_bulk);
4412                 mutex_unlock(&the_lnet.ln_api_mutex);
4413                 return rc;
4414         }
4415
4416         case IOC_LIBCFS_SET_HEALHV: {
4417                 struct lnet_ioctl_reset_health_cfg *cfg = arg;
4418                 int value;
4419                 if (cfg->rh_hdr.ioc_len < sizeof(*cfg))
4420                         return -EINVAL;
4421                 if (cfg->rh_value < 0 ||
4422                     cfg->rh_value > LNET_MAX_HEALTH_VALUE)
4423                         value = LNET_MAX_HEALTH_VALUE;
4424                 else
4425                         value = cfg->rh_value;
4426                 CDEBUG(D_NET, "Manually setting healthv to %d for %s:%s. all = %d\n",
4427                        value, (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) ?
4428                        "local" : "peer", libcfs_nid2str(cfg->rh_nid), cfg->rh_all);
4429                 mutex_lock(&the_lnet.ln_api_mutex);
4430                 if (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI)
4431                         lnet_ni_set_healthv(cfg->rh_nid, value,
4432                                              cfg->rh_all);
4433                 else
4434                         lnet_peer_ni_set_healthv(cfg->rh_nid, value,
4435                                                   cfg->rh_all);
4436                 mutex_unlock(&the_lnet.ln_api_mutex);
4437                 return 0;
4438         }
4439
4440         case IOC_LIBCFS_SET_CONNS_PER_PEER: {
4441                 struct lnet_ioctl_reset_conns_per_peer_cfg *cfg = arg;
4442                 int value;
4443
4444                 if (cfg->rcpp_hdr.ioc_len < sizeof(*cfg))
4445                         return -EINVAL;
4446                 if (cfg->rcpp_value < 0)
4447                         value = 1;
4448                 else
4449                         value = cfg->rcpp_value;
4450                 CDEBUG(D_NET,
4451                        "Setting conns_per_peer to %d for %s. all = %d\n",
4452                        value, libcfs_nid2str(cfg->rcpp_nid), cfg->rcpp_all);
4453                 mutex_lock(&the_lnet.ln_api_mutex);
4454                 lnet_ni_set_conns_per_peer(cfg->rcpp_nid, value, cfg->rcpp_all);
4455                 mutex_unlock(&the_lnet.ln_api_mutex);
4456                 return 0;
4457         }
4458
4459         case IOC_LIBCFS_NOTIFY_ROUTER: {
4460                 time64_t deadline = ktime_get_real_seconds() - data->ioc_u64[0];
4461
4462                 /* The deadline passed in by the user should be some time in
4463                  * seconds in the future since the UNIX epoch. We have to map
4464                  * that deadline to the wall clock.
4465                  */
4466                 deadline += ktime_get_seconds();
4467                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4468                 return lnet_notify(NULL, &nid, data->ioc_flags, false,
4469                                    deadline);
4470         }
4471
4472         case IOC_LIBCFS_LNET_DIST:
4473                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4474                 rc = LNetDist(&nid, &nid, &data->ioc_u32[1]);
4475                 if (rc < 0 && rc != -EHOSTUNREACH)
4476                         return rc;
4477
4478                 data->ioc_nid = lnet_nid_to_nid4(&nid);
4479                 data->ioc_u32[0] = rc;
4480                 return 0;
4481
4482         case IOC_LIBCFS_TESTPROTOCOMPAT:
4483                 the_lnet.ln_testprotocompat = data->ioc_flags;
4484                 return 0;
4485
4486         case IOC_LIBCFS_LNET_FAULT:
4487                 return lnet_fault_ctl(data->ioc_flags, data);
4488
4489         case IOC_LIBCFS_PING_PEER: {
4490                 struct lnet_ioctl_ping_data *ping = arg;
4491                 struct lnet_process_id __user *ids = ping->ping_buf;
4492                 struct lnet_nid src_nid = LNET_ANY_NID;
4493                 struct lnet_genl_ping_list plist;
4494                 struct lnet_processid id;
4495                 struct lnet_peer *lp;
4496                 signed long timeout;
4497                 int count, i;
4498
4499                 /* Check if the supplied ping data supports source nid
4500                  * NB: This check is sufficient if lnet_ioctl_ping_data has
4501                  * additional fields added, but if they are re-ordered or
4502                  * fields removed then this will break. It is expected that
4503                  * these ioctls will be replaced with netlink implementation, so
4504                  * it is probably not worth coming up with a more robust version
4505                  * compatibility scheme.
4506                  */
4507                 if (ping->ping_hdr.ioc_len >= sizeof(struct lnet_ioctl_ping_data))
4508                         lnet_nid4_to_nid(ping->ping_src, &src_nid);
4509
4510                 /* If timeout is negative then set default of 3 minutes */
4511                 if (((s32)ping->op_param) <= 0 ||
4512                     ping->op_param > (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
4513                         timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
4514                 else
4515                         timeout = nsecs_to_jiffies(ping->op_param * NSEC_PER_MSEC);
4516
4517                 id.pid = ping->ping_id.pid;
4518                 lnet_nid4_to_nid(ping->ping_id.nid, &id.nid);
4519                 rc = lnet_ping(&id, &src_nid, timeout, &plist,
4520                                ping->ping_count);
4521                 if (rc < 0)
4522                         goto report_ping_err;
4523                 count = rc;
4524
4525                 for (i = 0; i < count; i++) {
4526                         struct lnet_processid *result;
4527                         struct lnet_process_id tmpid;
4528
4529                         result = genradix_ptr(&plist.lgpl_list, i);
4530                         memset(&tmpid, 0, sizeof(tmpid));
4531                         tmpid.pid = result->pid;
4532                         tmpid.nid = lnet_nid_to_nid4(&result->nid);
4533                         if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
4534                                 rc = -EFAULT;
4535                                 goto report_ping_err;
4536                         }
4537                 }
4538
4539                 mutex_lock(&the_lnet.ln_api_mutex);
4540                 lp = lnet_find_peer(&id.nid);
4541                 if (lp) {
4542                         ping->ping_id.nid =
4543                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4544                         ping->mr_info = lnet_peer_is_multi_rail(lp);
4545                         lnet_peer_decref_locked(lp);
4546                 }
4547                 mutex_unlock(&the_lnet.ln_api_mutex);
4548
4549                 ping->ping_count = count;
4550 report_ping_err:
4551                 genradix_free(&plist.lgpl_list);
4552                 return rc;
4553         }
4554
4555         case IOC_LIBCFS_DISCOVER: {
4556                 struct lnet_ioctl_ping_data *discover = arg;
4557                 struct lnet_peer *lp;
4558
4559                 rc = lnet_discover(discover->ping_id, discover->op_param,
4560                                    discover->ping_buf,
4561                                    discover->ping_count);
4562                 if (rc < 0)
4563                         return rc;
4564
4565                 mutex_lock(&the_lnet.ln_api_mutex);
4566                 lnet_nid4_to_nid(discover->ping_id.nid, &nid);
4567                 lp = lnet_find_peer(&nid);
4568                 if (lp) {
4569                         discover->ping_id.nid =
4570                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4571                         discover->mr_info = lnet_peer_is_multi_rail(lp);
4572                         lnet_peer_decref_locked(lp);
4573                 }
4574                 mutex_unlock(&the_lnet.ln_api_mutex);
4575
4576                 discover->ping_count = rc;
4577                 return 0;
4578         }
4579
4580         case IOC_LIBCFS_ADD_UDSP: {
4581                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4582                 __u32 bulk_size = ioc_udsp->iou_hdr.ioc_len;
4583
4584                 mutex_lock(&the_lnet.ln_api_mutex);
4585                 rc = lnet_udsp_demarshal_add(arg, bulk_size);
4586                 if (!rc) {
4587                         rc = lnet_udsp_apply_policies(NULL, false);
4588                         CDEBUG(D_NET, "policy application returned %d\n", rc);
4589                         rc = 0;
4590                 }
4591                 mutex_unlock(&the_lnet.ln_api_mutex);
4592
4593                 return rc;
4594         }
4595
4596         case IOC_LIBCFS_DEL_UDSP: {
4597                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4598                 int idx = ioc_udsp->iou_idx;
4599
4600                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4601                         return -EINVAL;
4602
4603                 mutex_lock(&the_lnet.ln_api_mutex);
4604                 rc = lnet_udsp_del_policy(idx);
4605                 if (!rc) {
4606                         rc = lnet_udsp_apply_policies(NULL, false);
4607                         CDEBUG(D_NET, "policy re-application returned %d\n",
4608                                rc);
4609                         rc = 0;
4610                 }
4611                 mutex_unlock(&the_lnet.ln_api_mutex);
4612
4613                 return rc;
4614         }
4615
4616         case IOC_LIBCFS_GET_UDSP_SIZE: {
4617                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4618                 struct lnet_udsp *udsp;
4619
4620                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4621                         return -EINVAL;
4622
4623                 rc = 0;
4624
4625                 mutex_lock(&the_lnet.ln_api_mutex);
4626                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4627                 if (!udsp) {
4628                         rc = -ENOENT;
4629                 } else {
4630                         /* coming in iou_idx will hold the idx of the udsp
4631                          * to get the size of. going out the iou_idx will
4632                          * hold the size of the UDSP found at the passed
4633                          * in index.
4634                          */
4635                         ioc_udsp->iou_idx = lnet_get_udsp_size(udsp);
4636                         if (ioc_udsp->iou_idx < 0)
4637                                 rc = -EINVAL;
4638                 }
4639                 mutex_unlock(&the_lnet.ln_api_mutex);
4640
4641                 return rc;
4642         }
4643
4644         case IOC_LIBCFS_GET_UDSP: {
4645                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4646                 struct lnet_udsp *udsp;
4647
4648                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4649                         return -EINVAL;
4650
4651                 rc = 0;
4652
4653                 mutex_lock(&the_lnet.ln_api_mutex);
4654                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4655                 if (!udsp)
4656                         rc = -ENOENT;
4657                 else
4658                         rc = lnet_udsp_marshal(udsp, ioc_udsp);
4659                 mutex_unlock(&the_lnet.ln_api_mutex);
4660
4661                 return rc;
4662         }
4663
4664         case IOC_LIBCFS_GET_CONST_UDSP_INFO: {
4665                 struct lnet_ioctl_construct_udsp_info *info = arg;
4666
4667                 if (info->cud_hdr.ioc_len < sizeof(*info))
4668                         return -EINVAL;
4669
4670                 CDEBUG(D_NET, "GET_UDSP_INFO for %s\n",
4671                        libcfs_nid2str(info->cud_nid));
4672
4673                 mutex_lock(&the_lnet.ln_api_mutex);
4674                 lnet_udsp_get_construct_info(info);
4675                 mutex_unlock(&the_lnet.ln_api_mutex);
4676
4677                 return 0;
4678         }
4679
4680         default:
4681                 ni = lnet_net2ni_addref(data->ioc_net);
4682                 if (ni == NULL)
4683                         return -EINVAL;
4684
4685                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
4686                         rc = -EINVAL;
4687                 else
4688                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
4689
4690                 lnet_ni_decref(ni);
4691                 return rc;
4692         }
4693         /* not reached */
4694 }
4695 EXPORT_SYMBOL(LNetCtl);
4696
/* Netlink key table describing the top-level attributes of a "net" GET
 * reply: the sequence header, the net type string, and the nested list
 * of local NIs. Passed to lnet_genl_send_scalar_list() so userspace can
 * interpret the value messages that follow.
 */
static const struct ln_key_list net_props_list = {
	.lkl_maxattr                    = LNET_NET_ATTR_MAX,
	.lkl_list                       = {
		[LNET_NET_ATTR_HDR]             = {
			.lkp_value              = "net",
			.lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
			.lkp_data_type          = NLA_NUL_STRING,
		},
		[LNET_NET_ATTR_TYPE]            = {
			.lkp_value              = "net type",
			.lkp_data_type          = NLA_STRING
		},
		[LNET_NET_ATTR_LOCAL]           = {
			.lkp_value              = "local NI(s)",
			.lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
			.lkp_data_type          = NLA_NESTED
		},
	},
};
4716
/* Netlink key table for the per-NI attributes nested under
 * LNET_NET_ATTR_LOCAL: NID, status string, and the interface list.
 * NOTE(review): unlike the sibling tables this one is not const -
 * presumably it is amended at runtime elsewhere; confirm before
 * constifying.
 */
static struct ln_key_list local_ni_list = {
	.lkl_maxattr                    = LNET_NET_LOCAL_NI_ATTR_MAX,
	.lkl_list                       = {
		[LNET_NET_LOCAL_NI_ATTR_NID]    = {
			.lkp_value              = "nid",
			.lkp_data_type          = NLA_STRING
		},
		[LNET_NET_LOCAL_NI_ATTR_STATUS] = {
			.lkp_value              = "status",
			.lkp_data_type          = NLA_STRING
		},
		[LNET_NET_LOCAL_NI_ATTR_INTERFACE] = {
			.lkp_value              = "interfaces",
			.lkp_key_format         = LNKF_MAPPING,
			.lkp_data_type          = NLA_NESTED
		},
	},
};
4735
/* Netlink key table for the interface entries nested under
 * LNET_NET_LOCAL_NI_ATTR_INTERFACE; a single string-valued slot whose
 * key is the interface index ("0").
 */
static const struct ln_key_list local_ni_interfaces_list = {
	.lkl_maxattr                    = LNET_NET_LOCAL_NI_INTF_ATTR_MAX,
	.lkl_list                       = {
		[LNET_NET_LOCAL_NI_INTF_ATTR_TYPE] = {
			.lkp_value      = "0",
			.lkp_data_type  = NLA_STRING
		},
	},
};
4745
/* Per-dump state for the LNet net GET handlers. Use an index since the
 * traversal is across LNet nets and ni collections; allocated in
 * lnet_net_show_start(), stashed in cb->args[0], freed in
 * lnet_net_show_done().
 */
struct lnet_genl_net_list {
	unsigned int    lngl_net_id;	/* net to report, or LNET_NET_ANY */
	unsigned int    lngl_idx;	/* # of NIs emitted so far (resume cursor) */
};
4751
4752 static inline struct lnet_genl_net_list *
4753 lnet_net_dump_ctx(struct netlink_callback *cb)
4754 {
4755         return (struct lnet_genl_net_list *)cb->args[0];
4756 }
4757
4758 static int lnet_net_show_done(struct netlink_callback *cb)
4759 {
4760         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
4761
4762         if (nlist) {
4763                 LIBCFS_FREE(nlist, sizeof(*nlist));
4764                 cb->args[0] = 0;
4765         }
4766
4767         return 0;
4768 }
4769
/* LNet net ->start() handler for GET requests.
 *
 * Allocates the dump state (freed via lnet_net_show_done(), which is
 * also invoked here on error after the state is attached), stashes it
 * in cb->args[0], and - when the request carries parameters - parses an
 * optional "net type" filter into nlist->lngl_net_id.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int lnet_net_show_start(struct netlink_callback *cb)
{
	struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
#ifdef HAVE_NL_PARSE_WITH_EXT_ACK
	struct netlink_ext_ack *extack = NULL;
#endif
	struct lnet_genl_net_list *nlist;
	int msg_len = genlmsg_len(gnlh);
	struct nlattr *params, *top;
	int rem, rc = 0;

#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
	extack = cb->extack;
#endif
	/* nothing to report while the LNet stack is not up */
	if (the_lnet.ln_refcount == 0) {
		NL_SET_ERR_MSG(extack, "LNet stack down");
		return -ENETDOWN;
	}

	LIBCFS_ALLOC(nlist, sizeof(*nlist));
	if (!nlist)
		return -ENOMEM;

	/* default: dump every net, starting from the first NI */
	nlist->lngl_net_id = LNET_NET_ANY;
	nlist->lngl_idx = 0;
	cb->args[0] = (long)nlist;

	/* no parameters supplied: keep the defaults */
	if (!msg_len)
		return 0;

	params = genlmsg_data(gnlh);
	if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
		NL_SET_ERR_MSG(extack, "invalid configuration");
		return -EINVAL;
	}

	nla_for_each_nested(top, params, rem) {
		struct nlattr *net;
		int rem2;

		nla_for_each_nested(net, top, rem2) {
			char filter[LNET_NIDSTR_SIZE];

			/* scan for the "net type" key attribute */
			if (nla_type(net) != LN_SCALAR_ATTR_VALUE ||
			    nla_strcmp(net, "net type") != 0)
				continue;

			/* its value is the next attribute in the nest */
			net = nla_next(net, &rem2);
			if (nla_type(net) != LN_SCALAR_ATTR_VALUE) {
				NL_SET_ERR_MSG(extack, "invalid config param");
				GOTO(report_err, rc = -EINVAL);
			}

			rc = nla_strscpy(filter, net, sizeof(filter));
			if (rc < 0) {
				NL_SET_ERR_MSG(extack, "failed to get param");
				GOTO(report_err, rc);
			}
			rc = 0;

			nlist->lngl_net_id = libcfs_str2net(filter);
			if (nlist->lngl_net_id == LNET_NET_ANY) {
				NL_SET_ERR_MSG(extack, "cannot parse net");
				GOTO(report_err, rc = -ENOENT);
			}
		}
	}
report_err:
	/* on failure, free the state now rather than waiting for ->done() */
	if (rc < 0)
		lnet_net_show_done(cb);

	return rc;
}
4844
4845 static int lnet_net_show_dump(struct sk_buff *msg,
4846                               struct netlink_callback *cb)
4847 {
4848         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
4849 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4850         struct netlink_ext_ack *extack = NULL;
4851 #endif
4852         int portid = NETLINK_CB(cb->skb).portid;
4853         int seq = cb->nlh->nlmsg_seq;
4854         struct lnet_net *net;
4855         int idx = 0, rc = 0;
4856         bool found = false;
4857         void *hdr = NULL;
4858
4859 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4860         extack = cb->extack;
4861 #endif
4862         if (!nlist->lngl_idx) {
4863                 const struct ln_key_list *all[] = {
4864                         &net_props_list, &local_ni_list,
4865                         &local_ni_interfaces_list,
4866                         NULL
4867                 };
4868
4869                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
4870                                                 &lnet_family,
4871                                                 NLM_F_CREATE | NLM_F_MULTI,
4872                                                 LNET_CMD_NETS, all);
4873                 if (rc < 0) {
4874                         NL_SET_ERR_MSG(extack, "failed to send key table");
4875                         GOTO(send_error, rc);
4876                 }
4877         }
4878
4879         lnet_net_lock(LNET_LOCK_EX);
4880
4881         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
4882                 struct lnet_ni *ni;
4883
4884                 if (nlist->lngl_net_id != LNET_NET_ANY &&
4885                     nlist->lngl_net_id != net->net_id)
4886                         continue;
4887
4888                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
4889                         struct nlattr *local_ni, *ni_attr;
4890                         char *status = "up";
4891
4892                         if (idx++ < nlist->lngl_idx)
4893                                 continue;
4894
4895                         hdr = genlmsg_put(msg, portid, seq, &lnet_family,
4896                                           NLM_F_MULTI, LNET_CMD_NETS);
4897                         if (!hdr) {
4898                                 NL_SET_ERR_MSG(extack, "failed to send values");
4899                                 GOTO(net_unlock, rc = -EMSGSIZE);
4900                         }
4901
4902                         if (idx == 1)
4903                                 nla_put_string(msg, LNET_NET_ATTR_HDR, "");
4904
4905                         nla_put_string(msg, LNET_NET_ATTR_TYPE,
4906                                        libcfs_net2str(net->net_id));
4907                         found = true;
4908
4909                         local_ni = nla_nest_start(msg, LNET_NET_ATTR_LOCAL);
4910                         ni_attr = nla_nest_start(msg, idx - 1);
4911
4912                         lnet_ni_lock(ni);
4913                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_NID,
4914                                        libcfs_nidstr(&ni->ni_nid));
4915                         if (nid_is_lo0(&ni->ni_nid) &&
4916                             *ni->ni_status != LNET_NI_STATUS_UP)
4917                                 status = "down";
4918                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_STATUS, "up");
4919
4920                         if (!nid_is_lo0(&ni->ni_nid) && ni->ni_interface) {
4921                                 struct nlattr *intf_nest, *intf_attr;
4922
4923                                 intf_nest = nla_nest_start(msg,
4924                                                            LNET_NET_LOCAL_NI_ATTR_INTERFACE);
4925                                 intf_attr = nla_nest_start(msg, 0);
4926                                 nla_put_string(msg,
4927                                                LNET_NET_LOCAL_NI_INTF_ATTR_TYPE,
4928                                                ni->ni_interface);
4929                                 nla_nest_end(msg, intf_attr);
4930                                 nla_nest_end(msg, intf_nest);
4931                         }
4932
4933                         lnet_ni_unlock(ni);
4934                         nla_nest_end(msg, ni_attr);
4935                         nla_nest_end(msg, local_ni);
4936
4937                         genlmsg_end(msg, hdr);
4938                 }
4939         }
4940
4941         if (!found) {
4942                 struct nlmsghdr *nlh = nlmsg_hdr(msg);
4943
4944                 nlmsg_cancel(msg, nlh);
4945                 NL_SET_ERR_MSG(extack, "Network is down");
4946                 rc = -ESRCH;
4947         }
4948 net_unlock:
4949         lnet_net_unlock(LNET_LOCK_EX);
4950 send_error:
4951         nlist->lngl_idx = idx;
4952
4953         return lnet_nl_send_error(cb->skb, portid, seq, rc);
4954 }
4955
4956 #ifndef HAVE_NETLINK_CALLBACK_START
4957 static int lnet_old_net_show_dump(struct sk_buff *msg,
4958                                    struct netlink_callback *cb)
4959 {
4960         if (!cb->args[0]) {
4961                 int rc = lnet_net_show_start(cb);
4962
4963                 if (rc < 0)
4964                         return rc;
4965         }
4966
4967         return lnet_net_show_dump(msg, cb);
4968 }
4969 #endif
4970
4971 static int lnet_genl_parse_tunables(struct nlattr *settings,
4972                                     struct lnet_ioctl_config_lnd_tunables *tun)
4973 {
4974         struct nlattr *param;
4975         int rem, rc = 0;
4976
4977         nla_for_each_nested(param, settings, rem) {
4978                 int type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_UNSPEC;
4979                 s64 num;
4980
4981                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
4982                         continue;
4983
4984                 if (nla_strcmp(param, "peer_timeout") == 0)
4985                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT;
4986                 else if (nla_strcmp(param, "peer_credits") == 0)
4987                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS;
4988                 else if (nla_strcmp(param, "peer_buffer_credits") == 0)
4989                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS;
4990                 else if (nla_strcmp(param, "credits") == 0)
4991                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS;
4992
4993                 param = nla_next(param, &rem);
4994                 if (nla_type(param) != LN_SCALAR_ATTR_INT_VALUE)
4995                         return -EINVAL;
4996
4997                 num = nla_get_s64(param);
4998                 switch (type) {
4999                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT:
5000                         if (num >= 0)
5001                                 tun->lt_cmn.lct_peer_timeout = num;
5002                         break;
5003                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS:
5004                         if (num > 0)
5005                                 tun->lt_cmn.lct_peer_tx_credits = num;
5006                         break;
5007                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS:
5008                         if (num > 0)
5009                                 tun->lt_cmn.lct_peer_rtr_credits = num;
5010                         break;
5011                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS:
5012                         if (num > 0)
5013                                 tun->lt_cmn.lct_max_tx_credits = num;
5014                         break;
5015                 default:
5016                         rc = -EINVAL;
5017                         break;
5018                 }
5019         }
5020         return rc;
5021 }
5022
5023 static int lnet_genl_parse_lnd_tunables(struct nlattr *settings,
5024                                         struct lnet_lnd_tunables *tun,
5025                                         const struct lnet_lnd *lnd)
5026 {
5027         const struct ln_key_list *list = lnd->lnd_keys;
5028         struct nlattr *param;
5029         int rem, rc = 0;
5030         int i = 1;
5031
5032         /* silently ignore these setting if the LND driver doesn't
5033          * support any LND tunables
5034          */
5035         if (!list || !lnd->lnd_nl_set || !list->lkl_maxattr)
5036                 return 0;
5037
5038         nla_for_each_nested(param, settings, rem) {
5039                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
5040                         continue;
5041
5042                 for (i = 1; i <= list->lkl_maxattr; i++) {
5043                         if (!list->lkl_list[i].lkp_value ||
5044                             nla_strcmp(param, list->lkl_list[i].lkp_value) != 0)
5045                                 continue;
5046
5047                         param = nla_next(param, &rem);
5048                         rc = lnd->lnd_nl_set(LNET_CMD_NETS, param, i, tun);
5049                         if (rc < 0)
5050                                 return rc;
5051                 }
5052         }
5053
5054         return rc;
5055 }
5056
/**
 * lnet_genl_parse_local_ni - parse one local NI configuration from netlink
 * @entry:	nested attribute list describing a single NI
 * @info:	genetlink request info (error reporting and nlmsg flags)
 * @net_id:	network the NI belongs to
 * @conf:	ioctl-style NI config filled in from the parsed attributes
 * @ni_list:	set to true once at least one interface was parsed
 *
 * Recognized keys are "interfaces", "tunables", "lnd tunables" and "CPT".
 * If NLM_F_CREATE is set in the request, the parsed NI is added through
 * lnet_dyn_add_ni(); otherwise the NI whose interface name matches the
 * parsed one is deleted through lnet_dyn_del_ni().
 *
 * Return: 0 on success, negative errno on failure.
 */
static int
lnet_genl_parse_local_ni(struct nlattr *entry, struct genl_info *info,
			 int net_id, struct lnet_ioctl_config_ni *conf,
			 bool *ni_list)
{
	bool create = info->nlhdr->nlmsg_flags & NLM_F_CREATE;
	struct lnet_ioctl_config_lnd_tunables tun;
	struct nlattr *settings;
	int rem3, rc = 0;

	memset(&tun, 0, sizeof(tun));
	/* Use LND defaults: -1 tells the LND layer "value not set" */
	tun.lt_cmn.lct_peer_timeout = -1;
	tun.lt_cmn.lct_peer_tx_credits = -1;
	tun.lt_cmn.lct_peer_rtr_credits = -1;
	tun.lt_cmn.lct_max_tx_credits = -1;
	conf->lic_ncpts = 0;

	nla_for_each_nested(settings, entry, rem3) {
		/* each recognized key is followed by its value attribute;
		 * the branches below advance 'settings' manually to it
		 */
		if (nla_type(settings) != LN_SCALAR_ATTR_VALUE)
			continue;

		if (nla_strcmp(settings, "interfaces") == 0) {
			struct nlattr *intf;
			int rem4;

			settings = nla_next(settings, &rem3);
			if (nla_type(settings) !=
			    LN_SCALAR_ATTR_LIST) {
				GENL_SET_ERR_MSG(info,
						 "invalid interfaces");
				GOTO(out, rc = -EINVAL);
			}

			/* interface list entries come in pairs; the first
			 * of each pair (an index key) is skipped via
			 * nla_next() before reading the name
			 */
			nla_for_each_nested(intf, settings, rem4) {
				intf = nla_next(intf, &rem4);
				if (nla_type(intf) !=
				    LN_SCALAR_ATTR_VALUE) {
					GENL_SET_ERR_MSG(info,
							 "0 key is invalid");
					GOTO(out, rc = -EINVAL);
				}

				rc = nla_strscpy(conf->lic_ni_intf, intf,
						 sizeof(conf->lic_ni_intf));
				if (rc < 0) {
					GENL_SET_ERR_MSG(info,
							 "failed to parse interfaces");
					GOTO(out, rc);
				}
			}
			*ni_list = true;
		} else if (nla_strcmp(settings, "tunables") == 0) {
			settings = nla_next(settings, &rem3);
			if (nla_type(settings) !=
			    LN_SCALAR_ATTR_LIST) {
				GENL_SET_ERR_MSG(info,
						 "invalid tunables");
				GOTO(out, rc = -EINVAL);
			}

			rc = lnet_genl_parse_tunables(settings, &tun);
			if (rc < 0) {
				GENL_SET_ERR_MSG(info,
						 "failed to parse tunables");
				GOTO(out, rc);
			}
		} else if ((nla_strcmp(settings, "lnd tunables") == 0)) {
			const struct lnet_lnd *lnd;

			lnd = lnet_load_lnd(LNET_NETTYP(net_id));
			if (IS_ERR(lnd)) {
				GENL_SET_ERR_MSG(info,
						 "LND type not supported");
				GOTO(out, rc = PTR_ERR(lnd));
			}

			settings = nla_next(settings, &rem3);
			if (nla_type(settings) !=
			    LN_SCALAR_ATTR_LIST) {
				GENL_SET_ERR_MSG(info,
						 "lnd tunables should be list\n");
				GOTO(out, rc = -EINVAL);
			}

			rc = lnet_genl_parse_lnd_tunables(settings,
							  &tun.lt_tun, lnd);
			if (rc < 0) {
				GENL_SET_ERR_MSG(info,
						 "failed to parse lnd tunables");
				GOTO(out, rc);
			}
		} else if (nla_strcmp(settings, "CPT") == 0) {
			struct nlattr *cpt;
			int rem4;

			settings = nla_next(settings, &rem3);
			if (nla_type(settings) != LN_SCALAR_ATTR_LIST) {
				GENL_SET_ERR_MSG(info,
						 "CPT should be list");
				GOTO(out, rc = -EINVAL);
			}

			nla_for_each_nested(cpt, settings, rem4) {
				s64 core;

				if (nla_type(cpt) !=
				    LN_SCALAR_ATTR_INT_VALUE) {
					GENL_SET_ERR_MSG(info,
							 "invalid CPT config");
					GOTO(out, rc = -EINVAL);
				}

				core = nla_get_s64(cpt);
				if (core >= LNET_CPT_NUMBER) {
					GENL_SET_ERR_MSG(info,
							 "invalid CPT value");
					GOTO(out, rc = -ERANGE);
				}

				/* NOTE(review): lic_ncpts is not bounds
				 * checked against the lic_cpts array here;
				 * presumably limited by the sender — confirm
				 */
				conf->lic_cpts[conf->lic_ncpts] = core;
				conf->lic_ncpts++;
			}
		}
	}

	if (!create) {
		/* delete path: find the NI by interface name and remove it */
		struct lnet_net *net;
		struct lnet_ni *ni;

		rc = -ENODEV;
		if (!strlen(conf->lic_ni_intf)) {
			GENL_SET_ERR_MSG(info,
					 "interface is missing");
			GOTO(out, rc);
		}

		lnet_net_lock(LNET_LOCK_EX);
		net = lnet_get_net_locked(net_id);
		if (!net) {
			GENL_SET_ERR_MSG(info,
					 "LNet net doesn't exist");
			lnet_net_unlock(LNET_LOCK_EX);
			GOTO(out, rc);
		}

		list_for_each_entry(ni, &net->net_ni_list,
				    ni_netlist) {
			if (!ni->ni_interface ||
			    strcmp(ni->ni_interface,
				  conf->lic_ni_intf) != 0)
				continue;

			/* drop the net lock before the (blocking) delete;
			 * the loop exits right after, so the list is not
			 * walked unlocked
			 */
			lnet_net_unlock(LNET_LOCK_EX);
			rc = lnet_dyn_del_ni(&ni->ni_nid);
			if (rc < 0) {
				GENL_SET_ERR_MSG(info,
						 "cannot del LNet NI");
				GOTO(out, rc);
			}
			break;
		}

		/* rc is still -ENODEV only if no NI matched above; the
		 * lock was never dropped in that case, so release it here
		 */
		if (rc < 0) { /* will be -ENODEV */
			GENL_SET_ERR_MSG(info,
					 "interface invalid for deleting LNet NI");
			lnet_net_unlock(LNET_LOCK_EX);
		}
	} else {
		if (!strlen(conf->lic_ni_intf)) {
			GENL_SET_ERR_MSG(info,
					 "interface is missing");
			GOTO(out, rc);
		}

		rc = lnet_dyn_add_ni(conf, net_id, &tun);
		switch (rc) {
		case -ENOENT:
			GENL_SET_ERR_MSG(info,
					 "cannot parse net");
			break;
		case -ERANGE:
			GENL_SET_ERR_MSG(info,
					 "invalid CPT set");
			break;
		default:
			GENL_SET_ERR_MSG(info,
					 "cannot add LNet NI");
			/* fallthrough: 0 needs no error message */
		case 0:
			break;
		}
	}
out:
	return rc;
}
5252
/**
 * lnet_net_cmd - genetlink handler for LNET_CMD_NETS add/delete requests
 * @skb:	netlink message buffer
 * @info:	genetlink request info (error reporting and nlmsg flags)
 *
 * Walks the nested scalar-attribute tree sent by userland.  String keys
 * ("ip2net", "net type") select the target network; nested lists describe
 * individual NIs and are handed to lnet_genl_parse_local_ni().  A delete
 * request (no NLM_F_CREATE) carrying a net but no NI list removes the
 * whole network.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int lnet_net_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct nlmsghdr *nlh = nlmsg_hdr(skb);
	struct genlmsghdr *gnlh = nlmsg_data(nlh);
	struct nlattr *params = genlmsg_data(gnlh);
	int msg_len, rem, rc = 0;
	struct nlattr *attr;

	msg_len = genlmsg_len(gnlh);
	if (!msg_len) {
		GENL_SET_ERR_MSG(info, "no configuration");
		return -ENOMSG;
	}

	if (!(nla_type(params) & LN_SCALAR_ATTR_LIST)) {
		GENL_SET_ERR_MSG(info, "invalid configuration");
		return -EINVAL;
	}

	nla_for_each_nested(attr, params, rem) {
		struct lnet_ioctl_config_ni conf;
		u32 net_id = LNET_NET_ANY;
		struct nlattr *entry;
		bool ni_list = false;
		int rem2;

		if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
			continue;

		nla_for_each_nested(entry, attr, rem2) {
			switch (nla_type(entry)) {
			case LN_SCALAR_ATTR_VALUE: {
				ssize_t len;

				memset(&conf, 0, sizeof(conf));
				if (nla_strcmp(entry, "ip2net") == 0) {
					/* legacy ip2nets syntax: the whole
					 * config comes as one string
					 */
					entry = nla_next(entry, &rem2);
					if (nla_type(entry) !=
					    LN_SCALAR_ATTR_VALUE) {
						GENL_SET_ERR_MSG(info,
								 "ip2net has invalid key");
						GOTO(out, rc = -EINVAL);
					}

					len = nla_strscpy(conf.lic_legacy_ip2nets,
							  entry,
							  sizeof(conf.lic_legacy_ip2nets));
					if (len < 0) {
						GENL_SET_ERR_MSG(info,
								 "ip2net key string is invalid");
						GOTO(out, rc = len);
					}
					ni_list = true;
				} else if (nla_strcmp(entry, "net type") == 0) {
					char tmp[LNET_NIDSTR_SIZE];

					entry = nla_next(entry, &rem2);
					if (nla_type(entry) !=
					    LN_SCALAR_ATTR_VALUE) {
						GENL_SET_ERR_MSG(info,
								 "net type has invalid key");
						GOTO(out, rc = -EINVAL);
					}

					len = nla_strscpy(tmp, entry,
							  sizeof(tmp));
					if (len < 0) {
						GENL_SET_ERR_MSG(info,
								 "net type key string is invalid");
						GOTO(out, rc = len);
					}

					net_id = libcfs_str2net(tmp);
					if (!net_id) {
						GENL_SET_ERR_MSG(info,
								 "cannot parse net");
						GOTO(out, rc = -ENODEV);
					}
					/* the loopback net is managed
					 * internally and may not be
					 * configured from userland
					 */
					if (LNET_NETTYP(net_id) == LOLND) {
						GENL_SET_ERR_MSG(info,
								 "setting @lo not allowed");
						GOTO(out, rc = -ENODEV);
					}
					conf.lic_legacy_ip2nets[0] = '\0';
					conf.lic_ni_intf[0] = '\0';
					ni_list = false;
				}
				/* NOTE(review): rc is not assigned in this
				 * case (len carries the errors above), so
				 * this check looks like dead defensive code
				 * — confirm before removing
				 */
				if (rc < 0)
					GOTO(out, rc);
				break;
			}
			case LN_SCALAR_ATTR_LIST: {
				struct nlattr *interface;
				int rem3;

				nla_for_each_nested(interface, entry, rem3) {
					rc = lnet_genl_parse_local_ni(interface, info,
								      net_id, &conf,
								      &ni_list);
					if (rc < 0)
						GOTO(out, rc);
				}
				break;
			}
			/* it is possible a newer version of the user land send
			 * values older kernels doesn't handle. So silently
			 * ignore these values
			 */
			default:
				break;
			}
		}

		/* Handle case of just sent NET with no list of NIDs */
		if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && !ni_list) {
			rc = lnet_dyn_del_net(net_id);
			if (rc < 0) {
				GENL_SET_ERR_MSG(info,
						 "cannot del network");
			}
		}
	}
out:
	return rc;
}
5378
5379 static inline struct lnet_genl_ping_list *
5380 lnet_ping_dump_ctx(struct netlink_callback *cb)
5381 {
5382         return (struct lnet_genl_ping_list *)cb->args[0];
5383 }
5384
5385 static int lnet_ping_show_done(struct netlink_callback *cb)
5386 {
5387         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
5388
5389         if (plist) {
5390                 genradix_free(&plist->lgpl_failed);
5391                 genradix_free(&plist->lgpl_list);
5392                 LIBCFS_FREE(plist, sizeof(*plist));
5393                 cb->args[0] = 0;
5394         }
5395
5396         return 0;
5397 }
5398
/**
 * lnet_ping_show_start - LNet ping ->start() handler for GET requests
 * @cb:	netlink dump callback; cb->args[0] receives the allocated context
 *
 * Allocates a struct lnet_genl_ping_list, then parses the request
 * attributes: "timeout" (ms), "source" (NID string) and a nested list of
 * target NID strings, which are accumulated in lgpl_list.  The actual
 * pings are issued later by lnet_ping_show_dump().
 *
 * Return: 0 on success, negative errno on failure (context freed).
 */
static int lnet_ping_show_start(struct netlink_callback *cb)
{
	struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
#ifdef HAVE_NL_PARSE_WITH_EXT_ACK
	struct netlink_ext_ack *extack = NULL;
#endif
	struct lnet_genl_ping_list *plist;
	int msg_len = genlmsg_len(gnlh);
	struct nlattr *params, *top;
	int rem, rc = 0;

#ifdef HAVE_NL_DUMP_WITH_EXT_ACK
	extack = cb->extack;
#endif
	if (the_lnet.ln_refcount == 0) {
		NL_SET_ERR_MSG(extack, "Network is down");
		return -ENETDOWN;
	}

	if (!msg_len) {
		NL_SET_ERR_MSG(extack, "Ping needs NID targets");
		return -ENOENT;
	}

	LIBCFS_ALLOC(plist, sizeof(*plist));
	if (!plist) {
		NL_SET_ERR_MSG(extack, "failed to setup ping list");
		return -ENOMEM;
	}
	/* lgpl_failed is initialized later in lnet_ping_show_dump();
	 * NOTE(review): the error path below frees it via
	 * lnet_ping_show_done() — presumably safe because LIBCFS_ALLOC
	 * zeroes the struct and freeing a zeroed genradix is a no-op;
	 * confirm
	 */
	genradix_init(&plist->lgpl_list);
	plist->lgpl_timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
	plist->lgpl_src_nid = LNET_ANY_NID;
	plist->lgpl_index = 0;
	plist->lgpl_list_count = 0;
	cb->args[0] = (long)plist;

	params = genlmsg_data(gnlh);
	nla_for_each_attr(top, params, msg_len, rem) {
		struct nlattr *nids;
		int rem2;

		switch (nla_type(top)) {
		case LN_SCALAR_ATTR_VALUE:
			if (nla_strcmp(top, "timeout") == 0) {
				s64 timeout;

				/* key is followed by its value attribute */
				top = nla_next(top, &rem);
				if (nla_type(top) != LN_SCALAR_ATTR_INT_VALUE) {
					NL_SET_ERR_MSG(extack,
						       "invalid timeout param");
					GOTO(report_err, rc = -EINVAL);
				}

				/* If timeout is negative then set default of
				 * 3 minutes
				 */
				timeout = nla_get_s64(top);
				if (timeout > 0 &&
				    timeout < (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
					plist->lgpl_timeout =
						nsecs_to_jiffies(timeout * NSEC_PER_MSEC);
			} else if (nla_strcmp(top, "source") == 0) {
				char nidstr[LNET_NIDSTR_SIZE + 1];

				top = nla_next(top, &rem);
				if (nla_type(top) != LN_SCALAR_ATTR_VALUE) {
					NL_SET_ERR_MSG(extack,
						       "invalid source param");
					GOTO(report_err, rc = -EINVAL);
				}

				rc = nla_strscpy(nidstr, top, sizeof(nidstr));
				if (rc < 0) {
					NL_SET_ERR_MSG(extack,
						       "failed to parse source nid");
					GOTO(report_err, rc);
				}

				rc = libcfs_strnid(&plist->lgpl_src_nid,
						   strim(nidstr));
				if (rc < 0) {
					NL_SET_ERR_MSG(extack,
						       "invalid source nid");
					GOTO(report_err, rc);
				}
				/* nla_strscpy() returned a length; reset so
				 * report_err does not treat it as an error
				 */
				rc = 0;
			}
			break;
		case LN_SCALAR_ATTR_LIST:
			/* nested list of target NID strings */
			nla_for_each_nested(nids, top, rem2) {
				char nid[LNET_NIDSTR_SIZE + 1];
				struct lnet_processid *id;

				if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
					continue;

				memset(nid, 0, sizeof(nid));
				rc = nla_strscpy(nid, nids, sizeof(nid));
				if (rc < 0) {
					NL_SET_ERR_MSG(extack,
						       "failed to get NID");
					GOTO(report_err, rc);
				}

				id = genradix_ptr_alloc(&plist->lgpl_list,
							plist->lgpl_list_count++,
							GFP_ATOMIC);
				if (!id) {
					NL_SET_ERR_MSG(extack,
						       "failed to allocate NID");
					GOTO(report_err, rc = -ENOMEM);
				}

				rc = libcfs_strid(id, strim(nid));
				if (rc < 0) {
					NL_SET_ERR_MSG(extack, "invalid NID");
					GOTO(report_err, rc);
				}
				rc = 0;
			}
			fallthrough;
		default:
			break;
		}
	}
report_err:
	/* on any parse failure tear down the context now; the ->done()
	 * callback then finds cb->args[0] == 0 and does nothing
	 */
	if (rc < 0)
		lnet_ping_show_done(cb);

	return rc;
}
5531
/* Key table describing the top-level attributes of an LNET_CMD_PING
 * reply; sent to userland once per dump so it can decode the
 * attribute-index/value pairs that follow.
 */
static const struct ln_key_list ping_props_list = {
	.lkl_maxattr			= LNET_PING_ATTR_MAX,
	.lkl_list			= {
		[LNET_PING_ATTR_HDR]		= {
			.lkp_value		= "ping",
			.lkp_key_format		= LNKF_SEQUENCE | LNKF_MAPPING,
			.lkp_data_type		= NLA_NUL_STRING,
		},
		[LNET_PING_ATTR_PRIMARY_NID]	= {
			.lkp_value		= "primary nid",
			.lkp_data_type		= NLA_STRING
		},
		[LNET_PING_ATTR_ERRNO]		= {
			.lkp_value		= "errno",
			.lkp_data_type		= NLA_S16
		},
		[LNET_PING_ATTR_MULTIRAIL]	= {
			.lkp_value		= "Multi-Rail",
			.lkp_data_type		= NLA_FLAG
		},
		[LNET_PING_ATTR_PEER_NI_LIST]	= {
			.lkp_value		= "peer_ni",
			.lkp_key_format		= LNKF_SEQUENCE | LNKF_MAPPING,
			.lkp_data_type		= NLA_NESTED
		},
	},
};
5559
/* Key table for the nested per-peer-NI entries inside
 * LNET_PING_ATTR_PEER_NI_LIST; currently just the NID string.
 */
static struct ln_key_list ping_peer_ni_list = {
	.lkl_maxattr			= LNET_PING_PEER_NI_ATTR_MAX,
	.lkl_list			= {
		[LNET_PING_PEER_NI_ATTR_NID]	= {
			.lkp_value		= "nid",
			.lkp_data_type		= NLA_STRING
		},
	},
};
5569
5570 static int lnet_ping_show_dump(struct sk_buff *msg,
5571                                struct netlink_callback *cb)
5572 {
5573         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
5574         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5575 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5576         struct netlink_ext_ack *extack = NULL;
5577 #endif
5578         int portid = NETLINK_CB(cb->skb).portid;
5579         int seq = cb->nlh->nlmsg_seq;
5580         int idx = plist->lgpl_index;
5581         int rc = 0, i = 0;
5582
5583 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5584         extack = cb->extack;
5585 #endif
5586         if (!plist->lgpl_index) {
5587                 const struct ln_key_list *all[] = {
5588                         &ping_props_list, &ping_peer_ni_list, NULL
5589                 };
5590
5591                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
5592                                                 &lnet_family,
5593                                                 NLM_F_CREATE | NLM_F_MULTI,
5594                                                 LNET_CMD_PING, all);
5595                 if (rc < 0) {
5596                         NL_SET_ERR_MSG(extack, "failed to send key table");
5597                         GOTO(send_error, rc);
5598                 }
5599
5600                 genradix_init(&plist->lgpl_failed);
5601         }
5602
5603         while (idx < plist->lgpl_list_count) {
5604                 struct lnet_nid primary_nid = LNET_ANY_NID;
5605                 struct lnet_genl_ping_list peers;
5606                 struct lnet_processid *id;
5607                 struct nlattr *nid_list;
5608                 struct lnet_peer *lp;
5609                 bool mr_flag = false;
5610                 unsigned int count;
5611                 void *hdr = NULL;
5612
5613                 id = genradix_ptr(&plist->lgpl_list, idx++);
5614                 if (nid_is_lo0(&id->nid))
5615                         continue;
5616
5617                 rc = lnet_ping(id, &plist->lgpl_src_nid, plist->lgpl_timeout,
5618                                &peers, lnet_interfaces_max);
5619                 if (rc < 0) {
5620                         struct lnet_fail_ping *fail;
5621
5622                         fail = genradix_ptr_alloc(&plist->lgpl_failed,
5623                                                   plist->lgpl_failed_count++,
5624                                                   GFP_ATOMIC);
5625                         if (!fail) {
5626                                 NL_SET_ERR_MSG(extack,
5627                                                "failed to allocate failed NID");
5628                                 GOTO(send_error, rc);
5629                         }
5630                         fail->lfp_id = *id;
5631                         fail->lfp_errno = rc;
5632                         goto cant_reach;
5633                 }
5634
5635                 mutex_lock(&the_lnet.ln_api_mutex);
5636                 lp = lnet_find_peer(&id->nid);
5637                 if (lp) {
5638                         primary_nid = lp->lp_primary_nid;
5639                         mr_flag = lnet_peer_is_multi_rail(lp);
5640                         lnet_peer_decref_locked(lp);
5641                 }
5642                 mutex_unlock(&the_lnet.ln_api_mutex);
5643
5644                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
5645                                   NLM_F_MULTI, LNET_CMD_PING);
5646                 if (!hdr) {
5647                         NL_SET_ERR_MSG(extack, "failed to send values");
5648                         genlmsg_cancel(msg, hdr);
5649                         GOTO(send_error, rc = -EMSGSIZE);
5650                 }
5651
5652                 if (i++ == 0)
5653                         nla_put_string(msg, LNET_PING_ATTR_HDR, "");
5654
5655                 nla_put_string(msg, LNET_PING_ATTR_PRIMARY_NID,
5656                                libcfs_nidstr(&primary_nid));
5657                 if (mr_flag)
5658                         nla_put_flag(msg, LNET_PING_ATTR_MULTIRAIL);
5659
5660                 nid_list = nla_nest_start(msg, LNET_PING_ATTR_PEER_NI_LIST);
5661                 for (count = 0; count < rc; count++) {
5662                         struct lnet_processid *result;
5663                         struct nlattr *nid_attr;
5664                         char *idstr;
5665
5666                         result = genradix_ptr(&peers.lgpl_list, count);
5667                         if (nid_is_lo0(&result->nid))
5668                                 continue;
5669
5670                         nid_attr = nla_nest_start(msg, count + 1);
5671                         if (gnlh->version == 1)
5672                                 idstr = libcfs_nidstr(&result->nid);
5673                         else
5674                                 idstr = libcfs_idstr(result);
5675                         nla_put_string(msg, LNET_PING_PEER_NI_ATTR_NID, idstr);
5676                         nla_nest_end(msg, nid_attr);
5677                 }
5678                 nla_nest_end(msg, nid_list);
5679                 genlmsg_end(msg, hdr);
5680 cant_reach:
5681                 genradix_free(&peers.lgpl_list);
5682         }
5683
5684         for (i = 0; i < plist->lgpl_failed_count; i++) {
5685                 struct lnet_fail_ping *fail;
5686                 void *hdr;
5687
5688                 fail = genradix_ptr(&plist->lgpl_failed, i);
5689
5690                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
5691                                   NLM_F_MULTI, LNET_CMD_PING);
5692                 if (!hdr) {
5693                         NL_SET_ERR_MSG(extack, "failed to send failed values");
5694                         genlmsg_cancel(msg, hdr);
5695                         GOTO(send_error, rc = -EMSGSIZE);
5696                 }
5697
5698                 if (i == 0)
5699                         nla_put_string(msg, LNET_PING_ATTR_HDR, "");
5700
5701                 nla_put_string(msg, LNET_PING_ATTR_PRIMARY_NID,
5702                                libcfs_nidstr(&fail->lfp_id.nid));
5703                 nla_put_s16(msg, LNET_PING_ATTR_ERRNO, fail->lfp_errno);
5704                 genlmsg_end(msg, hdr);
5705         }
5706         rc = 0; /* don't treat it as an error */
5707
5708         plist->lgpl_index = idx;
5709 send_error:
5710         return lnet_nl_send_error(cb->skb, portid, seq, rc);
5711 }
5712
5713 #ifndef HAVE_NETLINK_CALLBACK_START
5714 static int lnet_old_ping_show_dump(struct sk_buff *msg,
5715                                    struct netlink_callback *cb)
5716 {
5717         if (!cb->args[0]) {
5718                 int rc = lnet_ping_show_start(cb);
5719
5720                 if (rc < 0)
5721                         return rc;
5722         }
5723
5724         return lnet_ping_show_dump(msg, cb);
5725 }
5726 #endif
5727
/* Generic netlink multicast groups registered for the LNet family.
 * Userspace subscribes to these by name to receive notifications.
 */
static const struct genl_multicast_group lnet_mcast_grps[] = {
	{ .name =	"ip2net",	},
	{ .name =	"net",		},
	{ .name =	"ping",		},
};
5733
/* Generic netlink operations for the LNet family.  Both commands are
 * dump-capable; on kernels without netlink_callback->start the
 * lnet_old_*_show_dump wrappers run the start phase on the first dump
 * invocation instead.  Note LNET_CMD_NETS requires admin permission
 * while LNET_CMD_PING does not.
 */
static const struct genl_ops lnet_genl_ops[] = {
	{
		.cmd		= LNET_CMD_NETS,
		.flags		= GENL_ADMIN_PERM,
#ifdef HAVE_NETLINK_CALLBACK_START
		.start		= lnet_net_show_start,
		.dumpit		= lnet_net_show_dump,
#else
		.dumpit		= lnet_old_net_show_dump,
#endif
		.done		= lnet_net_show_done,
		.doit		= lnet_net_cmd,
	},
	{
		.cmd		= LNET_CMD_PING,
#ifdef HAVE_NETLINK_CALLBACK_START
		.start		= lnet_ping_show_start,
		.dumpit		= lnet_ping_show_dump,
#else
		.dumpit		= lnet_old_ping_show_dump,
#endif
		.done		= lnet_ping_show_done,
	},
};
5758
/* The LNet generic netlink family tying together the ops and multicast
 * groups above.  netnsok allows use from non-init network namespaces.
 */
static struct genl_family lnet_family = {
	.name		= LNET_GENL_NAME,
	.version	= LNET_GENL_VERSION,
	.module		= THIS_MODULE,
	.netnsok	= true,
	.ops		= lnet_genl_ops,
	.n_ops		= ARRAY_SIZE(lnet_genl_ops),
	.mcgrps		= lnet_mcast_grps,
	.n_mcgrps	= ARRAY_SIZE(lnet_mcast_grps),
};
5769
/**
 * Emit debug information about the peer with NID \a id->nid by way of
 * lnet_debug_peer().  The pid portion of \a id is ignored.
 *
 * \param id	process ID of the peer to report on
 */
void LNetDebugPeer(struct lnet_processid *id)
{
	lnet_debug_peer(&id->nid);
}
EXPORT_SYMBOL(LNetDebugPeer);
5775
5776 /**
5777  * Determine if the specified peer \a nid is on the local node.
5778  *
5779  * \param nid   peer nid to check
5780  *
5781  * \retval true         If peer NID is on the local node.
5782  * \retval false        If peer NID is not on the local node.
5783  */
5784 bool LNetIsPeerLocal(struct lnet_nid *nid)
5785 {
5786         struct lnet_net *net;
5787         struct lnet_ni *ni;
5788         int cpt;
5789
5790         cpt = lnet_net_lock_current();
5791         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
5792                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
5793                         if (nid_same(&ni->ni_nid, nid)) {
5794                                 lnet_net_unlock(cpt);
5795                                 return true;
5796                         }
5797                 }
5798         }
5799         lnet_net_unlock(cpt);
5800
5801         return false;
5802 }
5803 EXPORT_SYMBOL(LNetIsPeerLocal);
5804
5805 /**
5806  * Retrieve the struct lnet_process_id ID of LNet interface at \a index.
5807  * Note that all interfaces share a same PID, as requested by LNetNIInit().
5808  *
5809  * \param index Index of the interface to look up.
5810  * \param id On successful return, this location will hold the
5811  * struct lnet_process_id ID of the interface.
5812  *
5813  * \retval 0 If an interface exists at \a index.
5814  * \retval -ENOENT If no interface has been found.
5815  */
5816 int
5817 LNetGetId(unsigned int index, struct lnet_processid *id)
5818 {
5819         struct lnet_ni   *ni;
5820         struct lnet_net  *net;
5821         int               cpt;
5822         int               rc = -ENOENT;
5823
5824         LASSERT(the_lnet.ln_refcount > 0);
5825
5826         cpt = lnet_net_lock_current();
5827
5828         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
5829                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
5830                         if (!nid_is_nid4(&ni->ni_nid))
5831                                 /* FIXME this needs to be handled */
5832                                 continue;
5833                         if (index-- != 0)
5834                                 continue;
5835
5836                         id->nid = ni->ni_nid;
5837                         id->pid = the_lnet.ln_pid;
5838                         rc = 0;
5839                         break;
5840                 }
5841         }
5842
5843         lnet_net_unlock(cpt);
5844         return rc;
5845 }
5846 EXPORT_SYMBOL(LNetGetId);
5847
/* State shared between lnet_ping() and lnet_ping_event_handler(). */
struct ping_data {
	int rc;			/* error status, or reply length on success */
	int replied;		/* set once a REPLY event has arrived */
	int pd_unlinked;	/* set once the MD has been unlinked */
	struct lnet_handle_md mdh;	/* handle of the MD bound for the ping */
	struct completion completion;	/* signalled when the ping finishes */
};
5855
5856 static void
5857 lnet_ping_event_handler(struct lnet_event *event)
5858 {
5859         struct ping_data *pd = event->md_user_ptr;
5860
5861         CDEBUG(D_NET, "ping event (%d %d)%s\n",
5862                event->type, event->status,
5863                event->unlinked ? " unlinked" : "");
5864
5865         if (event->status) {
5866                 if (!pd->rc)
5867                         pd->rc = event->status;
5868         } else if (event->type == LNET_EVENT_REPLY) {
5869                 pd->replied = 1;
5870                 pd->rc = event->mlength;
5871         }
5872
5873         if (event->unlinked)
5874                 pd->pd_unlinked = 1;
5875
5876         if (event->unlinked ||
5877             (event->type == LNET_EVENT_SEND && event->status))
5878                 complete(&pd->completion);
5879 }
5880
/*
 * Ping the peer \a id and collect the process IDs reported in its ping
 * reply into \a plist->lgpl_list (a genradix of struct lnet_processid).
 *
 * \param id		peer to ping; a pid of LNET_PID_ANY is replaced
 *			with LNET_PID_LUSTRE.  NB: reused as a cursor in
 *			the final loop, so it no longer points at the
 *			caller's data on return.
 * \param src_nid	local NID to send the ping from
 * \param timeout	wait time handed to wait_for_completion_timeout()
 * \param plist		result list; initialized here even on error, so
 *			the caller can always genradix_free() it
 * \param n_ids		max entries to return (capped by
 *			lnet_interfaces_max)
 *
 * \retval >= 0		number of entries stored in \a plist->lgpl_list
 * \retval -ve		negative errno on failure
 */
static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
		     signed long timeout, struct lnet_genl_ping_list *plist,
		     int n_ids)
{
	int id_bytes = sizeof(struct lnet_ni_status); /* For 0@lo */
	struct lnet_md md = { NULL };
	struct ping_data pd = { 0 };
	struct lnet_ping_buffer *pbuf;
	struct lnet_processid pid;
	struct lnet_ping_iter pi;
	int i = 0;
	u32 *st;
	int nob;
	int rc;
	int rc2;

	genradix_init(&plist->lgpl_list);

	/* n_ids limit is arbitrary */
	if (n_ids <= 0 || LNET_NID_IS_ANY(&id->nid))
		return -EINVAL;

	/* if the user buffer has more space than the lnet_interfaces_max
	 * then only fill it up to lnet_interfaces_max
	 */
	if (n_ids > lnet_interfaces_max)
		n_ids = lnet_interfaces_max;

	if (id->pid == LNET_PID_ANY)
		id->pid = LNET_PID_LUSTRE;

	/* size the reply buffer for up to n_ids status entries */
	id_bytes += lnet_ping_sts_size(&id->nid) * n_ids;
	pbuf = lnet_ping_buffer_alloc(id_bytes, GFP_NOFS);
	if (!pbuf)
		return -ENOMEM;

	/* initialize md content */
	md.start     = &pbuf->pb_info;
	md.length    = id_bytes;
	md.threshold = 2; /* GET/REPLY */
	md.max_size  = 0;
	md.options   = LNET_MD_TRUNCATE; /* reply may be shorter than md.length */
	md.user_ptr  = &pd;
	md.handler   = lnet_ping_event_handler;

	init_completion(&pd.completion);

	rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
	if (rc != 0) {
		CERROR("Can't bind MD: %d\n", rc);
		goto fail_ping_buffer_decref;
	}

	rc = LNetGet(src_nid, pd.mdh, id, LNET_RESERVED_PORTAL,
		     LNET_PROTO_PING_MATCHBITS, 0, false);
	if (rc != 0) {
		/* Don't CERROR; this could be deliberate! */
		rc2 = LNetMDUnlink(pd.mdh);
		LASSERT(rc2 == 0);

		/* NB must wait for the UNLINK event below... */
	}

	/* Ensure completion in finite time... */
	wait_for_completion_timeout(&pd.completion, timeout);
	if (!pd.pd_unlinked) {
		/* timed out: force the unlink, then wait for the UNLINK
		 * event so pd is no longer referenced by the handler
		 */
		LNetMDUnlink(pd.mdh);
		wait_for_completion(&pd.completion);
	}

	if (!pd.replied) {
		rc = pd.rc ?: -EIO;
		goto fail_ping_buffer_decref;
	}

	/* on success pd.rc holds the reply length (see event handler) */
	nob = pd.rc;
	LASSERT(nob >= 0 && nob <= id_bytes);

	rc = -EPROTO;		/* if I can't parse... */

	if (nob < LNET_PING_INFO_HDR_SIZE) {
		CERROR("%s: ping info too short %d\n",
		       libcfs_idstr(id), nob);
		goto fail_ping_buffer_decref;
	}

	/* byte-swapped magic means the peer has opposite endianness */
	if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
		lnet_swap_pinginfo(pbuf);
	} else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
		CERROR("%s: Unexpected magic %08x\n",
		       libcfs_idstr(id), pbuf->pb_info.pi_magic);
		goto fail_ping_buffer_decref;
	}

	if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
		CERROR("%s: ping w/o NI status: 0x%x\n",
		       libcfs_idstr(id), pbuf->pb_info.pi_features);
		goto fail_ping_buffer_decref;
	}

	/* Test if smaller than lnet_pinginfo with just one pi_ni status info.
	 * That one might contain size when large nids are used.
	 */
	if (nob < offsetof(struct lnet_ping_info, pi_ni[1])) {
		CERROR("%s: Short reply %d(%lu min)\n",
		       libcfs_idstr(id), nob,
		       offsetof(struct lnet_ping_info, pi_ni[1]));
		goto fail_ping_buffer_decref;
	}

	/* trim n_ids to what the peer actually reported */
	if (ping_info_count_entries(pbuf) < n_ids) {
		n_ids = ping_info_count_entries(pbuf);
		id_bytes = lnet_ping_info_size(&pbuf->pb_info);
	}

	if (nob < id_bytes) {
		CERROR("%s: Short reply %d(%d expected)\n",
		       libcfs_idstr(id), nob, id_bytes);
		goto fail_ping_buffer_decref;
	}

	/* walk the reply, copying each reported NID into the result
	 * list; note @id is repurposed as a genradix cursor here
	 */
	for (st = ping_iter_first(&pi, pbuf, &pid.nid);
	     st;
	     st = ping_iter_next(&pi, &pid.nid)) {
		id = genradix_ptr_alloc(&plist->lgpl_list, i++, GFP_ATOMIC);
		if (!id) {
			rc = -ENOMEM;
			goto fail_ping_buffer_decref;
		}

		id->pid = pbuf->pb_info.pi_pid;
		id->nid = pid.nid;
	}
	rc = i;		/* success: number of entries collected */
fail_ping_buffer_decref:
	lnet_ping_buffer_decref(pbuf);
	return rc;
}
6019
/*
 * Trigger (re)discovery of peer \a id4 and copy the process IDs of all
 * of its peer NIs to the userspace buffer \a ids.
 *
 * \param id4	peer to discover; a pid of LNET_PID_ANY is replaced with
 *		LNET_PID_LUSTRE
 * \param force	when non-zero, also force a PING and PUSH to the peer
 * \param ids	userspace array receiving up to \a n_ids entries
 * \param n_ids	capacity of \a ids (capped by lnet_interfaces_max)
 *
 * \retval >= 0	number of IDs copied to \a ids
 * \retval -ve	negative errno on failure
 */
static int
lnet_discover(struct lnet_process_id id4, __u32 force,
	      struct lnet_process_id __user *ids, int n_ids)
{
	struct lnet_peer_ni *lpni;
	struct lnet_peer_ni *p;
	struct lnet_peer *lp;
	struct lnet_process_id *buf;
	struct lnet_processid id;
	int cpt;
	int i;
	int rc;

	if (n_ids <= 0 ||
	    id4.nid == LNET_NID_ANY)
		return -EINVAL;

	lnet_pid4_to_pid(id4, &id);
	if (id.pid == LNET_PID_ANY)
		id.pid = LNET_PID_LUSTRE;

	/*
	 * If the user buffer has more space than the lnet_interfaces_max,
	 * then only fill it up to lnet_interfaces_max.
	 */
	if (n_ids > lnet_interfaces_max)
		n_ids = lnet_interfaces_max;

	CFS_ALLOC_PTR_ARRAY(buf, n_ids);
	if (!buf)
		return -ENOMEM;

	cpt = lnet_net_lock_current();
	/* NOTE(review): assumes lnet_peerni_by_nid_locked() returns the
	 * lpni with a reference held — confirm against its definition
	 */
	lpni = lnet_peerni_by_nid_locked(&id.nid, NULL, cpt);
	if (IS_ERR(lpni)) {
		rc = PTR_ERR(lpni);
		goto out;
	}

	/*
	 * Clearing the NIDS_UPTODATE flag ensures the peer will
	 * be discovered, provided discovery has not been disabled.
	 */
	lp = lpni->lpni_peer_net->lpn_peer;
	spin_lock(&lp->lp_lock);
	lp->lp_state &= ~LNET_PEER_NIDS_UPTODATE;
	/* If the force flag is set, force a PING and PUSH as well. */
	if (force)
		lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH;
	spin_unlock(&lp->lp_lock);
	rc = lnet_discover_peer_locked(lpni, cpt, true);
	if (rc)
		goto out_decref;

	/* The lpni (or lp) for this NID may have changed and our ref is
	 * the only thing keeping the old one around. Release the ref
	 * and lookup the lpni again
	 */
	lnet_peer_ni_decref_locked(lpni);
	lpni = lnet_peer_ni_find_locked(&id.nid);
	if (!lpni) {
		/* no lpni, so skip the decref and unlock directly */
		rc = -ENOENT;
		goto out;
	}
	lp = lpni->lpni_peer_net->lpn_peer;

	/* collect all peer NIs of the (possibly new) peer, up to n_ids */
	i = 0;
	p = NULL;
	while ((p = lnet_get_next_peer_ni_locked(lp, NULL, p)) != NULL) {
		buf[i].pid = id.pid;
		buf[i].nid = lnet_nid_to_nid4(&p->lpni_nid);
		if (++i >= n_ids)
			break;
	}
	rc = i;

out_decref:
	lnet_peer_ni_decref_locked(lpni);
out:
	lnet_net_unlock(cpt);

	/* copy results to userspace outside the net lock */
	if (rc >= 0)
		if (copy_to_user(ids, buf, rc * sizeof(*buf)))
			rc = -EFAULT;
	CFS_FREE_PTR_ARRAY(buf, n_ids);

	return rc;
}
6108
6109 /**
6110  * Retrieve peer discovery status.
6111  *
6112  * \retval 1 if lnet_peer_discovery_disabled is 0
6113  * \retval 0 if lnet_peer_discovery_disabled is 1
6114  */
6115 int
6116 LNetGetPeerDiscoveryStatus(void)
6117 {
6118         return !lnet_peer_discovery_disabled;
6119 }
6120 EXPORT_SYMBOL(LNetGetPeerDiscoveryStatus);