1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  */
31
32 #define DEBUG_SUBSYSTEM S_LNET
33
34 #include <linux/ctype.h>
35 #include <linux/generic-radix-tree.h>
36 #include <linux/log2.h>
37 #include <linux/ktime.h>
38 #include <linux/moduleparam.h>
39 #include <linux/uaccess.h>
40 #ifdef HAVE_SCHED_HEADERS
41 #include <linux/sched/signal.h>
42 #endif
43 #include <net/genetlink.h>
44
45 #include <libcfs/linux/linux-net.h>
46 #include <lnet/udsp.h>
47 #include <lnet/lib-lnet.h>
48
49 #define D_LNI D_CONSOLE
50
51 /*
52  * Initialize ln_api_mutex statically, since it needs to be used in the
53  * discovery_set callback. That module parameter callback can be called
54  * before module init completes, so the mutex must be ready for use by then.
55  */
56 struct lnet the_lnet = {
57         .ln_api_mutex = __MUTEX_INITIALIZER(the_lnet.ln_api_mutex),
58 };              /* THE state of the network */
59 EXPORT_SYMBOL(the_lnet);
60
61 static char *ip2nets = "";
62 module_param(ip2nets, charp, 0444);
63 MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
64
65 static char *networks = "";
66 module_param(networks, charp, 0444);
67 MODULE_PARM_DESC(networks, "local networks");
68
69 static char *routes = "";
70 module_param(routes, charp, 0444);
71 MODULE_PARM_DESC(routes, "routes to non-local networks");
72
73 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
74 module_param(rnet_htable_size, int, 0444);
75 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
76
77 static int use_tcp_bonding;
78 module_param(use_tcp_bonding, int, 0444);
79 MODULE_PARM_DESC(use_tcp_bonding,
80                  "use_tcp_bonding parameter has been removed");
81
82 unsigned int lnet_numa_range = 0;
83 module_param(lnet_numa_range, uint, 0444);
84 MODULE_PARM_DESC(lnet_numa_range,
85                 "NUMA range to consider during Multi-Rail selection");
86
87 /*
88  * lnet_health_sensitivity determines by how much we decrement the health
89  * value on a send error. The value defaults to 100, which means the
90  * interface health is decremented by 100 points on every failure.
91  */
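/*
 * For example: with the default sensitivity of 100, and assuming
 * LNET_MAX_HEALTH_VALUE is 1000, an interface starting at full health
 * drops to 0 after ten consecutive send failures, after which it is
 * deprioritized during selection until recovery restores its health.
 */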
92 unsigned int lnet_health_sensitivity = 100;
93 static int sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp);
94 #ifdef HAVE_KERNEL_PARAM_OPS
95 static struct kernel_param_ops param_ops_health_sensitivity = {
96         .set = sensitivity_set,
97         .get = param_get_int,
98 };
99 #define param_check_health_sensitivity(name, p) \
100                 __param_check(name, p, int)
101 module_param(lnet_health_sensitivity, health_sensitivity, S_IRUGO|S_IWUSR);
102 #else
103 module_param_call(lnet_health_sensitivity, sensitivity_set, param_get_int,
104                   &lnet_health_sensitivity, S_IRUGO|S_IWUSR);
105 #endif
106 MODULE_PARM_DESC(lnet_health_sensitivity,
107                 "Value to decrement the health value by on error");
108
109 /*
110  * lnet_recovery_interval used to control how often recovery is performed
111  * on unhealthy interfaces. It is now deprecated and has no effect.
112  */
113 unsigned int lnet_recovery_interval = 1;
114 static int recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp);
115 #ifdef HAVE_KERNEL_PARAM_OPS
116 static struct kernel_param_ops param_ops_recovery_interval = {
117         .set = recovery_interval_set,
118         .get = param_get_int,
119 };
120 #define param_check_recovery_interval(name, p) \
121                 __param_check(name, p, int)
122 module_param(lnet_recovery_interval, recovery_interval, S_IRUGO|S_IWUSR);
123 #else
124 module_param_call(lnet_recovery_interval, recovery_interval_set, param_get_int,
125                   &lnet_recovery_interval, S_IRUGO|S_IWUSR);
126 #endif
127 MODULE_PARM_DESC(lnet_recovery_interval,
128                 "DEPRECATED - Interval to recover unhealthy interfaces in seconds");
129
130 unsigned int lnet_recovery_limit;
131 module_param(lnet_recovery_limit, uint, 0644);
132 MODULE_PARM_DESC(lnet_recovery_limit,
133                  "How long to attempt recovery of unhealthy peer interfaces in seconds. Set to 0 to allow indefinite recovery");
134
135 unsigned int lnet_max_recovery_ping_interval = 900;
136 unsigned int lnet_max_recovery_ping_count = 9;
137 static int max_recovery_ping_interval_set(const char *val,
138                                           cfs_kernel_param_arg_t *kp);
139
140 #define param_check_max_recovery_ping_interval(name, p) \
141                 __param_check(name, p, int)
142
143 #ifdef HAVE_KERNEL_PARAM_OPS
144 static struct kernel_param_ops param_ops_max_recovery_ping_interval = {
145         .set = max_recovery_ping_interval_set,
146         .get = param_get_int,
147 };
148 module_param(lnet_max_recovery_ping_interval, max_recovery_ping_interval, 0644);
149 #else
150 module_param_call(lnet_max_recovery_ping_interval, max_recovery_ping_interval_set,
151                   param_get_int, &lnet_max_recovery_ping_interval, 0644);
152 #endif
153 MODULE_PARM_DESC(lnet_max_recovery_ping_interval,
154                  "The max interval between LNet recovery pings, in seconds");
155
156 static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
157 static int intf_max_set(const char *val, cfs_kernel_param_arg_t *kp);
158
159 static struct kernel_param_ops param_ops_interfaces_max = {
160         .set = intf_max_set,
161         .get = param_get_int,
162 };
163
164 #define param_check_interfaces_max(name, p) \
165                 __param_check(name, p, int)
166
167 #ifdef HAVE_KERNEL_PARAM_OPS
168 module_param(lnet_interfaces_max, interfaces_max, 0644);
169 #else
170 module_param_call(lnet_interfaces_max, intf_max_set, param_get_int,
171                   &param_ops_interfaces_max, 0644);
172 #endif
173 MODULE_PARM_DESC(lnet_interfaces_max,
174                 "Maximum number of interfaces in a node.");
175
176 unsigned lnet_peer_discovery_disabled = 0;
177 static int discovery_set(const char *val, cfs_kernel_param_arg_t *kp);
178
179 static struct kernel_param_ops param_ops_discovery_disabled = {
180         .set = discovery_set,
181         .get = param_get_int,
182 };
183
184 #define param_check_discovery_disabled(name, p) \
185                 __param_check(name, p, int)
186 #ifdef HAVE_KERNEL_PARAM_OPS
187 module_param(lnet_peer_discovery_disabled, discovery_disabled, 0644);
188 #else
189 module_param_call(lnet_peer_discovery_disabled, discovery_set, param_get_int,
190                   &param_ops_discovery_disabled, 0644);
191 #endif
192 MODULE_PARM_DESC(lnet_peer_discovery_disabled,
193                 "Set to 1 to disable peer discovery on this node.");
194
195 unsigned int lnet_drop_asym_route;
196 static int drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp);
197
198 static struct kernel_param_ops param_ops_drop_asym_route = {
199         .set = drop_asym_route_set,
200         .get = param_get_int,
201 };
202
203 #define param_check_drop_asym_route(name, p)    \
204         __param_check(name, p, int)
205 #ifdef HAVE_KERNEL_PARAM_OPS
206 module_param(lnet_drop_asym_route, drop_asym_route, 0644);
207 #else
208 module_param_call(lnet_drop_asym_route, drop_asym_route_set, param_get_int,
209                   &param_ops_drop_asym_route, 0644);
210 #endif
211 MODULE_PARM_DESC(lnet_drop_asym_route,
212                  "Set to 1 to drop asymmetrical route messages.");
213
214 #define LNET_TRANSACTION_TIMEOUT_DEFAULT 150
215 unsigned int lnet_transaction_timeout = LNET_TRANSACTION_TIMEOUT_DEFAULT;
216 static int transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp);
217 #ifdef HAVE_KERNEL_PARAM_OPS
218 static struct kernel_param_ops param_ops_transaction_timeout = {
219         .set = transaction_to_set,
220         .get = param_get_int,
221 };
222
223 #define param_check_transaction_timeout(name, p) \
224                 __param_check(name, p, int)
225 module_param(lnet_transaction_timeout, transaction_timeout, S_IRUGO|S_IWUSR);
226 #else
227 module_param_call(lnet_transaction_timeout, transaction_to_set, param_get_int,
228                   &lnet_transaction_timeout, S_IRUGO|S_IWUSR);
229 #endif
230 MODULE_PARM_DESC(lnet_transaction_timeout,
231                 "Maximum number of seconds to wait for a peer response.");
232
233 #define LNET_RETRY_COUNT_DEFAULT 2
234 unsigned int lnet_retry_count = LNET_RETRY_COUNT_DEFAULT;
235 static int retry_count_set(const char *val, cfs_kernel_param_arg_t *kp);
236 #ifdef HAVE_KERNEL_PARAM_OPS
237 static struct kernel_param_ops param_ops_retry_count = {
238         .set = retry_count_set,
239         .get = param_get_int,
240 };
241
242 #define param_check_retry_count(name, p) \
243                 __param_check(name, p, int)
244 module_param(lnet_retry_count, retry_count, S_IRUGO|S_IWUSR);
245 #else
246 module_param_call(lnet_retry_count, retry_count_set, param_get_int,
247                   &lnet_retry_count, S_IRUGO|S_IWUSR);
248 #endif
249 MODULE_PARM_DESC(lnet_retry_count,
250                  "Maximum number of times to retry transmitting a message");
251
252 unsigned int lnet_response_tracking = 3;
253 static int response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp);
254
255 #ifdef HAVE_KERNEL_PARAM_OPS
256 static struct kernel_param_ops param_ops_response_tracking = {
257         .set = response_tracking_set,
258         .get = param_get_int,
259 };
260
261 #define param_check_response_tracking(name, p)  \
262         __param_check(name, p, int)
263 module_param(lnet_response_tracking, response_tracking, 0644);
264 #else
265 module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
266                   &lnet_response_tracking, 0644);
267 #endif
268 MODULE_PARM_DESC(lnet_response_tracking,
269                  "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
270
271 #define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
272                                   (LNET_RETRY_COUNT_DEFAULT + 1))
273 unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
274 static void lnet_set_lnd_timeout(void)
275 {
276         lnet_lnd_timeout = (lnet_transaction_timeout - 1) /
277                            (lnet_retry_count + 1);
278 }
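/*
 * Worked example of the formula above using the compile-time defaults:
 * lnet_transaction_timeout = 150 and lnet_retry_count = 2 give
 * lnet_lnd_timeout = (150 - 1) / (2 + 1) = 49 seconds, i.e. each of the
 * retry_count + 1 send attempts gets its own LND-level timeout within
 * the overall transaction timeout.
 */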
279
280 /*
281  * This sequence number keeps track of how many times DLC was used to
282  * update the local NIs. It is incremented when a NI is added or
283  * removed and checked when sending a message to determine if there is
284  * a need to re-run the selection algorithm. See lnet_select_pathway()
285  * for more details on its usage.
286  */
287 static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
288
289 struct lnet_fail_ping {
290         struct lnet_processid           lfp_id;
291         int                             lfp_errno;
292 };
293
294 struct lnet_genl_ping_list {
295         unsigned int                    lgpl_index;
296         unsigned int                    lgpl_list_count;
297         unsigned int                    lgpl_failed_count;
298         signed long                     lgpl_timeout;
299         struct lnet_nid                 lgpl_src_nid;
300         GENRADIX(struct lnet_fail_ping) lgpl_failed;
301         GENRADIX(struct lnet_processid) lgpl_list;
302 };
303
304 static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
305                      signed long timeout, struct lnet_genl_ping_list *plist,
306                      int n_ids);
307
308 static int lnet_discover(struct lnet_process_id id, __u32 force,
309                          struct lnet_process_id __user *ids, int n_ids);
310
311 static int
312 sensitivity_set(const char *val, cfs_kernel_param_arg_t *kp)
313 {
314         int rc;
315         unsigned *sensitivity = (unsigned *)kp->arg;
316         unsigned long value;
317
318         rc = kstrtoul(val, 0, &value);
319         if (rc) {
320                 CERROR("Invalid module parameter value for 'lnet_health_sensitivity'\n");
321                 return rc;
322         }
323
324         /*
325          * The purpose of locking the api_mutex here is to ensure that
326          * the correct value ends up stored properly.
327          */
328         mutex_lock(&the_lnet.ln_api_mutex);
329
330         if (value > LNET_MAX_HEALTH_VALUE) {
331                 mutex_unlock(&the_lnet.ln_api_mutex);
332                 CERROR("Invalid health value. Maximum: %d value = %lu\n",
333                        LNET_MAX_HEALTH_VALUE, value);
334                 return -EINVAL;
335         }
336
337         if (*sensitivity != 0 && value == 0 && lnet_retry_count != 0) {
338                 lnet_retry_count = 0;
339                 lnet_set_lnd_timeout();
340         }
341
342         *sensitivity = value;
343
344         mutex_unlock(&the_lnet.ln_api_mutex);
345
346         return 0;
347 }
348
349 static int
350 recovery_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
351 {
352         CWARN("'lnet_recovery_interval' has been deprecated\n");
353
354         return 0;
355 }
356
357 static int
358 max_recovery_ping_interval_set(const char *val, cfs_kernel_param_arg_t *kp)
359 {
360         int rc;
361         unsigned long value;
362
363         rc = kstrtoul(val, 0, &value);
364         if (rc) {
365                 CERROR("Invalid module parameter value for 'lnet_max_recovery_ping_interval'\n");
366                 return rc;
367         }
368
369         if (!value) {
370                 CERROR("Invalid max ping timeout. Must be strictly positive\n");
371                 return -EINVAL;
372         }
373
374         /* The purpose of locking the api_mutex here is to ensure that
375          * the correct value ends up stored properly.
376          */
377         mutex_lock(&the_lnet.ln_api_mutex);
378         lnet_max_recovery_ping_interval = value;
379         lnet_max_recovery_ping_count = 0;
380         value >>= 1;
381         while (value) {
382                 lnet_max_recovery_ping_count++;
383                 value >>= 1;
384         }
385         mutex_unlock(&the_lnet.ln_api_mutex);
386
387         return 0;
388 }
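/*
 * The loop above computes floor(log2(value)). For example, the default
 * lnet_max_recovery_ping_interval of 900 seconds gives 900 >> 1 = 450,
 * and nine further right-shifts reach zero, so
 * lnet_max_recovery_ping_count ends up as 9, matching its default above.
 */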
389
390 static int
391 discovery_set(const char *val, cfs_kernel_param_arg_t *kp)
392 {
393         int rc;
394         unsigned *discovery_off = (unsigned *)kp->arg;
395         unsigned long value;
396         struct lnet_ping_buffer *pbuf;
397
398         rc = kstrtoul(val, 0, &value);
399         if (rc) {
400                 CERROR("Invalid module parameter value for 'lnet_peer_discovery_disabled'\n");
401                 return rc;
402         }
403
404         value = (value) ? 1 : 0;
405
406         /*
407          * The purpose of locking the api_mutex here is to ensure that
408          * the correct value ends up stored properly.
409          */
410         mutex_lock(&the_lnet.ln_api_mutex);
411
412         if (value == *discovery_off) {
413                 mutex_unlock(&the_lnet.ln_api_mutex);
414                 return 0;
415         }
416
417         /*
418          * We still want to set the discovery value even when LNet is not
419          * running. This is the case when LNet is being loaded and we want
420          * the module parameters to take effect. Otherwise if we're
421          * changing the value dynamically, we want to set it after
422          * updating the peers
423          */
424         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
425                 *discovery_off = value;
426                 mutex_unlock(&the_lnet.ln_api_mutex);
427                 return 0;
428         }
429
430         /* tell peers that discovery setting has changed */
431         lnet_net_lock(LNET_LOCK_EX);
432         pbuf = the_lnet.ln_ping_target;
433         if (value)
434                 pbuf->pb_info.pi_features &= ~LNET_PING_FEAT_DISCOVERY;
435         else
436                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
437         lnet_net_unlock(LNET_LOCK_EX);
438
439         /* only send a push when we're turning off discovery */
440         if (*discovery_off <= 0 && value > 0)
441                 lnet_push_update_to_peers(1);
442         *discovery_off = value;
443
444         mutex_unlock(&the_lnet.ln_api_mutex);
445
446         return 0;
447 }
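/*
 * Like any writable module parameter, this setter can also be driven at
 * runtime, typically via the standard sysfs path
 * /sys/module/lnet/parameters/lnet_peer_discovery_disabled. While LNet
 * is running, writing 1 clears LNET_PING_FEAT_DISCOVERY in the ping
 * target and pushes the change to peers, as implemented above.
 */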
448
449 static int
450 drop_asym_route_set(const char *val, cfs_kernel_param_arg_t *kp)
451 {
452         int rc;
453         unsigned int *drop_asym_route = (unsigned int *)kp->arg;
454         unsigned long value;
455
456         rc = kstrtoul(val, 0, &value);
457         if (rc) {
458                 CERROR("Invalid module parameter value for "
459                        "'lnet_drop_asym_route'\n");
460                 return rc;
461         }
462
463         /*
464          * The purpose of locking the api_mutex here is to ensure that
465          * the correct value ends up stored properly.
466          */
467         mutex_lock(&the_lnet.ln_api_mutex);
468
469         if (value == *drop_asym_route) {
470                 mutex_unlock(&the_lnet.ln_api_mutex);
471                 return 0;
472         }
473
474         *drop_asym_route = value;
475
476         mutex_unlock(&the_lnet.ln_api_mutex);
477
478         return 0;
479 }
480
481 static int
482 transaction_to_set(const char *val, cfs_kernel_param_arg_t *kp)
483 {
484         int rc;
485         unsigned *transaction_to = (unsigned *)kp->arg;
486         unsigned long value;
487
488         rc = kstrtoul(val, 0, &value);
489         if (rc) {
490                 CERROR("Invalid module parameter value for 'lnet_transaction_timeout'\n");
491                 return rc;
492         }
493
494         /*
495          * The purpose of locking the api_mutex here is to ensure that
496          * the correct value ends up stored properly.
497          */
498         mutex_lock(&the_lnet.ln_api_mutex);
499
500         if (value <= lnet_retry_count || value == 0) {
501                 mutex_unlock(&the_lnet.ln_api_mutex);
502                 CERROR("Invalid value for lnet_transaction_timeout (%lu). "
503                        "Has to be greater than lnet_retry_count (%u)\n",
504                        value, lnet_retry_count);
505                 return -EINVAL;
506         }
507
508         if (value == *transaction_to) {
509                 mutex_unlock(&the_lnet.ln_api_mutex);
510                 return 0;
511         }
512
513         *transaction_to = value;
514         /* Update the lnet_lnd_timeout now that we've modified the
515          * transaction timeout
516          */
517         lnet_set_lnd_timeout();
518
519         mutex_unlock(&the_lnet.ln_api_mutex);
520
521         return 0;
522 }
523
524 static int
525 retry_count_set(const char *val, cfs_kernel_param_arg_t *kp)
526 {
527         int rc;
528         unsigned *retry_count = (unsigned *)kp->arg;
529         unsigned long value;
530
531         rc = kstrtoul(val, 0, &value);
532         if (rc) {
533                 CERROR("Invalid module parameter value for 'lnet_retry_count'\n");
534                 return rc;
535         }
536
537         /*
538          * The purpose of locking the api_mutex here is to ensure that
539          * the correct value ends up stored properly.
540          */
541         mutex_lock(&the_lnet.ln_api_mutex);
542
543         if (lnet_health_sensitivity == 0 && value > 0) {
544                 mutex_unlock(&the_lnet.ln_api_mutex);
545                 CERROR("Cannot set lnet_retry_count when health feature is turned off\n");
546                 return -EINVAL;
547         }
548
549         if (value > lnet_transaction_timeout) {
550                 mutex_unlock(&the_lnet.ln_api_mutex);
551                 CERROR("Invalid value for lnet_retry_count (%lu). "
552                        "Has to be smaller than lnet_transaction_timeout (%u)\n",
553                        value, lnet_transaction_timeout);
554                 return -EINVAL;
555         }
556
557         *retry_count = value;
558
559         /* Update the lnet_lnd_timeout now that we've modified the
560          * retry count
561          */
562         lnet_set_lnd_timeout();
563
564         mutex_unlock(&the_lnet.ln_api_mutex);
565
566         return 0;
567 }
568
569 static int
570 intf_max_set(const char *val, cfs_kernel_param_arg_t *kp)
571 {
572         int value, rc;
573
574         rc = kstrtoint(val, 0, &value);
575         if (rc) {
576                 CERROR("Invalid module parameter value for 'lnet_interfaces_max'\n");
577                 return rc;
578         }
579
580         if (value < LNET_INTERFACES_MIN) {
581                 CWARN("max interfaces provided is too small, setting to %d\n",
582                       LNET_INTERFACES_MAX_DEFAULT);
583                 value = LNET_INTERFACES_MAX_DEFAULT;
584         }
585
586         *(int *)kp->arg = value;
587
588         return 0;
589 }
590
591 static int
592 response_tracking_set(const char *val, cfs_kernel_param_arg_t *kp)
593 {
594         int rc;
595         unsigned long new_value;
596
597         rc = kstrtoul(val, 0, &new_value);
598         if (rc) {
599                 CERROR("Invalid value for 'lnet_response_tracking'\n");
600                 return -EINVAL;
601         }
602
603         if (new_value > 3) {
604                 CWARN("Invalid value (%lu) for 'lnet_response_tracking'\n",
605                       new_value);
606                 return -EINVAL;
607         }
608
609         lnet_response_tracking = new_value;
610
611         return 0;
612 }
613
614 static const char *
615 lnet_get_routes(void)
616 {
617         return routes;
618 }
619
620 static const char *
621 lnet_get_networks(void)
622 {
623         const char *nets;
624         int rc;
625
626         if (*networks != 0 && *ip2nets != 0) {
627                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
628                                    "'ip2nets' but not both at once\n");
629                 return NULL;
630         }
631
632         if (*ip2nets != 0) {
633                 rc = lnet_parse_ip2nets(&nets, ip2nets);
634                 return (rc == 0) ? nets : NULL;
635         }
636
637         if (*networks != 0)
638                 return networks;
639
640         return "tcp";
641 }
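/*
 * For illustration, typical values for these parameters (syntax per the
 * LNet configuration documentation, not defined in this file):
 *   networks="tcp0(eth0),o2ib0(ib0)"  - start a socklnd NI on eth0 and
 *                                       an o2iblnd NI on ib0
 *   ip2nets="tcp0(eth0) 192.168.0.*"  - join tcp0 only on nodes whose
 *                                       eth0 address matches 192.168.0.*
 * The two are mutually exclusive, as enforced above; with neither set,
 * the code falls back to a plain "tcp" network.
 */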
642
643 static void
644 lnet_init_locks(void)
645 {
646         spin_lock_init(&the_lnet.ln_eq_wait_lock);
647         spin_lock_init(&the_lnet.ln_msg_resend_lock);
648         init_completion(&the_lnet.ln_mt_wait_complete);
649         mutex_init(&the_lnet.ln_lnd_mutex);
650 }
651
652 struct kmem_cache *lnet_mes_cachep;        /* MEs kmem_cache */
653 struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
654                                             *  MDs kmem_cache */
655 struct kmem_cache *lnet_udsp_cachep;       /* udsp cache */
656 struct kmem_cache *lnet_rspt_cachep;       /* response tracker cache */
657 struct kmem_cache *lnet_msg_cachep;
658
659 static int
660 lnet_slab_setup(void)
661 {
662         /* create specific kmem_cache for MEs and small MDs (i.e., originally
663          * allocated in <size-xxx> kmem_cache).
664          */
665         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(struct lnet_me),
666                                             0, 0, NULL);
667         if (!lnet_mes_cachep)
668                 return -ENOMEM;
669
670         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
671                                                   LNET_SMALL_MD_SIZE, 0, 0,
672                                                   NULL);
673         if (!lnet_small_mds_cachep)
674                 return -ENOMEM;
675
676         lnet_udsp_cachep = kmem_cache_create("lnet_udsp",
677                                              sizeof(struct lnet_udsp),
678                                              0, 0, NULL);
679         if (!lnet_udsp_cachep)
680                 return -ENOMEM;
681
682         lnet_rspt_cachep = kmem_cache_create("lnet_rspt", sizeof(struct lnet_rsp_tracker),
683                                             0, 0, NULL);
684         if (!lnet_rspt_cachep)
685                 return -ENOMEM;
686
687         lnet_msg_cachep = kmem_cache_create("lnet_msg", sizeof(struct lnet_msg),
688                                             0, 0, NULL);
689         if (!lnet_msg_cachep)
690                 return -ENOMEM;
691
692         return 0;
693 }
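/*
 * Note: a partial failure above leaves the earlier caches allocated;
 * that is safe because the caller (lnet_prepare() below) unwinds through
 * lnet_unprepare(), and lnet_slab_cleanup() checks each pointer before
 * destroying it.
 */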
694
695 static void
696 lnet_slab_cleanup(void)
697 {
698         if (lnet_msg_cachep) {
699                 kmem_cache_destroy(lnet_msg_cachep);
700                 lnet_msg_cachep = NULL;
701         }
702
703         if (lnet_rspt_cachep) {
704                 kmem_cache_destroy(lnet_rspt_cachep);
705                 lnet_rspt_cachep = NULL;
706         }
707
708         if (lnet_udsp_cachep) {
709                 kmem_cache_destroy(lnet_udsp_cachep);
710                 lnet_udsp_cachep = NULL;
711         }
712
713         if (lnet_small_mds_cachep) {
714                 kmem_cache_destroy(lnet_small_mds_cachep);
715                 lnet_small_mds_cachep = NULL;
716         }
717
718         if (lnet_mes_cachep) {
719                 kmem_cache_destroy(lnet_mes_cachep);
720                 lnet_mes_cachep = NULL;
721         }
722 }
723
724 static int
725 lnet_create_remote_nets_table(void)
726 {
727         int               i;
728         struct list_head *hash;
729
730         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
731         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
732         CFS_ALLOC_PTR_ARRAY(hash, LNET_REMOTE_NETS_HASH_SIZE);
733         if (hash == NULL) {
734                 CERROR("Failed to create remote nets hash table\n");
735                 return -ENOMEM;
736         }
737
738         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
739                 INIT_LIST_HEAD(&hash[i]);
740         the_lnet.ln_remote_nets_hash = hash;
741         return 0;
742 }
743
744 static void
745 lnet_destroy_remote_nets_table(void)
746 {
747         int i;
748
749         if (the_lnet.ln_remote_nets_hash == NULL)
750                 return;
751
752         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
753                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
754
755         CFS_FREE_PTR_ARRAY(the_lnet.ln_remote_nets_hash,
756                            LNET_REMOTE_NETS_HASH_SIZE);
757         the_lnet.ln_remote_nets_hash = NULL;
758 }
759
760 static void
761 lnet_destroy_locks(void)
762 {
763         if (the_lnet.ln_res_lock != NULL) {
764                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
765                 the_lnet.ln_res_lock = NULL;
766         }
767
768         if (the_lnet.ln_net_lock != NULL) {
769                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
770                 the_lnet.ln_net_lock = NULL;
771         }
772 }
773
774 static int
775 lnet_create_locks(void)
776 {
777         lnet_init_locks();
778
779         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
780         if (the_lnet.ln_res_lock == NULL)
781                 goto failed;
782
783         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
784         if (the_lnet.ln_net_lock == NULL)
785                 goto failed;
786
787         return 0;
788
789  failed:
790         lnet_destroy_locks();
791         return -ENOMEM;
792 }
793
794 static void lnet_assert_wire_constants(void)
795 {
796         /* Wire protocol assertions generated by 'wirecheck'
797          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
798          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
799          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7)
800          */
801
802         /* Constants... */
803         BUILD_BUG_ON(LNET_PROTO_TCP_MAGIC != 0xeebc0ded);
804         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MAJOR != 1);
805         BUILD_BUG_ON(LNET_PROTO_TCP_VERSION_MINOR != 0);
806         BUILD_BUG_ON(LNET_MSG_ACK != 0);
807         BUILD_BUG_ON(LNET_MSG_PUT != 1);
808         BUILD_BUG_ON(LNET_MSG_GET != 2);
809         BUILD_BUG_ON(LNET_MSG_REPLY != 3);
810         BUILD_BUG_ON(LNET_MSG_HELLO != 4);
811
812         BUILD_BUG_ON((int)sizeof(lnet_nid_t) != 8);
813         BUILD_BUG_ON((int)sizeof(lnet_pid_t) != 4);
814
815         /* Checks for struct lnet_nid */
816         BUILD_BUG_ON((int)sizeof(struct lnet_nid) != 20);
817         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_size) != 0);
818         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_size) != 1);
819         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_type) != 1);
820         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_type) != 1);
821         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_num) != 2);
822         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_num) != 2);
823         BUILD_BUG_ON((int)offsetof(struct lnet_nid, nid_addr) != 4);
824         BUILD_BUG_ON((int)sizeof(((struct lnet_nid *)0)->nid_addr) != 16);
825
826         /* Checks for struct lnet_process_id_packed */
827         BUILD_BUG_ON((int)sizeof(struct lnet_process_id_packed) != 12);
828         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, nid) != 0);
829         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->nid) != 8);
830         BUILD_BUG_ON((int)offsetof(struct lnet_process_id_packed, pid) != 8);
831         BUILD_BUG_ON((int)sizeof(((struct lnet_process_id_packed *)0)->pid) != 4);
832
833         /* Checks for struct lnet_handle_wire */
834         BUILD_BUG_ON((int)sizeof(struct lnet_handle_wire) != 16);
835         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
836                                    wh_interface_cookie) != 0);
837         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) != 8);
838         BUILD_BUG_ON((int)offsetof(struct lnet_handle_wire,
839                                    wh_object_cookie) != 8);
840         BUILD_BUG_ON((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) != 8);
841
842         /* Checks for struct lnet_magicversion */
843         BUILD_BUG_ON((int)sizeof(struct lnet_magicversion) != 8);
844         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, magic) != 0);
845         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->magic) != 4);
846         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion, version_major) != 4);
847         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_major) != 2);
848         BUILD_BUG_ON((int)offsetof(struct lnet_magicversion,
849                                    version_minor) != 6);
850         BUILD_BUG_ON((int)sizeof(((struct lnet_magicversion *)0)->version_minor) != 2);
851
852         /* Checks for struct _lnet_hdr_nid4 */
853         BUILD_BUG_ON((int)sizeof(struct _lnet_hdr_nid4) != 72);
854         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_nid) != 0);
855         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_nid) != 8);
856         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_nid) != 8);
857         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_nid) != 8);
858         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, dest_pid) != 16);
859         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->dest_pid) != 4);
860         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, src_pid) != 20);
861         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->src_pid) != 4);
862         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, type) != 24);
863         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->type) != 4);
864         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, payload_length) != 28);
865         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->payload_length) != 4);
866         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg) != 32);
867         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg) != 40);
868
869         /* Ack */
870         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.dst_wmd) != 32);
871         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.dst_wmd) != 16);
872         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.match_bits) != 48);
873         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.match_bits) != 8);
874         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.ack.mlength) != 56);
875         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.ack.mlength) != 4);
876
877         /* Put */
878         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ack_wmd) != 32);
879         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ack_wmd) != 16);
880         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.match_bits) != 48);
881         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.match_bits) != 8);
882         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.hdr_data) != 56);
883         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.hdr_data) != 8);
884         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.ptl_index) != 64);
885         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.ptl_index) != 4);
886         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.put.offset) != 68);
887         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.put.offset) != 4);
888
889         /* Get */
890         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.return_wmd) != 32);
891         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.return_wmd) != 16);
892         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.match_bits) != 48);
893         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.match_bits) != 8);
894         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.ptl_index) != 56);
895         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.ptl_index) != 4);
896         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.src_offset) != 60);
897         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.src_offset) != 4);
898         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.get.sink_length) != 64);
899         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.get.sink_length) != 4);
900
901         /* Reply */
902         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.reply.dst_wmd) != 32);
903         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.reply.dst_wmd) != 16);
904
905         /* Hello */
906         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.incarnation) != 32);
907         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.incarnation) != 8);
908         BUILD_BUG_ON((int)offsetof(struct _lnet_hdr_nid4, msg.hello.type) != 40);
909         BUILD_BUG_ON((int)sizeof(((struct _lnet_hdr_nid4 *)0)->msg.hello.type) != 4);
910
911         /* Checks for struct lnet_ni_status and related constants */
912         BUILD_BUG_ON(LNET_NI_STATUS_INVALID != 0x00000000);
913         BUILD_BUG_ON(LNET_NI_STATUS_UP != 0x15aac0de);
914         BUILD_BUG_ON(LNET_NI_STATUS_DOWN != 0xdeadface);
915
916         /* Checks for struct lnet_ni_status */
917         BUILD_BUG_ON((int)sizeof(struct lnet_ni_status) != 16);
918         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_nid) != 0);
919         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) != 8);
920         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_status) != 8);
921         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_status) != 4);
922         BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_msg_size) != 12);
923         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_msg_size) != 4);
924
925         /* Checks for struct lnet_ni_large_status */
926         BUILD_BUG_ON((int)sizeof(struct lnet_ni_large_status) != 24);
927         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_status) != 0);
928         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_status) != 4);
929         BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_nid) != 4);
930         BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_nid) != 20);
931
932         /* Checks for struct lnet_ping_info and related constants */
933         BUILD_BUG_ON(LNET_PROTO_PING_MAGIC != 0x70696E67);
934         BUILD_BUG_ON(LNET_PING_FEAT_INVAL != 0);
935         BUILD_BUG_ON(LNET_PING_FEAT_BASE != 1);
936         BUILD_BUG_ON(LNET_PING_FEAT_NI_STATUS != 2);
937         BUILD_BUG_ON(LNET_PING_FEAT_RTE_DISABLED != 4);
938         BUILD_BUG_ON(LNET_PING_FEAT_MULTI_RAIL != 8);
939         BUILD_BUG_ON(LNET_PING_FEAT_DISCOVERY != 16);
940         BUILD_BUG_ON(LNET_PING_FEAT_LARGE_ADDR != 32);
941         BUILD_BUG_ON(LNET_PING_FEAT_PRIMARY_LARGE != 64);
942         BUILD_BUG_ON(LNET_PING_FEAT_BITS != 127);
943
944         /* Checks for struct lnet_ping_info */
945         BUILD_BUG_ON((int)sizeof(struct lnet_ping_info) != 16);
946         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_magic) != 0);
947         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) != 4);
948         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_features) != 4);
949         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_features) != 4);
950         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_pid) != 8);
951         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) != 4);
952         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_nnis) != 12);
953         BUILD_BUG_ON((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) != 4);
954         BUILD_BUG_ON((int)offsetof(struct lnet_ping_info, pi_ni) != 16);
955         BUILD_BUG_ON(offsetof(struct lnet_ping_info, pi_ni) != sizeof(struct lnet_ping_info));
956
957         /* Acceptor connection request */
958         BUILD_BUG_ON(LNET_PROTO_ACCEPTOR_VERSION != 1);
959
960         /* Checks for struct lnet_acceptor_connreq */
961         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq) != 16);
962         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_magic) != 0);
963         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_magic) != 4);
964         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_version) != 4);
965         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_version) != 4);
966         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq, acr_nid) != 8);
967         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq *)0)->acr_nid) != 8);
968
969         /* Checks for struct lnet_acceptor_connreq_v2 */
970         BUILD_BUG_ON((int)sizeof(struct lnet_acceptor_connreq_v2) != 28);
971         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_magic) != 0);
972         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_magic) != 4);
973         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_version) != 4);
974         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_version) != 4);
975         BUILD_BUG_ON((int)offsetof(struct lnet_acceptor_connreq_v2, acr_nid) != 8);
976         BUILD_BUG_ON((int)sizeof(((struct lnet_acceptor_connreq_v2 *)0)->acr_nid) != 20);
977
978         /* Checks for struct lnet_counters_common */
979         BUILD_BUG_ON((int)sizeof(struct lnet_counters_common) != 60);
980         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_alloc) != 0);
981         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_alloc) != 4);
982         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_msgs_max) != 4);
983         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_msgs_max) != 4);
984         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_errors) != 8);
985         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_errors) != 4);
986         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_count) != 12);
987         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_count) != 4);
988         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_count) != 16);
989         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_count) != 4);
990         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_count) != 20);
991         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_count) != 4);
992         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_count) != 24);
993         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_count) != 4);
994         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_send_length) != 28);
995         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_send_length) != 8);
996         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_recv_length) != 36);
997         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_recv_length) != 8);
998         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_route_length) != 44);
999         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_route_length) != 8);
1000         BUILD_BUG_ON((int)offsetof(struct lnet_counters_common, lcc_drop_length) != 52);
1001         BUILD_BUG_ON((int)sizeof(((struct lnet_counters_common *)0)->lcc_drop_length) != 8);
1002 }
1003
1004 static const struct lnet_lnd *lnet_find_lnd_by_type(__u32 type)
1005 {
1006         const struct lnet_lnd *lnd;
1007
1008         /* holding lnd mutex */
1009         if (type >= NUM_LNDS)
1010                 return NULL;
1011         lnd = the_lnet.ln_lnds[type];
1012         LASSERT(!lnd || lnd->lnd_type == type);
1013
1014         return lnd;
1015 }
1016
1017 unsigned int
1018 lnet_get_lnd_timeout(void)
1019 {
1020         return lnet_lnd_timeout;
1021 }
1022 EXPORT_SYMBOL(lnet_get_lnd_timeout);
1023
1024 void
1025 lnet_register_lnd(const struct lnet_lnd *lnd)
1026 {
1027         mutex_lock(&the_lnet.ln_lnd_mutex);
1028
1029         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
1030         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
1031
1032         the_lnet.ln_lnds[lnd->lnd_type] = lnd;
1033
1034         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
1035
1036         mutex_unlock(&the_lnet.ln_lnd_mutex);
1037 }
1038 EXPORT_SYMBOL(lnet_register_lnd);
1039
1040 void
1041 lnet_unregister_lnd(const struct lnet_lnd *lnd)
1042 {
1043         mutex_lock(&the_lnet.ln_lnd_mutex);
1044
1045         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
1046
1047         the_lnet.ln_lnds[lnd->lnd_type] = NULL;
1048         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
1049
1050         mutex_unlock(&the_lnet.ln_lnd_mutex);
1051 }
1052 EXPORT_SYMBOL(lnet_unregister_lnd);
1053
1054 static void
1055 lnet_counters_get_common_locked(struct lnet_counters_common *common)
1056 {
1057         struct lnet_counters *ctr;
1058         int i;
1059
1060         /* FIXME !!! There is no assert_lnet_net_locked() to ensure this is
1061          * actually called under the protection of the lnet_net_lock.
1062          */
1063         memset(common, 0, sizeof(*common));
1064
1065         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1066                 common->lcc_msgs_max     += ctr->lct_common.lcc_msgs_max;
1067                 common->lcc_msgs_alloc   += ctr->lct_common.lcc_msgs_alloc;
1068                 common->lcc_errors       += ctr->lct_common.lcc_errors;
1069                 common->lcc_send_count   += ctr->lct_common.lcc_send_count;
1070                 common->lcc_recv_count   += ctr->lct_common.lcc_recv_count;
1071                 common->lcc_route_count  += ctr->lct_common.lcc_route_count;
1072                 common->lcc_drop_count   += ctr->lct_common.lcc_drop_count;
1073                 common->lcc_send_length  += ctr->lct_common.lcc_send_length;
1074                 common->lcc_recv_length  += ctr->lct_common.lcc_recv_length;
1075                 common->lcc_route_length += ctr->lct_common.lcc_route_length;
1076                 common->lcc_drop_length  += ctr->lct_common.lcc_drop_length;
1077         }
1078 }
1079
1080 void
1081 lnet_counters_get_common(struct lnet_counters_common *common)
1082 {
1083         lnet_net_lock(LNET_LOCK_EX);
1084         lnet_counters_get_common_locked(common);
1085         lnet_net_unlock(LNET_LOCK_EX);
1086 }
1087 EXPORT_SYMBOL(lnet_counters_get_common);
1088
1089 int
1090 lnet_counters_get(struct lnet_counters *counters)
1091 {
1092         struct lnet_counters *ctr;
1093         struct lnet_counters_health *health = &counters->lct_health;
1094         int i, rc = 0;
1095
1096         memset(counters, 0, sizeof(*counters));
1097
1098         lnet_net_lock(LNET_LOCK_EX);
1099
1100         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1101                 GOTO(out_unlock, rc = -ENODEV);
1102
1103         lnet_counters_get_common_locked(&counters->lct_common);
1104
1105         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
1106                 health->lch_rst_alloc    += ctr->lct_health.lch_rst_alloc;
1107                 health->lch_resend_count += ctr->lct_health.lch_resend_count;
1108                 health->lch_response_timeout_count +=
1109                                 ctr->lct_health.lch_response_timeout_count;
1110                 health->lch_local_interrupt_count +=
1111                                 ctr->lct_health.lch_local_interrupt_count;
1112                 health->lch_local_dropped_count +=
1113                                 ctr->lct_health.lch_local_dropped_count;
1114                 health->lch_local_aborted_count +=
1115                                 ctr->lct_health.lch_local_aborted_count;
1116                 health->lch_local_no_route_count +=
1117                                 ctr->lct_health.lch_local_no_route_count;
1118                 health->lch_local_timeout_count +=
1119                                 ctr->lct_health.lch_local_timeout_count;
1120                 health->lch_local_error_count +=
1121                                 ctr->lct_health.lch_local_error_count;
1122                 health->lch_remote_dropped_count +=
1123                                 ctr->lct_health.lch_remote_dropped_count;
1124                 health->lch_remote_error_count +=
1125                                 ctr->lct_health.lch_remote_error_count;
1126                 health->lch_remote_timeout_count +=
1127                                 ctr->lct_health.lch_remote_timeout_count;
1128                 health->lch_network_timeout_count +=
1129                                 ctr->lct_health.lch_network_timeout_count;
1130         }
1131 out_unlock:
1132         lnet_net_unlock(LNET_LOCK_EX);
1133         return rc;
1134 }
1135 EXPORT_SYMBOL(lnet_counters_get);
1136
1137 void
1138 lnet_counters_reset(void)
1139 {
1140         struct lnet_counters *counters;
1141         int             i;
1142
1143         lnet_net_lock(LNET_LOCK_EX);
1144
1145         if (the_lnet.ln_state != LNET_STATE_RUNNING)
1146                 goto avoid_reset;
1147
1148         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
1149                 memset(counters, 0, sizeof(struct lnet_counters));
1150 avoid_reset:
1151         lnet_net_unlock(LNET_LOCK_EX);
1152 }
1153
1154 static char *
1155 lnet_res_type2str(int type)
1156 {
1157         switch (type) {
1158         default:
1159                 LBUG();
1160         case LNET_COOKIE_TYPE_MD:
1161                 return "MD";
1162         case LNET_COOKIE_TYPE_ME:
1163                 return "ME";
1164         case LNET_COOKIE_TYPE_EQ:
1165                 return "EQ";
1166         }
1167 }
1168
1169 static void
1170 lnet_res_container_cleanup(struct lnet_res_container *rec)
1171 {
1172         int     count = 0;
1173
1174         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
1175                 return;
1176
1177         while (!list_empty(&rec->rec_active)) {
1178                 struct list_head *e = rec->rec_active.next;
1179
1180                 list_del_init(e);
1181                 if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
1182                         lnet_md_free(list_entry(e, struct lnet_libmd, md_list));
1183
1184                 } else { /* NB: Active MEs should be attached on portals */
1185                         LBUG();
1186                 }
1187                 count++;
1188         }
1189
1190         if (count > 0) {
1191                 /* Found a live MD/ME/EQ; the user really should unlink/free
1192                  * all of them before finalizing LNet, but if someone didn't,
1193                  * we have to recycle the garbage for them */
1194                 CERROR("%d active elements on exit of %s container\n",
1195                        count, lnet_res_type2str(rec->rec_type));
1196         }
1197
1198         if (rec->rec_lh_hash != NULL) {
1199                 CFS_FREE_PTR_ARRAY(rec->rec_lh_hash, LNET_LH_HASH_SIZE);
1200                 rec->rec_lh_hash = NULL;
1201         }
1202
1203         rec->rec_type = 0; /* mark it as finalized */
1204 }
1205
1206 static int
1207 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
1208 {
1209         int     rc = 0;
1210         int     i;
1211
1212         LASSERT(rec->rec_type == 0);
1213
1214         rec->rec_type = type;
1215         INIT_LIST_HEAD(&rec->rec_active);
1216
1217         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
1218
1219         /* Arbitrary choice of hash table size */
1220         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
1221                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
1222         if (rec->rec_lh_hash == NULL) {
1223                 rc = -ENOMEM;
1224                 goto out;
1225         }
1226
1227         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
1228                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
1229
1230         return 0;
1231
1232 out:
1233         CERROR("Failed to setup %s resource container\n",
1234                lnet_res_type2str(type));
1235         lnet_res_container_cleanup(rec);
1236         return rc;
1237 }
1238
1239 static void
1240 lnet_res_containers_destroy(struct lnet_res_container **recs)
1241 {
1242         struct lnet_res_container       *rec;
1243         int                             i;
1244
1245         cfs_percpt_for_each(rec, i, recs)
1246                 lnet_res_container_cleanup(rec);
1247
1248         cfs_percpt_free(recs);
1249 }
1250
1251 static struct lnet_res_container **
1252 lnet_res_containers_create(int type)
1253 {
1254         struct lnet_res_container       **recs;
1255         struct lnet_res_container       *rec;
1256         int                             rc;
1257         int                             i;
1258
1259         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
1260         if (recs == NULL) {
1261                 CERROR("Failed to allocate %s resource containers\n",
1262                        lnet_res_type2str(type));
1263                 return NULL;
1264         }
1265
1266         cfs_percpt_for_each(rec, i, recs) {
1267                 rc = lnet_res_container_setup(rec, i, type);
1268                 if (rc != 0) {
1269                         lnet_res_containers_destroy(recs);
1270                         return NULL;
1271                 }
1272         }
1273
1274         return recs;
1275 }
1276
1277 struct lnet_libhandle *
1278 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
1279 {
1280         /* ALWAYS called with lnet_res_lock held */
1281         struct list_head        *head;
1282         struct lnet_libhandle   *lh;
1283         unsigned int            hash;
1284
1285         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
1286                 return NULL;
1287
1288         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
1289         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
1290
1291         list_for_each_entry(lh, head, lh_hash_chain) {
1292                 if (lh->lh_cookie == cookie)
1293                         return lh;
1294         }
1295
1296         return NULL;
1297 }
1298
1299 void
1300 lnet_res_lh_initialize(struct lnet_res_container *rec,
1301                        struct lnet_libhandle *lh)
1302 {
1303         /* ALWAYS called with lnet_res_lock held */
1304         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
1305         unsigned int    hash;
1306
1307         lh->lh_cookie = rec->rec_lh_cookie;
1308         rec->rec_lh_cookie += 1 << ibits;
1309
1310         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
1311
1312         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
1313 }
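/*
 * Cookie layout implied by the two routines above: the lowest
 * LNET_COOKIE_TYPE_BITS hold the resource type (MD/ME/EQ), the next
 * LNET_CPT_BITS hold the CPT the handle was created on, and the bits
 * above that form a per-container counter advanced by "+= 1 << ibits"
 * for every new handle. lnet_res_lh_lookup() therefore checks the type
 * bits first, then hashes the counter bits into rec_lh_hash.
 */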
1314
1315 struct list_head **
1316 lnet_create_array_of_queues(void)
1317 {
1318         struct list_head **qs;
1319         struct list_head *q;
1320         int i;
1321
1322         qs = cfs_percpt_alloc(lnet_cpt_table(),
1323                               sizeof(struct list_head));
1324         if (!qs) {
1325                 CERROR("Failed to allocate queues\n");
1326                 return NULL;
1327         }
1328
1329         cfs_percpt_for_each(q, i, qs)
1330                 INIT_LIST_HEAD(q);
1331
1332         return qs;
1333 }
1334
1335 static int lnet_unprepare(void);
1336
1337 static int
1338 lnet_prepare(lnet_pid_t requested_pid)
1339 {
1340         /* Prepare to bring up the network */
1341         struct lnet_res_container **recs;
1342         int                       rc = 0;
1343
1344         if (requested_pid == LNET_PID_ANY) {
1345                 /* Don't instantiate LNET just for me */
1346                 return -ENETDOWN;
1347         }
1348
1349         LASSERT(the_lnet.ln_refcount == 0);
1350
1351         the_lnet.ln_routing = 0;
1352
1353         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
1354         the_lnet.ln_pid = requested_pid;
1355
1356         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
1357         INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
1358         INIT_LIST_HEAD(&the_lnet.ln_nets);
1359         INIT_LIST_HEAD(&the_lnet.ln_routers);
1360         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
1361         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
1362         INIT_LIST_HEAD(&the_lnet.ln_dc_request);
1363         INIT_LIST_HEAD(&the_lnet.ln_dc_working);
1364         INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
1365         INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
1366         INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
1367         INIT_LIST_HEAD(&the_lnet.ln_udsp_list);
1368         init_waitqueue_head(&the_lnet.ln_dc_waitq);
1369         the_lnet.ln_mt_handler = NULL;
1370         init_completion(&the_lnet.ln_started);
1371
1372         rc = lnet_slab_setup();
1373         if (rc != 0)
1374                 goto failed;
1375
1376         rc = lnet_create_remote_nets_table();
1377         if (rc != 0)
1378                 goto failed;
1379
1380         /*
1381          * NB the interface cookie in wire handles guards against delayed
1382          * replies and ACKs appearing valid after reboot.
1383          */
1384         the_lnet.ln_interface_cookie = ktime_get_real_ns();
1385
1386         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
1387                                                 sizeof(struct lnet_counters));
1388         if (the_lnet.ln_counters == NULL) {
1389                 CERROR("Failed to allocate counters for LNet\n");
1390                 rc = -ENOMEM;
1391                 goto failed;
1392         }
1393
1394         rc = lnet_peer_tables_create();
1395         if (rc != 0)
1396                 goto failed;
1397
1398         rc = lnet_msg_containers_create();
1399         if (rc != 0)
1400                 goto failed;
1401
1402         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
1403                                       LNET_COOKIE_TYPE_EQ);
1404         if (rc != 0)
1405                 goto failed;
1406
1407         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
1408         if (recs == NULL) {
1409                 rc = -ENOMEM;
1410                 goto failed;
1411         }
1412
1413         the_lnet.ln_md_containers = recs;
1414
1415         rc = lnet_portals_create();
1416         if (rc != 0) {
1417                 CERROR("Failed to create portals for LNet: %d\n", rc);
1418                 goto failed;
1419         }
1420
1421         the_lnet.ln_mt_zombie_rstqs = lnet_create_array_of_queues();
1422         if (!the_lnet.ln_mt_zombie_rstqs) {
1423                 rc = -ENOMEM;
1424                 goto failed;
1425         }
1426
1427         return 0;
1428
1429  failed:
1430         lnet_unprepare();
1431         return rc;
1432 }
1433
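/* Undo lnet_prepare().  Also called from lnet_prepare()'s failure path, so
 * it must tolerate partially initialized state (hence the NULL checks). */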
1434 static int
1435 lnet_unprepare(void)
1436 {
1437         /* NB no LNET_LOCK since this is the last reference.  All LND instances
1438          * have shut down already, so it is safe to unlink and free all
1439          * descriptors, even those that appear committed to a network op (eg MD
1440          * with non-zero pending count) */
1441
1442         lnet_fail_nid(LNET_NID_ANY, 0);
1443
1444         LASSERT(the_lnet.ln_refcount == 0);
1445         LASSERT(list_empty(&the_lnet.ln_test_peers));
1446         LASSERT(list_empty(&the_lnet.ln_nets));
1447
1448         if (the_lnet.ln_mt_zombie_rstqs) {
1449                 lnet_clean_zombie_rstqs();
1450                 the_lnet.ln_mt_zombie_rstqs = NULL;
1451         }
1452
1453         lnet_assert_handler_unused(the_lnet.ln_mt_handler);
1454         the_lnet.ln_mt_handler = NULL;
1455
1456         lnet_portals_destroy();
1457
1458         if (the_lnet.ln_md_containers != NULL) {
1459                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
1460                 the_lnet.ln_md_containers = NULL;
1461         }
1462
1463         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
1464
1465         lnet_msg_containers_destroy();
1466         lnet_peer_uninit();
1467         lnet_rtrpools_free(0);
1468
1469         if (the_lnet.ln_counters != NULL) {
1470                 cfs_percpt_free(the_lnet.ln_counters);
1471                 the_lnet.ln_counters = NULL;
1472         }
1473         lnet_destroy_remote_nets_table();
1474         lnet_udsp_destroy(true);
1475         lnet_slab_cleanup();
1476
1477         return 0;
1478 }
1479
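/* Return the first NI of the net matching net_id, or NULL if that net is
 * not configured locally.  The _locked suffix indicates the caller holds
 * lnet_net_lock; the cpt argument must not be LNET_LOCK_EX. */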
1480 struct lnet_ni  *
1481 lnet_net2ni_locked(__u32 net_id, int cpt)
1482 {
1483         struct lnet_ni   *ni;
1484         struct lnet_net  *net;
1485
1486         LASSERT(cpt != LNET_LOCK_EX);
1487
1488         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1489                 if (net->net_id == net_id) {
1490                         ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
1491                                               ni_netlist);
1492                         return ni;
1493                 }
1494         }
1495
1496         return NULL;
1497 }
1498
1499 struct lnet_ni *
1500 lnet_net2ni_addref(__u32 net)
1501 {
1502         struct lnet_ni *ni;
1503
1504         lnet_net_lock(0);
1505         ni = lnet_net2ni_locked(net, 0);
1506         if (ni)
1507                 lnet_ni_addref_locked(ni, 0);
1508         lnet_net_unlock(0);
1509
1510         return ni;
1511 }
1512 EXPORT_SYMBOL(lnet_net2ni_addref);
1513
1514 struct lnet_net *
1515 lnet_get_net_locked(__u32 net_id)
1516 {
1517         struct lnet_net  *net;
1518
1519         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1520                 if (net->net_id == net_id)
1521                         return net;
1522         }
1523
1524         return NULL;
1525 }
1526
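/* Discard the net's list of preferred router NIDs: detach the list under
 * the exclusive net lock, then free the entries outside it. */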
1527 void
1528 lnet_net_clr_pref_rtrs(struct lnet_net *net)
1529 {
1530         struct list_head zombies;
1531         struct lnet_nid_list *ne;
1532         struct lnet_nid_list *tmp;
1533
1534         INIT_LIST_HEAD(&zombies);
1535
1536         lnet_net_lock(LNET_LOCK_EX);
1537         list_splice_init(&net->net_rtr_pref_nids, &zombies);
1538         lnet_net_unlock(LNET_LOCK_EX);
1539
1540         list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
1541                 list_del_init(&ne->nl_list);
1542                 LIBCFS_FREE(ne, sizeof(*ne));
1543         }
1544 }
1545
1546 int
1547 lnet_net_add_pref_rtr(struct lnet_net *net,
1548                       struct lnet_nid *gw_nid)
1549 __must_hold(&the_lnet.ln_api_mutex)
1550 {
1551         struct lnet_nid_list *ne;
1552
1553         /* This function is called with api_mutex held. When the api_mutex
1554          * is held the list can not be modified, as it is only modified as
1555          * a result of applying a UDSP and that happens under api_mutex
1556          * lock.
1557          */
1558         list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
1559                 if (nid_same(&ne->nl_nid, gw_nid))
1560                         return -EEXIST;
1561         }
1562
1563         LIBCFS_ALLOC(ne, sizeof(*ne));
1564         if (!ne)
1565                 return -ENOMEM;
1566
1567         ne->nl_nid = *gw_nid;
1568
1569         /* Lock the cpt to protect against addition and checks in the
1570          * selection algorithm
1571          */
1572         lnet_net_lock(LNET_LOCK_EX);
1573         list_add(&ne->nl_list, &net->net_rtr_pref_nids);
1574         lnet_net_unlock(LNET_LOCK_EX);
1575
1576         return 0;
1577 }
1578
1579 bool
1580 lnet_net_is_pref_rtr_locked(struct lnet_net *net, struct lnet_nid *rtr_nid)
1581 {
1582         struct lnet_nid_list *ne;
1583
1584         CDEBUG(D_NET, "%s: rtr pref empty: %d\n",
1585                libcfs_net2str(net->net_id),
1586                list_empty(&net->net_rtr_pref_nids));
1587
1588         if (list_empty(&net->net_rtr_pref_nids))
1589                 return false;
1590
1591         list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
1592                 CDEBUG(D_NET, "Comparing pref %s with gw %s\n",
1593                        libcfs_nidstr(&ne->nl_nid),
1594                        libcfs_nidstr(rtr_nid));
1595                 if (nid_same(rtr_nid, &ne->nl_nid))
1596                         return true;
1597         }
1598
1599         return false;
1600 }
1601
1602 static unsigned int
1603 lnet_nid4_cpt_hash(lnet_nid_t nid, unsigned int number)
1604 {
1605         __u64 key = nid;
1606         __u64 pair_bits = 0x0001000100010001LLU;
1607         __u64 mask = pair_bits * 0xFF;
1608         __u64 pair_sum;
1609
1610         /* Use (sum-by-multiplication of nid bytes) mod (number of CPTs)
1611          * to match nid to a CPT.
1612          */
1613         pair_sum = (key & mask) + ((key >> 8) & mask);
1614         pair_sum = (pair_sum * pair_bits) >> 48;
1615
1616         CDEBUG(D_NET, "Match nid %s to cpt %u\n",
1617                libcfs_nid2str(nid), (unsigned int)(pair_sum) % number);
1618
1619         return (unsigned int)(pair_sum) % number;
1620 }
1621
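/* Hash a NID (large-address or nid4) to a CPT number in [0, number).
 * nid4 NIDs take the legacy lnet_nid4_cpt_hash() path, presumably so
 * their placement stays identical to older code. */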
1622 unsigned int
1623 lnet_nid_cpt_hash(struct lnet_nid *nid, unsigned int number)
1624 {
1625         unsigned int val;
1626         u32 h = 0;
1627         int i;
1628
1629         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
1630
1631         if (number == 1)
1632                 return 0;
1633
1634         if (nid_is_nid4(nid))
1635                 return lnet_nid4_cpt_hash(lnet_nid_to_nid4(nid), number);
1636
1637         for (i = 0; i < 4; i++)
1638                 h = cfs_hash_32(nid->nid_addr[i]^h, 32);
1639         val = cfs_hash_32(LNET_NID_NET(nid) ^ h, LNET_CPT_BITS);
1640         if (val < number)
1641                 return val;
1642         return (unsigned int)(h + val + (val >> 1)) % number;
1643 }
1644
1645 int
1646 lnet_cpt_of_nid_locked(struct lnet_nid *nid, struct lnet_ni *ni)
1647 {
1648         struct lnet_net *net;
1649
1650         /* must be called with lnet_net_lock held */
1651         if (LNET_CPT_NUMBER == 1)
1652                 return 0; /* the only one */
1653
1654         /*
1655          * If NI is provided then use the CPT identified in the NI cpt
1656          * list if one exists. If one doesn't exist, then that NI is
1657          * associated with all CPTs and it follows that the net it belongs
1658          * to is implicitly associated with all CPTs, so just hash the nid
1659          * and return that.
1660          */
1661         if (ni != NULL) {
1662                 if (ni->ni_cpts != NULL)
1663                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
1664                                                              ni->ni_ncpts)];
1665                 else
1666                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1667         }
1668
1669         /* no NI provided so look at the net */
1670         net = lnet_get_net_locked(LNET_NID_NET(nid));
1671
1672         if (net != NULL && net->net_cpts != NULL) {
1673                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
1674         }
1675
1676         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
1677 }
1678
1679 int
1680 lnet_nid2cpt(struct lnet_nid *nid, struct lnet_ni *ni)
1681 {
1682         int     cpt;
1683         int     cpt2;
1684
1685         if (LNET_CPT_NUMBER == 1)
1686                 return 0; /* the only one */
1687
1688         cpt = lnet_net_lock_current();
1689
1690         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
1691
1692         lnet_net_unlock(cpt);
1693
1694         return cpt2;
1695 }
1696 EXPORT_SYMBOL(lnet_nid2cpt);
1697
1698 int
1699 lnet_cpt_of_nid(lnet_nid_t nid4, struct lnet_ni *ni)
1700 {
1701         struct lnet_nid nid;
1702
1703         if (LNET_CPT_NUMBER == 1)
1704                 return 0; /* the only one */
1705
1706         lnet_nid4_to_nid(nid4, &nid);
1707         return lnet_nid2cpt(&nid, ni);
1708 }
1709 EXPORT_SYMBOL(lnet_cpt_of_nid);
1710
1711 int
1712 lnet_islocalnet_locked(__u32 net_id)
1713 {
1714         struct lnet_net *net;
1715         bool local;
1716
1717         net = lnet_get_net_locked(net_id);
1718
1719         local = net != NULL;
1720
1721         return local;
1722 }
1723
1724 int
1725 lnet_islocalnet(__u32 net_id)
1726 {
1727         int cpt;
1728         bool local;
1729
1730         cpt = lnet_net_lock_current();
1731
1732         local = lnet_islocalnet_locked(net_id);
1733
1734         lnet_net_unlock(cpt);
1735
1736         return local;
1737 }
1738
1739 struct lnet_ni  *
1740 lnet_nid_to_ni_locked(struct lnet_nid *nid, int cpt)
1741 {
1742         struct lnet_net  *net;
1743         struct lnet_ni *ni;
1744
1745         LASSERT(cpt != LNET_LOCK_EX);
1746
1747         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1748                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1749                         if (nid_same(&ni->ni_nid, nid))
1750                                 return ni;
1751                 }
1752         }
1753
1754         return NULL;
1755 }
1756
1757 struct lnet_ni *
1758 lnet_nid_to_ni_addref(struct lnet_nid *nid)
1759 {
1760         struct lnet_ni *ni;
1761
1762         lnet_net_lock(0);
1763         ni = lnet_nid_to_ni_locked(nid, 0);
1764         if (ni)
1765                 lnet_ni_addref_locked(ni, 0);
1766         lnet_net_unlock(0);
1767
1768         return ni;
1769 }
1770 EXPORT_SYMBOL(lnet_nid_to_ni_addref);
1771
1772 int
1773 lnet_islocalnid(struct lnet_nid *nid)
1774 {
1775         struct lnet_ni  *ni;
1776         int             cpt;
1777
1778         cpt = lnet_net_lock_current();
1779         ni = lnet_nid_to_ni_locked(nid, cpt);
1780         lnet_net_unlock(cpt);
1781
1782         return ni != NULL;
1783 }
1784
1785 int
1786 lnet_count_acceptor_nets(void)
1787 {
1788         /* Return the # of NIs that need the acceptor. */
1789         int              count = 0;
1790         struct lnet_net  *net;
1791         int              cpt;
1792
1793         cpt = lnet_net_lock_current();
1794         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1795                 /* all socklnd type networks should have the acceptor
1796                  * thread started */
1797                 if (net->net_lnd->lnd_accept != NULL)
1798                         count++;
1799         }
1800
1801         lnet_net_unlock(cpt);
1802
1803         return count;
1804 }
1805
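/* Allocate a ping buffer with room for nbytes of ping info.  The refcount
 * starts at 1; the buffer is released through lnet_ping_buffer_free() once
 * the last reference is dropped. */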
1806 struct lnet_ping_buffer *
1807 lnet_ping_buffer_alloc(int nbytes, gfp_t gfp)
1808 {
1809         struct lnet_ping_buffer *pbuf;
1810
1811         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nbytes), gfp);
1812         if (pbuf) {
1813                 pbuf->pb_nbytes = nbytes;       /* size of pb_info */
1814                 pbuf->pb_needs_post = false;
1815                 atomic_set(&pbuf->pb_refcnt, 1);
1816         }
1817
1818         return pbuf;
1819 }
1820
1821 void
1822 lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
1823 {
1824         LASSERT(atomic_read(&pbuf->pb_refcnt) == 0);
1825         LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nbytes));
1826 }
1827
1828 static struct lnet_ping_buffer *
1829 lnet_ping_target_create(int nbytes)
1830 {
1831         struct lnet_ping_buffer *pbuf;
1832
1833         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
1834         if (pbuf == NULL) {
1835                 CERROR("Can't allocate ping source [%d]\n", nbytes);
1836                 return NULL;
1837         }
1838
1839         pbuf->pb_info.pi_nnis = 0;
1840         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1841         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1842         pbuf->pb_info.pi_features =
1843                 LNET_PING_FEAT_NI_STATUS | LNET_PING_FEAT_MULTI_RAIL;
1844
1845         return pbuf;
1846 }
1847
1848 static inline int
1849 lnet_get_net_ni_bytes_locked(struct lnet_net *net)
1850 {
1851         struct lnet_ni *ni;
1852         int bytes = 0;
1853
1854         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1855                 bytes += lnet_ping_sts_size(&ni->ni_nid);
1856
1857         return bytes;
1858 }
1859
1860 static inline int
1861 lnet_get_ni_bytes(void)
1862 {
1863         struct lnet_ni *ni;
1864         struct lnet_net *net;
1865         int bytes = 0;
1866
1867         lnet_net_lock(0);
1868
1869         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1870                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1871                         bytes += lnet_ping_sts_size(&ni->ni_nid);
1872         }
1873
1874         lnet_net_unlock(0);
1875
1876         return bytes;
1877 }
1878
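/* Byte-swap ping info received from a peer of opposite endianness: the
 * header and fixed-size nid4 status entries first, then the variable-size
 * large-NID entries if LNET_PING_FEAT_LARGE_ADDR is set. */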
1879 void
1880 lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf)
1881 {
1882         struct lnet_ni_large_status *lstat, *lend;
1883         struct lnet_ni_status *stat, *end;
1884         int nnis;
1885         int i;
1886
1887         __swab32s(&pbuf->pb_info.pi_magic);
1888         __swab32s(&pbuf->pb_info.pi_features);
1889         __swab32s(&pbuf->pb_info.pi_pid);
1890         __swab32s(&pbuf->pb_info.pi_nnis);
1891         nnis = pbuf->pb_info.pi_nnis;
1892         stat = &pbuf->pb_info.pi_ni[0];
1893         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
1894         for (i = 0; i < nnis && stat + 1 <= end; i++, stat++) {
1895                 __swab64s(&stat->ns_nid);
1896                 __swab32s(&stat->ns_status);
1897                 if (i == 0)
1898                         /* Might be total size */
1899                         __swab32s(&stat->ns_msg_size);
1900         }
1901         if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_LARGE_ADDR))
1902                 return;
1903
1904         lstat = (struct lnet_ni_large_status *)stat;
1905         lend = (void *)end;
1906         while (lstat + 1 <= lend) {
1907                 __swab32s(&lstat->ns_status);
1908                 /* struct lnet_nid never needs to be swabbed */
1909                 lstat = lnet_ping_sts_next(lstat);
1910         }
1911 }
1912
1913 int
1914 lnet_ping_info_validate(struct lnet_ping_info *pinfo)
1915 {
1916         if (!pinfo)
1917                 return -EINVAL;
1918         if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
1919                 return -EPROTO;
1920         if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
1921                 return -EPROTO;
1922         /* Loopback is guaranteed to be present */
1923         if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
1924                 return -ERANGE;
1925         if (LNET_PING_INFO_LONI(pinfo) != LNET_NID_LO_0)
1926                 return -EPROTO;
1927         return 0;
1928 }
1929
1930 static void
1931 lnet_ping_target_destroy(void)
1932 {
1933         struct lnet_net *net;
1934         struct lnet_ni  *ni;
1935
1936         lnet_net_lock(LNET_LOCK_EX);
1937
1938         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1939                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1940                         lnet_ni_lock(ni);
1941                         ni->ni_status = NULL;
1942                         lnet_ni_unlock(ni);
1943                 }
1944         }
1945
1946         lnet_ping_buffer_decref(the_lnet.ln_ping_target);
1947         the_lnet.ln_ping_target = NULL;
1948
1949         lnet_net_unlock(LNET_LOCK_EX);
1950 }
1951
1952 static void
1953 lnet_ping_target_event_handler(struct lnet_event *event)
1954 {
1955         struct lnet_ping_buffer *pbuf = event->md_user_ptr;
1956
1957         if (event->unlinked)
1958                 lnet_ping_buffer_decref(pbuf);
1959 }
1960
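/* Create the ping target buffer and attach a permanent ME/MD on the
 * reserved portal so peers can GET this node's ping info.  On success the
 * buffer holds an extra reference owned by the MD. */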
1961 static int
1962 lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
1963                        struct lnet_handle_md *ping_mdh,
1964                        int ni_bytes, bool set_eq)
1965 {
1966         struct lnet_processid id = {
1967                 .nid = LNET_ANY_NID,
1968                 .pid = LNET_PID_ANY
1969         };
1970         struct lnet_me *me;
1971         struct lnet_md md = { NULL };
1972         int rc;
1973
1974         if (set_eq)
1975                 the_lnet.ln_ping_target_handler =
1976                         lnet_ping_target_event_handler;
1977
1978         *ppbuf = lnet_ping_target_create(ni_bytes);
1979         if (*ppbuf == NULL) {
1980                 rc = -ENOMEM;
1981                 goto fail_free_eq;
1982         }
1983
1984         /* Ping target ME/MD */
1985         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
1986                           LNET_PROTO_PING_MATCHBITS, 0,
1987                           LNET_UNLINK, LNET_INS_AFTER);
1988         if (IS_ERR(me)) {
1989                 rc = PTR_ERR(me);
1990                 CERROR("Can't create ping target ME: %d\n", rc);
1991                 goto fail_decref_ping_buffer;
1992         }
1993
1994         /* initialize md content */
1995         md.start     = &(*ppbuf)->pb_info;
1996         md.length    = (*ppbuf)->pb_nbytes;
1997         md.threshold = LNET_MD_THRESH_INF;
1998         md.max_size  = 0;
1999         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
2000                        LNET_MD_MANAGE_REMOTE;
2001         md.handler   = the_lnet.ln_ping_target_handler;
2002         md.user_ptr  = *ppbuf;
2003
2004         rc = LNetMDAttach(me, &md, LNET_RETAIN, ping_mdh);
2005         if (rc != 0) {
2006                 CERROR("Can't attach ping target MD: %d\n", rc);
2007                 goto fail_decref_ping_buffer;
2008         }
2009         lnet_ping_buffer_addref(*ppbuf);
2010
2011         return 0;
2012
2013 fail_decref_ping_buffer:
2014         LASSERT(atomic_read(&(*ppbuf)->pb_refcnt) == 1);
2015         lnet_ping_buffer_decref(*ppbuf);
2016         *ppbuf = NULL;
2017 fail_free_eq:
2018         return rc;
2019 }
2020
2021 static void
2022 lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
2023                     struct lnet_handle_md *ping_mdh)
2024 {
2025         LNetMDUnlink(*ping_mdh);
2026         LNetInvalidateMDHandle(ping_mdh);
2027
2028         /* NB the MD could be busy; this just starts the unlink */
2029         wait_var_event_warning(&pbuf->pb_refcnt,
2030                                atomic_read(&pbuf->pb_refcnt) <= 1,
2031                                "Still waiting for ping data MD to unlink\n");
2032 }
2033
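/* Populate pbuf with the status of every local NI: nid4 NIs first as
 * fixed-size lnet_ni_status entries, then large-address NIs as
 * lnet_ni_large_status entries.  Each ni_status pointer is wired to its
 * slot so later status changes show up in the ping buffer directly. */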
2034 static void
2035 lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
2036 {
2037         struct lnet_ni *ni;
2038         struct lnet_net *net;
2039         struct lnet_ni_status *ns, *end;
2040         struct lnet_ni_large_status *lns, *lend;
2041         int rc;
2042
2043         pbuf->pb_info.pi_nnis = 0;
2044         ns = &pbuf->pb_info.pi_ni[0];
2045         end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
2046         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2047                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2048                         if (!nid_is_nid4(&ni->ni_nid)) {
2049                                 if (ns == &pbuf->pb_info.pi_ni[1]) {
2050                                         /* This is primary, and it is long */
2051                                         pbuf->pb_info.pi_features |=
2052                                                 LNET_PING_FEAT_PRIMARY_LARGE;
2053                                 }
2054                                 continue;
2055                         }
2056                         LASSERT(ns + 1 <= end);
2057                         ns->ns_nid = lnet_nid_to_nid4(&ni->ni_nid);
2058
2059                         lnet_ni_lock(ni);
2060                         ns->ns_status = lnet_ni_get_status_locked(ni);
2061                         ni->ni_status = &ns->ns_status;
2062                         lnet_ni_unlock(ni);
2063
2064                         pbuf->pb_info.pi_nnis++;
2065                         ns++;
2066                 }
2067         }
2068
2069         lns = (void *)ns;
2070         lend = (void *)end;
2071         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2072                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2073                         if (nid_is_nid4(&ni->ni_nid))
2074                                 continue;
2075                         LASSERT(lns + 1 <= lend);
2076
2077                         lns->ns_nid = ni->ni_nid;
2078
2079                         lnet_ni_lock(ni);
2080                         lns->ns_status = lnet_ni_get_status_locked(ni);
2081                         ni->ni_status = &lns->ns_status;
2082                         lnet_ni_unlock(ni);
2083
2084                         lns = lnet_ping_sts_next(lns);
2085                 }
2086         }
2087         if ((void *)lns > (void *)ns) {
2088                 /* Record total info size */
2089                 pbuf->pb_info.pi_ni[0].ns_msg_size =
2090                         (void *)lns - (void *)&pbuf->pb_info;
2091                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_LARGE_ADDR;
2092         }
2093
2094         /* We (ab)use the ns_status of the loopback interface to
2095          * transmit the sequence number. The first interface listed
2096          * must be the loopback interface.
2097          */
2098         rc = lnet_ping_info_validate(&pbuf->pb_info);
2099         if (rc) {
2100                 LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
2101                 LBUG();
2102         }
2103         LNET_PING_BUFFER_SEQNO(pbuf) =
2104                 atomic_inc_return(&the_lnet.ln_ping_target_seqno);
2105 }
2106
2107 static void
2108 lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
2109                         struct lnet_handle_md ping_mdh)
2110 {
2111         struct lnet_ping_buffer *old_pbuf = NULL;
2112         struct lnet_handle_md old_ping_md;
2113
2114         /* switch the NIs to point to the new ping info created */
2115         lnet_net_lock(LNET_LOCK_EX);
2116
2117         if (!the_lnet.ln_routing)
2118                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;
2119         if (!lnet_peer_discovery_disabled)
2120                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
2121
2122         /* Ensure only known feature bits have been set. */
2123         LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
2124         LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));
2125
2126         lnet_ping_target_install_locked(pbuf);
2127
2128         if (the_lnet.ln_ping_target) {
2129                 old_pbuf = the_lnet.ln_ping_target;
2130                 old_ping_md = the_lnet.ln_ping_target_md;
2131         }
2132         the_lnet.ln_ping_target_md = ping_mdh;
2133         the_lnet.ln_ping_target = pbuf;
2134
2135         lnet_net_unlock(LNET_LOCK_EX);
2136
2137         if (old_pbuf) {
2138                 /* unlink and free the old ping info */
2139                 lnet_ping_md_unlink(old_pbuf, &old_ping_md);
2140                 lnet_ping_buffer_decref(old_pbuf);
2141         }
2142
2143         lnet_push_update_to_peers(0);
2144 }
2145
2146 static void
2147 lnet_ping_target_fini(void)
2148 {
2149         lnet_ping_md_unlink(the_lnet.ln_ping_target,
2150                             &the_lnet.ln_ping_target_md);
2151
2152         lnet_assert_handler_unused(the_lnet.ln_ping_target_handler);
2153         lnet_ping_target_destroy();
2154 }
2155
2156 /* Resize the push target. */
2157 int lnet_push_target_resize(void)
2158 {
2159         struct lnet_handle_md mdh;
2160         struct lnet_handle_md old_mdh;
2161         struct lnet_ping_buffer *pbuf;
2162         struct lnet_ping_buffer *old_pbuf;
2163         int nbytes;
2164         int rc;
2165
2166 again:
2167         nbytes = the_lnet.ln_push_target_nbytes;
2168         if (nbytes <= 0) {
2169                 CDEBUG(D_NET, "Invalid nbytes %d\n", nbytes);
2170                 return -EINVAL;
2171         }
2172
2173         /* NB: lnet_ping_buffer_alloc() sets pbuf refcount to 1. That ref is
2174          * dropped when we need to resize again (see "old_pbuf" below) or when
2175          * LNet is shut down (see lnet_push_target_fini())
2176          */
2177         pbuf = lnet_ping_buffer_alloc(nbytes, GFP_NOFS);
2178         if (!pbuf) {
2179                 CDEBUG(D_NET, "Can't allocate pbuf for nbytes %d\n", nbytes);
2180                 return -ENOMEM;
2181         }
2182
2183         rc = lnet_push_target_post(pbuf, &mdh);
2184         if (rc) {
2185                 CDEBUG(D_NET, "Failed to post push target: %d\n", rc);
2186                 lnet_ping_buffer_decref(pbuf);
2187                 return rc;
2188         }
2189
2190         lnet_net_lock(LNET_LOCK_EX);
2191         old_pbuf = the_lnet.ln_push_target;
2192         old_mdh = the_lnet.ln_push_target_md;
2193         the_lnet.ln_push_target = pbuf;
2194         the_lnet.ln_push_target_md = mdh;
2195         lnet_net_unlock(LNET_LOCK_EX);
2196
2197         if (old_pbuf) {
2198                 LNetMDUnlink(old_mdh);
2199                 /* Drop ref set by lnet_ping_buffer_alloc() */
2200                 lnet_ping_buffer_decref(old_pbuf);
2201         }
2202
2203         /* Received another push or reply that requires a larger buffer */
2204         if (nbytes < the_lnet.ln_push_target_nbytes)
2205                 goto again;
2206
2207         CDEBUG(D_NET, "nbytes %d success\n", nbytes);
2208         return 0;
2209 }
2210
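/* Post the push target buffer: attach a single-use MD (threshold 1) on the
 * reserved portal to receive the next push.  The reference taken here is
 * dropped by lnet_push_target_event_handler(). */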
2211 int lnet_push_target_post(struct lnet_ping_buffer *pbuf,
2212                           struct lnet_handle_md *mdhp)
2213 {
2214         struct lnet_processid id = { LNET_ANY_NID, LNET_PID_ANY };
2215         struct lnet_md md = { NULL };
2216         struct lnet_me *me;
2217         int rc;
2218
2219         me = LNetMEAttach(LNET_RESERVED_PORTAL, &id,
2220                           LNET_PROTO_PING_MATCHBITS, 0,
2221                           LNET_UNLINK, LNET_INS_AFTER);
2222         if (IS_ERR(me)) {
2223                 rc = PTR_ERR(me);
2224                 CERROR("Can't create push target ME: %d\n", rc);
2225                 return rc;
2226         }
2227
2228         pbuf->pb_needs_post = false;
2229
2230         /* This reference is dropped by lnet_push_target_event_handler() */
2231         lnet_ping_buffer_addref(pbuf);
2232
2233         /* initialize md content */
2234         md.start     = &pbuf->pb_info;
2235         md.length    = pbuf->pb_nbytes;
2236         md.threshold = 1;
2237         md.max_size  = 0;
2238         md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE;
2239         md.user_ptr  = pbuf;
2240         md.handler   = the_lnet.ln_push_target_handler;
2241
2242         rc = LNetMDAttach(me, &md, LNET_UNLINK, mdhp);
2243         if (rc) {
2244                 CERROR("Can't attach push MD: %d\n", rc);
2245                 lnet_ping_buffer_decref(pbuf);
2246                 pbuf->pb_needs_post = true;
2247                 return rc;
2248         }
2249
2250         CDEBUG(D_NET, "posted push target %p\n", pbuf);
2251
2252         return 0;
2253 }
2254
2255 static void lnet_push_target_event_handler(struct lnet_event *ev)
2256 {
2257         struct lnet_ping_buffer *pbuf = ev->md_user_ptr;
2258
2259         CDEBUG(D_NET, "type %d status %d unlinked %d\n", ev->type, ev->status,
2260                ev->unlinked);
2261
2262         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
2263                 lnet_swap_pinginfo(pbuf);
2264
2265         if (ev->type == LNET_EVENT_UNLINK) {
2266                 /* Drop ref added by lnet_push_target_post() */
2267                 lnet_ping_buffer_decref(pbuf);
2268                 return;
2269         }
2270
2271         lnet_peer_push_event(ev);
2272         if (ev->unlinked)
2273                 /* Drop ref added by lnet_push_target_post */
2274                 lnet_ping_buffer_decref(pbuf);
2275 }
2276
2277 /* Initialize the push target. */
2278 static int lnet_push_target_init(void)
2279 {
2280         int rc;
2281
2282         if (the_lnet.ln_push_target)
2283                 return -EALREADY;
2284
2285         the_lnet.ln_push_target_handler =
2286                 lnet_push_target_event_handler;
2287
2288         rc = LNetSetLazyPortal(LNET_RESERVED_PORTAL);
2289         LASSERT(rc == 0);
2290
2291         /* Start at the required minimum; we'll enlarge it if needed. */
2292         the_lnet.ln_push_target_nbytes = LNET_PING_INFO_MIN_SIZE;
2293
2294         rc = lnet_push_target_resize();
2295         if (rc) {
2296                 LNetClearLazyPortal(LNET_RESERVED_PORTAL);
2297                 the_lnet.ln_push_target_handler = NULL;
2298         }
2299
2300         return rc;
2301 }
2302
2303 /* Clean up the push target. */
2304 static void lnet_push_target_fini(void)
2305 {
2306         if (!the_lnet.ln_push_target)
2307                 return;
2308
2309         /* Unlink and invalidate to prevent new references. */
2310         LNetMDUnlink(the_lnet.ln_push_target_md);
2311         LNetInvalidateMDHandle(&the_lnet.ln_push_target_md);
2312
2313         /* Wait for the unlink to complete. */
2314         wait_var_event_warning(&the_lnet.ln_push_target->pb_refcnt,
2315                                atomic_read(&the_lnet.ln_push_target->pb_refcnt) <= 1,
2316                                "Still waiting for ping data MD to unlink\n");
2317
2318         /* Drop ref set by lnet_ping_buffer_alloc() */
2319         lnet_ping_buffer_decref(the_lnet.ln_push_target);
2320         the_lnet.ln_push_target = NULL;
2321         the_lnet.ln_push_target_nbytes = 0;
2322
2323         LNetClearLazyPortal(LNET_RESERVED_PORTAL);
2324         lnet_assert_handler_unused(the_lnet.ln_push_target_handler);
2325         the_lnet.ln_push_target_handler = NULL;
2326 }
2327
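/* Split the net's max tx credits across the NI's CPTs: each tx queue gets
 * max_tx_credits / ncpts, raised to at least 8 * peer_tx_credits and
 * capped at max_tx_credits.  E.g. with 256 max credits, 4 CPTs and 8 peer
 * credits, each queue gets 64. */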
2328 static int
2329 lnet_ni_tq_credits(struct lnet_ni *ni)
2330 {
2331         int     credits;
2332
2333         LASSERT(ni->ni_ncpts >= 1);
2334
2335         if (ni->ni_ncpts == 1)
2336                 return ni->ni_net->net_tunables.lct_max_tx_credits;
2337
2338         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
2339         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
2340         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
2341
2342         return credits;
2343 }
2344
2345 static void
2346 lnet_ni_unlink_locked(struct lnet_ni *ni)
2347 {
2348         /* move it to the zombie list so nobody can find it anymore */
2349         LASSERT(!list_empty(&ni->ni_netlist));
2350         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
2351         lnet_ni_decref_locked(ni, 0);
2352 }
2353
2354 static void
2355 lnet_clear_zombies_nis_locked(struct lnet_net *net)
2356 {
2357         int             i;
2358         int             islo;
2359         struct lnet_ni  *ni;
2360         struct list_head *zombie_list = &net->net_ni_zombie;
2361
2362         /*
2363          * Now wait for the NIs I just nuked to show up on the zombie
2364          * list and shut them down in guaranteed thread context
2365          */
2366         i = 2;
2367         while ((ni = list_first_entry_or_null(zombie_list,
2368                                               struct lnet_ni,
2369                                               ni_netlist)) != NULL) {
2370                 int *ref;
2371                 int j;
2372
2373                 list_del_init(&ni->ni_netlist);
2374                 /* the NI should be in the deleting state; if it's not,
2375                  * it's a bug */
2376                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
2377                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
2378                         if (*ref == 0)
2379                                 continue;
2380                         /* still busy, add it back to zombie list */
2381                         list_add(&ni->ni_netlist, zombie_list);
2382                         break;
2383                 }
2384
2385                 if (!list_empty(&ni->ni_netlist)) {
2386                         /* Unlock the net lock and API mutex while waiting
2387                          * so other threads can read the LNet state, and
2388                          * to avoid deadlock
2389                          */
2390                         lnet_net_unlock(LNET_LOCK_EX);
2391                         mutex_unlock(&the_lnet.ln_api_mutex);
2392
2393                         ++i;
2394                         if ((i & (-i)) == i) {
2395                                 CDEBUG(D_WARNING,
2396                                        "Waiting for zombie LNI %s\n",
2397                                        libcfs_nidstr(&ni->ni_nid));
2398                         }
2399                         schedule_timeout_uninterruptible(cfs_time_seconds(1));
2400
2401                         mutex_lock(&the_lnet.ln_api_mutex);
2402                         lnet_net_lock(LNET_LOCK_EX);
2403                         continue;
2404                 }
2405
2406                 lnet_net_unlock(LNET_LOCK_EX);
2407
2408                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
2409
2410                 LASSERT(!in_interrupt());
2411                 /* Holding the LND mutex makes it safe for lnd_shutdown
2412                  * to call module_put(). Module unload cannot finish
2413                  * until lnet_unregister_lnd() completes, and that
2414                  * requires the LND mutex.
2415                  */
2416                 mutex_unlock(&the_lnet.ln_api_mutex);
2417                 mutex_lock(&the_lnet.ln_lnd_mutex);
2418                 (net->net_lnd->lnd_shutdown)(ni);
2419                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2420                 mutex_lock(&the_lnet.ln_api_mutex);
2421
2422                 if (!islo)
2423                         CDEBUG(D_LNI, "Removed LNI %s\n",
2424                               libcfs_nidstr(&ni->ni_nid));
2425
2426                 lnet_ni_free(ni);
2427                 i = 2;
2428                 lnet_net_lock(LNET_LOCK_EX);
2429         }
2430 }
2431
2432 /* shut down the NI and release its refcount */
2433 static void
2434 lnet_shutdown_lndni(struct lnet_ni *ni)
2435 {
2436         int i;
2437         struct lnet_net *net = ni->ni_net;
2438
2439         lnet_net_lock(LNET_LOCK_EX);
2440         lnet_ni_lock(ni);
2441         ni->ni_state = LNET_NI_STATE_DELETING;
2442         lnet_ni_unlock(ni);
2443         lnet_ni_unlink_locked(ni);
2444         lnet_incr_dlc_seq();
2445         lnet_net_unlock(LNET_LOCK_EX);
2446
2447         /* clear messages for this NI on the lazy portal */
2448         for (i = 0; i < the_lnet.ln_nportals; i++)
2449                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
2450
2451         lnet_net_lock(LNET_LOCK_EX);
2452         lnet_clear_zombies_nis_locked(net);
2453         lnet_net_unlock(LNET_LOCK_EX);
2454 }
2455
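/* Shut down a single net: remove it from the global list, shut down each
 * of its NIs, clean up its peer tables and free the net structure. */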
2456 static void
2457 lnet_shutdown_lndnet(struct lnet_net *net)
2458 {
2459         struct lnet_ni *ni;
2460
2461         lnet_net_lock(LNET_LOCK_EX);
2462
2463         list_del_init(&net->net_list);
2464
2465         while ((ni = list_first_entry_or_null(&net->net_ni_list,
2466                                               struct lnet_ni,
2467                                               ni_netlist)) != NULL) {
2468                 lnet_net_unlock(LNET_LOCK_EX);
2469                 lnet_shutdown_lndni(ni);
2470                 lnet_net_lock(LNET_LOCK_EX);
2471         }
2472
2473         lnet_net_unlock(LNET_LOCK_EX);
2474
2475         /* Do peer table cleanup for this net */
2476         lnet_peer_tables_cleanup(net);
2477
2478         lnet_net_free(net);
2479 }
2480
2481 static void
2482 lnet_shutdown_lndnets(void)
2483 {
2484         struct lnet_net *net;
2485         LIST_HEAD(resend);
2486         struct lnet_msg *msg, *tmp;
2487
2488         /* NB called holding the global mutex */
2489
2490         /* All quiet on the API front */
2491         LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING ||
2492                 the_lnet.ln_state == LNET_STATE_STOPPING);
2493         LASSERT(the_lnet.ln_refcount == 0);
2494
2495         lnet_net_lock(LNET_LOCK_EX);
2496         the_lnet.ln_state = LNET_STATE_STOPPING;
2497
2498         /*
2499          * move the nets to the zombie list so they are not picked up
2500          * for new work. The loopback net (LOLND) is also included in
2501          * the nets that will be moved to the zombie list
2502          */
2503         list_splice_init(&the_lnet.ln_nets, &the_lnet.ln_net_zombie);
2504
2505         /* Drop the cached loopback NI. */
2506         if (the_lnet.ln_loni != NULL) {
2507                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
2508                 the_lnet.ln_loni = NULL;
2509         }
2510         lnet_net_unlock(LNET_LOCK_EX);
2511
2512         /* iterate through the net zombie list and delete each net */
2513         while ((net = list_first_entry_or_null(&the_lnet.ln_net_zombie,
2514                                                struct lnet_net,
2515                                                net_list)) != NULL)
2516                 lnet_shutdown_lndnet(net);
2517
2518         spin_lock(&the_lnet.ln_msg_resend_lock);
2519         list_splice(&the_lnet.ln_msg_resend, &resend);
2520         spin_unlock(&the_lnet.ln_msg_resend_lock);
2521
2522         list_for_each_entry_safe(msg, tmp, &resend, msg_list) {
2523                 list_del_init(&msg->msg_list);
2524                 msg->msg_no_resend = true;
2525                 lnet_finalize(msg, -ECANCELED);
2526         }
2527
2528         lnet_net_lock(LNET_LOCK_EX);
2529         the_lnet.ln_state = LNET_STATE_SHUTDOWN;
2530         lnet_net_unlock(LNET_LOCK_EX);
2531 }
2532
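/* Start a single NI: apply any LND tunables, call the LND's startup hook
 * and initialize tx credits and health.  The loopback NI is special-cased:
 * it pins the loopback net via the_lnet.ln_loni and carries no credits. */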
2533 static int
2534 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
2535 {
2536         int                     rc = -EINVAL;
2537         struct lnet_tx_queue    *tq;
2538         int                     i;
2539         struct lnet_net         *net = ni->ni_net;
2540
2541         mutex_lock(&the_lnet.ln_lnd_mutex);
2542
2543         if (tun) {
2544                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
2545                 ni->ni_lnd_tunables_set = true;
2546         }
2547
2548         rc = (net->net_lnd->lnd_startup)(ni);
2549
2550         mutex_unlock(&the_lnet.ln_lnd_mutex);
2551
2552         if (rc != 0) {
2553                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
2554                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
2555                 goto failed0;
2556         }
2557
2558         lnet_ni_lock(ni);
2559         ni->ni_state = LNET_NI_STATE_ACTIVE;
2560         lnet_ni_unlock(ni);
2561
2562         /* We keep a reference on the loopback net through the loopback NI */
2563         if (net->net_lnd->lnd_type == LOLND) {
2564                 lnet_ni_addref(ni);
2565                 LASSERT(the_lnet.ln_loni == NULL);
2566                 the_lnet.ln_loni = ni;
2567                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
2568                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
2569                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
2570                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
2571                 return 0;
2572         }
2573
2574         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
2575             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
2576                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
2577                                    libcfs_lnd2str(net->net_lnd->lnd_type),
2578                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
2579                                         "" : "per-peer ");
2580                 /* shut down the NI since, if we get here, it must already
2581                  * have been started
2582                  */
2583                 lnet_shutdown_lndni(ni);
2584                 return -EINVAL;
2585         }
2586
2587         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
2588                 tq->tq_credits_min =
2589                 tq->tq_credits_max =
2590                 tq->tq_credits = lnet_ni_tq_credits(ni);
2591         }
2592
2593         atomic_set(&ni->ni_tx_credits,
2594                    lnet_ni_tq_credits(ni) * ni->ni_ncpts);
2595         atomic_set(&ni->ni_healthv, LNET_MAX_HEALTH_VALUE);
2596
2597         /* Nodes with small feet have little entropy. The NID for this
2598          * node gives the most entropy in the low bits.
2599          */
2600         add_device_randomness(&ni->ni_nid, sizeof(ni->ni_nid));
2601
2602         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
2603                 libcfs_nidstr(&ni->ni_nid),
2604                 ni->ni_net->net_tunables.lct_peer_tx_credits,
2605                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
2606                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
2607                 ni->ni_net->net_tunables.lct_peer_timeout);
2608
2609         return 0;
2610 failed0:
2611         lnet_ni_free(ni);
2612         return rc;
2613 }
2614
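/* Look up the LND for lnd_type, loading its module on demand via
 * request_module() if it has not registered itself yet.  Returns the LND
 * or ERR_PTR(-EINVAL) if it still cannot be found. */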
2615 static const struct lnet_lnd *lnet_load_lnd(u32 lnd_type)
2616 {
2617         const struct lnet_lnd *lnd;
2618         int rc = 0;
2619
2620         mutex_lock(&the_lnet.ln_lnd_mutex);
2621         lnd = lnet_find_lnd_by_type(lnd_type);
2622         if (!lnd) {
2623                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2624                 rc = request_module("%s", libcfs_lnd2modname(lnd_type));
2625                 mutex_lock(&the_lnet.ln_lnd_mutex);
2626
2627                 lnd = lnet_find_lnd_by_type(lnd_type);
2628                 if (!lnd) {
2629                         mutex_unlock(&the_lnet.ln_lnd_mutex);
2630                         CERROR("Can't load LND %s, module %s, rc=%d\n",
2631                                libcfs_lnd2str(lnd_type),
2632                                libcfs_lnd2modname(lnd_type), rc);
2633 #ifndef HAVE_MODULE_LOADING_SUPPORT
2634                         LCONSOLE_ERROR_MSG(0x104,
2635                                            "Your kernel must be compiled with kernel module loading support.");
2636 #endif
2637                         return ERR_PTR(-EINVAL);
2638                 }
2639         }
2640         mutex_unlock(&the_lnet.ln_lnd_mutex);
2641
2642         return lnd;
2643 }
2644
2645 static int
2646 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
2647 {
2648         struct lnet_ni *ni;
2649         struct lnet_net *net_l = NULL;
2650         LIST_HEAD(local_ni_list);
2651         int rc;
2652         int ni_count = 0;
2653         __u32 lnd_type;
2654         const struct lnet_lnd  *lnd;
2655         int peer_timeout =
2656                 net->net_tunables.lct_peer_timeout;
2657         int maxtxcredits =
2658                 net->net_tunables.lct_max_tx_credits;
2659         int peerrtrcredits =
2660                 net->net_tunables.lct_peer_rtr_credits;
2661
2662         /*
2663          * make sure that this net is unique. If it isn't then
2664          * we are adding interfaces to an already existing network, and
2665          * 'net' is just a convenient way to pass in the list.
2666          * if it is unique we need to find the LND and load it if
2667          * necessary.
2668          */
2669         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
2670                 lnd_type = LNET_NETTYP(net->net_id);
2671
2672                 lnd = lnet_load_lnd(lnd_type);
2673                 if (IS_ERR(lnd)) {
2674                         rc = PTR_ERR(lnd);
2675                         goto failed0;
2676                 }
2677
2678                 mutex_lock(&the_lnet.ln_lnd_mutex);
2679                 net->net_lnd = lnd;
2680                 mutex_unlock(&the_lnet.ln_lnd_mutex);
2681
2682                 net_l = net;
2683         }
2684
2685         /*
2686          * net_l: if the network being added is unique then net_l
2687          *        will point to that network
2688          *        if the network being added is not unique then
2689          *        net_l points to the existing network.
2690          *
2691          * When we enter the loop below, we'll pick NIs off the
2692          * network being added and start them up, then add them to
2693          * a local NI list. Once we've successfully started all
2694          * the NIs, we join the local NI list (of started-up NIs)
2695          * with net_l->net_ni_list, which should point to the
2696          * correct network to add the new NI list to.
2697          *
2698          * If any of the new NIs fail to start up, then we want to
2699          * iterate through the local ni list, which should include
2700          * any NIs which were successfully started up, and shut
2701          * them down.
2702          *
2703          * After that we want to delete the network being added,
2704          * to avoid a memory leak.
2705          */
2706         while ((ni = list_first_entry_or_null(&net->net_ni_added,
2707                                               struct lnet_ni,
2708                                               ni_netlist)) != NULL) {
2709                 list_del_init(&ni->ni_netlist);
2710
2711                 /* make sure that the NI we're about to start up is
2712                  * actually unique. If it's not, fail. */
2713                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
2714                                         ni->ni_interface)) {
2715                         rc = -EEXIST;
2716                         goto failed1;
2717                 }
2718
2719                 /* adjust the pointer to the parent network, in case
2720                  * the net is a duplicate */
2721                 ni->ni_net = net_l;
2722
2723                 rc = lnet_startup_lndni(ni, tun);
2724
2725                 if (rc < 0)
2726                         goto failed1;
2727
2728                 lnet_ni_addref(ni);
2729                 list_add_tail(&ni->ni_netlist, &local_ni_list);
2730
2731                 ni_count++;
2732         }
2733
2734         lnet_net_lock(LNET_LOCK_EX);
2735         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
2736         lnet_incr_dlc_seq();
2737         lnet_net_unlock(LNET_LOCK_EX);
2738
2739         /* if the network is not unique then we don't want to keep
2740          * it around after we're done. Free it. Otherwise add that
2741          * net to the global the_lnet.ln_nets */
2742         if (net_l != net && net_l != NULL) {
2743                 /*
2744                  * TODO - note. currently the tunables can not be updated
2745                  * once added
2746                  */
2747                 lnet_net_free(net);
2748         } else {
2749                 /*
2750                  * restore the tunables after they have been overwritten
2751                  * by the LND
2752                  */
2753                 if (peer_timeout != -1)
2754                         net->net_tunables.lct_peer_timeout = peer_timeout;
2755                 if (maxtxcredits != -1)
2756                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
2757                 if (peerrtrcredits != -1)
2758                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
2759
2760                 lnet_net_lock(LNET_LOCK_EX);
2761                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
2762                 lnet_net_unlock(LNET_LOCK_EX);
2763         }
2764
2765         return ni_count;
2766
2767 failed1:
2768         /*
2769          * shutdown the new NIs that are being started up
2770          * free the NET being started
2771          */
2772         while ((ni = list_first_entry_or_null(&local_ni_list,
2773                                               struct lnet_ni,
2774                                               ni_netlist)) != NULL)
2775                 lnet_shutdown_lndni(ni);
2776
2777 failed0:
2778         lnet_net_free(net);
2779
2780         return rc;
2781 }
2782
2783 static int
2784 lnet_startup_lndnets(struct list_head *netlist)
2785 {
2786         struct lnet_net         *net;
2787         int                     rc;
2788         int                     ni_count = 0;
2789
2790         /*
2791          * Change to running state before bringing up the LNDs. This
2792          * allows lnet_shutdown_lndnets() to assert that we've passed
2793          * through here.
2794          */
2795         lnet_net_lock(LNET_LOCK_EX);
2796         the_lnet.ln_state = LNET_STATE_RUNNING;
2797         lnet_net_unlock(LNET_LOCK_EX);
2798
2799         while ((net = list_first_entry_or_null(netlist,
2800                                                struct lnet_net,
2801                                                net_list)) != NULL) {
2802                 list_del_init(&net->net_list);
2803
2804                 rc = lnet_startup_lndnet(net, NULL);
2805
2806                 if (rc < 0)
2807                         goto failed;
2808
2809                 ni_count += rc;
2810         }
2811
2812         return ni_count;
2813 failed:
2814         lnet_shutdown_lndnets();
2815
2816         return rc;
2817 }
2818
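/* Emit one ln_key_list as a nested set of netlink attributes, recursing
 * into NLA_NESTED entries.  Returns the index of the last list consumed
 * from data[] on success, or a negative errno. */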
2819 static int lnet_genl_parse_list(struct sk_buff *msg,
2820                                 const struct ln_key_list *data[], u16 idx)
2821 {
2822         const struct ln_key_list *list = data[idx];
2823         const struct ln_key_props *props;
2824         struct nlattr *node;
2825         u16 count;
2826
2827         if (!list)
2828                 return 0;
2829
2830         if (!list->lkl_maxattr)
2831                 return -ERANGE;
2832
2833         props = list->lkl_list;
2834         if (!props)
2835                 return -EINVAL;
2836
2837         node = nla_nest_start(msg, LN_SCALAR_ATTR_LIST);
2838         if (!node)
2839                 return -ENOBUFS;
2840
2841         for (count = 1; count <= list->lkl_maxattr; count++) {
2842                 struct nlattr *key = nla_nest_start(msg, count);
2843
2844                 if (count == 1)
2845                         nla_put_u16(msg, LN_SCALAR_ATTR_LIST_SIZE,
2846                                     list->lkl_maxattr);
2847
2848                 nla_put_u16(msg, LN_SCALAR_ATTR_INDEX, count);
2849                 if (props[count].lkp_value)
2850                         nla_put_string(msg, LN_SCALAR_ATTR_VALUE,
2851                                        props[count].lkp_value);
2852                 if (props[count].lkp_key_format)
2853                         nla_put_u16(msg, LN_SCALAR_ATTR_KEY_FORMAT,
2854                                     props[count].lkp_key_format);
2855                 nla_put_u16(msg, LN_SCALAR_ATTR_NLA_TYPE,
2856                             props[count].lkp_data_type);
2857                 if (props[count].lkp_data_type == NLA_NESTED) {
2858                         int rc;
2859
2860                         rc = lnet_genl_parse_list(msg, data, ++idx);
2861                         if (rc < 0)
2862                                 return rc;
2863                         idx = rc;
2864                 }
2865
2866                 nla_nest_end(msg, key);
2867         }
2868
2869         nla_nest_end(msg, node);
2870         return idx;
2871 }
2872
2873 int lnet_genl_send_scalar_list(struct sk_buff *msg, u32 portid, u32 seq,
2874                                const struct genl_family *family, int flags,
2875                                u8 cmd, const struct ln_key_list *data[])
2876 {
2877         int rc = 0;
2878         void *hdr;
2879
2880         if (!data[0])
2881                 return -EINVAL;
2882
2883         hdr = genlmsg_put(msg, portid, seq, family, flags, cmd);
2884         if (!hdr)
2885                 GOTO(canceled, rc = -EMSGSIZE);
2886
2887         rc = lnet_genl_parse_list(msg, data, 0);
2888         if (rc < 0)
2889                 GOTO(canceled, rc);
2890
2891         genlmsg_end(msg, hdr);
2892 canceled:
2893         if (rc < 0)
2894                 genlmsg_cancel(msg, hdr);
2895         return rc > 0 ? 0 : rc;
2896 }
2897 EXPORT_SYMBOL(lnet_genl_send_scalar_list);
2898
2899 static struct genl_family lnet_family;
2900
2901 /**
2902  * Initialize LNet library.
2903  *
2904  * Automatically called at module loading time. Caller has to call
2905  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
2906  * latter returned 0. It must be called exactly once.
2907  *
2908  * \retval 0 on success
2909  * \retval -ve on failures.
2910  */
2911 int lnet_lib_init(void)
2912 {
2913         int rc;
2914
2915         lnet_assert_wire_constants();
2916
2917         /* refer to global cfs_cpt_table for now */
2918         the_lnet.ln_cpt_table = cfs_cpt_tab;
2919         the_lnet.ln_cpt_number = cfs_cpt_number(cfs_cpt_tab);
2920
2921         LASSERT(the_lnet.ln_cpt_number > 0);
2922         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
2923                 /* we risk consuming all lh_cookie values */
2924                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
2925                        "please change setting of CPT-table and retry\n",
2926                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
2927                 return -E2BIG;
2928         }
2929
2930         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
2931                 the_lnet.ln_cpt_bits++;
2932
2933         rc = lnet_create_locks();
2934         if (rc != 0) {
2935                 CERROR("Can't create LNet global locks: %d\n", rc);
2936                 return rc;
2937         }
2938
2939         rc = genl_register_family(&lnet_family);
2940         if (rc != 0) {
2941                 lnet_destroy_locks();
2942                 CERROR("Can't register LNet netlink family: %d\n", rc);
2943                 return rc;
2944         }
2945
2946         the_lnet.ln_refcount = 0;
2947         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
2948         INIT_LIST_HEAD(&the_lnet.ln_msg_resend);
2949
2950         /* The hash table size is the number of bits it takes to express the set
2951          * ln_num_routes, minus 1 (better to underestimate than overestimate so we
2952          * don't waste memory). */
2953         if (rnet_htable_size <= 0)
2954                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
2955         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
2956                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
2957         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
2958                                            order_base_2(rnet_htable_size) - 1);
2959
2960         /* All LNDs apart from the LOLND are in separate modules.  They
2961          * register themselves when their module loads, and unregister
2962          * themselves when their module is unloaded. */
2963         lnet_register_lnd(&the_lolnd);
2964         return 0;
2965 }
2966
2967 /**
2968  * Finalize LNet library.
2969  *
2970  * \pre lnet_lib_init() called with success.
2971  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
2972  *
2973  * As this happens at module-unload, all LNDs must already be unloaded,
2974  * so they must already be unregistered.
2975  */
2976 void lnet_lib_exit(void)
2977 {
2978         int i;
2979
2980         LASSERT(the_lnet.ln_refcount == 0);
2981         lnet_unregister_lnd(&the_lolnd);
2982         for (i = 0; i < NUM_LNDS; i++)
2983                 LASSERT(!the_lnet.ln_lnds[i]);
2984         lnet_destroy_locks();
2985         genl_unregister_family(&lnet_family);
2986 }
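
/*
 * Pairing sketch (illustrative; the real module glue lives outside this
 * file and the names below are hypothetical): lnet_lib_init() and
 * lnet_lib_exit() are expected to bracket the life of the module:
 *
 *	static int __init lnet_module_init(void)
 *	{
 *		return lnet_lib_init();
 *	}
 *
 *	static void __exit lnet_module_exit(void)
 *	{
 *		lnet_lib_exit();
 *	}
 *
 *	module_init(lnet_module_init);
 *	module_exit(lnet_module_exit);
 */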
2987
2988 /**
2989  * Set LNet PID and start LNet interfaces, routing, and forwarding.
2990  *
2991  * Users must call this function at least once before any other functions.
2992  * For each successful call there must be a corresponding call to
2993  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
2994  * ignored.
2995  *
2996  * The PID used by LNet may be different from the one requested.
2997  * See LNetGetId().
2998  *
2999  * \param requested_pid PID requested by the caller.
3000  *
3001  * \return >= 0 on success, and < 0 error code on failures.
3002  */
3003 int
3004 LNetNIInit(lnet_pid_t requested_pid)
3005 {
3006         int im_a_router = 0;
3007         int rc;
3008         int ni_bytes;
3009         struct lnet_ping_buffer *pbuf;
3010         struct lnet_handle_md ping_mdh;
3011         LIST_HEAD(net_head);
3012         struct lnet_net *net;
3013
3014         mutex_lock(&the_lnet.ln_api_mutex);
3015
3016         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
3017
3018         if (the_lnet.ln_state == LNET_STATE_STOPPING) {
3019                 mutex_unlock(&the_lnet.ln_api_mutex);
3020                 return -ESHUTDOWN;
3021         }
3022
3023         if (the_lnet.ln_refcount > 0) {
3024                 rc = the_lnet.ln_refcount++;
3025                 mutex_unlock(&the_lnet.ln_api_mutex);
3026                 return rc;
3027         }
3028
3029         rc = lnet_prepare(requested_pid);
3030         if (rc != 0) {
3031                 mutex_unlock(&the_lnet.ln_api_mutex);
3032                 return rc;
3033         }
3034
3035         /* create a net for the loopback network */
3036         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
3037         if (net == NULL) {
3038                 rc = -ENOMEM;
3039                 goto err_empty_list;
3040         }
3041
3042         /* Add in the loopback NI */
3043         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
3044                 rc = -ENOMEM;
3045                 goto err_empty_list;
3046         }
3047
3048         if (use_tcp_bonding)
3049                 CWARN("use_tcp_bonding has been removed. Use Multi-Rail and Dynamic Discovery instead, see LU-13641\n");
3050
3051         /* If LNet is being initialized via DLC it is possible
3052          * that the user requests not to load module parameters (ones which
3053          * are supported by DLC) on initialization.  Therefore, make sure not
3054          * to load networks, routes and forwarding from module parameters
3055          * in this case.  On cleanup after a failure, only clean up routes
3056          * if they were loaded. */
3057         if (!the_lnet.ln_nis_from_mod_params) {
3058                 rc = lnet_parse_networks(&net_head, lnet_get_networks());
3059                 if (rc < 0)
3060                         goto err_empty_list;
3061         }
3062
3063         rc = lnet_startup_lndnets(&net_head);
3064         if (rc < 0)
3065                 goto err_empty_list;
3066
3067         if (!the_lnet.ln_nis_from_mod_params) {
3068                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
3069                 if (rc != 0)
3070                         goto err_shutdown_lndnis;
3071
3072                 rc = lnet_rtrpools_alloc(im_a_router);
3073                 if (rc != 0)
3074                         goto err_destroy_routes;
3075         }
3076
3077         rc = lnet_acceptor_start();
3078         if (rc != 0)
3079                 goto err_destroy_routes;
3080
3081         the_lnet.ln_refcount = 1;
3082         /* Now I may use my own API functions... */
3083
3084         ni_bytes = LNET_PING_INFO_HDR_SIZE;
3085         list_for_each_entry(net, &the_lnet.ln_nets, net_list)
3086                 ni_bytes += lnet_get_net_ni_bytes_locked(net);
3087
3088         rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_bytes, true);
3089         if (rc != 0)
3090                 goto err_acceptor_stop;
3091
3092         lnet_ping_target_update(pbuf, ping_mdh);
3093
3094         the_lnet.ln_mt_handler = lnet_mt_event_handler;
3095
3096         rc = lnet_push_target_init();
3097         if (rc != 0)
3098                 goto err_stop_ping;
3099
3100         rc = lnet_peer_discovery_start();
3101         if (rc != 0)
3102                 goto err_destroy_push_target;
3103
3104         rc = lnet_monitor_thr_start();
3105         if (rc != 0)
3106                 goto err_stop_discovery_thr;
3107
3108         lnet_fault_init();
3109         lnet_router_debugfs_init();
3110
3111         mutex_unlock(&the_lnet.ln_api_mutex);
3112
3113         complete_all(&the_lnet.ln_started);
3114
3115         /* wait for all routers to start */
3116         lnet_wait_router_start();
3117
3118         return 0;
3119
3120 err_stop_discovery_thr:
3121         lnet_peer_discovery_stop();
3122 err_destroy_push_target:
3123         lnet_push_target_fini();
3124 err_stop_ping:
3125         lnet_ping_target_fini();
3126 err_acceptor_stop:
3127         the_lnet.ln_refcount = 0;
3128         lnet_acceptor_stop();
3129 err_destroy_routes:
3130         if (!the_lnet.ln_nis_from_mod_params)
3131                 lnet_destroy_routes();
3132 err_shutdown_lndnis:
3133         lnet_shutdown_lndnets();
3134 err_empty_list:
3135         lnet_unprepare();
3136         LASSERT(rc < 0);
3137         mutex_unlock(&the_lnet.ln_api_mutex);
3138         while ((net = list_first_entry_or_null(&net_head,
3139                                                struct lnet_net,
3140                                                net_list)) != NULL) {
3141                 list_del_init(&net->net_list);
3142                 lnet_net_free(net);
3143         }
3144         return rc;
3145 }
3146 EXPORT_SYMBOL(LNetNIInit);
3147
3148 /**
3149  * Stop LNet interfaces, routing, and forwarding.
3150  *
3151  * Users must call this function once for each successful call to LNetNIInit().
3152  * Once the LNetNIFini() operation has been started, the results of pending
3153  * API operations are undefined.
3154  *
3155  * \return always 0 in the current implementation.
3156  */
3157 int
3158 LNetNIFini(void)
3159 {
3160         mutex_lock(&the_lnet.ln_api_mutex);
3161
3162         LASSERT(the_lnet.ln_refcount > 0);
3163
3164         if (the_lnet.ln_refcount != 1) {
3165                 the_lnet.ln_refcount--;
3166         } else {
3167                 LASSERT(!the_lnet.ln_niinit_self);
3168
3169                 lnet_net_lock(LNET_LOCK_EX);
3170                 the_lnet.ln_state = LNET_STATE_STOPPING;
3171                 lnet_net_unlock(LNET_LOCK_EX);
3172
3173                 lnet_fault_fini();
3174
3175                 lnet_router_debugfs_fini();
3176                 lnet_monitor_thr_stop();
3177                 lnet_peer_discovery_stop();
3178                 lnet_push_target_fini();
3179                 lnet_ping_target_fini();
3180
3181                 /* Teardown of code that uses LNet's own API functions must happen BEFORE here */
3182                 the_lnet.ln_refcount = 0;
3183
3184                 lnet_acceptor_stop();
3185                 lnet_destroy_routes();
3186                 lnet_shutdown_lndnets();
3187                 lnet_unprepare();
3188         }
3189
3190         mutex_unlock(&the_lnet.ln_api_mutex);
3191         return 0;
3192 }
3193 EXPORT_SYMBOL(LNetNIFini);
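
/*
 * Typical caller pairing (sketch; my_subsystem_start/stop are
 * hypothetical, and LNET_PID_LUSTRE is the PID normally requested by
 * Lustre):
 *
 *	static int my_subsystem_start(void)
 *	{
 *		int rc = LNetNIInit(LNET_PID_LUSTRE);
 *
 *		if (rc < 0)
 *			return rc;
 *		...
 *		return 0;
 *	}
 *
 *	static void my_subsystem_stop(void)
 *	{
 *		...
 *		LNetNIFini();
 *	}
 */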
3194
3195 /**
3196  * Grabs the ni data from the ni structure and fills the out
3197  * parameters
3198  *
3199  * \param[in] ni                network interface structure
3200  * \param[out] cfg_ni           NI config information
3201  * \param[out] tun              network and LND tunables
3202  */
3203 static void
3204 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
3205                    struct lnet_ioctl_config_lnd_tunables *tun,
3206                    struct lnet_ioctl_element_stats *stats,
3207                    __u32 tun_size)
3208 {
3209         size_t min_size = 0;
3210         int i;
3211
3212         if (!ni || !cfg_ni || !tun || !nid_is_nid4(&ni->ni_nid))
3213                 return;
3214
3215         if (ni->ni_interface != NULL) {
3216                 strncpy(cfg_ni->lic_ni_intf,
3217                         ni->ni_interface,
3218                         sizeof(cfg_ni->lic_ni_intf));
3219         }
3220
3221         cfg_ni->lic_nid = lnet_nid_to_nid4(&ni->ni_nid);
3222         cfg_ni->lic_status = lnet_ni_get_status_locked(ni);
3223         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
3224
3225         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
3226
3227         if (stats) {
3228                 stats->iel_send_count = lnet_sum_stats(&ni->ni_stats,
3229                                                        LNET_STATS_TYPE_SEND);
3230                 stats->iel_recv_count = lnet_sum_stats(&ni->ni_stats,
3231                                                        LNET_STATS_TYPE_RECV);
3232                 stats->iel_drop_count = lnet_sum_stats(&ni->ni_stats,
3233                                                        LNET_STATS_TYPE_DROP);
3234         }
3235
3236         /*
3237          * tun->lt_tun will always be present, but to stay backwards
3238          * compatible we must handle the case where the userspace
3239          * tun->lt_tun is smaller than what the kernel has (it may come
3240          * from an older userspace program).  In that case copy only as
3241          * much information as there is space available for.
3242          */
3243         min_size = tun_size - sizeof(tun->lt_cmn);
3244         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
3245
3246         /* copy over the cpts */
3247         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
3248             ni->ni_cpts == NULL)  {
3249                 for (i = 0; i < ni->ni_ncpts; i++)
3250                         cfg_ni->lic_cpts[i] = i;
3251         } else {
3252                 for (i = 0;
3253                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
3254                      i < LNET_MAX_SHOW_NUM_CPT;
3255                      i++)
3256                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
3257         }
3258         cfg_ni->lic_ncpts = ni->ni_ncpts;
3259 }
3260
3261 /**
3262  * NOTE: This is a legacy function left in the code to be backwards
3263  * compatible with older userspace programs. It should eventually be
3264  * removed.
3265  *
3266  * Grabs the ni data from the ni structure and fills the out
3267  * parameters
3268  *
3269  * \param[in] ni                network interface structure
3270  * \param[out] config           config information
3271  */
3272 static void
3273 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
3274                          struct lnet_ioctl_config_data *config)
3275 {
3276         struct lnet_ioctl_net_config *net_config;
3277         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
3278         size_t min_size, tunable_size = 0;
3279         int i;
3280
3281         if (!ni || !config || !nid_is_nid4(&ni->ni_nid))
3282                 return;
3283
3284         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
3285         if (!net_config)
3286                 return;
3287
3288         if (!ni->ni_interface)
3289                 return;
3290
3291         strncpy(net_config->ni_interface,
3292                 ni->ni_interface,
3293                 sizeof(net_config->ni_interface));
3294
3295         config->cfg_nid = lnet_nid_to_nid4(&ni->ni_nid);
3296         config->cfg_config_u.cfg_net.net_peer_timeout =
3297                 ni->ni_net->net_tunables.lct_peer_timeout;
3298         config->cfg_config_u.cfg_net.net_max_tx_credits =
3299                 ni->ni_net->net_tunables.lct_max_tx_credits;
3300         config->cfg_config_u.cfg_net.net_peer_tx_credits =
3301                 ni->ni_net->net_tunables.lct_peer_tx_credits;
3302         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
3303                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
3304
3305         net_config->ni_status = lnet_ni_get_status_locked(ni);
3306
3307         if (ni->ni_cpts) {
3308                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
3309
3310                 for (i = 0; i < num_cpts; i++)
3311                         net_config->ni_cpts[i] = ni->ni_cpts[i];
3312
3313                 config->cfg_ncpts = num_cpts;
3314         }
3315
3316         /*
3317          * See if userspace tools sent in a newer and larger version
3318          * of struct lnet_tunables than what the kernel uses.
3319          */
3320         min_size = sizeof(*config) + sizeof(*net_config);
3321
3322         if (config->cfg_hdr.ioc_len > min_size)
3323                 tunable_size = config->cfg_hdr.ioc_len - min_size;
3324
3325         /* Don't copy too much data to user space */
3326         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
3327         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
3328
3329         if (lnd_cfg && min_size) {
3330                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
3331                 config->cfg_config_u.cfg_net.net_interface_count = 1;
3332
3333                 /* Tell userspace that the kernel side has less data */
3334                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
3335                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
3336                         config->cfg_hdr.ioc_len -= min_size;
3337                 }
3338         }
3339 }
3340
3341 struct lnet_ni *
3342 lnet_get_ni_idx_locked(int idx)
3343 {
3344         struct lnet_ni          *ni;
3345         struct lnet_net         *net;
3346
3347         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3348                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3349                         if (idx-- == 0)
3350                                 return ni;
3351                 }
3352         }
3353
3354         return NULL;
3355 }
3356
3357 int lnet_get_net_healthv_locked(struct lnet_net *net)
3358 {
3359         struct lnet_ni *ni;
3360         int best_healthv = 0;
3361         int healthv, ni_fatal;
3362
3363         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3364                 healthv = atomic_read(&ni->ni_healthv);
3365                 ni_fatal = atomic_read(&ni->ni_fatal_error_on);
3366                 if (!ni_fatal && healthv > best_healthv)
3367                         best_healthv = healthv;
3368         }
3369
3370         return best_healthv;
3371 }
3372
3373 struct lnet_ni *
3374 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
3375 {
3376         struct lnet_ni          *ni;
3377         struct lnet_net         *net = mynet;
3378
3379         /*
3380          * It is possible that the net has been cleaned out while there is
3381          * a message being sent. This function accesses the net without
3382          * checking if the list is empty.
3383          */
3384         if (!prev) {
3385                 if (!net)
3386                         net = list_first_entry(&the_lnet.ln_nets,
3387                                                struct lnet_net,
3388                                                net_list);
3389                 if (list_empty(&net->net_ni_list))
3390                         return NULL;
3391                 ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
3392                                       ni_netlist);
3393
3394                 return ni;
3395         }
3396
3397         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
3398                 /* if you reached the end of the ni list and the net is
3399                  * specified, then there are no more nis in that net */
3400                 if (net != NULL)
3401                         return NULL;
3402
3403                 /* we reached the end of this net ni list. move to the
3404                  * next net */
3405                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
3406                         /* no more nets and no more NIs. */
3407                         return NULL;
3408
3409                 /* get the next net */
3410                 net = list_first_entry(&prev->ni_net->net_list, struct lnet_net,
3411                                        net_list);
3412                 if (list_empty(&net->net_ni_list))
3413                         return NULL;
3414                 /* get the ni on it */
3415                 ni = list_first_entry(&net->net_ni_list, struct lnet_ni,
3416                                       ni_netlist);
3417
3418                 return ni;
3419         }
3420
3421         if (list_empty(&prev->ni_netlist))
3422                 return NULL;
3423
3424         /* there are more nis left */
3425         ni = list_first_entry(&prev->ni_netlist, struct lnet_ni, ni_netlist);
3426
3427         return ni;
3428 }
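
/*
 * Iteration sketch (illustrative; handle_ni() is a hypothetical per-NI
 * handler): with the net lock held, every local NI can be walked by
 * passing NULL for the net and feeding the previous result back in:
 *
 *	struct lnet_ni *ni = NULL;
 *	int cpt;
 *
 *	cpt = lnet_net_lock_current();
 *	while ((ni = lnet_get_next_ni_locked(NULL, ni)) != NULL)
 *		handle_ni(ni);
 *	lnet_net_unlock(cpt);
 */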
3429
3430 int
3431 lnet_get_net_config(struct lnet_ioctl_config_data *config)
3432 {
3433         struct lnet_ni *ni;
3434         int cpt;
3435         int rc = -ENOENT;
3436         int idx = config->cfg_count;
3437
3438         cpt = lnet_net_lock_current();
3439
3440         ni = lnet_get_ni_idx_locked(idx);
3441
3442         if (ni != NULL) {
3443                 rc = 0;
3444                 lnet_ni_lock(ni);
3445                 lnet_fill_ni_info_legacy(ni, config);
3446                 lnet_ni_unlock(ni);
3447         }
3448
3449         lnet_net_unlock(cpt);
3450         return rc;
3451 }
3452
3453 int
3454 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
3455                    struct lnet_ioctl_config_lnd_tunables *tun,
3456                    struct lnet_ioctl_element_stats *stats,
3457                    __u32 tun_size)
3458 {
3459         struct lnet_ni          *ni;
3460         int                     cpt;
3461         int                     rc = -ENOENT;
3462
3463         if (!cfg_ni || !tun || !stats)
3464                 return -EINVAL;
3465
3466         cpt = lnet_net_lock_current();
3467
3468         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
3469
3470         if (ni) {
3471                 rc = 0;
3472                 lnet_ni_lock(ni);
3473                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
3474                 lnet_ni_unlock(ni);
3475         }
3476
3477         lnet_net_unlock(cpt);
3478         return rc;
3479 }
3480
3481 int lnet_get_ni_stats(struct lnet_ioctl_element_msg_stats *msg_stats)
3482 {
3483         struct lnet_ni *ni;
3484         int cpt;
3485         int rc = -ENOENT;
3486
3487         if (!msg_stats)
3488                 return -EINVAL;
3489
3490         cpt = lnet_net_lock_current();
3491
3492         ni = lnet_get_ni_idx_locked(msg_stats->im_idx);
3493
3494         if (ni) {
3495                 lnet_usr_translate_stats(msg_stats, &ni->ni_stats);
3496                 rc = 0;
3497         }
3498
3499         lnet_net_unlock(cpt);
3500
3501         return rc;
3502 }
3503
3504 static int lnet_add_net_common(struct lnet_net *net,
3505                                struct lnet_ioctl_config_lnd_tunables *tun)
3506 {
3507         struct lnet_handle_md ping_mdh;
3508         struct lnet_ping_buffer *pbuf;
3509         struct lnet_remotenet *rnet;
3510         struct lnet_ni *ni;
3511         u32 net_id;
3512         int rc;
3513
3514         lnet_net_lock(LNET_LOCK_EX);
3515         rnet = lnet_find_rnet_locked(net->net_id);
3516         lnet_net_unlock(LNET_LOCK_EX);
3517         /*
3518          * make sure that the net being added doesn't invalidate the
3519          * routing configuration LNet is currently keeping
3520          */
3521         if (rnet) {
3522                 CERROR("Adding net %s will invalidate routing configuration\n",
3523                        libcfs_net2str(net->net_id));
3524                 lnet_net_free(net);
3525                 return -EUSERS;
3526         }
3527
3528         if (tun)
3529                 memcpy(&net->net_tunables,
3530                        &tun->lt_cmn, sizeof(net->net_tunables));
3531         else
3532                 memset(&net->net_tunables, -1, sizeof(net->net_tunables));
3533
3534         net_id = net->net_id;
3535
3536         rc = lnet_startup_lndnet(net,
3537                                  (tun) ? &tun->lt_tun : NULL);
3538         if (rc < 0)
3539                 return rc;
3540
3541         /* make sure to calculate the correct number of slots in the ping
3542          * buffer. Since the ping info is a flattened list of all the NIs,
3543          * we should allocate enough slots to accommodate the number of NIs
3544          * which will be added.
3545          */
3546         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3547                                     LNET_PING_INFO_HDR_SIZE +
3548                                     lnet_get_ni_bytes(),
3549                                     false);
3550         if (rc < 0) {
3551                 lnet_shutdown_lndnet(net);
3552                 return rc;
3553         }
3554
3555         lnet_net_lock(LNET_LOCK_EX);
3556         net = lnet_get_net_locked(net_id);
3557         LASSERT(net);
3558
3559         /* apply the UDSPs */
3560         rc = lnet_udsp_apply_policies_on_net(net);
3561         if (rc)
3562                 CERROR("Failed to apply UDSPs on local net %s\n",
3563                        libcfs_net2str(net->net_id));
3564
3565         /* At this point we lost track of which NI was just added, so we
3566          * just re-apply the policies on all of the NIs on this net
3567          */
3568         list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3569                 rc = lnet_udsp_apply_policies_on_ni(ni);
3570                 if (rc)
3571                         CERROR("Failed to apply UDSPs on ni %s\n",
3572                                libcfs_nidstr(&ni->ni_nid));
3573         }
3574         lnet_net_unlock(LNET_LOCK_EX);
3575
3576         /*
3577          * Start the acceptor thread if this is the first network
3578          * being added that requires the thread.
3579          */
3580         if (net->net_lnd->lnd_accept) {
3581                 rc = lnet_acceptor_start();
3582                 if (rc < 0) {
3583                         /* shutdown the net that we just started */
3584                         CERROR("Failed to start up acceptor thread\n");
3585                         lnet_shutdown_lndnet(net);
3586                         goto failed;
3587                 }
3588         }
3589
3590         lnet_net_lock(LNET_LOCK_EX);
3591         lnet_peer_net_added(net);
3592         lnet_net_unlock(LNET_LOCK_EX);
3593
3594         lnet_ping_target_update(pbuf, ping_mdh);
3595
3596         return 0;
3597
3598 failed:
3599         lnet_ping_md_unlink(pbuf, &ping_mdh);
3600         lnet_ping_buffer_decref(pbuf);
3601         return rc;
3602 }
3603
3604 static void
3605 lnet_set_tune_defaults(struct lnet_ioctl_config_lnd_tunables *tun)
3606 {
3607         if (tun) {
3608                 if (tun->lt_cmn.lct_peer_timeout < 0)
3609                         tun->lt_cmn.lct_peer_timeout = DEFAULT_PEER_TIMEOUT;
3610                 if (!tun->lt_cmn.lct_peer_tx_credits)
3611                         tun->lt_cmn.lct_peer_tx_credits = DEFAULT_PEER_CREDITS;
3612                 if (!tun->lt_cmn.lct_max_tx_credits)
3613                         tun->lt_cmn.lct_max_tx_credits = DEFAULT_CREDITS;
3614         }
3615 }
3616
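/*
 * Handle the legacy "ip2nets" style configuration coming in from DLC.
 * The string maps interfaces to networks by IP pattern, e.g. (an
 * illustrative value, not taken from this code):
 *
 *	ip2nets="tcp(eth0) 192.168.0.*; o2ib(ib0) 10.10.*.*"
 *
 * Each matching rule is expanded into a "networks" style string and the
 * resulting nets are then added one by one via lnet_add_net_common().
 */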
3617 static int lnet_handle_legacy_ip2nets(char *ip2nets,
3618                                       struct lnet_ioctl_config_lnd_tunables *tun)
3619 {
3620         struct lnet_net *net;
3621         const char *nets;
3622         int rc;
3623         LIST_HEAD(net_head);
3624
3625         rc = lnet_parse_ip2nets(&nets, ip2nets);
3626         if (rc < 0)
3627                 return rc;
3628
3629         rc = lnet_parse_networks(&net_head, nets);
3630         if (rc < 0)
3631                 return rc;
3632
3633         lnet_set_tune_defaults(tun);
3634
3635         mutex_lock(&the_lnet.ln_api_mutex);
3636         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3637                 rc = -ESHUTDOWN;
3638                 goto out;
3639         }
3640
3641         while ((net = list_first_entry_or_null(&net_head,
3642                                                struct lnet_net,
3643                                                net_list)) != NULL) {
3644                 list_del_init(&net->net_list);
3645                 rc = lnet_add_net_common(net, tun);
3646                 if (rc < 0)
3647                         goto out;
3648         }
3649
3650 out:
3651         mutex_unlock(&the_lnet.ln_api_mutex);
3652
3653         while ((net = list_first_entry_or_null(&net_head,
3654                                                struct lnet_net,
3655                                                net_list)) != NULL) {
3656                 list_del_init(&net->net_list);
3657                 lnet_net_free(net);
3658         }
3659         return rc;
3660 }
3661
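/*
 * lnet_dyn_add_ni() and lnet_dyn_del_ni() are the kernel side of dynamic
 * NI configuration.  They are typically reached through the DLC ioctl
 * path driven from userspace, e.g. (illustrative commands):
 *
 *	lnetctl net add --net tcp --if eth0
 *	lnetctl net del --net tcp
 */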
3662 int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf, u32 net_id,
3663                     struct lnet_ioctl_config_lnd_tunables *tun)
3664 {
3665         struct lnet_net *net;
3666         struct lnet_ni *ni;
3667         int rc, i;
3668         u32 lnd_type;
3669
3670         /* handle legacy ip2nets from DLC */
3671         if (conf->lic_legacy_ip2nets[0] != '\0')
3672                 return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
3673                                                   tun);
3674
3675         lnd_type = LNET_NETTYP(net_id);
3676
3677         if (!libcfs_isknown_lnd(lnd_type)) {
3678                 CERROR("No valid net and lnd information provided\n");
3679                 return -ENOENT;
3680         }
3681
3682         net = lnet_net_alloc(net_id, NULL);
3683         if (!net)
3684                 return -ENOMEM;
3685
3686         for (i = 0; i < conf->lic_ncpts; i++) {
3687                 if (conf->lic_cpts[i] >= LNET_CPT_NUMBER) {
3688                         lnet_net_free(net);
3689                         return -ERANGE;
3690                 }
3691         }
3692
3693         ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
3694                                        conf->lic_ni_intf);
3695         if (!ni) {
3696                 lnet_net_free(net);
3697                 return -ENOMEM;
3698         }
3699
3700         lnet_set_tune_defaults(tun);
3701
3702         mutex_lock(&the_lnet.ln_api_mutex);
3703         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3704                 lnet_net_free(net);
3705                 rc = -ESHUTDOWN;
3706         } else {
3707                 rc = lnet_add_net_common(net, tun);
3708         }
3709
3710         mutex_unlock(&the_lnet.ln_api_mutex);
3711
3712         /* If the NI already exists, delete this new unused copy */
3713         if (rc == -EEXIST)
3714                 lnet_ni_free(ni);
3715
3716         return rc;
3717 }
3718
3719 int lnet_dyn_del_ni(struct lnet_nid *nid)
3720 {
3721         struct lnet_net *net;
3722         struct lnet_ni *ni;
3723         u32 net_id = LNET_NID_NET(nid);
3724         struct lnet_ping_buffer *pbuf;
3725         struct lnet_handle_md ping_mdh;
3726         int net_bytes, rc;
3727         bool net_empty;
3728
3729         /* don't allow userspace to shutdown the LOLND */
3730         if (LNET_NETTYP(net_id) == LOLND)
3731                 return -EINVAL;
3732
3733         mutex_lock(&the_lnet.ln_api_mutex);
3734         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3735                 rc = -ESHUTDOWN;
3736                 goto unlock_api_mutex;
3737         }
3738
3739         lnet_net_lock(0);
3740
3741         net = lnet_get_net_locked(net_id);
3742         if (!net) {
3743                 CERROR("net %s not found\n",
3744                        libcfs_net2str(net_id));
3745                 rc = -ENOENT;
3746                 goto unlock_net;
3747         }
3748
3749         if (!nid_addr_is_set(nid)) {
3750                 /* remove the entire net */
3751                 net_bytes = lnet_get_net_ni_bytes_locked(net);
3752
3753                 lnet_net_unlock(0);
3754
3755                 /* create and link a new ping info, before removing the old one */
3756                 rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3757                                             LNET_PING_INFO_HDR_SIZE +
3758                                             lnet_get_ni_bytes() - net_bytes,
3759                                             false);
3760                 if (rc != 0)
3761                         goto unlock_api_mutex;
3762
3763                 lnet_shutdown_lndnet(net);
3764
3765                 lnet_acceptor_stop();
3766
3767                 lnet_ping_target_update(pbuf, ping_mdh);
3768
3769                 goto unlock_api_mutex;
3770         }
3771
3772         ni = lnet_nid_to_ni_locked(nid, 0);
3773         if (!ni) {
3774                 CERROR("nid %s not found\n", libcfs_nidstr(nid));
3775                 rc = -ENOENT;
3776                 goto unlock_net;
3777         }
3778
3779         net_bytes = lnet_get_net_ni_bytes_locked(net);
3780         net_empty = list_is_singular(&net->net_ni_list);
3781
3782         lnet_net_unlock(0);
3783
3784         /* create and link a new ping info, before removing the old one */
3785         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3786                                     (LNET_PING_INFO_HDR_SIZE +
3787                                      lnet_get_ni_bytes() -
3788                                      lnet_ping_sts_size(&ni->ni_nid)),
3789                                     false);
3790         if (rc != 0)
3791                 goto unlock_api_mutex;
3792
3793         lnet_shutdown_lndni(ni);
3794
3795         lnet_acceptor_stop();
3796
3797         lnet_ping_target_update(pbuf, ping_mdh);
3798
3799         /* check if the net is empty and remove it if it is */
3800         if (net_empty)
3801                 lnet_shutdown_lndnet(net);
3802
3803         goto unlock_api_mutex;
3804
3805 unlock_net:
3806         lnet_net_unlock(0);
3807 unlock_api_mutex:
3808         mutex_unlock(&the_lnet.ln_api_mutex);
3809
3810         return rc;
3811 }
3812
3813 /*
3814  * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
3815  * They are only expected to be called for unique networks.
3816  * That can happen as a result of older DLC library
3817  * calls. Multi-Rail DLC and beyond no longer use these APIs.
3818  */
3819 int
3820 lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
3821 {
3822         struct lnet_net *net;
3823         LIST_HEAD(net_head);
3824         int rc;
3825         struct lnet_ioctl_config_lnd_tunables tun;
3826         const char *nets = conf->cfg_config_u.cfg_net.net_intf;
3827
3828         /* Create net/NI structures for the network string */
3829         rc = lnet_parse_networks(&net_head, nets);
3830         if (rc <= 0)
3831                 return rc == 0 ? -EINVAL : rc;
3832
3833         mutex_lock(&the_lnet.ln_api_mutex);
3834         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3835                 rc = -ESHUTDOWN;
3836                 goto out_unlock_clean;
3837         }
3838
3839         if (rc > 1) {
3840                 rc = -EINVAL; /* only add one network per call */
3841                 goto out_unlock_clean;
3842         }
3843
3844         net = list_first_entry(&net_head, struct lnet_net, net_list);
3845         list_del_init(&net->net_list);
3846
3847         LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));
3848
3849         memset(&tun, 0, sizeof(tun));
3850
3851         tun.lt_cmn.lct_peer_timeout =
3852           (!conf->cfg_config_u.cfg_net.net_peer_timeout) ? DEFAULT_PEER_TIMEOUT :
3853                 conf->cfg_config_u.cfg_net.net_peer_timeout;
3854         tun.lt_cmn.lct_peer_tx_credits =
3855           (!conf->cfg_config_u.cfg_net.net_peer_tx_credits) ? DEFAULT_PEER_CREDITS :
3856                 conf->cfg_config_u.cfg_net.net_peer_tx_credits;
3857         tun.lt_cmn.lct_peer_rtr_credits =
3858           conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
3859         tun.lt_cmn.lct_max_tx_credits =
3860           (!conf->cfg_config_u.cfg_net.net_max_tx_credits) ? DEFAULT_CREDITS :
3861                 conf->cfg_config_u.cfg_net.net_max_tx_credits;
3862
3863         rc = lnet_add_net_common(net, &tun);
3864
3865 out_unlock_clean:
3866         mutex_unlock(&the_lnet.ln_api_mutex);
3867         /* net_head list is empty in success case */
3868         while ((net = list_first_entry_or_null(&net_head,
3869                                                struct lnet_net,
3870                                                net_list)) != NULL) {
3871                 list_del_init(&net->net_list);
3872                 lnet_net_free(net);
3873         }
3874         return rc;
3875 }
3876
3877 int
3878 lnet_dyn_del_net(u32 net_id)
3879 {
3880         struct lnet_net *net;
3881         struct lnet_ping_buffer *pbuf;
3882         struct lnet_handle_md ping_mdh;
3883         int net_ni_bytes, rc;
3884
3885         /* don't allow userspace to shutdown the LOLND */
3886         if (LNET_NETTYP(net_id) == LOLND)
3887                 return -EINVAL;
3888
3889         mutex_lock(&the_lnet.ln_api_mutex);
3890         if (the_lnet.ln_state != LNET_STATE_RUNNING) {
3891                 rc = -ESHUTDOWN;
3892                 goto out;
3893         }
3894
3895         lnet_net_lock(0);
3896
3897         net = lnet_get_net_locked(net_id);
3898         if (net == NULL) {
3899                 lnet_net_unlock(0);
3900                 rc = -EINVAL;
3901                 goto out;
3902         }
3903
3904         net_ni_bytes = lnet_get_net_ni_bytes_locked(net);
3905
3906         lnet_net_unlock(0);
3907
3908         /* create and link a new ping info, before removing the old one */
3909         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
3910                                     LNET_PING_INFO_HDR_SIZE +
3911                                     lnet_get_ni_bytes() - net_ni_bytes,
3912                                     false);
3913         if (rc != 0)
3914                 goto out;
3915
3916         lnet_shutdown_lndnet(net);
3917
3918         lnet_acceptor_stop();
3919
3920         lnet_ping_target_update(pbuf, ping_mdh);
3921
3922 out:
3923         mutex_unlock(&the_lnet.ln_api_mutex);
3924
3925         return rc;
3926 }
3927
3928 void lnet_incr_dlc_seq(void)
3929 {
3930         atomic_inc(&lnet_dlc_seq_no);
3931 }
3932
3933 __u32 lnet_get_dlc_seq_locked(void)
3934 {
3935         return atomic_read(&lnet_dlc_seq_no);
3936 }
3937
3938 static void
3939 lnet_ni_set_healthv(lnet_nid_t nid, int value, bool all)
3940 {
3941         struct lnet_net *net;
3942         struct lnet_ni *ni;
3943
3944         lnet_net_lock(LNET_LOCK_EX);
3945         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3946                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3947                         if (all || (nid_is_nid4(&ni->ni_nid) &&
3948                                     lnet_nid_to_nid4(&ni->ni_nid) == nid)) {
3949                                 atomic_set(&ni->ni_healthv, value);
3950                                 if (list_empty(&ni->ni_recovery) &&
3951                                     value < LNET_MAX_HEALTH_VALUE) {
3952                                         CERROR("manually adding local NI %s to recovery\n",
3953                                                libcfs_nidstr(&ni->ni_nid));
3954                                         list_add_tail(&ni->ni_recovery,
3955                                                       &the_lnet.ln_mt_localNIRecovq);
3956                                         lnet_ni_addref_locked(ni, 0);
3957                                 }
3958                                 if (!all) {
3959                                         lnet_net_unlock(LNET_LOCK_EX);
3960                                         return;
3961                                 }
3962                         }
3963                 }
3964         }
3965         lnet_net_unlock(LNET_LOCK_EX);
3966 }
3967
3968 static void
3969 lnet_ni_set_conns_per_peer(lnet_nid_t nid, int value, bool all)
3970 {
3971         struct lnet_net *net;
3972         struct lnet_ni *ni;
3973
3974         lnet_net_lock(LNET_LOCK_EX);
3975         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3976                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3977                         if (lnet_nid_to_nid4(&ni->ni_nid) != nid && !all)
3978                                 continue;
3979                         if (LNET_NETTYP(net->net_id) == SOCKLND)
3980                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_sock.lnd_conns_per_peer = value;
3981                         else if (LNET_NETTYP(net->net_id) == O2IBLND)
3982                                 ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib.lnd_conns_per_peer = value;
3983                         if (!all) {
3984                                 lnet_net_unlock(LNET_LOCK_EX);
3985                                 return;
3986                         }
3987                 }
3988         }
3989         lnet_net_unlock(LNET_LOCK_EX);
3990 }
3991
3992 static int
3993 lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats)
3994 {
3995         int cpt, rc = 0;
3996         struct lnet_ni *ni;
3997         struct lnet_nid nid;
3998
3999         lnet_nid4_to_nid(stats->hlni_nid, &nid);
4000         cpt = lnet_net_lock_current();
4001         ni = lnet_nid_to_ni_locked(&nid, cpt);
4002         if (!ni) {
4003                 rc = -ENOENT;
4004                 goto unlock;
4005         }
4006
4007         stats->hlni_local_interrupt = atomic_read(&ni->ni_hstats.hlt_local_interrupt);
4008         stats->hlni_local_dropped = atomic_read(&ni->ni_hstats.hlt_local_dropped);
4009         stats->hlni_local_aborted = atomic_read(&ni->ni_hstats.hlt_local_aborted);
4010         stats->hlni_local_no_route = atomic_read(&ni->ni_hstats.hlt_local_no_route);
4011         stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout);
4012         stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error);
4013         stats->hlni_fatal_error = atomic_read(&ni->ni_fatal_error_on);
4014         stats->hlni_health_value = atomic_read(&ni->ni_healthv);
4015         stats->hlni_ping_count = ni->ni_ping_count;
4016         stats->hlni_next_ping = ni->ni_next_ping;
4017
4018 unlock:
4019         lnet_net_unlock(cpt);
4020
4021         return rc;
4022 }
4023
4024 static int
4025 lnet_get_local_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4026 {
4027         struct lnet_ni *ni;
4028         int i = 0;
4029
4030         lnet_net_lock(LNET_LOCK_EX);
4031         list_for_each_entry(ni, &the_lnet.ln_mt_localNIRecovq, ni_recovery) {
4032                 if (!nid_is_nid4(&ni->ni_nid))
4033                         continue;
4034                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&ni->ni_nid);
4035                 i++;
4036                 if (i >= LNET_MAX_SHOW_NUM_NID)
4037                         break;
4038         }
4039         lnet_net_unlock(LNET_LOCK_EX);
4040         list->rlst_num_nids = i;
4041
4042         return 0;
4043 }
4044
4045 static int
4046 lnet_get_peer_ni_recovery_list(struct lnet_ioctl_recovery_list *list)
4047 {
4048         struct lnet_peer_ni *lpni;
4049         int i = 0;
4050
4051         lnet_net_lock(LNET_LOCK_EX);
4052         list_for_each_entry(lpni, &the_lnet.ln_mt_peerNIRecovq, lpni_recovery) {
4053                 list->rlst_nid_array[i] = lnet_nid_to_nid4(&lpni->lpni_nid);
4054                 i++;
4055                 if (i >= LNET_MAX_SHOW_NUM_NID)
4056                         break;
4057         }
4058         lnet_net_unlock(LNET_LOCK_EX);
4059         list->rlst_num_nids = i;
4060
4061         return 0;
4062 }
4063
4064 /**
4065  * LNet ioctl handler.
4066  *
4067  */
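/*
 * Most cases below follow the same pattern, sketched here with a
 * hypothetical IOC_LIBCFS_FOO command and lnet_do_foo() helper:
 *
 *	case IOC_LIBCFS_FOO: {
 *		struct lnet_ioctl_foo *foo = arg;
 *
 *		if (foo->foo_hdr.ioc_len < sizeof(*foo))
 *			return -EINVAL;
 *
 *		mutex_lock(&the_lnet.ln_api_mutex);
 *		rc = lnet_do_foo(foo);
 *		mutex_unlock(&the_lnet.ln_api_mutex);
 *		return rc;
 *	}
 */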
4068 int
4069 LNetCtl(unsigned int cmd, void *arg)
4070 {
4071         struct libcfs_ioctl_data *data = arg;
4072         struct lnet_ioctl_config_data *config;
4073         struct lnet_ni           *ni;
4074         struct lnet_nid           nid;
4075         int                       rc;
4076
4077         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
4078                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
4079
4080         switch (cmd) {
4081         case IOC_LIBCFS_FAIL_NID:
4082                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
4083
4084         case IOC_LIBCFS_ADD_ROUTE: {
4085                 /* default router sensitivity to 1 */
4086                 unsigned int sensitivity = 1;
4087                 config = arg;
4088
4089                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4090                         return -EINVAL;
4091
4092                 if (config->cfg_config_u.cfg_route.rtr_sensitivity) {
4093                         sensitivity =
4094                           config->cfg_config_u.cfg_route.rtr_sensitivity;
4095                 }
4096
4097                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4098                 mutex_lock(&the_lnet.ln_api_mutex);
4099                 rc = lnet_add_route(config->cfg_net,
4100                                     config->cfg_config_u.cfg_route.rtr_hop,
4101                                     &nid,
4102                                     config->cfg_config_u.cfg_route.
4103                                         rtr_priority, sensitivity);
4104                 mutex_unlock(&the_lnet.ln_api_mutex);
4105                 return rc;
4106         }
4107
4108         case IOC_LIBCFS_DEL_ROUTE:
4109                 config = arg;
4110
4111                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4112                         return -EINVAL;
4113
4114                 lnet_nid4_to_nid(config->cfg_nid, &nid);
4115                 mutex_lock(&the_lnet.ln_api_mutex);
4116                 rc = lnet_del_route(config->cfg_net, &nid);
4117                 mutex_unlock(&the_lnet.ln_api_mutex);
4118                 return rc;
4119
4120         case IOC_LIBCFS_GET_ROUTE:
4121                 config = arg;
4122
4123                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4124                         return -EINVAL;
4125
4126                 mutex_lock(&the_lnet.ln_api_mutex);
4127                 rc = lnet_get_route(config->cfg_count,
4128                                     &config->cfg_net,
4129                                     &config->cfg_config_u.cfg_route.rtr_hop,
4130                                     &config->cfg_nid,
4131                                     &config->cfg_config_u.cfg_route.rtr_flags,
4132                                     &config->cfg_config_u.cfg_route.
4133                                         rtr_priority,
4134                                     &config->cfg_config_u.cfg_route.
4135                                         rtr_sensitivity);
4136                 mutex_unlock(&the_lnet.ln_api_mutex);
4137                 return rc;
4138
4139         case IOC_LIBCFS_GET_LOCAL_NI: {
4140                 struct lnet_ioctl_config_ni *cfg_ni;
4141                 struct lnet_ioctl_config_lnd_tunables *tun = NULL;
4142                 struct lnet_ioctl_element_stats *stats;
4143                 __u32 tun_size;
4144
4145                 cfg_ni = arg;
4146
4147                 /* get the tunables if they are available */
4148                 if (cfg_ni->lic_cfg_hdr.ioc_len <
4149                     sizeof(*cfg_ni) + sizeof(*stats) + sizeof(*tun))
4150                         return -EINVAL;
4151
4152                 stats = (struct lnet_ioctl_element_stats *)
4153                         cfg_ni->lic_bulk;
4154                 tun = (struct lnet_ioctl_config_lnd_tunables *)
4155                                 (cfg_ni->lic_bulk + sizeof(*stats));
4156
4157                 tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
4158                         sizeof(*stats);
4159
4160                 mutex_lock(&the_lnet.ln_api_mutex);
4161                 rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
4162                 mutex_unlock(&the_lnet.ln_api_mutex);
4163                 return rc;
4164         }
4165
4166         case IOC_LIBCFS_GET_LOCAL_NI_MSG_STATS: {
4167                 struct lnet_ioctl_element_msg_stats *msg_stats = arg;
4168
4169                 if (msg_stats->im_hdr.ioc_len != sizeof(*msg_stats))
4170                         return -EINVAL;
4171
4172                 mutex_lock(&the_lnet.ln_api_mutex);
4173                 rc = lnet_get_ni_stats(msg_stats);
4174                 mutex_unlock(&the_lnet.ln_api_mutex);
4175
4176                 return rc;
4177         }
4178
4179         case IOC_LIBCFS_GET_NET: {
4180                 size_t total = sizeof(*config) +
4181                                sizeof(struct lnet_ioctl_net_config);
4182                 config = arg;
4183
4184                 if (config->cfg_hdr.ioc_len < total)
4185                         return -EINVAL;
4186
4187                 mutex_lock(&the_lnet.ln_api_mutex);
4188                 rc = lnet_get_net_config(config);
4189                 mutex_unlock(&the_lnet.ln_api_mutex);
4190                 return rc;
4191         }
4192
4193         case IOC_LIBCFS_GET_LNET_STATS:
4194         {
4195                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
4196
4197                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
4198                         return -EINVAL;
4199
4200                 mutex_lock(&the_lnet.ln_api_mutex);
4201                 rc = lnet_counters_get(&lnet_stats->st_cntrs);
4202                 mutex_unlock(&the_lnet.ln_api_mutex);
4203                 return rc;
4204         }
4205
4206         case IOC_LIBCFS_RESET_LNET_STATS:
4207         {
4208                 mutex_lock(&the_lnet.ln_api_mutex);
4209                 lnet_counters_reset();
4210                 mutex_unlock(&the_lnet.ln_api_mutex);
4211                 return 0;
4212         }
4213
4214         case IOC_LIBCFS_CONFIG_RTR:
4215                 config = arg;
4216
4217                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4218                         return -EINVAL;
4219
4220                 mutex_lock(&the_lnet.ln_api_mutex);
4221                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
4222                         rc = lnet_rtrpools_enable();
4223                         mutex_unlock(&the_lnet.ln_api_mutex);
4224                         return rc;
4225                 }
4226                 lnet_rtrpools_disable();
4227                 mutex_unlock(&the_lnet.ln_api_mutex);
4228                 return 0;
4229
4230         case IOC_LIBCFS_ADD_BUF:
4231                 config = arg;
4232
4233                 if (config->cfg_hdr.ioc_len < sizeof(*config))
4234                         return -EINVAL;
4235
4236                 mutex_lock(&the_lnet.ln_api_mutex);
4237                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
4238                                                 buf_tiny,
4239                                           config->cfg_config_u.cfg_buffers.
4240                                                 buf_small,
4241                                           config->cfg_config_u.cfg_buffers.
4242                                                 buf_large);
4243                 mutex_unlock(&the_lnet.ln_api_mutex);
4244                 return rc;
4245
4246         case IOC_LIBCFS_SET_NUMA_RANGE: {
4247                 struct lnet_ioctl_set_value *numa;
4248                 numa = arg;
4249                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4250                         return -EINVAL;
4251                 lnet_net_lock(LNET_LOCK_EX);
4252                 lnet_numa_range = numa->sv_value;
4253                 lnet_net_unlock(LNET_LOCK_EX);
4254                 return 0;
4255         }
4256
4257         case IOC_LIBCFS_GET_NUMA_RANGE: {
4258                 struct lnet_ioctl_set_value *numa;
4259                 numa = arg;
4260                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
4261                         return -EINVAL;
4262                 numa->sv_value = lnet_numa_range;
4263                 return 0;
4264         }
4265
4266         case IOC_LIBCFS_GET_BUF: {
4267                 struct lnet_ioctl_pool_cfg *pool_cfg;
4268                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
4269
4270                 config = arg;
4271
4272                 if (config->cfg_hdr.ioc_len < total)
4273                         return -EINVAL;
4274
4275                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
4276
4277                 mutex_lock(&the_lnet.ln_api_mutex);
4278                 rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
4279                 mutex_unlock(&the_lnet.ln_api_mutex);
4280                 return rc;
4281         }
4282
4283         case IOC_LIBCFS_GET_LOCAL_HSTATS: {
4284                 struct lnet_ioctl_local_ni_hstats *stats = arg;
4285
4286                 if (stats->hlni_hdr.ioc_len < sizeof(*stats))
4287                         return -EINVAL;
4288
4289                 mutex_lock(&the_lnet.ln_api_mutex);
4290                 rc = lnet_get_local_ni_hstats(stats);
4291                 mutex_unlock(&the_lnet.ln_api_mutex);
4292
4293                 return rc;
4294         }
4295
4296         case IOC_LIBCFS_GET_RECOVERY_QUEUE: {
4297                 struct lnet_ioctl_recovery_list *list = arg;
4298                 if (list->rlst_hdr.ioc_len < sizeof(*list))
4299                         return -EINVAL;
4300
4301                 mutex_lock(&the_lnet.ln_api_mutex);
4302                 if (list->rlst_type == LNET_HEALTH_TYPE_LOCAL_NI)
4303                         rc = lnet_get_local_ni_recovery_list(list);
4304                 else
4305                         rc = lnet_get_peer_ni_recovery_list(list);
4306                 mutex_unlock(&the_lnet.ln_api_mutex);
4307                 return rc;
4308         }
4309
4310         case IOC_LIBCFS_ADD_PEER_NI: {
4311                 struct lnet_ioctl_peer_cfg *cfg = arg;
4312                 struct lnet_nid prim_nid;
4313
4314                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4315                         return -EINVAL;
4316
4317                 mutex_lock(&the_lnet.ln_api_mutex);
4318                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4319                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4320                 rc = lnet_add_peer_ni(&prim_nid, &nid, cfg->prcfg_mr, false);
4321                 mutex_unlock(&the_lnet.ln_api_mutex);
4322                 return rc;
4323         }
4324
4325         case IOC_LIBCFS_DEL_PEER_NI: {
4326                 struct lnet_ioctl_peer_cfg *cfg = arg;
4327                 struct lnet_nid prim_nid;
4328
4329                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4330                         return -EINVAL;
4331
4332                 mutex_lock(&the_lnet.ln_api_mutex);
4333                 lnet_nid4_to_nid(cfg->prcfg_prim_nid, &prim_nid);
4334                 lnet_nid4_to_nid(cfg->prcfg_cfg_nid, &nid);
4335                 rc = lnet_del_peer_ni(&prim_nid,
4336                                       &nid);
4337                 mutex_unlock(&the_lnet.ln_api_mutex);
4338                 return rc;
4339         }
4340
4341         case IOC_LIBCFS_GET_PEER_INFO: {
4342                 struct lnet_ioctl_peer *peer_info = arg;
4343
4344                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
4345                         return -EINVAL;
4346
4347                 mutex_lock(&the_lnet.ln_api_mutex);
4348                 rc = lnet_get_peer_ni_info(
4349                    peer_info->pr_count,
4350                    &peer_info->pr_nid,
4351                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
4352                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
4353                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
4354                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
4355                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
4356                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
4357                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
4358                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
4359                 mutex_unlock(&the_lnet.ln_api_mutex);
4360                 return rc;
4361         }
4362
4363         case IOC_LIBCFS_GET_PEER_NI: {
4364                 struct lnet_ioctl_peer_cfg *cfg = arg;
4365
4366                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4367                         return -EINVAL;
4368
4369                 mutex_lock(&the_lnet.ln_api_mutex);
4370                 rc = lnet_get_peer_info(cfg,
4371                                         (void __user *)cfg->prcfg_bulk);
4372                 mutex_unlock(&the_lnet.ln_api_mutex);
4373                 return rc;
4374         }
4375
4376         case IOC_LIBCFS_GET_PEER_LIST: {
4377                 struct lnet_ioctl_peer_cfg *cfg = arg;
4378
4379                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
4380                         return -EINVAL;
4381
4382                 mutex_lock(&the_lnet.ln_api_mutex);
4383                 rc = lnet_get_peer_list(&cfg->prcfg_count, &cfg->prcfg_size,
4384                                 (struct lnet_process_id __user *)cfg->prcfg_bulk);
4385                 mutex_unlock(&the_lnet.ln_api_mutex);
4386                 return rc;
4387         }
4388
4389         case IOC_LIBCFS_SET_HEALHV: {
4390                 struct lnet_ioctl_reset_health_cfg *cfg = arg;
4391                 int value;
4392                 if (cfg->rh_hdr.ioc_len < sizeof(*cfg))
4393                         return -EINVAL;
4394                 if (cfg->rh_value < 0 ||
4395                     cfg->rh_value > LNET_MAX_HEALTH_VALUE)
4396                         value = LNET_MAX_HEALTH_VALUE;
4397                 else
4398                         value = cfg->rh_value;
4399                 CDEBUG(D_NET, "Manually setting healthv to %d for %s:%s. all = %d\n",
4400                        value, (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI) ?
4401                        "local" : "peer", libcfs_nid2str(cfg->rh_nid), cfg->rh_all);
4402                 mutex_lock(&the_lnet.ln_api_mutex);
4403                 if (cfg->rh_type == LNET_HEALTH_TYPE_LOCAL_NI)
4404                         lnet_ni_set_healthv(cfg->rh_nid, value,
4405                                              cfg->rh_all);
4406                 else
4407                         lnet_peer_ni_set_healthv(cfg->rh_nid, value,
4408                                                   cfg->rh_all);
4409                 mutex_unlock(&the_lnet.ln_api_mutex);
4410                 return 0;
4411         }
4412
4413         case IOC_LIBCFS_SET_CONNS_PER_PEER: {
4414                 struct lnet_ioctl_reset_conns_per_peer_cfg *cfg = arg;
4415                 int value;
4416
4417                 if (cfg->rcpp_hdr.ioc_len < sizeof(*cfg))
4418                         return -EINVAL;
4419                 if (cfg->rcpp_value < 0)
4420                         value = 1;
4421                 else
4422                         value = cfg->rcpp_value;
4423                 CDEBUG(D_NET,
4424                        "Setting conns_per_peer to %d for %s. all = %d\n",
4425                        value, libcfs_nid2str(cfg->rcpp_nid), cfg->rcpp_all);
4426                 mutex_lock(&the_lnet.ln_api_mutex);
4427                 lnet_ni_set_conns_per_peer(cfg->rcpp_nid, value, cfg->rcpp_all);
4428                 mutex_unlock(&the_lnet.ln_api_mutex);
4429                 return 0;
4430         }
4431
4432         case IOC_LIBCFS_NOTIFY_ROUTER: {
4433                 time64_t deadline = ktime_get_real_seconds() - data->ioc_u64[0];
4434
4435                 /* The deadline passed in by the user should be some time in
4436                  * seconds in the future since the UNIX epoch. We have to map
4437                  * that deadline to the wall clock.
4438                  */
4439                 deadline += ktime_get_seconds();
4440                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4441                 return lnet_notify(NULL, &nid, data->ioc_flags, false,
4442                                    deadline);
4443         }
4444
4445         case IOC_LIBCFS_LNET_DIST:
4446                 lnet_nid4_to_nid(data->ioc_nid, &nid);
4447                 rc = LNetDist(&nid, &nid, &data->ioc_u32[1]);
4448                 if (rc < 0 && rc != -EHOSTUNREACH)
4449                         return rc;
4450
4451                 data->ioc_nid = lnet_nid_to_nid4(&nid);
4452                 data->ioc_u32[0] = rc;
4453                 return 0;
4454
4455         case IOC_LIBCFS_TESTPROTOCOMPAT:
4456                 the_lnet.ln_testprotocompat = data->ioc_flags;
4457                 return 0;
4458
4459         case IOC_LIBCFS_LNET_FAULT:
4460                 return lnet_fault_ctl(data->ioc_flags, data);
4461
4462         case IOC_LIBCFS_PING_PEER: {
4463                 struct lnet_ioctl_ping_data *ping = arg;
4464                 struct lnet_process_id __user *ids = ping->ping_buf;
4465                 struct lnet_nid src_nid = LNET_ANY_NID;
4466                 struct lnet_genl_ping_list plist;
4467                 struct lnet_processid id;
4468                 struct lnet_peer *lp;
4469                 signed long timeout;
4470                 int count, i;
4471
4472                 /* Check if the supplied ping data supports a source nid.
4473                  * NB: This check is sufficient if fields are only appended
4474                  * to lnet_ioctl_ping_data, but it will break if fields are
4475                  * re-ordered or removed. These ioctls are expected to be
4476                  * replaced by a netlink implementation, so it is probably
4477                  * not worth coming up with a more robust version
4478                  * compatibility scheme.
4479                  */
4480                 if (ping->ping_hdr.ioc_len >= sizeof(struct lnet_ioctl_ping_data))
4481                         lnet_nid4_to_nid(ping->ping_src, &src_nid);
4482
4483                 /* Use the default of 3 minutes if the timeout is not positive or too large */
4484                 if (((s32)ping->op_param) <= 0 ||
4485                     ping->op_param > (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
4486                         timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
4487                 else
4488                         timeout = nsecs_to_jiffies(ping->op_param * NSEC_PER_MSEC);
4489
4490                 id.pid = ping->ping_id.pid;
4491                 lnet_nid4_to_nid(ping->ping_id.nid, &id.nid);
4492                 rc = lnet_ping(&id, &src_nid, timeout, &plist,
4493                                ping->ping_count);
4494                 if (rc < 0)
4495                         goto report_ping_err;
4496                 count = rc;
4497
4498                 for (i = 0; i < count; i++) {
4499                         struct lnet_processid *result;
4500                         struct lnet_process_id tmpid;
4501
4502                         result = genradix_ptr(&plist.lgpl_list, i);
4503                         memset(&tmpid, 0, sizeof(tmpid));
4504                         tmpid.pid = result->pid;
4505                         tmpid.nid = lnet_nid_to_nid4(&result->nid);
4506                         if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) {
4507                                 rc = -EFAULT;
4508                                 goto report_ping_err;
4509                         }
4510                 }
4511
4512                 mutex_lock(&the_lnet.ln_api_mutex);
4513                 lp = lnet_find_peer(&id.nid);
4514                 if (lp) {
4515                         ping->ping_id.nid =
4516                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4517                         ping->mr_info = lnet_peer_is_multi_rail(lp);
4518                         lnet_peer_decref_locked(lp);
4519                 }
4520                 mutex_unlock(&the_lnet.ln_api_mutex);
4521
4522                 ping->ping_count = count;
4523 report_ping_err:
4524                 genradix_free(&plist.lgpl_list);
4525                 return rc;
4526         }
4527
4528         case IOC_LIBCFS_DISCOVER: {
4529                 struct lnet_ioctl_ping_data *discover = arg;
4530                 struct lnet_peer *lp;
4531
4532                 rc = lnet_discover(discover->ping_id, discover->op_param,
4533                                    discover->ping_buf,
4534                                    discover->ping_count);
4535                 if (rc < 0)
4536                         return rc;
4537
4538                 mutex_lock(&the_lnet.ln_api_mutex);
4539                 lnet_nid4_to_nid(discover->ping_id.nid, &nid);
4540                 lp = lnet_find_peer(&nid);
4541                 if (lp) {
4542                         discover->ping_id.nid =
4543                                 lnet_nid_to_nid4(&lp->lp_primary_nid);
4544                         discover->mr_info = lnet_peer_is_multi_rail(lp);
4545                         lnet_peer_decref_locked(lp);
4546                 }
4547                 mutex_unlock(&the_lnet.ln_api_mutex);
4548
4549                 discover->ping_count = rc;
4550                 return 0;
4551         }
4552
4553         case IOC_LIBCFS_ADD_UDSP: {
4554                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4555                 __u32 bulk_size = ioc_udsp->iou_hdr.ioc_len;
4556
4557                 mutex_lock(&the_lnet.ln_api_mutex);
4558                 rc = lnet_udsp_demarshal_add(arg, bulk_size);
4559                 if (!rc) {
4560                         rc = lnet_udsp_apply_policies(NULL, false);
4561                         CDEBUG(D_NET, "policy application returned %d\n", rc);
4562                         rc = 0;
4563                 }
4564                 mutex_unlock(&the_lnet.ln_api_mutex);
4565
4566                 return rc;
4567         }
4568
4569         case IOC_LIBCFS_DEL_UDSP: {
4570                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4571                 int idx = ioc_udsp->iou_idx;
4572
4573                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4574                         return -EINVAL;
4575
4576                 mutex_lock(&the_lnet.ln_api_mutex);
4577                 rc = lnet_udsp_del_policy(idx);
4578                 if (!rc) {
4579                         rc = lnet_udsp_apply_policies(NULL, false);
4580                         CDEBUG(D_NET, "policy re-application returned %d\n",
4581                                rc);
4582                         rc = 0;
4583                 }
4584                 mutex_unlock(&the_lnet.ln_api_mutex);
4585
4586                 return rc;
4587         }
4588
4589         case IOC_LIBCFS_GET_UDSP_SIZE: {
4590                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4591                 struct lnet_udsp *udsp;
4592
4593                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4594                         return -EINVAL;
4595
4596                 rc = 0;
4597
4598                 mutex_lock(&the_lnet.ln_api_mutex);
4599                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4600                 if (!udsp) {
4601                         rc = -ENOENT;
4602                 } else {
4603                         /* On input iou_idx holds the index of the UDSP
4604                          * whose size is being queried. On output iou_idx
4605                          * holds the size of the UDSP found at that
4606                          * index.
4607                          */
4608                         ioc_udsp->iou_idx = lnet_get_udsp_size(udsp);
4609                         if (ioc_udsp->iou_idx < 0)
4610                                 rc = -EINVAL;
4611                 }
4612                 mutex_unlock(&the_lnet.ln_api_mutex);
4613
4614                 return rc;
4615         }
4616
4617         case IOC_LIBCFS_GET_UDSP: {
4618                 struct lnet_ioctl_udsp *ioc_udsp = arg;
4619                 struct lnet_udsp *udsp;
4620
4621                 if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
4622                         return -EINVAL;
4623
4624                 rc = 0;
4625
4626                 mutex_lock(&the_lnet.ln_api_mutex);
4627                 udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
4628                 if (!udsp)
4629                         rc = -ENOENT;
4630                 else
4631                         rc = lnet_udsp_marshal(udsp, ioc_udsp);
4632                 mutex_unlock(&the_lnet.ln_api_mutex);
4633
4634                 return rc;
4635         }
4636
4637         case IOC_LIBCFS_GET_CONST_UDSP_INFO: {
4638                 struct lnet_ioctl_construct_udsp_info *info = arg;
4639
4640                 if (info->cud_hdr.ioc_len < sizeof(*info))
4641                         return -EINVAL;
4642
4643                 CDEBUG(D_NET, "GET_UDSP_INFO for %s\n",
4644                        libcfs_nid2str(info->cud_nid));
4645
4646                 mutex_lock(&the_lnet.ln_api_mutex);
4647                 lnet_udsp_get_construct_info(info);
4648                 mutex_unlock(&the_lnet.ln_api_mutex);
4649
4650                 return 0;
4651         }
4652
4653         default:
4654                 ni = lnet_net2ni_addref(data->ioc_net);
4655                 if (ni == NULL)
4656                         return -EINVAL;
4657
4658                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
4659                         rc = -EINVAL;
4660                 else
4661                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
4662
4663                 lnet_ni_decref(ni);
4664                 return rc;
4665         }
4666         /* not reached */
4667 }
4668 EXPORT_SYMBOL(LNetCtl);
4669
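     /* Key tables describing the attribute layout of LNET_CMD_NETS replies.
      * Each ln_key_list maps netlink attribute numbers to the key names and
      * nesting ("net" -> "local NI(s)" -> "interfaces") that userspace is
      * expected to render, roughly mirroring the YAML produced by tools such
      * as lnetctl.
      */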
4670 static const struct ln_key_list net_props_list = {
4671         .lkl_maxattr                    = LNET_NET_ATTR_MAX,
4672         .lkl_list                       = {
4673                 [LNET_NET_ATTR_HDR]             = {
4674                         .lkp_value              = "net",
4675                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
4676                         .lkp_data_type          = NLA_NUL_STRING,
4677                 },
4678                 [LNET_NET_ATTR_TYPE]            = {
4679                         .lkp_value              = "net type",
4680                         .lkp_data_type          = NLA_STRING
4681                 },
4682                 [LNET_NET_ATTR_LOCAL]           = {
4683                         .lkp_value              = "local NI(s)",
4684                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
4685                         .lkp_data_type          = NLA_NESTED
4686                 },
4687         },
4688 };
4689
4690 static struct ln_key_list local_ni_list = {
4691         .lkl_maxattr                    = LNET_NET_LOCAL_NI_ATTR_MAX,
4692         .lkl_list                       = {
4693                 [LNET_NET_LOCAL_NI_ATTR_NID]    = {
4694                         .lkp_value              = "nid",
4695                         .lkp_data_type          = NLA_STRING
4696                 },
4697                 [LNET_NET_LOCAL_NI_ATTR_STATUS] = {
4698                         .lkp_value              = "status",
4699                         .lkp_data_type          = NLA_STRING
4700                 },
4701                 [LNET_NET_LOCAL_NI_ATTR_INTERFACE] = {
4702                         .lkp_value              = "interfaces",
4703                         .lkp_key_format         = LNKF_MAPPING,
4704                         .lkp_data_type          = NLA_NESTED
4705                 },
4706         },
4707 };
4708
4709 static const struct ln_key_list local_ni_interfaces_list = {
4710         .lkl_maxattr                    = LNET_NET_LOCAL_NI_INTF_ATTR_MAX,
4711         .lkl_list                       = {
4712                 [LNET_NET_LOCAL_NI_INTF_ATTR_TYPE] = {
4713                         .lkp_value      = "0",
4714                         .lkp_data_type  = NLA_STRING
4715                 },
4716         },
4717 };
4718
4719 /* Use an index since the traversal is across LNet nets and ni collections */
4720 struct lnet_genl_net_list {
4721         unsigned int    lngl_net_id;
4722         unsigned int    lngl_idx;
4723 };
4724
4725 static inline struct lnet_genl_net_list *
4726 lnet_net_dump_ctx(struct netlink_callback *cb)
4727 {
4728         return (struct lnet_genl_net_list *)cb->args[0];
4729 }
4730
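     /* ->done() handler for the LNET_CMD_NETS dump: frees the per-dump
      * context that lnet_net_show_start() stashed in cb->args[0].
      */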
4731 static int lnet_net_show_done(struct netlink_callback *cb)
4732 {
4733         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
4734
4735         if (nlist) {
4736                 LIBCFS_FREE(nlist, sizeof(*nlist));
4737                 cb->args[0] = 0;
4738         }
4739
4740         return 0;
4741 }
4742
4743 /* LNet net ->start() handler for GET requests */
4744 static int lnet_net_show_start(struct netlink_callback *cb)
4745 {
4746         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
4747 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4748         struct netlink_ext_ack *extack = NULL;
4749 #endif
4750         struct lnet_genl_net_list *nlist;
4751         int msg_len = genlmsg_len(gnlh);
4752         struct nlattr *params, *top;
4753         int rem, rc = 0;
4754
4755 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4756         extack = cb->extack;
4757 #endif
4758         if (the_lnet.ln_refcount == 0) {
4759                 NL_SET_ERR_MSG(extack, "LNet stack down");
4760                 return -ENETDOWN;
4761         }
4762
4763         LIBCFS_ALLOC(nlist, sizeof(*nlist));
4764         if (!nlist)
4765                 return -ENOMEM;
4766
4767         nlist->lngl_net_id = LNET_NET_ANY;
4768         nlist->lngl_idx = 0;
4769         cb->args[0] = (long)nlist;
4770
4771         if (!msg_len)
4772                 return 0;
4773
4774         params = genlmsg_data(gnlh);
4775         nla_for_each_attr(top, params, msg_len, rem) {
4776                 struct nlattr *net;
4777                 int rem2;
4778
4779                 nla_for_each_nested(net, top, rem2) {
4780                         char filter[LNET_NIDSTR_SIZE];
4781
4782                         if (nla_type(net) != LN_SCALAR_ATTR_VALUE ||
4783                             nla_strcmp(net, "name") != 0)
4784                                 continue;
4785
4786                         net = nla_next(net, &rem2);
4787                         if (nla_type(net) != LN_SCALAR_ATTR_VALUE) {
4788                                 NL_SET_ERR_MSG(extack, "invalid config param");
4789                                 GOTO(report_err, rc = -EINVAL);
4790                         }
4791
4792                         rc = nla_strscpy(filter, net, sizeof(filter));
4793                         if (rc < 0) {
4794                                 NL_SET_ERR_MSG(extack, "failed to get param");
4795                                 GOTO(report_err, rc);
4796                         }
4797                         rc = 0;
4798
4799                         nlist->lngl_net_id = libcfs_str2net(filter);
4800                         if (nlist->lngl_net_id == LNET_NET_ANY) {
4801                                 NL_SET_ERR_MSG(extack, "cannot parse net");
4802                                 GOTO(report_err, rc = -ENOENT);
4803                         }
4804                 }
4805         }
4806 report_err:
4807         if (rc < 0)
4808                 lnet_net_show_done(cb);
4809
4810         return rc;
4811 }
4812
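     /* ->dumpit() handler for LNET_CMD_NETS. The first pass sends the key
      * tables above, then the configured nets and their NIs are walked and
      * one message is emitted per NI. lngl_idx records how far the walk got
      * so the dump can resume when the reply spans several skbs.
      */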
4813 static int lnet_net_show_dump(struct sk_buff *msg,
4814                               struct netlink_callback *cb)
4815 {
4816         struct lnet_genl_net_list *nlist = lnet_net_dump_ctx(cb);
4817 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
4818         struct netlink_ext_ack *extack = NULL;
4819 #endif
4820         int portid = NETLINK_CB(cb->skb).portid;
4821         int seq = cb->nlh->nlmsg_seq;
4822         struct lnet_net *net;
4823         int idx = 0, rc = 0;
4824         bool found = false;
4825         void *hdr = NULL;
4826
4827 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
4828         extack = cb->extack;
4829 #endif
4830         if (!nlist->lngl_idx) {
4831                 const struct ln_key_list *all[] = {
4832                         &net_props_list, &local_ni_list,
4833                         &local_ni_interfaces_list,
4834                         NULL
4835                 };
4836
4837                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
4838                                                 &lnet_family,
4839                                                 NLM_F_CREATE | NLM_F_MULTI,
4840                                                 LNET_CMD_NETS, all);
4841                 if (rc < 0) {
4842                         NL_SET_ERR_MSG(extack, "failed to send key table");
4843                         GOTO(send_error, rc);
4844                 }
4845         }
4846
4847         lnet_net_lock(LNET_LOCK_EX);
4848
4849         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
4850                 struct lnet_ni *ni;
4851
4852                 if (nlist->lngl_net_id != LNET_NET_ANY &&
4853                     nlist->lngl_net_id != net->net_id)
4854                         continue;
4855
4856                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
4857                         struct nlattr *local_ni, *ni_attr;
4858                         char *status = "up";
4859
4860                         if (idx++ < nlist->lngl_idx)
4861                                 continue;
4862
4863                         hdr = genlmsg_put(msg, portid, seq, &lnet_family,
4864                                           NLM_F_MULTI, LNET_CMD_NETS);
4865                         if (!hdr) {
4866                                 NL_SET_ERR_MSG(extack, "failed to send values");
4867                                 GOTO(net_unlock, rc = -EMSGSIZE);
4868                         }
4869
4870                         if (idx == 1)
4871                                 nla_put_string(msg, LNET_NET_ATTR_HDR, "");
4872
4873                         nla_put_string(msg, LNET_NET_ATTR_TYPE,
4874                                        libcfs_net2str(net->net_id));
4875                         found = true;
4876
4877                         local_ni = nla_nest_start(msg, LNET_NET_ATTR_LOCAL);
4878                         ni_attr = nla_nest_start(msg, idx - 1);
4879
4880                         lnet_ni_lock(ni);
4881                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_NID,
4882                                        libcfs_nidstr(&ni->ni_nid));
4883                         if (nid_is_lo0(&ni->ni_nid) &&
4884                             *ni->ni_status != LNET_NI_STATUS_UP)
4885                                 status = "down";
4886                         nla_put_string(msg, LNET_NET_LOCAL_NI_ATTR_STATUS, status);
4887
4888                         if (!nid_is_lo0(&ni->ni_nid) && ni->ni_interface) {
4889                                 struct nlattr *intf_nest, *intf_attr;
4890
4891                                 intf_nest = nla_nest_start(msg,
4892                                                            LNET_NET_LOCAL_NI_ATTR_INTERFACE);
4893                                 intf_attr = nla_nest_start(msg, 0);
4894                                 nla_put_string(msg,
4895                                                LNET_NET_LOCAL_NI_INTF_ATTR_TYPE,
4896                                                ni->ni_interface);
4897                                 nla_nest_end(msg, intf_attr);
4898                                 nla_nest_end(msg, intf_nest);
4899                         }
4900
4901                         lnet_ni_unlock(ni);
4902                         nla_nest_end(msg, ni_attr);
4903                         nla_nest_end(msg, local_ni);
4904
4905                         genlmsg_end(msg, hdr);
4906                 }
4907         }
4908
4909         if (!found) {
4910                 struct nlmsghdr *nlh = nlmsg_hdr(msg);
4911
4912                 nlmsg_cancel(msg, nlh);
4913                 NL_SET_ERR_MSG(extack, "Network is down");
4914                 rc = -ESRCH;
4915         }
4916 net_unlock:
4917         lnet_net_unlock(LNET_LOCK_EX);
4918 send_error:
4919         nlist->lngl_idx = idx;
4920
4921         return lnet_nl_send_error(cb->skb, portid, seq, rc);
4922 }
4923
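     /* Kernels without netlink_callback.start cannot run ->start() before
      * the dump, so the first ->dumpit() call performs the setup itself.
      */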
4924 #ifndef HAVE_NETLINK_CALLBACK_START
4925 static int lnet_old_net_show_dump(struct sk_buff *msg,
4926                                    struct netlink_callback *cb)
4927 {
4928         if (!cb->args[0]) {
4929                 int rc = lnet_net_show_start(cb);
4930
4931                 if (rc < 0)
4932                         return rc;
4933         }
4934
4935         return lnet_net_show_dump(msg, cb);
4936 }
4937 #endif
4938
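     /* Parse the common "tunables" block. Attributes arrive as key/value
      * pairs, e.g. (illustrative layout, not the verbatim wire format):
      *
      *   LN_SCALAR_ATTR_VALUE      "peer_timeout"
      *   LN_SCALAR_ATTR_INT_VALUE  180
      *
      * so each recognised string key is expected to be followed by the
      * integer attribute holding its setting.
      */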
4939 static int lnet_genl_parse_tunables(struct nlattr *settings,
4940                                     struct lnet_ioctl_config_lnd_tunables *tun)
4941 {
4942         struct nlattr *param;
4943         int rem, rc = 0;
4944
4945         nla_for_each_nested(param, settings, rem) {
4946                 int type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_UNSPEC;
4947                 s64 num;
4948
4949                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
4950                         continue;
4951
4952                 if (nla_strcmp(param, "peer_timeout") == 0)
4953                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT;
4954                 else if (nla_strcmp(param, "peer_credits") == 0)
4955                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS;
4956                 else if (nla_strcmp(param, "peer_buffer_credits") == 0)
4957                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS;
4958                 else if (nla_strcmp(param, "credits") == 0)
4959                         type = LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS;
4960
4961                 param = nla_next(param, &rem);
4962                 if (nla_type(param) != LN_SCALAR_ATTR_INT_VALUE)
4963                         return -EINVAL;
4964
4965                 num = nla_get_s64(param);
4966                 switch (type) {
4967                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_TIMEOUT:
4968                         if (num >= 0)
4969                                 tun->lt_cmn.lct_peer_timeout = num;
4970                         break;
4971                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_CREDITS:
4972                         if (num > 0)
4973                                 tun->lt_cmn.lct_peer_tx_credits = num;
4974                         break;
4975                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_PEER_BUFFER_CREDITS:
4976                         if (num > 0)
4977                                 tun->lt_cmn.lct_peer_rtr_credits = num;
4978                         break;
4979                 case LNET_NET_LOCAL_NI_TUNABLES_ATTR_CREDITS:
4980                         if (num > 0)
4981                                 tun->lt_cmn.lct_max_tx_credits = num;
4982                         break;
4983                 default:
4984                         rc = -EINVAL;
4985                         break;
4986                 }
4987         }
4988         return rc;
4989 }
4990
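     /* Parse LND-specific tunables: match each key against the LND's own
      * key table and hand the value that follows to the LND's lnd_nl_set()
      * callback.
      */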
4991 static int lnet_genl_parse_lnd_tunables(struct nlattr *settings,
4992                                         struct lnet_lnd_tunables *tun,
4993                                         const struct lnet_lnd *lnd)
4994 {
4995         const struct ln_key_list *list = lnd->lnd_keys;
4996         struct nlattr *param;
4997         int rem, rc = 0;
4998         int i = 1;
4999
5000         /* silently ignore these settings if the LND driver doesn't
5001          * support any LND tunables
5002          */
5003         if (!list || !lnd->lnd_nl_set || !list->lkl_maxattr)
5004                 return 0;
5005
5006         nla_for_each_nested(param, settings, rem) {
5007                 if (nla_type(param) != LN_SCALAR_ATTR_VALUE)
5008                         continue;
5009
5010                 for (i = 1; i <= list->lkl_maxattr; i++) {
5011                         if (!list->lkl_list[i].lkp_value ||
5012                             nla_strcmp(param, list->lkl_list[i].lkp_value) != 0)
5013                                 continue;
5014
5015                         param = nla_next(param, &rem);
5016                         rc = lnd->lnd_nl_set(LNET_CMD_NETS, param, i, tun);
5017                         if (rc < 0)
5018                                 return rc;
5019                 }
5020         }
5021
5022         return rc;
5023 }
5024
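     /* Parse one "local NI" block: interface names, common tunables, LND
      * tunables and an optional CPT list, filling in conf and tun for the
      * add/delete paths in lnet_net_cmd().
      */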
5025 static int
5026 lnet_genl_parse_local_ni(struct nlattr *entry, struct genl_info *info,
5027                          int net_id, struct lnet_ioctl_config_ni *conf,
5028                          struct lnet_ioctl_config_lnd_tunables *tun,
5029                          bool *ni_list)
5030 {
5031         struct nlattr *settings;
5032         int rem3, rc = 0;
5033
5034         nla_for_each_nested(settings, entry, rem3) {
5035                 if (nla_type(settings) != LN_SCALAR_ATTR_VALUE)
5036                         continue;
5037
5038                 if (nla_strcmp(settings, "interfaces") == 0) {
5039                         struct nlattr *intf;
5040                         int rem4;
5041
5042                         settings = nla_next(settings, &rem3);
5043                         if (nla_type(settings) !=
5044                             LN_SCALAR_ATTR_LIST) {
5045                                 GENL_SET_ERR_MSG(info,
5046                                                  "invalid interfaces");
5047                                 GOTO(out, rc = -EINVAL);
5048                         }
5049
5050                         nla_for_each_nested(intf, settings, rem4) {
5051                                 intf = nla_next(intf, &rem4);
5052                                 if (nla_type(intf) !=
5053                                     LN_SCALAR_ATTR_VALUE) {
5054                                         GENL_SET_ERR_MSG(info,
5055                                                          "0 key is invalid");
5056                                         GOTO(out, rc = -EINVAL);
5057                                 }
5058
5059                                 rc = nla_strscpy(conf->lic_ni_intf, intf,
5060                                                  sizeof(conf->lic_ni_intf));
5061                                 if (rc < 0) {
5062                                         GENL_SET_ERR_MSG(info,
5063                                                          "failed to parse interfaces");
5064                                         GOTO(out, rc);
5065                                 }
5066                         }
5067                         *ni_list = true;
5068                 } else if (nla_strcmp(settings, "tunables") == 0) {
5069                         settings = nla_next(settings, &rem3);
5070                         if (nla_type(settings) !=
5071                             LN_SCALAR_ATTR_LIST) {
5072                                 GENL_SET_ERR_MSG(info,
5073                                                  "invalid tunables");
5074                                 GOTO(out, rc = -EINVAL);
5075                         }
5076
5077                         rc = lnet_genl_parse_tunables(settings, tun);
5078                         if (rc < 0) {
5079                                 GENL_SET_ERR_MSG(info,
5080                                                  "failed to parse tunables");
5081                                 GOTO(out, rc);
5082                         }
5083                 } else if ((nla_strcmp(settings, "lnd tunables") == 0)) {
5084                         const struct lnet_lnd *lnd;
5085
5086                         lnd = lnet_load_lnd(LNET_NETTYP(net_id));
5087                         if (IS_ERR(lnd)) {
5088                                 GENL_SET_ERR_MSG(info,
5089                                                  "LND type not supported");
5090                                 GOTO(out, rc = PTR_ERR(lnd));
5091                         }
5092
5093                         settings = nla_next(settings, &rem3);
5094                         if (nla_type(settings) !=
5095                             LN_SCALAR_ATTR_LIST) {
5096                                 GENL_SET_ERR_MSG(info,
5097                                                  "lnd tunables should be a list");
5098                                 GOTO(out, rc = -EINVAL);
5099                         }
5100
5101                         rc = lnet_genl_parse_lnd_tunables(settings,
5102                                                           &tun->lt_tun, lnd);
5103                         if (rc < 0) {
5104                                 GENL_SET_ERR_MSG(info,
5105                                                  "failed to parse lnd tunables");
5106                                 GOTO(out, rc);
5107                         }
5108                 } else if (nla_strcmp(settings, "CPT") == 0) {
5109                         struct nlattr *cpt;
5110                         int rem4;
5111
5112                         settings = nla_next(settings, &rem3);
5113                         if (nla_type(settings) != LN_SCALAR_ATTR_LIST) {
5114                                 GENL_SET_ERR_MSG(info,
5115                                                  "CPT should be list");
5116                                 GOTO(out, rc = -EINVAL);
5117                         }
5118
5119                         nla_for_each_nested(cpt, settings, rem4) {
5120                                 s64 core;
5121
5122                                 if (nla_type(cpt) !=
5123                                     LN_SCALAR_ATTR_INT_VALUE) {
5124                                         GENL_SET_ERR_MSG(info,
5125                                                          "invalid CPT config");
5126                                         GOTO(out, rc = -EINVAL);
5127                                 }
5128
5129                                 core = nla_get_s64(cpt);
5130                                 if (core >= LNET_CPT_NUMBER) {
5131                                         GENL_SET_ERR_MSG(info,
5132                                                          "invalid CPT value");
5133                                         GOTO(out, rc = -ERANGE);
5134                                 }
5135
5136                                 conf->lic_cpts[conf->lic_ncpts] = core;
5137                                 conf->lic_ncpts++;
5138                         }
5139                 }
5140         }
5141 out:
5142         return rc;
5143 }
5144
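     /* ->doit() handler for LNET_CMD_NETS. Depending on NLM_F_CREATE in the
      * request this either adds a new NI (lnet_dyn_add_ni) or deletes an
      * existing NI or net (lnet_dyn_del_ni/lnet_dyn_del_net) described by
      * the supplied configuration.
      */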
5145 static int lnet_net_cmd(struct sk_buff *skb, struct genl_info *info)
5146 {
5147         struct nlmsghdr *nlh = nlmsg_hdr(skb);
5148         struct genlmsghdr *gnlh = nlmsg_data(nlh);
5149         struct nlattr *params = genlmsg_data(gnlh);
5150         int msg_len, rem, rc = 0;
5151         struct nlattr *attr;
5152
5153         msg_len = genlmsg_len(gnlh);
5154         if (!msg_len) {
5155                 GENL_SET_ERR_MSG(info, "no configuration");
5156                 return -ENOMSG;
5157         }
5158
5159         nla_for_each_attr(attr, params, msg_len, rem) {
5160                 struct lnet_ioctl_config_ni conf;
5161                 u32 net_id = LNET_NET_ANY;
5162                 struct nlattr *entry;
5163                 bool ni_list = false;
5164                 int rem2;
5165
5166                 if (nla_type(attr) != LN_SCALAR_ATTR_LIST)
5167                         continue;
5168
5169                 nla_for_each_nested(entry, attr, rem2) {
5170                         switch (nla_type(entry)) {
5171                         case LN_SCALAR_ATTR_VALUE: {
5172                                 ssize_t len;
5173
5174                                 memset(&conf, 0, sizeof(conf));
5175                                 if (nla_strcmp(entry, "ip2net") == 0) {
5176                                         entry = nla_next(entry, &rem2);
5177                                         if (nla_type(entry) !=
5178                                             LN_SCALAR_ATTR_VALUE) {
5179                                                 GENL_SET_ERR_MSG(info,
5180                                                                  "ip2net has invalid key");
5181                                                 GOTO(out, rc = -EINVAL);
5182                                         }
5183
5184                                         len = nla_strscpy(conf.lic_legacy_ip2nets,
5185                                                           entry,
5186                                                           sizeof(conf.lic_legacy_ip2nets));
5187                                         if (len < 0) {
5188                                                 GENL_SET_ERR_MSG(info,
5189                                                                  "ip2net key string is invalid");
5190                                                 GOTO(out, rc = len);
5191                                         }
5192                                         ni_list = true;
5193                                 } else if (nla_strcmp(entry, "net type") == 0) {
5194                                         char tmp[LNET_NIDSTR_SIZE];
5195
5196                                         entry = nla_next(entry, &rem2);
5197                                         if (nla_type(entry) !=
5198                                             LN_SCALAR_ATTR_VALUE) {
5199                                                 GENL_SET_ERR_MSG(info,
5200                                                                  "net type has invalid key");
5201                                                 GOTO(out, rc = -EINVAL);
5202                                         }
5203
5204                                         len = nla_strscpy(tmp, entry,
5205                                                           sizeof(tmp));
5206                                         if (len < 0) {
5207                                                 GENL_SET_ERR_MSG(info,
5208                                                                  "net type key string is invalid");
5209                                                 GOTO(out, rc = len);
5210                                         }
5211
5212                                         net_id = libcfs_str2net(tmp);
5213                                         if (!net_id) {
5214                                                 GENL_SET_ERR_MSG(info,
5215                                                                  "cannot parse net");
5216                                                 GOTO(out, rc = -ENODEV);
5217                                         }
5218                                         if (LNET_NETTYP(net_id) == LOLND) {
5219                                                 GENL_SET_ERR_MSG(info,
5220                                                                  "setting @lo not allowed");
5221                                                 GOTO(out, rc = -ENODEV);
5222                                         }
5223                                         conf.lic_legacy_ip2nets[0] = '\0';
5224                                         conf.lic_ni_intf[0] = '\0';
5225                                         ni_list = false;
5226                                 }
5227                                 if (rc < 0)
5228                                         GOTO(out, rc);
5229                                 break;
5230                         }
5231                         case LN_SCALAR_ATTR_LIST: {
5232                                 bool create = info->nlhdr->nlmsg_flags &
5233                                               NLM_F_CREATE;
5234                                 struct lnet_ioctl_config_lnd_tunables tun;
5235
5236                                 memset(&tun, 0, sizeof(tun));
5237                                 /* Use LND defaults */
5238                                 tun.lt_cmn.lct_peer_timeout = -1;
5239                                 tun.lt_cmn.lct_peer_tx_credits = -1;
5240                                 tun.lt_cmn.lct_peer_rtr_credits = -1;
5241                                 tun.lt_cmn.lct_max_tx_credits = -1;
5242                                 conf.lic_ncpts = 0;
5243
5244                                 rc = lnet_genl_parse_local_ni(entry, info,
5245                                                               net_id, &conf,
5246                                                               &tun, &ni_list);
5247                                 if (rc < 0)
5248                                         GOTO(out, rc);
5249
5250                                 if (!create) {
5251                                         struct lnet_net *net;
5252                                         struct lnet_ni *ni;
5253
5254                                         rc = -ENODEV;
5255                                         if (!strlen(conf.lic_ni_intf)) {
5256                                                 GENL_SET_ERR_MSG(info,
5257                                                                  "interface is missing");
5258                                                 GOTO(out, rc);
5259                                         }
5260
5261                                         lnet_net_lock(LNET_LOCK_EX);
5262                                         net = lnet_get_net_locked(net_id);
5263                                         if (!net) {
5264                                                 GENL_SET_ERR_MSG(info,
5265                                                                  "LNet net doesn't exist");
5266                                                 lnet_net_unlock(LNET_LOCK_EX);
5267                                                 GOTO(out, rc);
5268                                         }
5269                                         list_for_each_entry(ni, &net->net_ni_list,
5270                                                             ni_netlist) {
5271                                                 if (!ni->ni_interface ||
5272                                                     strncmp(ni->ni_interface,
5273                                                             conf.lic_ni_intf,
5274                                                             strlen(conf.lic_ni_intf)) != 0) {
5275                                                         ni = NULL;
5276                                                         continue;
5277                                                 }
5278
5279                                                 lnet_net_unlock(LNET_LOCK_EX);
5280                                                 rc = lnet_dyn_del_ni(&ni->ni_nid);
5281                                                 if (rc < 0) {
5282                                                         GENL_SET_ERR_MSG(info,
5283                                                                          "cannot del LNet NI");
5284                                                         GOTO(out, rc);
5285                                                 }
5286                                                 break;
5287                                         }
5288
5289                                         if (rc < 0) { /* will be -ENODEV */
5290                                                 GENL_SET_ERR_MSG(info,
5291                                                                  "interface invalid for deleting LNet NI");
5292                                                 lnet_net_unlock(LNET_LOCK_EX);
5293                                         }
5294                                 } else {
5295                                         rc = lnet_dyn_add_ni(&conf, net_id, &tun);
5296                                         switch (rc) {
5297                                         case -ENOENT:
5298                                                 GENL_SET_ERR_MSG(info,
5299                                                                  "cannot parse net");
5300                                                 break;
5301                                         case -ERANGE:
5302                                                 GENL_SET_ERR_MSG(info,
5303                                                                  "invalid CPT set");
5304                                         fallthrough;
5305                                         default:
5306                                                 GENL_SET_ERR_MSG(info,
5307                                                                  "cannot add LNet NI");
5308                                         case 0:
5309                                                 break;
5310                                         }
5311                                         if (rc < 0)
5312                                                 GOTO(out, rc);
5313                                 }
5314                                 break;
5315                         }
5316                         /* it is possible that a newer version of userland
5317                          * sends values that older kernels don't handle, so
5318                          * silently ignore these values
5319                          */
5320                         default:
5321                                 break;
5322                         }
5323                 }
5324
5325                 /* Handle case of just sent NET with no list of NIDs */
5326                 if (!(info->nlhdr->nlmsg_flags & NLM_F_CREATE) && !ni_list) {
5327                         rc = lnet_dyn_del_net(net_id);
5328                         if (rc < 0) {
5329                                 GENL_SET_ERR_MSG(info,
5330                                                  "cannot del network");
5331                         }
5332                 }
5333         }
5334 out:
5335         return rc;
5336 }
5337
5338 static inline struct lnet_genl_ping_list *
5339 lnet_ping_dump_ctx(struct netlink_callback *cb)
5340 {
5341         return (struct lnet_genl_ping_list *)cb->args[0];
5342 }
5343
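     /* ->done() handler for the LNET_CMD_PING dump: releases both genradix
      * lists and the dump context allocated in lnet_ping_show_start().
      */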
5344 static int lnet_ping_show_done(struct netlink_callback *cb)
5345 {
5346         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
5347
5348         if (plist) {
5349                 genradix_free(&plist->lgpl_failed);
5350                 genradix_free(&plist->lgpl_list);
5351                 LIBCFS_FREE(plist, sizeof(*plist));
5352                 cb->args[0] = 0;
5353         }
5354
5355         return 0;
5356 }
5357
5358 /* LNet ping ->start() handler for GET requests */
5359 static int lnet_ping_show_start(struct netlink_callback *cb)
5360 {
5361         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5362 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5363         struct netlink_ext_ack *extack = NULL;
5364 #endif
5365         struct lnet_genl_ping_list *plist;
5366         int msg_len = genlmsg_len(gnlh);
5367         struct nlattr *params, *top;
5368         int rem, rc = 0;
5369
5370 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5371         extack = cb->extack;
5372 #endif
5373         if (the_lnet.ln_refcount == 0) {
5374                 NL_SET_ERR_MSG(extack, "Network is down");
5375                 return -ENETDOWN;
5376         }
5377
5378         if (!msg_len) {
5379                 NL_SET_ERR_MSG(extack, "Ping needs NID targets");
5380                 return -ENOENT;
5381         }
5382
5383         LIBCFS_ALLOC(plist, sizeof(*plist));
5384         if (!plist) {
5385                 NL_SET_ERR_MSG(extack, "failed to setup ping list");
5386                 return -ENOMEM;
5387         }
5388         genradix_init(&plist->lgpl_list);
5389         plist->lgpl_timeout = cfs_time_seconds(DEFAULT_PEER_TIMEOUT);
5390         plist->lgpl_src_nid = LNET_ANY_NID;
5391         plist->lgpl_index = 0;
5392         plist->lgpl_list_count = 0;
5393         cb->args[0] = (long)plist;
5394
5395         params = genlmsg_data(gnlh);
5396         nla_for_each_attr(top, params, msg_len, rem) {
5397                 struct nlattr *nids;
5398                 int rem2;
5399
5400                 switch (nla_type(top)) {
5401                 case LN_SCALAR_ATTR_VALUE:
5402                         if (nla_strcmp(top, "timeout") == 0) {
5403                                 s64 timeout;
5404
5405                                 top = nla_next(top, &rem);
5406                                 if (nla_type(top) != LN_SCALAR_ATTR_INT_VALUE) {
5407                                         NL_SET_ERR_MSG(extack,
5408                                                        "invalid timeout param");
5409                                         GOTO(report_err, rc = -EINVAL);
5410                                 }
5411
5412                                 /* Keep the 3 minute default unless a valid
5413                                  * (positive, below the maximum) timeout is given
5414                                  */
5415                                 timeout = nla_get_s64(top);
5416                                 if (timeout > 0 &&
5417                                     timeout < (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
5418                                         plist->lgpl_timeout =
5419                                                 nsecs_to_jiffies(timeout * NSEC_PER_MSEC);
5420                         } else if (nla_strcmp(top, "source") == 0) {
5421                                 char nidstr[LNET_NIDSTR_SIZE + 1];
5422
5423                                 top = nla_next(top, &rem);
5424                                 if (nla_type(top) != LN_SCALAR_ATTR_VALUE) {
5425                                         NL_SET_ERR_MSG(extack,
5426                                                        "invalid source param");
5427                                         GOTO(report_err, rc = -EINVAL);
5428                                 }
5429
5430                                 rc = nla_strscpy(nidstr, top, sizeof(nidstr));
5431                                 if (rc < 0) {
5432                                         NL_SET_ERR_MSG(extack,
5433                                                        "failed to parse source nid");
5434                                         GOTO(report_err, rc);
5435                                 }
5436
5437                                 rc = libcfs_strnid(&plist->lgpl_src_nid,
5438                                                    strim(nidstr));
5439                                 if (rc < 0) {
5440                                         NL_SET_ERR_MSG(extack,
5441                                                        "invalid source nid");
5442                                         GOTO(report_err, rc);
5443                                 }
5444                                 rc = 0;
5445                         }
5446                         break;
5447                 case LN_SCALAR_ATTR_LIST:
5448                         nla_for_each_nested(nids, top, rem2) {
5449                                 char nid[LNET_NIDSTR_SIZE + 1];
5450                                 struct lnet_processid *id;
5451
5452                                 if (nla_type(nids) != LN_SCALAR_ATTR_VALUE)
5453                                         continue;
5454
5455                                 memset(nid, 0, sizeof(nid));
5456                                 rc = nla_strscpy(nid, nids, sizeof(nid));
5457                                 if (rc < 0) {
5458                                         NL_SET_ERR_MSG(extack,
5459                                                        "failed to get NID");
5460                                         GOTO(report_err, rc);
5461                                 }
5462
5463                                 id = genradix_ptr_alloc(&plist->lgpl_list,
5464                                                         plist->lgpl_list_count++,
5465                                                         GFP_ATOMIC);
5466                                 if (!id) {
5467                                         NL_SET_ERR_MSG(extack,
5468                                                        "failed to allocate NID");
5469                                         GOTO(report_err, rc = -ENOMEM);
5470                                 }
5471
5472                                 rc = libcfs_strid(id, strim(nid));
5473                                 if (rc < 0) {
5474                                         NL_SET_ERR_MSG(extack, "invalid NID");
5475                                         GOTO(report_err, rc);
5476                                 }
5477                                 rc = 0;
5478                         }
5479                         fallthrough;
5480                 default:
5481                         break;
5482                 }
5483         }
5484 report_err:
5485         if (rc < 0)
5486                 lnet_ping_show_done(cb);
5487
5488         return rc;
5489 }
5490
5491 static const struct ln_key_list ping_props_list = {
5492         .lkl_maxattr                    = LNET_PING_ATTR_MAX,
5493         .lkl_list                       = {
5494                 [LNET_PING_ATTR_HDR]            = {
5495                         .lkp_value              = "ping",
5496                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
5497                         .lkp_data_type          = NLA_NUL_STRING,
5498                 },
5499                 [LNET_PING_ATTR_PRIMARY_NID]    = {
5500                         .lkp_value              = "primary nid",
5501                         .lkp_data_type          = NLA_STRING
5502                 },
5503                 [LNET_PING_ATTR_ERRNO]          = {
5504                         .lkp_value              = "errno",
5505                         .lkp_data_type          = NLA_S16
5506                 },
5507                 [LNET_PING_ATTR_MULTIRAIL]      = {
5508                         .lkp_value              = "Multi-Rail",
5509                         .lkp_data_type          = NLA_FLAG
5510                 },
5511                 [LNET_PING_ATTR_PEER_NI_LIST]   = {
5512                         .lkp_value              = "peer_ni",
5513                         .lkp_key_format         = LNKF_SEQUENCE | LNKF_MAPPING,
5514                         .lkp_data_type          = NLA_NESTED
5515                 },
5516         },
5517 };
5518
5519 static struct ln_key_list ping_peer_ni_list = {
5520         .lkl_maxattr                    = LNET_PING_PEER_NI_ATTR_MAX,
5521         .lkl_list                       = {
5522                 [LNET_PING_PEER_NI_ATTR_NID]    = {
5523                         .lkp_value              = "nid",
5524                         .lkp_data_type          = NLA_STRING
5525                 },
5526         },
5527 };
5528
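     /* ->dumpit() handler for LNET_CMD_PING: ping each NID collected in
      * ->start(), emit one message per reachable peer (primary NID,
      * Multi-Rail flag and the peer NI list) and queue unreachable peers in
      * lgpl_failed so their errno can be reported at the end of the dump.
      */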
5529 static int lnet_ping_show_dump(struct sk_buff *msg,
5530                                struct netlink_callback *cb)
5531 {
5532         struct lnet_genl_ping_list *plist = lnet_ping_dump_ctx(cb);
5533         struct genlmsghdr *gnlh = nlmsg_data(cb->nlh);
5534 #ifdef HAVE_NL_PARSE_WITH_EXT_ACK
5535         struct netlink_ext_ack *extack = NULL;
5536 #endif
5537         int portid = NETLINK_CB(cb->skb).portid;
5538         int seq = cb->nlh->nlmsg_seq;
5539         int idx = plist->lgpl_index;
5540         int rc = 0, i = 0;
5541
5542 #ifdef HAVE_NL_DUMP_WITH_EXT_ACK
5543         extack = cb->extack;
5544 #endif
5545         if (!plist->lgpl_index) {
5546                 const struct ln_key_list *all[] = {
5547                         &ping_props_list, &ping_peer_ni_list, NULL
5548                 };
5549
5550                 rc = lnet_genl_send_scalar_list(msg, portid, seq,
5551                                                 &lnet_family,
5552                                                 NLM_F_CREATE | NLM_F_MULTI,
5553                                                 LNET_CMD_PING, all);
5554                 if (rc < 0) {
5555                         NL_SET_ERR_MSG(extack, "failed to send key table");
5556                         GOTO(send_error, rc);
5557                 }
5558
5559                 genradix_init(&plist->lgpl_failed);
5560         }
5561
5562         while (idx < plist->lgpl_list_count) {
5563                 struct lnet_nid primary_nid = LNET_ANY_NID;
5564                 struct lnet_genl_ping_list peers;
5565                 struct lnet_processid *id;
5566                 struct nlattr *nid_list;
5567                 struct lnet_peer *lp;
5568                 bool mr_flag = false;
5569                 unsigned int count;
5570                 void *hdr = NULL;
5571
5572                 id = genradix_ptr(&plist->lgpl_list, idx++);
5573                 if (nid_is_lo0(&id->nid))
5574                         continue;
5575
5576                 rc = lnet_ping(id, &plist->lgpl_src_nid, plist->lgpl_timeout,
5577                                &peers, lnet_interfaces_max);
5578                 if (rc < 0) {
5579                         struct lnet_fail_ping *fail;
5580
5581                         fail = genradix_ptr_alloc(&plist->lgpl_failed,
5582                                                   plist->lgpl_failed_count++,
5583                                                   GFP_ATOMIC);
5584                         if (!fail) {
5585                                 NL_SET_ERR_MSG(extack,
5586                                                "failed to allocate failed NID");
5587                                 GOTO(send_error, rc);
5588                         }
5589                         fail->lfp_id = *id;
5590                         fail->lfp_errno = rc;
5591                         goto cant_reach;
5592                 }
5593
5594                 mutex_lock(&the_lnet.ln_api_mutex);
5595                 lp = lnet_find_peer(&id->nid);
5596                 if (lp) {
5597                         primary_nid = lp->lp_primary_nid;
5598                         mr_flag = lnet_peer_is_multi_rail(lp);
5599                         lnet_peer_decref_locked(lp);
5600                 }
5601                 mutex_unlock(&the_lnet.ln_api_mutex);
5602
5603                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
5604                                   NLM_F_MULTI, LNET_CMD_PING);
5605                 if (!hdr) {
5606                         NL_SET_ERR_MSG(extack, "failed to send values");
5607                         genlmsg_cancel(msg, hdr);
5608                         GOTO(send_error, rc = -EMSGSIZE);
5609                 }
5610
5611                 if (i++ == 0)
5612                         nla_put_string(msg, LNET_PING_ATTR_HDR, "");
5613
5614                 nla_put_string(msg, LNET_PING_ATTR_PRIMARY_NID,
5615                                libcfs_nidstr(&primary_nid));
5616                 if (mr_flag)
5617                         nla_put_flag(msg, LNET_PING_ATTR_MULTIRAIL);
5618
5619                 nid_list = nla_nest_start(msg, LNET_PING_ATTR_PEER_NI_LIST);
5620                 for (count = 0; count < rc; count++) {
5621                         struct lnet_processid *result;
5622                         struct nlattr *nid_attr;
5623                         char *idstr;
5624
5625                         result = genradix_ptr(&peers.lgpl_list, count);
5626                         if (nid_is_lo0(&result->nid))
5627                                 continue;
5628
5629                         nid_attr = nla_nest_start(msg, count + 1);
5630                         if (gnlh->version == 1)
5631                                 idstr = libcfs_nidstr(&result->nid);
5632                         else
5633                                 idstr = libcfs_idstr(result);
5634                         nla_put_string(msg, LNET_PING_PEER_NI_ATTR_NID, idstr);
5635                         nla_nest_end(msg, nid_attr);
5636                 }
5637                 nla_nest_end(msg, nid_list);
5638                 genlmsg_end(msg, hdr);
5639 cant_reach:
5640                 genradix_free(&peers.lgpl_list);
5641         }
5642
5643         for (i = 0; i < plist->lgpl_failed_count; i++) {
5644                 struct lnet_fail_ping *fail;
5645                 void *hdr;
5646
5647                 fail = genradix_ptr(&plist->lgpl_failed, i);
5648
5649                 hdr = genlmsg_put(msg, portid, seq, &lnet_family,
5650                                   NLM_F_MULTI, LNET_CMD_PING);
5651                 if (!hdr) {
5652                         NL_SET_ERR_MSG(extack, "failed to send failed values");
5653                         genlmsg_cancel(msg, hdr);
5654                         GOTO(send_error, rc = -EMSGSIZE);
5655                 }
5656
5657                 if (i == 0)
5658                         nla_put_string(msg, LNET_PING_ATTR_HDR, "");
5659
5660                 nla_put_string(msg, LNET_PING_ATTR_PRIMARY_NID,
5661                                libcfs_nidstr(&fail->lfp_id.nid));
5662                 nla_put_s16(msg, LNET_PING_ATTR_ERRNO, fail->lfp_errno);
5663                 genlmsg_end(msg, hdr);
5664         }
5665         rc = 0; /* don't treat it as an error */
5666
5667         plist->lgpl_index = idx;
5668 send_error:
5669         return lnet_nl_send_error(cb->skb, portid, seq, rc);
5670 }
5671
5672 #ifndef HAVE_NETLINK_CALLBACK_START
5673 static int lnet_old_ping_show_dump(struct sk_buff *msg,
5674                                    struct netlink_callback *cb)
5675 {
5676         if (!cb->args[0]) {
5677                 int rc = lnet_ping_show_start(cb);
5678
5679                 if (rc < 0)
5680                         return rc;
5681         }
5682
5683         return lnet_ping_show_dump(msg, cb);
5684 }
5685 #endif
5686
5687 static const struct genl_multicast_group lnet_mcast_grps[] = {
5688         { .name =       "ip2net",       },
5689         { .name =       "net",          },
5690         { .name =       "ping",         },
5691 };
5692
5693 static const struct genl_ops lnet_genl_ops[] = {
5694         {
5695                 .cmd            = LNET_CMD_NETS,
5696 #ifdef HAVE_NETLINK_CALLBACK_START
5697                 .start          = lnet_net_show_start,
5698                 .dumpit         = lnet_net_show_dump,
5699 #else
5700                 .dumpit         = lnet_old_net_show_dump,
5701 #endif
5702                 .done           = lnet_net_show_done,
5703                 .doit           = lnet_net_cmd,
5704         },
5705         {
5706                 .cmd            = LNET_CMD_PING,
5707 #ifdef HAVE_NETLINK_CALLBACK_START
5708                 .start          = lnet_ping_show_start,
5709                 .dumpit         = lnet_ping_show_dump,
5710 #else
5711                 .dumpit         = lnet_old_ping_show_dump,
5712 #endif
5713                 .done           = lnet_ping_show_done,
5714         },
5715 };
5716
5717 static struct genl_family lnet_family = {
5718         .name           = LNET_GENL_NAME,
5719         .version        = LNET_GENL_VERSION,
5720         .module         = THIS_MODULE,
5721         .netnsok        = true,
5722         .ops            = lnet_genl_ops,
5723         .n_ops          = ARRAY_SIZE(lnet_genl_ops),
5724         .mcgrps         = lnet_mcast_grps,
5725         .n_mcgrps       = ARRAY_SIZE(lnet_mcast_grps),
5726 };
5727
5728 void LNetDebugPeer(struct lnet_processid *id)
5729 {
5730         lnet_debug_peer(&id->nid);
5731 }
5732 EXPORT_SYMBOL(LNetDebugPeer);
5733
5734 /**
5735  * Determine if the specified peer \a nid is on the local node.
5736  *
5737  * \param nid   peer nid to check
5738  *
5739  * \retval true         If peer NID is on the local node.
5740  * \retval false        If peer NID is not on the local node.
5741  */
5742 bool LNetIsPeerLocal(struct lnet_nid *nid)
5743 {
5744         struct lnet_net *net;
5745         struct lnet_ni *ni;
5746         int cpt;
5747
5748         cpt = lnet_net_lock_current();
5749         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
5750                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
5751                         if (nid_same(&ni->ni_nid, nid)) {
5752                                 lnet_net_unlock(cpt);
5753                                 return true;
5754                         }
5755                 }
5756         }
5757         lnet_net_unlock(cpt);
5758
5759         return false;
5760 }
5761 EXPORT_SYMBOL(LNetIsPeerLocal);
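
/*
 * Illustrative usage sketch (not part of the original source): a caller that
 * already holds a peer NID can use LNetIsPeerLocal() to check whether the NID
 * belongs to one of this node's own NIs, e.g. to avoid a network round trip.
 * The helper and variable names below are hypothetical.
 */
#if 0	/* example only */
static bool example_nid_is_self(struct lnet_nid *peer_nid)
{
	if (LNetIsPeerLocal(peer_nid)) {
		CDEBUG(D_NET, "%s is a local NI\n", libcfs_nidstr(peer_nid));
		return true;
	}
	return false;
}
#endif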
5762
5763 /**
5764  * Retrieve the struct lnet_processid ID of the LNet interface at \a index.
5765  * Note that all interfaces share the same PID, as requested by LNetNIInit().
5766  *
5767  * \param index Index of the interface to look up.
5768  * \param id On successful return, this location will hold the
5769  * struct lnet_processid ID of the interface.
5770  *
5771  * \retval 0 If an interface exists at \a index.
5772  * \retval -ENOENT If no interface has been found.
5773  */
5774 int
5775 LNetGetId(unsigned int index, struct lnet_processid *id)
5776 {
5777         struct lnet_ni   *ni;
5778         struct lnet_net  *net;
5779         int               cpt;
5780         int               rc = -ENOENT;
5781
5782         LASSERT(the_lnet.ln_refcount > 0);
5783
5784         cpt = lnet_net_lock_current();
5785
5786         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
5787                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
5788                         if (!nid_is_nid4(&ni->ni_nid))
5789                                 /* FIXME this needs to be handled */
5790                                 continue;
5791                         if (index-- != 0)
5792                                 continue;
5793
5794                         id->nid = ni->ni_nid;
5795                         id->pid = the_lnet.ln_pid;
5796                         rc = 0;
5797                         break;
5798                 }
5799         }
5800
5801         lnet_net_unlock(cpt);
5802         return rc;
5803 }
5804 EXPORT_SYMBOL(LNetGetId);
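
/*
 * Illustrative usage sketch (not part of the original source): local
 * interfaces can be enumerated by probing successive indices until
 * LNetGetId() returns -ENOENT.  The helper name below is hypothetical and
 * assumes the caller already holds an LNet reference (ln_refcount > 0).
 */
#if 0	/* example only */
static void example_dump_local_ids(void)
{
	struct lnet_processid id;
	unsigned int i;

	for (i = 0; LNetGetId(i, &id) == 0; i++)
		CDEBUG(D_NET, "interface %u: %s\n", i, libcfs_idstr(&id));
}
#endif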
5805
5806 struct ping_data {
5807         int rc;                         /* negative errno, or reply length in bytes */
5808         int replied;                    /* an LNET_EVENT_REPLY was received */
5809         int pd_unlinked;                /* the MD has been unlinked */
5810         struct lnet_handle_md mdh;      /* handle of the MD bound over the ping buffer */
5811         struct completion completion;   /* completed on MD unlink or failed send */
5812 };
5813
5814 static void
5815 lnet_ping_event_handler(struct lnet_event *event)
5816 {
5817         struct ping_data *pd = event->md_user_ptr;
5818
5819         CDEBUG(D_NET, "ping event (%d %d)%s\n",
5820                event->type, event->status,
5821                event->unlinked ? " unlinked" : "");
5822
5823         if (event->status) {
5824                 if (!pd->rc)
5825                         pd->rc = event->status;
5826         } else if (event->type == LNET_EVENT_REPLY) {
5827                 pd->replied = 1;
5828                 pd->rc = event->mlength;
5829         }
5830
5831         if (event->unlinked)
5832                 pd->pd_unlinked = 1;
5833
5834         if (event->unlinked ||
5835             (event->type == LNET_EVENT_SEND && event->status))
5836                 complete(&pd->completion);
5837 }
5838
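/*
 * Ping the peer identified by 'id': bind a temporary MD over a freshly
 * allocated ping buffer, issue an LNetGet() on LNET_RESERVED_PORTAL with
 * LNET_PROTO_PING_MATCHBITS, and wait (bounded by 'timeout') for
 * lnet_ping_event_handler() to signal completion once the MD is unlinked.
 * The returned lnet_ping_info is then validated (magic, features, size) and
 * its NIDs are copied into 'plist'.  Returns the number of NIDs collected
 * on success, or a negative errno.
 */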
5839 static int lnet_ping(struct lnet_processid *id, struct lnet_nid *src_nid,
5840                      signed long timeout, struct lnet_genl_ping_list *plist,
5841                      int n_ids)
5842 {
5843         int id_bytes = sizeof(struct lnet_ni_status); /* For 0@lo */
5844         struct lnet_md md = { NULL };
5845         struct ping_data pd = { 0 };
5846         struct lnet_ping_buffer *pbuf;
5847         struct lnet_processid pid;
5848         struct lnet_ping_iter pi;
5849         int i = 0;
5850         u32 *st;
5851         int nob;
5852         int rc;
5853         int rc2;
5854
5855         genradix_init(&plist->lgpl_list);
5856
5857         /* n_ids limit is arbitrary */
5858         if (n_ids <= 0 || LNET_NID_IS_ANY(&id->nid))
5859                 return -EINVAL;
5860
5861         /* If the user buffer has more space than lnet_interfaces_max,
5862          * then only fill it up to lnet_interfaces_max.
5863          */
5864         if (n_ids > lnet_interfaces_max)
5865                 n_ids = lnet_interfaces_max;
5866
5867         if (id->pid == LNET_PID_ANY)
5868                 id->pid = LNET_PID_LUSTRE;
5869
5870         id_bytes += lnet_ping_sts_size(&id->nid) * n_ids;
5871         pbuf = lnet_ping_buffer_alloc(id_bytes, GFP_NOFS);
5872         if (!pbuf)
5873                 return -ENOMEM;
5874
5875         /* initialize md content */
5876         md.start     = &pbuf->pb_info;
5877         md.length    = id_bytes;
5878         md.threshold = 2; /* GET/REPLY */
5879         md.max_size  = 0;
5880         md.options   = LNET_MD_TRUNCATE;
5881         md.user_ptr  = &pd;
5882         md.handler   = lnet_ping_event_handler;
5883
5884         init_completion(&pd.completion);
5885
5886         rc = LNetMDBind(&md, LNET_UNLINK, &pd.mdh);
5887         if (rc != 0) {
5888                 CERROR("Can't bind MD: %d\n", rc);
5889                 goto fail_ping_buffer_decref;
5890         }
5891
5892         rc = LNetGet(src_nid, pd.mdh, id, LNET_RESERVED_PORTAL,
5893                      LNET_PROTO_PING_MATCHBITS, 0, false);
5894         if (rc != 0) {
5895                 /* Don't CERROR; this could be deliberate! */
5896                 rc2 = LNetMDUnlink(pd.mdh);
5897                 LASSERT(rc2 == 0);
5898
5899                 /* NB must wait for the UNLINK event below... */
5900         }
5901
5902         /* Ensure completion in finite time... */
5903         wait_for_completion_timeout(&pd.completion, timeout);
5904         if (!pd.pd_unlinked) {
5905                 LNetMDUnlink(pd.mdh);
5906                 wait_for_completion(&pd.completion);
5907         }
5908
5909         if (!pd.replied) {
5910                 rc = pd.rc ?: -EIO;
5911                 goto fail_ping_buffer_decref;
5912         }
5913
5914         nob = pd.rc;
5915         LASSERT(nob >= 0 && nob <= id_bytes);
5916
5917         rc = -EPROTO;           /* if I can't parse... */
5918
5919         if (nob < LNET_PING_INFO_HDR_SIZE) {
5920                 CERROR("%s: ping info too short %d\n",
5921                        libcfs_idstr(id), nob);
5922                 goto fail_ping_buffer_decref;
5923         }
5924
5925         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
5926                 lnet_swap_pinginfo(pbuf);
5927         } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
5928                 CERROR("%s: Unexpected magic %08x\n",
5929                        libcfs_idstr(id), pbuf->pb_info.pi_magic);
5930                 goto fail_ping_buffer_decref;
5931         }
5932
5933         if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
5934                 CERROR("%s: ping w/o NI status: 0x%x\n",
5935                        libcfs_idstr(id), pbuf->pb_info.pi_features);
5936                 goto fail_ping_buffer_decref;
5937         }
5938
5939         /* Test if the reply is smaller than lnet_ping_info with just one pi_ni
5940          * status entry; that entry may itself be larger when large NIDs are used.
5941          */
5942         if (nob < offsetof(struct lnet_ping_info, pi_ni[1])) {
5943                 CERROR("%s: Short reply %d(%lu min)\n",
5944                        libcfs_idstr(id), nob,
5945                        offsetof(struct lnet_ping_info, pi_ni[1]));
5946                 goto fail_ping_buffer_decref;
5947         }
5948
5949         if (ping_info_count_entries(pbuf) < n_ids) {
5950                 n_ids = ping_info_count_entries(pbuf);
5951                 id_bytes = lnet_ping_info_size(&pbuf->pb_info);
5952         }
5953
5954         if (nob < id_bytes) {
5955                 CERROR("%s: Short reply %d(%d expected)\n",
5956                        libcfs_idstr(id), nob, id_bytes);
5957                 goto fail_ping_buffer_decref;
5958         }
5959
5960         for (st = ping_iter_first(&pi, pbuf, &pid.nid);
5961              st;
5962              st = ping_iter_next(&pi, &pid.nid)) {
5963                 id = genradix_ptr_alloc(&plist->lgpl_list, i++, GFP_ATOMIC);
5964                 if (!id) {
5965                         rc = -ENOMEM;
5966                         goto fail_ping_buffer_decref;
5967                 }
5968
5969                 id->pid = pbuf->pb_info.pi_pid;
5970                 id->nid = pid.nid;
5971         }
5972         rc = i;
5973 fail_ping_buffer_decref:
5974         lnet_ping_buffer_decref(pbuf);
5975         return rc;
5976 }
5977
5978 static int
5979 lnet_discover(struct lnet_process_id id4, __u32 force,
5980               struct lnet_process_id __user *ids, int n_ids)
5981 {
5982         struct lnet_peer_ni *lpni;
5983         struct lnet_peer_ni *p;
5984         struct lnet_peer *lp;
5985         struct lnet_process_id *buf;
5986         struct lnet_processid id;
5987         int cpt;
5988         int i;
5989         int rc;
5990
5991         if (n_ids <= 0 ||
5992             id4.nid == LNET_NID_ANY)
5993                 return -EINVAL;
5994
5995         lnet_pid4_to_pid(id4, &id);
5996         if (id.pid == LNET_PID_ANY)
5997                 id.pid = LNET_PID_LUSTRE;
5998
5999         /*
6000          * If the user buffer has more space than lnet_interfaces_max,
6001          * then only fill it up to lnet_interfaces_max.
6002          */
6003         if (n_ids > lnet_interfaces_max)
6004                 n_ids = lnet_interfaces_max;
6005
6006         CFS_ALLOC_PTR_ARRAY(buf, n_ids);
6007         if (!buf)
6008                 return -ENOMEM;
6009
6010         cpt = lnet_net_lock_current();
6011         lpni = lnet_peerni_by_nid_locked(&id.nid, NULL, cpt);
6012         if (IS_ERR(lpni)) {
6013                 rc = PTR_ERR(lpni);
6014                 goto out;
6015         }
6016
6017         /*
6018          * Clearing the NIDS_UPTODATE flag ensures the peer will
6019          * be discovered, provided discovery has not been disabled.
6020          */
6021         lp = lpni->lpni_peer_net->lpn_peer;
6022         spin_lock(&lp->lp_lock);
6023         lp->lp_state &= ~LNET_PEER_NIDS_UPTODATE;
6024         /* If the force flag is set, force a PING and PUSH as well. */
6025         if (force)
6026                 lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH;
6027         spin_unlock(&lp->lp_lock);
6028         rc = lnet_discover_peer_locked(lpni, cpt, true);
6029         if (rc)
6030                 goto out_decref;
6031
6032         /* The lpni (or lp) for this NID may have changed and our ref is
6033          * the only thing keeping the old one around. Release the ref
6034          * and look up the lpni again.
6035          */
6036         lnet_peer_ni_decref_locked(lpni);
6037         lpni = lnet_peer_ni_find_locked(&id.nid);
6038         if (!lpni) {
6039                 rc = -ENOENT;
6040                 goto out;
6041         }
6042         lp = lpni->lpni_peer_net->lpn_peer;
6043
6044         i = 0;
6045         p = NULL;
6046         while ((p = lnet_get_next_peer_ni_locked(lp, NULL, p)) != NULL) {
6047                 buf[i].pid = id.pid;
6048                 buf[i].nid = lnet_nid_to_nid4(&p->lpni_nid);
6049                 if (++i >= n_ids)
6050                         break;
6051         }
6052         rc = i;
6053
6054 out_decref:
6055         lnet_peer_ni_decref_locked(lpni);
6056 out:
6057         lnet_net_unlock(cpt);
6058
6059         if (rc >= 0)
6060                 if (copy_to_user(ids, buf, rc * sizeof(*buf)))
6061                         rc = -EFAULT;
6062         CFS_FREE_PTR_ARRAY(buf, n_ids);
6063
6064         return rc;
6065 }
6066
6067 /**
6068  * Retrieve peer discovery status.
6069  *
6070  * \retval 1 if lnet_peer_discovery_disabled is 0
6071  * \retval 0 if lnet_peer_discovery_disabled is 1
6072  */
6073 int
6074 LNetGetPeerDiscoveryStatus(void)
6075 {
6076         return !lnet_peer_discovery_disabled;
6077 }
6078 EXPORT_SYMBOL(LNetGetPeerDiscoveryStatus);