Whamcloud - gitweb
LU-9480 lnet: add "lnetctl ping" command
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_LNET
34 #include <linux/log2.h>
35 #include <linux/ktime.h>
36 #include <linux/moduleparam.h>
37
38 #include <lnet/lib-lnet.h>
39
40 #define D_LNI D_CONSOLE
41
42 /*
43  * initialize ln_api_mutex statically, since it needs to be used in
44  * discovery_set callback. That module parameter callback can be called
45  * before module init completes. The mutex needs to be ready for use then.
46  */
/*
 * initialize ln_api_mutex statically, since it needs to be used in
 * discovery_set callback. That module parameter callback can be called
 * before module init completes. The mutex needs to be ready for use then.
 */
struct lnet the_lnet = {
	.ln_api_mutex = __MUTEX_INITIALIZER(the_lnet.ln_api_mutex),
};		/* THE state of the network */
EXPORT_SYMBOL(the_lnet);

/* "ip2nets" maps local IP addresses to LNet networks; mutually exclusive
 * with "networks" (checked in lnet_get_networks()) */
static char *ip2nets = "";
module_param(ip2nets, charp, 0444);
MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");

/* explicit list of local networks to configure */
static char *networks = "";
module_param(networks, charp, 0444);
MODULE_PARM_DESC(networks, "local networks");

/* static routes to remote networks */
static char *routes = "";
module_param(routes, charp, 0444);
MODULE_PARM_DESC(routes, "routes to non-local networks");

static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
module_param(rnet_htable_size, int, 0444);
MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");

static int use_tcp_bonding = false;
module_param(use_tcp_bonding, int, 0444);
MODULE_PARM_DESC(use_tcp_bonding,
		 "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");

unsigned int lnet_numa_range = 0;
module_param(lnet_numa_range, uint, 0444);
MODULE_PARM_DESC(lnet_numa_range,
		"NUMA range to consider during Multi-Rail selection");

/* writable at runtime via the intf_max_set() callback, which clamps the
 * value to LNET_INTERFACES_MIN */
static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
static int intf_max_set(const char *val, struct kernel_param *kp);
module_param_call(lnet_interfaces_max, intf_max_set, param_get_int,
		  &lnet_interfaces_max, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(lnet_interfaces_max,
		"Maximum number of interfaces in a node.");

/* writable at runtime via the discovery_set() callback, which also pushes
 * the new setting to peers when LNet is running */
unsigned lnet_peer_discovery_disabled = 0;
static int discovery_set(const char *val, struct kernel_param *kp);
module_param_call(lnet_peer_discovery_disabled, discovery_set, param_get_int,
		  &lnet_peer_discovery_disabled, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(lnet_peer_discovery_disabled,
		"Set to 1 to disable peer discovery on this node.");

/*
 * This sequence number keeps track of how many times DLC was used to
 * update the local NIs. It is incremented when a NI is added or
 * removed and checked when sending a message to determine if there is
 * a need to re-run the selection algorithm. See lnet_select_pathway()
 * for more details on its usage.
 */
static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);

static int lnet_ping(struct lnet_process_id id, signed long timeout,
		     struct lnet_process_id __user *ids, int n_ids);
103
/*
 * Module-parameter setter for 'lnet_peer_discovery_disabled'.  May be
 * invoked before module init completes, which is why ln_api_mutex is
 * statically initialized (see the definition of the_lnet above).
 */
static int
discovery_set(const char *val, struct kernel_param *kp)
{
	int rc;
	unsigned *discovery = (unsigned *)kp->arg;
	unsigned long value;
	struct lnet_ping_buffer *pbuf;

	rc = kstrtoul(val, 0, &value);
	if (rc) {
		CERROR("Invalid module parameter value for 'lnet_peer_discovery_disabled'\n");
		return rc;
	}

	/* normalize any non-zero input to exactly 1 */
	value = (value) ? 1 : 0;

	/*
	 * The purpose of locking the api_mutex here is to ensure that
	 * the correct value ends up stored properly.
	 */
	mutex_lock(&the_lnet.ln_api_mutex);

	/* no change: nothing to store or advertise */
	if (value == *discovery) {
		mutex_unlock(&the_lnet.ln_api_mutex);
		return 0;
	}

	*discovery = value;

	/* if LNet is not running there are no peers to notify yet */
	if (the_lnet.ln_state != LNET_STATE_RUNNING) {
		mutex_unlock(&the_lnet.ln_api_mutex);
		return 0;
	}

	/* tell peers that discovery setting has changed */
	lnet_net_lock(LNET_LOCK_EX);
	pbuf = the_lnet.ln_ping_target;
	if (value)
		pbuf->pb_info.pi_features &= ~LNET_PING_FEAT_DISCOVERY;
	else
		pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;
	lnet_net_unlock(LNET_LOCK_EX);

	lnet_push_update_to_peers(1);

	mutex_unlock(&the_lnet.ln_api_mutex);

	return 0;
}
153
154 static int
155 intf_max_set(const char *val, struct kernel_param *kp)
156 {
157         int value, rc;
158
159         rc = kstrtoint(val, 0, &value);
160         if (rc) {
161                 CERROR("Invalid module parameter value for 'lnet_interfaces_max'\n");
162                 return rc;
163         }
164
165         if (value < LNET_INTERFACES_MIN) {
166                 CWARN("max interfaces provided are too small, setting to %d\n",
167                       LNET_INTERFACES_MIN);
168                 value = LNET_INTERFACES_MIN;
169         }
170
171         *(int *)kp->arg = value;
172
173         return 0;
174 }
175
176 static char *
177 lnet_get_routes(void)
178 {
179         return routes;
180 }
181
182 static char *
183 lnet_get_networks(void)
184 {
185         char   *nets;
186         int     rc;
187
188         if (*networks != 0 && *ip2nets != 0) {
189                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
190                                    "'ip2nets' but not both at once\n");
191                 return NULL;
192         }
193
194         if (*ip2nets != 0) {
195                 rc = lnet_parse_ip2nets(&nets, ip2nets);
196                 return (rc == 0) ? nets : NULL;
197         }
198
199         if (*networks != 0)
200                 return networks;
201
202         return "tcp";
203 }
204
/* one-time initialization of global LNet locks and wait queues */
static void
lnet_init_locks(void)
{
	spin_lock_init(&the_lnet.ln_eq_wait_lock);
	init_waitqueue_head(&the_lnet.ln_eq_waitq);
	init_waitqueue_head(&the_lnet.ln_rc_waitq);
	mutex_init(&the_lnet.ln_lnd_mutex);
}
213
/* counterpart of lnet_init_locks(); currently nothing to tear down */
static void
lnet_fini_locks(void)
{
}
218
struct kmem_cache *lnet_mes_cachep;	   /* MEs kmem_cache */
struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
					    *  MDs kmem_cache */
222
223 static int
224 lnet_descriptor_setup(void)
225 {
226         /* create specific kmem_cache for MEs and small MDs (i.e., originally
227          * allocated in <size-xxx> kmem_cache).
228          */
229         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(struct lnet_me),
230                                             0, 0, NULL);
231         if (!lnet_mes_cachep)
232                 return -ENOMEM;
233
234         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
235                                                   LNET_SMALL_MD_SIZE, 0, 0,
236                                                   NULL);
237         if (!lnet_small_mds_cachep)
238                 return -ENOMEM;
239
240         return 0;
241 }
242
/*
 * Destroy the ME and small-MD kmem_caches created by
 * lnet_descriptor_setup().  Safe when only some (or none) of the caches
 * exist; pointers are NULLed so a repeated call is a no-op.
 */
static void
lnet_descriptor_cleanup(void)
{

	if (lnet_small_mds_cachep) {
		kmem_cache_destroy(lnet_small_mds_cachep);
		lnet_small_mds_cachep = NULL;
	}

	if (lnet_mes_cachep) {
		kmem_cache_destroy(lnet_mes_cachep);
		lnet_mes_cachep = NULL;
	}
}
257
258 static int
259 lnet_create_remote_nets_table(void)
260 {
261         int               i;
262         struct list_head *hash;
263
264         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
265         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
266         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
267         if (hash == NULL) {
268                 CERROR("Failed to create remote nets hash table\n");
269                 return -ENOMEM;
270         }
271
272         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
273                 INIT_LIST_HEAD(&hash[i]);
274         the_lnet.ln_remote_nets_hash = hash;
275         return 0;
276 }
277
278 static void
279 lnet_destroy_remote_nets_table(void)
280 {
281         int i;
282
283         if (the_lnet.ln_remote_nets_hash == NULL)
284                 return;
285
286         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
287                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
288
289         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
290                     LNET_REMOTE_NETS_HASH_SIZE *
291                     sizeof(the_lnet.ln_remote_nets_hash[0]));
292         the_lnet.ln_remote_nets_hash = NULL;
293 }
294
295 static void
296 lnet_destroy_locks(void)
297 {
298         if (the_lnet.ln_res_lock != NULL) {
299                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
300                 the_lnet.ln_res_lock = NULL;
301         }
302
303         if (the_lnet.ln_net_lock != NULL) {
304                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
305                 the_lnet.ln_net_lock = NULL;
306         }
307
308         lnet_fini_locks();
309 }
310
311 static int
312 lnet_create_locks(void)
313 {
314         lnet_init_locks();
315
316         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
317         if (the_lnet.ln_res_lock == NULL)
318                 goto failed;
319
320         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
321         if (the_lnet.ln_net_lock == NULL)
322                 goto failed;
323
324         return 0;
325
326  failed:
327         lnet_destroy_locks();
328         return -ENOMEM;
329 }
330
/*
 * Compile-time checks that the on-wire protocol structures have the
 * sizes and member offsets the wire protocol requires.  All checks are
 * CLASSERTs, so this function generates no runtime code.
 */
static void lnet_assert_wire_constants(void)
{
	/* Wire protocol assertions generated by 'wirecheck'
	 * running on Linux robert.bartonsoftware.com 2.6.8-1.521
	 * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
	 * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */

	/* Constants... */
	CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
	CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1);
	CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0);
	CLASSERT(LNET_MSG_ACK == 0);
	CLASSERT(LNET_MSG_PUT == 1);
	CLASSERT(LNET_MSG_GET == 2);
	CLASSERT(LNET_MSG_REPLY == 3);
	CLASSERT(LNET_MSG_HELLO == 4);

	/* Checks for struct lnet_handle_wire */
	CLASSERT((int)sizeof(struct lnet_handle_wire) == 16);
	CLASSERT((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) == 0);
	CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) == 8);
	CLASSERT((int)offsetof(struct lnet_handle_wire, wh_object_cookie) == 8);
	CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) == 8);

	/* Checks for struct struct lnet_magicversion */
	CLASSERT((int)sizeof(struct lnet_magicversion) == 8);
	CLASSERT((int)offsetof(struct lnet_magicversion, magic) == 0);
	CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->magic) == 4);
	CLASSERT((int)offsetof(struct lnet_magicversion, version_major) == 4);
	CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->version_major) == 2);
	CLASSERT((int)offsetof(struct lnet_magicversion, version_minor) == 6);
	CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->version_minor) == 2);

	/* Checks for struct struct lnet_hdr */
	CLASSERT((int)sizeof(struct lnet_hdr) == 72);
	CLASSERT((int)offsetof(struct lnet_hdr, dest_nid) == 0);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->dest_nid) == 8);
	CLASSERT((int)offsetof(struct lnet_hdr, src_nid) == 8);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->src_nid) == 8);
	CLASSERT((int)offsetof(struct lnet_hdr, dest_pid) == 16);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->dest_pid) == 4);
	CLASSERT((int)offsetof(struct lnet_hdr, src_pid) == 20);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->src_pid) == 4);
	CLASSERT((int)offsetof(struct lnet_hdr, type) == 24);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->type) == 4);
	CLASSERT((int)offsetof(struct lnet_hdr, payload_length) == 28);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->payload_length) == 4);
	CLASSERT((int)offsetof(struct lnet_hdr, msg) == 32);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg) == 40);

	/* Ack */
	CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) == 32);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) == 16);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.match_bits) == 48);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) == 8);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.mlength) == 56);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) == 4);

	/* Put */
	CLASSERT((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) == 32);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) == 16);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.put.match_bits) == 48);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) == 8);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.put.hdr_data) == 56);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) == 8);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.put.ptl_index) == 64);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) == 4);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.put.offset) == 68);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) == 4);

	/* Get */
	CLASSERT((int)offsetof(struct lnet_hdr, msg.get.return_wmd) == 32);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) == 16);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.get.match_bits) == 48);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) == 8);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.get.ptl_index) == 56);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) == 4);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.get.src_offset) == 60);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) == 4);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.get.sink_length) == 64);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) == 4);

	/* Reply */
	CLASSERT((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) == 32);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) == 16);

	/* Hello */
	CLASSERT((int)offsetof(struct lnet_hdr, msg.hello.incarnation) == 32);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) == 8);
	CLASSERT((int)offsetof(struct lnet_hdr, msg.hello.type) == 40);
	CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) == 4);

	/* Checks for struct lnet_ni_status and related constants */
	CLASSERT(LNET_NI_STATUS_INVALID == 0x00000000);
	CLASSERT(LNET_NI_STATUS_UP == 0x15aac0de);
	CLASSERT(LNET_NI_STATUS_DOWN == 0xdeadface);

	/* Checks for struct lnet_ni_status */
	CLASSERT((int)sizeof(struct lnet_ni_status) == 16);
	CLASSERT((int)offsetof(struct lnet_ni_status, ns_nid) == 0);
	CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) == 8);
	CLASSERT((int)offsetof(struct lnet_ni_status, ns_status) == 8);
	CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_status) == 4);
	CLASSERT((int)offsetof(struct lnet_ni_status, ns_unused) == 12);
	CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_unused) == 4);

	/* Checks for struct lnet_ping_info and related constants */
	CLASSERT(LNET_PROTO_PING_MAGIC == 0x70696E67);
	CLASSERT(LNET_PING_FEAT_INVAL == 0);
	CLASSERT(LNET_PING_FEAT_BASE == 1);
	CLASSERT(LNET_PING_FEAT_NI_STATUS == 2);
	CLASSERT(LNET_PING_FEAT_RTE_DISABLED == 4);
	CLASSERT(LNET_PING_FEAT_MULTI_RAIL == 8);
	CLASSERT(LNET_PING_FEAT_DISCOVERY == 16);
	CLASSERT(LNET_PING_FEAT_BITS == 31);

	/* Checks for struct lnet_ping_info */
	CLASSERT((int)sizeof(struct lnet_ping_info) == 16);
	CLASSERT((int)offsetof(struct lnet_ping_info, pi_magic) == 0);
	CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) == 4);
	CLASSERT((int)offsetof(struct lnet_ping_info, pi_features) == 4);
	CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_features) == 4);
	CLASSERT((int)offsetof(struct lnet_ping_info, pi_pid) == 8);
	CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) == 4);
	CLASSERT((int)offsetof(struct lnet_ping_info, pi_nnis) == 12);
	CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) == 4);
	/* pi_ni is a trailing variable-length array of lnet_ni_status */
	CLASSERT((int)offsetof(struct lnet_ping_info, pi_ni) == 16);
	CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_ni) == 0);
}
460
461 static struct lnet_lnd *lnet_find_lnd_by_type(__u32 type)
462 {
463         struct lnet_lnd *lnd;
464         struct list_head *tmp;
465
466         /* holding lnd mutex */
467         list_for_each(tmp, &the_lnet.ln_lnds) {
468                 lnd = list_entry(tmp, struct lnet_lnd, lnd_list);
469
470                 if (lnd->lnd_type == type)
471                         return lnd;
472         }
473         return NULL;
474 }
475
/*
 * Register an LNet network driver so networks of its type can be
 * configured.  Serialized against lookup/unregister by ln_lnd_mutex.
 */
void
lnet_register_lnd(struct lnet_lnd *lnd)
{
	mutex_lock(&the_lnet.ln_lnd_mutex);

	/* the type must be known and must not already be registered */
	LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
	LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);

	list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
	lnd->lnd_refcount = 0;

	CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));

	mutex_unlock(&the_lnet.ln_lnd_mutex);
}
EXPORT_SYMBOL(lnet_register_lnd);
492
/*
 * Unregister an LND.  The caller must ensure no network of this type is
 * still in use: the refcount is asserted to be zero.
 */
void
lnet_unregister_lnd(struct lnet_lnd *lnd)
{
	mutex_lock(&the_lnet.ln_lnd_mutex);

	LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
	LASSERT(lnd->lnd_refcount == 0);

	list_del(&lnd->lnd_list);
	CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));

	mutex_unlock(&the_lnet.ln_lnd_mutex);
}
EXPORT_SYMBOL(lnet_unregister_lnd);
507
/*
 * Sum the per-CPT LNet statistics into *counters.  Taken under the
 * exclusive net lock so no partition is updated while summing.
 */
void
lnet_counters_get(struct lnet_counters *counters)
{
	struct lnet_counters *ctr;
	int		i;

	memset(counters, 0, sizeof(*counters));

	lnet_net_lock(LNET_LOCK_EX);

	cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
		counters->msgs_max     += ctr->msgs_max;
		counters->msgs_alloc   += ctr->msgs_alloc;
		counters->errors       += ctr->errors;
		counters->send_count   += ctr->send_count;
		counters->recv_count   += ctr->recv_count;
		counters->route_count  += ctr->route_count;
		counters->drop_count   += ctr->drop_count;
		counters->send_length  += ctr->send_length;
		counters->recv_length  += ctr->recv_length;
		counters->route_length += ctr->route_length;
		counters->drop_length  += ctr->drop_length;

	}
	lnet_net_unlock(LNET_LOCK_EX);
}
EXPORT_SYMBOL(lnet_counters_get);
535
536 void
537 lnet_counters_reset(void)
538 {
539         struct lnet_counters *counters;
540         int             i;
541
542         lnet_net_lock(LNET_LOCK_EX);
543
544         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
545                 memset(counters, 0, sizeof(struct lnet_counters));
546
547         lnet_net_unlock(LNET_LOCK_EX);
548 }
549
/* human-readable name of a resource cookie type, for log messages */
static char *
lnet_res_type2str(int type)
{
	switch (type) {
	default:
		/* an unknown type is a fatal logic error; LBUG() is expected
		 * not to return, so falling into the MD case is unreachable */
		LBUG();
	case LNET_COOKIE_TYPE_MD:
		return "MD";
	case LNET_COOKIE_TYPE_ME:
		return "ME";
	case LNET_COOKIE_TYPE_EQ:
		return "EQ";
	}
}
564
/*
 * Tear down a resource container: free any resources still on the
 * active list, release the cookie hash table and mark the container
 * finalized (rec_type == 0).  A container that was never set up
 * (rec_type still 0) is a no-op.
 */
static void
lnet_res_container_cleanup(struct lnet_res_container *rec)
{
	int	count = 0;

	if (rec->rec_type == 0) /* not set yet, it's uninitialized */
		return;

	while (!list_empty(&rec->rec_active)) {
		struct list_head *e = rec->rec_active.next;

		list_del_init(e);
		if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
			lnet_eq_free(list_entry(e, struct lnet_eq, eq_list));

		} else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
			lnet_md_free(list_entry(e, struct lnet_libmd, md_list));

		} else { /* NB: Active MEs should be attached on portals */
			LBUG();
		}
		count++;
	}

	if (count > 0) {
		/* Found alive MD/ME/EQ, user really should unlink/free
		 * all of them before finalize LNet, but if someone didn't,
		 * we have to recycle garbage for him */
		CERROR("%d active elements on exit of %s container\n",
		       count, lnet_res_type2str(rec->rec_type));
	}

	if (rec->rec_lh_hash != NULL) {
		LIBCFS_FREE(rec->rec_lh_hash,
			    LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
		rec->rec_lh_hash = NULL;
	}

	rec->rec_type = 0; /* mark it as finalized */
}
605
/*
 * Initialize a resource container for cookie type @type on CPT @cpt:
 * active list, starting cookie value and cookie hash table.
 * Returns 0 on success or -ENOMEM (cleaning up after itself) on failure.
 */
static int
lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
{
	int	rc = 0;
	int	i;

	LASSERT(rec->rec_type == 0);

	rec->rec_type = type;
	INIT_LIST_HEAD(&rec->rec_active);

	/* the low bits of every cookie handed out encode the CPT and type */
	rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;

	/* Arbitrary choice of hash table size */
	LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
			 LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
	if (rec->rec_lh_hash == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	for (i = 0; i < LNET_LH_HASH_SIZE; i++)
		INIT_LIST_HEAD(&rec->rec_lh_hash[i]);

	return 0;

out:
	CERROR("Failed to setup %s resource container\n",
	       lnet_res_type2str(type));
	lnet_res_container_cleanup(rec);
	return rc;
}
638
639 static void
640 lnet_res_containers_destroy(struct lnet_res_container **recs)
641 {
642         struct lnet_res_container       *rec;
643         int                             i;
644
645         cfs_percpt_for_each(rec, i, recs)
646                 lnet_res_container_cleanup(rec);
647
648         cfs_percpt_free(recs);
649 }
650
651 static struct lnet_res_container **
652 lnet_res_containers_create(int type)
653 {
654         struct lnet_res_container       **recs;
655         struct lnet_res_container       *rec;
656         int                             rc;
657         int                             i;
658
659         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
660         if (recs == NULL) {
661                 CERROR("Failed to allocate %s resource containers\n",
662                        lnet_res_type2str(type));
663                 return NULL;
664         }
665
666         cfs_percpt_for_each(rec, i, recs) {
667                 rc = lnet_res_container_setup(rec, i, type);
668                 if (rc != 0) {
669                         lnet_res_containers_destroy(recs);
670                         return NULL;
671                 }
672         }
673
674         return recs;
675 }
676
677 struct lnet_libhandle *
678 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
679 {
680         /* ALWAYS called with lnet_res_lock held */
681         struct list_head        *head;
682         struct lnet_libhandle   *lh;
683         unsigned int            hash;
684
685         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
686                 return NULL;
687
688         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
689         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
690
691         list_for_each_entry(lh, head, lh_hash_chain) {
692                 if (lh->lh_cookie == cookie)
693                         return lh;
694         }
695
696         return NULL;
697 }
698
/*
 * Assign the container's next cookie to @lh and insert it into the
 * cookie hash table.  The low ibits of every cookie encode the resource
 * type and CPT, so the counter advances in steps of 1 << ibits and those
 * bits stay fixed for this container.
 */
void
lnet_res_lh_initialize(struct lnet_res_container *rec,
		       struct lnet_libhandle *lh)
{
	/* ALWAYS called with lnet_res_lock held */
	unsigned int	ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
	unsigned int	hash;

	lh->lh_cookie = rec->rec_lh_cookie;
	rec->rec_lh_cookie += 1 << ibits;

	hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;

	list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
}
714
static int lnet_unprepare(void);

/*
 * Bring up the core LNet state before any network interface is started:
 * list heads, descriptor caches, remote-nets table, per-CPT counters,
 * peer tables, message/resource containers and portals.
 *
 * Returns 0 on success or a negative errno; on any failure the partial
 * state is torn down via lnet_unprepare() before returning.
 */
static int
lnet_prepare(lnet_pid_t requested_pid)
{
	/* Prepare to bring up the network */
	struct lnet_res_container **recs;
	int			  rc = 0;

	if (requested_pid == LNET_PID_ANY) {
		/* Don't instantiate LNET just for me */
		return -ENETDOWN;
	}

	LASSERT(the_lnet.ln_refcount == 0);

	the_lnet.ln_routing = 0;

	LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
	the_lnet.ln_pid = requested_pid;

	INIT_LIST_HEAD(&the_lnet.ln_test_peers);
	INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
	INIT_LIST_HEAD(&the_lnet.ln_nets);
	INIT_LIST_HEAD(&the_lnet.ln_routers);
	INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
	INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
	INIT_LIST_HEAD(&the_lnet.ln_dc_request);
	INIT_LIST_HEAD(&the_lnet.ln_dc_working);
	INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
	init_waitqueue_head(&the_lnet.ln_dc_waitq);

	rc = lnet_descriptor_setup();
	if (rc != 0)
		goto failed;

	rc = lnet_create_remote_nets_table();
	if (rc != 0)
		goto failed;

	/*
	 * NB the interface cookie in wire handles guards against delayed
	 * replies and ACKs appearing valid after reboot.
	 */
	the_lnet.ln_interface_cookie = ktime_get_real_ns();

	the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
						sizeof(struct lnet_counters));
	if (the_lnet.ln_counters == NULL) {
		CERROR("Failed to allocate counters for LNet\n");
		rc = -ENOMEM;
		goto failed;
	}

	rc = lnet_peer_tables_create();
	if (rc != 0)
		goto failed;

	rc = lnet_msg_containers_create();
	if (rc != 0)
		goto failed;

	/* the EQ container is global (cpt 0), not per-CPT like ME/MD below */
	rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
				      LNET_COOKIE_TYPE_EQ);
	if (rc != 0)
		goto failed;

	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
	if (recs == NULL) {
		rc = -ENOMEM;
		goto failed;
	}

	the_lnet.ln_me_containers = recs;

	recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
	if (recs == NULL) {
		rc = -ENOMEM;
		goto failed;
	}

	the_lnet.ln_md_containers = recs;

	rc = lnet_portals_create();
	if (rc != 0) {
		CERROR("Failed to create portals for LNet: %d\n", rc);
		goto failed;
	}

	return 0;

 failed:
	/* lnet_unprepare() tolerates partially constructed state */
	lnet_unprepare();
	return rc;
}
810
/*
 * Tear down everything lnet_prepare() set up, in reverse order.  Also
 * used as the failure path of lnet_prepare(), so every step must
 * tolerate state that was never constructed.  Always returns 0.
 */
static int
lnet_unprepare (void)
{
	/* NB no LNET_LOCK since this is the last reference.  All LND instances
	 * have shut down already, so it is safe to unlink and free all
	 * descriptors, even those that appear committed to a network op (eg MD
	 * with non-zero pending count) */

	lnet_fail_nid(LNET_NID_ANY, 0);

	LASSERT(the_lnet.ln_refcount == 0);
	LASSERT(list_empty(&the_lnet.ln_test_peers));
	LASSERT(list_empty(&the_lnet.ln_nets));

	lnet_portals_destroy();

	if (the_lnet.ln_md_containers != NULL) {
		lnet_res_containers_destroy(the_lnet.ln_md_containers);
		the_lnet.ln_md_containers = NULL;
	}

	if (the_lnet.ln_me_containers != NULL) {
		lnet_res_containers_destroy(the_lnet.ln_me_containers);
		the_lnet.ln_me_containers = NULL;
	}

	lnet_res_container_cleanup(&the_lnet.ln_eq_container);

	lnet_msg_containers_destroy();
	lnet_peer_uninit();
	lnet_rtrpools_free(0);

	if (the_lnet.ln_counters != NULL) {
		cfs_percpt_free(the_lnet.ln_counters);
		the_lnet.ln_counters = NULL;
	}
	lnet_destroy_remote_nets_table();
	lnet_descriptor_cleanup();

	return 0;
}
852
/*
 * Return the first NI on the net matching @net_id, or NULL when no such
 * net is configured.  Must be called with lnet_net_lock held for a
 * specific CPT (asserted: not LNET_LOCK_EX).  No reference is taken;
 * see lnet_net2ni_addref() for the refcounted variant.
 */
struct lnet_ni  *
lnet_net2ni_locked(__u32 net_id, int cpt)
{
	struct lnet_ni	 *ni;
	struct lnet_net  *net;

	LASSERT(cpt != LNET_LOCK_EX);

	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
		if (net->net_id == net_id) {
			/* NOTE(review): assumes net_ni_list is non-empty for
			 * any net on ln_nets — confirm that invariant holds */
			ni = list_entry(net->net_ni_list.next, struct lnet_ni,
					ni_netlist);
			return ni;
		}
	}

	return NULL;
}
871
872 struct lnet_ni *
873 lnet_net2ni_addref(__u32 net)
874 {
875         struct lnet_ni *ni;
876
877         lnet_net_lock(0);
878         ni = lnet_net2ni_locked(net, 0);
879         if (ni)
880                 lnet_ni_addref_locked(ni, 0);
881         lnet_net_unlock(0);
882
883         return ni;
884 }
885 EXPORT_SYMBOL(lnet_net2ni_addref);
886
887 struct lnet_net *
888 lnet_get_net_locked(__u32 net_id)
889 {
890         struct lnet_net  *net;
891
892         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
893                 if (net->net_id == net_id)
894                         return net;
895         }
896
897         return NULL;
898 }
899
/*
 * Deterministically hash @nid into the range [0, number).  Used to
 * spread NIDs across CPTs; the result must be stable for a given
 * (nid, number) pair since both send and receive paths rely on it.
 */
unsigned int
lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
{
	__u64		key = nid;
	unsigned int	val;

	LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);

	/* Single partition: everything maps to CPT 0. */
	if (number == 1)
		return 0;

	val = hash_long(key, LNET_CPT_BITS);
	/* NB: LNET_CPT_NUMBER doesn't have to be PO2 */
	if (val < number)
		return val;

	/* Hash exceeded the range; fold it back in with a modulo. */
	return (unsigned int)(key + val + (val >> 1)) % number;
}
918
919 int
920 lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
921 {
922         struct lnet_net *net;
923
924         /* must called with hold of lnet_net_lock */
925         if (LNET_CPT_NUMBER == 1)
926                 return 0; /* the only one */
927
928         /*
929          * If NI is provided then use the CPT identified in the NI cpt
930          * list if one exists. If one doesn't exist, then that NI is
931          * associated with all CPTs and it follows that the net it belongs
932          * to is implicitly associated with all CPTs, so just hash the nid
933          * and return that.
934          */
935         if (ni != NULL) {
936                 if (ni->ni_cpts != NULL)
937                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
938                                                              ni->ni_ncpts)];
939                 else
940                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
941         }
942
943         /* no NI provided so look at the net */
944         net = lnet_get_net_locked(LNET_NIDNET(nid));
945
946         if (net != NULL && net->net_cpts != NULL) {
947                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
948         }
949
950         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
951 }
952
953 int
954 lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
955 {
956         int     cpt;
957         int     cpt2;
958
959         if (LNET_CPT_NUMBER == 1)
960                 return 0; /* the only one */
961
962         cpt = lnet_net_lock_current();
963
964         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
965
966         lnet_net_unlock(cpt);
967
968         return cpt2;
969 }
970 EXPORT_SYMBOL(lnet_cpt_of_nid);
971
972 int
973 lnet_islocalnet(__u32 net_id)
974 {
975         struct lnet_net *net;
976         int             cpt;
977         bool            local;
978
979         cpt = lnet_net_lock_current();
980
981         net = lnet_get_net_locked(net_id);
982
983         local = net != NULL;
984
985         lnet_net_unlock(cpt);
986
987         return local;
988 }
989
990 bool
991 lnet_is_ni_healthy_locked(struct lnet_ni *ni)
992 {
993         if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
994             ni->ni_state == LNET_NI_STATE_DEGRADED)
995                 return true;
996
997         return false;
998 }
999
1000 struct lnet_ni  *
1001 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
1002 {
1003         struct lnet_net  *net;
1004         struct lnet_ni   *ni;
1005
1006         LASSERT(cpt != LNET_LOCK_EX);
1007
1008         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1009                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1010                         if (ni->ni_nid == nid)
1011                                 return ni;
1012                 }
1013         }
1014
1015         return NULL;
1016 }
1017
1018 struct lnet_ni *
1019 lnet_nid2ni_addref(lnet_nid_t nid)
1020 {
1021         struct lnet_ni *ni;
1022
1023         lnet_net_lock(0);
1024         ni = lnet_nid2ni_locked(nid, 0);
1025         if (ni)
1026                 lnet_ni_addref_locked(ni, 0);
1027         lnet_net_unlock(0);
1028
1029         return ni;
1030 }
1031 EXPORT_SYMBOL(lnet_nid2ni_addref);
1032
1033 int
1034 lnet_islocalnid(lnet_nid_t nid)
1035 {
1036         struct lnet_ni  *ni;
1037         int             cpt;
1038
1039         cpt = lnet_net_lock_current();
1040         ni = lnet_nid2ni_locked(nid, cpt);
1041         lnet_net_unlock(cpt);
1042
1043         return ni != NULL;
1044 }
1045
1046 int
1047 lnet_count_acceptor_nets(void)
1048 {
1049         /* Return the # of NIs that need the acceptor. */
1050         int              count = 0;
1051         struct lnet_net  *net;
1052         int              cpt;
1053
1054         cpt = lnet_net_lock_current();
1055         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1056                 /* all socklnd type networks should have the acceptor
1057                  * thread started */
1058                 if (net->net_lnd->lnd_accept != NULL)
1059                         count++;
1060         }
1061
1062         lnet_net_unlock(cpt);
1063
1064         return count;
1065 }
1066
1067 struct lnet_ping_buffer *
1068 lnet_ping_buffer_alloc(int nnis, gfp_t gfp)
1069 {
1070         struct lnet_ping_buffer *pbuf;
1071
1072         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nnis), gfp);
1073         if (pbuf) {
1074                 pbuf->pb_nnis = nnis;
1075                 atomic_set(&pbuf->pb_refcnt, 1);
1076         }
1077
1078         return pbuf;
1079 }
1080
/*
 * Free a ping buffer.  The refcount must already have dropped to zero;
 * normally reached via lnet_ping_buffer_decref().
 */
void
lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
{
	LASSERT(lnet_ping_buffer_numref(pbuf) == 0);
	LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nnis));
}
1087
1088 static struct lnet_ping_buffer *
1089 lnet_ping_target_create(int nnis)
1090 {
1091         struct lnet_ping_buffer *pbuf;
1092
1093         pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
1094         if (pbuf == NULL) {
1095                 CERROR("Can't allocate ping source [%d]\n", nnis);
1096                 return NULL;
1097         }
1098
1099         pbuf->pb_info.pi_nnis = nnis;
1100         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1101         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1102         pbuf->pb_info.pi_features =
1103                 LNET_PING_FEAT_NI_STATUS | LNET_PING_FEAT_MULTI_RAIL;
1104
1105         return pbuf;
1106 }
1107
1108 static inline int
1109 lnet_get_net_ni_count_locked(struct lnet_net *net)
1110 {
1111         struct lnet_ni  *ni;
1112         int             count = 0;
1113
1114         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1115                 count++;
1116
1117         return count;
1118 }
1119
1120 static inline int
1121 lnet_get_net_ni_count_pre(struct lnet_net *net)
1122 {
1123         struct lnet_ni  *ni;
1124         int             count = 0;
1125
1126         list_for_each_entry(ni, &net->net_ni_added, ni_netlist)
1127                 count++;
1128
1129         return count;
1130 }
1131
1132 static inline int
1133 lnet_get_ni_count(void)
1134 {
1135         struct lnet_ni  *ni;
1136         struct lnet_net *net;
1137         int             count = 0;
1138
1139         lnet_net_lock(0);
1140
1141         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1142                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1143                         count++;
1144         }
1145
1146         lnet_net_unlock(0);
1147
1148         return count;
1149 }
1150
1151 int
1152 lnet_ping_info_validate(struct lnet_ping_info *pinfo)
1153 {
1154         if (!pinfo)
1155                 return -EINVAL;
1156         if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
1157                 return -EPROTO;
1158         if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
1159                 return -EPROTO;
1160         /* Loopback is guaranteed to be present */
1161         if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
1162                 return -ERANGE;
1163         if (LNET_NETTYP(LNET_NIDNET(LNET_PING_INFO_LONI(pinfo))) != LOLND)
1164                 return -EPROTO;
1165         return 0;
1166 }
1167
/*
 * Release the current ping target buffer.  Each NI's ni_status pointer
 * references memory inside that buffer, so those pointers must be
 * cleared (under the per-NI lock) before the buffer's reference is
 * dropped.  Runs under LNET_LOCK_EX.
 */
static void
lnet_ping_target_destroy(void)
{
	struct lnet_net *net;
	struct lnet_ni	*ni;

	lnet_net_lock(LNET_LOCK_EX);

	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
			lnet_ni_lock(ni);
			ni->ni_status = NULL;
			lnet_ni_unlock(ni);
		}
	}

	lnet_ping_buffer_decref(the_lnet.ln_ping_target);
	the_lnet.ln_ping_target = NULL;

	lnet_net_unlock(LNET_LOCK_EX);
}
1189
1190 static void
1191 lnet_ping_target_event_handler(struct lnet_event *event)
1192 {
1193         struct lnet_ping_buffer *pbuf = event->md.user_ptr;
1194
1195         if (event->unlinked)
1196                 lnet_ping_buffer_decref(pbuf);
1197 }
1198
/*
 * Build the ping target: the buffer that remote peers GET to read this
 * node's interface list and status.
 *
 * \param ppbuf    on success, set to the newly created ping buffer
 * \param ping_mdh on success, handle of the MD attached to it
 * \param ni_count number of NIs the buffer must describe
 * \param set_eq   true to also allocate the ping target EQ
 *
 * Returns 0 on success; negative errno on failure, with all partial
 * state rolled back through the fail_* labels in reverse order.
 */
static int
lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
		       struct lnet_handle_md *ping_mdh,
		       int ni_count, bool set_eq)
{
	struct lnet_process_id id = {
		.nid = LNET_NID_ANY,
		.pid = LNET_PID_ANY
	};
	struct lnet_handle_me me_handle;
	struct lnet_md md = { NULL };
	int rc, rc2;

	if (set_eq) {
		rc = LNetEQAlloc(0, lnet_ping_target_event_handler,
				 &the_lnet.ln_ping_target_eq);
		if (rc != 0) {
			CERROR("Can't allocate ping buffer EQ: %d\n", rc);
			return rc;
		}
	}

	*ppbuf = lnet_ping_target_create(ni_count);
	if (*ppbuf == NULL) {
		rc = -ENOMEM;
		goto fail_free_eq;
	}

	/* Ping target ME/MD: wildcard match so any peer can GET it. */
	rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
			  LNET_PROTO_PING_MATCHBITS, 0,
			  LNET_UNLINK, LNET_INS_AFTER,
			  &me_handle);
	if (rc != 0) {
		CERROR("Can't create ping target ME: %d\n", rc);
		goto fail_decref_ping_buffer;
	}

	/* initialize md content */
	md.start     = &(*ppbuf)->pb_info;
	md.length    = LNET_PING_INFO_SIZE((*ppbuf)->pb_nnis);
	md.threshold = LNET_MD_THRESH_INF;
	md.max_size  = 0;
	md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
		       LNET_MD_MANAGE_REMOTE;
	md.eq_handle = the_lnet.ln_ping_target_eq;
	md.user_ptr  = *ppbuf;

	rc = LNetMDAttach(me_handle, md, LNET_RETAIN, ping_mdh);
	if (rc != 0) {
		CERROR("Can't attach ping target MD: %d\n", rc);
		goto fail_unlink_ping_me;
	}
	/* extra ref for the MD; dropped by the event handler on unlink */
	lnet_ping_buffer_addref(*ppbuf);

	return 0;

fail_unlink_ping_me:
	rc2 = LNetMEUnlink(me_handle);
	LASSERT(rc2 == 0);
fail_decref_ping_buffer:
	LASSERT(lnet_ping_buffer_numref(*ppbuf) == 1);
	lnet_ping_buffer_decref(*ppbuf);
	*ppbuf = NULL;
fail_free_eq:
	if (set_eq) {
		rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
		LASSERT(rc2 == 0);
	}
	return rc;
}
1270
/*
 * Start unlinking @ping_mdh and wait (signals blocked, uninterruptible
 * 1s sleeps) until the MD's reference on @pbuf has been released by the
 * event handler, i.e. the unlink has completed and no network op still
 * touches the buffer.
 */
static void
lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
		    struct lnet_handle_md *ping_mdh)
{
	sigset_t	blocked = cfs_block_allsigs();

	LNetMDUnlink(*ping_mdh);
	LNetInvalidateMDHandle(ping_mdh);

	/* NB the MD could be busy; this just starts the unlink */
	while (lnet_ping_buffer_numref(pbuf) > 1) {
		CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1));
	}

	cfs_restore_sigs(blocked);
}
1289
/*
 * Fill @pbuf with one lnet_ni_status entry per local NI, carrying over
 * each NI's current status, and repoint every NI's ni_status into the
 * new buffer.  Then validate the result and stamp a fresh sequence
 * number.  Caller holds LNET_LOCK_EX.
 */
static void
lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
{
	struct lnet_ni		*ni;
	struct lnet_net		*net;
	struct lnet_ni_status *ns;
	int			i;
	int			rc;

	i = 0;
	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
			LASSERT(i < pbuf->pb_nnis);

			ns = &pbuf->pb_info.pi_ni[i];

			ns->ns_nid = ni->ni_nid;

			/* Preserve the status from the old buffer; a brand
			 * new NI starts out UP. */
			lnet_ni_lock(ni);
			ns->ns_status = (ni->ni_status != NULL) ?
					 ni->ni_status->ns_status :
						LNET_NI_STATUS_UP;
			ni->ni_status = ns;
			lnet_ni_unlock(ni);

			i++;
		}
	}
	/*
	 * We (ab)use the ns_status of the loopback interface to
	 * transmit the sequence number. The first interface listed
	 * must be the loopback interface.
	 */
	rc = lnet_ping_info_validate(&pbuf->pb_info);
	if (rc) {
		LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
		LBUG();
	}
	LNET_PING_BUFFER_SEQNO(pbuf) =
		atomic_inc_return(&the_lnet.ln_ping_target_seqno);
}
1331
/*
 * Install @pbuf (with MD @ping_mdh already attached) as the new ping
 * target: publish the routing/discovery feature bits, populate the
 * per-NI entries, swap it in under LNET_LOCK_EX, then unlink and
 * release the previous target outside the lock.  Finally notify peers
 * of the change.
 */
static void
lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
			struct lnet_handle_md ping_mdh)
{
	struct lnet_ping_buffer *old_pbuf = NULL;
	struct lnet_handle_md old_ping_md;

	/* switch the NIs to point to the new ping info created */
	lnet_net_lock(LNET_LOCK_EX);

	if (!the_lnet.ln_routing)
		pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;
	if (!lnet_peer_discovery_disabled)
		pbuf->pb_info.pi_features |= LNET_PING_FEAT_DISCOVERY;

	/* Ensure only known feature bits have been set. */
	LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
	LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));

	lnet_ping_target_install_locked(pbuf);

	if (the_lnet.ln_ping_target) {
		old_pbuf = the_lnet.ln_ping_target;
		old_ping_md = the_lnet.ln_ping_target_md;
	}
	the_lnet.ln_ping_target_md = ping_mdh;
	the_lnet.ln_ping_target = pbuf;

	lnet_net_unlock(LNET_LOCK_EX);

	if (old_pbuf) {
		/* unlink and free the old ping info */
		lnet_ping_md_unlink(old_pbuf, &old_ping_md);
		lnet_ping_buffer_decref(old_pbuf);
	}

	lnet_push_update_to_peers(0);
}
1370
/*
 * Tear down the ping target on shutdown: unlink its MD (waiting for
 * the unlink to complete), free the ping EQ, then drop the buffer and
 * clear the NIs' status pointers via lnet_ping_target_destroy().
 */
static void
lnet_ping_target_fini(void)
{
	int		rc;

	lnet_ping_md_unlink(the_lnet.ln_ping_target,
			    &the_lnet.ln_ping_target_md);

	rc = LNetEQFree(the_lnet.ln_ping_target_eq);
	LASSERT(rc == 0);

	lnet_ping_target_destroy();
}
1384
1385 /* Resize the push target. */
1386 int lnet_push_target_resize(void)
1387 {
1388         lnet_process_id_t id = { LNET_NID_ANY, LNET_PID_ANY };
1389         lnet_md_t md = { NULL };
1390         lnet_handle_me_t meh;
1391         lnet_handle_md_t mdh;
1392         lnet_handle_md_t old_mdh;
1393         struct lnet_ping_buffer *pbuf;
1394         struct lnet_ping_buffer *old_pbuf;
1395         int nnis = the_lnet.ln_push_target_nnis;
1396         int rc;
1397
1398         if (nnis <= 0) {
1399                 rc = -EINVAL;
1400                 goto fail_return;
1401         }
1402 again:
1403         pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
1404         if (!pbuf) {
1405                 rc = -ENOMEM;
1406                 goto fail_return;
1407         }
1408
1409         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1410                           LNET_PROTO_PING_MATCHBITS, 0,
1411                           LNET_UNLINK, LNET_INS_AFTER,
1412                           &meh);
1413         if (rc) {
1414                 CERROR("Can't create push target ME: %d\n", rc);
1415                 goto fail_decref_pbuf;
1416         }
1417
1418         /* initialize md content */
1419         md.start     = &pbuf->pb_info;
1420         md.length    = LNET_PING_INFO_SIZE(nnis);
1421         md.threshold = LNET_MD_THRESH_INF;
1422         md.max_size  = 0;
1423         md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE |
1424                        LNET_MD_MANAGE_REMOTE;
1425         md.user_ptr  = pbuf;
1426         md.eq_handle = the_lnet.ln_push_target_eq;
1427
1428         rc = LNetMDAttach(meh, md, LNET_RETAIN, &mdh);
1429         if (rc) {
1430                 CERROR("Can't attach push MD: %d\n", rc);
1431                 goto fail_unlink_meh;
1432         }
1433         lnet_ping_buffer_addref(pbuf);
1434
1435         lnet_net_lock(LNET_LOCK_EX);
1436         old_pbuf = the_lnet.ln_push_target;
1437         old_mdh = the_lnet.ln_push_target_md;
1438         the_lnet.ln_push_target = pbuf;
1439         the_lnet.ln_push_target_md = mdh;
1440         lnet_net_unlock(LNET_LOCK_EX);
1441
1442         if (old_pbuf) {
1443                 LNetMDUnlink(old_mdh);
1444                 lnet_ping_buffer_decref(old_pbuf);
1445         }
1446
1447         if (nnis < the_lnet.ln_push_target_nnis)
1448                 goto again;
1449
1450         CDEBUG(D_NET, "nnis %d success\n", nnis);
1451
1452         return 0;
1453
1454 fail_unlink_meh:
1455         LNetMEUnlink(meh);
1456 fail_decref_pbuf:
1457         lnet_ping_buffer_decref(pbuf);
1458 fail_return:
1459         CDEBUG(D_NET, "nnis %d error %d\n", nnis, rc);
1460         return rc;
1461 }
1462
1463 static void lnet_push_target_event_handler(struct lnet_event *ev)
1464 {
1465         struct lnet_ping_buffer *pbuf = ev->md.user_ptr;
1466
1467         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
1468                 lnet_swap_pinginfo(pbuf);
1469
1470         lnet_peer_push_event(ev);
1471         if (ev->unlinked)
1472                 lnet_ping_buffer_decref(pbuf);
1473 }
1474
1475 /* Initialize the push target. */
1476 static int lnet_push_target_init(void)
1477 {
1478         int rc;
1479
1480         if (the_lnet.ln_push_target)
1481                 return -EALREADY;
1482
1483         rc = LNetEQAlloc(0, lnet_push_target_event_handler,
1484                          &the_lnet.ln_push_target_eq);
1485         if (rc) {
1486                 CERROR("Can't allocated push target EQ: %d\n", rc);
1487                 return rc;
1488         }
1489
1490         /* Start at the required minimum, we'll enlarge if required. */
1491         the_lnet.ln_push_target_nnis = LNET_INTERFACES_MIN;
1492
1493         rc = lnet_push_target_resize();
1494
1495         if (rc) {
1496                 LNetEQFree(the_lnet.ln_push_target_eq);
1497                 LNetInvalidateEQHandle(&the_lnet.ln_push_target_eq);
1498         }
1499
1500         return rc;
1501 }
1502
/* Clean up the push target: unlink its MD, wait for the unlink to
 * complete, drop the buffer and free the EQ.  No-op if the push target
 * was never initialized.
 */
static void lnet_push_target_fini(void)
{
	if (!the_lnet.ln_push_target)
		return;

	/* Unlink and invalidate to prevent new references. */
	LNetMDUnlink(the_lnet.ln_push_target_md);
	LNetInvalidateMDHandle(&the_lnet.ln_push_target_md);

	/* Wait for the unlink to complete. */
	while (lnet_ping_buffer_numref(the_lnet.ln_push_target) > 1) {
		CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1));
	}

	lnet_ping_buffer_decref(the_lnet.ln_push_target);
	the_lnet.ln_push_target = NULL;
	the_lnet.ln_push_target_nnis = 0;

	LNetEQFree(the_lnet.ln_push_target_eq);
	LNetInvalidateEQHandle(&the_lnet.ln_push_target_eq);
}
1527
1528 static int
1529 lnet_ni_tq_credits(struct lnet_ni *ni)
1530 {
1531         int     credits;
1532
1533         LASSERT(ni->ni_ncpts >= 1);
1534
1535         if (ni->ni_ncpts == 1)
1536                 return ni->ni_net->net_tunables.lct_max_tx_credits;
1537
1538         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
1539         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
1540         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
1541
1542         return credits;
1543 }
1544
/*
 * Make @ni unfindable: remove it from the CPT list (releasing that
 * list's reference) and move it onto its net's zombie list, to be
 * reaped by lnet_clear_zombies_nis_locked() once idle.  Caller holds
 * LNET_LOCK_EX.
 */
static void
lnet_ni_unlink_locked(struct lnet_ni *ni)
{
	if (!list_empty(&ni->ni_cptlist)) {
		list_del_init(&ni->ni_cptlist);
		lnet_ni_decref_locked(ni, 0);
	}

	/* move it to zombie list and nobody can find it anymore */
	LASSERT(!list_empty(&ni->ni_netlist));
	list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
	lnet_ni_decref_locked(ni, 0);
}
1558
/*
 * Reap the NIs on @net's zombie list: wait until each NI's per-CPT
 * refcounts all drop to zero, call the LND's shutdown on it, and free
 * it.  Called (and returns) with LNET_LOCK_EX held; the lock is
 * dropped around sleeps and LND callbacks.
 */
static void
lnet_clear_zombies_nis_locked(struct lnet_net *net)
{
	int		i;
	int		islo;
	struct lnet_ni	*ni;
	struct list_head *zombie_list = &net->net_ni_zombie;

	/*
	 * Now wait for the NIs I just nuked to show up on the zombie
	 * list and shut them down in guaranteed thread context
	 */
	i = 2;
	while (!list_empty(zombie_list)) {
		int	*ref;
		int	j;

		ni = list_entry(zombie_list->next,
				struct lnet_ni, ni_netlist);
		list_del_init(&ni->ni_netlist);
		/* the ni should be in deleting state. If it's not it's
		 * a bug */
		LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
		cfs_percpt_for_each(ref, j, ni->ni_refs) {
			if (*ref == 0)
				continue;
			/* still busy, add it back to zombie list */
			list_add(&ni->ni_netlist, zombie_list);
			break;
		}

		if (!list_empty(&ni->ni_netlist)) {
			/* NI still referenced: drop the lock, sleep 1s and
			 * retry; warn only when i is a power of two, so
			 * the message rate decays exponentially. */
			lnet_net_unlock(LNET_LOCK_EX);
			++i;
			if ((i & (-i)) == i) {
				CDEBUG(D_WARNING,
				       "Waiting for zombie LNI %s\n",
				       libcfs_nid2str(ni->ni_nid));
			}
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(cfs_time_seconds(1));
			lnet_net_lock(LNET_LOCK_EX);
			continue;
		}

		lnet_net_unlock(LNET_LOCK_EX);

		islo = ni->ni_net->net_lnd->lnd_type == LOLND;

		/* LND shutdown must run in thread context, not softirq. */
		LASSERT(!in_interrupt());
		(net->net_lnd->lnd_shutdown)(ni);

		if (!islo)
			CDEBUG(D_LNI, "Removed LNI %s\n",
			      libcfs_nid2str(ni->ni_nid));

		lnet_ni_free(ni);
		i = 2;
		lnet_net_lock(LNET_LOCK_EX);
	}
}
1620
/* Shut down a single NI and release the reference it holds. */
static void
lnet_shutdown_lndni(struct lnet_ni *ni)
{
	int i;
	struct lnet_net *net = ni->ni_net;

	/* Mark deleting and unlink under the exclusive lock so no new
	 * user can find the NI; bump the DLC sequence so configuration
	 * readers see the change. */
	lnet_net_lock(LNET_LOCK_EX);
	ni->ni_state = LNET_NI_STATE_DELETING;
	lnet_ni_unlink_locked(ni);
	lnet_incr_dlc_seq();
	lnet_net_unlock(LNET_LOCK_EX);

	/* clear messages for this NI on the lazy portal */
	for (i = 0; i < the_lnet.ln_nportals; i++)
		lnet_clear_lazy_portal(ni, i, "Shutting down NI");

	/* Reap the NI (now on the net's zombie list) once it is idle. */
	lnet_net_lock(LNET_LOCK_EX);
	lnet_clear_zombies_nis_locked(net);
	lnet_net_unlock(LNET_LOCK_EX);
}
1642
/*
 * Shut down every NI on @net, clean up the peer tables for it, drop
 * the net's reference on its LND and free the net structure.
 */
static void
lnet_shutdown_lndnet(struct lnet_net *net)
{
	struct lnet_ni *ni;

	lnet_net_lock(LNET_LOCK_EX);

	net->net_state = LNET_NET_STATE_DELETING;

	list_del_init(&net->net_list);

	/* Shut down one NI at a time; lnet_shutdown_lndni() sleeps, so
	 * the lock is dropped around each call. */
	while (!list_empty(&net->net_ni_list)) {
		ni = list_entry(net->net_ni_list.next,
				struct lnet_ni, ni_netlist);
		lnet_net_unlock(LNET_LOCK_EX);
		lnet_shutdown_lndni(ni);
		lnet_net_lock(LNET_LOCK_EX);
	}

	lnet_net_unlock(LNET_LOCK_EX);

	/* Do peer table cleanup for this net */
	lnet_peer_tables_cleanup(net);

	lnet_net_lock(LNET_LOCK_EX);
	/*
	 * decrement ref count on lnd only when the entire network goes
	 * away
	 */
	net->net_lnd->lnd_refcount--;

	lnet_net_unlock(LNET_LOCK_EX);

	lnet_net_free(net);
}
1678
/*
 * Shut down all configured nets.  Transitions LNet state
 * RUNNING -> STOPPING -> SHUTDOWN; moves every net to the zombie list
 * first so nothing new can use them, then tears each one down.
 */
static void
lnet_shutdown_lndnets(void)
{
	struct lnet_net *net;

	/* NB called holding the global mutex */

	/* All quiet on the API front */
	LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
	LASSERT(the_lnet.ln_refcount == 0);

	lnet_net_lock(LNET_LOCK_EX);
	the_lnet.ln_state = LNET_STATE_STOPPING;

	while (!list_empty(&the_lnet.ln_nets)) {
		/*
		 * move the nets to the zombie list to avoid them being
		 * picked up for new work. LONET is also included in the
		 * Nets that will be moved to the zombie list
		 */
		net = list_entry(the_lnet.ln_nets.next,
				 struct lnet_net, net_list);
		list_move(&net->net_list, &the_lnet.ln_net_zombie);
	}

	/* Drop the cached loopback Net. */
	if (the_lnet.ln_loni != NULL) {
		lnet_ni_decref_locked(the_lnet.ln_loni, 0);
		the_lnet.ln_loni = NULL;
	}
	lnet_net_unlock(LNET_LOCK_EX);

	/* iterate through the net zombie list and delete each net */
	while (!list_empty(&the_lnet.ln_net_zombie)) {
		net = list_entry(the_lnet.ln_net_zombie.next,
				 struct lnet_net, net_list);
		lnet_shutdown_lndnet(net);
	}

	lnet_net_lock(LNET_LOCK_EX);
	the_lnet.ln_state = LNET_STATE_SHUTDOWN;
	lnet_net_unlock(LNET_LOCK_EX);
}
1722
/*
 * Start a single NI through its LND's lnd_startup() and finish its
 * runtime initialization (loopback special-casing, tx credit pools).
 *
 * \param ni   the NI to start; freed on startup failure
 * \param tun  optional LND tunables copied into the NI beforehand
 *
 * Returns 0 on success.  On failure returns a negative errno and the
 * NI has been freed (or, for the no-credits case, shut down).
 */
static int
lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
{
	int			rc = -EINVAL;
	struct lnet_tx_queue	*tq;
	int			i;
	struct lnet_net		*net = ni->ni_net;

	mutex_lock(&the_lnet.ln_lnd_mutex);

	if (tun) {
		memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
		ni->ni_lnd_tunables_set = true;
	}

	rc = (net->net_lnd->lnd_startup)(ni);

	mutex_unlock(&the_lnet.ln_lnd_mutex);

	if (rc != 0) {
		LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
				   rc, libcfs_lnd2str(net->net_lnd->lnd_type));
		/* Drop the LND ref taken when the net was set up. */
		lnet_net_lock(LNET_LOCK_EX);
		net->net_lnd->lnd_refcount--;
		lnet_net_unlock(LNET_LOCK_EX);
		goto failed0;
	}

	ni->ni_state = LNET_NI_STATE_ACTIVE;

	/* We keep a reference on the loopback net through the loopback NI */
	if (net->net_lnd->lnd_type == LOLND) {
		lnet_ni_addref(ni);
		LASSERT(the_lnet.ln_loni == NULL);
		the_lnet.ln_loni = ni;
		/* Loopback needs no credit accounting or peer timeout. */
		ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
		ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
		ni->ni_net->net_tunables.lct_max_tx_credits = 0;
		ni->ni_net->net_tunables.lct_peer_timeout = 0;
		return 0;
	}

	if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
	    ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
		LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
				   libcfs_lnd2str(net->net_lnd->lnd_type),
				   ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
					"" : "per-peer ");
		/* shutdown the NI since if we get here then it must've already
		 * been started
		 */
		lnet_shutdown_lndni(ni);
		return -EINVAL;
	}

	/* Initialize every per-CPT tx queue with its credit allotment. */
	cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
		tq->tq_credits_min =
		tq->tq_credits_max =
		tq->tq_credits = lnet_ni_tq_credits(ni);
	}

	atomic_set(&ni->ni_tx_credits,
		   lnet_ni_tq_credits(ni) * ni->ni_ncpts);

	CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
		libcfs_nid2str(ni->ni_nid),
		ni->ni_net->net_tunables.lct_peer_tx_credits,
		lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
		ni->ni_net->net_tunables.lct_peer_rtr_credits,
		ni->ni_net->net_tunables.lct_peer_timeout);

	return 0;
failed0:
	lnet_ni_free(ni);
	return rc;
}
1799
/*
 * Bring up every NI attached to @net, loading the owning LND module on
 * demand.
 *
 * If @net duplicates a network that is already configured, the new NIs
 * are started and grafted onto the existing struct lnet_net and the
 * passed-in @net is freed.  Otherwise @net itself is activated and
 * linked onto the_lnet.ln_nets.
 *
 * \param net	network whose net_ni_added list holds the NIs to start;
 *		consumed (freed) on failure and when it is a duplicate
 * \param tun	optional LND tunables applied to each NI, may be NULL
 *
 * \retval >= 0	number of NIs started
 * \retval < 0	error; any NIs that did start have been shut down again
 */
static int
lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
{
	struct lnet_ni *ni;
	struct lnet_net *net_l = NULL;
	struct list_head	local_ni_list;
	int			rc;
	int			ni_count = 0;
	__u32			lnd_type;
	struct lnet_lnd *lnd;
	/* snapshot caller-supplied common tunables: the LND's startup may
	 * overwrite them, and we restore any that were explicitly set
	 * (-1 means "unset, accept the LND default") */
	int			peer_timeout =
		net->net_tunables.lct_peer_timeout;
	int			maxtxcredits =
		net->net_tunables.lct_max_tx_credits;
	int			peerrtrcredits =
		net->net_tunables.lct_peer_rtr_credits;

	INIT_LIST_HEAD(&local_ni_list);

	/*
	 * make sure that this net is unique. If it isn't then
	 * we are adding interfaces to an already existing network, and
	 * 'net' is just a convenient way to pass in the list.
	 * if it is unique we need to find the LND and load it if
	 * necessary.
	 */
	if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
		lnd_type = LNET_NETTYP(net->net_id);

		LASSERT(libcfs_isknown_lnd(lnd_type));

		mutex_lock(&the_lnet.ln_lnd_mutex);
		lnd = lnet_find_lnd_by_type(lnd_type);

		if (lnd == NULL) {
			/* LND not registered yet: drop the mutex while the
			 * module loads (its init will re-take it to
			 * register), then look again */
			mutex_unlock(&the_lnet.ln_lnd_mutex);
			rc = request_module("%s", libcfs_lnd2modname(lnd_type));
			mutex_lock(&the_lnet.ln_lnd_mutex);

			lnd = lnet_find_lnd_by_type(lnd_type);
			if (lnd == NULL) {
				mutex_unlock(&the_lnet.ln_lnd_mutex);
				CERROR("Can't load LND %s, module %s, rc=%d\n",
				libcfs_lnd2str(lnd_type),
				libcfs_lnd2modname(lnd_type), rc);
#ifndef HAVE_MODULE_LOADING_SUPPORT
				LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
						"compiled with kernel module "
						"loading support.");
#endif
				rc = -EINVAL;
				goto failed0;
			}
		}

		/* pin the LND module for as long as this net uses it */
		lnet_net_lock(LNET_LOCK_EX);
		lnd->lnd_refcount++;
		lnet_net_unlock(LNET_LOCK_EX);

		net->net_lnd = lnd;

		mutex_unlock(&the_lnet.ln_lnd_mutex);

		net_l = net;
	}

	/*
	 * net_l: if the network being added is unique then net_l
	 *        will point to that network
	 *        if the network being added is not unique then
	 *        net_l points to the existing network.
	 *
	 * When we enter the loop below, we'll pick NIs off the
	 * network being added and start them up, then add them to
	 * a local ni list. Once we've successfully started all
	 * the NIs then we join the local NI list (of started up
	 * networks) with the net_l->net_ni_list, which should
	 * point to the correct network to add the new ni list to
	 *
	 * If any of the new NIs fail to start up, then we want to
	 * iterate through the local ni list, which should include
	 * any NIs which were successfully started up, and shut
	 * them down.
	 *
	 * After that we want to delete the network being added,
	 * to avoid a memory leak.
	 */

	/*
	 * When a network uses TCP bonding then all its interfaces
	 * must be specified when the network is first defined: the
	 * TCP bonding code doesn't allow for interfaces to be added
	 * or removed.
	 */
	if (net_l != net && net_l != NULL && use_tcp_bonding &&
	    LNET_NETTYP(net_l->net_id) == SOCKLND) {
		rc = -EINVAL;
		goto failed0;
	}

	while (!list_empty(&net->net_ni_added)) {
		ni = list_entry(net->net_ni_added.next, struct lnet_ni,
				ni_netlist);
		list_del_init(&ni->ni_netlist);

		/* make sure that the the NI we're about to start
		 * up is actually unique. if it's not fail. */
		if (!lnet_ni_unique_net(&net_l->net_ni_list,
					ni->ni_interfaces[0])) {
			rc = -EINVAL;
			goto failed1;
		}

		/* adjust the pointer the parent network, just in case it
		 * the net is a duplicate */
		ni->ni_net = net_l;

		rc = lnet_startup_lndni(ni, tun);

		/* an LND that sets a peer timeout must supply lnd_query
		 * so dead peers can be detected */
		LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
			ni->ni_net->net_lnd->lnd_query != NULL);

		if (rc < 0)
			goto failed1;

		lnet_ni_addref(ni);
		list_add_tail(&ni->ni_netlist, &local_ni_list);

		ni_count++;
	}

	/* all NIs started: publish them on the (possibly pre-existing)
	 * network atomically and bump the DLC sequence so userspace can
	 * detect the configuration change */
	lnet_net_lock(LNET_LOCK_EX);
	list_splice_tail(&local_ni_list, &net_l->net_ni_list);
	lnet_incr_dlc_seq();
	lnet_net_unlock(LNET_LOCK_EX);

	/* if the network is not unique then we don't want to keep
	 * it around after we're done. Free it. Otherwise add that
	 * net to the global the_lnet.ln_nets */
	if (net_l != net && net_l != NULL) {
		/*
		 * TODO - note. currently the tunables can not be updated
		 * once added
		 */
		lnet_net_free(net);
	} else {
		net->net_state = LNET_NET_STATE_ACTIVE;
		/*
		 * restore tunables after it has been overwritten by the
		 * lnd
		 */
		if (peer_timeout != -1)
			net->net_tunables.lct_peer_timeout = peer_timeout;
		if (maxtxcredits != -1)
			net->net_tunables.lct_max_tx_credits = maxtxcredits;
		if (peerrtrcredits != -1)
			net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;

		lnet_net_lock(LNET_LOCK_EX);
		list_add_tail(&net->net_list, &the_lnet.ln_nets);
		lnet_net_unlock(LNET_LOCK_EX);
	}

	return ni_count;

failed1:
	/*
	 * shutdown the new NIs that are being started up
	 * free the NET being started
	 */
	while (!list_empty(&local_ni_list)) {
		ni = list_entry(local_ni_list.next, struct lnet_ni,
				ni_netlist);

		lnet_shutdown_lndni(ni);
	}

failed0:
	lnet_net_free(net);

	return rc;
}
1982
1983 static int
1984 lnet_startup_lndnets(struct list_head *netlist)
1985 {
1986         struct lnet_net         *net;
1987         int                     rc;
1988         int                     ni_count = 0;
1989
1990         /*
1991          * Change to running state before bringing up the LNDs. This
1992          * allows lnet_shutdown_lndnets() to assert that we've passed
1993          * through here.
1994          */
1995         lnet_net_lock(LNET_LOCK_EX);
1996         the_lnet.ln_state = LNET_STATE_RUNNING;
1997         lnet_net_unlock(LNET_LOCK_EX);
1998
1999         while (!list_empty(netlist)) {
2000                 net = list_entry(netlist->next, struct lnet_net, net_list);
2001                 list_del_init(&net->net_list);
2002
2003                 rc = lnet_startup_lndnet(net, NULL);
2004
2005                 if (rc < 0)
2006                         goto failed;
2007
2008                 ni_count += rc;
2009         }
2010
2011         return ni_count;
2012 failed:
2013         lnet_shutdown_lndnets();
2014
2015         return rc;
2016 }
2017
2018 /**
2019  * Initialize LNet library.
2020  *
2021  * Automatically called at module loading time. Caller has to call
2022  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
2023  * latter returned 0. It must be called exactly once.
2024  *
2025  * \retval 0 on success
2026  * \retval -ve on failures.
2027  */
int lnet_lib_init(void)
{
	int rc;

	/* catch wire-protocol struct layout drift at module-load time */
	lnet_assert_wire_constants();

	/* refer to global cfs_cpt_table for now */
	the_lnet.ln_cpt_table	= cfs_cpt_table;
	the_lnet.ln_cpt_number	= cfs_cpt_number(cfs_cpt_table);

	LASSERT(the_lnet.ln_cpt_number > 0);
	if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
		/* we are under risk of consuming all lh_cookie */
		CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
		       "please change setting of CPT-table and retry\n",
		       the_lnet.ln_cpt_number, LNET_CPT_MAX);
		return -E2BIG;
	}

	/* ln_cpt_bits = number of bits needed to index every CPT */
	while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
		the_lnet.ln_cpt_bits++;

	rc = lnet_create_locks();
	if (rc != 0) {
		CERROR("Can't create LNet global locks: %d\n", rc);
		return rc;
	}

	the_lnet.ln_refcount = 0;
	LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh);
	INIT_LIST_HEAD(&the_lnet.ln_lnds);
	INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
	INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
	INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);

	/* The hash table size is the number of bits it takes to express the set
	 * ln_num_routes, minus 1 (better to under estimate than over so we
	 * don't waste memory). */
	if (rnet_htable_size <= 0)
		rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
	else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
		rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
	the_lnet.ln_remote_nets_hbits = max_t(int, 1,
					   order_base_2(rnet_htable_size) - 1);

	/* All LNDs apart from the LOLND are in separate modules.  They
	 * register themselves when their module loads, and unregister
	 * themselves when their module is unloaded. */
	lnet_register_lnd(&the_lolnd);
	return 0;
}
2079
2080 /**
2081  * Finalize LNet library.
2082  *
2083  * \pre lnet_lib_init() called with success.
2084  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
2085  */
2086 void lnet_lib_exit(void)
2087 {
2088         LASSERT(the_lnet.ln_refcount == 0);
2089
2090         while (!list_empty(&the_lnet.ln_lnds))
2091                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
2092                                                struct lnet_lnd, lnd_list));
2093         lnet_destroy_locks();
2094 }
2095
2096 /**
2097  * Set LNet PID and start LNet interfaces, routing, and forwarding.
2098  *
2099  * Users must call this function at least once before any other functions.
2100  * For each successful call there must be a corresponding call to
2101  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
2102  * ignored.
2103  *
2104  * The PID used by LNet may be different from the one requested.
2105  * See LNetGetId().
2106  *
2107  * \param requested_pid PID requested by the caller.
2108  *
2109  * \return >= 0 on success, and < 0 error code on failures.
2110  */
int
LNetNIInit(lnet_pid_t requested_pid)
{
	int			im_a_router = 0;
	int			rc;
	int			ni_count;
	struct lnet_ping_buffer *pbuf;
	struct lnet_handle_md	ping_mdh;
	struct list_head	net_head;
	struct lnet_net		*net;

	INIT_LIST_HEAD(&net_head);

	/* ln_api_mutex serialises all init/fini/configuration entry points */
	mutex_lock(&the_lnet.ln_api_mutex);

	CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);

	/* already up: just take another reference (requested_pid is
	 * ignored on subsequent calls, as documented above) */
	if (the_lnet.ln_refcount > 0) {
		rc = the_lnet.ln_refcount++;
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	rc = lnet_prepare(requested_pid);
	if (rc != 0) {
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	/* create a network for Loopback network */
	net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
	if (net == NULL) {
		rc = -ENOMEM;
		goto err_empty_list;
	}

	/* Add in the loopback NI */
	if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
		rc = -ENOMEM;
		goto err_empty_list;
	}

	/* If LNet is being initialized via DLC it is possible
	 * that the user requests not to load module parameters (ones which
	 * are supported by DLC) on initialization.  Therefore, make sure not
	 * to load networks, routes and forwarding from module parameters
	 * in this case.  On cleanup in case of failure only clean up
	 * routes if it has been loaded */
	if (!the_lnet.ln_nis_from_mod_params) {
		rc = lnet_parse_networks(&net_head, lnet_get_networks(),
					 use_tcp_bonding);
		if (rc < 0)
			goto err_empty_list;
	}

	/* consumes net_head: on success every parsed net (including the
	 * loopback net above) has been started and moved off the list */
	ni_count = lnet_startup_lndnets(&net_head);
	if (ni_count < 0) {
		rc = ni_count;
		goto err_empty_list;
	}

	if (!the_lnet.ln_nis_from_mod_params) {
		rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
		if (rc != 0)
			goto err_shutdown_lndnis;

		rc = lnet_check_routes();
		if (rc != 0)
			goto err_destroy_routes;

		rc = lnet_rtrpools_alloc(im_a_router);
		if (rc != 0)
			goto err_destroy_routes;
	}

	rc = lnet_acceptor_start();
	if (rc != 0)
		goto err_destroy_routes;

	the_lnet.ln_refcount = 1;
	/* Now I may use my own API functions... */

	rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_count, true);
	if (rc != 0)
		goto err_acceptor_stop;

	lnet_ping_target_update(pbuf, ping_mdh);

	rc = lnet_router_checker_start();
	if (rc != 0)
		goto err_stop_ping;

	rc = lnet_push_target_init();
	if (rc != 0)
		goto err_stop_router_checker;

	rc = lnet_peer_discovery_start();
	if (rc != 0)
		goto err_destroy_push_target;

	lnet_fault_init();
	lnet_proc_init();

	mutex_unlock(&the_lnet.ln_api_mutex);

	return 0;

	/* error unwind: each label undoes the step immediately above its
	 * goto, in strict reverse order of initialization */
err_destroy_push_target:
	lnet_push_target_fini();
err_stop_router_checker:
	lnet_router_checker_stop();
err_stop_ping:
	lnet_ping_target_fini();
err_acceptor_stop:
	the_lnet.ln_refcount = 0;
	lnet_acceptor_stop();
err_destroy_routes:
	if (!the_lnet.ln_nis_from_mod_params)
		lnet_destroy_routes();
err_shutdown_lndnis:
	lnet_shutdown_lndnets();
err_empty_list:
	lnet_unprepare();
	LASSERT(rc < 0);
	mutex_unlock(&the_lnet.ln_api_mutex);
	/* free any nets still on net_head, i.e. ones that were parsed but
	 * never handed to lnet_startup_lndnets() (inner 'net' deliberately
	 * shadows the outer variable, which may be stale here) */
	while (!list_empty(&net_head)) {
		struct lnet_net *net;

		net = list_entry(net_head.next, struct lnet_net, net_list);
		list_del_init(&net->net_list);
		lnet_net_free(net);
	}
	return rc;
}
EXPORT_SYMBOL(LNetNIInit);
2246
2247 /**
2248  * Stop LNet interfaces, routing, and forwarding.
2249  *
2250  * Users must call this function once for each successful call to LNetNIInit().
2251  * Once the LNetNIFini() operation has been started, the results of pending
2252  * API operations are undefined.
2253  *
2254  * \return always 0 for current implementation.
2255  */
2256 int
2257 LNetNIFini()
2258 {
2259         mutex_lock(&the_lnet.ln_api_mutex);
2260
2261         LASSERT(the_lnet.ln_refcount > 0);
2262
2263         if (the_lnet.ln_refcount != 1) {
2264                 the_lnet.ln_refcount--;
2265         } else {
2266                 LASSERT(!the_lnet.ln_niinit_self);
2267
2268                 lnet_fault_fini();
2269
2270                 lnet_proc_fini();
2271                 lnet_peer_discovery_stop();
2272                 lnet_push_target_fini();
2273                 lnet_router_checker_stop();
2274                 lnet_ping_target_fini();
2275
2276                 /* Teardown fns that use my own API functions BEFORE here */
2277                 the_lnet.ln_refcount = 0;
2278
2279                 lnet_acceptor_stop();
2280                 lnet_destroy_routes();
2281                 lnet_shutdown_lndnets();
2282                 lnet_unprepare();
2283         }
2284
2285         mutex_unlock(&the_lnet.ln_api_mutex);
2286         return 0;
2287 }
2288 EXPORT_SYMBOL(LNetNIFini);
2289
2290 /**
2291  * Grabs the ni data from the ni structure and fills the out
2292  * parameters
2293  *
2294  * \param[in] ni network        interface structure
2295  * \param[out] cfg_ni           NI config information
2296  * \param[out] tun              network and LND tunables
2297  */
2298 static void
2299 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
2300                    struct lnet_ioctl_config_lnd_tunables *tun,
2301                    struct lnet_ioctl_element_stats *stats,
2302                    __u32 tun_size)
2303 {
2304         size_t min_size = 0;
2305         int i;
2306
2307         if (!ni || !cfg_ni || !tun)
2308                 return;
2309
2310         if (ni->ni_interfaces[0] != NULL) {
2311                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
2312                         if (ni->ni_interfaces[i] != NULL) {
2313                                 strncpy(cfg_ni->lic_ni_intf[i],
2314                                         ni->ni_interfaces[i],
2315                                         sizeof(cfg_ni->lic_ni_intf[i]));
2316                         }
2317                 }
2318         }
2319
2320         cfg_ni->lic_nid = ni->ni_nid;
2321         if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
2322                 cfg_ni->lic_status = LNET_NI_STATUS_UP;
2323         else
2324                 cfg_ni->lic_status = ni->ni_status->ns_status;
2325         cfg_ni->lic_tcp_bonding = use_tcp_bonding;
2326         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
2327
2328         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
2329
2330         if (stats) {
2331                 stats->iel_send_count = atomic_read(&ni->ni_stats.send_count);
2332                 stats->iel_recv_count = atomic_read(&ni->ni_stats.recv_count);
2333         }
2334
2335         /*
2336          * tun->lt_tun will always be present, but in order to be
2337          * backwards compatible, we need to deal with the cases when
2338          * tun->lt_tun is smaller than what the kernel has, because it
2339          * comes from an older version of a userspace program, then we'll
2340          * need to copy as much information as we have available space.
2341          */
2342         min_size = tun_size - sizeof(tun->lt_cmn);
2343         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
2344
2345         /* copy over the cpts */
2346         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
2347             ni->ni_cpts == NULL)  {
2348                 for (i = 0; i < ni->ni_ncpts; i++)
2349                         cfg_ni->lic_cpts[i] = i;
2350         } else {
2351                 for (i = 0;
2352                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
2353                      i < LNET_MAX_SHOW_NUM_CPT;
2354                      i++)
2355                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
2356         }
2357         cfg_ni->lic_ncpts = ni->ni_ncpts;
2358 }
2359
2360 /**
2361  * NOTE: This is a legacy function left in the code to be backwards
2362  * compatible with older userspace programs. It should eventually be
2363  * removed.
2364  *
2365  * Grabs the ni data from the ni structure and fills the out
2366  * parameters
2367  *
2368  * \param[in] ni network        interface structure
2369  * \param[out] config           config information
2370  */
2371 static void
2372 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
2373                          struct lnet_ioctl_config_data *config)
2374 {
2375         struct lnet_ioctl_net_config *net_config;
2376         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
2377         size_t min_size, tunable_size = 0;
2378         int i;
2379
2380         if (!ni || !config)
2381                 return;
2382
2383         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
2384         if (!net_config)
2385                 return;
2386
2387         BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
2388                      ARRAY_SIZE(net_config->ni_interfaces));
2389
2390         for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
2391                 if (!ni->ni_interfaces[i])
2392                         break;
2393
2394                 strncpy(net_config->ni_interfaces[i],
2395                         ni->ni_interfaces[i],
2396                         sizeof(net_config->ni_interfaces[i]));
2397         }
2398
2399         config->cfg_nid = ni->ni_nid;
2400         config->cfg_config_u.cfg_net.net_peer_timeout =
2401                 ni->ni_net->net_tunables.lct_peer_timeout;
2402         config->cfg_config_u.cfg_net.net_max_tx_credits =
2403                 ni->ni_net->net_tunables.lct_max_tx_credits;
2404         config->cfg_config_u.cfg_net.net_peer_tx_credits =
2405                 ni->ni_net->net_tunables.lct_peer_tx_credits;
2406         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
2407                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
2408
2409         if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
2410                 net_config->ni_status = LNET_NI_STATUS_UP;
2411         else
2412                 net_config->ni_status = ni->ni_status->ns_status;
2413
2414         if (ni->ni_cpts) {
2415                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
2416
2417                 for (i = 0; i < num_cpts; i++)
2418                         net_config->ni_cpts[i] = ni->ni_cpts[i];
2419
2420                 config->cfg_ncpts = num_cpts;
2421         }
2422
2423         /*
2424          * See if user land tools sent in a newer and larger version
2425          * of struct lnet_tunables than what the kernel uses.
2426          */
2427         min_size = sizeof(*config) + sizeof(*net_config);
2428
2429         if (config->cfg_hdr.ioc_len > min_size)
2430                 tunable_size = config->cfg_hdr.ioc_len - min_size;
2431
2432         /* Don't copy too much data to user space */
2433         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
2434         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
2435
2436         if (lnd_cfg && min_size) {
2437                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
2438                 config->cfg_config_u.cfg_net.net_interface_count = 1;
2439
2440                 /* Tell user land that kernel side has less data */
2441                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
2442                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
2443                         config->cfg_hdr.ioc_len -= min_size;
2444                 }
2445         }
2446 }
2447
2448 struct lnet_ni *
2449 lnet_get_ni_idx_locked(int idx)
2450 {
2451         struct lnet_ni          *ni;
2452         struct lnet_net         *net;
2453
2454         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2455                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2456                         if (idx-- == 0)
2457                                 return ni;
2458                 }
2459         }
2460
2461         return NULL;
2462 }
2463
2464 struct lnet_ni *
2465 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
2466 {
2467         struct lnet_ni          *ni;
2468         struct lnet_net         *net = mynet;
2469
2470         if (prev == NULL) {
2471                 if (net == NULL)
2472                         net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
2473                                         net_list);
2474                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2475                                 ni_netlist);
2476
2477                 return ni;
2478         }
2479
2480         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
2481                 /* if you reached the end of the ni list and the net is
2482                  * specified, then there are no more nis in that net */
2483                 if (net != NULL)
2484                         return NULL;
2485
2486                 /* we reached the end of this net ni list. move to the
2487                  * next net */
2488                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
2489                         /* no more nets and no more NIs. */
2490                         return NULL;
2491
2492                 /* get the next net */
2493                 net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
2494                                  net_list);
2495                 /* get the ni on it */
2496                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2497                                 ni_netlist);
2498
2499                 return ni;
2500         }
2501
2502         /* there are more nis left */
2503         ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
2504
2505         return ni;
2506 }
2507
2508 int
2509 lnet_get_net_config(struct lnet_ioctl_config_data *config)
2510 {
2511         struct lnet_ni *ni;
2512         int cpt;
2513         int rc = -ENOENT;
2514         int idx = config->cfg_count;
2515
2516         cpt = lnet_net_lock_current();
2517
2518         ni = lnet_get_ni_idx_locked(idx);
2519
2520         if (ni != NULL) {
2521                 rc = 0;
2522                 lnet_ni_lock(ni);
2523                 lnet_fill_ni_info_legacy(ni, config);
2524                 lnet_ni_unlock(ni);
2525         }
2526
2527         lnet_net_unlock(cpt);
2528         return rc;
2529 }
2530
2531 int
2532 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
2533                    struct lnet_ioctl_config_lnd_tunables *tun,
2534                    struct lnet_ioctl_element_stats *stats,
2535                    __u32 tun_size)
2536 {
2537         struct lnet_ni          *ni;
2538         int                     cpt;
2539         int                     rc = -ENOENT;
2540
2541         if (!cfg_ni || !tun || !stats)
2542                 return -EINVAL;
2543
2544         cpt = lnet_net_lock_current();
2545
2546         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
2547
2548         if (ni) {
2549                 rc = 0;
2550                 lnet_ni_lock(ni);
2551                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
2552                 lnet_ni_unlock(ni);
2553         }
2554
2555         lnet_net_unlock(cpt);
2556         return rc;
2557 }
2558
/*
 * Common path for dynamically adding a network: grow the ping buffer,
 * start the net's NIs, start the acceptor if this net is the first to
 * need it, and publish the new ping information.
 *
 * \param net	network to add; consumed by this function (freed on the
 *		early error paths and by lnet_startup_lndnet() on its own
 *		failure or when the net is a duplicate)
 * \param tun	optional tunables from userspace, may be NULL
 *
 * \retval 0 on success, -ve errno on failure.
 */
static int lnet_add_net_common(struct lnet_net *net,
			       struct lnet_ioctl_config_lnd_tunables *tun)
{
	__u32			net_id;
	struct lnet_ping_buffer *pbuf;
	struct lnet_handle_md	ping_mdh;
	int			rc;
	struct lnet_remotenet *rnet;
	int			net_ni_count;
	int			num_acceptor_nets;

	lnet_net_lock(LNET_LOCK_EX);
	rnet = lnet_find_rnet_locked(net->net_id);
	lnet_net_unlock(LNET_LOCK_EX);
	/*
	 * make sure that the net added doesn't invalidate the current
	 * configuration LNet is keeping
	 */
	if (rnet) {
		CERROR("Adding net %s will invalidate routing configuration\n",
		       libcfs_net2str(net->net_id));
		lnet_net_free(net);
		return -EUSERS;
	}

	/*
	 * make sure you calculate the correct number of slots in the ping
	 * buffer. Since the ping info is a flattened list of all the NIs,
	 * we should allocate enough slots to accomodate the number of NIs
	 * which will be added.
	 *
	 * since ni hasn't been configured yet, use
	 * lnet_get_net_ni_count_pre() which checks the net_ni_added list
	 */
	net_ni_count = lnet_get_net_ni_count_pre(net);

	rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
				    net_ni_count + lnet_get_ni_count(),
				    false);
	if (rc < 0) {
		lnet_net_free(net);
		return rc;
	}

	/* -1 in every tunable field means "unset"; the LND will then
	 * apply its own defaults during startup */
	if (tun)
		memcpy(&net->net_tunables,
		       &tun->lt_cmn, sizeof(net->net_tunables));
	else
		memset(&net->net_tunables, -1, sizeof(net->net_tunables));

	/*
	 * before starting this network get a count of the current TCP
	 * networks which require the acceptor thread running. If that
	 * count is == 0 before we start up this network, then we'd want to
	 * start up the acceptor thread after starting up this network
	 */
	num_acceptor_nets = lnet_count_acceptor_nets();

	/* remember the id: lnet_startup_lndnet() may free 'net' when it
	 * turns out to be a duplicate of an existing network */
	net_id = net->net_id;

	rc = lnet_startup_lndnet(net,
				 (tun) ? &tun->lt_tun : NULL);
	if (rc < 0)
		goto failed;

	/* re-lookup by id: 'net' may no longer be the live struct */
	lnet_net_lock(LNET_LOCK_EX);
	net = lnet_get_net_locked(net_id);
	lnet_net_unlock(LNET_LOCK_EX);

	LASSERT(net);

	/*
	 * Start the acceptor thread if this is the first network
	 * being added that requires the thread.
	 */
	if (net->net_lnd->lnd_accept && num_acceptor_nets == 0) {
		rc = lnet_acceptor_start();
		if (rc < 0) {
			/* shutdown the net that we just started */
			CERROR("Failed to start up acceptor thread\n");
			lnet_shutdown_lndnet(net);
			goto failed;
		}
	}

	/* let the peer code learn about the new local net */
	lnet_net_lock(LNET_LOCK_EX);
	lnet_peer_net_added(net);
	lnet_net_unlock(LNET_LOCK_EX);

	/* publish the enlarged ping buffer (consumes pbuf/ping_mdh) */
	lnet_ping_target_update(pbuf, ping_mdh);

	return 0;

failed:
	/* the new ping buffer was never published; unlink and release it */
	lnet_ping_md_unlink(pbuf, &ping_mdh);
	lnet_ping_buffer_decref(pbuf);
	return rc;
}
2657
2658 static int lnet_handle_legacy_ip2nets(char *ip2nets,
2659                                       struct lnet_ioctl_config_lnd_tunables *tun)
2660 {
2661         struct lnet_net *net;
2662         char *nets;
2663         int rc;
2664         struct list_head net_head;
2665
2666         INIT_LIST_HEAD(&net_head);
2667
2668         rc = lnet_parse_ip2nets(&nets, ip2nets);
2669         if (rc < 0)
2670                 return rc;
2671
2672         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2673         if (rc < 0)
2674                 return rc;
2675
2676         mutex_lock(&the_lnet.ln_api_mutex);
2677         while (!list_empty(&net_head)) {
2678                 net = list_entry(net_head.next, struct lnet_net, net_list);
2679                 list_del_init(&net->net_list);
2680                 rc = lnet_add_net_common(net, tun);
2681                 if (rc < 0)
2682                         goto out;
2683         }
2684
2685 out:
2686         mutex_unlock(&the_lnet.ln_api_mutex);
2687
2688         while (!list_empty(&net_head)) {
2689                 net = list_entry(net_head.next, struct lnet_net, net_list);
2690                 list_del_init(&net->net_list);
2691                 lnet_net_free(net);
2692         }
2693         return rc;
2694 }
2695
2696 int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf)
2697 {
2698         struct lnet_net *net;
2699         struct lnet_ni *ni;
2700         struct lnet_ioctl_config_lnd_tunables *tun = NULL;
2701         int rc, i;
2702         __u32 net_id;
2703
2704         /* get the tunables if they are available */
2705         if (conf->lic_cfg_hdr.ioc_len >=
2706             sizeof(*conf) + sizeof(*tun))
2707                 tun = (struct lnet_ioctl_config_lnd_tunables *)
2708                         conf->lic_bulk;
2709
2710         /* handle legacy ip2nets from DLC */
2711         if (conf->lic_legacy_ip2nets[0] != '\0')
2712                 return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
2713                                                   tun);
2714
2715         net_id = LNET_NIDNET(conf->lic_nid);
2716
2717         net = lnet_net_alloc(net_id, NULL);
2718         if (!net)
2719                 return -ENOMEM;
2720
2721         for (i = 0; i < conf->lic_ncpts; i++) {
2722                 if (conf->lic_cpts[i] >= LNET_CPT_NUMBER)
2723                         return -EINVAL;
2724         }
2725
2726         ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
2727                                        conf->lic_ni_intf[0]);
2728         if (!ni)
2729                 return -ENOMEM;
2730
2731         mutex_lock(&the_lnet.ln_api_mutex);
2732
2733         rc = lnet_add_net_common(net, tun);
2734
2735         mutex_unlock(&the_lnet.ln_api_mutex);
2736
2737         return rc;
2738 }
2739
/*
 * Dynamically remove an NI, or an entire network, identified by
 * conf->lic_nid.
 *
 * If the address part of the NID is zero, the whole network is shut
 * down; otherwise only the matching NI is removed (and the network
 * too, if that NI was its last).  The ping target is rebuilt before
 * the teardown so peers stop seeing the departing NID(s), and the
 * acceptor thread is stopped once no remaining network requires it.
 *
 * Returns 0 on success, -EINVAL for the loopback network, -ENOENT if
 * the net/NI does not exist, or a negative errno from the ping target
 * rebuild.
 */
int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf)
{
	struct lnet_net  *net;
	struct lnet_ni *ni;
	__u32 net_id = LNET_NIDNET(conf->lic_nid);
	struct lnet_ping_buffer *pbuf;
	struct lnet_handle_md  ping_mdh;
	int               rc;
	int               net_count;
	__u32             addr;

	/* don't allow userspace to shutdown the LOLND */
	if (LNET_NETTYP(net_id) == LOLND)
		return -EINVAL;

	/* serialize against other configuration changes */
	mutex_lock(&the_lnet.ln_api_mutex);

	lnet_net_lock(0);

	net = lnet_get_net_locked(net_id);
	if (!net) {
		CERROR("net %s not found\n",
		       libcfs_net2str(net_id));
		rc = -ENOENT;
		goto unlock_net;
	}

	addr = LNET_NIDADDR(conf->lic_nid);
	if (addr == 0) {
		/* remove the entire net */
		net_count = lnet_get_net_ni_count_locked(net);

		/* NOTE(review): net is dereferenced below after the net
		 * lock is dropped; the API mutex appears to be what keeps
		 * it alive — confirm no path frees nets under the net
		 * lock only */
		lnet_net_unlock(0);

		/* create and link a new ping info, before removing the old one */
		rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
					lnet_get_ni_count() - net_count,
					false);
		if (rc != 0)
			goto unlock_api_mutex;

		lnet_shutdown_lndnet(net);

		/* stop the acceptor if no remaining net needs it */
		if (lnet_count_acceptor_nets() == 0)
			lnet_acceptor_stop();

		lnet_ping_target_update(pbuf, ping_mdh);

		goto unlock_api_mutex;
	}

	ni = lnet_nid2ni_locked(conf->lic_nid, 0);
	if (!ni) {
		CERROR("nid %s not found\n",
		       libcfs_nid2str(conf->lic_nid));
		rc = -ENOENT;
		goto unlock_net;
	}

	/* number of NIs on this net before removing one; used below to
	 * decide whether the net becomes empty */
	net_count = lnet_get_net_ni_count_locked(net);

	lnet_net_unlock(0);

	/* create and link a new ping info, before removing the old one */
	rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
				  lnet_get_ni_count() - 1, false);
	if (rc != 0)
		goto unlock_api_mutex;

	lnet_shutdown_lndni(ni);

	/* stop the acceptor if no remaining net needs it */
	if (lnet_count_acceptor_nets() == 0)
		lnet_acceptor_stop();

	lnet_ping_target_update(pbuf, ping_mdh);

	/* check if the net is empty and remove it if it is */
	if (net_count == 1)
		lnet_shutdown_lndnet(net);

	goto unlock_api_mutex;

unlock_net:
	lnet_net_unlock(0);
unlock_api_mutex:
	mutex_unlock(&the_lnet.ln_api_mutex);

	return rc;
}
2829
/*
 * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
 * They are only expected to be called for unique networks.
 * Such calls can still arrive from older DLC libraries.
 * Multi-Rail DLC and later no longer use these APIs.
 */
2836 int
2837 lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
2838 {
2839         struct lnet_net         *net;
2840         struct list_head        net_head;
2841         int                     rc;
2842         struct lnet_ioctl_config_lnd_tunables tun;
2843         char *nets = conf->cfg_config_u.cfg_net.net_intf;
2844
2845         INIT_LIST_HEAD(&net_head);
2846
2847         /* Create a net/ni structures for the network string */
2848         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2849         if (rc <= 0)
2850                 return rc == 0 ? -EINVAL : rc;
2851
2852         mutex_lock(&the_lnet.ln_api_mutex);
2853
2854         if (rc > 1) {
2855                 rc = -EINVAL; /* only add one network per call */
2856                 goto out_unlock_clean;
2857         }
2858
2859         net = list_entry(net_head.next, struct lnet_net, net_list);
2860         list_del_init(&net->net_list);
2861
2862         LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));
2863
2864         memset(&tun, 0, sizeof(tun));
2865
2866         tun.lt_cmn.lct_peer_timeout =
2867           conf->cfg_config_u.cfg_net.net_peer_timeout;
2868         tun.lt_cmn.lct_peer_tx_credits =
2869           conf->cfg_config_u.cfg_net.net_peer_tx_credits;
2870         tun.lt_cmn.lct_peer_rtr_credits =
2871           conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
2872         tun.lt_cmn.lct_max_tx_credits =
2873           conf->cfg_config_u.cfg_net.net_max_tx_credits;
2874
2875         rc = lnet_add_net_common(net, &tun);
2876
2877 out_unlock_clean:
2878         mutex_unlock(&the_lnet.ln_api_mutex);
2879         while (!list_empty(&net_head)) {
2880                 /* net_head list is empty in success case */
2881                 net = list_entry(net_head.next, struct lnet_net, net_list);
2882                 list_del_init(&net->net_list);
2883                 lnet_net_free(net);
2884         }
2885         return rc;
2886 }
2887
2888 int
2889 lnet_dyn_del_net(__u32 net_id)
2890 {
2891         struct lnet_net  *net;
2892         struct lnet_ping_buffer *pbuf;
2893         struct lnet_handle_md ping_mdh;
2894         int               rc;
2895         int               net_ni_count;
2896
2897         /* don't allow userspace to shutdown the LOLND */
2898         if (LNET_NETTYP(net_id) == LOLND)
2899                 return -EINVAL;
2900
2901         mutex_lock(&the_lnet.ln_api_mutex);
2902
2903         lnet_net_lock(0);
2904
2905         net = lnet_get_net_locked(net_id);
2906         if (net == NULL) {
2907                 lnet_net_unlock(0);
2908                 rc = -EINVAL;
2909                 goto out;
2910         }
2911
2912         net_ni_count = lnet_get_net_ni_count_locked(net);
2913
2914         lnet_net_unlock(0);
2915
2916         /* create and link a new ping info, before removing the old one */
2917         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2918                                     lnet_get_ni_count() - net_ni_count, false);
2919         if (rc != 0)
2920                 goto out;
2921
2922         lnet_shutdown_lndnet(net);
2923
2924         if (lnet_count_acceptor_nets() == 0)
2925                 lnet_acceptor_stop();
2926
2927         lnet_ping_target_update(pbuf, ping_mdh);
2928
2929 out:
2930         mutex_unlock(&the_lnet.ln_api_mutex);
2931
2932         return rc;
2933 }
2934
/* Bump the DLC sequence number; called when the dynamic
 * configuration changes so userspace can detect staleness. */
void lnet_incr_dlc_seq(void)
{
	atomic_inc(&lnet_dlc_seq_no);
}
2939
/* Read the current DLC sequence number (atomic read; the "_locked"
 * suffix reflects the caller's locking convention, not a lock taken
 * here). */
__u32 lnet_get_dlc_seq_locked(void)
{
	return atomic_read(&lnet_dlc_seq_no);
}
2944
2945 /**
2946  * LNet ioctl handler.
2947  *
2948  */
2949 int
2950 LNetCtl(unsigned int cmd, void *arg)
2951 {
2952         struct libcfs_ioctl_data *data = arg;
2953         struct lnet_ioctl_config_data *config;
2954         struct lnet_process_id    id = {0};
2955         struct lnet_ni           *ni;
2956         int                       rc;
2957
2958         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
2959                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
2960
2961         switch (cmd) {
2962         case IOC_LIBCFS_GET_NI:
2963                 rc = LNetGetId(data->ioc_count, &id);
2964                 data->ioc_nid = id.nid;
2965                 return rc;
2966
2967         case IOC_LIBCFS_FAIL_NID:
2968                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2969
2970         case IOC_LIBCFS_ADD_ROUTE:
2971                 config = arg;
2972
2973                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2974                         return -EINVAL;
2975
2976                 mutex_lock(&the_lnet.ln_api_mutex);
2977                 rc = lnet_add_route(config->cfg_net,
2978                                     config->cfg_config_u.cfg_route.rtr_hop,
2979                                     config->cfg_nid,
2980                                     config->cfg_config_u.cfg_route.
2981                                         rtr_priority);
2982                 if (rc == 0) {
2983                         rc = lnet_check_routes();
2984                         if (rc != 0)
2985                                 lnet_del_route(config->cfg_net,
2986                                                config->cfg_nid);
2987                 }
2988                 mutex_unlock(&the_lnet.ln_api_mutex);
2989                 return rc;
2990
2991         case IOC_LIBCFS_DEL_ROUTE:
2992                 config = arg;
2993
2994                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2995                         return -EINVAL;
2996
2997                 mutex_lock(&the_lnet.ln_api_mutex);
2998                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2999                 mutex_unlock(&the_lnet.ln_api_mutex);
3000                 return rc;
3001
3002         case IOC_LIBCFS_GET_ROUTE:
3003                 config = arg;
3004
3005                 if (config->cfg_hdr.ioc_len < sizeof(*config))
3006                         return -EINVAL;
3007
3008                 mutex_lock(&the_lnet.ln_api_mutex);
3009                 rc = lnet_get_route(config->cfg_count,
3010                                     &config->cfg_net,
3011                                     &config->cfg_config_u.cfg_route.rtr_hop,
3012                                     &config->cfg_nid,
3013                                     &config->cfg_config_u.cfg_route.rtr_flags,
3014                                     &config->cfg_config_u.cfg_route.
3015                                         rtr_priority);
3016                 mutex_unlock(&the_lnet.ln_api_mutex);
3017                 return rc;
3018
3019         case IOC_LIBCFS_GET_LOCAL_NI: {
3020                 struct lnet_ioctl_config_ni *cfg_ni;
3021                 struct lnet_ioctl_config_lnd_tunables *tun = NULL;
3022                 struct lnet_ioctl_element_stats *stats;
3023                 __u32 tun_size;
3024
3025                 cfg_ni = arg;
3026                 /* get the tunables if they are available */
3027                 if (cfg_ni->lic_cfg_hdr.ioc_len <
3028                     sizeof(*cfg_ni) + sizeof(*stats)+ sizeof(*tun))
3029                         return -EINVAL;
3030
3031                 stats = (struct lnet_ioctl_element_stats *)
3032                         cfg_ni->lic_bulk;
3033                 tun = (struct lnet_ioctl_config_lnd_tunables *)
3034                                 (cfg_ni->lic_bulk + sizeof(*stats));
3035
3036                 tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
3037                         sizeof(*stats);
3038
3039                 mutex_lock(&the_lnet.ln_api_mutex);
3040                 rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
3041                 mutex_unlock(&the_lnet.ln_api_mutex);
3042                 return rc;
3043         }
3044
3045         case IOC_LIBCFS_GET_NET: {
3046                 size_t total = sizeof(*config) +
3047                                sizeof(struct lnet_ioctl_net_config);
3048                 config = arg;
3049
3050                 if (config->cfg_hdr.ioc_len < total)
3051                         return -EINVAL;
3052
3053                 mutex_lock(&the_lnet.ln_api_mutex);
3054                 rc = lnet_get_net_config(config);
3055                 mutex_unlock(&the_lnet.ln_api_mutex);
3056                 return rc;
3057         }
3058
3059         case IOC_LIBCFS_GET_LNET_STATS:
3060         {
3061                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
3062
3063                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
3064                         return -EINVAL;
3065
3066                 mutex_lock(&the_lnet.ln_api_mutex);
3067                 lnet_counters_get(&lnet_stats->st_cntrs);
3068                 mutex_unlock(&the_lnet.ln_api_mutex);
3069                 return 0;
3070         }
3071
3072         case IOC_LIBCFS_CONFIG_RTR:
3073                 config = arg;
3074
3075                 if (config->cfg_hdr.ioc_len < sizeof(*config))
3076                         return -EINVAL;
3077
3078                 mutex_lock(&the_lnet.ln_api_mutex);
3079                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
3080                         rc = lnet_rtrpools_enable();
3081                         mutex_unlock(&the_lnet.ln_api_mutex);
3082                         return rc;
3083                 }
3084                 lnet_rtrpools_disable();
3085                 mutex_unlock(&the_lnet.ln_api_mutex);
3086                 return 0;
3087
3088         case IOC_LIBCFS_ADD_BUF:
3089                 config = arg;
3090
3091                 if (config->cfg_hdr.ioc_len < sizeof(*config))
3092                         return -EINVAL;
3093
3094                 mutex_lock(&the_lnet.ln_api_mutex);
3095                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
3096                                                 buf_tiny,
3097                                           config->cfg_config_u.cfg_buffers.
3098                                                 buf_small,
3099                                           config->cfg_config_u.cfg_buffers.
3100                                                 buf_large);
3101                 mutex_unlock(&the_lnet.ln_api_mutex);
3102                 return rc;
3103
3104         case IOC_LIBCFS_SET_NUMA_RANGE: {
3105                 struct lnet_ioctl_set_value *numa;
3106                 numa = arg;
3107                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
3108                         return -EINVAL;
3109                 lnet_net_lock(LNET_LOCK_EX);
3110                 lnet_numa_range = numa->sv_value;
3111                 lnet_net_unlock(LNET_LOCK_EX);
3112                 return 0;
3113         }
3114
3115         case IOC_LIBCFS_GET_NUMA_RANGE: {
3116                 struct lnet_ioctl_set_value *numa;
3117                 numa = arg;
3118                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
3119                         return -EINVAL;
3120                 numa->sv_value = lnet_numa_range;
3121                 return 0;
3122         }
3123
3124         case IOC_LIBCFS_GET_BUF: {
3125                 struct lnet_ioctl_pool_cfg *pool_cfg;
3126                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
3127
3128                 config = arg;
3129
3130                 if (config->cfg_hdr.ioc_len < total)
3131                         return -EINVAL;
3132
3133                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
3134
3135                 mutex_lock(&the_lnet.ln_api_mutex);
3136                 rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
3137                 mutex_unlock(&the_lnet.ln_api_mutex);
3138                 return rc;
3139         }
3140
3141         case IOC_LIBCFS_ADD_PEER_NI: {
3142                 struct lnet_ioctl_peer_cfg *cfg = arg;
3143
3144                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
3145                         return -EINVAL;
3146
3147                 mutex_lock(&the_lnet.ln_api_mutex);
3148                 rc = lnet_add_peer_ni(cfg->prcfg_prim_nid,
3149                                       cfg->prcfg_cfg_nid,
3150                                       cfg->prcfg_mr);
3151                 mutex_unlock(&the_lnet.ln_api_mutex);
3152                 return rc;
3153         }
3154
3155         case IOC_LIBCFS_DEL_PEER_NI: {
3156                 struct lnet_ioctl_peer_cfg *cfg = arg;
3157
3158                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
3159                         return -EINVAL;
3160
3161                 mutex_lock(&the_lnet.ln_api_mutex);
3162                 rc = lnet_del_peer_ni(cfg->prcfg_prim_nid,
3163                                       cfg->prcfg_cfg_nid);
3164                 mutex_unlock(&the_lnet.ln_api_mutex);
3165                 return rc;
3166         }
3167
3168         case IOC_LIBCFS_GET_PEER_INFO: {
3169                 struct lnet_ioctl_peer *peer_info = arg;
3170
3171                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
3172                         return -EINVAL;
3173
3174                 mutex_lock(&the_lnet.ln_api_mutex);
3175                 rc = lnet_get_peer_ni_info(
3176                    peer_info->pr_count,
3177                    &peer_info->pr_nid,
3178                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
3179                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
3180                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
3181                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
3182                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
3183                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
3184                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
3185                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
3186                 mutex_unlock(&the_lnet.ln_api_mutex);
3187                 return rc;
3188         }
3189
3190         case IOC_LIBCFS_GET_PEER_NI: {
3191                 struct lnet_ioctl_peer_cfg *cfg = arg;
3192
3193                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
3194                         return -EINVAL;
3195
3196                 mutex_lock(&the_lnet.ln_api_mutex);
3197                 rc = lnet_get_peer_info(&cfg->prcfg_prim_nid,
3198                                         &cfg->prcfg_cfg_nid,
3199                                         &cfg->prcfg_count,
3200                                         &cfg->prcfg_mr,
3201                                         &cfg->prcfg_size,
3202                                         (void __user *)cfg->prcfg_bulk);
3203                 mutex_unlock(&the_lnet.ln_api_mutex);
3204                 return rc;
3205         }
3206
3207         case IOC_LIBCFS_GET_PEER_LIST: {
3208                 struct lnet_ioctl_peer_cfg *cfg = arg;
3209
3210                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
3211                         return -EINVAL;
3212
3213                 mutex_lock(&the_lnet.ln_api_mutex);
3214                 rc = lnet_get_peer_list(&cfg->prcfg_count, &cfg->prcfg_size,
3215                                 (lnet_process_id_t __user *)cfg->prcfg_bulk);
3216                 mutex_unlock(&the_lnet.ln_api_mutex);
3217                 return rc;
3218         }
3219
3220         case IOC_LIBCFS_NOTIFY_ROUTER: {
3221                 unsigned long jiffies_passed;
3222
3223                 jiffies_passed = ktime_get_real_seconds() - data->ioc_u64[0];
3224                 jiffies_passed = cfs_time_seconds(jiffies_passed);
3225
3226                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
3227                                    jiffies - jiffies_passed);
3228         }
3229
3230         case IOC_LIBCFS_LNET_DIST:
3231                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
3232                 if (rc < 0 && rc != -EHOSTUNREACH)
3233                         return rc;
3234
3235                 data->ioc_u32[0] = rc;
3236                 return 0;
3237
3238         case IOC_LIBCFS_TESTPROTOCOMPAT:
3239                 lnet_net_lock(LNET_LOCK_EX);
3240                 the_lnet.ln_testprotocompat = data->ioc_flags;
3241                 lnet_net_unlock(LNET_LOCK_EX);
3242                 return 0;
3243
3244         case IOC_LIBCFS_LNET_FAULT:
3245                 return lnet_fault_ctl(data->ioc_flags, data);
3246
3247         case IOC_LIBCFS_PING: {
3248                 signed long timeout;
3249
3250                 id.nid = data->ioc_nid;
3251                 id.pid = data->ioc_u32[0];
3252
3253                 /* If timeout is negative then set default of 3 minutes */
3254                 if (((s32)data->ioc_u32[1] <= 0) ||
3255                     data->ioc_u32[1] > (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
3256                         timeout = msecs_to_jiffies(DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC);
3257                 else
3258                         timeout = msecs_to_jiffies(data->ioc_u32[1]);
3259
3260                 rc = lnet_ping(id, timeout, data->ioc_pbuf1,
3261                                data->ioc_plen1 / sizeof(struct lnet_process_id));
3262
3263                 if (rc < 0)
3264                         return rc;
3265
3266                 data->ioc_count = rc;
3267                 return 0;
3268         }
3269
3270         case IOC_LIBCFS_PING_PEER: {
3271                 struct lnet_ioctl_ping_data *ping = arg;
3272                 struct lnet_peer *lp;
3273                 signed long timeout;
3274
3275                 /* If timeout is negative then set default of 3 minutes */
3276                 if (((s32)ping->op_param) <= 0 ||
3277                     ping->op_param > (DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC))
3278                         timeout = msecs_to_jiffies(DEFAULT_PEER_TIMEOUT * MSEC_PER_SEC);
3279                 else
3280                         timeout = msecs_to_jiffies(ping->op_param);
3281
3282                 rc = lnet_ping(ping->ping_id, timeout,
3283                                ping->ping_buf,
3284                                ping->ping_count);
3285                 if (rc < 0)
3286                         return rc;
3287
3288                 lp = lnet_find_peer(ping->ping_id.nid);
3289                 if (lp) {
3290                         ping->ping_id.nid = lp->lp_primary_nid;
3291                         ping->mr_info = lnet_peer_is_multi_rail(lp);
3292                 }
3293                 ping->ping_count = rc;
3294                 return 0;
3295         }
3296
3297         default:
3298                 ni = lnet_net2ni_addref(data->ioc_net);
3299                 if (ni == NULL)
3300                         return -EINVAL;
3301
3302                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
3303                         rc = -EINVAL;
3304                 else
3305                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
3306
3307                 lnet_ni_decref(ni);
3308                 return rc;
3309         }
3310         /* not reached */
3311 }
3312 EXPORT_SYMBOL(LNetCtl);
3313
/**
 * Dump debug state for the peer identified by \a id.
 *
 * Thin exported wrapper around lnet_debug_peer(); only the nid part
 * of \a id is used.
 */
void LNetDebugPeer(struct lnet_process_id id)
{
	lnet_debug_peer(id.nid);
}
EXPORT_SYMBOL(LNetDebugPeer);
3319
3320 /**
3321  * Determine if the specified peer \a nid is on the local node.
3322  *
3323  * \param nid   peer nid to check
3324  *
3325  * \retval true         If peer NID is on the local node.
3326  * \retval false        If peer NID is not on the local node.
3327  */
3328 bool LNetIsPeerLocal(lnet_nid_t nid)
3329 {
3330         struct lnet_net *net;
3331         struct lnet_ni *ni;
3332         int cpt;
3333
3334         cpt = lnet_net_lock_current();
3335         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3336                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3337                         if (ni->ni_nid == nid) {
3338                                 lnet_net_unlock(cpt);
3339                                 return true;
3340                         }
3341                 }
3342         }
3343         lnet_net_unlock(cpt);
3344
3345         return false;
3346 }
3347 EXPORT_SYMBOL(LNetIsPeerLocal);
3348
3349 /**
3350  * Retrieve the struct lnet_process_id ID of LNet interface at \a index.
3351  * Note that all interfaces share a same PID, as requested by LNetNIInit().
3352  *
3353  * \param index Index of the interface to look up.
3354  * \param id On successful return, this location will hold the
3355  * struct lnet_process_id ID of the interface.
3356  *
3357  * \retval 0 If an interface exists at \a index.
3358  * \retval -ENOENT If no interface has been found.
3359  */
3360 int
3361 LNetGetId(unsigned int index, struct lnet_process_id *id)
3362 {
3363         struct lnet_ni   *ni;
3364         struct lnet_net  *net;
3365         int               cpt;
3366         int               rc = -ENOENT;
3367
3368         LASSERT(the_lnet.ln_refcount > 0);
3369
3370         cpt = lnet_net_lock_current();
3371
3372         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3373                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3374                         if (index-- != 0)
3375                                 continue;
3376
3377                         id->nid = ni->ni_nid;
3378                         id->pid = the_lnet.ln_pid;
3379                         rc = 0;
3380                         break;
3381                 }
3382         }
3383
3384         lnet_net_unlock(cpt);
3385         return rc;
3386 }
3387 EXPORT_SYMBOL(LNetGetId);
3388
3389 static int lnet_ping(struct lnet_process_id id, signed long timeout,
3390                      struct lnet_process_id __user *ids, int n_ids)
3391 {
3392         struct lnet_handle_eq eqh;
3393         struct lnet_handle_md mdh;
3394         struct lnet_event event;
3395         struct lnet_md md = { NULL };
3396         int which;
3397         int unlinked = 0;
3398         int replied = 0;
3399         const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC);
3400         struct lnet_ping_buffer *pbuf;
3401         struct lnet_process_id tmpid;
3402         int i;
3403         int nob;
3404         int rc;
3405         int rc2;
3406         sigset_t blocked;
3407
3408         /* n_ids limit is arbitrary */
3409         if (n_ids <= 0 || n_ids > lnet_interfaces_max || id.nid == LNET_NID_ANY)
3410                 return -EINVAL;
3411
3412         if (id.pid == LNET_PID_ANY)
3413                 id.pid = LNET_PID_LUSTRE;
3414
3415         pbuf = lnet_ping_buffer_alloc(n_ids, GFP_NOFS);
3416         if (!pbuf)
3417                 return -ENOMEM;
3418
3419         /* NB 2 events max (including any unlink event) */
3420         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
3421         if (rc != 0) {
3422                 CERROR("Can't allocate EQ: %d\n", rc);
3423                 goto fail_ping_buffer_decref;
3424         }
3425
3426         /* initialize md content */
3427         md.start     = &pbuf->pb_info;
3428         md.length    = LNET_PING_INFO_SIZE(n_ids);
3429         md.threshold = 2; /* GET/REPLY */
3430         md.max_size  = 0;
3431         md.options   = LNET_MD_TRUNCATE;
3432         md.user_ptr  = NULL;
3433         md.eq_handle = eqh;
3434
3435         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
3436         if (rc != 0) {
3437                 CERROR("Can't bind MD: %d\n", rc);
3438                 goto fail_free_eq;
3439         }
3440
3441         rc = LNetGet(LNET_NID_ANY, mdh, id,
3442                      LNET_RESERVED_PORTAL,
3443                      LNET_PROTO_PING_MATCHBITS, 0);
3444
3445         if (rc != 0) {
3446                 /* Don't CERROR; this could be deliberate! */
3447                 rc2 = LNetMDUnlink(mdh);
3448                 LASSERT(rc2 == 0);
3449
3450                 /* NB must wait for the UNLINK event below... */
3451                 unlinked = 1;
3452                 timeout = a_long_time;
3453         }
3454
3455         do {
3456                 /* MUST block for unlink to complete */
3457                 if (unlinked)
3458                         blocked = cfs_block_allsigs();
3459
3460                 rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which);
3461
3462                 if (unlinked)
3463                         cfs_restore_sigs(blocked);
3464
3465                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
3466                        (rc2 <= 0) ? -1 : event.type,
3467                        (rc2 <= 0) ? -1 : event.status,
3468                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
3469
3470                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
3471
3472                 if (rc2 <= 0 || event.status != 0) {
3473                         /* timeout or error */
3474                         if (!replied && rc == 0)
3475                                 rc = (rc2 < 0) ? rc2 :
3476                                      (rc2 == 0) ? -ETIMEDOUT :
3477                                      event.status;
3478
3479                         if (!unlinked) {
3480                                 /* Ensure completion in finite time... */
3481                                 LNetMDUnlink(mdh);
3482                                 /* No assertion (racing with network) */
3483                                 unlinked = 1;
3484                                 timeout = a_long_time;
3485                         } else if (rc2 == 0) {
3486                                 /* timed out waiting for unlink */
3487                                 CWARN("ping %s: late network completion\n",
3488                                       libcfs_id2str(id));
3489                         }
3490                 } else if (event.type == LNET_EVENT_REPLY) {
3491                         replied = 1;
3492                         rc = event.mlength;
3493                 }
3494         } while (rc2 <= 0 || !event.unlinked);
3495
3496         if (!replied) {
3497                 if (rc >= 0)
3498                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
3499                               libcfs_id2str(id));
3500                 rc = -EIO;
3501                 goto fail_free_eq;
3502         }
3503
3504         nob = rc;
3505         LASSERT(nob >= 0 && nob <= LNET_PING_INFO_SIZE(n_ids));
3506
3507         rc = -EPROTO;           /* if I can't parse... */
3508
3509         if (nob < 8) {
3510                 CERROR("%s: ping info too short %d\n",
3511                        libcfs_id2str(id), nob);
3512                 goto fail_free_eq;
3513         }
3514
3515         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
3516                 lnet_swap_pinginfo(pbuf);
3517         } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
3518                 CERROR("%s: Unexpected magic %08x\n",
3519                        libcfs_id2str(id), pbuf->pb_info.pi_magic);
3520                 goto fail_free_eq;
3521         }
3522
3523         if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
3524                 CERROR("%s: ping w/o NI status: 0x%x\n",
3525                        libcfs_id2str(id), pbuf->pb_info.pi_features);
3526                 goto fail_free_eq;
3527         }
3528
3529         if (nob < LNET_PING_INFO_SIZE(0)) {
3530                 CERROR("%s: Short reply %d(%d min)\n",
3531                        libcfs_id2str(id),
3532                        nob, (int)LNET_PING_INFO_SIZE(0));
3533                 goto fail_free_eq;
3534         }
3535
3536         if (pbuf->pb_info.pi_nnis < n_ids)
3537                 n_ids = pbuf->pb_info.pi_nnis;
3538
3539         if (nob < LNET_PING_INFO_SIZE(n_ids)) {
3540                 CERROR("%s: Short reply %d(%d expected)\n",
3541                        libcfs_id2str(id),
3542                        nob, (int)LNET_PING_INFO_SIZE(n_ids));
3543                 goto fail_free_eq;
3544         }
3545
3546         rc = -EFAULT;           /* if I segv in copy_to_user()... */
3547
3548         memset(&tmpid, 0, sizeof(tmpid));
3549         for (i = 0; i < n_ids; i++) {
3550                 tmpid.pid = pbuf->pb_info.pi_pid;
3551                 tmpid.nid = pbuf->pb_info.pi_ni[i].ns_nid;
3552                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
3553                         goto fail_free_eq;
3554         }
3555         rc = pbuf->pb_info.pi_nnis;
3556
3557  fail_free_eq:
3558         rc2 = LNetEQFree(eqh);
3559         if (rc2 != 0)
3560                 CERROR("rc2 %d\n", rc2);
3561         LASSERT(rc2 == 0);
3562
3563  fail_ping_buffer_decref:
3564         lnet_ping_buffer_decref(pbuf);
3565         return rc;
3566 }