Whamcloud - gitweb
38186e2faf124fb29591c6649ca2c13a01fc7328
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_LNET
34 #include <linux/log2.h>
35 #include <linux/ktime.h>
36
37 #include <lnet/lib-lnet.h>
38
39 #define D_LNI D_CONSOLE
40
41 lnet_t      the_lnet;                           /* THE state of the network */
42 EXPORT_SYMBOL(the_lnet);
43
44 static char *ip2nets = "";
45 module_param(ip2nets, charp, 0444);
46 MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
47
48 static char *networks = "";
49 module_param(networks, charp, 0444);
50 MODULE_PARM_DESC(networks, "local networks");
51
52 static char *routes = "";
53 module_param(routes, charp, 0444);
54 MODULE_PARM_DESC(routes, "routes to non-local networks");
55
56 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
57 module_param(rnet_htable_size, int, 0444);
58 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
59
60 static int use_tcp_bonding = false;
61 module_param(use_tcp_bonding, int, 0444);
62 MODULE_PARM_DESC(use_tcp_bonding,
63                  "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
64
65 unsigned int lnet_numa_range = 0;
66 module_param(lnet_numa_range, uint, 0444);
67 MODULE_PARM_DESC(lnet_numa_range,
68                 "NUMA range to consider during Multi-Rail selection");
69
70 static int lnet_max_interfaces = LNET_MAX_INTERFACES_DEFAULT;
71 module_param(lnet_max_interfaces, int, 0444);
72 MODULE_PARM_DESC(lnet_max_interfaces,
73                 "Maximum number of interfaces in a node.");
74
75 /*
76  * This sequence number keeps track of how many times DLC was used to
77  * update the local NIs. It is incremented when a NI is added or
78  * removed and checked when sending a message to determine if there is
79  * a need to re-run the selection algorithm. See lnet_select_pathway()
80  * for more details on its usage.
81  */
82 static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
83
84 static int lnet_ping(lnet_process_id_t id, signed long timeout,
85                      lnet_process_id_t __user *ids, int n_ids);
86
87 static char *
88 lnet_get_routes(void)
89 {
90         return routes;
91 }
92
93 static char *
94 lnet_get_networks(void)
95 {
96         char   *nets;
97         int     rc;
98
99         if (*networks != 0 && *ip2nets != 0) {
100                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
101                                    "'ip2nets' but not both at once\n");
102                 return NULL;
103         }
104
105         if (*ip2nets != 0) {
106                 rc = lnet_parse_ip2nets(&nets, ip2nets);
107                 return (rc == 0) ? nets : NULL;
108         }
109
110         if (*networks != 0)
111                 return networks;
112
113         return "tcp";
114 }
115
116 static void
117 lnet_init_locks(void)
118 {
119         spin_lock_init(&the_lnet.ln_eq_wait_lock);
120         init_waitqueue_head(&the_lnet.ln_eq_waitq);
121         init_waitqueue_head(&the_lnet.ln_rc_waitq);
122         mutex_init(&the_lnet.ln_lnd_mutex);
123         mutex_init(&the_lnet.ln_api_mutex);
124 }
125
/* Counterpart of lnet_init_locks(); there is nothing to tear down. */
static void lnet_fini_locks(void)
{
}
130
131 struct kmem_cache *lnet_mes_cachep;        /* MEs kmem_cache */
132 struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
133                                             *  MDs kmem_cache */
134
135 static int
136 lnet_descriptor_setup(void)
137 {
138         /* create specific kmem_cache for MEs and small MDs (i.e., originally
139          * allocated in <size-xxx> kmem_cache).
140          */
141         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(lnet_me_t),
142                                             0, 0, NULL);
143         if (!lnet_mes_cachep)
144                 return -ENOMEM;
145
146         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
147                                                   LNET_SMALL_MD_SIZE, 0, 0,
148                                                   NULL);
149         if (!lnet_small_mds_cachep)
150                 return -ENOMEM;
151
152         return 0;
153 }
154
155 static void
156 lnet_descriptor_cleanup(void)
157 {
158
159         if (lnet_small_mds_cachep) {
160                 kmem_cache_destroy(lnet_small_mds_cachep);
161                 lnet_small_mds_cachep = NULL;
162         }
163
164         if (lnet_mes_cachep) {
165                 kmem_cache_destroy(lnet_mes_cachep);
166                 lnet_mes_cachep = NULL;
167         }
168 }
169
170 static int
171 lnet_create_remote_nets_table(void)
172 {
173         int               i;
174         struct list_head *hash;
175
176         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
177         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
178         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
179         if (hash == NULL) {
180                 CERROR("Failed to create remote nets hash table\n");
181                 return -ENOMEM;
182         }
183
184         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
185                 INIT_LIST_HEAD(&hash[i]);
186         the_lnet.ln_remote_nets_hash = hash;
187         return 0;
188 }
189
190 static void
191 lnet_destroy_remote_nets_table(void)
192 {
193         int i;
194
195         if (the_lnet.ln_remote_nets_hash == NULL)
196                 return;
197
198         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
199                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
200
201         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
202                     LNET_REMOTE_NETS_HASH_SIZE *
203                     sizeof(the_lnet.ln_remote_nets_hash[0]));
204         the_lnet.ln_remote_nets_hash = NULL;
205 }
206
207 static void
208 lnet_destroy_locks(void)
209 {
210         if (the_lnet.ln_res_lock != NULL) {
211                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
212                 the_lnet.ln_res_lock = NULL;
213         }
214
215         if (the_lnet.ln_net_lock != NULL) {
216                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
217                 the_lnet.ln_net_lock = NULL;
218         }
219
220         lnet_fini_locks();
221 }
222
223 static int
224 lnet_create_locks(void)
225 {
226         lnet_init_locks();
227
228         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
229         if (the_lnet.ln_res_lock == NULL)
230                 goto failed;
231
232         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
233         if (the_lnet.ln_net_lock == NULL)
234                 goto failed;
235
236         return 0;
237
238  failed:
239         lnet_destroy_locks();
240         return -ENOMEM;
241 }
242
243 static void lnet_assert_wire_constants(void)
244 {
245         /*
246          * Wire protocol assertions generated by 'wirecheck'
247          * running on Linux lustre-build 3.10.0-327.el7_lustre.centos.x86_64
248          * #1 SMP Fri Jul 8 13:32:15 EDT 2016 x86_64 x86_64 x86_64 GNU/Linux
249          * with gcc version 4.8.5 20150623 (Red Hat 4.8.5-4) (GCC)
250          */
251
252
253         /* Constants... */
254         CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
255         CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1);
256         CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0);
257         CLASSERT(LNET_MSG_ACK == 0);
258         CLASSERT(LNET_MSG_PUT == 1);
259         CLASSERT(LNET_MSG_GET == 2);
260         CLASSERT(LNET_MSG_REPLY == 3);
261         CLASSERT(LNET_MSG_HELLO == 4);
262
263         /* Checks for struct lnet_handle_wire */
264         CLASSERT((int)sizeof(struct lnet_handle_wire) == 16);
265         CLASSERT((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) == 0);
266         CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) == 8);
267         CLASSERT((int)offsetof(struct lnet_handle_wire, wh_object_cookie) == 8);
268         CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) == 8);
269
270         /* Checks for struct lnet_magicversion */
271         CLASSERT((int)sizeof(struct lnet_magicversion) == 8);
272         CLASSERT((int)offsetof(struct lnet_magicversion, magic) == 0);
273         CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->magic) == 4);
274         CLASSERT((int)offsetof(struct lnet_magicversion, version_major) == 4);
275         CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->version_major) == 2);
276         CLASSERT((int)offsetof(struct lnet_magicversion, version_minor) == 6);
277         CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->version_minor) == 2);
278
279         /* Checks for struct lnet_hdr */
280         CLASSERT((int)sizeof(struct lnet_hdr) == 72);
281         CLASSERT((int)offsetof(struct lnet_hdr, dest_nid) == 0);
282         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->dest_nid) == 8);
283         CLASSERT((int)offsetof(struct lnet_hdr, src_nid) == 8);
284         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->src_nid) == 8);
285         CLASSERT((int)offsetof(struct lnet_hdr, dest_pid) == 16);
286         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->dest_pid) == 4);
287         CLASSERT((int)offsetof(struct lnet_hdr, src_pid) == 20);
288         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->src_pid) == 4);
289         CLASSERT((int)offsetof(struct lnet_hdr, type) == 24);
290         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->type) == 4);
291         CLASSERT((int)offsetof(struct lnet_hdr, payload_length) == 28);
292         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->payload_length) == 4);
293         CLASSERT((int)offsetof(struct lnet_hdr, msg) == 32);
294         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg) == 40);
295
296         /* Ack */
297         CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) == 32);
298         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) == 16);
299         CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.match_bits) == 48);
300         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) == 8);
301         CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.mlength) == 56);
302         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) == 4);
303
304         /* Put */
305         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) == 32);
306         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) == 16);
307         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.match_bits) == 48);
308         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) == 8);
309         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.hdr_data) == 56);
310         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) == 8);
311         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.ptl_index) == 64);
312         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) == 4);
313         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.offset) == 68);
314         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) == 4);
315
316         /* Get */
317         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.return_wmd) == 32);
318         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) == 16);
319         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.match_bits) == 48);
320         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) == 8);
321         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.ptl_index) == 56);
322         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) == 4);
323         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.src_offset) == 60);
324         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) == 4);
325         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.sink_length) == 64);
326         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) == 4);
327
328         /* Reply */
329         CLASSERT((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) == 32);
330         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) == 16);
331
332         /* Hello */
333         CLASSERT((int)offsetof(struct lnet_hdr, msg.hello.incarnation) == 32);
334         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) == 8);
335         CLASSERT((int)offsetof(struct lnet_hdr, msg.hello.type) == 40);
336         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) == 4);
337
338         /* Checks for struct lnet_ni_status and related constants */
339         CLASSERT(LNET_NI_STATUS_INVALID == 0x00000000);
340         CLASSERT(LNET_NI_STATUS_UP == 0x15aac0de);
341         CLASSERT(LNET_NI_STATUS_DOWN == 0xdeadface);
342
343         /* Checks for struct lnet_ni_status */
344         CLASSERT((int)sizeof(struct lnet_ni_status) == 16);
345         CLASSERT((int)offsetof(struct lnet_ni_status, ns_nid) == 0);
346         CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) == 8);
347         CLASSERT((int)offsetof(struct lnet_ni_status, ns_status) == 8);
348         CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_status) == 4);
349         CLASSERT((int)offsetof(struct lnet_ni_status, ns_unused) == 12);
350         CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_unused) == 4);
351
352         /* Checks for struct lnet_ping_info and related constants */
353         CLASSERT(LNET_PROTO_PING_MAGIC == 0x70696E67);
354         CLASSERT(LNET_PING_FEAT_INVAL == 0);
355         CLASSERT(LNET_PING_FEAT_BASE == 1);
356         CLASSERT(LNET_PING_FEAT_NI_STATUS == 2);
357         CLASSERT(LNET_PING_FEAT_RTE_DISABLED == 4);
358         CLASSERT(LNET_PING_FEAT_MULTI_RAIL == 8);
359         CLASSERT(LNET_PING_FEAT_BITS == 15);
360
361         /* Checks for struct lnet_ping_info */
362         CLASSERT((int)sizeof(struct lnet_ping_info) == 16);
363         CLASSERT((int)offsetof(struct lnet_ping_info, pi_magic) == 0);
364         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) == 4);
365         CLASSERT((int)offsetof(struct lnet_ping_info, pi_features) == 4);
366         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_features) == 4);
367         CLASSERT((int)offsetof(struct lnet_ping_info, pi_pid) == 8);
368         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) == 4);
369         CLASSERT((int)offsetof(struct lnet_ping_info, pi_nnis) == 12);
370         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) == 4);
371         CLASSERT((int)offsetof(struct lnet_ping_info, pi_ni) == 16);
372         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_ni) == 0);
373 }
374
375 static lnd_t *lnet_find_lnd_by_type(__u32 type)
376 {
377         lnd_t            *lnd;
378         struct list_head *tmp;
379
380         /* holding lnd mutex */
381         list_for_each(tmp, &the_lnet.ln_lnds) {
382                 lnd = list_entry(tmp, lnd_t, lnd_list);
383
384                 if (lnd->lnd_type == type)
385                         return lnd;
386         }
387         return NULL;
388 }
389
390 void
391 lnet_register_lnd (lnd_t *lnd)
392 {
393         mutex_lock(&the_lnet.ln_lnd_mutex);
394
395         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
396         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
397
398         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
399         lnd->lnd_refcount = 0;
400
401         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
402
403         mutex_unlock(&the_lnet.ln_lnd_mutex);
404 }
405 EXPORT_SYMBOL(lnet_register_lnd);
406
407 void
408 lnet_unregister_lnd (lnd_t *lnd)
409 {
410         mutex_lock(&the_lnet.ln_lnd_mutex);
411
412         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
413         LASSERT(lnd->lnd_refcount == 0);
414
415         list_del(&lnd->lnd_list);
416         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
417
418         mutex_unlock(&the_lnet.ln_lnd_mutex);
419 }
420 EXPORT_SYMBOL(lnet_unregister_lnd);
421
422 void
423 lnet_counters_get(lnet_counters_t *counters)
424 {
425         lnet_counters_t *ctr;
426         int             i;
427
428         memset(counters, 0, sizeof(*counters));
429
430         lnet_net_lock(LNET_LOCK_EX);
431
432         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
433                 counters->msgs_max     += ctr->msgs_max;
434                 counters->msgs_alloc   += ctr->msgs_alloc;
435                 counters->errors       += ctr->errors;
436                 counters->send_count   += ctr->send_count;
437                 counters->recv_count   += ctr->recv_count;
438                 counters->route_count  += ctr->route_count;
439                 counters->drop_count   += ctr->drop_count;
440                 counters->send_length  += ctr->send_length;
441                 counters->recv_length  += ctr->recv_length;
442                 counters->route_length += ctr->route_length;
443                 counters->drop_length  += ctr->drop_length;
444
445         }
446         lnet_net_unlock(LNET_LOCK_EX);
447 }
448 EXPORT_SYMBOL(lnet_counters_get);
449
450 void
451 lnet_counters_reset(void)
452 {
453         lnet_counters_t *counters;
454         int             i;
455
456         lnet_net_lock(LNET_LOCK_EX);
457
458         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
459                 memset(counters, 0, sizeof(lnet_counters_t));
460
461         lnet_net_unlock(LNET_LOCK_EX);
462 }
463
464 static char *
465 lnet_res_type2str(int type)
466 {
467         switch (type) {
468         default:
469                 LBUG();
470         case LNET_COOKIE_TYPE_MD:
471                 return "MD";
472         case LNET_COOKIE_TYPE_ME:
473                 return "ME";
474         case LNET_COOKIE_TYPE_EQ:
475                 return "EQ";
476         }
477 }
478
479 static void
480 lnet_res_container_cleanup(struct lnet_res_container *rec)
481 {
482         int     count = 0;
483
484         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
485                 return;
486
487         while (!list_empty(&rec->rec_active)) {
488                 struct list_head *e = rec->rec_active.next;
489
490                 list_del_init(e);
491                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
492                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
493
494                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
495                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
496
497                 } else { /* NB: Active MEs should be attached on portals */
498                         LBUG();
499                 }
500                 count++;
501         }
502
503         if (count > 0) {
504                 /* Found alive MD/ME/EQ, user really should unlink/free
505                  * all of them before finalize LNet, but if someone didn't,
506                  * we have to recycle garbage for him */
507                 CERROR("%d active elements on exit of %s container\n",
508                        count, lnet_res_type2str(rec->rec_type));
509         }
510
511         if (rec->rec_lh_hash != NULL) {
512                 LIBCFS_FREE(rec->rec_lh_hash,
513                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
514                 rec->rec_lh_hash = NULL;
515         }
516
517         rec->rec_type = 0; /* mark it as finalized */
518 }
519
520 static int
521 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
522 {
523         int     rc = 0;
524         int     i;
525
526         LASSERT(rec->rec_type == 0);
527
528         rec->rec_type = type;
529         INIT_LIST_HEAD(&rec->rec_active);
530
531         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
532
533         /* Arbitrary choice of hash table size */
534         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
535                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
536         if (rec->rec_lh_hash == NULL) {
537                 rc = -ENOMEM;
538                 goto out;
539         }
540
541         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
542                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
543
544         return 0;
545
546 out:
547         CERROR("Failed to setup %s resource container\n",
548                lnet_res_type2str(type));
549         lnet_res_container_cleanup(rec);
550         return rc;
551 }
552
553 static void
554 lnet_res_containers_destroy(struct lnet_res_container **recs)
555 {
556         struct lnet_res_container       *rec;
557         int                             i;
558
559         cfs_percpt_for_each(rec, i, recs)
560                 lnet_res_container_cleanup(rec);
561
562         cfs_percpt_free(recs);
563 }
564
565 static struct lnet_res_container **
566 lnet_res_containers_create(int type)
567 {
568         struct lnet_res_container       **recs;
569         struct lnet_res_container       *rec;
570         int                             rc;
571         int                             i;
572
573         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
574         if (recs == NULL) {
575                 CERROR("Failed to allocate %s resource containers\n",
576                        lnet_res_type2str(type));
577                 return NULL;
578         }
579
580         cfs_percpt_for_each(rec, i, recs) {
581                 rc = lnet_res_container_setup(rec, i, type);
582                 if (rc != 0) {
583                         lnet_res_containers_destroy(recs);
584                         return NULL;
585                 }
586         }
587
588         return recs;
589 }
590
591 lnet_libhandle_t *
592 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
593 {
594         /* ALWAYS called with lnet_res_lock held */
595         struct list_head        *head;
596         lnet_libhandle_t        *lh;
597         unsigned int            hash;
598
599         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
600                 return NULL;
601
602         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
603         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
604
605         list_for_each_entry(lh, head, lh_hash_chain) {
606                 if (lh->lh_cookie == cookie)
607                         return lh;
608         }
609
610         return NULL;
611 }
612
613 void
614 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
615 {
616         /* ALWAYS called with lnet_res_lock held */
617         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
618         unsigned int    hash;
619
620         lh->lh_cookie = rec->rec_lh_cookie;
621         rec->rec_lh_cookie += 1 << ibits;
622
623         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
624
625         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
626 }
627
628 static int lnet_unprepare(void);
629
630 static int
631 lnet_prepare(lnet_pid_t requested_pid)
632 {
633         /* Prepare to bring up the network */
634         struct lnet_res_container **recs;
635         int                       rc = 0;
636
637         if (requested_pid == LNET_PID_ANY) {
638                 /* Don't instantiate LNET just for me */
639                 return -ENETDOWN;
640         }
641
642         LASSERT(the_lnet.ln_refcount == 0);
643
644         the_lnet.ln_routing = 0;
645
646         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
647         the_lnet.ln_pid = requested_pid;
648
649         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
650         INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
651         INIT_LIST_HEAD(&the_lnet.ln_nets);
652         INIT_LIST_HEAD(&the_lnet.ln_routers);
653         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
654         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
655
656         rc = lnet_descriptor_setup();
657         if (rc != 0)
658                 goto failed;
659
660         rc = lnet_create_remote_nets_table();
661         if (rc != 0)
662                 goto failed;
663
664         /*
665          * NB the interface cookie in wire handles guards against delayed
666          * replies and ACKs appearing valid after reboot.
667          */
668         the_lnet.ln_interface_cookie = ktime_get_real_ns();
669
670         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
671                                                 sizeof(lnet_counters_t));
672         if (the_lnet.ln_counters == NULL) {
673                 CERROR("Failed to allocate counters for LNet\n");
674                 rc = -ENOMEM;
675                 goto failed;
676         }
677
678         rc = lnet_peer_tables_create();
679         if (rc != 0)
680                 goto failed;
681
682         rc = lnet_msg_containers_create();
683         if (rc != 0)
684                 goto failed;
685
686         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
687                                       LNET_COOKIE_TYPE_EQ);
688         if (rc != 0)
689                 goto failed;
690
691         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
692         if (recs == NULL) {
693                 rc = -ENOMEM;
694                 goto failed;
695         }
696
697         the_lnet.ln_me_containers = recs;
698
699         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
700         if (recs == NULL) {
701                 rc = -ENOMEM;
702                 goto failed;
703         }
704
705         the_lnet.ln_md_containers = recs;
706
707         rc = lnet_portals_create();
708         if (rc != 0) {
709                 CERROR("Failed to create portals for LNet: %d\n", rc);
710                 goto failed;
711         }
712
713         return 0;
714
715  failed:
716         lnet_unprepare();
717         return rc;
718 }
719
720 static int
721 lnet_unprepare (void)
722 {
723         /* NB no LNET_LOCK since this is the last reference.  All LND instances
724          * have shut down already, so it is safe to unlink and free all
725          * descriptors, even those that appear committed to a network op (eg MD
726          * with non-zero pending count) */
727
728         lnet_fail_nid(LNET_NID_ANY, 0);
729
730         LASSERT(the_lnet.ln_refcount == 0);
731         LASSERT(list_empty(&the_lnet.ln_test_peers));
732         LASSERT(list_empty(&the_lnet.ln_nets));
733
734         lnet_portals_destroy();
735
736         if (the_lnet.ln_md_containers != NULL) {
737                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
738                 the_lnet.ln_md_containers = NULL;
739         }
740
741         if (the_lnet.ln_me_containers != NULL) {
742                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
743                 the_lnet.ln_me_containers = NULL;
744         }
745
746         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
747
748         lnet_msg_containers_destroy();
749         lnet_peer_uninit();
750         lnet_rtrpools_free(0);
751
752         if (the_lnet.ln_counters != NULL) {
753                 cfs_percpt_free(the_lnet.ln_counters);
754                 the_lnet.ln_counters = NULL;
755         }
756         lnet_destroy_remote_nets_table();
757         lnet_descriptor_cleanup();
758
759         return 0;
760 }
761
762 lnet_ni_t  *
763 lnet_net2ni_locked(__u32 net_id, int cpt)
764 {
765         struct lnet_ni   *ni;
766         struct lnet_net  *net;
767
768         LASSERT(cpt != LNET_LOCK_EX);
769
770         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
771                 if (net->net_id == net_id) {
772                         ni = list_entry(net->net_ni_list.next, struct lnet_ni,
773                                         ni_netlist);
774                         return ni;
775                 }
776         }
777
778         return NULL;
779 }
780
781 lnet_ni_t *
782 lnet_net2ni_addref(__u32 net)
783 {
784         lnet_ni_t *ni;
785
786         lnet_net_lock(0);
787         ni = lnet_net2ni_locked(net, 0);
788         if (ni)
789                 lnet_ni_addref_locked(ni, 0);
790         lnet_net_unlock(0);
791
792         return ni;
793 }
794 EXPORT_SYMBOL(lnet_net2ni_addref);
795
796 struct lnet_net *
797 lnet_get_net_locked(__u32 net_id)
798 {
799         struct lnet_net  *net;
800
801         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
802                 if (net->net_id == net_id)
803                         return net;
804         }
805
806         return NULL;
807 }
808
809 unsigned int
810 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
811 {
812         __u64           key = nid;
813         unsigned int    val;
814
815         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
816
817         if (number == 1)
818                 return 0;
819
820         val = hash_long(key, LNET_CPT_BITS);
821         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
822         if (val < number)
823                 return val;
824
825         return (unsigned int)(key + val + (val >> 1)) % number;
826 }
827
828 int
829 lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
830 {
831         struct lnet_net *net;
832
833         /* must called with hold of lnet_net_lock */
834         if (LNET_CPT_NUMBER == 1)
835                 return 0; /* the only one */
836
837         /*
838          * If NI is provided then use the CPT identified in the NI cpt
839          * list if one exists. If one doesn't exist, then that NI is
840          * associated with all CPTs and it follows that the net it belongs
841          * to is implicitly associated with all CPTs, so just hash the nid
842          * and return that.
843          */
844         if (ni != NULL) {
845                 if (ni->ni_cpts != NULL)
846                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
847                                                              ni->ni_ncpts)];
848                 else
849                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
850         }
851
852         /* no NI provided so look at the net */
853         net = lnet_get_net_locked(LNET_NIDNET(nid));
854
855         if (net != NULL && net->net_cpts != NULL) {
856                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
857         }
858
859         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
860 }
861
862 int
863 lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
864 {
865         int     cpt;
866         int     cpt2;
867
868         if (LNET_CPT_NUMBER == 1)
869                 return 0; /* the only one */
870
871         cpt = lnet_net_lock_current();
872
873         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
874
875         lnet_net_unlock(cpt);
876
877         return cpt2;
878 }
879 EXPORT_SYMBOL(lnet_cpt_of_nid);
880
881 int
882 lnet_islocalnet(__u32 net_id)
883 {
884         struct lnet_net *net;
885         int             cpt;
886         bool            local;
887
888         cpt = lnet_net_lock_current();
889
890         net = lnet_get_net_locked(net_id);
891
892         local = net != NULL;
893
894         lnet_net_unlock(cpt);
895
896         return local;
897 }
898
899 bool
900 lnet_is_ni_healthy_locked(struct lnet_ni *ni)
901 {
902         if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
903             ni->ni_state == LNET_NI_STATE_DEGRADED)
904                 return true;
905
906         return false;
907 }
908
909 lnet_ni_t  *
910 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
911 {
912         struct lnet_net  *net;
913         struct lnet_ni   *ni;
914
915         LASSERT(cpt != LNET_LOCK_EX);
916
917         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
918                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
919                         if (ni->ni_nid == nid)
920                                 return ni;
921                 }
922         }
923
924         return NULL;
925 }
926
927 lnet_ni_t *
928 lnet_nid2ni_addref(lnet_nid_t nid)
929 {
930         lnet_ni_t *ni;
931
932         lnet_net_lock(0);
933         ni = lnet_nid2ni_locked(nid, 0);
934         if (ni)
935                 lnet_ni_addref_locked(ni, 0);
936         lnet_net_unlock(0);
937
938         return ni;
939 }
940 EXPORT_SYMBOL(lnet_nid2ni_addref);
941
942 int
943 lnet_islocalnid(lnet_nid_t nid)
944 {
945         struct lnet_ni  *ni;
946         int             cpt;
947
948         cpt = lnet_net_lock_current();
949         ni = lnet_nid2ni_locked(nid, cpt);
950         lnet_net_unlock(cpt);
951
952         return ni != NULL;
953 }
954
955 int
956 lnet_count_acceptor_nets(void)
957 {
958         /* Return the # of NIs that need the acceptor. */
959         int              count = 0;
960         struct lnet_net  *net;
961         int              cpt;
962
963         cpt = lnet_net_lock_current();
964         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
965                 /* all socklnd type networks should have the acceptor
966                  * thread started */
967                 if (net->net_lnd->lnd_accept != NULL)
968                         count++;
969         }
970
971         lnet_net_unlock(cpt);
972
973         return count;
974 }
975
976 struct lnet_ping_buffer *
977 lnet_ping_buffer_alloc(int nnis, gfp_t gfp)
978 {
979         struct lnet_ping_buffer *pbuf;
980
981         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nnis), gfp);
982         if (pbuf) {
983                 pbuf->pb_nnis = nnis;
984                 atomic_set(&pbuf->pb_refcnt, 1);
985         }
986
987         return pbuf;
988 }
989
990 void
991 lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
992 {
993         LASSERT(lnet_ping_buffer_numref(pbuf) == 0);
994         LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nnis));
995 }
996
997 static struct lnet_ping_buffer *
998 lnet_ping_target_create(int nnis)
999 {
1000         struct lnet_ping_buffer *pbuf;
1001
1002         pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
1003         if (pbuf == NULL) {
1004                 CERROR("Can't allocate ping source [%d]\n", nnis);
1005                 return NULL;
1006         }
1007
1008         pbuf->pb_info.pi_nnis = nnis;
1009         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1010         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1011         pbuf->pb_info.pi_features = LNET_PING_FEAT_NI_STATUS;
1012
1013         return pbuf;
1014 }
1015
1016 static inline int
1017 lnet_get_net_ni_count_locked(struct lnet_net *net)
1018 {
1019         struct lnet_ni  *ni;
1020         int             count = 0;
1021
1022         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1023                 count++;
1024
1025         return count;
1026 }
1027
1028 static inline int
1029 lnet_get_net_ni_count_pre(struct lnet_net *net)
1030 {
1031         struct lnet_ni  *ni;
1032         int             count = 0;
1033
1034         list_for_each_entry(ni, &net->net_ni_added, ni_netlist)
1035                 count++;
1036
1037         return count;
1038 }
1039
1040 static inline int
1041 lnet_get_ni_count(void)
1042 {
1043         struct lnet_ni  *ni;
1044         struct lnet_net *net;
1045         int             count = 0;
1046
1047         lnet_net_lock(0);
1048
1049         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1050                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1051                         count++;
1052         }
1053
1054         lnet_net_unlock(0);
1055
1056         return count;
1057 }
1058
1059 int
1060 lnet_ping_info_validate(struct lnet_ping_info *pinfo)
1061 {
1062         if (!pinfo)
1063                 return -EINVAL;
1064         if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
1065                 return -EPROTO;
1066         if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
1067                 return -EPROTO;
1068         /* Loopback is guaranteed to be present */
1069         if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_max_interfaces)
1070                 return -ERANGE;
1071         if (LNET_NETTYP(LNET_NIDNET(LNET_PING_INFO_LONI(pinfo))) != LOLND)
1072                 return -EPROTO;
1073         return 0;
1074 }
1075
1076 static void
1077 lnet_ping_target_destroy(void)
1078 {
1079         struct lnet_net *net;
1080         struct lnet_ni  *ni;
1081
1082         lnet_net_lock(LNET_LOCK_EX);
1083
1084         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1085                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1086                         lnet_ni_lock(ni);
1087                         ni->ni_status = NULL;
1088                         lnet_ni_unlock(ni);
1089                 }
1090         }
1091
1092         lnet_ping_buffer_decref(the_lnet.ln_ping_target);
1093         the_lnet.ln_ping_target = NULL;
1094
1095         lnet_net_unlock(LNET_LOCK_EX);
1096 }
1097
1098 static void
1099 lnet_ping_target_event_handler(lnet_event_t *event)
1100 {
1101         struct lnet_ping_buffer *pbuf = event->md.user_ptr;
1102
1103         if (event->unlinked)
1104                 lnet_ping_buffer_decref(pbuf);
1105 }
1106
1107 static int
1108 lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
1109                        lnet_handle_md_t *ping_mdh, int ni_count, bool set_eq)
1110 {
1111         lnet_handle_me_t  me_handle;
1112         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1113         lnet_md_t         md = {NULL};
1114         int               rc, rc2;
1115
1116         if (set_eq) {
1117                 rc = LNetEQAlloc(0, lnet_ping_target_event_handler,
1118                                  &the_lnet.ln_ping_target_eq);
1119                 if (rc != 0) {
1120                         CERROR("Can't allocate ping buffer EQ: %d\n", rc);
1121                         return rc;
1122                 }
1123         }
1124
1125         *ppbuf = lnet_ping_target_create(ni_count);
1126         if (*ppbuf == NULL) {
1127                 rc = -ENOMEM;
1128                 goto fail_free_eq;
1129         }
1130
1131         /* Ping target ME/MD */
1132         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1133                           LNET_PROTO_PING_MATCHBITS, 0,
1134                           LNET_UNLINK, LNET_INS_AFTER,
1135                           &me_handle);
1136         if (rc != 0) {
1137                 CERROR("Can't create ping target ME: %d\n", rc);
1138                 goto fail_decref_ping_buffer;
1139         }
1140
1141         /* initialize md content */
1142         md.start     = &(*ppbuf)->pb_info;
1143         md.length    = LNET_PING_INFO_SIZE((*ppbuf)->pb_nnis);
1144         md.threshold = LNET_MD_THRESH_INF;
1145         md.max_size  = 0;
1146         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1147                        LNET_MD_MANAGE_REMOTE;
1148         md.eq_handle = the_lnet.ln_ping_target_eq;
1149         md.user_ptr  = *ppbuf;
1150
1151         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, ping_mdh);
1152         if (rc != 0) {
1153                 CERROR("Can't attach ping target MD: %d\n", rc);
1154                 goto fail_unlink_ping_me;
1155         }
1156         lnet_ping_buffer_addref(*ppbuf);
1157
1158         return 0;
1159
1160 fail_unlink_ping_me:
1161         rc2 = LNetMEUnlink(me_handle);
1162         LASSERT(rc2 == 0);
1163 fail_decref_ping_buffer:
1164         LASSERT(lnet_ping_buffer_numref(*ppbuf) == 1);
1165         lnet_ping_buffer_decref(*ppbuf);
1166         *ppbuf = NULL;
1167 fail_free_eq:
1168         if (set_eq) {
1169                 rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
1170                 LASSERT(rc2 == 0);
1171         }
1172         return rc;
1173 }
1174
1175 static void
1176 lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf, lnet_handle_md_t *ping_mdh)
1177 {
1178         sigset_t        blocked = cfs_block_allsigs();
1179
1180         LNetMDUnlink(*ping_mdh);
1181         LNetInvalidateHandle(ping_mdh);
1182
1183         /* NB the MD could be busy; this just starts the unlink */
1184         while (lnet_ping_buffer_numref(pbuf) > 1) {
1185                 CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
1186                 set_current_state(TASK_UNINTERRUPTIBLE);
1187                 schedule_timeout(cfs_time_seconds(1));
1188         }
1189
1190         cfs_restore_sigs(blocked);
1191 }
1192
1193 static void
1194 lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
1195 {
1196         struct lnet_ni          *ni;
1197         struct lnet_net         *net;
1198         struct lnet_ni_status *ns;
1199         int                     i;
1200         int                     rc;
1201
1202         i = 0;
1203         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1204                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1205                         LASSERT(i < pbuf->pb_nnis);
1206
1207                         ns = &pbuf->pb_info.pi_ni[i];
1208
1209                         ns->ns_nid = ni->ni_nid;
1210
1211                         lnet_ni_lock(ni);
1212                         ns->ns_status = (ni->ni_status != NULL) ?
1213                                          ni->ni_status->ns_status :
1214                                                 LNET_NI_STATUS_UP;
1215                         ni->ni_status = ns;
1216                         lnet_ni_unlock(ni);
1217
1218                         i++;
1219                 }
1220         }
1221         /*
1222          * We (ab)use the ns_status of the loopback interface to
1223          * transmit the sequence number. The first interface listed
1224          * must be the loopback interface.
1225          */
1226         rc = lnet_ping_info_validate(&pbuf->pb_info);
1227         if (rc) {
1228                 LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
1229                 LBUG();
1230         }
1231         LNET_PING_BUFFER_SEQNO(pbuf) =
1232                 atomic_inc_return(&the_lnet.ln_ping_target_seqno);
1233 }
1234
1235 static void
1236 lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
1237                         lnet_handle_md_t ping_mdh)
1238 {
1239         struct lnet_ping_buffer *old_pbuf = NULL;
1240         lnet_handle_md_t old_ping_md;
1241
1242         /* switch the NIs to point to the new ping info created */
1243         lnet_net_lock(LNET_LOCK_EX);
1244
1245         if (!the_lnet.ln_routing)
1246                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1247
1248         /* Ensure only known feature bits have been set. */
1249         LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
1250         LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));
1251
1252         lnet_ping_target_install_locked(pbuf);
1253
1254         if (the_lnet.ln_ping_target) {
1255                 old_pbuf = the_lnet.ln_ping_target;
1256                 old_ping_md = the_lnet.ln_ping_target_md;
1257         }
1258         the_lnet.ln_ping_target_md = ping_mdh;
1259         the_lnet.ln_ping_target = pbuf;
1260
1261         lnet_net_unlock(LNET_LOCK_EX);
1262
1263         if (old_pbuf) {
1264                 /* unlink and free the old ping info */
1265                 lnet_ping_md_unlink(old_pbuf, &old_ping_md);
1266                 lnet_ping_buffer_decref(old_pbuf);
1267         }
1268 }
1269
1270 static void
1271 lnet_ping_target_fini(void)
1272 {
1273         int             rc;
1274
1275         lnet_ping_md_unlink(the_lnet.ln_ping_target,
1276                             &the_lnet.ln_ping_target_md);
1277
1278         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1279         LASSERT(rc == 0);
1280
1281         lnet_ping_target_destroy();
1282 }
1283
1284 /* Resize the push target. */
1285 int lnet_push_target_resize(void)
1286 {
1287         lnet_process_id_t id = { LNET_NID_ANY, LNET_PID_ANY };
1288         lnet_md_t md = { NULL };
1289         lnet_handle_me_t meh;
1290         lnet_handle_md_t mdh;
1291         lnet_handle_md_t old_mdh;
1292         struct lnet_ping_buffer *pbuf;
1293         struct lnet_ping_buffer *old_pbuf;
1294         int nnis = the_lnet.ln_push_target_nnis;
1295         int rc;
1296
1297         if (nnis <= 0) {
1298                 rc = -EINVAL;
1299                 goto fail_return;
1300         }
1301 again:
1302         pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
1303         if (!pbuf) {
1304                 rc = -ENOMEM;
1305                 goto fail_return;
1306         }
1307
1308         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1309                           LNET_PROTO_PING_MATCHBITS, 0,
1310                           LNET_UNLINK, LNET_INS_AFTER,
1311                           &meh);
1312         if (rc) {
1313                 CERROR("Can't create push target ME: %d\n", rc);
1314                 goto fail_decref_pbuf;
1315         }
1316
1317         /* initialize md content */
1318         md.start     = &pbuf->pb_info;
1319         md.length    = LNET_PING_INFO_SIZE(nnis);
1320         md.threshold = LNET_MD_THRESH_INF;
1321         md.max_size  = 0;
1322         md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE |
1323                        LNET_MD_MANAGE_REMOTE;
1324         md.user_ptr  = pbuf;
1325         md.eq_handle = the_lnet.ln_push_target_eq;
1326
1327         rc = LNetMDAttach(meh, md, LNET_RETAIN, &mdh);
1328         if (rc) {
1329                 CERROR("Can't attach push MD: %d\n", rc);
1330                 goto fail_unlink_meh;
1331         }
1332         lnet_ping_buffer_addref(pbuf);
1333
1334         lnet_net_lock(LNET_LOCK_EX);
1335         old_pbuf = the_lnet.ln_push_target;
1336         old_mdh = the_lnet.ln_push_target_md;
1337         the_lnet.ln_push_target = pbuf;
1338         the_lnet.ln_push_target_md = mdh;
1339         lnet_net_unlock(LNET_LOCK_EX);
1340
1341         if (old_pbuf) {
1342                 LNetMDUnlink(old_mdh);
1343                 lnet_ping_buffer_decref(old_pbuf);
1344         }
1345
1346         if (nnis < the_lnet.ln_push_target_nnis)
1347                 goto again;
1348
1349         CDEBUG(D_NET, "nnis %d success\n", nnis);
1350
1351         return 0;
1352
1353 fail_unlink_meh:
1354         LNetMEUnlink(meh);
1355 fail_decref_pbuf:
1356         lnet_ping_buffer_decref(pbuf);
1357 fail_return:
1358         CDEBUG(D_NET, "nnis %d error %d\n", nnis, rc);
1359         return rc;
1360 }
1361
1362 static void lnet_push_target_event_handler(struct lnet_event *ev)
1363 {
1364         struct lnet_ping_buffer *pbuf = ev->md.user_ptr;
1365
1366         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
1367                 lnet_swap_pinginfo(pbuf);
1368
1369         if (ev->unlinked)
1370                 lnet_ping_buffer_decref(pbuf);
1371 }
1372
1373 /* Initialize the push target. */
1374 static int lnet_push_target_init(void)
1375 {
1376         int rc;
1377
1378         if (the_lnet.ln_push_target)
1379                 return -EALREADY;
1380
1381         rc = LNetEQAlloc(0, lnet_push_target_event_handler,
1382                          &the_lnet.ln_push_target_eq);
1383         if (rc) {
1384                 CERROR("Can't allocated push target EQ: %d\n", rc);
1385                 return rc;
1386         }
1387
1388         /* Start at the required minimum, we'll enlarge if required. */
1389         the_lnet.ln_push_target_nnis = LNET_MIN_INTERFACES;
1390
1391         rc = lnet_push_target_resize();
1392
1393         if (rc) {
1394                 LNetEQFree(the_lnet.ln_push_target_eq);
1395                 LNetInvalidateHandle(&the_lnet.ln_push_target_eq);
1396         }
1397
1398         return rc;
1399 }
1400
1401 /* Clean up the push target. */
1402 static void lnet_push_target_fini(void)
1403 {
1404         if (!the_lnet.ln_push_target)
1405                 return;
1406
1407         /* Unlink and invalidate to prevent new references. */
1408         LNetMDUnlink(the_lnet.ln_push_target_md);
1409         LNetInvalidateHandle(&the_lnet.ln_push_target_md);
1410
1411         /* Wait for the unlink to complete. */
1412         while (lnet_ping_buffer_numref(the_lnet.ln_push_target) > 1) {
1413                 CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
1414                 set_current_state(TASK_UNINTERRUPTIBLE);
1415                 schedule_timeout(cfs_time_seconds(1));
1416         }
1417
1418         lnet_ping_buffer_decref(the_lnet.ln_push_target);
1419         the_lnet.ln_push_target = NULL;
1420         the_lnet.ln_push_target_nnis = 0;
1421
1422         LNetEQFree(the_lnet.ln_push_target_eq);
1423         LNetInvalidateHandle(&the_lnet.ln_push_target_eq);
1424 }
1425
1426 static int
1427 lnet_ni_tq_credits(lnet_ni_t *ni)
1428 {
1429         int     credits;
1430
1431         LASSERT(ni->ni_ncpts >= 1);
1432
1433         if (ni->ni_ncpts == 1)
1434                 return ni->ni_net->net_tunables.lct_max_tx_credits;
1435
1436         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
1437         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
1438         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
1439
1440         return credits;
1441 }
1442
1443 static void
1444 lnet_ni_unlink_locked(lnet_ni_t *ni)
1445 {
1446         if (!list_empty(&ni->ni_cptlist)) {
1447                 list_del_init(&ni->ni_cptlist);
1448                 lnet_ni_decref_locked(ni, 0);
1449         }
1450
1451         /* move it to zombie list and nobody can find it anymore */
1452         LASSERT(!list_empty(&ni->ni_netlist));
1453         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
1454         lnet_ni_decref_locked(ni, 0);
1455 }
1456
1457 static void
1458 lnet_clear_zombies_nis_locked(struct lnet_net *net)
1459 {
1460         int             i;
1461         int             islo;
1462         lnet_ni_t       *ni;
1463         struct list_head *zombie_list = &net->net_ni_zombie;
1464
1465         /*
1466          * Now wait for the NIs I just nuked to show up on the zombie
1467          * list and shut them down in guaranteed thread context
1468          */
1469         i = 2;
1470         while (!list_empty(zombie_list)) {
1471                 int     *ref;
1472                 int     j;
1473
1474                 ni = list_entry(zombie_list->next,
1475                                 lnet_ni_t, ni_netlist);
1476                 list_del_init(&ni->ni_netlist);
1477                 /* the ni should be in deleting state. If it's not it's
1478                  * a bug */
1479                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
1480                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1481                         if (*ref == 0)
1482                                 continue;
1483                         /* still busy, add it back to zombie list */
1484                         list_add(&ni->ni_netlist, zombie_list);
1485                         break;
1486                 }
1487
1488                 if (!list_empty(&ni->ni_netlist)) {
1489                         lnet_net_unlock(LNET_LOCK_EX);
1490                         ++i;
1491                         if ((i & (-i)) == i) {
1492                                 CDEBUG(D_WARNING,
1493                                        "Waiting for zombie LNI %s\n",
1494                                        libcfs_nid2str(ni->ni_nid));
1495                         }
1496                         set_current_state(TASK_UNINTERRUPTIBLE);
1497                         schedule_timeout(cfs_time_seconds(1));
1498                         lnet_net_lock(LNET_LOCK_EX);
1499                         continue;
1500                 }
1501
1502                 lnet_net_unlock(LNET_LOCK_EX);
1503
1504                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
1505
1506                 LASSERT(!in_interrupt());
1507                 (net->net_lnd->lnd_shutdown)(ni);
1508
1509                 if (!islo)
1510                         CDEBUG(D_LNI, "Removed LNI %s\n",
1511                               libcfs_nid2str(ni->ni_nid));
1512
1513                 lnet_ni_free(ni);
1514                 i = 2;
1515                 lnet_net_lock(LNET_LOCK_EX);
1516         }
1517 }
1518
1519 /* shutdown down the NI and release refcount */
1520 static void
1521 lnet_shutdown_lndni(struct lnet_ni *ni)
1522 {
1523         int i;
1524         struct lnet_net *net = ni->ni_net;
1525
1526         lnet_net_lock(LNET_LOCK_EX);
1527         ni->ni_state = LNET_NI_STATE_DELETING;
1528         lnet_ni_unlink_locked(ni);
1529         lnet_incr_dlc_seq();
1530         lnet_net_unlock(LNET_LOCK_EX);
1531
1532         /* clear messages for this NI on the lazy portal */
1533         for (i = 0; i < the_lnet.ln_nportals; i++)
1534                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
1535
1536         lnet_net_lock(LNET_LOCK_EX);
1537         lnet_clear_zombies_nis_locked(net);
1538         lnet_net_unlock(LNET_LOCK_EX);
1539 }
1540
1541 static void
1542 lnet_shutdown_lndnet(struct lnet_net *net)
1543 {
1544         struct lnet_ni *ni;
1545
1546         lnet_net_lock(LNET_LOCK_EX);
1547
1548         net->net_state = LNET_NET_STATE_DELETING;
1549
1550         list_del_init(&net->net_list);
1551
1552         while (!list_empty(&net->net_ni_list)) {
1553                 ni = list_entry(net->net_ni_list.next,
1554                                 lnet_ni_t, ni_netlist);
1555                 lnet_net_unlock(LNET_LOCK_EX);
1556                 lnet_shutdown_lndni(ni);
1557                 lnet_net_lock(LNET_LOCK_EX);
1558         }
1559
1560         lnet_net_unlock(LNET_LOCK_EX);
1561
1562         /* Do peer table cleanup for this net */
1563         lnet_peer_tables_cleanup(net);
1564
1565         lnet_net_lock(LNET_LOCK_EX);
1566         /*
1567          * decrement ref count on lnd only when the entire network goes
1568          * away
1569          */
1570         net->net_lnd->lnd_refcount--;
1571
1572         lnet_net_unlock(LNET_LOCK_EX);
1573
1574         lnet_net_free(net);
1575 }
1576
1577 static void
1578 lnet_shutdown_lndnets(void)
1579 {
1580         struct lnet_net *net;
1581
1582         /* NB called holding the global mutex */
1583
1584         /* All quiet on the API front */
1585         LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
1586         LASSERT(the_lnet.ln_refcount == 0);
1587
1588         lnet_net_lock(LNET_LOCK_EX);
1589         the_lnet.ln_state = LNET_STATE_STOPPING;
1590
1591         while (!list_empty(&the_lnet.ln_nets)) {
1592                 /*
1593                  * move the nets to the zombie list to avoid them being
1594                  * picked up for new work. LONET is also included in the
1595                  * Nets that will be moved to the zombie list
1596                  */
1597                 net = list_entry(the_lnet.ln_nets.next,
1598                                  struct lnet_net, net_list);
1599                 list_move(&net->net_list, &the_lnet.ln_net_zombie);
1600         }
1601
1602         /* Drop the cached loopback Net. */
1603         if (the_lnet.ln_loni != NULL) {
1604                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1605                 the_lnet.ln_loni = NULL;
1606         }
1607         lnet_net_unlock(LNET_LOCK_EX);
1608
1609         /* iterate through the net zombie list and delete each net */
1610         while (!list_empty(&the_lnet.ln_net_zombie)) {
1611                 net = list_entry(the_lnet.ln_net_zombie.next,
1612                                  struct lnet_net, net_list);
1613                 lnet_shutdown_lndnet(net);
1614         }
1615
1616         lnet_net_lock(LNET_LOCK_EX);
1617         the_lnet.ln_state = LNET_STATE_SHUTDOWN;
1618         lnet_net_unlock(LNET_LOCK_EX);
1619 }
1620
1621 static int
1622 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
1623 {
1624         int                     rc = -EINVAL;
1625         struct lnet_tx_queue    *tq;
1626         int                     i;
1627         struct lnet_net         *net = ni->ni_net;
1628
1629         mutex_lock(&the_lnet.ln_lnd_mutex);
1630
1631         if (tun) {
1632                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
1633                 ni->ni_lnd_tunables_set = true;
1634         }
1635
1636         rc = (net->net_lnd->lnd_startup)(ni);
1637
1638         mutex_unlock(&the_lnet.ln_lnd_mutex);
1639
1640         if (rc != 0) {
1641                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
1642                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
1643                 lnet_net_lock(LNET_LOCK_EX);
1644                 net->net_lnd->lnd_refcount--;
1645                 lnet_net_unlock(LNET_LOCK_EX);
1646                 goto failed0;
1647         }
1648
1649         ni->ni_state = LNET_NI_STATE_ACTIVE;
1650
1651         /* We keep a reference on the loopback net through the loopback NI */
1652         if (net->net_lnd->lnd_type == LOLND) {
1653                 lnet_ni_addref(ni);
1654                 LASSERT(the_lnet.ln_loni == NULL);
1655                 the_lnet.ln_loni = ni;
1656                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
1657                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
1658                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
1659                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
1660                 return 0;
1661         }
1662
1663         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
1664             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
1665                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1666                                    libcfs_lnd2str(net->net_lnd->lnd_type),
1667                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
1668                                         "" : "per-peer ");
1669                 /* shutdown the NI since if we get here then it must've already
1670                  * been started
1671                  */
1672                 lnet_shutdown_lndni(ni);
1673                 return -EINVAL;
1674         }
1675
1676         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1677                 tq->tq_credits_min =
1678                 tq->tq_credits_max =
1679                 tq->tq_credits = lnet_ni_tq_credits(ni);
1680         }
1681
1682         atomic_set(&ni->ni_tx_credits,
1683                    lnet_ni_tq_credits(ni) * ni->ni_ncpts);
1684
1685         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1686                 libcfs_nid2str(ni->ni_nid),
1687                 ni->ni_net->net_tunables.lct_peer_tx_credits,
1688                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1689                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
1690                 ni->ni_net->net_tunables.lct_peer_timeout);
1691
1692         return 0;
1693 failed0:
1694         lnet_ni_free(ni);
1695         return rc;
1696 }
1697
1698 static int
1699 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
1700 {
1701         struct lnet_ni          *ni;
1702         struct lnet_net         *net_l = NULL;
1703         struct list_head        local_ni_list;
1704         int                     rc;
1705         int                     ni_count = 0;
1706         __u32                   lnd_type;
1707         lnd_t                   *lnd;
1708         int                     peer_timeout =
1709                 net->net_tunables.lct_peer_timeout;
1710         int                     maxtxcredits =
1711                 net->net_tunables.lct_max_tx_credits;
1712         int                     peerrtrcredits =
1713                 net->net_tunables.lct_peer_rtr_credits;
1714
1715         INIT_LIST_HEAD(&local_ni_list);
1716
1717         /*
1718          * make sure that this net is unique. If it isn't then
1719          * we are adding interfaces to an already existing network, and
1720          * 'net' is just a convenient way to pass in the list.
1721          * if it is unique we need to find the LND and load it if
1722          * necessary.
1723          */
1724         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
1725                 lnd_type = LNET_NETTYP(net->net_id);
1726
1727                 LASSERT(libcfs_isknown_lnd(lnd_type));
1728
1729                 if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
1730                     lnd_type == IIBLND || lnd_type == VIBLND) {
1731                         CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
1732                         rc = -EINVAL;
1733                         goto failed0;
1734                 }
1735
1736                 mutex_lock(&the_lnet.ln_lnd_mutex);
1737                 lnd = lnet_find_lnd_by_type(lnd_type);
1738
1739                 if (lnd == NULL) {
1740                         mutex_unlock(&the_lnet.ln_lnd_mutex);
1741                         rc = request_module("%s", libcfs_lnd2modname(lnd_type));
1742                         mutex_lock(&the_lnet.ln_lnd_mutex);
1743
1744                         lnd = lnet_find_lnd_by_type(lnd_type);
1745                         if (lnd == NULL) {
1746                                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1747                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1748                                 libcfs_lnd2str(lnd_type),
1749                                 libcfs_lnd2modname(lnd_type), rc);
1750 #ifndef HAVE_MODULE_LOADING_SUPPORT
1751                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1752                                                 "compiled with kernel module "
1753                                                 "loading support.");
1754 #endif
1755                                 rc = -EINVAL;
1756                                 goto failed0;
1757                         }
1758                 }
1759
1760                 lnet_net_lock(LNET_LOCK_EX);
1761                 lnd->lnd_refcount++;
1762                 lnet_net_unlock(LNET_LOCK_EX);
1763
1764                 net->net_lnd = lnd;
1765
1766                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1767
1768                 net_l = net;
1769         }
1770
1771         /*
1772          * net_l: if the network being added is unique then net_l
1773          *        will point to that network
1774          *        if the network being added is not unique then
1775          *        net_l points to the existing network.
1776          *
1777          * When we enter the loop below, we'll pick NIs off he
1778          * network beign added and start them up, then add them to
1779          * a local ni list. Once we've successfully started all
1780          * the NIs then we join the local NI list (of started up
1781          * networks) with the net_l->net_ni_list, which should
1782          * point to the correct network to add the new ni list to
1783          *
1784          * If any of the new NIs fail to start up, then we want to
1785          * iterate through the local ni list, which should include
1786          * any NIs which were successfully started up, and shut
1787          * them down.
1788          *
1789          * After than we want to delete the network being added,
1790          * to avoid a memory leak.
1791          */
1792
1793         /*
1794          * When a network uses TCP bonding then all its interfaces
1795          * must be specified when the network is first defined: the
1796          * TCP bonding code doesn't allow for interfaces to be added
1797          * or removed.
1798          */
1799         if (net_l != net && net_l != NULL && use_tcp_bonding &&
1800             LNET_NETTYP(net_l->net_id) == SOCKLND) {
1801                 rc = -EINVAL;
1802                 goto failed0;
1803         }
1804
1805         while (!list_empty(&net->net_ni_added)) {
1806                 ni = list_entry(net->net_ni_added.next, struct lnet_ni,
1807                                 ni_netlist);
1808                 list_del_init(&ni->ni_netlist);
1809
1810                 /* make sure that the the NI we're about to start
1811                  * up is actually unique. if it's not fail. */
1812                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
1813                                         ni->ni_interfaces[0])) {
1814                         rc = -EINVAL;
1815                         goto failed1;
1816                 }
1817
1818                 /* adjust the pointer the parent network, just in case it
1819                  * the net is a duplicate */
1820                 ni->ni_net = net_l;
1821
1822                 rc = lnet_startup_lndni(ni, tun);
1823
1824                 LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
1825                         ni->ni_net->net_lnd->lnd_query != NULL);
1826
1827                 if (rc < 0)
1828                         goto failed1;
1829
1830                 lnet_ni_addref(ni);
1831                 list_add_tail(&ni->ni_netlist, &local_ni_list);
1832
1833                 ni_count++;
1834         }
1835
1836         lnet_net_lock(LNET_LOCK_EX);
1837         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
1838         lnet_incr_dlc_seq();
1839         lnet_net_unlock(LNET_LOCK_EX);
1840
1841         /* if the network is not unique then we don't want to keep
1842          * it around after we're done. Free it. Otherwise add that
1843          * net to the global the_lnet.ln_nets */
1844         if (net_l != net && net_l != NULL) {
1845                 /*
1846                  * TODO - note. currently the tunables can not be updated
1847                  * once added
1848                  */
1849                 lnet_net_free(net);
1850         } else {
1851                 net->net_state = LNET_NET_STATE_ACTIVE;
1852                 /*
1853                  * restore tunables after it has been overwitten by the
1854                  * lnd
1855                  */
1856                 if (peer_timeout != -1)
1857                         net->net_tunables.lct_peer_timeout = peer_timeout;
1858                 if (maxtxcredits != -1)
1859                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
1860                 if (peerrtrcredits != -1)
1861                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
1862
1863                 lnet_net_lock(LNET_LOCK_EX);
1864                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
1865                 lnet_net_unlock(LNET_LOCK_EX);
1866         }
1867
1868         return ni_count;
1869
1870 failed1:
1871         /*
1872          * shutdown the new NIs that are being started up
1873          * free the NET being started
1874          */
1875         while (!list_empty(&local_ni_list)) {
1876                 ni = list_entry(local_ni_list.next, struct lnet_ni,
1877                                 ni_netlist);
1878
1879                 lnet_shutdown_lndni(ni);
1880         }
1881
1882 failed0:
1883         lnet_net_free(net);
1884
1885         return rc;
1886 }
1887
1888 static int
1889 lnet_startup_lndnets(struct list_head *netlist)
1890 {
1891         struct lnet_net         *net;
1892         int                     rc;
1893         int                     ni_count = 0;
1894
1895         /*
1896          * Change to running state before bringing up the LNDs. This
1897          * allows lnet_shutdown_lndnets() to assert that we've passed
1898          * through here.
1899          */
1900         lnet_net_lock(LNET_LOCK_EX);
1901         the_lnet.ln_state = LNET_STATE_RUNNING;
1902         lnet_net_unlock(LNET_LOCK_EX);
1903
1904         while (!list_empty(netlist)) {
1905                 net = list_entry(netlist->next, struct lnet_net, net_list);
1906                 list_del_init(&net->net_list);
1907
1908                 rc = lnet_startup_lndnet(net, NULL);
1909
1910                 if (rc < 0)
1911                         goto failed;
1912
1913                 ni_count += rc;
1914         }
1915
1916         return ni_count;
1917 failed:
1918         lnet_shutdown_lndnets();
1919
1920         return rc;
1921 }
1922
1923 /**
1924  * Initialize LNet library.
1925  *
1926  * Automatically called at module loading time. Caller has to call
1927  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
1928  * latter returned 0. It must be called exactly once.
1929  *
1930  * \retval 0 on success
1931  * \retval -ve on failures.
1932  */
1933 int lnet_lib_init(void)
1934 {
1935         int rc;
1936
1937         lnet_assert_wire_constants();
1938
1939         if (lnet_max_interfaces < LNET_MIN_INTERFACES)
1940                 lnet_max_interfaces = LNET_MIN_INTERFACES;
1941
1942         memset(&the_lnet, 0, sizeof(the_lnet));
1943
1944         /* refer to global cfs_cpt_table for now */
1945         the_lnet.ln_cpt_table   = cfs_cpt_table;
1946         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1947
1948         LASSERT(the_lnet.ln_cpt_number > 0);
1949         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1950                 /* we are under risk of consuming all lh_cookie */
1951                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1952                        "please change setting of CPT-table and retry\n",
1953                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1954                 return -E2BIG;
1955         }
1956
1957         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1958                 the_lnet.ln_cpt_bits++;
1959
1960         rc = lnet_create_locks();
1961         if (rc != 0) {
1962                 CERROR("Can't create LNet global locks: %d\n", rc);
1963                 return rc;
1964         }
1965
1966         the_lnet.ln_refcount = 0;
1967         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1968         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1969         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
1970         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1971         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1972
1973         /* The hash table size is the number of bits it takes to express the set
1974          * ln_num_routes, minus 1 (better to under estimate than over so we
1975          * don't waste memory). */
1976         if (rnet_htable_size <= 0)
1977                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1978         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1979                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1980         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1981                                            order_base_2(rnet_htable_size) - 1);
1982
1983         /* All LNDs apart from the LOLND are in separate modules.  They
1984          * register themselves when their module loads, and unregister
1985          * themselves when their module is unloaded. */
1986         lnet_register_lnd(&the_lolnd);
1987         return 0;
1988 }
1989
1990 /**
1991  * Finalize LNet library.
1992  *
1993  * \pre lnet_lib_init() called with success.
1994  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1995  */
1996 void lnet_lib_exit(void)
1997 {
1998         LASSERT(the_lnet.ln_refcount == 0);
1999
2000         while (!list_empty(&the_lnet.ln_lnds))
2001                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
2002                                                lnd_t, lnd_list));
2003         lnet_destroy_locks();
2004 }
2005
2006 /**
2007  * Set LNet PID and start LNet interfaces, routing, and forwarding.
2008  *
2009  * Users must call this function at least once before any other functions.
2010  * For each successful call there must be a corresponding call to
2011  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
2012  * ignored.
2013  *
2014  * The PID used by LNet may be different from the one requested.
2015  * See LNetGetId().
2016  *
2017  * \param requested_pid PID requested by the caller.
2018  *
2019  * \return >= 0 on success, and < 0 error code on failures.
2020  */
2021 int
2022 LNetNIInit(lnet_pid_t requested_pid)
2023 {
2024         int                     im_a_router = 0;
2025         int                     rc;
2026         int                     ni_count;
2027         struct lnet_ping_buffer *pbuf;
2028         lnet_handle_md_t        ping_mdh;
2029         struct list_head        net_head;
2030         struct lnet_net         *net;
2031
2032         INIT_LIST_HEAD(&net_head);
2033
2034         mutex_lock(&the_lnet.ln_api_mutex);
2035
2036         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
2037
2038         if (the_lnet.ln_refcount > 0) {
2039                 rc = the_lnet.ln_refcount++;
2040                 mutex_unlock(&the_lnet.ln_api_mutex);
2041                 return rc;
2042         }
2043
2044         rc = lnet_prepare(requested_pid);
2045         if (rc != 0) {
2046                 mutex_unlock(&the_lnet.ln_api_mutex);
2047                 return rc;
2048         }
2049
2050         /* create a network for Loopback network */
2051         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
2052         if (net == NULL) {
2053                 rc = -ENOMEM;
2054                 goto err_empty_list;
2055         }
2056
2057         /* Add in the loopback NI */
2058         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
2059                 rc = -ENOMEM;
2060                 goto err_empty_list;
2061         }
2062
2063         /* If LNet is being initialized via DLC it is possible
2064          * that the user requests not to load module parameters (ones which
2065          * are supported by DLC) on initialization.  Therefore, make sure not
2066          * to load networks, routes and forwarding from module parameters
2067          * in this case.  On cleanup in case of failure only clean up
2068          * routes if it has been loaded */
2069         if (!the_lnet.ln_nis_from_mod_params) {
2070                 rc = lnet_parse_networks(&net_head, lnet_get_networks(),
2071                                          use_tcp_bonding);
2072                 if (rc < 0)
2073                         goto err_empty_list;
2074         }
2075
2076         ni_count = lnet_startup_lndnets(&net_head);
2077         if (ni_count < 0) {
2078                 rc = ni_count;
2079                 goto err_empty_list;
2080         }
2081
2082         if (!the_lnet.ln_nis_from_mod_params) {
2083                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
2084                 if (rc != 0)
2085                         goto err_shutdown_lndnis;
2086
2087                 rc = lnet_check_routes();
2088                 if (rc != 0)
2089                         goto err_destroy_routes;
2090
2091                 rc = lnet_rtrpools_alloc(im_a_router);
2092                 if (rc != 0)
2093                         goto err_destroy_routes;
2094         }
2095
2096         rc = lnet_acceptor_start();
2097         if (rc != 0)
2098                 goto err_destroy_routes;
2099
2100         the_lnet.ln_refcount = 1;
2101         /* Now I may use my own API functions... */
2102
2103         rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_count, true);
2104         if (rc != 0)
2105                 goto err_acceptor_stop;
2106
2107         lnet_ping_target_update(pbuf, ping_mdh);
2108
2109         rc = lnet_router_checker_start();
2110         if (rc != 0)
2111                 goto err_stop_ping;
2112
2113         rc = lnet_push_target_init();
2114         if (rc != 0)
2115                 goto err_stop_router_checker;
2116
2117         rc = lnet_peer_discovery_start();
2118         if (rc != 0)
2119                 goto err_destroy_push_target;
2120
2121         lnet_fault_init();
2122         lnet_proc_init();
2123
2124         mutex_unlock(&the_lnet.ln_api_mutex);
2125
2126         return 0;
2127
2128 err_destroy_push_target:
2129         lnet_push_target_fini();
2130 err_stop_router_checker:
2131         lnet_router_checker_stop();
2132 err_stop_ping:
2133         lnet_ping_target_fini();
2134 err_acceptor_stop:
2135         the_lnet.ln_refcount = 0;
2136         lnet_acceptor_stop();
2137 err_destroy_routes:
2138         if (!the_lnet.ln_nis_from_mod_params)
2139                 lnet_destroy_routes();
2140 err_shutdown_lndnis:
2141         lnet_shutdown_lndnets();
2142 err_empty_list:
2143         lnet_unprepare();
2144         LASSERT(rc < 0);
2145         mutex_unlock(&the_lnet.ln_api_mutex);
2146         while (!list_empty(&net_head)) {
2147                 struct lnet_net *net;
2148
2149                 net = list_entry(net_head.next, struct lnet_net, net_list);
2150                 list_del_init(&net->net_list);
2151                 lnet_net_free(net);
2152         }
2153         return rc;
2154 }
2155 EXPORT_SYMBOL(LNetNIInit);
2156
2157 /**
2158  * Stop LNet interfaces, routing, and forwarding.
2159  *
2160  * Users must call this function once for each successful call to LNetNIInit().
2161  * Once the LNetNIFini() operation has been started, the results of pending
2162  * API operations are undefined.
2163  *
2164  * \return always 0 for current implementation.
2165  */
2166 int
2167 LNetNIFini()
2168 {
2169         mutex_lock(&the_lnet.ln_api_mutex);
2170
2171         LASSERT(the_lnet.ln_refcount > 0);
2172
2173         if (the_lnet.ln_refcount != 1) {
2174                 the_lnet.ln_refcount--;
2175         } else {
2176                 LASSERT(!the_lnet.ln_niinit_self);
2177
2178                 lnet_fault_fini();
2179
2180                 lnet_proc_fini();
2181                 lnet_peer_discovery_stop();
2182                 lnet_push_target_fini();
2183                 lnet_router_checker_stop();
2184                 lnet_ping_target_fini();
2185
2186                 /* Teardown fns that use my own API functions BEFORE here */
2187                 the_lnet.ln_refcount = 0;
2188
2189                 lnet_acceptor_stop();
2190                 lnet_destroy_routes();
2191                 lnet_shutdown_lndnets();
2192                 lnet_unprepare();
2193         }
2194
2195         mutex_unlock(&the_lnet.ln_api_mutex);
2196         return 0;
2197 }
2198 EXPORT_SYMBOL(LNetNIFini);
2199
2200 /**
2201  * Grabs the ni data from the ni structure and fills the out
2202  * parameters
2203  *
2204  * \param[in] ni network        interface structure
2205  * \param[out] cfg_ni           NI config information
2206  * \param[out] tun              network and LND tunables
2207  */
2208 static void
2209 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
2210                    struct lnet_ioctl_config_lnd_tunables *tun,
2211                    struct lnet_ioctl_element_stats *stats,
2212                    __u32 tun_size)
2213 {
2214         size_t min_size = 0;
2215         int i;
2216
2217         if (!ni || !cfg_ni || !tun)
2218                 return;
2219
2220         if (ni->ni_interfaces[0] != NULL) {
2221                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
2222                         if (ni->ni_interfaces[i] != NULL) {
2223                                 strncpy(cfg_ni->lic_ni_intf[i],
2224                                         ni->ni_interfaces[i],
2225                                         sizeof(cfg_ni->lic_ni_intf[i]));
2226                         }
2227                 }
2228         }
2229
2230         cfg_ni->lic_nid = ni->ni_nid;
2231         if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
2232                 cfg_ni->lic_status = LNET_NI_STATUS_UP;
2233         else
2234                 cfg_ni->lic_status = ni->ni_status->ns_status;
2235         cfg_ni->lic_tcp_bonding = use_tcp_bonding;
2236         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
2237
2238         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
2239
2240         if (stats) {
2241                 stats->send_count = atomic_read(&ni->ni_stats.send_count);
2242                 stats->recv_count = atomic_read(&ni->ni_stats.recv_count);
2243         }
2244
2245         /*
2246          * tun->lt_tun will always be present, but in order to be
2247          * backwards compatible, we need to deal with the cases when
2248          * tun->lt_tun is smaller than what the kernel has, because it
2249          * comes from an older version of a userspace program, then we'll
2250          * need to copy as much information as we have available space.
2251          */
2252         min_size = tun_size - sizeof(tun->lt_cmn);
2253         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
2254
2255         /* copy over the cpts */
2256         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
2257             ni->ni_cpts == NULL)  {
2258                 for (i = 0; i < ni->ni_ncpts; i++)
2259                         cfg_ni->lic_cpts[i] = i;
2260         } else {
2261                 for (i = 0;
2262                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
2263                      i < LNET_MAX_SHOW_NUM_CPT;
2264                      i++)
2265                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
2266         }
2267         cfg_ni->lic_ncpts = ni->ni_ncpts;
2268 }
2269
2270 /**
2271  * NOTE: This is a legacy function left in the code to be backwards
2272  * compatible with older userspace programs. It should eventually be
2273  * removed.
2274  *
2275  * Grabs the ni data from the ni structure and fills the out
2276  * parameters
2277  *
2278  * \param[in] ni network        interface structure
2279  * \param[out] config           config information
2280  */
2281 static void
2282 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
2283                          struct lnet_ioctl_config_data *config)
2284 {
2285         struct lnet_ioctl_net_config *net_config;
2286         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
2287         size_t min_size, tunable_size = 0;
2288         int i;
2289
2290         if (!ni || !config)
2291                 return;
2292
2293         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
2294         if (!net_config)
2295                 return;
2296
2297         BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
2298                      ARRAY_SIZE(net_config->ni_interfaces));
2299
2300         for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
2301                 if (!ni->ni_interfaces[i])
2302                         break;
2303
2304                 strncpy(net_config->ni_interfaces[i],
2305                         ni->ni_interfaces[i],
2306                         sizeof(net_config->ni_interfaces[i]));
2307         }
2308
2309         config->cfg_nid = ni->ni_nid;
2310         config->cfg_config_u.cfg_net.net_peer_timeout =
2311                 ni->ni_net->net_tunables.lct_peer_timeout;
2312         config->cfg_config_u.cfg_net.net_max_tx_credits =
2313                 ni->ni_net->net_tunables.lct_max_tx_credits;
2314         config->cfg_config_u.cfg_net.net_peer_tx_credits =
2315                 ni->ni_net->net_tunables.lct_peer_tx_credits;
2316         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
2317                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
2318
2319         if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
2320                 net_config->ni_status = LNET_NI_STATUS_UP;
2321         else
2322                 net_config->ni_status = ni->ni_status->ns_status;
2323
2324         if (ni->ni_cpts) {
2325                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
2326
2327                 for (i = 0; i < num_cpts; i++)
2328                         net_config->ni_cpts[i] = ni->ni_cpts[i];
2329
2330                 config->cfg_ncpts = num_cpts;
2331         }
2332
2333         /*
2334          * See if user land tools sent in a newer and larger version
2335          * of struct lnet_tunables than what the kernel uses.
2336          */
2337         min_size = sizeof(*config) + sizeof(*net_config);
2338
2339         if (config->cfg_hdr.ioc_len > min_size)
2340                 tunable_size = config->cfg_hdr.ioc_len - min_size;
2341
2342         /* Don't copy too much data to user space */
2343         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
2344         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
2345
2346         if (lnd_cfg && min_size) {
2347                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
2348                 config->cfg_config_u.cfg_net.net_interface_count = 1;
2349
2350                 /* Tell user land that kernel side has less data */
2351                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
2352                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
2353                         config->cfg_hdr.ioc_len -= min_size;
2354                 }
2355         }
2356 }
2357
2358 struct lnet_ni *
2359 lnet_get_ni_idx_locked(int idx)
2360 {
2361         struct lnet_ni          *ni;
2362         struct lnet_net         *net;
2363
2364         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2365                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2366                         if (idx-- == 0)
2367                                 return ni;
2368                 }
2369         }
2370
2371         return NULL;
2372 }
2373
2374 struct lnet_ni *
2375 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
2376 {
2377         struct lnet_ni          *ni;
2378         struct lnet_net         *net = mynet;
2379
2380         if (prev == NULL) {
2381                 if (net == NULL)
2382                         net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
2383                                         net_list);
2384                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2385                                 ni_netlist);
2386
2387                 return ni;
2388         }
2389
2390         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
2391                 /* if you reached the end of the ni list and the net is
2392                  * specified, then there are no more nis in that net */
2393                 if (net != NULL)
2394                         return NULL;
2395
2396                 /* we reached the end of this net ni list. move to the
2397                  * next net */
2398                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
2399                         /* no more nets and no more NIs. */
2400                         return NULL;
2401
2402                 /* get the next net */
2403                 net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
2404                                  net_list);
2405                 /* get the ni on it */
2406                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2407                                 ni_netlist);
2408
2409                 return ni;
2410         }
2411
2412         /* there are more nis left */
2413         ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
2414
2415         return ni;
2416 }
2417
2418 int
2419 lnet_get_net_config(struct lnet_ioctl_config_data *config)
2420 {
2421         struct lnet_ni *ni;
2422         int cpt;
2423         int rc = -ENOENT;
2424         int idx = config->cfg_count;
2425
2426         cpt = lnet_net_lock_current();
2427
2428         ni = lnet_get_ni_idx_locked(idx);
2429
2430         if (ni != NULL) {
2431                 rc = 0;
2432                 lnet_ni_lock(ni);
2433                 lnet_fill_ni_info_legacy(ni, config);
2434                 lnet_ni_unlock(ni);
2435         }
2436
2437         lnet_net_unlock(cpt);
2438         return rc;
2439 }
2440
2441 int
2442 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
2443                    struct lnet_ioctl_config_lnd_tunables *tun,
2444                    struct lnet_ioctl_element_stats *stats,
2445                    __u32 tun_size)
2446 {
2447         struct lnet_ni          *ni;
2448         int                     cpt;
2449         int                     rc = -ENOENT;
2450
2451         if (!cfg_ni || !tun || !stats)
2452                 return -EINVAL;
2453
2454         cpt = lnet_net_lock_current();
2455
2456         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
2457
2458         if (ni) {
2459                 rc = 0;
2460                 lnet_ni_lock(ni);
2461                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
2462                 lnet_ni_unlock(ni);
2463         }
2464
2465         lnet_net_unlock(cpt);
2466         return rc;
2467 }
2468
2469 static int lnet_add_net_common(struct lnet_net *net,
2470                                struct lnet_ioctl_config_lnd_tunables *tun)
2471 {
2472         __u32                   net_id;
2473         struct lnet_ping_buffer *pbuf;
2474         lnet_handle_md_t        ping_mdh;
2475         int                     rc;
2476         lnet_remotenet_t        *rnet;
2477         int                     net_ni_count;
2478         int                     num_acceptor_nets;
2479
2480         lnet_net_lock(LNET_LOCK_EX);
2481         rnet = lnet_find_rnet_locked(net->net_id);
2482         lnet_net_unlock(LNET_LOCK_EX);
2483         /*
2484          * make sure that the net added doesn't invalidate the current
2485          * configuration LNet is keeping
2486          */
2487         if (rnet) {
2488                 CERROR("Adding net %s will invalidate routing configuration\n",
2489                        libcfs_net2str(net->net_id));
2490                 lnet_net_free(net);
2491                 return -EUSERS;
2492         }
2493
2494         /*
2495          * make sure you calculate the correct number of slots in the ping
2496          * buffer. Since the ping info is a flattened list of all the NIs,
2497          * we should allocate enough slots to accomodate the number of NIs
2498          * which will be added.
2499          *
2500          * since ni hasn't been configured yet, use
2501          * lnet_get_net_ni_count_pre() which checks the net_ni_added list
2502          */
2503         net_ni_count = lnet_get_net_ni_count_pre(net);
2504
2505         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2506                                     net_ni_count + lnet_get_ni_count(),
2507                                     false);
2508         if (rc < 0) {
2509                 lnet_net_free(net);
2510                 return rc;
2511         }
2512
2513         if (tun)
2514                 memcpy(&net->net_tunables,
2515                        &tun->lt_cmn, sizeof(net->net_tunables));
2516         else
2517                 memset(&net->net_tunables, -1, sizeof(net->net_tunables));
2518
2519         /*
2520          * before starting this network get a count of the current TCP
2521          * networks which require the acceptor thread running. If that
2522          * count is == 0 before we start up this network, then we'd want to
2523          * start up the acceptor thread after starting up this network
2524          */
2525         num_acceptor_nets = lnet_count_acceptor_nets();
2526
2527         net_id = net->net_id;
2528
2529         rc = lnet_startup_lndnet(net,
2530                                  (tun) ? &tun->lt_tun : NULL);
2531         if (rc < 0)
2532                 goto failed;
2533
2534         lnet_net_lock(LNET_LOCK_EX);
2535         net = lnet_get_net_locked(net_id);
2536         lnet_net_unlock(LNET_LOCK_EX);
2537
2538         LASSERT(net);
2539
2540         /*
2541          * Start the acceptor thread if this is the first network
2542          * being added that requires the thread.
2543          */
2544         if (net->net_lnd->lnd_accept && num_acceptor_nets == 0) {
2545                 rc = lnet_acceptor_start();
2546                 if (rc < 0) {
2547                         /* shutdown the net that we just started */
2548                         CERROR("Failed to start up acceptor thread\n");
2549                         lnet_shutdown_lndnet(net);
2550                         goto failed;
2551                 }
2552         }
2553
2554         lnet_net_lock(LNET_LOCK_EX);
2555         lnet_peer_net_added(net);
2556         lnet_net_unlock(LNET_LOCK_EX);
2557
2558         lnet_ping_target_update(pbuf, ping_mdh);
2559
2560         return 0;
2561
2562 failed:
2563         lnet_ping_md_unlink(pbuf, &ping_mdh);
2564         lnet_ping_buffer_decref(pbuf);
2565         return rc;
2566 }
2567
2568 static int lnet_handle_legacy_ip2nets(char *ip2nets,
2569                                       struct lnet_ioctl_config_lnd_tunables *tun)
2570 {
2571         struct lnet_net *net;
2572         char *nets;
2573         int rc;
2574         struct list_head net_head;
2575
2576         INIT_LIST_HEAD(&net_head);
2577
2578         rc = lnet_parse_ip2nets(&nets, ip2nets);
2579         if (rc < 0)
2580                 return rc;
2581
2582         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2583         if (rc < 0)
2584                 return rc;
2585
2586         mutex_lock(&the_lnet.ln_api_mutex);
2587         while (!list_empty(&net_head)) {
2588                 net = list_entry(net_head.next, struct lnet_net, net_list);
2589                 list_del_init(&net->net_list);
2590                 rc = lnet_add_net_common(net, tun);
2591                 if (rc < 0)
2592                         goto out;
2593         }
2594
2595 out:
2596         mutex_unlock(&the_lnet.ln_api_mutex);
2597
2598         while (!list_empty(&net_head)) {
2599                 net = list_entry(net_head.next, struct lnet_net, net_list);
2600                 list_del_init(&net->net_list);
2601                 lnet_net_free(net);
2602         }
2603         return rc;
2604 }
2605
2606 int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf)
2607 {
2608         struct lnet_net *net;
2609         struct lnet_ni *ni;
2610         struct lnet_ioctl_config_lnd_tunables *tun = NULL;
2611         int rc, i;
2612         __u32 net_id;
2613
2614         /* get the tunables if they are available */
2615         if (conf->lic_cfg_hdr.ioc_len >=
2616             sizeof(*conf) + sizeof(*tun))
2617                 tun = (struct lnet_ioctl_config_lnd_tunables *)
2618                         conf->lic_bulk;
2619
2620         /* handle legacy ip2nets from DLC */
2621         if (conf->lic_legacy_ip2nets[0] != '\0')
2622                 return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
2623                                                   tun);
2624
2625         net_id = LNET_NIDNET(conf->lic_nid);
2626
2627         net = lnet_net_alloc(net_id, NULL);
2628         if (!net)
2629                 return -ENOMEM;
2630
2631         for (i = 0; i < conf->lic_ncpts; i++) {
2632                 if (conf->lic_cpts[i] >= LNET_CPT_NUMBER)
2633                         return -EINVAL;
2634         }
2635
2636         ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
2637                                        conf->lic_ni_intf[0]);
2638         if (!ni)
2639                 return -ENOMEM;
2640
2641         mutex_lock(&the_lnet.ln_api_mutex);
2642
2643         rc = lnet_add_net_common(net, tun);
2644
2645         mutex_unlock(&the_lnet.ln_api_mutex);
2646
2647         return rc;
2648 }
2649
2650 int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf)
2651 {
2652         struct lnet_net  *net;
2653         struct lnet_ni *ni;
2654         __u32 net_id = LNET_NIDNET(conf->lic_nid);
2655         struct lnet_ping_buffer *pbuf;
2656         lnet_handle_md_t  ping_mdh;
2657         int               rc;
2658         int               net_count;
2659         __u32             addr;
2660
2661         /* don't allow userspace to shutdown the LOLND */
2662         if (LNET_NETTYP(net_id) == LOLND)
2663                 return -EINVAL;
2664
2665         mutex_lock(&the_lnet.ln_api_mutex);
2666
2667         lnet_net_lock(0);
2668
2669         net = lnet_get_net_locked(net_id);
2670         if (!net) {
2671                 CERROR("net %s not found\n",
2672                        libcfs_net2str(net_id));
2673                 rc = -ENOENT;
2674                 goto unlock_net;
2675         }
2676
2677         addr = LNET_NIDADDR(conf->lic_nid);
2678         if (addr == 0) {
2679                 /* remove the entire net */
2680                 net_count = lnet_get_net_ni_count_locked(net);
2681
2682                 lnet_net_unlock(0);
2683
2684                 /* create and link a new ping info, before removing the old one */
2685                 rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2686                                         lnet_get_ni_count() - net_count,
2687                                         false);
2688                 if (rc != 0)
2689                         goto unlock_api_mutex;
2690
2691                 lnet_shutdown_lndnet(net);
2692
2693                 if (lnet_count_acceptor_nets() == 0)
2694                         lnet_acceptor_stop();
2695
2696                 lnet_ping_target_update(pbuf, ping_mdh);
2697
2698                 goto unlock_api_mutex;
2699         }
2700
2701         ni = lnet_nid2ni_locked(conf->lic_nid, 0);
2702         if (!ni) {
2703                 CERROR("nid %s not found\n",
2704                        libcfs_nid2str(conf->lic_nid));
2705                 rc = -ENOENT;
2706                 goto unlock_net;
2707         }
2708
2709         net_count = lnet_get_net_ni_count_locked(net);
2710
2711         lnet_net_unlock(0);
2712
2713         /* create and link a new ping info, before removing the old one */
2714         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2715                                   lnet_get_ni_count() - 1, false);
2716         if (rc != 0)
2717                 goto unlock_api_mutex;
2718
2719         lnet_shutdown_lndni(ni);
2720
2721         if (lnet_count_acceptor_nets() == 0)
2722                 lnet_acceptor_stop();
2723
2724         lnet_ping_target_update(pbuf, ping_mdh);
2725
2726         /* check if the net is empty and remove it if it is */
2727         if (net_count == 1)
2728                 lnet_shutdown_lndnet(net);
2729
2730         goto unlock_api_mutex;
2731
2732 unlock_net:
2733         lnet_net_unlock(0);
2734 unlock_api_mutex:
2735         mutex_unlock(&the_lnet.ln_api_mutex);
2736
2737         return rc;
2738 }
2739
2740 /*
2741  * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
2742  * They are only expected to be called for unique networks.
2743  * That can be as a result of older DLC library
2744  * calls. Multi-Rail DLC and beyond no longer uses these APIs.
2745  */
2746 int
2747 lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
2748 {
2749         struct lnet_net         *net;
2750         struct list_head        net_head;
2751         int                     rc;
2752         struct lnet_ioctl_config_lnd_tunables tun;
2753         char *nets = conf->cfg_config_u.cfg_net.net_intf;
2754
2755         INIT_LIST_HEAD(&net_head);
2756
2757         /* Create a net/ni structures for the network string */
2758         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2759         if (rc <= 0)
2760                 return rc == 0 ? -EINVAL : rc;
2761
2762         mutex_lock(&the_lnet.ln_api_mutex);
2763
2764         if (rc > 1) {
2765                 rc = -EINVAL; /* only add one network per call */
2766                 goto failed;
2767         }
2768
2769         net = list_entry(net_head.next, struct lnet_net, net_list);
2770         list_del_init(&net->net_list);
2771
2772         LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));
2773
2774         memset(&tun, 0, sizeof(tun));
2775
2776         tun.lt_cmn.lct_peer_timeout =
2777           conf->cfg_config_u.cfg_net.net_peer_timeout;
2778         tun.lt_cmn.lct_peer_tx_credits =
2779           conf->cfg_config_u.cfg_net.net_peer_tx_credits;
2780         tun.lt_cmn.lct_peer_rtr_credits =
2781           conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
2782         tun.lt_cmn.lct_max_tx_credits =
2783           conf->cfg_config_u.cfg_net.net_max_tx_credits;
2784
2785         rc = lnet_add_net_common(net, &tun);
2786         if (rc != 0)
2787                 goto failed;
2788
2789         return 0;
2790
2791 failed:
2792         mutex_unlock(&the_lnet.ln_api_mutex);
2793         while (!list_empty(&net_head)) {
2794                 net = list_entry(net_head.next, struct lnet_net, net_list);
2795                 list_del_init(&net->net_list);
2796                 lnet_net_free(net);
2797         }
2798         return rc;
2799 }
2800
2801 int
2802 lnet_dyn_del_net(__u32 net_id)
2803 {
2804         struct lnet_net  *net;
2805         struct lnet_ping_buffer *pbuf;
2806         lnet_handle_md_t  ping_mdh;
2807         int               rc;
2808         int               net_ni_count;
2809
2810         /* don't allow userspace to shutdown the LOLND */
2811         if (LNET_NETTYP(net_id) == LOLND)
2812                 return -EINVAL;
2813
2814         mutex_lock(&the_lnet.ln_api_mutex);
2815
2816         lnet_net_lock(0);
2817
2818         net = lnet_get_net_locked(net_id);
2819         if (net == NULL) {
2820                 rc = -EINVAL;
2821                 goto out;
2822         }
2823
2824         net_ni_count = lnet_get_net_ni_count_locked(net);
2825
2826         lnet_net_unlock(0);
2827
2828         /* create and link a new ping info, before removing the old one */
2829         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2830                                     lnet_get_ni_count() - net_ni_count, false);
2831         if (rc != 0)
2832                 goto out;
2833
2834         lnet_shutdown_lndnet(net);
2835
2836         if (lnet_count_acceptor_nets() == 0)
2837                 lnet_acceptor_stop();
2838
2839         lnet_ping_target_update(pbuf, ping_mdh);
2840
2841 out:
2842         mutex_unlock(&the_lnet.ln_api_mutex);
2843
2844         return rc;
2845 }
2846
2847 void lnet_incr_dlc_seq(void)
2848 {
2849         atomic_inc(&lnet_dlc_seq_no);
2850 }
2851
2852 __u32 lnet_get_dlc_seq_locked(void)
2853 {
2854         return atomic_read(&lnet_dlc_seq_no);
2855 }
2856
2857 /**
2858  * LNet ioctl handler.
2859  *
2860  */
2861 int
2862 LNetCtl(unsigned int cmd, void *arg)
2863 {
2864         struct libcfs_ioctl_data *data = arg;
2865         struct lnet_ioctl_config_data *config;
2866         lnet_process_id_t         id = {0};
2867         lnet_ni_t                *ni;
2868         int                       rc;
2869
2870         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
2871                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
2872
2873         switch (cmd) {
2874         case IOC_LIBCFS_GET_NI:
2875                 rc = LNetGetId(data->ioc_count, &id);
2876                 data->ioc_nid = id.nid;
2877                 return rc;
2878
2879         case IOC_LIBCFS_FAIL_NID:
2880                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2881
2882         case IOC_LIBCFS_ADD_ROUTE:
2883                 config = arg;
2884
2885                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2886                         return -EINVAL;
2887
2888                 mutex_lock(&the_lnet.ln_api_mutex);
2889                 rc = lnet_add_route(config->cfg_net,
2890                                     config->cfg_config_u.cfg_route.rtr_hop,
2891                                     config->cfg_nid,
2892                                     config->cfg_config_u.cfg_route.
2893                                         rtr_priority);
2894                 if (rc == 0) {
2895                         rc = lnet_check_routes();
2896                         if (rc != 0)
2897                                 lnet_del_route(config->cfg_net,
2898                                                config->cfg_nid);
2899                 }
2900                 mutex_unlock(&the_lnet.ln_api_mutex);
2901                 return rc;
2902
2903         case IOC_LIBCFS_DEL_ROUTE:
2904                 config = arg;
2905
2906                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2907                         return -EINVAL;
2908
2909                 mutex_lock(&the_lnet.ln_api_mutex);
2910                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2911                 mutex_unlock(&the_lnet.ln_api_mutex);
2912                 return rc;
2913
2914         case IOC_LIBCFS_GET_ROUTE:
2915                 config = arg;
2916
2917                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2918                         return -EINVAL;
2919
2920                 mutex_lock(&the_lnet.ln_api_mutex);
2921                 rc = lnet_get_route(config->cfg_count,
2922                                     &config->cfg_net,
2923                                     &config->cfg_config_u.cfg_route.rtr_hop,
2924                                     &config->cfg_nid,
2925                                     &config->cfg_config_u.cfg_route.rtr_flags,
2926                                     &config->cfg_config_u.cfg_route.
2927                                         rtr_priority);
2928                 mutex_unlock(&the_lnet.ln_api_mutex);
2929                 return rc;
2930
2931         case IOC_LIBCFS_GET_LOCAL_NI: {
2932                 struct lnet_ioctl_config_ni *cfg_ni;
2933                 struct lnet_ioctl_config_lnd_tunables *tun = NULL;
2934                 struct lnet_ioctl_element_stats *stats;
2935                 __u32 tun_size;
2936
2937                 cfg_ni = arg;
2938                 /* get the tunables if they are available */
2939                 if (cfg_ni->lic_cfg_hdr.ioc_len <
2940                     sizeof(*cfg_ni) + sizeof(*stats)+ sizeof(*tun))
2941                         return -EINVAL;
2942
2943                 stats = (struct lnet_ioctl_element_stats *)
2944                         cfg_ni->lic_bulk;
2945                 tun = (struct lnet_ioctl_config_lnd_tunables *)
2946                                 (cfg_ni->lic_bulk + sizeof(*stats));
2947
2948                 tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
2949                         sizeof(*stats);
2950
2951                 mutex_lock(&the_lnet.ln_api_mutex);
2952                 rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
2953                 mutex_unlock(&the_lnet.ln_api_mutex);
2954                 return rc;
2955         }
2956
2957         case IOC_LIBCFS_GET_NET: {
2958                 size_t total = sizeof(*config) +
2959                                sizeof(struct lnet_ioctl_net_config);
2960                 config = arg;
2961
2962                 if (config->cfg_hdr.ioc_len < total)
2963                         return -EINVAL;
2964
2965                 mutex_lock(&the_lnet.ln_api_mutex);
2966                 rc = lnet_get_net_config(config);
2967                 mutex_unlock(&the_lnet.ln_api_mutex);
2968                 return rc;
2969         }
2970
2971         case IOC_LIBCFS_GET_LNET_STATS:
2972         {
2973                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2974
2975                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
2976                         return -EINVAL;
2977
2978                 mutex_lock(&the_lnet.ln_api_mutex);
2979                 lnet_counters_get(&lnet_stats->st_cntrs);
2980                 mutex_unlock(&the_lnet.ln_api_mutex);
2981                 return 0;
2982         }
2983
2984         case IOC_LIBCFS_CONFIG_RTR:
2985                 config = arg;
2986
2987                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2988                         return -EINVAL;
2989
2990                 mutex_lock(&the_lnet.ln_api_mutex);
2991                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2992                         rc = lnet_rtrpools_enable();
2993                         mutex_unlock(&the_lnet.ln_api_mutex);
2994                         return rc;
2995                 }
2996                 lnet_rtrpools_disable();
2997                 mutex_unlock(&the_lnet.ln_api_mutex);
2998                 return 0;
2999
3000         case IOC_LIBCFS_ADD_BUF:
3001                 config = arg;
3002
3003                 if (config->cfg_hdr.ioc_len < sizeof(*config))
3004                         return -EINVAL;
3005
3006                 mutex_lock(&the_lnet.ln_api_mutex);
3007                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
3008                                                 buf_tiny,
3009                                           config->cfg_config_u.cfg_buffers.
3010                                                 buf_small,
3011                                           config->cfg_config_u.cfg_buffers.
3012                                                 buf_large);
3013                 mutex_unlock(&the_lnet.ln_api_mutex);
3014                 return rc;
3015
3016         case IOC_LIBCFS_SET_NUMA_RANGE: {
3017                 struct lnet_ioctl_set_value *numa;
3018                 numa = arg;
3019                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
3020                         return -EINVAL;
3021                 lnet_net_lock(LNET_LOCK_EX);
3022                 lnet_numa_range = numa->sv_value;
3023                 lnet_net_unlock(LNET_LOCK_EX);
3024                 return 0;
3025         }
3026
3027         case IOC_LIBCFS_GET_NUMA_RANGE: {
3028                 struct lnet_ioctl_set_value *numa;
3029                 numa = arg;
3030                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
3031                         return -EINVAL;
3032                 numa->sv_value = lnet_numa_range;
3033                 return 0;
3034         }
3035
3036         case IOC_LIBCFS_SET_MAX_INTF: {
3037                 struct lnet_ioctl_set_value *max_intf;
3038                 max_intf = arg;
3039                 if (max_intf->sv_hdr.ioc_len != sizeof(*max_intf) ||
3040                     max_intf->sv_value < LNET_MIN_INTERFACES)
3041                         return -EINVAL;
3042                 mutex_lock(&the_lnet.ln_api_mutex);
3043                 lnet_max_interfaces = max_intf->sv_value;
3044                 mutex_unlock(&the_lnet.ln_api_mutex);
3045                 return 0;
3046         }
3047
3048         case IOC_LIBCFS_GET_MAX_INTF: {
3049                 struct lnet_ioctl_set_value *max_intf;
3050                 max_intf = arg;
3051                 if (max_intf->sv_hdr.ioc_len != sizeof(*max_intf))
3052                         return -EINVAL;
3053                 max_intf->sv_value = lnet_max_interfaces;
3054                 return 0;
3055         }
3056
3057         case IOC_LIBCFS_SET_DISCOVERY: {
3058                 struct lnet_ioctl_set_value *discovery;
3059                 discovery = arg;
3060                 if (discovery->sv_hdr.ioc_len != sizeof(*discovery) ||
3061                     discovery->sv_value > 1)
3062                         return -EINVAL;
3063                 mutex_lock(&the_lnet.ln_api_mutex);
3064                 lnet_peer_discovery_enabled = discovery->sv_value;
3065                 mutex_unlock(&the_lnet.ln_api_mutex);
3066                 return 0;
3067         }
3068
3069         case IOC_LIBCFS_GET_DISCOVERY: {
3070                 struct lnet_ioctl_set_value *discovery;
3071                 discovery = arg;
3072                 if (discovery->sv_hdr.ioc_len != sizeof(*discovery))
3073                         return -EINVAL;
3074                 discovery->sv_value = lnet_peer_discovery_enabled;
3075                 return 0;
3076         }
3077
3078         case IOC_LIBCFS_GET_BUF: {
3079                 struct lnet_ioctl_pool_cfg *pool_cfg;
3080                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
3081
3082                 config = arg;
3083
3084                 if (config->cfg_hdr.ioc_len < total)
3085                         return -EINVAL;
3086
3087                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
3088
3089                 mutex_lock(&the_lnet.ln_api_mutex);
3090                 rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
3091                 mutex_unlock(&the_lnet.ln_api_mutex);
3092                 return rc;
3093         }
3094
3095         case IOC_LIBCFS_ADD_PEER_NI: {
3096                 struct lnet_ioctl_peer_cfg *cfg = arg;
3097
3098                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
3099                         return -EINVAL;
3100
3101                 mutex_lock(&the_lnet.ln_api_mutex);
3102                 rc = lnet_add_peer_ni(cfg->prcfg_prim_nid,
3103                                       cfg->prcfg_cfg_nid,
3104                                       cfg->prcfg_mr);
3105                 mutex_unlock(&the_lnet.ln_api_mutex);
3106                 return rc;
3107         }
3108
3109         case IOC_LIBCFS_DEL_PEER_NI: {
3110                 struct lnet_ioctl_peer_cfg *cfg = arg;
3111
3112                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
3113                         return -EINVAL;
3114
3115                 mutex_lock(&the_lnet.ln_api_mutex);
3116                 rc = lnet_del_peer_ni(cfg->prcfg_prim_nid,
3117                                       cfg->prcfg_cfg_nid);
3118                 mutex_unlock(&the_lnet.ln_api_mutex);
3119                 return rc;
3120         }
3121
3122         case IOC_LIBCFS_GET_PEER_INFO: {
3123                 struct lnet_ioctl_peer *peer_info = arg;
3124
3125                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
3126                         return -EINVAL;
3127
3128                 mutex_lock(&the_lnet.ln_api_mutex);
3129                 rc = lnet_get_peer_ni_info(
3130                    peer_info->pr_count,
3131                    &peer_info->pr_nid,
3132                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
3133                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
3134                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
3135                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
3136                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
3137                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
3138                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
3139                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
3140                 mutex_unlock(&the_lnet.ln_api_mutex);
3141                 return rc;
3142         }
3143
3144         case IOC_LIBCFS_GET_PEER_NI: {
3145                 struct lnet_ioctl_peer_cfg *cfg = arg;
3146                 struct lnet_peer_ni_credit_info *lpni_cri;
3147                 struct lnet_ioctl_element_stats *lpni_stats;
3148                 size_t total = sizeof(*cfg) + sizeof(*lpni_cri) +
3149                                sizeof(*lpni_stats);
3150
3151                 if (cfg->prcfg_hdr.ioc_len < total)
3152                         return -EINVAL;
3153
3154                 lpni_cri = (struct lnet_peer_ni_credit_info*) cfg->prcfg_bulk;
3155                 lpni_stats = (struct lnet_ioctl_element_stats *)
3156                              (cfg->prcfg_bulk + sizeof(*lpni_cri));
3157
3158                 mutex_lock(&the_lnet.ln_api_mutex);
3159                 rc = lnet_get_peer_info(cfg->prcfg_idx, &cfg->prcfg_prim_nid,
3160                                         &cfg->prcfg_cfg_nid, &cfg->prcfg_mr,
3161                                         lpni_cri, lpni_stats);
3162                 mutex_unlock(&the_lnet.ln_api_mutex);
3163                 return rc;
3164         }
3165
3166         case IOC_LIBCFS_NOTIFY_ROUTER: {
3167                 unsigned long jiffies_passed;
3168
3169                 jiffies_passed = ktime_get_real_seconds() - data->ioc_u64[0];
3170                 jiffies_passed = cfs_time_seconds(jiffies_passed);
3171
3172                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
3173                                    jiffies - jiffies_passed);
3174         }
3175
3176         case IOC_LIBCFS_LNET_DIST:
3177                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
3178                 if (rc < 0 && rc != -EHOSTUNREACH)
3179                         return rc;
3180
3181                 data->ioc_u32[0] = rc;
3182                 return 0;
3183
3184         case IOC_LIBCFS_TESTPROTOCOMPAT:
3185                 lnet_net_lock(LNET_LOCK_EX);
3186                 the_lnet.ln_testprotocompat = data->ioc_flags;
3187                 lnet_net_unlock(LNET_LOCK_EX);
3188                 return 0;
3189
3190         case IOC_LIBCFS_LNET_FAULT:
3191                 return lnet_fault_ctl(data->ioc_flags, data);
3192
3193         case IOC_LIBCFS_PING: {
3194                 signed long timeout;
3195
3196                 id.nid = data->ioc_nid;
3197                 id.pid = data->ioc_u32[0];
3198
3199                 /* Don't block longer than 2 minutes */
3200                 if (data->ioc_u32[1] > 120 * MSEC_PER_SEC)
3201                         return -EINVAL;
3202
3203                 /* If timestamp is negative then disable timeout */
3204                 if ((s32)data->ioc_u32[1] < 0)
3205                         timeout = MAX_SCHEDULE_TIMEOUT;
3206                 else
3207                         timeout = msecs_to_jiffies(data->ioc_u32[1]);
3208
3209                 rc = lnet_ping(id, timeout, data->ioc_pbuf1,
3210                                data->ioc_plen1 / sizeof(lnet_process_id_t));
3211                 if (rc < 0)
3212                         return rc;
3213                 data->ioc_count = rc;
3214                 return 0;
3215         }
3216
3217         default:
3218                 ni = lnet_net2ni_addref(data->ioc_net);
3219                 if (ni == NULL)
3220                         return -EINVAL;
3221
3222                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
3223                         rc = -EINVAL;
3224                 else
3225                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
3226
3227                 lnet_ni_decref(ni);
3228                 return rc;
3229         }
3230         /* not reached */
3231 }
3232 EXPORT_SYMBOL(LNetCtl);
3233
3234 void LNetDebugPeer(lnet_process_id_t id)
3235 {
3236         lnet_debug_peer(id.nid);
3237 }
3238 EXPORT_SYMBOL(LNetDebugPeer);
3239
3240 /**
3241  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
3242  * all interfaces share a same PID, as requested by LNetNIInit().
3243  *
3244  * \param index Index of the interface to look up.
3245  * \param id On successful return, this location will hold the
3246  * lnet_process_id_t ID of the interface.
3247  *
3248  * \retval 0 If an interface exists at \a index.
3249  * \retval -ENOENT If no interface has been found.
3250  */
3251 int
3252 LNetGetId(unsigned int index, lnet_process_id_t *id)
3253 {
3254         struct lnet_ni   *ni;
3255         struct lnet_net  *net;
3256         int               cpt;
3257         int               rc = -ENOENT;
3258
3259         LASSERT(the_lnet.ln_refcount > 0);
3260
3261         cpt = lnet_net_lock_current();
3262
3263         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3264                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3265                         if (index-- != 0)
3266                                 continue;
3267
3268                         id->nid = ni->ni_nid;
3269                         id->pid = the_lnet.ln_pid;
3270                         rc = 0;
3271                         break;
3272                 }
3273         }
3274
3275         lnet_net_unlock(cpt);
3276         return rc;
3277 }
3278 EXPORT_SYMBOL(LNetGetId);
3279
3280 /**
3281  * Print a string representation of handle \a h into buffer \a str of
3282  * \a len bytes.
3283  */
3284 void
3285 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
3286 {
3287         snprintf(str, len, "%#llx", h.cookie);
3288 }
3289 EXPORT_SYMBOL(LNetSnprintHandle);
3290
3291 static int lnet_ping(lnet_process_id_t id, signed long timeout,
3292                      lnet_process_id_t __user *ids, int n_ids)
3293 {
3294         lnet_handle_eq_t     eqh;
3295         lnet_handle_md_t     mdh;
3296         lnet_event_t         event;
3297         lnet_md_t            md = { NULL };
3298         int                  which;
3299         int                  unlinked = 0;
3300         int                  replied = 0;
3301         const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC);
3302         struct lnet_ping_buffer *pbuf;
3303         lnet_process_id_t    tmpid;
3304         int                  i;
3305         int                  nob;
3306         int                  rc;
3307         int                  rc2;
3308         sigset_t         blocked;
3309
3310         /* n_ids limit is arbitrary */
3311         if (n_ids <= 0 || n_ids > lnet_max_interfaces || id.nid == LNET_NID_ANY)
3312                 return -EINVAL;
3313
3314         if (id.pid == LNET_PID_ANY)
3315                 id.pid = LNET_PID_LUSTRE;
3316
3317         pbuf = lnet_ping_buffer_alloc(n_ids, GFP_NOFS);
3318         if (!pbuf)
3319                 return -ENOMEM;
3320
3321         /* NB 2 events max (including any unlink event) */
3322         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
3323         if (rc != 0) {
3324                 CERROR("Can't allocate EQ: %d\n", rc);
3325                 goto out_0;
3326         }
3327
3328         /* initialize md content */
3329         md.start     = &pbuf->pb_info;
3330         md.length    = LNET_PING_INFO_SIZE(n_ids);
3331         md.threshold = 2; /*GET/REPLY*/
3332         md.max_size  = 0;
3333         md.options   = LNET_MD_TRUNCATE;
3334         md.user_ptr  = NULL;
3335         md.eq_handle = eqh;
3336
3337         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
3338         if (rc != 0) {
3339                 CERROR("Can't bind MD: %d\n", rc);
3340                 goto out_1;
3341         }
3342
3343         rc = LNetGet(LNET_NID_ANY, mdh, id,
3344                      LNET_RESERVED_PORTAL,
3345                      LNET_PROTO_PING_MATCHBITS, 0);
3346
3347         if (rc != 0) {
3348                 /* Don't CERROR; this could be deliberate! */
3349
3350                 rc2 = LNetMDUnlink(mdh);
3351                 LASSERT(rc2 == 0);
3352
3353                 /* NB must wait for the UNLINK event below... */
3354                 unlinked = 1;
3355                 timeout = a_long_time;
3356         }
3357
3358         do {
3359                 /* MUST block for unlink to complete */
3360                 if (unlinked)
3361                         blocked = cfs_block_allsigs();
3362
3363                 rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which);
3364
3365                 if (unlinked)
3366                         cfs_restore_sigs(blocked);
3367
3368                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
3369                        (rc2 <= 0) ? -1 : event.type,
3370                        (rc2 <= 0) ? -1 : event.status,
3371                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
3372
3373                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
3374
3375                 if (rc2 <= 0 || event.status != 0) {
3376                         /* timeout or error */
3377                         if (!replied && rc == 0)
3378                                 rc = (rc2 < 0) ? rc2 :
3379                                      (rc2 == 0) ? -ETIMEDOUT :
3380                                      event.status;
3381
3382                         if (!unlinked) {
3383                                 /* Ensure completion in finite time... */
3384                                 LNetMDUnlink(mdh);
3385                                 /* No assertion (racing with network) */
3386                                 unlinked = 1;
3387                                 timeout = a_long_time;
3388                         } else if (rc2 == 0) {
3389                                 /* timed out waiting for unlink */
3390                                 CWARN("ping %s: late network completion\n",
3391                                       libcfs_id2str(id));
3392                         }
3393                 } else if (event.type == LNET_EVENT_REPLY) {
3394                         replied = 1;
3395                         rc = event.mlength;
3396                 }
3397
3398         } while (rc2 <= 0 || !event.unlinked);
3399
3400         if (!replied) {
3401                 if (rc >= 0)
3402                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
3403                               libcfs_id2str(id));
3404                 rc = -EIO;
3405                 goto out_1;
3406         }
3407
3408         nob = rc;
3409         LASSERT(nob >= 0 && nob <= LNET_PING_INFO_SIZE(n_ids));
3410
3411         rc = -EPROTO;                           /* if I can't parse... */
3412
3413         if (nob < 8) {
3414                 /* can't check magic/version */
3415                 CERROR("%s: ping info too short %d\n",
3416                        libcfs_id2str(id), nob);
3417                 goto out_1;
3418         }
3419
3420         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
3421                 lnet_swap_pinginfo(pbuf);
3422         } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
3423                 CERROR("%s: Unexpected magic %08x\n",
3424                        libcfs_id2str(id), pbuf->pb_info.pi_magic);
3425                 goto out_1;
3426         }
3427
3428         if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
3429                 CERROR("%s: ping w/o NI status: 0x%x\n",
3430                        libcfs_id2str(id), pbuf->pb_info.pi_features);
3431                 goto out_1;
3432         }
3433
3434         if (nob < LNET_PING_INFO_SIZE(0)) {
3435                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
3436                        nob, (int)LNET_PING_INFO_SIZE(0));
3437                 goto out_1;
3438         }
3439
3440         if (pbuf->pb_info.pi_nnis < n_ids)
3441                 n_ids = pbuf->pb_info.pi_nnis;
3442
3443         if (nob < LNET_PING_INFO_SIZE(n_ids)) {
3444                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
3445                        nob, (int)LNET_PING_INFO_SIZE(n_ids));;
3446                 goto out_1;
3447         }
3448
3449         rc = -EFAULT;                           /* If I SEGV... */
3450
3451         memset(&tmpid, 0, sizeof(tmpid));
3452         for (i = 0; i < n_ids; i++) {
3453                 tmpid.pid = pbuf->pb_info.pi_pid;
3454                 tmpid.nid = pbuf->pb_info.pi_ni[i].ns_nid;
3455                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
3456                         goto out_1;
3457         }
3458         rc = pbuf->pb_info.pi_nnis;
3459
3460  out_1:
3461         rc2 = LNetEQFree(eqh);
3462         if (rc2 != 0)
3463                 CERROR("rc2 %d\n", rc2);
3464         LASSERT(rc2 == 0);
3465
3466  out_0:
3467         lnet_ping_buffer_decref(pbuf);
3468         return rc;
3469 }