Whamcloud - gitweb
LU-7734 lnet: implement Peer Discovery
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_LNET
34 #include <linux/log2.h>
35 #include <linux/ktime.h>
36
37 #include <lnet/lib-lnet.h>
38
39 #define D_LNI D_CONSOLE
40
41 lnet_t      the_lnet;                           /* THE state of the network */
42 EXPORT_SYMBOL(the_lnet);
43
44 static char *ip2nets = "";
45 module_param(ip2nets, charp, 0444);
46 MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
47
48 static char *networks = "";
49 module_param(networks, charp, 0444);
50 MODULE_PARM_DESC(networks, "local networks");
51
52 static char *routes = "";
53 module_param(routes, charp, 0444);
54 MODULE_PARM_DESC(routes, "routes to non-local networks");
55
56 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
57 module_param(rnet_htable_size, int, 0444);
58 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
59
60 static int use_tcp_bonding = false;
61 module_param(use_tcp_bonding, int, 0444);
62 MODULE_PARM_DESC(use_tcp_bonding,
63                  "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
64
65 unsigned int lnet_numa_range = 0;
66 module_param(lnet_numa_range, uint, 0444);
67 MODULE_PARM_DESC(lnet_numa_range,
68                 "NUMA range to consider during Multi-Rail selection");
69
70 static int lnet_max_interfaces = LNET_MAX_INTERFACES_DEFAULT;
71 module_param(lnet_max_interfaces, int, 0444);
72 MODULE_PARM_DESC(lnet_max_interfaces,
73                 "Maximum number of interfaces in a node.");
74
75 /*
76  * This sequence number keeps track of how many times DLC was used to
77  * update the local NIs. It is incremented when a NI is added or
78  * removed and checked when sending a message to determine if there is
79  * a need to re-run the selection algorithm. See lnet_select_pathway()
80  * for more details on its usage.
81  */
82 static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
83
84 static int lnet_ping(lnet_process_id_t id, signed long timeout,
85                      lnet_process_id_t __user *ids, int n_ids);
86
87 static char *
88 lnet_get_routes(void)
89 {
90         return routes;
91 }
92
93 static char *
94 lnet_get_networks(void)
95 {
96         char   *nets;
97         int     rc;
98
99         if (*networks != 0 && *ip2nets != 0) {
100                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
101                                    "'ip2nets' but not both at once\n");
102                 return NULL;
103         }
104
105         if (*ip2nets != 0) {
106                 rc = lnet_parse_ip2nets(&nets, ip2nets);
107                 return (rc == 0) ? nets : NULL;
108         }
109
110         if (*networks != 0)
111                 return networks;
112
113         return "tcp";
114 }
115
116 static void
117 lnet_init_locks(void)
118 {
119         spin_lock_init(&the_lnet.ln_eq_wait_lock);
120         init_waitqueue_head(&the_lnet.ln_eq_waitq);
121         init_waitqueue_head(&the_lnet.ln_rc_waitq);
122         mutex_init(&the_lnet.ln_lnd_mutex);
123         mutex_init(&the_lnet.ln_api_mutex);
124 }
125
126 static void
127 lnet_fini_locks(void)
128 {
129 }
130
131 struct kmem_cache *lnet_mes_cachep;        /* MEs kmem_cache */
132 struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
133                                             *  MDs kmem_cache */
134
135 static int
136 lnet_descriptor_setup(void)
137 {
138         /* create specific kmem_cache for MEs and small MDs (i.e., originally
139          * allocated in <size-xxx> kmem_cache).
140          */
141         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(lnet_me_t),
142                                             0, 0, NULL);
143         if (!lnet_mes_cachep)
144                 return -ENOMEM;
145
146         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
147                                                   LNET_SMALL_MD_SIZE, 0, 0,
148                                                   NULL);
149         if (!lnet_small_mds_cachep)
150                 return -ENOMEM;
151
152         return 0;
153 }
154
155 static void
156 lnet_descriptor_cleanup(void)
157 {
158
159         if (lnet_small_mds_cachep) {
160                 kmem_cache_destroy(lnet_small_mds_cachep);
161                 lnet_small_mds_cachep = NULL;
162         }
163
164         if (lnet_mes_cachep) {
165                 kmem_cache_destroy(lnet_mes_cachep);
166                 lnet_mes_cachep = NULL;
167         }
168 }
169
170 static int
171 lnet_create_remote_nets_table(void)
172 {
173         int               i;
174         struct list_head *hash;
175
176         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
177         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
178         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
179         if (hash == NULL) {
180                 CERROR("Failed to create remote nets hash table\n");
181                 return -ENOMEM;
182         }
183
184         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
185                 INIT_LIST_HEAD(&hash[i]);
186         the_lnet.ln_remote_nets_hash = hash;
187         return 0;
188 }
189
190 static void
191 lnet_destroy_remote_nets_table(void)
192 {
193         int i;
194
195         if (the_lnet.ln_remote_nets_hash == NULL)
196                 return;
197
198         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
199                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
200
201         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
202                     LNET_REMOTE_NETS_HASH_SIZE *
203                     sizeof(the_lnet.ln_remote_nets_hash[0]));
204         the_lnet.ln_remote_nets_hash = NULL;
205 }
206
207 static void
208 lnet_destroy_locks(void)
209 {
210         if (the_lnet.ln_res_lock != NULL) {
211                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
212                 the_lnet.ln_res_lock = NULL;
213         }
214
215         if (the_lnet.ln_net_lock != NULL) {
216                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
217                 the_lnet.ln_net_lock = NULL;
218         }
219
220         lnet_fini_locks();
221 }
222
223 static int
224 lnet_create_locks(void)
225 {
226         lnet_init_locks();
227
228         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
229         if (the_lnet.ln_res_lock == NULL)
230                 goto failed;
231
232         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
233         if (the_lnet.ln_net_lock == NULL)
234                 goto failed;
235
236         return 0;
237
238  failed:
239         lnet_destroy_locks();
240         return -ENOMEM;
241 }
242
243 static void lnet_assert_wire_constants(void)
244 {
245         /*
246          * Wire protocol assertions generated by 'wirecheck'
247          * running on Linux lustre-build 3.10.0-327.el7_lustre.centos.x86_64
248          * #1 SMP Fri Jul 8 13:32:15 EDT 2016 x86_64 x86_64 x86_64 GNU/Linux
249          * with gcc version 4.8.5 20150623 (Red Hat 4.8.5-4) (GCC)
250          */
251
252
253         /* Constants... */
254         CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
255         CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1);
256         CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0);
257         CLASSERT(LNET_MSG_ACK == 0);
258         CLASSERT(LNET_MSG_PUT == 1);
259         CLASSERT(LNET_MSG_GET == 2);
260         CLASSERT(LNET_MSG_REPLY == 3);
261         CLASSERT(LNET_MSG_HELLO == 4);
262
263         /* Checks for struct lnet_handle_wire */
264         CLASSERT((int)sizeof(struct lnet_handle_wire) == 16);
265         CLASSERT((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) == 0);
266         CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) == 8);
267         CLASSERT((int)offsetof(struct lnet_handle_wire, wh_object_cookie) == 8);
268         CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) == 8);
269
270         /* Checks for struct lnet_magicversion */
271         CLASSERT((int)sizeof(struct lnet_magicversion) == 8);
272         CLASSERT((int)offsetof(struct lnet_magicversion, magic) == 0);
273         CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->magic) == 4);
274         CLASSERT((int)offsetof(struct lnet_magicversion, version_major) == 4);
275         CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->version_major) == 2);
276         CLASSERT((int)offsetof(struct lnet_magicversion, version_minor) == 6);
277         CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->version_minor) == 2);
278
279         /* Checks for struct lnet_hdr */
280         CLASSERT((int)sizeof(struct lnet_hdr) == 72);
281         CLASSERT((int)offsetof(struct lnet_hdr, dest_nid) == 0);
282         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->dest_nid) == 8);
283         CLASSERT((int)offsetof(struct lnet_hdr, src_nid) == 8);
284         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->src_nid) == 8);
285         CLASSERT((int)offsetof(struct lnet_hdr, dest_pid) == 16);
286         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->dest_pid) == 4);
287         CLASSERT((int)offsetof(struct lnet_hdr, src_pid) == 20);
288         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->src_pid) == 4);
289         CLASSERT((int)offsetof(struct lnet_hdr, type) == 24);
290         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->type) == 4);
291         CLASSERT((int)offsetof(struct lnet_hdr, payload_length) == 28);
292         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->payload_length) == 4);
293         CLASSERT((int)offsetof(struct lnet_hdr, msg) == 32);
294         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg) == 40);
295
296         /* Ack */
297         CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) == 32);
298         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) == 16);
299         CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.match_bits) == 48);
300         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) == 8);
301         CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.mlength) == 56);
302         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) == 4);
303
304         /* Put */
305         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) == 32);
306         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) == 16);
307         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.match_bits) == 48);
308         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) == 8);
309         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.hdr_data) == 56);
310         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) == 8);
311         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.ptl_index) == 64);
312         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) == 4);
313         CLASSERT((int)offsetof(struct lnet_hdr, msg.put.offset) == 68);
314         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) == 4);
315
316         /* Get */
317         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.return_wmd) == 32);
318         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) == 16);
319         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.match_bits) == 48);
320         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) == 8);
321         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.ptl_index) == 56);
322         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) == 4);
323         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.src_offset) == 60);
324         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) == 4);
325         CLASSERT((int)offsetof(struct lnet_hdr, msg.get.sink_length) == 64);
326         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) == 4);
327
328         /* Reply */
329         CLASSERT((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) == 32);
330         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) == 16);
331
332         /* Hello */
333         CLASSERT((int)offsetof(struct lnet_hdr, msg.hello.incarnation) == 32);
334         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) == 8);
335         CLASSERT((int)offsetof(struct lnet_hdr, msg.hello.type) == 40);
336         CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) == 4);
337
338         /* Checks for struct lnet_ni_status and related constants */
339         CLASSERT(LNET_NI_STATUS_INVALID == 0x00000000);
340         CLASSERT(LNET_NI_STATUS_UP == 0x15aac0de);
341         CLASSERT(LNET_NI_STATUS_DOWN == 0xdeadface);
342
343         /* Checks for struct lnet_ni_status */
344         CLASSERT((int)sizeof(struct lnet_ni_status) == 16);
345         CLASSERT((int)offsetof(struct lnet_ni_status, ns_nid) == 0);
346         CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) == 8);
347         CLASSERT((int)offsetof(struct lnet_ni_status, ns_status) == 8);
348         CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_status) == 4);
349         CLASSERT((int)offsetof(struct lnet_ni_status, ns_unused) == 12);
350         CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_unused) == 4);
351
352         /* Checks for struct lnet_ping_info and related constants */
353         CLASSERT(LNET_PROTO_PING_MAGIC == 0x70696E67);
354         CLASSERT(LNET_PING_FEAT_INVAL == 0);
355         CLASSERT(LNET_PING_FEAT_BASE == 1);
356         CLASSERT(LNET_PING_FEAT_NI_STATUS == 2);
357         CLASSERT(LNET_PING_FEAT_RTE_DISABLED == 4);
358         CLASSERT(LNET_PING_FEAT_MULTI_RAIL == 8);
359         CLASSERT(LNET_PING_FEAT_BITS == 15);
360
361         /* Checks for struct lnet_ping_info */
362         CLASSERT((int)sizeof(struct lnet_ping_info) == 16);
363         CLASSERT((int)offsetof(struct lnet_ping_info, pi_magic) == 0);
364         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) == 4);
365         CLASSERT((int)offsetof(struct lnet_ping_info, pi_features) == 4);
366         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_features) == 4);
367         CLASSERT((int)offsetof(struct lnet_ping_info, pi_pid) == 8);
368         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) == 4);
369         CLASSERT((int)offsetof(struct lnet_ping_info, pi_nnis) == 12);
370         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) == 4);
371         CLASSERT((int)offsetof(struct lnet_ping_info, pi_ni) == 16);
372         CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_ni) == 0);
373 }
374
375 static lnd_t *lnet_find_lnd_by_type(__u32 type)
376 {
377         lnd_t            *lnd;
378         struct list_head *tmp;
379
380         /* holding lnd mutex */
381         list_for_each(tmp, &the_lnet.ln_lnds) {
382                 lnd = list_entry(tmp, lnd_t, lnd_list);
383
384                 if (lnd->lnd_type == type)
385                         return lnd;
386         }
387         return NULL;
388 }
389
390 void
391 lnet_register_lnd (lnd_t *lnd)
392 {
393         mutex_lock(&the_lnet.ln_lnd_mutex);
394
395         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
396         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
397
398         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
399         lnd->lnd_refcount = 0;
400
401         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
402
403         mutex_unlock(&the_lnet.ln_lnd_mutex);
404 }
405 EXPORT_SYMBOL(lnet_register_lnd);
406
407 void
408 lnet_unregister_lnd (lnd_t *lnd)
409 {
410         mutex_lock(&the_lnet.ln_lnd_mutex);
411
412         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
413         LASSERT(lnd->lnd_refcount == 0);
414
415         list_del(&lnd->lnd_list);
416         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
417
418         mutex_unlock(&the_lnet.ln_lnd_mutex);
419 }
420 EXPORT_SYMBOL(lnet_unregister_lnd);
421
422 void
423 lnet_counters_get(lnet_counters_t *counters)
424 {
425         lnet_counters_t *ctr;
426         int             i;
427
428         memset(counters, 0, sizeof(*counters));
429
430         lnet_net_lock(LNET_LOCK_EX);
431
432         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
433                 counters->msgs_max     += ctr->msgs_max;
434                 counters->msgs_alloc   += ctr->msgs_alloc;
435                 counters->errors       += ctr->errors;
436                 counters->send_count   += ctr->send_count;
437                 counters->recv_count   += ctr->recv_count;
438                 counters->route_count  += ctr->route_count;
439                 counters->drop_count   += ctr->drop_count;
440                 counters->send_length  += ctr->send_length;
441                 counters->recv_length  += ctr->recv_length;
442                 counters->route_length += ctr->route_length;
443                 counters->drop_length  += ctr->drop_length;
444
445         }
446         lnet_net_unlock(LNET_LOCK_EX);
447 }
448 EXPORT_SYMBOL(lnet_counters_get);
449
450 void
451 lnet_counters_reset(void)
452 {
453         lnet_counters_t *counters;
454         int             i;
455
456         lnet_net_lock(LNET_LOCK_EX);
457
458         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
459                 memset(counters, 0, sizeof(lnet_counters_t));
460
461         lnet_net_unlock(LNET_LOCK_EX);
462 }
463
464 static char *
465 lnet_res_type2str(int type)
466 {
467         switch (type) {
468         default:
469                 LBUG();
470         case LNET_COOKIE_TYPE_MD:
471                 return "MD";
472         case LNET_COOKIE_TYPE_ME:
473                 return "ME";
474         case LNET_COOKIE_TYPE_EQ:
475                 return "EQ";
476         }
477 }
478
479 static void
480 lnet_res_container_cleanup(struct lnet_res_container *rec)
481 {
482         int     count = 0;
483
484         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
485                 return;
486
487         while (!list_empty(&rec->rec_active)) {
488                 struct list_head *e = rec->rec_active.next;
489
490                 list_del_init(e);
491                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
492                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
493
494                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
495                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
496
497                 } else { /* NB: Active MEs should be attached on portals */
498                         LBUG();
499                 }
500                 count++;
501         }
502
503         if (count > 0) {
504                 /* Found alive MD/ME/EQ, user really should unlink/free
505                  * all of them before finalize LNet, but if someone didn't,
506                  * we have to recycle garbage for him */
507                 CERROR("%d active elements on exit of %s container\n",
508                        count, lnet_res_type2str(rec->rec_type));
509         }
510
511         if (rec->rec_lh_hash != NULL) {
512                 LIBCFS_FREE(rec->rec_lh_hash,
513                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
514                 rec->rec_lh_hash = NULL;
515         }
516
517         rec->rec_type = 0; /* mark it as finalized */
518 }
519
520 static int
521 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
522 {
523         int     rc = 0;
524         int     i;
525
526         LASSERT(rec->rec_type == 0);
527
528         rec->rec_type = type;
529         INIT_LIST_HEAD(&rec->rec_active);
530
531         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
532
533         /* Arbitrary choice of hash table size */
534         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
535                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
536         if (rec->rec_lh_hash == NULL) {
537                 rc = -ENOMEM;
538                 goto out;
539         }
540
541         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
542                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
543
544         return 0;
545
546 out:
547         CERROR("Failed to setup %s resource container\n",
548                lnet_res_type2str(type));
549         lnet_res_container_cleanup(rec);
550         return rc;
551 }
552
553 static void
554 lnet_res_containers_destroy(struct lnet_res_container **recs)
555 {
556         struct lnet_res_container       *rec;
557         int                             i;
558
559         cfs_percpt_for_each(rec, i, recs)
560                 lnet_res_container_cleanup(rec);
561
562         cfs_percpt_free(recs);
563 }
564
565 static struct lnet_res_container **
566 lnet_res_containers_create(int type)
567 {
568         struct lnet_res_container       **recs;
569         struct lnet_res_container       *rec;
570         int                             rc;
571         int                             i;
572
573         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
574         if (recs == NULL) {
575                 CERROR("Failed to allocate %s resource containers\n",
576                        lnet_res_type2str(type));
577                 return NULL;
578         }
579
580         cfs_percpt_for_each(rec, i, recs) {
581                 rc = lnet_res_container_setup(rec, i, type);
582                 if (rc != 0) {
583                         lnet_res_containers_destroy(recs);
584                         return NULL;
585                 }
586         }
587
588         return recs;
589 }
590
591 lnet_libhandle_t *
592 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
593 {
594         /* ALWAYS called with lnet_res_lock held */
595         struct list_head        *head;
596         lnet_libhandle_t        *lh;
597         unsigned int            hash;
598
599         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
600                 return NULL;
601
602         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
603         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
604
605         list_for_each_entry(lh, head, lh_hash_chain) {
606                 if (lh->lh_cookie == cookie)
607                         return lh;
608         }
609
610         return NULL;
611 }
612
613 void
614 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
615 {
616         /* ALWAYS called with lnet_res_lock held */
617         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
618         unsigned int    hash;
619
620         lh->lh_cookie = rec->rec_lh_cookie;
621         rec->rec_lh_cookie += 1 << ibits;
622
623         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
624
625         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
626 }
627
628 static int lnet_unprepare(void);
629
630 static int
631 lnet_prepare(lnet_pid_t requested_pid)
632 {
633         /* Prepare to bring up the network */
634         struct lnet_res_container **recs;
635         int                       rc = 0;
636
637         if (requested_pid == LNET_PID_ANY) {
638                 /* Don't instantiate LNET just for me */
639                 return -ENETDOWN;
640         }
641
642         LASSERT(the_lnet.ln_refcount == 0);
643
644         the_lnet.ln_routing = 0;
645
646         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
647         the_lnet.ln_pid = requested_pid;
648
649         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
650         INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
651         INIT_LIST_HEAD(&the_lnet.ln_nets);
652         INIT_LIST_HEAD(&the_lnet.ln_routers);
653         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
654         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
655
656         rc = lnet_descriptor_setup();
657         if (rc != 0)
658                 goto failed;
659
660         rc = lnet_create_remote_nets_table();
661         if (rc != 0)
662                 goto failed;
663
664         /*
665          * NB the interface cookie in wire handles guards against delayed
666          * replies and ACKs appearing valid after reboot.
667          */
668         the_lnet.ln_interface_cookie = ktime_get_real_ns();
669
670         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
671                                                 sizeof(lnet_counters_t));
672         if (the_lnet.ln_counters == NULL) {
673                 CERROR("Failed to allocate counters for LNet\n");
674                 rc = -ENOMEM;
675                 goto failed;
676         }
677
678         rc = lnet_peer_tables_create();
679         if (rc != 0)
680                 goto failed;
681
682         rc = lnet_msg_containers_create();
683         if (rc != 0)
684                 goto failed;
685
686         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
687                                       LNET_COOKIE_TYPE_EQ);
688         if (rc != 0)
689                 goto failed;
690
691         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
692         if (recs == NULL) {
693                 rc = -ENOMEM;
694                 goto failed;
695         }
696
697         the_lnet.ln_me_containers = recs;
698
699         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
700         if (recs == NULL) {
701                 rc = -ENOMEM;
702                 goto failed;
703         }
704
705         the_lnet.ln_md_containers = recs;
706
707         rc = lnet_portals_create();
708         if (rc != 0) {
709                 CERROR("Failed to create portals for LNet: %d\n", rc);
710                 goto failed;
711         }
712
713         return 0;
714
715  failed:
716         lnet_unprepare();
717         return rc;
718 }
719
720 static int
721 lnet_unprepare (void)
722 {
723         /* NB no LNET_LOCK since this is the last reference.  All LND instances
724          * have shut down already, so it is safe to unlink and free all
725          * descriptors, even those that appear committed to a network op (eg MD
726          * with non-zero pending count) */
727
728         lnet_fail_nid(LNET_NID_ANY, 0);
729
730         LASSERT(the_lnet.ln_refcount == 0);
731         LASSERT(list_empty(&the_lnet.ln_test_peers));
732         LASSERT(list_empty(&the_lnet.ln_nets));
733
734         lnet_portals_destroy();
735
736         if (the_lnet.ln_md_containers != NULL) {
737                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
738                 the_lnet.ln_md_containers = NULL;
739         }
740
741         if (the_lnet.ln_me_containers != NULL) {
742                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
743                 the_lnet.ln_me_containers = NULL;
744         }
745
746         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
747
748         lnet_msg_containers_destroy();
749         lnet_peer_uninit();
750         lnet_rtrpools_free(0);
751
752         if (the_lnet.ln_counters != NULL) {
753                 cfs_percpt_free(the_lnet.ln_counters);
754                 the_lnet.ln_counters = NULL;
755         }
756         lnet_destroy_remote_nets_table();
757         lnet_descriptor_cleanup();
758
759         return 0;
760 }
761
762 lnet_ni_t  *
763 lnet_net2ni_locked(__u32 net_id, int cpt)
764 {
765         struct lnet_ni   *ni;
766         struct lnet_net  *net;
767
768         LASSERT(cpt != LNET_LOCK_EX);
769
770         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
771                 if (net->net_id == net_id) {
772                         ni = list_entry(net->net_ni_list.next, struct lnet_ni,
773                                         ni_netlist);
774                         return ni;
775                 }
776         }
777
778         return NULL;
779 }
780
781 lnet_ni_t *
782 lnet_net2ni_addref(__u32 net)
783 {
784         lnet_ni_t *ni;
785
786         lnet_net_lock(0);
787         ni = lnet_net2ni_locked(net, 0);
788         if (ni)
789                 lnet_ni_addref_locked(ni, 0);
790         lnet_net_unlock(0);
791
792         return ni;
793 }
794 EXPORT_SYMBOL(lnet_net2ni_addref);
795
796 struct lnet_net *
797 lnet_get_net_locked(__u32 net_id)
798 {
799         struct lnet_net  *net;
800
801         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
802                 if (net->net_id == net_id)
803                         return net;
804         }
805
806         return NULL;
807 }
808
809 unsigned int
810 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
811 {
812         __u64           key = nid;
813         unsigned int    val;
814
815         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
816
817         if (number == 1)
818                 return 0;
819
820         val = hash_long(key, LNET_CPT_BITS);
821         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
822         if (val < number)
823                 return val;
824
825         return (unsigned int)(key + val + (val >> 1)) % number;
826 }
827
828 int
829 lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
830 {
831         struct lnet_net *net;
832
833         /* must called with hold of lnet_net_lock */
834         if (LNET_CPT_NUMBER == 1)
835                 return 0; /* the only one */
836
837         /*
838          * If NI is provided then use the CPT identified in the NI cpt
839          * list if one exists. If one doesn't exist, then that NI is
840          * associated with all CPTs and it follows that the net it belongs
841          * to is implicitly associated with all CPTs, so just hash the nid
842          * and return that.
843          */
844         if (ni != NULL) {
845                 if (ni->ni_cpts != NULL)
846                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
847                                                              ni->ni_ncpts)];
848                 else
849                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
850         }
851
852         /* no NI provided so look at the net */
853         net = lnet_get_net_locked(LNET_NIDNET(nid));
854
855         if (net != NULL && net->net_cpts != NULL) {
856                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
857         }
858
859         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
860 }
861
862 int
863 lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
864 {
865         int     cpt;
866         int     cpt2;
867
868         if (LNET_CPT_NUMBER == 1)
869                 return 0; /* the only one */
870
871         cpt = lnet_net_lock_current();
872
873         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
874
875         lnet_net_unlock(cpt);
876
877         return cpt2;
878 }
879 EXPORT_SYMBOL(lnet_cpt_of_nid);
880
881 int
882 lnet_islocalnet(__u32 net_id)
883 {
884         struct lnet_net *net;
885         int             cpt;
886         bool            local;
887
888         cpt = lnet_net_lock_current();
889
890         net = lnet_get_net_locked(net_id);
891
892         local = net != NULL;
893
894         lnet_net_unlock(cpt);
895
896         return local;
897 }
898
899 bool
900 lnet_is_ni_healthy_locked(struct lnet_ni *ni)
901 {
902         if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
903             ni->ni_state == LNET_NI_STATE_DEGRADED)
904                 return true;
905
906         return false;
907 }
908
909 lnet_ni_t  *
910 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
911 {
912         struct lnet_net  *net;
913         struct lnet_ni   *ni;
914
915         LASSERT(cpt != LNET_LOCK_EX);
916
917         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
918                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
919                         if (ni->ni_nid == nid)
920                                 return ni;
921                 }
922         }
923
924         return NULL;
925 }
926
927 lnet_ni_t *
928 lnet_nid2ni_addref(lnet_nid_t nid)
929 {
930         lnet_ni_t *ni;
931
932         lnet_net_lock(0);
933         ni = lnet_nid2ni_locked(nid, 0);
934         if (ni)
935                 lnet_ni_addref_locked(ni, 0);
936         lnet_net_unlock(0);
937
938         return ni;
939 }
940 EXPORT_SYMBOL(lnet_nid2ni_addref);
941
942 int
943 lnet_islocalnid(lnet_nid_t nid)
944 {
945         struct lnet_ni  *ni;
946         int             cpt;
947
948         cpt = lnet_net_lock_current();
949         ni = lnet_nid2ni_locked(nid, cpt);
950         lnet_net_unlock(cpt);
951
952         return ni != NULL;
953 }
954
955 int
956 lnet_count_acceptor_nets(void)
957 {
958         /* Return the # of NIs that need the acceptor. */
959         int              count = 0;
960         struct lnet_net  *net;
961         int              cpt;
962
963         cpt = lnet_net_lock_current();
964         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
965                 /* all socklnd type networks should have the acceptor
966                  * thread started */
967                 if (net->net_lnd->lnd_accept != NULL)
968                         count++;
969         }
970
971         lnet_net_unlock(cpt);
972
973         return count;
974 }
975
976 struct lnet_ping_buffer *
977 lnet_ping_buffer_alloc(int nnis, gfp_t gfp)
978 {
979         struct lnet_ping_buffer *pbuf;
980
981         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nnis), gfp);
982         if (pbuf) {
983                 pbuf->pb_nnis = nnis;
984                 atomic_set(&pbuf->pb_refcnt, 1);
985         }
986
987         return pbuf;
988 }
989
990 void
991 lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
992 {
993         LASSERT(lnet_ping_buffer_numref(pbuf) == 0);
994         LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nnis));
995 }
996
997 static struct lnet_ping_buffer *
998 lnet_ping_target_create(int nnis)
999 {
1000         struct lnet_ping_buffer *pbuf;
1001
1002         pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
1003         if (pbuf == NULL) {
1004                 CERROR("Can't allocate ping source [%d]\n", nnis);
1005                 return NULL;
1006         }
1007
1008         pbuf->pb_info.pi_nnis = nnis;
1009         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1010         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1011         pbuf->pb_info.pi_features =
1012                 LNET_PING_FEAT_NI_STATUS | LNET_PING_FEAT_MULTI_RAIL;
1013
1014         return pbuf;
1015 }
1016
1017 static inline int
1018 lnet_get_net_ni_count_locked(struct lnet_net *net)
1019 {
1020         struct lnet_ni  *ni;
1021         int             count = 0;
1022
1023         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1024                 count++;
1025
1026         return count;
1027 }
1028
1029 static inline int
1030 lnet_get_net_ni_count_pre(struct lnet_net *net)
1031 {
1032         struct lnet_ni  *ni;
1033         int             count = 0;
1034
1035         list_for_each_entry(ni, &net->net_ni_added, ni_netlist)
1036                 count++;
1037
1038         return count;
1039 }
1040
1041 static inline int
1042 lnet_get_ni_count(void)
1043 {
1044         struct lnet_ni  *ni;
1045         struct lnet_net *net;
1046         int             count = 0;
1047
1048         lnet_net_lock(0);
1049
1050         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1051                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1052                         count++;
1053         }
1054
1055         lnet_net_unlock(0);
1056
1057         return count;
1058 }
1059
1060 int
1061 lnet_ping_info_validate(struct lnet_ping_info *pinfo)
1062 {
1063         if (!pinfo)
1064                 return -EINVAL;
1065         if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
1066                 return -EPROTO;
1067         if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
1068                 return -EPROTO;
1069         /* Loopback is guaranteed to be present */
1070         if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_max_interfaces)
1071                 return -ERANGE;
1072         if (LNET_NETTYP(LNET_NIDNET(LNET_PING_INFO_LONI(pinfo))) != LOLND)
1073                 return -EPROTO;
1074         return 0;
1075 }
1076
1077 static void
1078 lnet_ping_target_destroy(void)
1079 {
1080         struct lnet_net *net;
1081         struct lnet_ni  *ni;
1082
1083         lnet_net_lock(LNET_LOCK_EX);
1084
1085         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1086                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1087                         lnet_ni_lock(ni);
1088                         ni->ni_status = NULL;
1089                         lnet_ni_unlock(ni);
1090                 }
1091         }
1092
1093         lnet_ping_buffer_decref(the_lnet.ln_ping_target);
1094         the_lnet.ln_ping_target = NULL;
1095
1096         lnet_net_unlock(LNET_LOCK_EX);
1097 }
1098
1099 static void
1100 lnet_ping_target_event_handler(lnet_event_t *event)
1101 {
1102         struct lnet_ping_buffer *pbuf = event->md.user_ptr;
1103
1104         if (event->unlinked)
1105                 lnet_ping_buffer_decref(pbuf);
1106 }
1107
1108 static int
1109 lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
1110                        lnet_handle_md_t *ping_mdh, int ni_count, bool set_eq)
1111 {
1112         lnet_handle_me_t  me_handle;
1113         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1114         lnet_md_t         md = {NULL};
1115         int               rc, rc2;
1116
1117         if (set_eq) {
1118                 rc = LNetEQAlloc(0, lnet_ping_target_event_handler,
1119                                  &the_lnet.ln_ping_target_eq);
1120                 if (rc != 0) {
1121                         CERROR("Can't allocate ping buffer EQ: %d\n", rc);
1122                         return rc;
1123                 }
1124         }
1125
1126         *ppbuf = lnet_ping_target_create(ni_count);
1127         if (*ppbuf == NULL) {
1128                 rc = -ENOMEM;
1129                 goto fail_free_eq;
1130         }
1131
1132         /* Ping target ME/MD */
1133         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1134                           LNET_PROTO_PING_MATCHBITS, 0,
1135                           LNET_UNLINK, LNET_INS_AFTER,
1136                           &me_handle);
1137         if (rc != 0) {
1138                 CERROR("Can't create ping target ME: %d\n", rc);
1139                 goto fail_decref_ping_buffer;
1140         }
1141
1142         /* initialize md content */
1143         md.start     = &(*ppbuf)->pb_info;
1144         md.length    = LNET_PING_INFO_SIZE((*ppbuf)->pb_nnis);
1145         md.threshold = LNET_MD_THRESH_INF;
1146         md.max_size  = 0;
1147         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1148                        LNET_MD_MANAGE_REMOTE;
1149         md.eq_handle = the_lnet.ln_ping_target_eq;
1150         md.user_ptr  = *ppbuf;
1151
1152         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, ping_mdh);
1153         if (rc != 0) {
1154                 CERROR("Can't attach ping target MD: %d\n", rc);
1155                 goto fail_unlink_ping_me;
1156         }
1157         lnet_ping_buffer_addref(*ppbuf);
1158
1159         return 0;
1160
1161 fail_unlink_ping_me:
1162         rc2 = LNetMEUnlink(me_handle);
1163         LASSERT(rc2 == 0);
1164 fail_decref_ping_buffer:
1165         LASSERT(lnet_ping_buffer_numref(*ppbuf) == 1);
1166         lnet_ping_buffer_decref(*ppbuf);
1167         *ppbuf = NULL;
1168 fail_free_eq:
1169         if (set_eq) {
1170                 rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
1171                 LASSERT(rc2 == 0);
1172         }
1173         return rc;
1174 }
1175
1176 static void
1177 lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf, lnet_handle_md_t *ping_mdh)
1178 {
1179         sigset_t        blocked = cfs_block_allsigs();
1180
1181         LNetMDUnlink(*ping_mdh);
1182         LNetInvalidateHandle(ping_mdh);
1183
1184         /* NB the MD could be busy; this just starts the unlink */
1185         while (lnet_ping_buffer_numref(pbuf) > 1) {
1186                 CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
1187                 set_current_state(TASK_UNINTERRUPTIBLE);
1188                 schedule_timeout(cfs_time_seconds(1));
1189         }
1190
1191         cfs_restore_sigs(blocked);
1192 }
1193
1194 static void
1195 lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
1196 {
1197         struct lnet_ni          *ni;
1198         struct lnet_net         *net;
1199         struct lnet_ni_status *ns;
1200         int                     i;
1201         int                     rc;
1202
1203         i = 0;
1204         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1205                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1206                         LASSERT(i < pbuf->pb_nnis);
1207
1208                         ns = &pbuf->pb_info.pi_ni[i];
1209
1210                         ns->ns_nid = ni->ni_nid;
1211
1212                         lnet_ni_lock(ni);
1213                         ns->ns_status = (ni->ni_status != NULL) ?
1214                                          ni->ni_status->ns_status :
1215                                                 LNET_NI_STATUS_UP;
1216                         ni->ni_status = ns;
1217                         lnet_ni_unlock(ni);
1218
1219                         i++;
1220                 }
1221         }
1222         /*
1223          * We (ab)use the ns_status of the loopback interface to
1224          * transmit the sequence number. The first interface listed
1225          * must be the loopback interface.
1226          */
1227         rc = lnet_ping_info_validate(&pbuf->pb_info);
1228         if (rc) {
1229                 LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
1230                 LBUG();
1231         }
1232         LNET_PING_BUFFER_SEQNO(pbuf) =
1233                 atomic_inc_return(&the_lnet.ln_ping_target_seqno);
1234 }
1235
1236 static void
1237 lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
1238                         lnet_handle_md_t ping_mdh)
1239 {
1240         struct lnet_ping_buffer *old_pbuf = NULL;
1241         lnet_handle_md_t old_ping_md;
1242
1243         /* switch the NIs to point to the new ping info created */
1244         lnet_net_lock(LNET_LOCK_EX);
1245
1246         if (!the_lnet.ln_routing)
1247                 pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1248
1249         /* Ensure only known feature bits have been set. */
1250         LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
1251         LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));
1252
1253         lnet_ping_target_install_locked(pbuf);
1254
1255         if (the_lnet.ln_ping_target) {
1256                 old_pbuf = the_lnet.ln_ping_target;
1257                 old_ping_md = the_lnet.ln_ping_target_md;
1258         }
1259         the_lnet.ln_ping_target_md = ping_mdh;
1260         the_lnet.ln_ping_target = pbuf;
1261
1262         lnet_net_unlock(LNET_LOCK_EX);
1263
1264         if (old_pbuf) {
1265                 /* unlink and free the old ping info */
1266                 lnet_ping_md_unlink(old_pbuf, &old_ping_md);
1267                 lnet_ping_buffer_decref(old_pbuf);
1268         }
1269 }
1270
1271 static void
1272 lnet_ping_target_fini(void)
1273 {
1274         int             rc;
1275
1276         lnet_ping_md_unlink(the_lnet.ln_ping_target,
1277                             &the_lnet.ln_ping_target_md);
1278
1279         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1280         LASSERT(rc == 0);
1281
1282         lnet_ping_target_destroy();
1283 }
1284
1285 /* Resize the push target. */
1286 int lnet_push_target_resize(void)
1287 {
1288         lnet_process_id_t id = { LNET_NID_ANY, LNET_PID_ANY };
1289         lnet_md_t md = { NULL };
1290         lnet_handle_me_t meh;
1291         lnet_handle_md_t mdh;
1292         lnet_handle_md_t old_mdh;
1293         struct lnet_ping_buffer *pbuf;
1294         struct lnet_ping_buffer *old_pbuf;
1295         int nnis = the_lnet.ln_push_target_nnis;
1296         int rc;
1297
1298         if (nnis <= 0) {
1299                 rc = -EINVAL;
1300                 goto fail_return;
1301         }
1302 again:
1303         pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
1304         if (!pbuf) {
1305                 rc = -ENOMEM;
1306                 goto fail_return;
1307         }
1308
1309         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1310                           LNET_PROTO_PING_MATCHBITS, 0,
1311                           LNET_UNLINK, LNET_INS_AFTER,
1312                           &meh);
1313         if (rc) {
1314                 CERROR("Can't create push target ME: %d\n", rc);
1315                 goto fail_decref_pbuf;
1316         }
1317
1318         /* initialize md content */
1319         md.start     = &pbuf->pb_info;
1320         md.length    = LNET_PING_INFO_SIZE(nnis);
1321         md.threshold = LNET_MD_THRESH_INF;
1322         md.max_size  = 0;
1323         md.options   = LNET_MD_OP_PUT | LNET_MD_TRUNCATE |
1324                        LNET_MD_MANAGE_REMOTE;
1325         md.user_ptr  = pbuf;
1326         md.eq_handle = the_lnet.ln_push_target_eq;
1327
1328         rc = LNetMDAttach(meh, md, LNET_RETAIN, &mdh);
1329         if (rc) {
1330                 CERROR("Can't attach push MD: %d\n", rc);
1331                 goto fail_unlink_meh;
1332         }
1333         lnet_ping_buffer_addref(pbuf);
1334
1335         lnet_net_lock(LNET_LOCK_EX);
1336         old_pbuf = the_lnet.ln_push_target;
1337         old_mdh = the_lnet.ln_push_target_md;
1338         the_lnet.ln_push_target = pbuf;
1339         the_lnet.ln_push_target_md = mdh;
1340         lnet_net_unlock(LNET_LOCK_EX);
1341
1342         if (old_pbuf) {
1343                 LNetMDUnlink(old_mdh);
1344                 lnet_ping_buffer_decref(old_pbuf);
1345         }
1346
1347         if (nnis < the_lnet.ln_push_target_nnis)
1348                 goto again;
1349
1350         CDEBUG(D_NET, "nnis %d success\n", nnis);
1351
1352         return 0;
1353
1354 fail_unlink_meh:
1355         LNetMEUnlink(meh);
1356 fail_decref_pbuf:
1357         lnet_ping_buffer_decref(pbuf);
1358 fail_return:
1359         CDEBUG(D_NET, "nnis %d error %d\n", nnis, rc);
1360         return rc;
1361 }
1362
1363 static void lnet_push_target_event_handler(struct lnet_event *ev)
1364 {
1365         struct lnet_ping_buffer *pbuf = ev->md.user_ptr;
1366
1367         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
1368                 lnet_swap_pinginfo(pbuf);
1369         lnet_peer_push_event(ev);
1370         if (ev->unlinked)
1371                 lnet_ping_buffer_decref(pbuf);
1372 }
1373
1374 /* Initialize the push target. */
1375 static int lnet_push_target_init(void)
1376 {
1377         int rc;
1378
1379         if (the_lnet.ln_push_target)
1380                 return -EALREADY;
1381
1382         rc = LNetEQAlloc(0, lnet_push_target_event_handler,
1383                          &the_lnet.ln_push_target_eq);
1384         if (rc) {
1385                 CERROR("Can't allocated push target EQ: %d\n", rc);
1386                 return rc;
1387         }
1388
1389         /* Start at the required minimum, we'll enlarge if required. */
1390         the_lnet.ln_push_target_nnis = LNET_MIN_INTERFACES;
1391
1392         rc = lnet_push_target_resize();
1393
1394         if (rc) {
1395                 LNetEQFree(the_lnet.ln_push_target_eq);
1396                 LNetInvalidateHandle(&the_lnet.ln_push_target_eq);
1397         }
1398
1399         return rc;
1400 }
1401
1402 /* Clean up the push target. */
1403 static void lnet_push_target_fini(void)
1404 {
1405         if (!the_lnet.ln_push_target)
1406                 return;
1407
1408         /* Unlink and invalidate to prevent new references. */
1409         LNetMDUnlink(the_lnet.ln_push_target_md);
1410         LNetInvalidateHandle(&the_lnet.ln_push_target_md);
1411
1412         /* Wait for the unlink to complete. */
1413         while (lnet_ping_buffer_numref(the_lnet.ln_push_target) > 1) {
1414                 CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
1415                 set_current_state(TASK_UNINTERRUPTIBLE);
1416                 schedule_timeout(cfs_time_seconds(1));
1417         }
1418
1419         lnet_ping_buffer_decref(the_lnet.ln_push_target);
1420         the_lnet.ln_push_target = NULL;
1421         the_lnet.ln_push_target_nnis = 0;
1422
1423         LNetEQFree(the_lnet.ln_push_target_eq);
1424         LNetInvalidateHandle(&the_lnet.ln_push_target_eq);
1425 }
1426
1427 static int
1428 lnet_ni_tq_credits(lnet_ni_t *ni)
1429 {
1430         int     credits;
1431
1432         LASSERT(ni->ni_ncpts >= 1);
1433
1434         if (ni->ni_ncpts == 1)
1435                 return ni->ni_net->net_tunables.lct_max_tx_credits;
1436
1437         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
1438         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
1439         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
1440
1441         return credits;
1442 }
1443
1444 static void
1445 lnet_ni_unlink_locked(lnet_ni_t *ni)
1446 {
1447         if (!list_empty(&ni->ni_cptlist)) {
1448                 list_del_init(&ni->ni_cptlist);
1449                 lnet_ni_decref_locked(ni, 0);
1450         }
1451
1452         /* move it to zombie list and nobody can find it anymore */
1453         LASSERT(!list_empty(&ni->ni_netlist));
1454         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
1455         lnet_ni_decref_locked(ni, 0);
1456 }
1457
1458 static void
1459 lnet_clear_zombies_nis_locked(struct lnet_net *net)
1460 {
1461         int             i;
1462         int             islo;
1463         lnet_ni_t       *ni;
1464         struct list_head *zombie_list = &net->net_ni_zombie;
1465
1466         /*
1467          * Now wait for the NIs I just nuked to show up on the zombie
1468          * list and shut them down in guaranteed thread context
1469          */
1470         i = 2;
1471         while (!list_empty(zombie_list)) {
1472                 int     *ref;
1473                 int     j;
1474
1475                 ni = list_entry(zombie_list->next,
1476                                 lnet_ni_t, ni_netlist);
1477                 list_del_init(&ni->ni_netlist);
1478                 /* the ni should be in deleting state. If it's not it's
1479                  * a bug */
1480                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
1481                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1482                         if (*ref == 0)
1483                                 continue;
1484                         /* still busy, add it back to zombie list */
1485                         list_add(&ni->ni_netlist, zombie_list);
1486                         break;
1487                 }
1488
1489                 if (!list_empty(&ni->ni_netlist)) {
1490                         lnet_net_unlock(LNET_LOCK_EX);
1491                         ++i;
1492                         if ((i & (-i)) == i) {
1493                                 CDEBUG(D_WARNING,
1494                                        "Waiting for zombie LNI %s\n",
1495                                        libcfs_nid2str(ni->ni_nid));
1496                         }
1497                         set_current_state(TASK_UNINTERRUPTIBLE);
1498                         schedule_timeout(cfs_time_seconds(1));
1499                         lnet_net_lock(LNET_LOCK_EX);
1500                         continue;
1501                 }
1502
1503                 lnet_net_unlock(LNET_LOCK_EX);
1504
1505                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
1506
1507                 LASSERT(!in_interrupt());
1508                 (net->net_lnd->lnd_shutdown)(ni);
1509
1510                 if (!islo)
1511                         CDEBUG(D_LNI, "Removed LNI %s\n",
1512                               libcfs_nid2str(ni->ni_nid));
1513
1514                 lnet_ni_free(ni);
1515                 i = 2;
1516                 lnet_net_lock(LNET_LOCK_EX);
1517         }
1518 }
1519
1520 /* shutdown down the NI and release refcount */
1521 static void
1522 lnet_shutdown_lndni(struct lnet_ni *ni)
1523 {
1524         int i;
1525         struct lnet_net *net = ni->ni_net;
1526
1527         lnet_net_lock(LNET_LOCK_EX);
1528         ni->ni_state = LNET_NI_STATE_DELETING;
1529         lnet_ni_unlink_locked(ni);
1530         lnet_incr_dlc_seq();
1531         lnet_net_unlock(LNET_LOCK_EX);
1532
1533         /* clear messages for this NI on the lazy portal */
1534         for (i = 0; i < the_lnet.ln_nportals; i++)
1535                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
1536
1537         lnet_net_lock(LNET_LOCK_EX);
1538         lnet_clear_zombies_nis_locked(net);
1539         lnet_net_unlock(LNET_LOCK_EX);
1540 }
1541
1542 static void
1543 lnet_shutdown_lndnet(struct lnet_net *net)
1544 {
1545         struct lnet_ni *ni;
1546
1547         lnet_net_lock(LNET_LOCK_EX);
1548
1549         net->net_state = LNET_NET_STATE_DELETING;
1550
1551         list_del_init(&net->net_list);
1552
1553         while (!list_empty(&net->net_ni_list)) {
1554                 ni = list_entry(net->net_ni_list.next,
1555                                 lnet_ni_t, ni_netlist);
1556                 lnet_net_unlock(LNET_LOCK_EX);
1557                 lnet_shutdown_lndni(ni);
1558                 lnet_net_lock(LNET_LOCK_EX);
1559         }
1560
1561         lnet_net_unlock(LNET_LOCK_EX);
1562
1563         /* Do peer table cleanup for this net */
1564         lnet_peer_tables_cleanup(net);
1565
1566         lnet_net_lock(LNET_LOCK_EX);
1567         /*
1568          * decrement ref count on lnd only when the entire network goes
1569          * away
1570          */
1571         net->net_lnd->lnd_refcount--;
1572
1573         lnet_net_unlock(LNET_LOCK_EX);
1574
1575         lnet_net_free(net);
1576 }
1577
1578 static void
1579 lnet_shutdown_lndnets(void)
1580 {
1581         struct lnet_net *net;
1582
1583         /* NB called holding the global mutex */
1584
1585         /* All quiet on the API front */
1586         LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
1587         LASSERT(the_lnet.ln_refcount == 0);
1588
1589         lnet_net_lock(LNET_LOCK_EX);
1590         the_lnet.ln_state = LNET_STATE_STOPPING;
1591
1592         while (!list_empty(&the_lnet.ln_nets)) {
1593                 /*
1594                  * move the nets to the zombie list to avoid them being
1595                  * picked up for new work. LONET is also included in the
1596                  * Nets that will be moved to the zombie list
1597                  */
1598                 net = list_entry(the_lnet.ln_nets.next,
1599                                  struct lnet_net, net_list);
1600                 list_move(&net->net_list, &the_lnet.ln_net_zombie);
1601         }
1602
1603         /* Drop the cached loopback Net. */
1604         if (the_lnet.ln_loni != NULL) {
1605                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1606                 the_lnet.ln_loni = NULL;
1607         }
1608         lnet_net_unlock(LNET_LOCK_EX);
1609
1610         /* iterate through the net zombie list and delete each net */
1611         while (!list_empty(&the_lnet.ln_net_zombie)) {
1612                 net = list_entry(the_lnet.ln_net_zombie.next,
1613                                  struct lnet_net, net_list);
1614                 lnet_shutdown_lndnet(net);
1615         }
1616
1617         lnet_net_lock(LNET_LOCK_EX);
1618         the_lnet.ln_state = LNET_STATE_SHUTDOWN;
1619         lnet_net_unlock(LNET_LOCK_EX);
1620 }
1621
1622 static int
1623 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
1624 {
1625         int                     rc = -EINVAL;
1626         struct lnet_tx_queue    *tq;
1627         int                     i;
1628         struct lnet_net         *net = ni->ni_net;
1629
1630         mutex_lock(&the_lnet.ln_lnd_mutex);
1631
1632         if (tun) {
1633                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
1634                 ni->ni_lnd_tunables_set = true;
1635         }
1636
1637         rc = (net->net_lnd->lnd_startup)(ni);
1638
1639         mutex_unlock(&the_lnet.ln_lnd_mutex);
1640
1641         if (rc != 0) {
1642                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
1643                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
1644                 lnet_net_lock(LNET_LOCK_EX);
1645                 net->net_lnd->lnd_refcount--;
1646                 lnet_net_unlock(LNET_LOCK_EX);
1647                 goto failed0;
1648         }
1649
1650         ni->ni_state = LNET_NI_STATE_ACTIVE;
1651
1652         /* We keep a reference on the loopback net through the loopback NI */
1653         if (net->net_lnd->lnd_type == LOLND) {
1654                 lnet_ni_addref(ni);
1655                 LASSERT(the_lnet.ln_loni == NULL);
1656                 the_lnet.ln_loni = ni;
1657                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
1658                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
1659                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
1660                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
1661                 return 0;
1662         }
1663
1664         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
1665             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
1666                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1667                                    libcfs_lnd2str(net->net_lnd->lnd_type),
1668                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
1669                                         "" : "per-peer ");
1670                 /* shutdown the NI since if we get here then it must've already
1671                  * been started
1672                  */
1673                 lnet_shutdown_lndni(ni);
1674                 return -EINVAL;
1675         }
1676
1677         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1678                 tq->tq_credits_min =
1679                 tq->tq_credits_max =
1680                 tq->tq_credits = lnet_ni_tq_credits(ni);
1681         }
1682
1683         atomic_set(&ni->ni_tx_credits,
1684                    lnet_ni_tq_credits(ni) * ni->ni_ncpts);
1685
1686         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1687                 libcfs_nid2str(ni->ni_nid),
1688                 ni->ni_net->net_tunables.lct_peer_tx_credits,
1689                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1690                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
1691                 ni->ni_net->net_tunables.lct_peer_timeout);
1692
1693         return 0;
1694 failed0:
1695         lnet_ni_free(ni);
1696         return rc;
1697 }
1698
1699 static int
1700 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
1701 {
1702         struct lnet_ni          *ni;
1703         struct lnet_net         *net_l = NULL;
1704         struct list_head        local_ni_list;
1705         int                     rc;
1706         int                     ni_count = 0;
1707         __u32                   lnd_type;
1708         lnd_t                   *lnd;
1709         int                     peer_timeout =
1710                 net->net_tunables.lct_peer_timeout;
1711         int                     maxtxcredits =
1712                 net->net_tunables.lct_max_tx_credits;
1713         int                     peerrtrcredits =
1714                 net->net_tunables.lct_peer_rtr_credits;
1715
1716         INIT_LIST_HEAD(&local_ni_list);
1717
1718         /*
1719          * make sure that this net is unique. If it isn't then
1720          * we are adding interfaces to an already existing network, and
1721          * 'net' is just a convenient way to pass in the list.
1722          * if it is unique we need to find the LND and load it if
1723          * necessary.
1724          */
1725         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
1726                 lnd_type = LNET_NETTYP(net->net_id);
1727
1728                 LASSERT(libcfs_isknown_lnd(lnd_type));
1729
1730                 if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
1731                     lnd_type == IIBLND || lnd_type == VIBLND) {
1732                         CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
1733                         rc = -EINVAL;
1734                         goto failed0;
1735                 }
1736
1737                 mutex_lock(&the_lnet.ln_lnd_mutex);
1738                 lnd = lnet_find_lnd_by_type(lnd_type);
1739
1740                 if (lnd == NULL) {
1741                         mutex_unlock(&the_lnet.ln_lnd_mutex);
1742                         rc = request_module("%s", libcfs_lnd2modname(lnd_type));
1743                         mutex_lock(&the_lnet.ln_lnd_mutex);
1744
1745                         lnd = lnet_find_lnd_by_type(lnd_type);
1746                         if (lnd == NULL) {
1747                                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1748                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1749                                 libcfs_lnd2str(lnd_type),
1750                                 libcfs_lnd2modname(lnd_type), rc);
1751 #ifndef HAVE_MODULE_LOADING_SUPPORT
1752                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1753                                                 "compiled with kernel module "
1754                                                 "loading support.");
1755 #endif
1756                                 rc = -EINVAL;
1757                                 goto failed0;
1758                         }
1759                 }
1760
1761                 lnet_net_lock(LNET_LOCK_EX);
1762                 lnd->lnd_refcount++;
1763                 lnet_net_unlock(LNET_LOCK_EX);
1764
1765                 net->net_lnd = lnd;
1766
1767                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1768
1769                 net_l = net;
1770         }
1771
1772         /*
1773          * net_l: if the network being added is unique then net_l
1774          *        will point to that network
1775          *        if the network being added is not unique then
1776          *        net_l points to the existing network.
1777          *
1778          * When we enter the loop below, we'll pick NIs off he
1779          * network beign added and start them up, then add them to
1780          * a local ni list. Once we've successfully started all
1781          * the NIs then we join the local NI list (of started up
1782          * networks) with the net_l->net_ni_list, which should
1783          * point to the correct network to add the new ni list to
1784          *
1785          * If any of the new NIs fail to start up, then we want to
1786          * iterate through the local ni list, which should include
1787          * any NIs which were successfully started up, and shut
1788          * them down.
1789          *
1790          * After than we want to delete the network being added,
1791          * to avoid a memory leak.
1792          */
1793
1794         /*
1795          * When a network uses TCP bonding then all its interfaces
1796          * must be specified when the network is first defined: the
1797          * TCP bonding code doesn't allow for interfaces to be added
1798          * or removed.
1799          */
1800         if (net_l != net && net_l != NULL && use_tcp_bonding &&
1801             LNET_NETTYP(net_l->net_id) == SOCKLND) {
1802                 rc = -EINVAL;
1803                 goto failed0;
1804         }
1805
1806         while (!list_empty(&net->net_ni_added)) {
1807                 ni = list_entry(net->net_ni_added.next, struct lnet_ni,
1808                                 ni_netlist);
1809                 list_del_init(&ni->ni_netlist);
1810
1811                 /* make sure that the the NI we're about to start
1812                  * up is actually unique. if it's not fail. */
1813                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
1814                                         ni->ni_interfaces[0])) {
1815                         rc = -EINVAL;
1816                         goto failed1;
1817                 }
1818
1819                 /* adjust the pointer the parent network, just in case it
1820                  * the net is a duplicate */
1821                 ni->ni_net = net_l;
1822
1823                 rc = lnet_startup_lndni(ni, tun);
1824
1825                 LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
1826                         ni->ni_net->net_lnd->lnd_query != NULL);
1827
1828                 if (rc < 0)
1829                         goto failed1;
1830
1831                 lnet_ni_addref(ni);
1832                 list_add_tail(&ni->ni_netlist, &local_ni_list);
1833
1834                 ni_count++;
1835         }
1836
1837         lnet_net_lock(LNET_LOCK_EX);
1838         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
1839         lnet_incr_dlc_seq();
1840         lnet_net_unlock(LNET_LOCK_EX);
1841
1842         /* if the network is not unique then we don't want to keep
1843          * it around after we're done. Free it. Otherwise add that
1844          * net to the global the_lnet.ln_nets */
1845         if (net_l != net && net_l != NULL) {
1846                 /*
1847                  * TODO - note. currently the tunables can not be updated
1848                  * once added
1849                  */
1850                 lnet_net_free(net);
1851         } else {
1852                 net->net_state = LNET_NET_STATE_ACTIVE;
1853                 /*
1854                  * restore tunables after it has been overwitten by the
1855                  * lnd
1856                  */
1857                 if (peer_timeout != -1)
1858                         net->net_tunables.lct_peer_timeout = peer_timeout;
1859                 if (maxtxcredits != -1)
1860                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
1861                 if (peerrtrcredits != -1)
1862                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
1863
1864                 lnet_net_lock(LNET_LOCK_EX);
1865                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
1866                 lnet_net_unlock(LNET_LOCK_EX);
1867         }
1868
1869         return ni_count;
1870
1871 failed1:
1872         /*
1873          * shutdown the new NIs that are being started up
1874          * free the NET being started
1875          */
1876         while (!list_empty(&local_ni_list)) {
1877                 ni = list_entry(local_ni_list.next, struct lnet_ni,
1878                                 ni_netlist);
1879
1880                 lnet_shutdown_lndni(ni);
1881         }
1882
1883 failed0:
1884         lnet_net_free(net);
1885
1886         return rc;
1887 }
1888
1889 static int
1890 lnet_startup_lndnets(struct list_head *netlist)
1891 {
1892         struct lnet_net         *net;
1893         int                     rc;
1894         int                     ni_count = 0;
1895
1896         /*
1897          * Change to running state before bringing up the LNDs. This
1898          * allows lnet_shutdown_lndnets() to assert that we've passed
1899          * through here.
1900          */
1901         lnet_net_lock(LNET_LOCK_EX);
1902         the_lnet.ln_state = LNET_STATE_RUNNING;
1903         lnet_net_unlock(LNET_LOCK_EX);
1904
1905         while (!list_empty(netlist)) {
1906                 net = list_entry(netlist->next, struct lnet_net, net_list);
1907                 list_del_init(&net->net_list);
1908
1909                 rc = lnet_startup_lndnet(net, NULL);
1910
1911                 if (rc < 0)
1912                         goto failed;
1913
1914                 ni_count += rc;
1915         }
1916
1917         return ni_count;
1918 failed:
1919         lnet_shutdown_lndnets();
1920
1921         return rc;
1922 }
1923
1924 /**
1925  * Initialize LNet library.
1926  *
1927  * Automatically called at module loading time. Caller has to call
1928  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
1929  * latter returned 0. It must be called exactly once.
1930  *
1931  * \retval 0 on success
1932  * \retval -ve on failures.
1933  */
1934 int lnet_lib_init(void)
1935 {
1936         int rc;
1937
1938         lnet_assert_wire_constants();
1939
1940         if (lnet_max_interfaces < LNET_MIN_INTERFACES)
1941                 lnet_max_interfaces = LNET_MIN_INTERFACES;
1942
1943         memset(&the_lnet, 0, sizeof(the_lnet));
1944
1945         /* refer to global cfs_cpt_table for now */
1946         the_lnet.ln_cpt_table   = cfs_cpt_table;
1947         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1948
1949         LASSERT(the_lnet.ln_cpt_number > 0);
1950         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1951                 /* we are under risk of consuming all lh_cookie */
1952                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1953                        "please change setting of CPT-table and retry\n",
1954                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1955                 return -E2BIG;
1956         }
1957
1958         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1959                 the_lnet.ln_cpt_bits++;
1960
1961         rc = lnet_create_locks();
1962         if (rc != 0) {
1963                 CERROR("Can't create LNet global locks: %d\n", rc);
1964                 return rc;
1965         }
1966
1967         the_lnet.ln_refcount = 0;
1968         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1969         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1970         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
1971         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1972         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1973
1974         /* The hash table size is the number of bits it takes to express the set
1975          * ln_num_routes, minus 1 (better to under estimate than over so we
1976          * don't waste memory). */
1977         if (rnet_htable_size <= 0)
1978                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1979         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1980                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1981         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1982                                            order_base_2(rnet_htable_size) - 1);
1983
1984         /* All LNDs apart from the LOLND are in separate modules.  They
1985          * register themselves when their module loads, and unregister
1986          * themselves when their module is unloaded. */
1987         lnet_register_lnd(&the_lolnd);
1988         return 0;
1989 }
1990
1991 /**
1992  * Finalize LNet library.
1993  *
1994  * \pre lnet_lib_init() called with success.
1995  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1996  */
1997 void lnet_lib_exit(void)
1998 {
1999         LASSERT(the_lnet.ln_refcount == 0);
2000
2001         while (!list_empty(&the_lnet.ln_lnds))
2002                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
2003                                                lnd_t, lnd_list));
2004         lnet_destroy_locks();
2005 }
2006
2007 /**
2008  * Set LNet PID and start LNet interfaces, routing, and forwarding.
2009  *
2010  * Users must call this function at least once before any other functions.
2011  * For each successful call there must be a corresponding call to
2012  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
2013  * ignored.
2014  *
2015  * The PID used by LNet may be different from the one requested.
2016  * See LNetGetId().
2017  *
2018  * \param requested_pid PID requested by the caller.
2019  *
2020  * \return >= 0 on success, and < 0 error code on failures.
2021  */
2022 int
2023 LNetNIInit(lnet_pid_t requested_pid)
2024 {
2025         int                     im_a_router = 0;
2026         int                     rc;
2027         int                     ni_count;
2028         struct lnet_ping_buffer *pbuf;
2029         lnet_handle_md_t        ping_mdh;
2030         struct list_head        net_head;
2031         struct lnet_net         *net;
2032
2033         INIT_LIST_HEAD(&net_head);
2034
2035         mutex_lock(&the_lnet.ln_api_mutex);
2036
2037         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
2038
2039         if (the_lnet.ln_refcount > 0) {
2040                 rc = the_lnet.ln_refcount++;
2041                 mutex_unlock(&the_lnet.ln_api_mutex);
2042                 return rc;
2043         }
2044
2045         rc = lnet_prepare(requested_pid);
2046         if (rc != 0) {
2047                 mutex_unlock(&the_lnet.ln_api_mutex);
2048                 return rc;
2049         }
2050
2051         /* create a network for Loopback network */
2052         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
2053         if (net == NULL) {
2054                 rc = -ENOMEM;
2055                 goto err_empty_list;
2056         }
2057
2058         /* Add in the loopback NI */
2059         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
2060                 rc = -ENOMEM;
2061                 goto err_empty_list;
2062         }
2063
2064         /* If LNet is being initialized via DLC it is possible
2065          * that the user requests not to load module parameters (ones which
2066          * are supported by DLC) on initialization.  Therefore, make sure not
2067          * to load networks, routes and forwarding from module parameters
2068          * in this case.  On cleanup in case of failure only clean up
2069          * routes if it has been loaded */
2070         if (!the_lnet.ln_nis_from_mod_params) {
2071                 rc = lnet_parse_networks(&net_head, lnet_get_networks(),
2072                                          use_tcp_bonding);
2073                 if (rc < 0)
2074                         goto err_empty_list;
2075         }
2076
2077         ni_count = lnet_startup_lndnets(&net_head);
2078         if (ni_count < 0) {
2079                 rc = ni_count;
2080                 goto err_empty_list;
2081         }
2082
2083         if (!the_lnet.ln_nis_from_mod_params) {
2084                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
2085                 if (rc != 0)
2086                         goto err_shutdown_lndnis;
2087
2088                 rc = lnet_check_routes();
2089                 if (rc != 0)
2090                         goto err_destroy_routes;
2091
2092                 rc = lnet_rtrpools_alloc(im_a_router);
2093                 if (rc != 0)
2094                         goto err_destroy_routes;
2095         }
2096
2097         rc = lnet_acceptor_start();
2098         if (rc != 0)
2099                 goto err_destroy_routes;
2100
2101         the_lnet.ln_refcount = 1;
2102         /* Now I may use my own API functions... */
2103
2104         rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_count, true);
2105         if (rc != 0)
2106                 goto err_acceptor_stop;
2107
2108         lnet_ping_target_update(pbuf, ping_mdh);
2109
2110         rc = lnet_router_checker_start();
2111         if (rc != 0)
2112                 goto err_stop_ping;
2113
2114         rc = lnet_push_target_init();
2115         if (rc != 0)
2116                 goto err_stop_router_checker;
2117
2118         rc = lnet_peer_discovery_start();
2119         if (rc != 0)
2120                 goto err_destroy_push_target;
2121
2122         lnet_fault_init();
2123         lnet_proc_init();
2124
2125         mutex_unlock(&the_lnet.ln_api_mutex);
2126
2127         return 0;
2128
2129 err_destroy_push_target:
2130         lnet_push_target_fini();
2131 err_stop_router_checker:
2132         lnet_router_checker_stop();
2133 err_stop_ping:
2134         lnet_ping_target_fini();
2135 err_acceptor_stop:
2136         the_lnet.ln_refcount = 0;
2137         lnet_acceptor_stop();
2138 err_destroy_routes:
2139         if (!the_lnet.ln_nis_from_mod_params)
2140                 lnet_destroy_routes();
2141 err_shutdown_lndnis:
2142         lnet_shutdown_lndnets();
2143 err_empty_list:
2144         lnet_unprepare();
2145         LASSERT(rc < 0);
2146         mutex_unlock(&the_lnet.ln_api_mutex);
2147         while (!list_empty(&net_head)) {
2148                 struct lnet_net *net;
2149
2150                 net = list_entry(net_head.next, struct lnet_net, net_list);
2151                 list_del_init(&net->net_list);
2152                 lnet_net_free(net);
2153         }
2154         return rc;
2155 }
2156 EXPORT_SYMBOL(LNetNIInit);
2157
2158 /**
2159  * Stop LNet interfaces, routing, and forwarding.
2160  *
2161  * Users must call this function once for each successful call to LNetNIInit().
2162  * Once the LNetNIFini() operation has been started, the results of pending
2163  * API operations are undefined.
2164  *
2165  * \return always 0 for current implementation.
2166  */
2167 int
2168 LNetNIFini()
2169 {
2170         mutex_lock(&the_lnet.ln_api_mutex);
2171
2172         LASSERT(the_lnet.ln_refcount > 0);
2173
2174         if (the_lnet.ln_refcount != 1) {
2175                 the_lnet.ln_refcount--;
2176         } else {
2177                 LASSERT(!the_lnet.ln_niinit_self);
2178
2179                 lnet_fault_fini();
2180
2181                 lnet_proc_fini();
2182                 lnet_peer_discovery_stop();
2183                 lnet_push_target_fini();
2184                 lnet_router_checker_stop();
2185                 lnet_ping_target_fini();
2186
2187                 /* Teardown fns that use my own API functions BEFORE here */
2188                 the_lnet.ln_refcount = 0;
2189
2190                 lnet_acceptor_stop();
2191                 lnet_destroy_routes();
2192                 lnet_shutdown_lndnets();
2193                 lnet_unprepare();
2194         }
2195
2196         mutex_unlock(&the_lnet.ln_api_mutex);
2197         return 0;
2198 }
2199 EXPORT_SYMBOL(LNetNIFini);
2200
2201 /**
2202  * Grabs the ni data from the ni structure and fills the out
2203  * parameters
2204  *
2205  * \param[in] ni network        interface structure
2206  * \param[out] cfg_ni           NI config information
2207  * \param[out] tun              network and LND tunables
2208  */
2209 static void
2210 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
2211                    struct lnet_ioctl_config_lnd_tunables *tun,
2212                    struct lnet_ioctl_element_stats *stats,
2213                    __u32 tun_size)
2214 {
2215         size_t min_size = 0;
2216         int i;
2217
2218         if (!ni || !cfg_ni || !tun)
2219                 return;
2220
2221         if (ni->ni_interfaces[0] != NULL) {
2222                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
2223                         if (ni->ni_interfaces[i] != NULL) {
2224                                 strncpy(cfg_ni->lic_ni_intf[i],
2225                                         ni->ni_interfaces[i],
2226                                         sizeof(cfg_ni->lic_ni_intf[i]));
2227                         }
2228                 }
2229         }
2230
2231         cfg_ni->lic_nid = ni->ni_nid;
2232         if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
2233                 cfg_ni->lic_status = LNET_NI_STATUS_UP;
2234         else
2235                 cfg_ni->lic_status = ni->ni_status->ns_status;
2236         cfg_ni->lic_tcp_bonding = use_tcp_bonding;
2237         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
2238
2239         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
2240
2241         if (stats) {
2242                 stats->send_count = atomic_read(&ni->ni_stats.send_count);
2243                 stats->recv_count = atomic_read(&ni->ni_stats.recv_count);
2244         }
2245
2246         /*
2247          * tun->lt_tun will always be present, but in order to be
2248          * backwards compatible, we need to deal with the cases when
2249          * tun->lt_tun is smaller than what the kernel has, because it
2250          * comes from an older version of a userspace program, then we'll
2251          * need to copy as much information as we have available space.
2252          */
2253         min_size = tun_size - sizeof(tun->lt_cmn);
2254         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
2255
2256         /* copy over the cpts */
2257         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
2258             ni->ni_cpts == NULL)  {
2259                 for (i = 0; i < ni->ni_ncpts; i++)
2260                         cfg_ni->lic_cpts[i] = i;
2261         } else {
2262                 for (i = 0;
2263                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
2264                      i < LNET_MAX_SHOW_NUM_CPT;
2265                      i++)
2266                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
2267         }
2268         cfg_ni->lic_ncpts = ni->ni_ncpts;
2269 }
2270
2271 /**
2272  * NOTE: This is a legacy function left in the code to be backwards
2273  * compatible with older userspace programs. It should eventually be
2274  * removed.
2275  *
2276  * Grabs the ni data from the ni structure and fills the out
2277  * parameters
2278  *
2279  * \param[in] ni network        interface structure
2280  * \param[out] config           config information
2281  */
2282 static void
2283 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
2284                          struct lnet_ioctl_config_data *config)
2285 {
2286         struct lnet_ioctl_net_config *net_config;
2287         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
2288         size_t min_size, tunable_size = 0;
2289         int i;
2290
2291         if (!ni || !config)
2292                 return;
2293
2294         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
2295         if (!net_config)
2296                 return;
2297
2298         BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
2299                      ARRAY_SIZE(net_config->ni_interfaces));
2300
2301         for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
2302                 if (!ni->ni_interfaces[i])
2303                         break;
2304
2305                 strncpy(net_config->ni_interfaces[i],
2306                         ni->ni_interfaces[i],
2307                         sizeof(net_config->ni_interfaces[i]));
2308         }
2309
2310         config->cfg_nid = ni->ni_nid;
2311         config->cfg_config_u.cfg_net.net_peer_timeout =
2312                 ni->ni_net->net_tunables.lct_peer_timeout;
2313         config->cfg_config_u.cfg_net.net_max_tx_credits =
2314                 ni->ni_net->net_tunables.lct_max_tx_credits;
2315         config->cfg_config_u.cfg_net.net_peer_tx_credits =
2316                 ni->ni_net->net_tunables.lct_peer_tx_credits;
2317         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
2318                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
2319
2320         if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
2321                 net_config->ni_status = LNET_NI_STATUS_UP;
2322         else
2323                 net_config->ni_status = ni->ni_status->ns_status;
2324
2325         if (ni->ni_cpts) {
2326                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
2327
2328                 for (i = 0; i < num_cpts; i++)
2329                         net_config->ni_cpts[i] = ni->ni_cpts[i];
2330
2331                 config->cfg_ncpts = num_cpts;
2332         }
2333
2334         /*
2335          * See if user land tools sent in a newer and larger version
2336          * of struct lnet_tunables than what the kernel uses.
2337          */
2338         min_size = sizeof(*config) + sizeof(*net_config);
2339
2340         if (config->cfg_hdr.ioc_len > min_size)
2341                 tunable_size = config->cfg_hdr.ioc_len - min_size;
2342
2343         /* Don't copy too much data to user space */
2344         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
2345         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
2346
2347         if (lnd_cfg && min_size) {
2348                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
2349                 config->cfg_config_u.cfg_net.net_interface_count = 1;
2350
2351                 /* Tell user land that kernel side has less data */
2352                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
2353                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
2354                         config->cfg_hdr.ioc_len -= min_size;
2355                 }
2356         }
2357 }
2358
2359 struct lnet_ni *
2360 lnet_get_ni_idx_locked(int idx)
2361 {
2362         struct lnet_ni          *ni;
2363         struct lnet_net         *net;
2364
2365         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2366                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2367                         if (idx-- == 0)
2368                                 return ni;
2369                 }
2370         }
2371
2372         return NULL;
2373 }
2374
2375 struct lnet_ni *
2376 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
2377 {
2378         struct lnet_ni          *ni;
2379         struct lnet_net         *net = mynet;
2380
2381         if (prev == NULL) {
2382                 if (net == NULL)
2383                         net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
2384                                         net_list);
2385                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2386                                 ni_netlist);
2387
2388                 return ni;
2389         }
2390
2391         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
2392                 /* if you reached the end of the ni list and the net is
2393                  * specified, then there are no more nis in that net */
2394                 if (net != NULL)
2395                         return NULL;
2396
2397                 /* we reached the end of this net ni list. move to the
2398                  * next net */
2399                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
2400                         /* no more nets and no more NIs. */
2401                         return NULL;
2402
2403                 /* get the next net */
2404                 net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
2405                                  net_list);
2406                 /* get the ni on it */
2407                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2408                                 ni_netlist);
2409
2410                 return ni;
2411         }
2412
2413         /* there are more nis left */
2414         ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
2415
2416         return ni;
2417 }
2418
2419 int
2420 lnet_get_net_config(struct lnet_ioctl_config_data *config)
2421 {
2422         struct lnet_ni *ni;
2423         int cpt;
2424         int rc = -ENOENT;
2425         int idx = config->cfg_count;
2426
2427         cpt = lnet_net_lock_current();
2428
2429         ni = lnet_get_ni_idx_locked(idx);
2430
2431         if (ni != NULL) {
2432                 rc = 0;
2433                 lnet_ni_lock(ni);
2434                 lnet_fill_ni_info_legacy(ni, config);
2435                 lnet_ni_unlock(ni);
2436         }
2437
2438         lnet_net_unlock(cpt);
2439         return rc;
2440 }
2441
2442 int
2443 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
2444                    struct lnet_ioctl_config_lnd_tunables *tun,
2445                    struct lnet_ioctl_element_stats *stats,
2446                    __u32 tun_size)
2447 {
2448         struct lnet_ni          *ni;
2449         int                     cpt;
2450         int                     rc = -ENOENT;
2451
2452         if (!cfg_ni || !tun || !stats)
2453                 return -EINVAL;
2454
2455         cpt = lnet_net_lock_current();
2456
2457         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
2458
2459         if (ni) {
2460                 rc = 0;
2461                 lnet_ni_lock(ni);
2462                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
2463                 lnet_ni_unlock(ni);
2464         }
2465
2466         lnet_net_unlock(cpt);
2467         return rc;
2468 }
2469
2470 static int lnet_add_net_common(struct lnet_net *net,
2471                                struct lnet_ioctl_config_lnd_tunables *tun)
2472 {
2473         __u32                   net_id;
2474         struct lnet_ping_buffer *pbuf;
2475         lnet_handle_md_t        ping_mdh;
2476         int                     rc;
2477         lnet_remotenet_t        *rnet;
2478         int                     net_ni_count;
2479         int                     num_acceptor_nets;
2480
2481         lnet_net_lock(LNET_LOCK_EX);
2482         rnet = lnet_find_rnet_locked(net->net_id);
2483         lnet_net_unlock(LNET_LOCK_EX);
2484         /*
2485          * make sure that the net added doesn't invalidate the current
2486          * configuration LNet is keeping
2487          */
2488         if (rnet) {
2489                 CERROR("Adding net %s will invalidate routing configuration\n",
2490                        libcfs_net2str(net->net_id));
2491                 lnet_net_free(net);
2492                 return -EUSERS;
2493         }
2494
2495         /*
2496          * make sure you calculate the correct number of slots in the ping
2497          * buffer. Since the ping info is a flattened list of all the NIs,
2498          * we should allocate enough slots to accomodate the number of NIs
2499          * which will be added.
2500          *
2501          * since ni hasn't been configured yet, use
2502          * lnet_get_net_ni_count_pre() which checks the net_ni_added list
2503          */
2504         net_ni_count = lnet_get_net_ni_count_pre(net);
2505
2506         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2507                                     net_ni_count + lnet_get_ni_count(),
2508                                     false);
2509         if (rc < 0) {
2510                 lnet_net_free(net);
2511                 return rc;
2512         }
2513
2514         if (tun)
2515                 memcpy(&net->net_tunables,
2516                        &tun->lt_cmn, sizeof(net->net_tunables));
2517         else
2518                 memset(&net->net_tunables, -1, sizeof(net->net_tunables));
2519
2520         /*
2521          * before starting this network get a count of the current TCP
2522          * networks which require the acceptor thread running. If that
2523          * count is == 0 before we start up this network, then we'd want to
2524          * start up the acceptor thread after starting up this network
2525          */
2526         num_acceptor_nets = lnet_count_acceptor_nets();
2527
2528         net_id = net->net_id;
2529
2530         rc = lnet_startup_lndnet(net,
2531                                  (tun) ? &tun->lt_tun : NULL);
2532         if (rc < 0)
2533                 goto failed;
2534
2535         lnet_net_lock(LNET_LOCK_EX);
2536         net = lnet_get_net_locked(net_id);
2537         lnet_net_unlock(LNET_LOCK_EX);
2538
2539         LASSERT(net);
2540
2541         /*
2542          * Start the acceptor thread if this is the first network
2543          * being added that requires the thread.
2544          */
2545         if (net->net_lnd->lnd_accept && num_acceptor_nets == 0) {
2546                 rc = lnet_acceptor_start();
2547                 if (rc < 0) {
2548                         /* shutdown the net that we just started */
2549                         CERROR("Failed to start up acceptor thread\n");
2550                         lnet_shutdown_lndnet(net);
2551                         goto failed;
2552                 }
2553         }
2554
2555         lnet_net_lock(LNET_LOCK_EX);
2556         lnet_peer_net_added(net);
2557         lnet_net_unlock(LNET_LOCK_EX);
2558
2559         lnet_ping_target_update(pbuf, ping_mdh);
2560
2561         return 0;
2562
2563 failed:
2564         lnet_ping_md_unlink(pbuf, &ping_mdh);
2565         lnet_ping_buffer_decref(pbuf);
2566         return rc;
2567 }
2568
2569 static int lnet_handle_legacy_ip2nets(char *ip2nets,
2570                                       struct lnet_ioctl_config_lnd_tunables *tun)
2571 {
2572         struct lnet_net *net;
2573         char *nets;
2574         int rc;
2575         struct list_head net_head;
2576
2577         INIT_LIST_HEAD(&net_head);
2578
2579         rc = lnet_parse_ip2nets(&nets, ip2nets);
2580         if (rc < 0)
2581                 return rc;
2582
2583         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2584         if (rc < 0)
2585                 return rc;
2586
2587         mutex_lock(&the_lnet.ln_api_mutex);
2588         while (!list_empty(&net_head)) {
2589                 net = list_entry(net_head.next, struct lnet_net, net_list);
2590                 list_del_init(&net->net_list);
2591                 rc = lnet_add_net_common(net, tun);
2592                 if (rc < 0)
2593                         goto out;
2594         }
2595
2596 out:
2597         mutex_unlock(&the_lnet.ln_api_mutex);
2598
2599         while (!list_empty(&net_head)) {
2600                 net = list_entry(net_head.next, struct lnet_net, net_list);
2601                 list_del_init(&net->net_list);
2602                 lnet_net_free(net);
2603         }
2604         return rc;
2605 }
2606
2607 int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf)
2608 {
2609         struct lnet_net *net;
2610         struct lnet_ni *ni;
2611         struct lnet_ioctl_config_lnd_tunables *tun = NULL;
2612         int rc, i;
2613         __u32 net_id;
2614
2615         /* get the tunables if they are available */
2616         if (conf->lic_cfg_hdr.ioc_len >=
2617             sizeof(*conf) + sizeof(*tun))
2618                 tun = (struct lnet_ioctl_config_lnd_tunables *)
2619                         conf->lic_bulk;
2620
2621         /* handle legacy ip2nets from DLC */
2622         if (conf->lic_legacy_ip2nets[0] != '\0')
2623                 return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
2624                                                   tun);
2625
2626         net_id = LNET_NIDNET(conf->lic_nid);
2627
2628         net = lnet_net_alloc(net_id, NULL);
2629         if (!net)
2630                 return -ENOMEM;
2631
2632         for (i = 0; i < conf->lic_ncpts; i++) {
2633                 if (conf->lic_cpts[i] >= LNET_CPT_NUMBER)
2634                         return -EINVAL;
2635         }
2636
2637         ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
2638                                        conf->lic_ni_intf[0]);
2639         if (!ni)
2640                 return -ENOMEM;
2641
2642         mutex_lock(&the_lnet.ln_api_mutex);
2643
2644         rc = lnet_add_net_common(net, tun);
2645
2646         mutex_unlock(&the_lnet.ln_api_mutex);
2647
2648         return rc;
2649 }
2650
2651 int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf)
2652 {
2653         struct lnet_net  *net;
2654         struct lnet_ni *ni;
2655         __u32 net_id = LNET_NIDNET(conf->lic_nid);
2656         struct lnet_ping_buffer *pbuf;
2657         lnet_handle_md_t  ping_mdh;
2658         int               rc;
2659         int               net_count;
2660         __u32             addr;
2661
2662         /* don't allow userspace to shutdown the LOLND */
2663         if (LNET_NETTYP(net_id) == LOLND)
2664                 return -EINVAL;
2665
2666         mutex_lock(&the_lnet.ln_api_mutex);
2667
2668         lnet_net_lock(0);
2669
2670         net = lnet_get_net_locked(net_id);
2671         if (!net) {
2672                 CERROR("net %s not found\n",
2673                        libcfs_net2str(net_id));
2674                 rc = -ENOENT;
2675                 goto unlock_net;
2676         }
2677
2678         addr = LNET_NIDADDR(conf->lic_nid);
2679         if (addr == 0) {
2680                 /* remove the entire net */
2681                 net_count = lnet_get_net_ni_count_locked(net);
2682
2683                 lnet_net_unlock(0);
2684
2685                 /* create and link a new ping info, before removing the old one */
2686                 rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2687                                         lnet_get_ni_count() - net_count,
2688                                         false);
2689                 if (rc != 0)
2690                         goto unlock_api_mutex;
2691
2692                 lnet_shutdown_lndnet(net);
2693
2694                 if (lnet_count_acceptor_nets() == 0)
2695                         lnet_acceptor_stop();
2696
2697                 lnet_ping_target_update(pbuf, ping_mdh);
2698
2699                 goto unlock_api_mutex;
2700         }
2701
2702         ni = lnet_nid2ni_locked(conf->lic_nid, 0);
2703         if (!ni) {
2704                 CERROR("nid %s not found\n",
2705                        libcfs_nid2str(conf->lic_nid));
2706                 rc = -ENOENT;
2707                 goto unlock_net;
2708         }
2709
2710         net_count = lnet_get_net_ni_count_locked(net);
2711
2712         lnet_net_unlock(0);
2713
2714         /* create and link a new ping info, before removing the old one */
2715         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2716                                   lnet_get_ni_count() - 1, false);
2717         if (rc != 0)
2718                 goto unlock_api_mutex;
2719
2720         lnet_shutdown_lndni(ni);
2721
2722         if (lnet_count_acceptor_nets() == 0)
2723                 lnet_acceptor_stop();
2724
2725         lnet_ping_target_update(pbuf, ping_mdh);
2726
2727         /* check if the net is empty and remove it if it is */
2728         if (net_count == 1)
2729                 lnet_shutdown_lndnet(net);
2730
2731         goto unlock_api_mutex;
2732
2733 unlock_net:
2734         lnet_net_unlock(0);
2735 unlock_api_mutex:
2736         mutex_unlock(&the_lnet.ln_api_mutex);
2737
2738         return rc;
2739 }
2740
2741 /*
2742  * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
2743  * They are only expected to be called for unique networks.
2744  * That can be as a result of older DLC library
2745  * calls. Multi-Rail DLC and beyond no longer uses these APIs.
2746  */
2747 int
2748 lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
2749 {
2750         struct lnet_net         *net;
2751         struct list_head        net_head;
2752         int                     rc;
2753         struct lnet_ioctl_config_lnd_tunables tun;
2754         char *nets = conf->cfg_config_u.cfg_net.net_intf;
2755
2756         INIT_LIST_HEAD(&net_head);
2757
2758         /* Create a net/ni structures for the network string */
2759         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2760         if (rc <= 0)
2761                 return rc == 0 ? -EINVAL : rc;
2762
2763         mutex_lock(&the_lnet.ln_api_mutex);
2764
2765         if (rc > 1) {
2766                 rc = -EINVAL; /* only add one network per call */
2767                 goto failed;
2768         }
2769
2770         net = list_entry(net_head.next, struct lnet_net, net_list);
2771         list_del_init(&net->net_list);
2772
2773         LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));
2774
2775         memset(&tun, 0, sizeof(tun));
2776
2777         tun.lt_cmn.lct_peer_timeout =
2778           conf->cfg_config_u.cfg_net.net_peer_timeout;
2779         tun.lt_cmn.lct_peer_tx_credits =
2780           conf->cfg_config_u.cfg_net.net_peer_tx_credits;
2781         tun.lt_cmn.lct_peer_rtr_credits =
2782           conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
2783         tun.lt_cmn.lct_max_tx_credits =
2784           conf->cfg_config_u.cfg_net.net_max_tx_credits;
2785
2786         rc = lnet_add_net_common(net, &tun);
2787         if (rc != 0)
2788                 goto failed;
2789
2790         return 0;
2791
2792 failed:
2793         mutex_unlock(&the_lnet.ln_api_mutex);
2794         while (!list_empty(&net_head)) {
2795                 net = list_entry(net_head.next, struct lnet_net, net_list);
2796                 list_del_init(&net->net_list);
2797                 lnet_net_free(net);
2798         }
2799         return rc;
2800 }
2801
2802 int
2803 lnet_dyn_del_net(__u32 net_id)
2804 {
2805         struct lnet_net  *net;
2806         struct lnet_ping_buffer *pbuf;
2807         lnet_handle_md_t  ping_mdh;
2808         int               rc;
2809         int               net_ni_count;
2810
2811         /* don't allow userspace to shutdown the LOLND */
2812         if (LNET_NETTYP(net_id) == LOLND)
2813                 return -EINVAL;
2814
2815         mutex_lock(&the_lnet.ln_api_mutex);
2816
2817         lnet_net_lock(0);
2818
2819         net = lnet_get_net_locked(net_id);
2820         if (net == NULL) {
2821                 rc = -EINVAL;
2822                 goto out;
2823         }
2824
2825         net_ni_count = lnet_get_net_ni_count_locked(net);
2826
2827         lnet_net_unlock(0);
2828
2829         /* create and link a new ping info, before removing the old one */
2830         rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
2831                                     lnet_get_ni_count() - net_ni_count, false);
2832         if (rc != 0)
2833                 goto out;
2834
2835         lnet_shutdown_lndnet(net);
2836
2837         if (lnet_count_acceptor_nets() == 0)
2838                 lnet_acceptor_stop();
2839
2840         lnet_ping_target_update(pbuf, ping_mdh);
2841
2842 out:
2843         mutex_unlock(&the_lnet.ln_api_mutex);
2844
2845         return rc;
2846 }
2847
2848 void lnet_incr_dlc_seq(void)
2849 {
2850         atomic_inc(&lnet_dlc_seq_no);
2851 }
2852
2853 __u32 lnet_get_dlc_seq_locked(void)
2854 {
2855         return atomic_read(&lnet_dlc_seq_no);
2856 }
2857
2858 /**
2859  * LNet ioctl handler.
2860  *
2861  */
2862 int
2863 LNetCtl(unsigned int cmd, void *arg)
2864 {
2865         struct libcfs_ioctl_data *data = arg;
2866         struct lnet_ioctl_config_data *config;
2867         lnet_process_id_t         id = {0};
2868         lnet_ni_t                *ni;
2869         int                       rc;
2870
2871         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
2872                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
2873
2874         switch (cmd) {
2875         case IOC_LIBCFS_GET_NI:
2876                 rc = LNetGetId(data->ioc_count, &id);
2877                 data->ioc_nid = id.nid;
2878                 return rc;
2879
2880         case IOC_LIBCFS_FAIL_NID:
2881                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2882
2883         case IOC_LIBCFS_ADD_ROUTE:
2884                 config = arg;
2885
2886                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2887                         return -EINVAL;
2888
2889                 mutex_lock(&the_lnet.ln_api_mutex);
2890                 rc = lnet_add_route(config->cfg_net,
2891                                     config->cfg_config_u.cfg_route.rtr_hop,
2892                                     config->cfg_nid,
2893                                     config->cfg_config_u.cfg_route.
2894                                         rtr_priority);
2895                 if (rc == 0) {
2896                         rc = lnet_check_routes();
2897                         if (rc != 0)
2898                                 lnet_del_route(config->cfg_net,
2899                                                config->cfg_nid);
2900                 }
2901                 mutex_unlock(&the_lnet.ln_api_mutex);
2902                 return rc;
2903
2904         case IOC_LIBCFS_DEL_ROUTE:
2905                 config = arg;
2906
2907                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2908                         return -EINVAL;
2909
2910                 mutex_lock(&the_lnet.ln_api_mutex);
2911                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2912                 mutex_unlock(&the_lnet.ln_api_mutex);
2913                 return rc;
2914
2915         case IOC_LIBCFS_GET_ROUTE:
2916                 config = arg;
2917
2918                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2919                         return -EINVAL;
2920
2921                 mutex_lock(&the_lnet.ln_api_mutex);
2922                 rc = lnet_get_route(config->cfg_count,
2923                                     &config->cfg_net,
2924                                     &config->cfg_config_u.cfg_route.rtr_hop,
2925                                     &config->cfg_nid,
2926                                     &config->cfg_config_u.cfg_route.rtr_flags,
2927                                     &config->cfg_config_u.cfg_route.
2928                                         rtr_priority);
2929                 mutex_unlock(&the_lnet.ln_api_mutex);
2930                 return rc;
2931
2932         case IOC_LIBCFS_GET_LOCAL_NI: {
2933                 struct lnet_ioctl_config_ni *cfg_ni;
2934                 struct lnet_ioctl_config_lnd_tunables *tun = NULL;
2935                 struct lnet_ioctl_element_stats *stats;
2936                 __u32 tun_size;
2937
2938                 cfg_ni = arg;
2939                 /* get the tunables if they are available */
2940                 if (cfg_ni->lic_cfg_hdr.ioc_len <
2941                     sizeof(*cfg_ni) + sizeof(*stats)+ sizeof(*tun))
2942                         return -EINVAL;
2943
2944                 stats = (struct lnet_ioctl_element_stats *)
2945                         cfg_ni->lic_bulk;
2946                 tun = (struct lnet_ioctl_config_lnd_tunables *)
2947                                 (cfg_ni->lic_bulk + sizeof(*stats));
2948
2949                 tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
2950                         sizeof(*stats);
2951
2952                 mutex_lock(&the_lnet.ln_api_mutex);
2953                 rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
2954                 mutex_unlock(&the_lnet.ln_api_mutex);
2955                 return rc;
2956         }
2957
2958         case IOC_LIBCFS_GET_NET: {
2959                 size_t total = sizeof(*config) +
2960                                sizeof(struct lnet_ioctl_net_config);
2961                 config = arg;
2962
2963                 if (config->cfg_hdr.ioc_len < total)
2964                         return -EINVAL;
2965
2966                 mutex_lock(&the_lnet.ln_api_mutex);
2967                 rc = lnet_get_net_config(config);
2968                 mutex_unlock(&the_lnet.ln_api_mutex);
2969                 return rc;
2970         }
2971
2972         case IOC_LIBCFS_GET_LNET_STATS:
2973         {
2974                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2975
2976                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
2977                         return -EINVAL;
2978
2979                 mutex_lock(&the_lnet.ln_api_mutex);
2980                 lnet_counters_get(&lnet_stats->st_cntrs);
2981                 mutex_unlock(&the_lnet.ln_api_mutex);
2982                 return 0;
2983         }
2984
2985         case IOC_LIBCFS_CONFIG_RTR:
2986                 config = arg;
2987
2988                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2989                         return -EINVAL;
2990
2991                 mutex_lock(&the_lnet.ln_api_mutex);
2992                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2993                         rc = lnet_rtrpools_enable();
2994                         mutex_unlock(&the_lnet.ln_api_mutex);
2995                         return rc;
2996                 }
2997                 lnet_rtrpools_disable();
2998                 mutex_unlock(&the_lnet.ln_api_mutex);
2999                 return 0;
3000
3001         case IOC_LIBCFS_ADD_BUF:
3002                 config = arg;
3003
3004                 if (config->cfg_hdr.ioc_len < sizeof(*config))
3005                         return -EINVAL;
3006
3007                 mutex_lock(&the_lnet.ln_api_mutex);
3008                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
3009                                                 buf_tiny,
3010                                           config->cfg_config_u.cfg_buffers.
3011                                                 buf_small,
3012                                           config->cfg_config_u.cfg_buffers.
3013                                                 buf_large);
3014                 mutex_unlock(&the_lnet.ln_api_mutex);
3015                 return rc;
3016
3017         case IOC_LIBCFS_SET_NUMA_RANGE: {
3018                 struct lnet_ioctl_set_value *numa;
3019                 numa = arg;
3020                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
3021                         return -EINVAL;
3022                 lnet_net_lock(LNET_LOCK_EX);
3023                 lnet_numa_range = numa->sv_value;
3024                 lnet_net_unlock(LNET_LOCK_EX);
3025                 return 0;
3026         }
3027
3028         case IOC_LIBCFS_GET_NUMA_RANGE: {
3029                 struct lnet_ioctl_set_value *numa;
3030                 numa = arg;
3031                 if (numa->sv_hdr.ioc_len != sizeof(*numa))
3032                         return -EINVAL;
3033                 numa->sv_value = lnet_numa_range;
3034                 return 0;
3035         }
3036
3037         case IOC_LIBCFS_SET_MAX_INTF: {
3038                 struct lnet_ioctl_set_value *max_intf;
3039                 max_intf = arg;
3040                 if (max_intf->sv_hdr.ioc_len != sizeof(*max_intf) ||
3041                     max_intf->sv_value < LNET_MIN_INTERFACES)
3042                         return -EINVAL;
3043                 mutex_lock(&the_lnet.ln_api_mutex);
3044                 lnet_max_interfaces = max_intf->sv_value;
3045                 mutex_unlock(&the_lnet.ln_api_mutex);
3046                 return 0;
3047         }
3048
3049         case IOC_LIBCFS_GET_MAX_INTF: {
3050                 struct lnet_ioctl_set_value *max_intf;
3051                 max_intf = arg;
3052                 if (max_intf->sv_hdr.ioc_len != sizeof(*max_intf))
3053                         return -EINVAL;
3054                 max_intf->sv_value = lnet_max_interfaces;
3055                 return 0;
3056         }
3057
3058         case IOC_LIBCFS_SET_DISCOVERY: {
3059                 struct lnet_ioctl_set_value *discovery;
3060                 discovery = arg;
3061                 if (discovery->sv_hdr.ioc_len != sizeof(*discovery) ||
3062                     discovery->sv_value > 1)
3063                         return -EINVAL;
3064                 mutex_lock(&the_lnet.ln_api_mutex);
3065                 lnet_peer_discovery_enabled = discovery->sv_value;
3066                 mutex_unlock(&the_lnet.ln_api_mutex);
3067                 return 0;
3068         }
3069
3070         case IOC_LIBCFS_GET_DISCOVERY: {
3071                 struct lnet_ioctl_set_value *discovery;
3072                 discovery = arg;
3073                 if (discovery->sv_hdr.ioc_len != sizeof(*discovery))
3074                         return -EINVAL;
3075                 discovery->sv_value = lnet_peer_discovery_enabled;
3076                 return 0;
3077         }
3078
3079         case IOC_LIBCFS_GET_BUF: {
3080                 struct lnet_ioctl_pool_cfg *pool_cfg;
3081                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
3082
3083                 config = arg;
3084
3085                 if (config->cfg_hdr.ioc_len < total)
3086                         return -EINVAL;
3087
3088                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
3089
3090                 mutex_lock(&the_lnet.ln_api_mutex);
3091                 rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
3092                 mutex_unlock(&the_lnet.ln_api_mutex);
3093                 return rc;
3094         }
3095
3096         case IOC_LIBCFS_ADD_PEER_NI: {
3097                 struct lnet_ioctl_peer_cfg *cfg = arg;
3098
3099                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
3100                         return -EINVAL;
3101
3102                 mutex_lock(&the_lnet.ln_api_mutex);
3103                 rc = lnet_add_peer_ni(cfg->prcfg_prim_nid,
3104                                       cfg->prcfg_cfg_nid,
3105                                       cfg->prcfg_mr);
3106                 mutex_unlock(&the_lnet.ln_api_mutex);
3107                 return rc;
3108         }
3109
3110         case IOC_LIBCFS_DEL_PEER_NI: {
3111                 struct lnet_ioctl_peer_cfg *cfg = arg;
3112
3113                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
3114                         return -EINVAL;
3115
3116                 mutex_lock(&the_lnet.ln_api_mutex);
3117                 rc = lnet_del_peer_ni(cfg->prcfg_prim_nid,
3118                                       cfg->prcfg_cfg_nid);
3119                 mutex_unlock(&the_lnet.ln_api_mutex);
3120                 return rc;
3121         }
3122
3123         case IOC_LIBCFS_GET_PEER_INFO: {
3124                 struct lnet_ioctl_peer *peer_info = arg;
3125
3126                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
3127                         return -EINVAL;
3128
3129                 mutex_lock(&the_lnet.ln_api_mutex);
3130                 rc = lnet_get_peer_ni_info(
3131                    peer_info->pr_count,
3132                    &peer_info->pr_nid,
3133                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
3134                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
3135                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
3136                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
3137                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
3138                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
3139                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
3140                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
3141                 mutex_unlock(&the_lnet.ln_api_mutex);
3142                 return rc;
3143         }
3144
3145         case IOC_LIBCFS_GET_PEER_NI: {
3146                 struct lnet_ioctl_peer_cfg *cfg = arg;
3147                 struct lnet_peer_ni_credit_info *lpni_cri;
3148                 struct lnet_ioctl_element_stats *lpni_stats;
3149                 size_t total = sizeof(*cfg) + sizeof(*lpni_cri) +
3150                                sizeof(*lpni_stats);
3151
3152                 if (cfg->prcfg_hdr.ioc_len < total)
3153                         return -EINVAL;
3154
3155                 lpni_cri = (struct lnet_peer_ni_credit_info*) cfg->prcfg_bulk;
3156                 lpni_stats = (struct lnet_ioctl_element_stats *)
3157                              (cfg->prcfg_bulk + sizeof(*lpni_cri));
3158
3159                 mutex_lock(&the_lnet.ln_api_mutex);
3160                 rc = lnet_get_peer_info(cfg->prcfg_idx, &cfg->prcfg_prim_nid,
3161                                         &cfg->prcfg_cfg_nid, &cfg->prcfg_mr,
3162                                         lpni_cri, lpni_stats);
3163                 mutex_unlock(&the_lnet.ln_api_mutex);
3164                 return rc;
3165         }
3166
3167         case IOC_LIBCFS_NOTIFY_ROUTER: {
3168                 unsigned long jiffies_passed;
3169
3170                 jiffies_passed = ktime_get_real_seconds() - data->ioc_u64[0];
3171                 jiffies_passed = cfs_time_seconds(jiffies_passed);
3172
3173                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
3174                                    jiffies - jiffies_passed);
3175         }
3176
3177         case IOC_LIBCFS_LNET_DIST:
3178                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
3179                 if (rc < 0 && rc != -EHOSTUNREACH)
3180                         return rc;
3181
3182                 data->ioc_u32[0] = rc;
3183                 return 0;
3184
3185         case IOC_LIBCFS_TESTPROTOCOMPAT:
3186                 lnet_net_lock(LNET_LOCK_EX);
3187                 the_lnet.ln_testprotocompat = data->ioc_flags;
3188                 lnet_net_unlock(LNET_LOCK_EX);
3189                 return 0;
3190
3191         case IOC_LIBCFS_LNET_FAULT:
3192                 return lnet_fault_ctl(data->ioc_flags, data);
3193
3194         case IOC_LIBCFS_PING: {
3195                 signed long timeout;
3196
3197                 id.nid = data->ioc_nid;
3198                 id.pid = data->ioc_u32[0];
3199
3200                 /* Don't block longer than 2 minutes */
3201                 if (data->ioc_u32[1] > 120 * MSEC_PER_SEC)
3202                         return -EINVAL;
3203
3204                 /* If timestamp is negative then disable timeout */
3205                 if ((s32)data->ioc_u32[1] < 0)
3206                         timeout = MAX_SCHEDULE_TIMEOUT;
3207                 else
3208                         timeout = msecs_to_jiffies(data->ioc_u32[1]);
3209
3210                 rc = lnet_ping(id, timeout, data->ioc_pbuf1,
3211                                data->ioc_plen1 / sizeof(lnet_process_id_t));
3212                 if (rc < 0)
3213                         return rc;
3214                 data->ioc_count = rc;
3215                 return 0;
3216         }
3217
3218         default:
3219                 ni = lnet_net2ni_addref(data->ioc_net);
3220                 if (ni == NULL)
3221                         return -EINVAL;
3222
3223                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
3224                         rc = -EINVAL;
3225                 else
3226                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
3227
3228                 lnet_ni_decref(ni);
3229                 return rc;
3230         }
3231         /* not reached */
3232 }
3233 EXPORT_SYMBOL(LNetCtl);
3234
3235 void LNetDebugPeer(lnet_process_id_t id)
3236 {
3237         lnet_debug_peer(id.nid);
3238 }
3239 EXPORT_SYMBOL(LNetDebugPeer);
3240
3241 /**
3242  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
3243  * all interfaces share a same PID, as requested by LNetNIInit().
3244  *
3245  * \param index Index of the interface to look up.
3246  * \param id On successful return, this location will hold the
3247  * lnet_process_id_t ID of the interface.
3248  *
3249  * \retval 0 If an interface exists at \a index.
3250  * \retval -ENOENT If no interface has been found.
3251  */
3252 int
3253 LNetGetId(unsigned int index, lnet_process_id_t *id)
3254 {
3255         struct lnet_ni   *ni;
3256         struct lnet_net  *net;
3257         int               cpt;
3258         int               rc = -ENOENT;
3259
3260         LASSERT(the_lnet.ln_refcount > 0);
3261
3262         cpt = lnet_net_lock_current();
3263
3264         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3265                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3266                         if (index-- != 0)
3267                                 continue;
3268
3269                         id->nid = ni->ni_nid;
3270                         id->pid = the_lnet.ln_pid;
3271                         rc = 0;
3272                         break;
3273                 }
3274         }
3275
3276         lnet_net_unlock(cpt);
3277         return rc;
3278 }
3279 EXPORT_SYMBOL(LNetGetId);
3280
3281 /**
3282  * Print a string representation of handle \a h into buffer \a str of
3283  * \a len bytes.
3284  */
3285 void
3286 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
3287 {
3288         snprintf(str, len, "%#llx", h.cookie);
3289 }
3290 EXPORT_SYMBOL(LNetSnprintHandle);
3291
3292 static int lnet_ping(lnet_process_id_t id, signed long timeout,
3293                      lnet_process_id_t __user *ids, int n_ids)
3294 {
3295         lnet_handle_eq_t     eqh;
3296         lnet_handle_md_t     mdh;
3297         lnet_event_t         event;
3298         lnet_md_t            md = { NULL };
3299         int                  which;
3300         int                  unlinked = 0;
3301         int                  replied = 0;
3302         const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC);
3303         struct lnet_ping_buffer *pbuf;
3304         lnet_process_id_t    tmpid;
3305         int                  i;
3306         int                  nob;
3307         int                  rc;
3308         int                  rc2;
3309         sigset_t         blocked;
3310
3311         /* n_ids limit is arbitrary */
3312         if (n_ids <= 0 || n_ids > lnet_max_interfaces || id.nid == LNET_NID_ANY)
3313                 return -EINVAL;
3314
3315         if (id.pid == LNET_PID_ANY)
3316                 id.pid = LNET_PID_LUSTRE;
3317
3318         pbuf = lnet_ping_buffer_alloc(n_ids, GFP_NOFS);
3319         if (!pbuf)
3320                 return -ENOMEM;
3321
3322         /* NB 2 events max (including any unlink event) */
3323         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
3324         if (rc != 0) {
3325                 CERROR("Can't allocate EQ: %d\n", rc);
3326                 goto out_0;
3327         }
3328
3329         /* initialize md content */
3330         md.start     = &pbuf->pb_info;
3331         md.length    = LNET_PING_INFO_SIZE(n_ids);
3332         md.threshold = 2; /*GET/REPLY*/
3333         md.max_size  = 0;
3334         md.options   = LNET_MD_TRUNCATE;
3335         md.user_ptr  = NULL;
3336         md.eq_handle = eqh;
3337
3338         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
3339         if (rc != 0) {
3340                 CERROR("Can't bind MD: %d\n", rc);
3341                 goto out_1;
3342         }
3343
3344         rc = LNetGet(LNET_NID_ANY, mdh, id,
3345                      LNET_RESERVED_PORTAL,
3346                      LNET_PROTO_PING_MATCHBITS, 0);
3347
3348         if (rc != 0) {
3349                 /* Don't CERROR; this could be deliberate! */
3350
3351                 rc2 = LNetMDUnlink(mdh);
3352                 LASSERT(rc2 == 0);
3353
3354                 /* NB must wait for the UNLINK event below... */
3355                 unlinked = 1;
3356                 timeout = a_long_time;
3357         }
3358
3359         do {
3360                 /* MUST block for unlink to complete */
3361                 if (unlinked)
3362                         blocked = cfs_block_allsigs();
3363
3364                 rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which);
3365
3366                 if (unlinked)
3367                         cfs_restore_sigs(blocked);
3368
3369                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
3370                        (rc2 <= 0) ? -1 : event.type,
3371                        (rc2 <= 0) ? -1 : event.status,
3372                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
3373
3374                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
3375
3376                 if (rc2 <= 0 || event.status != 0) {
3377                         /* timeout or error */
3378                         if (!replied && rc == 0)
3379                                 rc = (rc2 < 0) ? rc2 :
3380                                      (rc2 == 0) ? -ETIMEDOUT :
3381                                      event.status;
3382
3383                         if (!unlinked) {
3384                                 /* Ensure completion in finite time... */
3385                                 LNetMDUnlink(mdh);
3386                                 /* No assertion (racing with network) */
3387                                 unlinked = 1;
3388                                 timeout = a_long_time;
3389                         } else if (rc2 == 0) {
3390                                 /* timed out waiting for unlink */
3391                                 CWARN("ping %s: late network completion\n",
3392                                       libcfs_id2str(id));
3393                         }
3394                 } else if (event.type == LNET_EVENT_REPLY) {
3395                         replied = 1;
3396                         rc = event.mlength;
3397                 }
3398
3399         } while (rc2 <= 0 || !event.unlinked);
3400
3401         if (!replied) {
3402                 if (rc >= 0)
3403                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
3404                               libcfs_id2str(id));
3405                 rc = -EIO;
3406                 goto out_1;
3407         }
3408
3409         nob = rc;
3410         LASSERT(nob >= 0 && nob <= LNET_PING_INFO_SIZE(n_ids));
3411
3412         rc = -EPROTO;                           /* if I can't parse... */
3413
3414         if (nob < 8) {
3415                 /* can't check magic/version */
3416                 CERROR("%s: ping info too short %d\n",
3417                        libcfs_id2str(id), nob);
3418                 goto out_1;
3419         }
3420
3421         if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
3422                 lnet_swap_pinginfo(pbuf);
3423         } else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
3424                 CERROR("%s: Unexpected magic %08x\n",
3425                        libcfs_id2str(id), pbuf->pb_info.pi_magic);
3426                 goto out_1;
3427         }
3428
3429         if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
3430                 CERROR("%s: ping w/o NI status: 0x%x\n",
3431                        libcfs_id2str(id), pbuf->pb_info.pi_features);
3432                 goto out_1;
3433         }
3434
3435         if (nob < LNET_PING_INFO_SIZE(0)) {
3436                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
3437                        nob, (int)LNET_PING_INFO_SIZE(0));
3438                 goto out_1;
3439         }
3440
3441         if (pbuf->pb_info.pi_nnis < n_ids)
3442                 n_ids = pbuf->pb_info.pi_nnis;
3443
3444         if (nob < LNET_PING_INFO_SIZE(n_ids)) {
3445                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
3446                        nob, (int)LNET_PING_INFO_SIZE(n_ids));;
3447                 goto out_1;
3448         }
3449
3450         rc = -EFAULT;                           /* If I SEGV... */
3451
3452         memset(&tmpid, 0, sizeof(tmpid));
3453         for (i = 0; i < n_ids; i++) {
3454                 tmpid.pid = pbuf->pb_info.pi_pid;
3455                 tmpid.nid = pbuf->pb_info.pi_ni[i].ns_nid;
3456                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
3457                         goto out_1;
3458         }
3459         rc = pbuf->pb_info.pi_nnis;
3460
3461  out_1:
3462         rc2 = LNetEQFree(eqh);
3463         if (rc2 != 0)
3464                 CERROR("rc2 %d\n", rc2);
3465         LASSERT(rc2 == 0);
3466
3467  out_0:
3468         lnet_ping_buffer_decref(pbuf);
3469         return rc;
3470 }