Whamcloud - gitweb
LU-5953 build: use installed OFED by default
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2014, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include <lnet/lib-lnet.h>
39 #include <lnet/lib-dlc.h>
40 #include <linux/log2.h>
41
42 #define D_LNI D_CONSOLE
43
/* Single global LNet instance; exported so other modules can reference it. */
lnet_t      the_lnet;                           /* THE state of the network */
EXPORT_SYMBOL(the_lnet);

/* Module parameter (read-only, 0444): IP table used to derive the networks.
 * Mutually exclusive with 'networks', see lnet_get_networks(). */
static char *ip2nets = "";
CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
                "LNET network <- IP table");

/* Module parameter (read-only, 0444): explicit list of local networks. */
static char *networks = "";
CFS_MODULE_PARM(networks, "s", charp, 0444,
                "local networks");

/* Module parameter (read-only, 0444): routes to non-local networks. */
static char *routes = "";
CFS_MODULE_PARM(routes, "s", charp, 0444,
                "routes to non-local networks");

/* Module parameter (read-only, 0444): size of the remote network hash table;
 * defaults to LNET_REMOTE_NETS_HASH_DEFAULT. */
static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
                "size of remote network hash table");

/* Forward declaration; the definition is not in this part of the file. */
static int lnet_ping(lnet_process_id_t id, int timeout_ms,
                     lnet_process_id_t *ids, int n_ids);
65
66 static char *
67 lnet_get_routes(void)
68 {
69         return routes;
70 }
71
72 static char *
73 lnet_get_networks(void)
74 {
75         char   *nets;
76         int     rc;
77
78         if (*networks != 0 && *ip2nets != 0) {
79                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
80                                    "'ip2nets' but not both at once\n");
81                 return NULL;
82         }
83
84         if (*ip2nets != 0) {
85                 rc = lnet_parse_ip2nets(&nets, ip2nets);
86                 return (rc == 0) ? nets : NULL;
87         }
88
89         if (*networks != 0)
90                 return networks;
91
92         return "tcp";
93 }
94
95 static void
96 lnet_init_locks(void)
97 {
98         spin_lock_init(&the_lnet.ln_eq_wait_lock);
99         init_waitqueue_head(&the_lnet.ln_eq_waitq);
100         init_waitqueue_head(&the_lnet.ln_rc_waitq);
101         mutex_init(&the_lnet.ln_lnd_mutex);
102         mutex_init(&the_lnet.ln_api_mutex);
103 }
104
/* Counterpart of lnet_init_locks(); currently there is nothing to release. */
static void
lnet_fini_locks(void)
{
        /* intentionally empty */
}
109
110 static int
111 lnet_create_remote_nets_table(void)
112 {
113         int               i;
114         struct list_head *hash;
115
116         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
117         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
118         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
119         if (hash == NULL) {
120                 CERROR("Failed to create remote nets hash table\n");
121                 return -ENOMEM;
122         }
123
124         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
125                 INIT_LIST_HEAD(&hash[i]);
126         the_lnet.ln_remote_nets_hash = hash;
127         return 0;
128 }
129
130 static void
131 lnet_destroy_remote_nets_table(void)
132 {
133         int i;
134
135         if (the_lnet.ln_remote_nets_hash == NULL)
136                 return;
137
138         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
139                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
140
141         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
142                     LNET_REMOTE_NETS_HASH_SIZE *
143                     sizeof(the_lnet.ln_remote_nets_hash[0]));
144         the_lnet.ln_remote_nets_hash = NULL;
145 }
146
147 static void
148 lnet_destroy_locks(void)
149 {
150         if (the_lnet.ln_res_lock != NULL) {
151                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
152                 the_lnet.ln_res_lock = NULL;
153         }
154
155         if (the_lnet.ln_net_lock != NULL) {
156                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
157                 the_lnet.ln_net_lock = NULL;
158         }
159
160         lnet_fini_locks();
161 }
162
163 static int
164 lnet_create_locks(void)
165 {
166         lnet_init_locks();
167
168         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
169         if (the_lnet.ln_res_lock == NULL)
170                 goto failed;
171
172         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
173         if (the_lnet.ln_net_lock == NULL)
174                 goto failed;
175
176         return 0;
177
178  failed:
179         lnet_destroy_locks();
180         return -ENOMEM;
181 }
182
183 static void lnet_assert_wire_constants(void)
184 {
185         /* Wire protocol assertions generated by 'wirecheck'
186          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
187          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
188          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
189
190         /* Constants... */
191         CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
192         CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
193         CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
194         CLASSERT (LNET_MSG_ACK == 0);
195         CLASSERT (LNET_MSG_PUT == 1);
196         CLASSERT (LNET_MSG_GET == 2);
197         CLASSERT (LNET_MSG_REPLY == 3);
198         CLASSERT (LNET_MSG_HELLO == 4);
199
200         /* Checks for struct ptl_handle_wire_t */
201         CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
202         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
203         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
204         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
205         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
206
207         /* Checks for struct lnet_magicversion_t */
208         CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
209         CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
210         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
211         CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
212         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
213         CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
214         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
215
216         /* Checks for struct lnet_hdr_t */
217         CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
218         CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
219         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
220         CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
221         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
222         CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
223         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
224         CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
225         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
226         CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
227         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
228         CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
229         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
230         CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
231         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
232
233         /* Ack */
234         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
235         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
236         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
237         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
238         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
239         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
240
241         /* Put */
242         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
243         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
244         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
245         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
246         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
247         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
248         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
249         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
250         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
251         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
252
253         /* Get */
254         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
255         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
256         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
257         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
258         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
259         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
260         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
261         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
262         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
263         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
264
265         /* Reply */
266         CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
267         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
268
269         /* Hello */
270         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
271         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
272         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
273         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
274 }
275
276 static lnd_t *lnet_find_lnd_by_type(__u32 type)
277 {
278         lnd_t            *lnd;
279         struct list_head *tmp;
280
281         /* holding lnd mutex */
282         list_for_each(tmp, &the_lnet.ln_lnds) {
283                 lnd = list_entry(tmp, lnd_t, lnd_list);
284
285                 if (lnd->lnd_type == type)
286                         return lnd;
287         }
288         return NULL;
289 }
290
291 void
292 lnet_register_lnd (lnd_t *lnd)
293 {
294         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
295
296         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
297         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
298
299         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
300         lnd->lnd_refcount = 0;
301
302         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
303
304         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
305 }
306 EXPORT_SYMBOL(lnet_register_lnd);
307
308 void
309 lnet_unregister_lnd (lnd_t *lnd)
310 {
311         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
312
313         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
314         LASSERT(lnd->lnd_refcount == 0);
315
316         list_del(&lnd->lnd_list);
317         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
318
319         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
320 }
321 EXPORT_SYMBOL(lnet_unregister_lnd);
322
323 void
324 lnet_counters_get(lnet_counters_t *counters)
325 {
326         lnet_counters_t *ctr;
327         int             i;
328
329         memset(counters, 0, sizeof(*counters));
330
331         lnet_net_lock(LNET_LOCK_EX);
332
333         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
334                 counters->msgs_max     += ctr->msgs_max;
335                 counters->msgs_alloc   += ctr->msgs_alloc;
336                 counters->errors       += ctr->errors;
337                 counters->send_count   += ctr->send_count;
338                 counters->recv_count   += ctr->recv_count;
339                 counters->route_count  += ctr->route_count;
340                 counters->drop_count   += ctr->drop_count;
341                 counters->send_length  += ctr->send_length;
342                 counters->recv_length  += ctr->recv_length;
343                 counters->route_length += ctr->route_length;
344                 counters->drop_length  += ctr->drop_length;
345
346         }
347         lnet_net_unlock(LNET_LOCK_EX);
348 }
349 EXPORT_SYMBOL(lnet_counters_get);
350
351 void
352 lnet_counters_reset(void)
353 {
354         lnet_counters_t *counters;
355         int             i;
356
357         lnet_net_lock(LNET_LOCK_EX);
358
359         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
360                 memset(counters, 0, sizeof(lnet_counters_t));
361
362         lnet_net_unlock(LNET_LOCK_EX);
363 }
364
365 static __u64 lnet_create_interface_cookie(void)
366 {
367         /* NB the interface cookie in wire handles guards against delayed
368          * replies and ACKs appearing valid after reboot. Initialisation time,
369          * even if it's only implemented to millisecond resolution is probably
370          * easily good enough. */
371         struct timeval tv;
372         __u64          cookie;
373         do_gettimeofday(&tv);
374         cookie = tv.tv_sec;
375         cookie *= 1000000;
376         cookie += tv.tv_usec;
377         return cookie;
378 }
379
380 static char *
381 lnet_res_type2str(int type)
382 {
383         switch (type) {
384         default:
385                 LBUG();
386         case LNET_COOKIE_TYPE_MD:
387                 return "MD";
388         case LNET_COOKIE_TYPE_ME:
389                 return "ME";
390         case LNET_COOKIE_TYPE_EQ:
391                 return "EQ";
392         }
393 }
394
395 static void
396 lnet_res_container_cleanup(struct lnet_res_container *rec)
397 {
398         int     count = 0;
399
400         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
401                 return;
402
403         while (!list_empty(&rec->rec_active)) {
404                 struct list_head *e = rec->rec_active.next;
405
406                 list_del_init(e);
407                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
408                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
409
410                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
411                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
412
413                 } else { /* NB: Active MEs should be attached on portals */
414                         LBUG();
415                 }
416                 count++;
417         }
418
419         if (count > 0) {
420                 /* Found alive MD/ME/EQ, user really should unlink/free
421                  * all of them before finalize LNet, but if someone didn't,
422                  * we have to recycle garbage for him */
423                 CERROR("%d active elements on exit of %s container\n",
424                        count, lnet_res_type2str(rec->rec_type));
425         }
426
427         if (rec->rec_lh_hash != NULL) {
428                 LIBCFS_FREE(rec->rec_lh_hash,
429                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
430                 rec->rec_lh_hash = NULL;
431         }
432
433         rec->rec_type = 0; /* mark it as finalized */
434 }
435
436 static int
437 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
438 {
439         int     rc = 0;
440         int     i;
441
442         LASSERT(rec->rec_type == 0);
443
444         rec->rec_type = type;
445         INIT_LIST_HEAD(&rec->rec_active);
446
447         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
448
449         /* Arbitrary choice of hash table size */
450         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
451                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
452         if (rec->rec_lh_hash == NULL) {
453                 rc = -ENOMEM;
454                 goto out;
455         }
456
457         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
458                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
459
460         return 0;
461
462 out:
463         CERROR("Failed to setup %s resource container\n",
464                lnet_res_type2str(type));
465         lnet_res_container_cleanup(rec);
466         return rc;
467 }
468
469 static void
470 lnet_res_containers_destroy(struct lnet_res_container **recs)
471 {
472         struct lnet_res_container       *rec;
473         int                             i;
474
475         cfs_percpt_for_each(rec, i, recs)
476                 lnet_res_container_cleanup(rec);
477
478         cfs_percpt_free(recs);
479 }
480
481 static struct lnet_res_container **
482 lnet_res_containers_create(int type)
483 {
484         struct lnet_res_container       **recs;
485         struct lnet_res_container       *rec;
486         int                             rc;
487         int                             i;
488
489         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
490         if (recs == NULL) {
491                 CERROR("Failed to allocate %s resource containers\n",
492                        lnet_res_type2str(type));
493                 return NULL;
494         }
495
496         cfs_percpt_for_each(rec, i, recs) {
497                 rc = lnet_res_container_setup(rec, i, type);
498                 if (rc != 0) {
499                         lnet_res_containers_destroy(recs);
500                         return NULL;
501                 }
502         }
503
504         return recs;
505 }
506
507 lnet_libhandle_t *
508 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
509 {
510         /* ALWAYS called with lnet_res_lock held */
511         struct list_head        *head;
512         lnet_libhandle_t        *lh;
513         unsigned int            hash;
514
515         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
516                 return NULL;
517
518         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
519         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
520
521         list_for_each_entry(lh, head, lh_hash_chain) {
522                 if (lh->lh_cookie == cookie)
523                         return lh;
524         }
525
526         return NULL;
527 }
528
529 void
530 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
531 {
532         /* ALWAYS called with lnet_res_lock held */
533         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
534         unsigned int    hash;
535
536         lh->lh_cookie = rec->rec_lh_cookie;
537         rec->rec_lh_cookie += 1 << ibits;
538
539         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
540
541         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
542 }
543
544 static int lnet_unprepare(void);
545
546 static int
547 lnet_prepare(lnet_pid_t requested_pid)
548 {
549         /* Prepare to bring up the network */
550         struct lnet_res_container **recs;
551         int                       rc = 0;
552
553         if (requested_pid == LNET_PID_ANY) {
554                 /* Don't instantiate LNET just for me */
555                 return -ENETDOWN;
556         }
557
558         LASSERT(the_lnet.ln_refcount == 0);
559
560         the_lnet.ln_routing = 0;
561
562         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
563         the_lnet.ln_pid = requested_pid;
564
565         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
566         INIT_LIST_HEAD(&the_lnet.ln_nis);
567         INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
568         INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
569         INIT_LIST_HEAD(&the_lnet.ln_routers);
570         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
571         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
572
573         rc = lnet_create_remote_nets_table();
574         if (rc != 0)
575                 goto failed;
576
577         the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
578
579         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
580                                                 sizeof(lnet_counters_t));
581         if (the_lnet.ln_counters == NULL) {
582                 CERROR("Failed to allocate counters for LNet\n");
583                 rc = -ENOMEM;
584                 goto failed;
585         }
586
587         rc = lnet_peer_tables_create();
588         if (rc != 0)
589                 goto failed;
590
591         rc = lnet_msg_containers_create();
592         if (rc != 0)
593                 goto failed;
594
595         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
596                                       LNET_COOKIE_TYPE_EQ);
597         if (rc != 0)
598                 goto failed;
599
600         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
601         if (recs == NULL) {
602                 rc = -ENOMEM;
603                 goto failed;
604         }
605
606         the_lnet.ln_me_containers = recs;
607
608         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
609         if (recs == NULL) {
610                 rc = -ENOMEM;
611                 goto failed;
612         }
613
614         the_lnet.ln_md_containers = recs;
615
616         rc = lnet_portals_create();
617         if (rc != 0) {
618                 CERROR("Failed to create portals for LNet: %d\n", rc);
619                 goto failed;
620         }
621
622         return 0;
623
624  failed:
625         lnet_unprepare();
626         return rc;
627 }
628
629 static int
630 lnet_unprepare (void)
631 {
632         /* NB no LNET_LOCK since this is the last reference.  All LND instances
633          * have shut down already, so it is safe to unlink and free all
634          * descriptors, even those that appear committed to a network op (eg MD
635          * with non-zero pending count) */
636
637         lnet_fail_nid(LNET_NID_ANY, 0);
638
639         LASSERT(the_lnet.ln_refcount == 0);
640         LASSERT(list_empty(&the_lnet.ln_test_peers));
641         LASSERT(list_empty(&the_lnet.ln_nis));
642         LASSERT(list_empty(&the_lnet.ln_nis_cpt));
643         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
644
645         lnet_portals_destroy();
646
647         if (the_lnet.ln_md_containers != NULL) {
648                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
649                 the_lnet.ln_md_containers = NULL;
650         }
651
652         if (the_lnet.ln_me_containers != NULL) {
653                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
654                 the_lnet.ln_me_containers = NULL;
655         }
656
657         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
658
659         lnet_msg_containers_destroy();
660         lnet_peer_tables_destroy();
661         lnet_rtrpools_free(0);
662
663         if (the_lnet.ln_counters != NULL) {
664                 cfs_percpt_free(the_lnet.ln_counters);
665                 the_lnet.ln_counters = NULL;
666         }
667         lnet_destroy_remote_nets_table();
668
669         return 0;
670 }
671
672 lnet_ni_t  *
673 lnet_net2ni_locked(__u32 net, int cpt)
674 {
675         struct list_head *tmp;
676         lnet_ni_t        *ni;
677
678         LASSERT(cpt != LNET_LOCK_EX);
679
680         list_for_each(tmp, &the_lnet.ln_nis) {
681                 ni = list_entry(tmp, lnet_ni_t, ni_list);
682
683                 if (LNET_NIDNET(ni->ni_nid) == net) {
684                         lnet_ni_addref_locked(ni, cpt);
685                         return ni;
686                 }
687         }
688
689         return NULL;
690 }
691
692 lnet_ni_t *
693 lnet_net2ni(__u32 net)
694 {
695         lnet_ni_t *ni;
696
697         lnet_net_lock(0);
698         ni = lnet_net2ni_locked(net, 0);
699         lnet_net_unlock(0);
700
701         return ni;
702 }
703 EXPORT_SYMBOL(lnet_net2ni);
704
705 static unsigned int
706 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
707 {
708         __u64           key = nid;
709         unsigned int    val;
710
711         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
712
713         if (number == 1)
714                 return 0;
715
716         val = hash_long(key, LNET_CPT_BITS);
717         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
718         if (val < number)
719                 return val;
720
721         return (unsigned int)(key + val + (val >> 1)) % number;
722 }
723
724 int
725 lnet_cpt_of_nid_locked(lnet_nid_t nid)
726 {
727         struct lnet_ni *ni;
728
729         /* must called with hold of lnet_net_lock */
730         if (LNET_CPT_NUMBER == 1)
731                 return 0; /* the only one */
732
733         /* take lnet_net_lock(any) would be OK */
734         if (!list_empty(&the_lnet.ln_nis_cpt)) {
735                 list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
736                         if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
737                                 continue;
738
739                         LASSERT(ni->ni_cpts != NULL);
740                         return ni->ni_cpts[lnet_nid_cpt_hash
741                                            (nid, ni->ni_ncpts)];
742                 }
743         }
744
745         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
746 }
747
748 int
749 lnet_cpt_of_nid(lnet_nid_t nid)
750 {
751         int     cpt;
752         int     cpt2;
753
754         if (LNET_CPT_NUMBER == 1)
755                 return 0; /* the only one */
756
757         if (list_empty(&the_lnet.ln_nis_cpt))
758                 return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
759
760         cpt = lnet_net_lock_current();
761         cpt2 = lnet_cpt_of_nid_locked(nid);
762         lnet_net_unlock(cpt);
763
764         return cpt2;
765 }
766 EXPORT_SYMBOL(lnet_cpt_of_nid);
767
768 int
769 lnet_islocalnet(__u32 net)
770 {
771         struct lnet_ni  *ni;
772         int             cpt;
773
774         cpt = lnet_net_lock_current();
775
776         ni = lnet_net2ni_locked(net, cpt);
777         if (ni != NULL)
778                 lnet_ni_decref_locked(ni, cpt);
779
780         lnet_net_unlock(cpt);
781
782         return ni != NULL;
783 }
784
785 lnet_ni_t  *
786 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
787 {
788         struct lnet_ni   *ni;
789         struct list_head *tmp;
790
791         LASSERT(cpt != LNET_LOCK_EX);
792
793         list_for_each(tmp, &the_lnet.ln_nis) {
794                 ni = list_entry(tmp, lnet_ni_t, ni_list);
795
796                 if (ni->ni_nid == nid) {
797                         lnet_ni_addref_locked(ni, cpt);
798                         return ni;
799                 }
800         }
801
802         return NULL;
803 }
804
805 int
806 lnet_islocalnid(lnet_nid_t nid)
807 {
808         struct lnet_ni  *ni;
809         int             cpt;
810
811         cpt = lnet_net_lock_current();
812         ni = lnet_nid2ni_locked(nid, cpt);
813         if (ni != NULL)
814                 lnet_ni_decref_locked(ni, cpt);
815         lnet_net_unlock(cpt);
816
817         return ni != NULL;
818 }
819
820 int
821 lnet_count_acceptor_nis (void)
822 {
823         /* Return the # of NIs that need the acceptor. */
824         int              count = 0;
825         struct list_head *tmp;
826         struct lnet_ni   *ni;
827         int              cpt;
828
829         cpt = lnet_net_lock_current();
830         list_for_each(tmp, &the_lnet.ln_nis) {
831                 ni = list_entry(tmp, lnet_ni_t, ni_list);
832
833                 if (ni->ni_lnd->lnd_accept != NULL)
834                         count++;
835         }
836
837         lnet_net_unlock(cpt);
838
839         return count;
840 }
841
842 static lnet_ping_info_t *
843 lnet_ping_info_create(int num_ni)
844 {
845         lnet_ping_info_t *ping_info;
846         unsigned int     infosz;
847
848         infosz = offsetof(lnet_ping_info_t, pi_ni[num_ni]);
849         LIBCFS_ALLOC(ping_info, infosz);
850         if (ping_info == NULL) {
851                 CERROR("Can't allocate ping info[%d]\n", num_ni);
852                 return NULL;
853         }
854
855         ping_info->pi_nnis = num_ni;
856         ping_info->pi_pid = the_lnet.ln_pid;
857         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
858         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
859
860         return ping_info;
861 }
862
863 static inline int
864 lnet_get_ni_count(void)
865 {
866         struct lnet_ni *ni;
867         int            count = 0;
868
869         lnet_net_lock(0);
870
871         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
872                 count++;
873
874         lnet_net_unlock(0);
875
876         return count;
877 }
878
879 static inline void
880 lnet_ping_info_free(lnet_ping_info_t *pinfo)
881 {
882         LIBCFS_FREE(pinfo,
883                     offsetof(lnet_ping_info_t,
884                              pi_ni[pinfo->pi_nnis]));
885 }
886
887 static void
888 lnet_ping_info_destroy(void)
889 {
890         struct lnet_ni  *ni;
891
892         lnet_net_lock(LNET_LOCK_EX);
893
894         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
895                 lnet_ni_lock(ni);
896                 ni->ni_status = NULL;
897                 lnet_ni_unlock(ni);
898         }
899
900         lnet_ping_info_free(the_lnet.ln_ping_info);
901         the_lnet.ln_ping_info = NULL;
902
903         lnet_net_unlock(LNET_LOCK_EX);
904 }
905
906 static void
907 lnet_ping_event_handler(lnet_event_t *event)
908 {
909         lnet_ping_info_t *pinfo = event->md.user_ptr;
910
911         if (event->unlinked)
912                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
913 }
914
915 static int
916 lnet_ping_info_setup(lnet_ping_info_t **ppinfo, lnet_handle_md_t *md_handle,
917                      int ni_count, bool set_eq)
918 {
919         lnet_handle_me_t  me_handle;
920         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
921         lnet_md_t         md = {NULL};
922         int               rc, rc2;
923
924         if (set_eq) {
925                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
926                                  &the_lnet.ln_ping_target_eq);
927                 if (rc != 0) {
928                         CERROR("Can't allocate ping EQ: %d\n", rc);
929                         return rc;
930                 }
931         }
932
933         *ppinfo = lnet_ping_info_create(ni_count);
934         if (*ppinfo == NULL) {
935                 rc = -ENOMEM;
936                 goto failed_0;
937         }
938
939         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
940                           LNET_PROTO_PING_MATCHBITS, 0,
941                           LNET_UNLINK, LNET_INS_AFTER,
942                           &me_handle);
943         if (rc != 0) {
944                 CERROR("Can't create ping ME: %d\n", rc);
945                 goto failed_1;
946         }
947
948         /* initialize md content */
949         md.start     = *ppinfo;
950         md.length    = offsetof(lnet_ping_info_t,
951                                 pi_ni[(*ppinfo)->pi_nnis]);
952         md.threshold = LNET_MD_THRESH_INF;
953         md.max_size  = 0;
954         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
955                        LNET_MD_MANAGE_REMOTE;
956         md.user_ptr  = NULL;
957         md.eq_handle = the_lnet.ln_ping_target_eq;
958         md.user_ptr = *ppinfo;
959
960         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
961         if (rc != 0) {
962                 CERROR("Can't attach ping MD: %d\n", rc);
963                 goto failed_2;
964         }
965
966         return 0;
967
968 failed_2:
969         rc2 = LNetMEUnlink(me_handle);
970         LASSERT(rc2 == 0);
971 failed_1:
972         lnet_ping_info_free(*ppinfo);
973         *ppinfo = NULL;
974 failed_0:
975         if (set_eq)
976                 LNetEQFree(the_lnet.ln_ping_target_eq);
977         return rc;
978 }
979
980 static void
981 lnet_ping_md_unlink(lnet_ping_info_t *pinfo, lnet_handle_md_t *md_handle)
982 {
983         sigset_t        blocked = cfs_block_allsigs();
984
985         LNetMDUnlink(*md_handle);
986         LNetInvalidateHandle(md_handle);
987
988         /* NB md could be busy; this just starts the unlink */
989         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
990                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
991                 cfs_pause(cfs_time_seconds(1));
992         }
993
994         cfs_restore_sigs(blocked);
995 }
996
997 static void
998 lnet_ping_info_install_locked(lnet_ping_info_t *ping_info)
999 {
1000         int                     i;
1001         lnet_ni_t               *ni;
1002         lnet_ni_status_t        *ns;
1003
1004         i = 0;
1005         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1006                 LASSERT(i < ping_info->pi_nnis);
1007
1008                 ns = &ping_info->pi_ni[i];
1009
1010                 ns->ns_nid = ni->ni_nid;
1011
1012                 lnet_ni_lock(ni);
1013                 ns->ns_status = (ni->ni_status != NULL) ?
1014                                 ni->ni_status->ns_status : LNET_NI_STATUS_UP;
1015                 ni->ni_status = ns;
1016                 lnet_ni_unlock(ni);
1017
1018                 i++;
1019         }
1020 }
1021
1022 static void
1023 lnet_ping_target_update(lnet_ping_info_t *pinfo, lnet_handle_md_t md_handle)
1024 {
1025         lnet_ping_info_t *old_pinfo = NULL;
1026         lnet_handle_md_t old_md;
1027
1028         /* switch the NIs to point to the new ping info created */
1029         lnet_net_lock(LNET_LOCK_EX);
1030
1031         if (!the_lnet.ln_routing)
1032                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1033         lnet_ping_info_install_locked(pinfo);
1034
1035         if (the_lnet.ln_ping_info != NULL) {
1036                 old_pinfo = the_lnet.ln_ping_info;
1037                 old_md = the_lnet.ln_ping_target_md;
1038         }
1039         the_lnet.ln_ping_target_md = md_handle;
1040         the_lnet.ln_ping_info = pinfo;
1041
1042         lnet_net_unlock(LNET_LOCK_EX);
1043
1044         if (old_pinfo != NULL) {
1045                 /* unlink the old ping info */
1046                 lnet_ping_md_unlink(old_pinfo, &old_md);
1047                 lnet_ping_info_free(old_pinfo);
1048         }
1049 }
1050
1051 static void
1052 lnet_ping_target_fini(void)
1053 {
1054         int             rc;
1055
1056         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1057                             &the_lnet.ln_ping_target_md);
1058
1059         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1060         LASSERT(rc == 0);
1061
1062         lnet_ping_info_destroy();
1063 }
1064
1065 static int
1066 lnet_ni_tq_credits(lnet_ni_t *ni)
1067 {
1068         int     credits;
1069
1070         LASSERT(ni->ni_ncpts >= 1);
1071
1072         if (ni->ni_ncpts == 1)
1073                 return ni->ni_maxtxcredits;
1074
1075         credits = ni->ni_maxtxcredits / ni->ni_ncpts;
1076         credits = max(credits, 8 * ni->ni_peertxcredits);
1077         credits = min(credits, ni->ni_maxtxcredits);
1078
1079         return credits;
1080 }
1081
1082 static void
1083 lnet_ni_unlink_locked(lnet_ni_t *ni)
1084 {
1085         if (!list_empty(&ni->ni_cptlist)) {
1086                 list_del_init(&ni->ni_cptlist);
1087                 lnet_ni_decref_locked(ni, 0);
1088         }
1089
1090         /* move it to zombie list and nobody can find it anymore */
1091         LASSERT(!list_empty(&ni->ni_list));
1092         list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
1093         lnet_ni_decref_locked(ni, 0);   /* drop ln_nis' ref */
1094 }
1095
1096 static void
1097 lnet_clear_zombies_nis_locked(void)
1098 {
1099         int             i;
1100         int             islo;
1101         lnet_ni_t       *ni;
1102
1103         /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
1104          * and shut them down in guaranteed thread context */
1105         i = 2;
1106         while (!list_empty(&the_lnet.ln_nis_zombie)) {
1107                 int     *ref;
1108                 int     j;
1109
1110                 ni = list_entry(the_lnet.ln_nis_zombie.next,
1111                                 lnet_ni_t, ni_list);
1112                 list_del_init(&ni->ni_list);
1113                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1114                         if (*ref == 0)
1115                                 continue;
1116                         /* still busy, add it back to zombie list */
1117                         list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
1118                         break;
1119                 }
1120
1121                 if (!list_empty(&ni->ni_list)) {
1122                         lnet_net_unlock(LNET_LOCK_EX);
1123                         ++i;
1124                         if ((i & (-i)) == i) {
1125                                 CDEBUG(D_WARNING,
1126                                        "Waiting for zombie LNI %s\n",
1127                                        libcfs_nid2str(ni->ni_nid));
1128                         }
1129                         cfs_pause(cfs_time_seconds(1));
1130                         lnet_net_lock(LNET_LOCK_EX);
1131                         continue;
1132                 }
1133
1134                 ni->ni_lnd->lnd_refcount--;
1135                 lnet_net_unlock(LNET_LOCK_EX);
1136
1137                 islo = ni->ni_lnd->lnd_type == LOLND;
1138
1139                 LASSERT(!in_interrupt());
1140                 (ni->ni_lnd->lnd_shutdown)(ni);
1141
1142                 /* can't deref lnd anymore now; it might have unregistered
1143                  * itself...  */
1144
1145                 if (!islo)
1146                         CDEBUG(D_LNI, "Removed LNI %s\n",
1147                               libcfs_nid2str(ni->ni_nid));
1148
1149                 lnet_ni_free(ni);
1150                 i = 2;
1151                 lnet_net_lock(LNET_LOCK_EX);
1152         }
1153 }
1154
1155 static void
1156 lnet_shutdown_lndnis(void)
1157 {
1158         int             i;
1159         lnet_ni_t       *ni;
1160
1161         /* NB called holding the global mutex */
1162
1163         /* All quiet on the API front */
1164         LASSERT(!the_lnet.ln_shutdown);
1165         LASSERT(the_lnet.ln_refcount == 0);
1166         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
1167
1168         lnet_net_lock(LNET_LOCK_EX);
1169         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1170
1171         /* Unlink NIs from the global table */
1172         while (!list_empty(&the_lnet.ln_nis)) {
1173                 ni = list_entry(the_lnet.ln_nis.next,
1174                                 lnet_ni_t, ni_list);
1175                 lnet_ni_unlink_locked(ni);
1176         }
1177
1178         /* Drop the cached loopback NI. */
1179         if (the_lnet.ln_loni != NULL) {
1180                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1181                 the_lnet.ln_loni = NULL;
1182         }
1183
1184         lnet_net_unlock(LNET_LOCK_EX);
1185
1186         /* Clear lazy portals and drop delayed messages which hold refs
1187          * on their lnet_msg_t::msg_rxpeer */
1188         for (i = 0; i < the_lnet.ln_nportals; i++)
1189                 LNetClearLazyPortal(i);
1190
1191         /* Clear the peer table and wait for all peers to go (they hold refs on
1192          * their NIs) */
1193         lnet_peer_tables_cleanup(NULL);
1194
1195         lnet_net_lock(LNET_LOCK_EX);
1196
1197         lnet_clear_zombies_nis_locked();
1198         the_lnet.ln_shutdown = 0;
1199         lnet_net_unlock(LNET_LOCK_EX);
1200 }
1201
1202 /* shutdown down the NI and release refcount */
1203 static void
1204 lnet_shutdown_lndni(struct lnet_ni *ni)
1205 {
1206         int i;
1207
1208         lnet_net_lock(LNET_LOCK_EX);
1209         lnet_ni_unlink_locked(ni);
1210         lnet_net_unlock(LNET_LOCK_EX);
1211
1212         /* clear messages for this NI on the lazy portal */
1213         for (i = 0; i < the_lnet.ln_nportals; i++)
1214                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
1215
1216         /* Do peer table cleanup for this ni */
1217         lnet_peer_tables_cleanup(ni);
1218
1219         lnet_net_lock(LNET_LOCK_EX);
1220         lnet_clear_zombies_nis_locked();
1221         lnet_net_unlock(LNET_LOCK_EX);
1222 }
1223
1224 static int
1225 lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
1226                    __s32 peer_cr, __s32 peer_buf_cr, __s32 credits)
1227 {
1228         int                     rc = -EINVAL;
1229         __u32                   lnd_type;
1230         lnd_t                   *lnd;
1231         struct lnet_tx_queue    *tq;
1232         int                     i;
1233
1234         lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
1235
1236         LASSERT(libcfs_isknown_lnd(lnd_type));
1237
1238         if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
1239             lnd_type == IIBLND || lnd_type == VIBLND) {
1240                 CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
1241                 goto failed0;
1242         }
1243
1244         /* Make sure this new NI is unique. */
1245         lnet_net_lock(LNET_LOCK_EX);
1246         rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
1247         lnet_net_unlock(LNET_LOCK_EX);
1248
1249         if (!rc) {
1250                 if (lnd_type == LOLND) {
1251                         lnet_ni_free(ni);
1252                         return 0;
1253                 }
1254
1255                 CERROR("Net %s is not unique\n",
1256                        libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
1257
1258                 rc = -EEXIST;
1259                 goto failed0;
1260         }
1261
1262         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1263         lnd = lnet_find_lnd_by_type(lnd_type);
1264
1265         if (lnd == NULL) {
1266                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1267                 rc = request_module("%s", libcfs_lnd2modname(lnd_type));
1268                 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1269
1270                 lnd = lnet_find_lnd_by_type(lnd_type);
1271                 if (lnd == NULL) {
1272                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1273                         CERROR("Can't load LND %s, module %s, rc=%d\n",
1274                                libcfs_lnd2str(lnd_type),
1275                                libcfs_lnd2modname(lnd_type), rc);
1276 #ifndef HAVE_MODULE_LOADING_SUPPORT
1277                         LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1278                                            "compiled with kernel module "
1279                                            "loading support.");
1280 #endif
1281                         rc = -EINVAL;
1282                         goto failed0;
1283                 }
1284         }
1285
1286         lnet_net_lock(LNET_LOCK_EX);
1287         lnd->lnd_refcount++;
1288         lnet_net_unlock(LNET_LOCK_EX);
1289
1290         ni->ni_lnd = lnd;
1291
1292         rc = (lnd->lnd_startup)(ni);
1293
1294         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1295
1296         if (rc != 0) {
1297                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
1298                                    rc, libcfs_lnd2str(lnd->lnd_type));
1299                 lnet_net_lock(LNET_LOCK_EX);
1300                 lnd->lnd_refcount--;
1301                 lnet_net_unlock(LNET_LOCK_EX);
1302                 goto failed0;
1303         }
1304
1305         /* If given some LND tunable parameters, parse those now to
1306          * override the values in the NI structure. */
1307         if (peer_buf_cr >= 0)
1308                 ni->ni_peerrtrcredits = peer_buf_cr;
1309         if (peer_timeout >= 0)
1310                 ni->ni_peertimeout = peer_timeout;
1311         /*
1312          * TODO
1313          * Note: For now, don't allow the user to change
1314          * peertxcredits as this number is used in the
1315          * IB LND to control queue depth.
1316          * if (peer_cr != -1)
1317          *      ni->ni_peertxcredits = peer_cr;
1318          */
1319         if (credits >= 0)
1320                 ni->ni_maxtxcredits = credits;
1321
1322         LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
1323
1324         lnet_net_lock(LNET_LOCK_EX);
1325         /* refcount for ln_nis */
1326         lnet_ni_addref_locked(ni, 0);
1327         list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1328         if (ni->ni_cpts != NULL) {
1329                 lnet_ni_addref_locked(ni, 0);
1330                 list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
1331         }
1332
1333         lnet_net_unlock(LNET_LOCK_EX);
1334
1335         if (lnd->lnd_type == LOLND) {
1336                 lnet_ni_addref(ni);
1337                 LASSERT(the_lnet.ln_loni == NULL);
1338                 the_lnet.ln_loni = ni;
1339                 return 0;
1340         }
1341
1342         if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) {
1343                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1344                                    libcfs_lnd2str(lnd->lnd_type),
1345                                    ni->ni_peertxcredits == 0 ?
1346                                         "" : "per-peer ");
1347                 /* shutdown the NI since if we get here then it must've already
1348                  * been started
1349                  */
1350                 lnet_shutdown_lndni(ni);
1351                 return -EINVAL;
1352         }
1353
1354         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1355                 tq->tq_credits_min =
1356                 tq->tq_credits_max =
1357                 tq->tq_credits = lnet_ni_tq_credits(ni);
1358         }
1359
1360         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1361                 libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
1362                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1363                 ni->ni_peerrtrcredits, ni->ni_peertimeout);
1364
1365         return 0;
1366 failed0:
1367         lnet_ni_free(ni);
1368         return rc;
1369 }
1370
1371 static int
1372 lnet_startup_lndnis(struct list_head *nilist)
1373 {
1374         struct lnet_ni          *ni;
1375         int                     rc;
1376         int                     ni_count = 0;
1377
1378         while (!list_empty(nilist)) {
1379                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1380                 list_del(&ni->ni_list);
1381                 rc = lnet_startup_lndni(ni, -1, -1, -1, -1);
1382
1383                 if (rc < 0)
1384                         goto failed;
1385
1386                 ni_count++;
1387         }
1388
1389         return ni_count;
1390 failed:
1391         lnet_shutdown_lndnis();
1392
1393         return rc;
1394 }
1395
1396 /**
1397  * Initialize LNet library.
1398  *
1399  * Automatically called at module loading time. Caller has to call
1400  * lnet_fini() after a call to lnet_init(), if and only if the latter
1401  * returned 0. It must be called exactly once.
1402  *
1403  * \return 0 on success, and -ve on failures.
1404  */
1405 int
1406 lnet_init(void)
1407 {
1408         int     rc;
1409
1410         lnet_assert_wire_constants();
1411
1412         memset(&the_lnet, 0, sizeof(the_lnet));
1413
1414         /* refer to global cfs_cpt_table for now */
1415         the_lnet.ln_cpt_table   = cfs_cpt_table;
1416         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1417
1418         LASSERT(the_lnet.ln_cpt_number > 0);
1419         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1420                 /* we are under risk of consuming all lh_cookie */
1421                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1422                        "please change setting of CPT-table and retry\n",
1423                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1424                 return -1;
1425         }
1426
1427         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1428                 the_lnet.ln_cpt_bits++;
1429
1430         rc = lnet_create_locks();
1431         if (rc != 0) {
1432                 CERROR("Can't create LNet global locks: %d\n", rc);
1433                 return -1;
1434         }
1435
1436         the_lnet.ln_refcount = 0;
1437         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1438         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1439         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1440         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1441
1442         /* The hash table size is the number of bits it takes to express the set
1443          * ln_num_routes, minus 1 (better to under estimate than over so we
1444          * don't waste memory). */
1445         if (rnet_htable_size <= 0)
1446                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1447         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1448                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1449         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1450                                            order_base_2(rnet_htable_size) - 1);
1451
1452         /* All LNDs apart from the LOLND are in separate modules.  They
1453          * register themselves when their module loads, and unregister
1454          * themselves when their module is unloaded. */
1455         lnet_register_lnd(&the_lolnd);
1456         return 0;
1457 }
1458
1459 /**
1460  * Finalize LNet library.
1461  *
1462  * \pre lnet_init() called with success.
1463  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1464  */
1465 void
1466 lnet_fini(void)
1467 {
1468         LASSERT(the_lnet.ln_refcount == 0);
1469
1470         while (!list_empty(&the_lnet.ln_lnds))
1471                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1472                                                lnd_t, lnd_list));
1473         lnet_destroy_locks();
1474 }
1475
1476 /**
1477  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1478  *
1479  * Users must call this function at least once before any other functions.
1480  * For each successful call there must be a corresponding call to
1481  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1482  * ignored.
1483  *
1484  * The PID used by LNet may be different from the one requested.
1485  * See LNetGetId().
1486  *
1487  * \param requested_pid PID requested by the caller.
1488  *
1489  * \return >= 0 on success, and < 0 error code on failures.
1490  */
1491 int
1492 LNetNIInit(lnet_pid_t requested_pid)
1493 {
1494         int                     im_a_router = 0;
1495         int                     rc;
1496         int                     ni_count;
1497         lnet_ping_info_t        *pinfo;
1498         lnet_handle_md_t        md_handle;
1499         struct list_head        net_head;
1500
1501         INIT_LIST_HEAD(&net_head);
1502
1503         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1504
1505         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1506
1507         if (the_lnet.ln_refcount > 0) {
1508                 rc = the_lnet.ln_refcount++;
1509                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1510                 return rc;
1511         }
1512
1513         rc = lnet_prepare(requested_pid);
1514         if (rc != 0) {
1515                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1516                 return rc;
1517         }
1518
1519         /* Add in the loopback network */
1520         if (lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head) == NULL) {
1521                 rc = -ENOMEM;
1522                 goto failed0;
1523         }
1524
1525         /* If LNet is being initialized via DLC it is possible
1526          * that the user requests not to load module parameters (ones which
1527          * are supported by DLC) on initialization.  Therefore, make sure not
1528          * to load networks, routes and forwarding from module parameters
1529          * in this case.  On cleanup in case of failure only clean up
1530          * routes if it has been loaded */
1531         if (!the_lnet.ln_nis_from_mod_params) {
1532                 rc = lnet_parse_networks(&net_head,
1533                                          lnet_get_networks());
1534                 if (rc < 0)
1535                         goto failed0;
1536         }
1537
1538         ni_count = lnet_startup_lndnis(&net_head);
1539         if (ni_count < 0) {
1540                 rc = ni_count;
1541                 goto failed0;
1542         }
1543
1544         if (!the_lnet.ln_nis_from_mod_params) {
1545                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1546                 if (rc != 0)
1547                         goto failed1;
1548
1549                 rc = lnet_check_routes();
1550                 if (rc != 0)
1551                         goto failed2;
1552
1553                 rc = lnet_rtrpools_alloc(im_a_router);
1554                 if (rc != 0)
1555                         goto failed2;
1556         }
1557
1558         rc = lnet_acceptor_start();
1559         if (rc != 0)
1560                 goto failed2;
1561         the_lnet.ln_refcount = 1;
1562         /* Now I may use my own API functions... */
1563
1564         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1565         if (rc != 0)
1566                 goto failed3;
1567
1568         lnet_ping_target_update(pinfo, md_handle);
1569
1570         rc = lnet_router_checker_start();
1571         if (rc != 0)
1572                 goto failed4;
1573
1574         lnet_fault_init();
1575         lnet_proc_init();
1576
1577         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1578
1579         return 0;
1580
1581 failed4:
1582         lnet_ping_target_fini();
1583 failed3:
1584         the_lnet.ln_refcount = 0;
1585         lnet_acceptor_stop();
1586 failed2:
1587         if (!the_lnet.ln_nis_from_mod_params)
1588                 lnet_destroy_routes();
1589 failed1:
1590         lnet_shutdown_lndnis();
1591 failed0:
1592         lnet_unprepare();
1593         LASSERT(rc < 0);
1594         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1595         while (!list_empty(&net_head)) {
1596                 struct lnet_ni *ni;
1597                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
1598                 list_del_init(&ni->ni_list);
1599                 lnet_ni_free(ni);
1600         }
1601         return rc;
1602 }
1603 EXPORT_SYMBOL(LNetNIInit);
1604
1605 /**
1606  * Stop LNet interfaces, routing, and forwarding.
1607  *
1608  * Users must call this function once for each successful call to LNetNIInit().
1609  * Once the LNetNIFini() operation has been started, the results of pending
1610  * API operations are undefined.
1611  *
1612  * \return always 0 for current implementation.
1613  */
1614 int
1615 LNetNIFini()
1616 {
1617         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1618
1619         LASSERT (the_lnet.ln_refcount > 0);
1620
1621         if (the_lnet.ln_refcount != 1) {
1622                 the_lnet.ln_refcount--;
1623         } else {
1624                 LASSERT(!the_lnet.ln_niinit_self);
1625
1626                 lnet_fault_fini();
1627
1628                 lnet_proc_fini();
1629                 lnet_router_checker_stop();
1630                 lnet_ping_target_fini();
1631
1632                 /* Teardown fns that use my own API functions BEFORE here */
1633                 the_lnet.ln_refcount = 0;
1634
1635                 lnet_acceptor_stop();
1636                 lnet_destroy_routes();
1637                 lnet_shutdown_lndnis();
1638                 lnet_unprepare();
1639         }
1640
1641         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1642         return 0;
1643 }
1644 EXPORT_SYMBOL(LNetNIFini);
1645
1646 /**
1647  * Grabs the ni data from the ni structure and fills the out
1648  * parameters
1649  *
1650  * \param[in] ni network        interface structure
1651  * \param[out] cpt_count        the number of cpts the ni is on
1652  * \param[out] nid              Network Interface ID
1653  * \param[out] peer_timeout     NI peer timeout
1654  * \param[out] peer_tx_crdits   NI peer transmit credits
1655  * \param[out] peer_rtr_credits NI peer router credits
1656  * \param[out] max_tx_credits   NI max transmit credit
1657  * \param[out] net_config       Network configuration
1658  */
1659 static void
1660 lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
1661                   int *peer_timeout, int *peer_tx_credits,
1662                   int *peer_rtr_credits, int *max_tx_credits,
1663                   struct lnet_ioctl_net_config *net_config)
1664 {
1665         int i;
1666
1667         if (ni == NULL)
1668                 return;
1669
1670         if (net_config == NULL)
1671                 return;
1672
1673         CLASSERT(ARRAY_SIZE(ni->ni_interfaces) ==
1674                  ARRAY_SIZE(net_config->ni_interfaces));
1675
1676         if (ni->ni_interfaces[0] != NULL) {
1677                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1678                         if (ni->ni_interfaces[i] != NULL) {
1679                                 strncpy(net_config->ni_interfaces[i],
1680                                         ni->ni_interfaces[i],
1681                                         sizeof(net_config->ni_interfaces[i]));
1682                         }
1683                 }
1684         }
1685
1686         *nid = ni->ni_nid;
1687         *peer_timeout = ni->ni_peertimeout;
1688         *peer_tx_credits = ni->ni_peertxcredits;
1689         *peer_rtr_credits = ni->ni_peerrtrcredits;
1690         *max_tx_credits = ni->ni_maxtxcredits;
1691
1692         net_config->ni_status = ni->ni_status->ns_status;
1693
1694         for (i = 0;
1695              ni->ni_cpts != NULL && i < ni->ni_ncpts &&
1696              i < LNET_MAX_SHOW_NUM_CPT;
1697              i++)
1698                 net_config->ni_cpts[i] = ni->ni_cpts[i];
1699
1700         *cpt_count = ni->ni_ncpts;
1701 }
1702
1703 int
1704 lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
1705                     int *peer_tx_credits, int *peer_rtr_credits,
1706                     int *max_tx_credits,
1707                     struct lnet_ioctl_net_config *net_config)
1708 {
1709         struct lnet_ni          *ni;
1710         struct list_head        *tmp;
1711         int                     cpt;
1712         int                     rc = -ENOENT;
1713
1714         cpt = lnet_net_lock_current();
1715
1716         list_for_each(tmp, &the_lnet.ln_nis) {
1717                 ni = list_entry(tmp, lnet_ni_t, ni_list);
1718                 if (idx-- == 0) {
1719                         rc = 0;
1720                         lnet_ni_lock(ni);
1721                         lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
1722                                           peer_tx_credits, peer_rtr_credits,
1723                                           max_tx_credits, net_config);
1724                         lnet_ni_unlock(ni);
1725                         break;
1726                 }
1727         }
1728
1729         lnet_net_unlock(cpt);
1730         return rc;
1731 }
1732
1733 int
1734 lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
1735                 __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
1736                 __s32 credits)
1737 {
1738         lnet_ping_info_t        *pinfo;
1739         lnet_handle_md_t        md_handle;
1740         struct lnet_ni          *ni;
1741         struct list_head        net_head;
1742         int                     rc;
1743         lnet_remotenet_t        *rnet;
1744
1745         INIT_LIST_HEAD(&net_head);
1746
1747         /* Create a ni structure for the network string */
1748         rc = lnet_parse_networks(&net_head, nets);
1749         if (rc <= 0)
1750                 return rc == 0 ? -EINVAL : rc;
1751
1752         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1753
1754         if (rc > 1) {
1755                 rc = -EINVAL; /* only add one interface per call */
1756                 goto failed0;
1757         }
1758
1759         ni = list_entry(net_head.next, struct lnet_ni, ni_list);
1760
1761         lnet_net_lock(LNET_LOCK_EX);
1762         rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
1763         lnet_net_unlock(LNET_LOCK_EX);
1764         /* make sure that the net added doesn't invalidate the current
1765          * configuration LNet is keeping */
1766         if (rnet != NULL) {
1767                 CERROR("Adding net %s will invalidate routing configuration\n",
1768                        nets);
1769                 rc = -EUSERS;
1770                 goto failed0;
1771         }
1772
1773         rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
1774                                   false);
1775         if (rc != 0)
1776                 goto failed0;
1777
1778         list_del_init(&ni->ni_list);
1779
1780         rc = lnet_startup_lndni(ni, peer_timeout, peer_cr,
1781                                 peer_buf_cr, credits);
1782         if (rc != 0)
1783                 goto failed1;
1784
1785         if (ni->ni_lnd->lnd_accept != NULL) {
1786                 rc = lnet_acceptor_start();
1787                 if (rc < 0) {
1788                         /* shutdown the ni that we just started */
1789                         CERROR("Failed to start up acceptor thread\n");
1790                         lnet_shutdown_lndni(ni);
1791                         goto failed1;
1792                 }
1793         }
1794
1795         lnet_ping_target_update(pinfo, md_handle);
1796         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1797
1798         return 0;
1799
1800 failed1:
1801         lnet_ping_md_unlink(pinfo, &md_handle);
1802         lnet_ping_info_free(pinfo);
1803 failed0:
1804         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1805         while (!list_empty(&net_head)) {
1806                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
1807                 list_del_init(&ni->ni_list);
1808                 lnet_ni_free(ni);
1809         }
1810         return rc;
1811 }
1812
1813 int
1814 lnet_dyn_del_ni(__u32 net)
1815 {
1816         lnet_ni_t        *ni;
1817         lnet_ping_info_t *pinfo;
1818         lnet_handle_md_t  md_handle;
1819         int               rc;
1820
1821         /* don't allow userspace to shutdown the LOLND */
1822         if (LNET_NETTYP(net) == LOLND)
1823                 return -EINVAL;
1824
1825         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1826         /* create and link a new ping info, before removing the old one */
1827         rc = lnet_ping_info_setup(&pinfo, &md_handle,
1828                                   lnet_get_ni_count() - 1, false);
1829         if (rc != 0)
1830                 goto out;
1831
1832         ni = lnet_net2ni(net);
1833         if (ni == NULL) {
1834                 rc = -EINVAL;
1835                 goto failed;
1836         }
1837
1838         /* decrement the reference counter taken by lnet_net2ni() */
1839         lnet_ni_decref_locked(ni, 0);
1840
1841         lnet_shutdown_lndni(ni);
1842
1843         if (lnet_count_acceptor_nis() == 0)
1844                 lnet_acceptor_stop();
1845
1846         lnet_ping_target_update(pinfo, md_handle);
1847         goto out;
1848 failed:
1849         lnet_ping_md_unlink(pinfo, &md_handle);
1850         lnet_ping_info_free(pinfo);
1851 out:
1852         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1853
1854         return rc;
1855 }
1856
1857 /**
1858  * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
1859  * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
1860  * internal ioctl handler.
1861  *
1862  * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
1863  *
1864  * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
1865  * The data will be printed to system console. Don't use it excessively.
1866  * \param arg A pointer to lnet_process_id_t, process ID of the peer.
1867  *
1868  * \return Always return 0 when called by users directly (i.e., not via ioctl).
1869  */
1870 int
1871 LNetCtl(unsigned int cmd, void *arg)
1872 {
1873         struct libcfs_ioctl_data *data = arg;
1874         struct lnet_ioctl_config_data *config;
1875         lnet_process_id_t         id = {0};
1876         lnet_ni_t                *ni;
1877         int                       rc;
1878
1879         CLASSERT(LIBCFS_IOC_DATA_MAX >= sizeof(struct lnet_ioctl_net_config) +
1880                                         sizeof(struct lnet_ioctl_config_data));
1881
1882         switch (cmd) {
1883         case IOC_LIBCFS_GET_NI:
1884                 rc = LNetGetId(data->ioc_count, &id);
1885                 data->ioc_nid = id.nid;
1886                 return rc;
1887
1888         case IOC_LIBCFS_FAIL_NID:
1889                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
1890
1891         case IOC_LIBCFS_ADD_ROUTE:
1892                 config = arg;
1893
1894                 if (config->cfg_hdr.ioc_len < sizeof(*config))
1895                         return -EINVAL;
1896
1897                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1898                 rc = lnet_add_route(config->cfg_net,
1899                                     config->cfg_config_u.cfg_route.rtr_hop,
1900                                     config->cfg_nid,
1901                                     config->cfg_config_u.cfg_route.
1902                                         rtr_priority);
1903                 if (rc == 0) {
1904                         rc = lnet_check_routes();
1905                         if (rc != 0)
1906                                 lnet_del_route(config->cfg_net,
1907                                                config->cfg_nid);
1908                 }
1909                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1910                 return rc;
1911
1912         case IOC_LIBCFS_DEL_ROUTE:
1913                 config = arg;
1914
1915                 if (config->cfg_hdr.ioc_len < sizeof(*config))
1916                         return -EINVAL;
1917
1918                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1919                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
1920                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1921                 return rc;
1922
1923         case IOC_LIBCFS_GET_ROUTE:
1924                 config = arg;
1925
1926                 if (config->cfg_hdr.ioc_len < sizeof(*config))
1927                         return -EINVAL;
1928
1929                 return lnet_get_route(config->cfg_count,
1930                                       &config->cfg_net,
1931                                       &config->cfg_config_u.cfg_route.rtr_hop,
1932                                       &config->cfg_nid,
1933                                       &config->cfg_config_u.cfg_route.rtr_flags,
1934                                       &config->cfg_config_u.cfg_route.
1935                                         rtr_priority);
1936
1937         case IOC_LIBCFS_GET_NET: {
1938                 struct lnet_ioctl_net_config *net_config;
1939                 size_t total = sizeof(*config) + sizeof(*net_config);
1940
1941                 config = arg;
1942
1943                 if (config->cfg_hdr.ioc_len < total)
1944                         return -EINVAL;
1945
1946                 net_config = (struct lnet_ioctl_net_config *)
1947                         config->cfg_bulk;
1948                 if (config == NULL || net_config == NULL)
1949                         return -1;
1950
1951                 return lnet_get_net_config(config->cfg_count,
1952                                            &config->cfg_ncpts,
1953                                            &config->cfg_nid,
1954                                            &config->cfg_config_u.
1955                                                 cfg_net.net_peer_timeout,
1956                                            &config->cfg_config_u.cfg_net.
1957                                                 net_peer_tx_credits,
1958                                            &config->cfg_config_u.cfg_net.
1959                                                 net_peer_rtr_credits,
1960                                            &config->cfg_config_u.cfg_net.
1961                                                 net_max_tx_credits,
1962                                            net_config);
1963         }
1964
1965         case IOC_LIBCFS_GET_LNET_STATS:
1966         {
1967                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
1968
1969                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
1970                         return -EINVAL;
1971
1972                 lnet_counters_get(&lnet_stats->st_cntrs);
1973                 return 0;
1974         }
1975
1976         case IOC_LIBCFS_CONFIG_RTR:
1977                 config = arg;
1978
1979                 if (config->cfg_hdr.ioc_len < sizeof(*config))
1980                         return -EINVAL;
1981
1982                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1983                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
1984                         rc = lnet_rtrpools_enable();
1985                         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1986                         return rc;
1987                 }
1988                 lnet_rtrpools_disable();
1989                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1990                 return 0;
1991
1992         case IOC_LIBCFS_ADD_BUF:
1993                 config = arg;
1994
1995                 if (config->cfg_hdr.ioc_len < sizeof(*config))
1996                         return -EINVAL;
1997
1998                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1999                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2000                                                 buf_tiny,
2001                                           config->cfg_config_u.cfg_buffers.
2002                                                 buf_small,
2003                                           config->cfg_config_u.cfg_buffers.
2004                                                 buf_large);
2005                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2006                 return rc;
2007
2008         case IOC_LIBCFS_GET_BUF: {
2009                 struct lnet_ioctl_pool_cfg *pool_cfg;
2010                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
2011
2012                 config = arg;
2013
2014                 if (config->cfg_hdr.ioc_len < total)
2015                         return -EINVAL;
2016
2017                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2018                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2019         }
2020
2021         case IOC_LIBCFS_GET_PEER_INFO: {
2022                 struct lnet_ioctl_peer *peer_info = arg;
2023
2024                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
2025                         return -EINVAL;
2026
2027                 return lnet_get_peer_info(
2028                    peer_info->pr_count,
2029                    &peer_info->pr_nid,
2030                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2031                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2032                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2033                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2034                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2035                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2036                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2037                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2038         }
2039
2040         case IOC_LIBCFS_NOTIFY_ROUTER:
2041                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2042                                    cfs_time_current() -
2043                                    cfs_time_seconds(cfs_time_current_sec() -
2044                                                     (time_t)data->ioc_u64[0]));
2045
2046         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
2047                 /* This can be removed once lustre stops calling it */
2048                 return 0;
2049
2050         case IOC_LIBCFS_LNET_DIST:
2051                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2052                 if (rc < 0 && rc != -EHOSTUNREACH)
2053                         return rc;
2054
2055                 data->ioc_u32[0] = rc;
2056                 return 0;
2057
2058         case IOC_LIBCFS_TESTPROTOCOMPAT:
2059                 lnet_net_lock(LNET_LOCK_EX);
2060                 the_lnet.ln_testprotocompat = data->ioc_flags;
2061                 lnet_net_unlock(LNET_LOCK_EX);
2062                 return 0;
2063
2064         case IOC_LIBCFS_LNET_FAULT:
2065                 return lnet_fault_ctl(data->ioc_flags, data);
2066
2067         case IOC_LIBCFS_PING:
2068                 id.nid = data->ioc_nid;
2069                 id.pid = data->ioc_u32[0];
2070                 rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
2071                                (lnet_process_id_t __user *)data->ioc_pbuf1,
2072                                data->ioc_plen1/sizeof(lnet_process_id_t));
2073                 if (rc < 0)
2074                         return rc;
2075                 data->ioc_count = rc;
2076                 return 0;
2077
2078         case IOC_LIBCFS_DEBUG_PEER: {
2079                 /* CAVEAT EMPTOR: this one designed for calling directly; not
2080                  * via an ioctl */
2081                 id = *((lnet_process_id_t *) arg);
2082
2083                 lnet_debug_peer(id.nid);
2084
2085                 ni = lnet_net2ni(LNET_NIDNET(id.nid));
2086                 if (ni == NULL) {
2087                         CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
2088                 } else {
2089                         if (ni->ni_lnd->lnd_ctl == NULL) {
2090                                 CDEBUG(D_WARNING, "No ctl for %s\n",
2091                                        libcfs_id2str(id));
2092                         } else {
2093                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2094                         }
2095
2096                         lnet_ni_decref(ni);
2097                 }
2098                 return 0;
2099         }
2100
2101         default:
2102                 ni = lnet_net2ni(data->ioc_net);
2103                 if (ni == NULL)
2104                         return -EINVAL;
2105
2106                 if (ni->ni_lnd->lnd_ctl == NULL)
2107                         rc = -EINVAL;
2108                 else
2109                         rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2110
2111                 lnet_ni_decref(ni);
2112                 return rc;
2113         }
2114         /* not reached */
2115 }
2116 EXPORT_SYMBOL(LNetCtl);
2117
2118 /**
2119  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
2120  * all interfaces share a same PID, as requested by LNetNIInit().
2121  *
2122  * \param index Index of the interface to look up.
2123  * \param id On successful return, this location will hold the
2124  * lnet_process_id_t ID of the interface.
2125  *
2126  * \retval 0 If an interface exists at \a index.
2127  * \retval -ENOENT If no interface has been found.
2128  */
2129 int
2130 LNetGetId(unsigned int index, lnet_process_id_t *id)
2131 {
2132         struct lnet_ni   *ni;
2133         struct list_head *tmp;
2134         int               cpt;
2135         int               rc = -ENOENT;
2136
2137         LASSERT(the_lnet.ln_refcount > 0);
2138
2139         cpt = lnet_net_lock_current();
2140
2141         list_for_each(tmp, &the_lnet.ln_nis) {
2142                 if (index-- != 0)
2143                         continue;
2144
2145                 ni = list_entry(tmp, lnet_ni_t, ni_list);
2146
2147                 id->nid = ni->ni_nid;
2148                 id->pid = the_lnet.ln_pid;
2149                 rc = 0;
2150                 break;
2151         }
2152
2153         lnet_net_unlock(cpt);
2154         return rc;
2155 }
2156 EXPORT_SYMBOL(LNetGetId);
2157
2158 /**
2159  * Print a string representation of handle \a h into buffer \a str of
2160  * \a len bytes.
2161  */
2162 void
2163 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2164 {
2165         snprintf(str, len, LPX64, h.cookie);
2166 }
2167 EXPORT_SYMBOL(LNetSnprintHandle);
2168
2169 static int
2170 lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids,
2171           int n_ids)
2172 {
2173         lnet_handle_eq_t     eqh;
2174         lnet_handle_md_t     mdh;
2175         lnet_event_t         event;
2176         lnet_md_t            md = { NULL };
2177         int                  which;
2178         int                  unlinked = 0;
2179         int                  replied = 0;
2180         const int            a_long_time = 60000; /* mS */
2181         int                  infosz;
2182         lnet_ping_info_t    *info;
2183         lnet_process_id_t    tmpid;
2184         int                  i;
2185         int                  nob;
2186         int                  rc;
2187         int                  rc2;
2188         sigset_t         blocked;
2189
2190         infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
2191
2192         if (n_ids <= 0 ||
2193             id.nid == LNET_NID_ANY ||
2194             timeout_ms > 500000 ||              /* arbitrary limit! */
2195             n_ids > 20)                         /* arbitrary limit! */
2196                 return -EINVAL;
2197
2198         if (id.pid == LNET_PID_ANY)
2199                 id.pid = LNET_PID_LUSTRE;
2200
2201         LIBCFS_ALLOC(info, infosz);
2202         if (info == NULL)
2203                 return -ENOMEM;
2204
2205         /* NB 2 events max (including any unlink event) */
2206         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2207         if (rc != 0) {
2208                 CERROR("Can't allocate EQ: %d\n", rc);
2209                 goto out_0;
2210         }
2211
2212         /* initialize md content */
2213         md.start     = info;
2214         md.length    = infosz;
2215         md.threshold = 2; /*GET/REPLY*/
2216         md.max_size  = 0;
2217         md.options   = LNET_MD_TRUNCATE;
2218         md.user_ptr  = NULL;
2219         md.eq_handle = eqh;
2220
2221         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2222         if (rc != 0) {
2223                 CERROR("Can't bind MD: %d\n", rc);
2224                 goto out_1;
2225         }
2226
2227         rc = LNetGet(LNET_NID_ANY, mdh, id,
2228                      LNET_RESERVED_PORTAL,
2229                      LNET_PROTO_PING_MATCHBITS, 0);
2230
2231         if (rc != 0) {
2232                 /* Don't CERROR; this could be deliberate! */
2233
2234                 rc2 = LNetMDUnlink(mdh);
2235                 LASSERT(rc2 == 0);
2236
2237                 /* NB must wait for the UNLINK event below... */
2238                 unlinked = 1;
2239                 timeout_ms = a_long_time;
2240         }
2241
2242         do {
2243                 /* MUST block for unlink to complete */
2244                 if (unlinked)
2245                         blocked = cfs_block_allsigs();
2246
2247                 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
2248
2249                 if (unlinked)
2250                         cfs_restore_sigs(blocked);
2251
2252                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2253                        (rc2 <= 0) ? -1 : event.type,
2254                        (rc2 <= 0) ? -1 : event.status,
2255                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2256
2257                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2258
2259                 if (rc2 <= 0 || event.status != 0) {
2260                         /* timeout or error */
2261                         if (!replied && rc == 0)
2262                                 rc = (rc2 < 0) ? rc2 :
2263                                      (rc2 == 0) ? -ETIMEDOUT :
2264                                      event.status;
2265
2266                         if (!unlinked) {
2267                                 /* Ensure completion in finite time... */
2268                                 LNetMDUnlink(mdh);
2269                                 /* No assertion (racing with network) */
2270                                 unlinked = 1;
2271                                 timeout_ms = a_long_time;
2272                         } else if (rc2 == 0) {
2273                                 /* timed out waiting for unlink */
2274                                 CWARN("ping %s: late network completion\n",
2275                                       libcfs_id2str(id));
2276                         }
2277                 } else if (event.type == LNET_EVENT_REPLY) {
2278                         replied = 1;
2279                         rc = event.mlength;
2280                 }
2281
2282         } while (rc2 <= 0 || !event.unlinked);
2283
2284         if (!replied) {
2285                 if (rc >= 0)
2286                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2287                               libcfs_id2str(id));
2288                 rc = -EIO;
2289                 goto out_1;
2290         }
2291
2292         nob = rc;
2293         LASSERT(nob >= 0 && nob <= infosz);
2294
2295         rc = -EPROTO;                           /* if I can't parse... */
2296
2297         if (nob < 8) {
2298                 /* can't check magic/version */
2299                 CERROR("%s: ping info too short %d\n",
2300                        libcfs_id2str(id), nob);
2301                 goto out_1;
2302         }
2303
2304         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2305                 lnet_swap_pinginfo(info);
2306         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2307                 CERROR("%s: Unexpected magic %08x\n",
2308                        libcfs_id2str(id), info->pi_magic);
2309                 goto out_1;
2310         }
2311
2312         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2313                 CERROR("%s: ping w/o NI status: 0x%x\n",
2314                        libcfs_id2str(id), info->pi_features);
2315                 goto out_1;
2316         }
2317
2318         if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
2319                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2320                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
2321                 goto out_1;
2322         }
2323
2324         if (info->pi_nnis < n_ids)
2325                 n_ids = info->pi_nnis;
2326
2327         if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
2328                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2329                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
2330                 goto out_1;
2331         }
2332
2333         rc = -EFAULT;                           /* If I SEGV... */
2334
2335         for (i = 0; i < n_ids; i++) {
2336                 tmpid.pid = info->pi_pid;
2337                 tmpid.nid = info->pi_ni[i].ns_nid;
2338                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2339                         goto out_1;
2340         }
2341         rc = info->pi_nnis;
2342
2343  out_1:
2344         rc2 = LNetEQFree(eqh);
2345         if (rc2 != 0)
2346                 CERROR("rc2 %d\n", rc2);
2347         LASSERT(rc2 == 0);
2348
2349  out_0:
2350         LIBCFS_FREE(info, infosz);
2351         return rc;
2352 }