Whamcloud - gitweb
5ecf560cf32ee5d604a12d48b584cb6542542cfe
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include <lnet/lib-lnet.h>
39 #include <lnet/lib-dlc.h>
40 #ifdef __KERNEL__
41 #include <linux/log2.h>
42 #endif
43
/* D_LNI: debug mask alias, D_CONSOLE for kernel builds and D_CONFIG
 * for userland builds. */
#ifndef __KERNEL__
# define D_LNI D_CONFIG
#else
# define D_LNI D_CONSOLE
#endif
49
50 lnet_t      the_lnet;                           /* THE state of the network */
51 EXPORT_SYMBOL(the_lnet);
52
53 #ifdef __KERNEL__
54
55 static char *ip2nets = "";
56 CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
57                 "LNET network <- IP table");
58
59 static char *networks = "";
60 CFS_MODULE_PARM(networks, "s", charp, 0444,
61                 "local networks");
62
63 static char *routes = "";
64 CFS_MODULE_PARM(routes, "s", charp, 0444,
65                 "routes to non-local networks");
66
67 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
68 CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
69                 "size of remote network hash table");
70
71 static void lnet_ping_target_fini(void);
72 static int lnet_ping(lnet_process_id_t id, int timeout_ms,
73                      lnet_process_id_t *ids, int n_ids);
74
75 static char *
76 lnet_get_routes(void)
77 {
78         return routes;
79 }
80
81 static char *
82 lnet_get_networks(void)
83 {
84         char   *nets;
85         int     rc;
86
87         if (*networks != 0 && *ip2nets != 0) {
88                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
89                                    "'ip2nets' but not both at once\n");
90                 return NULL;
91         }
92
93         if (*ip2nets != 0) {
94                 rc = lnet_parse_ip2nets(&nets, ip2nets);
95                 return (rc == 0) ? nets : NULL;
96         }
97
98         if (*networks != 0)
99                 return networks;
100
101         return "tcp";
102 }
103
104 static void
105 lnet_init_locks(void)
106 {
107         spin_lock_init(&the_lnet.ln_eq_wait_lock);
108         init_waitqueue_head(&the_lnet.ln_eq_waitq);
109         mutex_init(&the_lnet.ln_lnd_mutex);
110         mutex_init(&the_lnet.ln_api_mutex);
111 }
112
/* Kernel build: counterpart of lnet_init_locks(); does nothing here. */
static void
lnet_fini_locks(void)
{
	/* intentionally empty */
}
117
118 #else
119
/* Userland build: routes come from the LNET_ROUTES environment variable;
 * an unset variable is treated as an empty route list. */
static char *
lnet_get_routes(void)
{
	char *env = getenv("LNET_ROUTES");

	if (env == NULL)
		return "";

	return env;
}
127
128 static char *
129 lnet_get_networks (void)
130 {
131         static char       default_networks[256];
132         char             *networks = getenv("LNET_NETWORKS");
133         char             *str;
134         char             *sep;
135         int               len;
136         int               nob;
137         struct list_head *tmp;
138
139         if (networks != NULL)
140                 return networks;
141
142         /* In userland, the default 'networks=' is the list of known net types */
143         len = sizeof(default_networks);
144         str = default_networks;
145         *str = 0;
146         sep = "";
147
148         list_for_each(tmp, &the_lnet.ln_lnds) {
149                 lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
150
151                 nob = snprintf(str, len, "%s%s", sep,
152                                libcfs_lnd2str(lnd->lnd_type));
153                 if (nob >= len) {
154                         /* overflowed the string; leave it where it was */
155                         *str = 0;
156                         break;
157                 }
158                 len -= nob;
159                 str += nob;
160                 sep = ",";
161         }
162
163         return default_networks;
164 }
165
166 # ifndef HAVE_LIBPTHREAD
167
168 static void lnet_init_locks(void)
169 {
170         the_lnet.ln_eq_wait_lock = 0;
171         the_lnet.ln_lnd_mutex = 0;
172         the_lnet.ln_api_mutex = 0;
173 }
174
175 static void lnet_fini_locks(void)
176 {
177         LASSERT(the_lnet.ln_api_mutex == 0);
178         LASSERT(the_lnet.ln_lnd_mutex == 0);
179         LASSERT(the_lnet.ln_eq_wait_lock == 0);
180 }
181
182 # else
183
184 static void lnet_init_locks(void)
185 {
186         pthread_cond_init(&the_lnet.ln_eq_cond, NULL);
187         pthread_mutex_init(&the_lnet.ln_eq_wait_lock, NULL);
188         pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL);
189         pthread_mutex_init(&the_lnet.ln_api_mutex, NULL);
190 }
191
192 static void lnet_fini_locks(void)
193 {
194         pthread_mutex_destroy(&the_lnet.ln_api_mutex);
195         pthread_mutex_destroy(&the_lnet.ln_lnd_mutex);
196         pthread_mutex_destroy(&the_lnet.ln_eq_wait_lock);
197         pthread_cond_destroy(&the_lnet.ln_eq_cond);
198 }
199
200 # endif
201 #endif
202
203 static int
204 lnet_create_remote_nets_table(void)
205 {
206         int               i;
207         struct list_head *hash;
208
209         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
210         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
211         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
212         if (hash == NULL) {
213                 CERROR("Failed to create remote nets hash table\n");
214                 return -ENOMEM;
215         }
216
217         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
218                 INIT_LIST_HEAD(&hash[i]);
219         the_lnet.ln_remote_nets_hash = hash;
220         return 0;
221 }
222
223 static void
224 lnet_destroy_remote_nets_table(void)
225 {
226         int i;
227
228         if (the_lnet.ln_remote_nets_hash == NULL)
229                 return;
230
231         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
232                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
233
234         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
235                     LNET_REMOTE_NETS_HASH_SIZE *
236                     sizeof(the_lnet.ln_remote_nets_hash[0]));
237         the_lnet.ln_remote_nets_hash = NULL;
238 }
239
240 static void
241 lnet_destroy_locks(void)
242 {
243         if (the_lnet.ln_res_lock != NULL) {
244                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
245                 the_lnet.ln_res_lock = NULL;
246         }
247
248         if (the_lnet.ln_net_lock != NULL) {
249                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
250                 the_lnet.ln_net_lock = NULL;
251         }
252
253         lnet_fini_locks();
254 }
255
256 static int
257 lnet_create_locks(void)
258 {
259         lnet_init_locks();
260
261         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
262         if (the_lnet.ln_res_lock == NULL)
263                 goto failed;
264
265         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
266         if (the_lnet.ln_net_lock == NULL)
267                 goto failed;
268
269         return 0;
270
271  failed:
272         lnet_destroy_locks();
273         return -ENOMEM;
274 }
275
276 static void lnet_assert_wire_constants(void)
277 {
278         /* Wire protocol assertions generated by 'wirecheck'
279          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
280          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
281          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
282
283         /* Constants... */
284         CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
285         CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
286         CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
287         CLASSERT (LNET_MSG_ACK == 0);
288         CLASSERT (LNET_MSG_PUT == 1);
289         CLASSERT (LNET_MSG_GET == 2);
290         CLASSERT (LNET_MSG_REPLY == 3);
291         CLASSERT (LNET_MSG_HELLO == 4);
292
293         /* Checks for struct ptl_handle_wire_t */
294         CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
295         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
296         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
297         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
298         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
299
300         /* Checks for struct lnet_magicversion_t */
301         CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
302         CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
303         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
304         CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
305         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
306         CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
307         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
308
309         /* Checks for struct lnet_hdr_t */
310         CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
311         CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
312         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
313         CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
314         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
315         CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
316         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
317         CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
318         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
319         CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
320         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
321         CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
322         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
323         CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
324         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
325
326         /* Ack */
327         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
328         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
329         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
330         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
331         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
332         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
333
334         /* Put */
335         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
336         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
337         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
338         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
339         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
340         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
341         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
342         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
343         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
344         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
345
346         /* Get */
347         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
348         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
349         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
350         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
351         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
352         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
353         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
354         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
355         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
356         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
357
358         /* Reply */
359         CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
360         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
361
362         /* Hello */
363         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
364         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
365         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
366         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
367 }
368
369 static lnd_t *
370 lnet_find_lnd_by_type (int type)
371 {
372         lnd_t            *lnd;
373         struct list_head *tmp;
374
375         /* holding lnd mutex */
376         list_for_each(tmp, &the_lnet.ln_lnds) {
377                 lnd = list_entry(tmp, lnd_t, lnd_list);
378
379                 if ((int)lnd->lnd_type == type)
380                         return lnd;
381         }
382         return NULL;
383 }
384
385 void
386 lnet_register_lnd (lnd_t *lnd)
387 {
388         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
389
390         LASSERT(the_lnet.ln_init);
391         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
392         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
393
394         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
395         lnd->lnd_refcount = 0;
396
397         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
398
399         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
400 }
401 EXPORT_SYMBOL(lnet_register_lnd);
402
403 void
404 lnet_unregister_lnd (lnd_t *lnd)
405 {
406         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
407
408         LASSERT(the_lnet.ln_init);
409         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
410         LASSERT(lnd->lnd_refcount == 0);
411
412         list_del(&lnd->lnd_list);
413         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
414
415         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
416 }
417 EXPORT_SYMBOL(lnet_unregister_lnd);
418
419 void
420 lnet_counters_get(lnet_counters_t *counters)
421 {
422         lnet_counters_t *ctr;
423         int             i;
424
425         memset(counters, 0, sizeof(*counters));
426
427         lnet_net_lock(LNET_LOCK_EX);
428
429         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
430                 counters->msgs_max     += ctr->msgs_max;
431                 counters->msgs_alloc   += ctr->msgs_alloc;
432                 counters->errors       += ctr->errors;
433                 counters->send_count   += ctr->send_count;
434                 counters->recv_count   += ctr->recv_count;
435                 counters->route_count  += ctr->route_count;
436                 counters->drop_count   += ctr->drop_count;
437                 counters->send_length  += ctr->send_length;
438                 counters->recv_length  += ctr->recv_length;
439                 counters->route_length += ctr->route_length;
440                 counters->drop_length  += ctr->drop_length;
441
442         }
443         lnet_net_unlock(LNET_LOCK_EX);
444 }
445 EXPORT_SYMBOL(lnet_counters_get);
446
447 void
448 lnet_counters_reset(void)
449 {
450         lnet_counters_t *counters;
451         int             i;
452
453         lnet_net_lock(LNET_LOCK_EX);
454
455         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
456                 memset(counters, 0, sizeof(lnet_counters_t));
457
458         lnet_net_unlock(LNET_LOCK_EX);
459 }
460 EXPORT_SYMBOL(lnet_counters_reset);
461
462 #ifdef LNET_USE_LIB_FREELIST
463
464 int
465 lnet_freelist_init(lnet_freelist_t *fl, int n, int size)
466 {
467         char *space;
468
469         LASSERT (n > 0);
470
471         size += offsetof (lnet_freeobj_t, fo_contents);
472
473         LIBCFS_ALLOC(space, n * size);
474         if (space == NULL)
475                 return (-ENOMEM);
476
477         INIT_LIST_HEAD(&fl->fl_list);
478         fl->fl_objs = space;
479         fl->fl_nobjs = n;
480         fl->fl_objsize = size;
481
482         do {
483                 list_add((struct list_head *)space, &fl->fl_list);
484                 space += size;
485         } while (--n != 0);
486
487         return 0;
488 }
489
490 void
491 lnet_freelist_fini(lnet_freelist_t *fl)
492 {
493         struct list_head *el;
494         int               count;
495
496         if (fl->fl_nobjs == 0)
497                 return;
498
499         count = 0;
500         for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
501                 count++;
502
503         LASSERT (count == fl->fl_nobjs);
504
505         LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
506         memset (fl, 0, sizeof (*fl));
507 }
508
509 #endif /* LNET_USE_LIB_FREELIST */
510
511 static __u64 lnet_create_interface_cookie(void)
512 {
513         /* NB the interface cookie in wire handles guards against delayed
514          * replies and ACKs appearing valid after reboot. Initialisation time,
515          * even if it's only implemented to millisecond resolution is probably
516          * easily good enough. */
517         struct timeval tv;
518         __u64          cookie;
519 #ifndef __KERNEL__
520         int            rc = gettimeofday (&tv, NULL);
521         LASSERT (rc == 0);
522 #else
523         do_gettimeofday(&tv);
524 #endif
525         cookie = tv.tv_sec;
526         cookie *= 1000000;
527         cookie += tv.tv_usec;
528         return cookie;
529 }
530
531 static char *
532 lnet_res_type2str(int type)
533 {
534         switch (type) {
535         default:
536                 LBUG();
537         case LNET_COOKIE_TYPE_MD:
538                 return "MD";
539         case LNET_COOKIE_TYPE_ME:
540                 return "ME";
541         case LNET_COOKIE_TYPE_EQ:
542                 return "EQ";
543         }
544 }
545
546 static void
547 lnet_res_container_cleanup(struct lnet_res_container *rec)
548 {
549         int     count = 0;
550
551         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
552                 return;
553
554         while (!list_empty(&rec->rec_active)) {
555                 struct list_head *e = rec->rec_active.next;
556
557                 list_del_init(e);
558                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
559                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
560
561                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
562                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
563
564                 } else { /* NB: Active MEs should be attached on portals */
565                         LBUG();
566                 }
567                 count++;
568         }
569
570         if (count > 0) {
571                 /* Found alive MD/ME/EQ, user really should unlink/free
572                  * all of them before finalize LNet, but if someone didn't,
573                  * we have to recycle garbage for him */
574                 CERROR("%d active elements on exit of %s container\n",
575                        count, lnet_res_type2str(rec->rec_type));
576         }
577
578 #ifdef LNET_USE_LIB_FREELIST
579         lnet_freelist_fini(&rec->rec_freelist);
580 #endif
581         if (rec->rec_lh_hash != NULL) {
582                 LIBCFS_FREE(rec->rec_lh_hash,
583                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
584                 rec->rec_lh_hash = NULL;
585         }
586
587         rec->rec_type = 0; /* mark it as finalized */
588 }
589
590 static int
591 lnet_res_container_setup(struct lnet_res_container *rec,
592                          int cpt, int type, int objnum, int objsz)
593 {
594         int     rc = 0;
595         int     i;
596
597         LASSERT(rec->rec_type == 0);
598
599         rec->rec_type = type;
600         INIT_LIST_HEAD(&rec->rec_active);
601
602 #ifdef LNET_USE_LIB_FREELIST
603         memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist));
604         rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz);
605         if (rc != 0)
606                 goto out;
607 #endif
608         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
609
610         /* Arbitrary choice of hash table size */
611         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
612                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
613         if (rec->rec_lh_hash == NULL) {
614                 rc = -ENOMEM;
615                 goto out;
616         }
617
618         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
619                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
620
621         return 0;
622
623 out:
624         CERROR("Failed to setup %s resource container\n",
625                lnet_res_type2str(type));
626         lnet_res_container_cleanup(rec);
627         return rc;
628 }
629
630 static void
631 lnet_res_containers_destroy(struct lnet_res_container **recs)
632 {
633         struct lnet_res_container       *rec;
634         int                             i;
635
636         cfs_percpt_for_each(rec, i, recs)
637                 lnet_res_container_cleanup(rec);
638
639         cfs_percpt_free(recs);
640 }
641
642 static struct lnet_res_container **
643 lnet_res_containers_create(int type, int objnum, int objsz)
644 {
645         struct lnet_res_container       **recs;
646         struct lnet_res_container       *rec;
647         int                             rc;
648         int                             i;
649
650         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
651         if (recs == NULL) {
652                 CERROR("Failed to allocate %s resource containers\n",
653                        lnet_res_type2str(type));
654                 return NULL;
655         }
656
657         cfs_percpt_for_each(rec, i, recs) {
658                 rc = lnet_res_container_setup(rec, i, type, objnum, objsz);
659                 if (rc != 0) {
660                         lnet_res_containers_destroy(recs);
661                         return NULL;
662                 }
663         }
664
665         return recs;
666 }
667
668 lnet_libhandle_t *
669 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
670 {
671         /* ALWAYS called with lnet_res_lock held */
672         struct list_head        *head;
673         lnet_libhandle_t        *lh;
674         unsigned int            hash;
675
676         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
677                 return NULL;
678
679         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
680         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
681
682         list_for_each_entry(lh, head, lh_hash_chain) {
683                 if (lh->lh_cookie == cookie)
684                         return lh;
685         }
686
687         return NULL;
688 }
689
690 void
691 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
692 {
693         /* ALWAYS called with lnet_res_lock held */
694         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
695         unsigned int    hash;
696
697         lh->lh_cookie = rec->rec_lh_cookie;
698         rec->rec_lh_cookie += 1 << ibits;
699
700         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
701
702         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
703 }
704
705 #ifndef __KERNEL__
706 /**
707  * Reserved API - do not use.
708  * Temporary workaround to allow uOSS and test programs force server
709  * mode in userspace. See comments near ln_server_mode_flag in
710  * lnet/lib-types.h */
711
712 void
713 lnet_server_mode() {
714         the_lnet.ln_server_mode_flag = 1;
715 }
716 #endif
717
718 static int lnet_unprepare(void);
719
720 static int
721 lnet_prepare(lnet_pid_t requested_pid)
722 {
723         /* Prepare to bring up the network */
724         struct lnet_res_container **recs;
725         int                       rc = 0;
726
727         if (requested_pid == LNET_PID_ANY) {
728                 /* Don't instantiate LNET just for me */
729                 return -ENETDOWN;
730         }
731
732         LASSERT (the_lnet.ln_refcount == 0);
733
734         the_lnet.ln_routing = 0;
735
736 #ifdef __KERNEL__
737         LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
738         the_lnet.ln_pid = requested_pid;
739 #else
740         if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
741                 LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
742
743                 if (current_uid() != 0) /* Only root can run user-space server */
744                         return -EPERM;
745                 the_lnet.ln_pid = requested_pid;
746
747         } else {/* client case (liblustre) */
748
749                 /* My PID must be unique on this node and flag I'm userspace */
750                 the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
751         }
752 #endif
753
754         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
755         INIT_LIST_HEAD(&the_lnet.ln_nis);
756         INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
757         INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
758         INIT_LIST_HEAD(&the_lnet.ln_routers);
759         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
760
761         rc = lnet_create_remote_nets_table();
762         if (rc != 0)
763                 goto failed;
764
765         the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
766
767         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
768                                                 sizeof(lnet_counters_t));
769         if (the_lnet.ln_counters == NULL) {
770                 CERROR("Failed to allocate counters for LNet\n");
771                 rc = -ENOMEM;
772                 goto failed;
773         }
774
775         rc = lnet_peer_tables_create();
776         if (rc != 0)
777                 goto failed;
778
779         rc = lnet_msg_containers_create();
780         if (rc != 0)
781                 goto failed;
782
783         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
784                                       LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS,
785                                       sizeof(lnet_eq_t));
786         if (rc != 0)
787                 goto failed;
788
789         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
790                                           sizeof(lnet_me_t));
791         if (recs == NULL)
792                 goto failed;
793
794         the_lnet.ln_me_containers = recs;
795
796         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
797                                           sizeof(lnet_libmd_t));
798         if (recs == NULL)
799                 goto failed;
800
801         the_lnet.ln_md_containers = recs;
802
803         rc = lnet_portals_create();
804         if (rc != 0) {
805                 CERROR("Failed to create portals for LNet: %d\n", rc);
806                 goto failed;
807         }
808
809         return 0;
810
811  failed:
812         lnet_unprepare();
813         return rc;
814 }
815
816 static int
817 lnet_unprepare (void)
818 {
819         /* NB no LNET_LOCK since this is the last reference.  All LND instances
820          * have shut down already, so it is safe to unlink and free all
821          * descriptors, even those that appear committed to a network op (eg MD
822          * with non-zero pending count) */
823
824         lnet_fail_nid(LNET_NID_ANY, 0);
825
826         LASSERT(the_lnet.ln_refcount == 0);
827         LASSERT(list_empty(&the_lnet.ln_test_peers));
828         LASSERT(list_empty(&the_lnet.ln_nis));
829         LASSERT(list_empty(&the_lnet.ln_nis_cpt));
830         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
831
832         lnet_portals_destroy();
833
834         if (the_lnet.ln_md_containers != NULL) {
835                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
836                 the_lnet.ln_md_containers = NULL;
837         }
838
839         if (the_lnet.ln_me_containers != NULL) {
840                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
841                 the_lnet.ln_me_containers = NULL;
842         }
843
844         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
845
846         lnet_msg_containers_destroy();
847         lnet_peer_tables_destroy();
848         lnet_rtrpools_free(0);
849
850         if (the_lnet.ln_counters != NULL) {
851                 cfs_percpt_free(the_lnet.ln_counters);
852                 the_lnet.ln_counters = NULL;
853         }
854         lnet_destroy_remote_nets_table();
855
856         return 0;
857 }
858
859 lnet_ni_t  *
860 lnet_net2ni_locked(__u32 net, int cpt)
861 {
862         struct list_head *tmp;
863         lnet_ni_t        *ni;
864
865         LASSERT(cpt != LNET_LOCK_EX);
866
867         list_for_each(tmp, &the_lnet.ln_nis) {
868                 ni = list_entry(tmp, lnet_ni_t, ni_list);
869
870                 if (LNET_NIDNET(ni->ni_nid) == net) {
871                         lnet_ni_addref_locked(ni, cpt);
872                         return ni;
873                 }
874         }
875
876         return NULL;
877 }
878
879 lnet_ni_t *
880 lnet_net2ni(__u32 net)
881 {
882         lnet_ni_t *ni;
883
884         lnet_net_lock(0);
885         ni = lnet_net2ni_locked(net, 0);
886         lnet_net_unlock(0);
887
888         return ni;
889 }
890 EXPORT_SYMBOL(lnet_net2ni);
891
892 static unsigned int
893 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
894 {
895         __u64           key = nid;
896         unsigned int    val;
897
898         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
899
900         if (number == 1)
901                 return 0;
902
903         val = hash_long(key, LNET_CPT_BITS);
904         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
905         if (val < number)
906                 return val;
907
908         return (unsigned int)(key + val + (val >> 1)) % number;
909 }
910
911 int
912 lnet_cpt_of_nid_locked(lnet_nid_t nid)
913 {
914         struct lnet_ni *ni;
915
916         /* must called with hold of lnet_net_lock */
917         if (LNET_CPT_NUMBER == 1)
918                 return 0; /* the only one */
919
920         /* take lnet_net_lock(any) would be OK */
921         if (!list_empty(&the_lnet.ln_nis_cpt)) {
922                 list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
923                         if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
924                                 continue;
925
926                         LASSERT(ni->ni_cpts != NULL);
927                         return ni->ni_cpts[lnet_nid_cpt_hash
928                                            (nid, ni->ni_ncpts)];
929                 }
930         }
931
932         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
933 }
934
935 int
936 lnet_cpt_of_nid(lnet_nid_t nid)
937 {
938         int     cpt;
939         int     cpt2;
940
941         if (LNET_CPT_NUMBER == 1)
942                 return 0; /* the only one */
943
944         if (list_empty(&the_lnet.ln_nis_cpt))
945                 return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
946
947         cpt = lnet_net_lock_current();
948         cpt2 = lnet_cpt_of_nid_locked(nid);
949         lnet_net_unlock(cpt);
950
951         return cpt2;
952 }
953 EXPORT_SYMBOL(lnet_cpt_of_nid);
954
955 int
956 lnet_islocalnet(__u32 net)
957 {
958         struct lnet_ni  *ni;
959         int             cpt;
960
961         cpt = lnet_net_lock_current();
962
963         ni = lnet_net2ni_locked(net, cpt);
964         if (ni != NULL)
965                 lnet_ni_decref_locked(ni, cpt);
966
967         lnet_net_unlock(cpt);
968
969         return ni != NULL;
970 }
971
972 lnet_ni_t  *
973 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
974 {
975         struct lnet_ni   *ni;
976         struct list_head *tmp;
977
978         LASSERT(cpt != LNET_LOCK_EX);
979
980         list_for_each(tmp, &the_lnet.ln_nis) {
981                 ni = list_entry(tmp, lnet_ni_t, ni_list);
982
983                 if (ni->ni_nid == nid) {
984                         lnet_ni_addref_locked(ni, cpt);
985                         return ni;
986                 }
987         }
988
989         return NULL;
990 }
991
992 int
993 lnet_islocalnid(lnet_nid_t nid)
994 {
995         struct lnet_ni  *ni;
996         int             cpt;
997
998         cpt = lnet_net_lock_current();
999         ni = lnet_nid2ni_locked(nid, cpt);
1000         if (ni != NULL)
1001                 lnet_ni_decref_locked(ni, cpt);
1002         lnet_net_unlock(cpt);
1003
1004         return ni != NULL;
1005 }
1006
/*
 * Count the local NIs whose LND provides an lnd_accept method, i.e. the
 * NIs that need the acceptor.  Always 0 when built for userspace
 * without pthreads.
 */
int
lnet_count_acceptor_nis (void)
{
        int              count = 0;
#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
        struct lnet_ni   *ni;
        int              cpt;

        cpt = lnet_net_lock_current();

        list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
                if (ni->ni_lnd->lnd_accept != NULL)
                        count++;
        }

        lnet_net_unlock(cpt);

#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
        return count;
}
1030
1031 static lnet_ping_info_t *
1032 lnet_ping_info_create(int num_ni)
1033 {
1034         lnet_ping_info_t *ping_info;
1035         unsigned int     infosz;
1036
1037         infosz = offsetof(lnet_ping_info_t, pi_ni[num_ni]);
1038         LIBCFS_ALLOC(ping_info, infosz);
1039         if (ping_info == NULL) {
1040                 CERROR("Can't allocate ping info[%d]\n", num_ni);
1041                 return NULL;
1042         }
1043
1044         ping_info->pi_nnis = num_ni;
1045         ping_info->pi_pid = the_lnet.ln_pid;
1046         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
1047         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
1048
1049         return ping_info;
1050 }
1051
1052 static inline int
1053 lnet_get_ni_count(void)
1054 {
1055         struct lnet_ni *ni;
1056         int            count = 0;
1057
1058         lnet_net_lock(0);
1059
1060         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
1061                 count++;
1062
1063         lnet_net_unlock(0);
1064
1065         return count;
1066 }
1067
1068 static inline void
1069 lnet_ping_info_free(lnet_ping_info_t *pinfo)
1070 {
1071         LIBCFS_FREE(pinfo,
1072                     offsetof(lnet_ping_info_t,
1073                              pi_ni[pinfo->pi_nnis]));
1074 }
1075
1076 static void
1077 lnet_ping_info_destroy(void)
1078 {
1079         struct lnet_ni  *ni;
1080
1081         lnet_net_lock(LNET_LOCK_EX);
1082
1083         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1084                 lnet_ni_lock(ni);
1085                 ni->ni_status = NULL;
1086                 lnet_ni_unlock(ni);
1087         }
1088
1089         lnet_ping_info_free(the_lnet.ln_ping_info);
1090         the_lnet.ln_ping_info = NULL;
1091
1092         lnet_net_unlock(LNET_LOCK_EX);
1093 }
1094
1095 static void
1096 lnet_ping_event_handler(lnet_event_t *event)
1097 {
1098         lnet_ping_info_t *pinfo = event->md.user_ptr;
1099
1100         if (event->unlinked)
1101                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
1102 }
1103
1104 static int
1105 lnet_ping_info_setup(lnet_ping_info_t **ppinfo, lnet_handle_md_t *md_handle,
1106                      int ni_count, bool set_eq)
1107 {
1108         lnet_handle_me_t  me_handle;
1109         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1110         lnet_md_t         md = {0};
1111         int               rc, rc2;
1112
1113         if (set_eq) {
1114                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
1115                                  &the_lnet.ln_ping_target_eq);
1116                 if (rc != 0) {
1117                         CERROR("Can't allocate ping EQ: %d\n", rc);
1118                         return rc;
1119                 }
1120         }
1121
1122         *ppinfo = lnet_ping_info_create(ni_count);
1123         if (*ppinfo == NULL) {
1124                 rc = -ENOMEM;
1125                 goto failed_0;
1126         }
1127
1128         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1129                           LNET_PROTO_PING_MATCHBITS, 0,
1130                           LNET_UNLINK, LNET_INS_AFTER,
1131                           &me_handle);
1132         if (rc != 0) {
1133                 CERROR("Can't create ping ME: %d\n", rc);
1134                 goto failed_1;
1135         }
1136
1137         /* initialize md content */
1138         md.start     = *ppinfo;
1139         md.length    = offsetof(lnet_ping_info_t,
1140                                 pi_ni[(*ppinfo)->pi_nnis]);
1141         md.threshold = LNET_MD_THRESH_INF;
1142         md.max_size  = 0;
1143         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1144                        LNET_MD_MANAGE_REMOTE;
1145         md.user_ptr  = NULL;
1146         md.eq_handle = the_lnet.ln_ping_target_eq;
1147         md.user_ptr = *ppinfo;
1148
1149         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
1150         if (rc != 0) {
1151                 CERROR("Can't attach ping MD: %d\n", rc);
1152                 goto failed_2;
1153         }
1154
1155         return 0;
1156
1157 failed_2:
1158         rc2 = LNetMEUnlink(me_handle);
1159         LASSERT(rc2 == 0);
1160 failed_1:
1161         lnet_ping_info_free(*ppinfo);
1162         *ppinfo = NULL;
1163 failed_0:
1164         if (set_eq)
1165                 LNetEQFree(the_lnet.ln_ping_target_eq);
1166         return rc;
1167 }
1168
1169 static void
1170 lnet_ping_md_unlink(lnet_ping_info_t *pinfo, lnet_handle_md_t *md_handle)
1171 {
1172         sigset_t        blocked = cfs_block_allsigs();
1173
1174         LNetMDUnlink(*md_handle);
1175         LNetInvalidateHandle(md_handle);
1176
1177         /* NB md could be busy; this just starts the unlink */
1178         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
1179                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
1180                 cfs_pause(cfs_time_seconds(1));
1181         }
1182
1183         cfs_restore_sigs(blocked);
1184 }
1185
1186 static void
1187 lnet_ping_info_install_locked(lnet_ping_info_t *ping_info)
1188 {
1189         int                     i;
1190         lnet_ni_t               *ni;
1191         lnet_ni_status_t        *ns;
1192
1193         i = 0;
1194         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1195                 LASSERT(i < ping_info->pi_nnis);
1196
1197                 ns = &ping_info->pi_ni[i];
1198
1199                 ns->ns_nid = ni->ni_nid;
1200
1201                 lnet_ni_lock(ni);
1202                 ns->ns_status = (ni->ni_status != NULL) ?
1203                                 ni->ni_status->ns_status : LNET_NI_STATUS_UP;
1204                 ni->ni_status = ns;
1205                 lnet_ni_unlock(ni);
1206
1207                 i++;
1208         }
1209 }
1210
1211 static void
1212 lnet_ping_target_update(lnet_ping_info_t *pinfo, lnet_handle_md_t md_handle)
1213 {
1214         lnet_ping_info_t *old_pinfo = NULL;
1215         lnet_handle_md_t old_md;
1216
1217         /* switch the NIs to point to the new ping info created */
1218         lnet_net_lock(LNET_LOCK_EX);
1219
1220         if (!the_lnet.ln_routing)
1221                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1222         lnet_ping_info_install_locked(pinfo);
1223
1224         if (the_lnet.ln_ping_info != NULL) {
1225                 old_pinfo = the_lnet.ln_ping_info;
1226                 old_md = the_lnet.ln_ping_target_md;
1227         }
1228         the_lnet.ln_ping_target_md = md_handle;
1229         the_lnet.ln_ping_info = pinfo;
1230
1231         lnet_net_unlock(LNET_LOCK_EX);
1232
1233         if (old_pinfo != NULL) {
1234                 /* unlink the old ping info */
1235                 lnet_ping_md_unlink(old_pinfo, &old_md);
1236                 lnet_ping_info_free(old_pinfo);
1237         }
1238 }
1239
1240 static void
1241 lnet_ping_target_fini(void)
1242 {
1243         int             rc;
1244
1245         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1246                             &the_lnet.ln_ping_target_md);
1247
1248         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1249         LASSERT(rc == 0);
1250
1251         lnet_ping_info_destroy();
1252 }
1253
1254 static int
1255 lnet_ni_tq_credits(lnet_ni_t *ni)
1256 {
1257         int     credits;
1258
1259         LASSERT(ni->ni_ncpts >= 1);
1260
1261         if (ni->ni_ncpts == 1)
1262                 return ni->ni_maxtxcredits;
1263
1264         credits = ni->ni_maxtxcredits / ni->ni_ncpts;
1265         credits = max(credits, 8 * ni->ni_peertxcredits);
1266         credits = min(credits, ni->ni_maxtxcredits);
1267
1268         return credits;
1269 }
1270
1271 static void
1272 lnet_clear_zombies_nis_locked(void)
1273 {
1274         int             i;
1275         int             islo;
1276         lnet_ni_t       *ni;
1277
1278         /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
1279          * and shut them down in guaranteed thread context */
1280         i = 2;
1281         while (!list_empty(&the_lnet.ln_nis_zombie)) {
1282                 int     *ref;
1283                 int     j;
1284
1285                 ni = list_entry(the_lnet.ln_nis_zombie.next,
1286                                 lnet_ni_t, ni_list);
1287                 list_del_init(&ni->ni_list);
1288                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1289                         if (*ref == 0)
1290                                 continue;
1291                         /* still busy, add it back to zombie list */
1292                         list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
1293                         break;
1294                 }
1295
1296                 if (!list_empty(&ni->ni_list)) {
1297                         lnet_net_unlock(LNET_LOCK_EX);
1298                         ++i;
1299                         if ((i & (-i)) == i) {
1300                                 CDEBUG(D_WARNING,
1301                                        "Waiting for zombie LNI %s\n",
1302                                        libcfs_nid2str(ni->ni_nid));
1303                         }
1304                         cfs_pause(cfs_time_seconds(1));
1305                         lnet_net_lock(LNET_LOCK_EX);
1306                         continue;
1307                 }
1308
1309                 ni->ni_lnd->lnd_refcount--;
1310                 lnet_net_unlock(LNET_LOCK_EX);
1311
1312                 islo = ni->ni_lnd->lnd_type == LOLND;
1313
1314                 LASSERT(!in_interrupt());
1315                 (ni->ni_lnd->lnd_shutdown)(ni);
1316
1317                 /* can't deref lnd anymore now; it might have unregistered
1318                  * itself...  */
1319
1320                 if (!islo)
1321                         CDEBUG(D_LNI, "Removed LNI %s\n",
1322                               libcfs_nid2str(ni->ni_nid));
1323
1324                 lnet_ni_free(ni);
1325                 i = 2;
1326                 lnet_net_lock(LNET_LOCK_EX);
1327         }
1328 }
1329
1330 static void
1331 lnet_shutdown_lndnis(void)
1332 {
1333         int             i;
1334         lnet_ni_t       *ni;
1335
1336         /* NB called holding the global mutex */
1337
1338         /* All quiet on the API front */
1339         LASSERT(!the_lnet.ln_shutdown);
1340         LASSERT(the_lnet.ln_refcount == 0);
1341         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
1342
1343         lnet_net_lock(LNET_LOCK_EX);
1344         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1345
1346         /* Unlink NIs from the global table */
1347         while (!list_empty(&the_lnet.ln_nis)) {
1348                 ni = list_entry(the_lnet.ln_nis.next,
1349                                 lnet_ni_t, ni_list);
1350                 /* move it to zombie list and nobody can find it anymore */
1351                 list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
1352                 lnet_ni_decref_locked(ni, 0);   /* drop ln_nis' ref */
1353
1354                 if (!list_empty(&ni->ni_cptlist)) {
1355                         list_del_init(&ni->ni_cptlist);
1356                         lnet_ni_decref_locked(ni, 0);
1357                 }
1358         }
1359
1360         /* Drop the cached eqwait NI. */
1361         if (the_lnet.ln_eq_waitni != NULL) {
1362                 lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0);
1363                 the_lnet.ln_eq_waitni = NULL;
1364         }
1365
1366         /* Drop the cached loopback NI. */
1367         if (the_lnet.ln_loni != NULL) {
1368                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1369                 the_lnet.ln_loni = NULL;
1370         }
1371
1372         lnet_net_unlock(LNET_LOCK_EX);
1373
1374         /* Clear lazy portals and drop delayed messages which hold refs
1375          * on their lnet_msg_t::msg_rxpeer */
1376         for (i = 0; i < the_lnet.ln_nportals; i++)
1377                 LNetClearLazyPortal(i);
1378
1379         /* Clear the peer table and wait for all peers to go (they hold refs on
1380          * their NIs) */
1381         lnet_peer_tables_cleanup(NULL);
1382
1383         lnet_net_lock(LNET_LOCK_EX);
1384
1385         lnet_clear_zombies_nis_locked();
1386         the_lnet.ln_shutdown = 0;
1387         lnet_net_unlock(LNET_LOCK_EX);
1388 }
1389
1390 int
1391 lnet_shutdown_lndni(__u32 net)
1392 {
1393         lnet_ping_info_t *pinfo;
1394         lnet_handle_md_t md_handle;
1395         lnet_ni_t       *found_ni = NULL;
1396         int             ni_count;
1397         int             rc;
1398
1399         if (LNET_NETTYP(net) == LOLND)
1400                 return -EINVAL;
1401
1402         ni_count = lnet_get_ni_count();
1403
1404         /* create and link a new ping info, before removing the old one */
1405         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count - 1, false);
1406         if (rc != 0)
1407                 return rc;
1408
1409         /* proceed with shutting down the NI */
1410         lnet_net_lock(LNET_LOCK_EX);
1411
1412         found_ni = lnet_net2ni_locked(net, 0);
1413         if (found_ni == NULL) {
1414                 lnet_net_unlock(LNET_LOCK_EX);
1415                 lnet_ping_md_unlink(pinfo, &md_handle);
1416                 lnet_ping_info_free(pinfo);
1417                 return -EINVAL;
1418         }
1419
1420         /* decrement the reference counter on found_ni which was
1421          * incremented when we called lnet_net2ni_locked() */
1422         lnet_ni_decref_locked(found_ni, 0);
1423
1424         /* Move ni to zombie list so nobody can find it anymore */
1425         list_move(&found_ni->ni_list, &the_lnet.ln_nis_zombie);
1426
1427         /* Drop the lock reference for the ln_nis ref. */
1428         lnet_ni_decref_locked(found_ni, 0);
1429
1430         if (!list_empty(&found_ni->ni_cptlist)) {
1431                 list_del_init(&found_ni->ni_cptlist);
1432                 lnet_ni_decref_locked(found_ni, 0);
1433         }
1434
1435         lnet_net_unlock(LNET_LOCK_EX);
1436
1437         /* Do peer table cleanup for this ni */
1438         lnet_peer_tables_cleanup(found_ni);
1439
1440         lnet_net_lock(LNET_LOCK_EX);
1441         lnet_clear_zombies_nis_locked();
1442         lnet_net_unlock(LNET_LOCK_EX);
1443
1444         lnet_ping_target_update(pinfo, md_handle);
1445
1446         return 0;
1447 }
1448
1449 static int
1450 lnet_startup_lndnis(struct list_head *nilist, __s32 peer_timeout,
1451                     __s32 peer_cr, __s32 peer_buf_cr, __s32 credits,
1452                     int *ni_count)
1453 {
1454         int                     rc = 0;
1455         struct lnet_ni          *ni;
1456         int                     lnd_type;
1457         lnd_t                   *lnd;
1458         struct lnet_tx_queue    *tq;
1459         int                     i;
1460
1461         while (!list_empty(nilist)) {
1462                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1463                 lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
1464
1465                 if (!libcfs_isknown_lnd(lnd_type))
1466                         goto failed;
1467
1468                 if (lnd_type == CIBLND    ||
1469                     lnd_type == OPENIBLND ||
1470                     lnd_type == IIBLND    ||
1471                     lnd_type == VIBLND) {
1472                         CERROR("LND %s obsoleted\n",
1473                                libcfs_lnd2str(lnd_type));
1474                         goto failed;
1475                 }
1476
1477                 /* Make sure this new NI is unique. */
1478                 lnet_net_lock(LNET_LOCK_EX);
1479                 if (!lnet_net_unique(LNET_NIDNET(ni->ni_nid),
1480                                      &the_lnet.ln_nis)) {
1481                         if (lnd_type == LOLND) {
1482                                 lnet_net_unlock(LNET_LOCK_EX);
1483                                 list_del(&ni->ni_list);
1484                                 lnet_ni_free(ni);
1485                                 continue;
1486                         }
1487
1488                         CERROR("Net %s is not unique\n",
1489                                libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
1490                         lnet_net_unlock(LNET_LOCK_EX);
1491                         goto failed;
1492                 }
1493                 lnet_net_unlock(LNET_LOCK_EX);
1494
1495                 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1496                 lnd = lnet_find_lnd_by_type(lnd_type);
1497
1498 #ifdef __KERNEL__
1499                 if (lnd == NULL) {
1500                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1501                         rc = request_module("%s",
1502                                                 libcfs_lnd2modname(lnd_type));
1503                         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1504
1505                         lnd = lnet_find_lnd_by_type(lnd_type);
1506                         if (lnd == NULL) {
1507                                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1508                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1509                                        libcfs_lnd2str(lnd_type),
1510                                        libcfs_lnd2modname(lnd_type), rc);
1511 #ifndef HAVE_MODULE_LOADING_SUPPORT
1512                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1513                                          "compiled with kernel module "
1514                                          "loading support.");
1515 #endif
1516                                 goto failed;
1517                         }
1518                 }
1519 #else
1520                 if (lnd == NULL) {
1521                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1522                         CERROR("LND %s not supported\n",
1523                                libcfs_lnd2str(lnd_type));
1524                         goto failed;
1525                 }
1526 #endif
1527
1528                 lnet_net_lock(LNET_LOCK_EX);
1529                 lnd->lnd_refcount++;
1530                 lnet_net_unlock(LNET_LOCK_EX);
1531
1532                 ni->ni_lnd = lnd;
1533
1534                 rc = (lnd->lnd_startup)(ni);
1535
1536                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1537
1538                 if (rc != 0) {
1539                         LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
1540                                            "\n",
1541                                            rc, libcfs_lnd2str(lnd->lnd_type));
1542                         lnet_net_lock(LNET_LOCK_EX);
1543                         lnd->lnd_refcount--;
1544                         lnet_net_unlock(LNET_LOCK_EX);
1545                         goto failed;
1546                 }
1547
1548                 /* If given some LND tunable parameters, parse those now to
1549                  * override the values in the NI structure. */
1550                 if (peer_buf_cr >= 0)
1551                         ni->ni_peerrtrcredits = peer_buf_cr;
1552                 if (peer_timeout >= 0)
1553                         ni->ni_peertimeout = peer_timeout;
1554                 /*
1555                  * TODO
1556                  * Note: For now, don't allow the user to change
1557                  * peertxcredits as this number is used in the
1558                  * IB LND to control queue depth.
1559                  * if (peer_cr != -1)
1560                  *      ni->ni_peertxcredits = peer_cr;
1561                  */
1562                 if (credits >= 0)
1563                         ni->ni_maxtxcredits = credits;
1564
1565                 LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
1566
1567                 list_del(&ni->ni_list);
1568
1569                 lnet_net_lock(LNET_LOCK_EX);
1570                 /* refcount for ln_nis */
1571                 lnet_ni_addref_locked(ni, 0);
1572                 list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1573                 if (ni->ni_cpts != NULL) {
1574                         list_add_tail(&ni->ni_cptlist,
1575                                       &the_lnet.ln_nis_cpt);
1576                         lnet_ni_addref_locked(ni, 0);
1577                 }
1578
1579                 lnet_net_unlock(LNET_LOCK_EX);
1580
1581                 /* increment the ni_count here to account for the LOLND as
1582                  * well.  If we increment past this point then the number
1583                  * of count will be missing the LOLND, and then ping and
1584                  * will not report the LOLND
1585                  */
1586                 if (ni_count != NULL)
1587                         (*ni_count)++;
1588
1589                 if (lnd->lnd_type == LOLND) {
1590                         lnet_ni_addref(ni);
1591                         LASSERT(the_lnet.ln_loni == NULL);
1592                         the_lnet.ln_loni = ni;
1593                         continue;
1594                 }
1595
1596 #ifndef __KERNEL__
1597                 if (lnd->lnd_wait != NULL) {
1598                         if (the_lnet.ln_eq_waitni == NULL) {
1599                                 lnet_ni_addref(ni);
1600                                 the_lnet.ln_eq_waitni = ni;
1601                         }
1602                 } else {
1603 # ifndef HAVE_LIBPTHREAD
1604                         LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a "
1605                                            "single-threaded runtime\n",
1606                                            libcfs_lnd2str(lnd_type));
1607                         goto failed;
1608 # endif
1609                 }
1610 #endif
1611                 if (ni->ni_peertxcredits == 0 ||
1612                     ni->ni_maxtxcredits == 0) {
1613                         LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1614                                            libcfs_lnd2str(lnd->lnd_type),
1615                                            ni->ni_peertxcredits == 0 ?
1616                                            "" : "per-peer ");
1617                         goto failed;
1618                 }
1619
1620                 cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1621                         tq->tq_credits_min =
1622                         tq->tq_credits_max =
1623                         tq->tq_credits = lnet_ni_tq_credits(ni);
1624                 }
1625
1626                 CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1627                        libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
1628                        lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1629                        ni->ni_peerrtrcredits, ni->ni_peertimeout);
1630         }
1631
1632         return 0;
1633 failed:
1634         while (!list_empty(nilist)) {
1635                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1636                 list_del(&ni->ni_list);
1637                 lnet_ni_free(ni);
1638         }
1639         return -EINVAL;
1640 }
1641
1642 /**
1643  * Initialize LNet library.
1644  *
1645  * Only userspace program needs to call this function - it's automatically
1646  * called in the kernel at module loading time. Caller has to call LNetFini()
1647  * after a call to LNetInit(), if and only if the latter returned 0. It must
1648  * be called exactly once.
1649  *
1650  * \return 0 on success, and -ve on failures.
1651  */
1652 int
1653 LNetInit(void)
1654 {
1655         int     rc;
1656
1657         lnet_assert_wire_constants();
1658         LASSERT(!the_lnet.ln_init);
1659
1660         memset(&the_lnet, 0, sizeof(the_lnet));
1661
1662         /* refer to global cfs_cpt_table for now */
1663         the_lnet.ln_cpt_table   = cfs_cpt_table;
1664         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1665
1666         LASSERT(the_lnet.ln_cpt_number > 0);
1667         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1668                 /* we are under risk of consuming all lh_cookie */
1669                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1670                        "please change setting of CPT-table and retry\n",
1671                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1672                 return -1;
1673         }
1674
1675         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1676                 the_lnet.ln_cpt_bits++;
1677
1678         rc = lnet_create_locks();
1679         if (rc != 0) {
1680                 CERROR("Can't create LNet global locks: %d\n", rc);
1681                 return -1;
1682         }
1683
1684         the_lnet.ln_refcount = 0;
1685         the_lnet.ln_init = 1;
1686         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1687         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1688         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1689         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1690
1691 #ifdef __KERNEL__
1692         /* The hash table size is the number of bits it takes to express the set
1693          * ln_num_routes, minus 1 (better to under estimate than over so we
1694          * don't waste memory). */
1695         if (rnet_htable_size <= 0)
1696                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1697         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1698                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1699         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1700                                            order_base_2(rnet_htable_size) - 1);
1701
1702         /* All LNDs apart from the LOLND are in separate modules.  They
1703          * register themselves when their module loads, and unregister
1704          * themselves when their module is unloaded. */
1705 #else
1706         the_lnet.ln_remote_nets_hbits = 8;
1707
1708         /* Register LNDs
1709          * NB the order here determines default 'networks=' order */
1710 # ifdef HAVE_LIBPTHREAD
1711         LNET_REGISTER_ULND(the_tcplnd);
1712 # endif
1713 #endif
1714         lnet_register_lnd(&the_lolnd);
1715         return 0;
1716 }
1717 EXPORT_SYMBOL(LNetInit);
1718
1719 /**
1720  * Finalize LNet library.
1721  *
1722  * Only userspace program needs to call this function. It can be called
1723  * at most once.
1724  *
1725  * \pre LNetInit() called with success.
1726  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1727  */
1728 void
1729 LNetFini(void)
1730 {
1731         LASSERT(the_lnet.ln_init);
1732         LASSERT(the_lnet.ln_refcount == 0);
1733
1734         while (!list_empty(&the_lnet.ln_lnds))
1735                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1736                                                lnd_t, lnd_list));
1737         lnet_destroy_locks();
1738
1739         the_lnet.ln_init = 0;
1740 }
1741 EXPORT_SYMBOL(LNetFini);
1742
1743 /**
1744  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1745  *
1746  * Userspace program should call this after a successful call to LNetInit().
1747  * Users must call this function at least once before any other functions.
1748  * For each successful call there must be a corresponding call to
1749  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1750  * ignored.
1751  *
1752  * The PID used by LNet may be different from the one requested.
1753  * See LNetGetId().
1754  *
1755  * \param requested_pid PID requested by the caller.
1756  *
1757  * \return >= 0 on success, and < 0 error code on failures.
1758  */
1759 int
1760 LNetNIInit(lnet_pid_t requested_pid)
1761 {
1762         int                     im_a_router = 0;
1763         int                     rc;
1764         int                     ni_count = 0;
1765         int                     lnd_type;
1766         struct lnet_ni          *ni;
1767         lnet_ping_info_t        *pinfo;
1768         lnet_handle_md_t        md_handle;
1769         struct list_head        net_head;
1770
1771         INIT_LIST_HEAD(&net_head);
1772
1773         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1774
1775         LASSERT(the_lnet.ln_init);
1776         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1777
1778         if (the_lnet.ln_refcount > 0) {
1779                 rc = the_lnet.ln_refcount++;
1780                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1781                 return rc;
1782         }
1783
1784         rc = lnet_prepare(requested_pid);
1785         if (rc != 0)
1786                 goto failed0;
1787
1788         rc = lnet_parse_networks(&net_head,
1789                                  !the_lnet.ln_nis_from_mod_params ?
1790                                    lnet_get_networks() : "");
1791         if (rc < 0)
1792                 goto failed1;
1793
1794         rc = lnet_startup_lndnis(&net_head, -1, -1, -1, -1, &ni_count);
1795         if (rc != 0)
1796                 goto failed1;
1797
1798         if (the_lnet.ln_eq_waitni != NULL && ni_count > 1) {
1799                 lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type;
1800                 LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
1801                                    "\n",
1802                                    libcfs_lnd2str(lnd_type));
1803                 goto failed2;
1804         }
1805
1806         rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1807         if (rc != 0)
1808                 goto failed2;
1809
1810         rc = lnet_check_routes();
1811         if (rc != 0)
1812                 goto failed2;
1813
1814         rc = lnet_rtrpools_alloc(im_a_router);
1815         if (rc != 0)
1816                 goto failed2;
1817
1818         rc = lnet_acceptor_start();
1819         if (rc != 0)
1820                 goto failed2;
1821         the_lnet.ln_refcount = 1;
1822         /* Now I may use my own API functions... */
1823
1824         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1825         if (rc != 0)
1826                 goto failed3;
1827
1828         lnet_ping_target_update(pinfo, md_handle);
1829
1830         rc = lnet_router_checker_start();
1831         if (rc != 0)
1832                 goto failed4;
1833
1834         lnet_fault_init();
1835         lnet_proc_init();
1836
1837         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1838
1839         return 0;
1840
1841 failed4:
1842         the_lnet.ln_refcount = 0;
1843         lnet_ping_md_unlink(pinfo, &md_handle);
1844         lnet_ping_info_free(pinfo);
1845 failed3:
1846         lnet_acceptor_stop();
1847         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1848         LASSERT(rc == 0);
1849 failed2:
1850         lnet_destroy_routes();
1851         lnet_shutdown_lndnis();
1852 failed1:
1853         lnet_unprepare();
1854 failed0:
1855         LASSERT(rc < 0);
1856         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1857         while (!list_empty(&net_head)) {
1858                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
1859                 list_del_init(&ni->ni_list);
1860                 lnet_ni_free(ni);
1861         }
1862         return rc;
1863 }
1864 EXPORT_SYMBOL(LNetNIInit);
1865
1866 /**
1867  * Stop LNet interfaces, routing, and forwarding.
1868  *
1869  * Users must call this function once for each successful call to LNetNIInit().
1870  * Once the LNetNIFini() operation has been started, the results of pending
1871  * API operations are undefined.
1872  *
1873  * \return always 0 for current implementation.
1874  */
1875 int
1876 LNetNIFini()
1877 {
1878         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1879
1880         LASSERT (the_lnet.ln_init);
1881         LASSERT (the_lnet.ln_refcount > 0);
1882
1883         if (the_lnet.ln_refcount != 1) {
1884                 the_lnet.ln_refcount--;
1885         } else {
1886                 LASSERT(!the_lnet.ln_niinit_self);
1887
1888                 lnet_fault_fini();
1889
1890                 lnet_proc_fini();
1891                 lnet_router_checker_stop();
1892                 lnet_ping_target_fini();
1893
1894                 /* Teardown fns that use my own API functions BEFORE here */
1895                 the_lnet.ln_refcount = 0;
1896
1897                 lnet_acceptor_stop();
1898                 lnet_destroy_routes();
1899                 lnet_shutdown_lndnis();
1900                 lnet_unprepare();
1901         }
1902
1903         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1904         return 0;
1905 }
1906 EXPORT_SYMBOL(LNetNIFini);
1907
1908 /**
1909  * Grabs the ni data from the ni structure and fills the out
1910  * parameters
1911  *
1912  * \param[in] ni network        interface structure
1913  * \param[out] cpt_count        the number of cpts the ni is on
1914  * \param[out] nid              Network Interface ID
1915  * \param[out] peer_timeout     NI peer timeout
1916  * \param[out] peer_tx_crdits   NI peer transmit credits
1917  * \param[out] peer_rtr_credits NI peer router credits
1918  * \param[out] max_tx_credits   NI max transmit credit
1919  * \param[out] net_config       Network configuration
1920  */
1921 static void
1922 lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
1923                   int *peer_timeout, int *peer_tx_credits,
1924                   int *peer_rtr_credits, int *max_tx_credits,
1925                   struct lnet_ioctl_net_config *net_config)
1926 {
1927         int i;
1928
1929         if (ni == NULL)
1930                 return;
1931
1932         if (net_config == NULL)
1933                 return;
1934
1935         CLASSERT(ARRAY_SIZE(ni->ni_interfaces) ==
1936                  ARRAY_SIZE(net_config->ni_interfaces));
1937
1938         if (ni->ni_interfaces[0] != NULL) {
1939                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1940                         if (ni->ni_interfaces[i] != NULL) {
1941                                 strncpy(net_config->ni_interfaces[i],
1942                                         ni->ni_interfaces[i],
1943                                         sizeof(net_config->ni_interfaces[i]));
1944                         }
1945                 }
1946         }
1947
1948         *nid = ni->ni_nid;
1949         *peer_timeout = ni->ni_peertimeout;
1950         *peer_tx_credits = ni->ni_peertxcredits;
1951         *peer_rtr_credits = ni->ni_peerrtrcredits;
1952         *max_tx_credits = ni->ni_maxtxcredits;
1953
1954         net_config->ni_status = ni->ni_status->ns_status;
1955
1956         for (i = 0;
1957              ni->ni_cpts != NULL && i < ni->ni_ncpts &&
1958              i < LNET_MAX_SHOW_NUM_CPT;
1959              i++)
1960                 net_config->ni_cpts[i] = ni->ni_cpts[i];
1961
1962         *cpt_count = ni->ni_ncpts;
1963 }
1964
1965 int
1966 lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
1967                     int *peer_tx_credits, int *peer_rtr_credits,
1968                     int *max_tx_credits,
1969                     struct lnet_ioctl_net_config *net_config)
1970 {
1971         struct lnet_ni          *ni;
1972         struct list_head        *tmp;
1973         int                     cpt;
1974         int                     rc = -ENOENT;
1975
1976         cpt = lnet_net_lock_current();
1977
1978         list_for_each(tmp, &the_lnet.ln_nis) {
1979                 ni = list_entry(tmp, lnet_ni_t, ni_list);
1980                 if (idx-- == 0) {
1981                         rc = 0;
1982                         lnet_ni_lock(ni);
1983                         lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
1984                                           peer_tx_credits, peer_rtr_credits,
1985                                           max_tx_credits, net_config);
1986                         lnet_ni_unlock(ni);
1987                         break;
1988                 }
1989         }
1990
1991         lnet_net_unlock(cpt);
1992         return rc;
1993 }
1994
1995 int
1996 lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
1997                 __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
1998                 __s32 credits)
1999 {
2000         lnet_ping_info_t        *pinfo;
2001         lnet_handle_md_t        md_handle;
2002         struct lnet_ni          *ni;
2003         struct list_head        net_head;
2004         int                     rc;
2005
2006         INIT_LIST_HEAD(&net_head);
2007
2008         /* Create a ni structure for the network string */
2009         rc = lnet_parse_networks(&net_head, nets);
2010         if (rc < 0)
2011                 return rc;
2012
2013         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2014
2015         if (rc > 1) {
2016                 rc = -EINVAL; /* only add one interface per call */
2017                 goto failed0;
2018         }
2019
2020         rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
2021                                   false);
2022         if (rc != 0)
2023                 goto failed0;
2024
2025         rc = lnet_startup_lndnis(&net_head, peer_timeout, peer_cr,
2026                                  peer_buf_cr, credits, NULL);
2027         if (rc != 0)
2028                 goto failed1;
2029
2030         lnet_ping_target_update(pinfo, md_handle);
2031         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2032
2033         return 0;
2034
2035 failed1:
2036         lnet_ping_md_unlink(pinfo, &md_handle);
2037         lnet_ping_info_free(pinfo);
2038 failed0:
2039         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2040         while (!list_empty(&net_head)) {
2041                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
2042                 list_del_init(&ni->ni_list);
2043                 lnet_ni_free(ni);
2044         }
2045         return rc;
2046 }
2047
2048 int
2049 lnet_dyn_del_ni(__u32 net)
2050 {
2051         int rc;
2052
2053         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2054         rc = lnet_shutdown_lndni(net);
2055         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2056
2057         return rc;
2058 }
2059
2060 /**
2061  * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
2062  * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
2063  * internal ioctl handler.
2064  *
2065  * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
2066  *
2067  * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
2068  * The data will be printed to system console. Don't use it excessively.
2069  * \param arg A pointer to lnet_process_id_t, process ID of the peer.
2070  *
2071  * \return Always return 0 when called by users directly (i.e., not via ioctl).
2072  */
2073 int
2074 LNetCtl(unsigned int cmd, void *arg)
2075 {
2076         struct libcfs_ioctl_data *data = arg;
2077         struct lnet_ioctl_config_data *config;
2078         lnet_process_id_t         id = {0};
2079         lnet_ni_t                *ni;
2080         int                       rc;
2081
2082         CLASSERT(LIBCFS_IOC_DATA_MAX >= sizeof(struct lnet_ioctl_net_config) +
2083                                         sizeof(struct lnet_ioctl_config_data));
2084         LASSERT(the_lnet.ln_init);
2085
2086         switch (cmd) {
2087         case IOC_LIBCFS_GET_NI:
2088                 rc = LNetGetId(data->ioc_count, &id);
2089                 data->ioc_nid = id.nid;
2090                 return rc;
2091
2092         case IOC_LIBCFS_FAIL_NID:
2093                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2094
2095         case IOC_LIBCFS_ADD_ROUTE:
2096                 config = arg;
2097                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2098                 rc = lnet_add_route(config->cfg_net,
2099                                     config->cfg_config_u.cfg_route.rtr_hop,
2100                                     config->cfg_nid,
2101                                     config->cfg_config_u.cfg_route.
2102                                         rtr_priority);
2103                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2104                 return (rc != 0) ? rc : lnet_check_routes();
2105
2106         case IOC_LIBCFS_DEL_ROUTE:
2107                 config = arg;
2108                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2109                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2110                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2111                 return rc;
2112
2113         case IOC_LIBCFS_GET_ROUTE:
2114                 config = arg;
2115                 return lnet_get_route(config->cfg_count,
2116                                       &config->cfg_net,
2117                                       &config->cfg_config_u.cfg_route.rtr_hop,
2118                                       &config->cfg_nid,
2119                                       &config->cfg_config_u.cfg_route.rtr_flags,
2120                                       &config->cfg_config_u.cfg_route.
2121                                         rtr_priority);
2122
2123         case IOC_LIBCFS_GET_NET: {
2124                 struct lnet_ioctl_net_config *net_config;
2125                 config = arg;
2126                 net_config = (struct lnet_ioctl_net_config *)
2127                         config->cfg_bulk;
2128                 if (config == NULL || net_config == NULL)
2129                         return -1;
2130
2131                 return lnet_get_net_config(config->cfg_count,
2132                                            &config->cfg_ncpts,
2133                                            &config->cfg_nid,
2134                                            &config->cfg_config_u.
2135                                                 cfg_net.net_peer_timeout,
2136                                            &config->cfg_config_u.cfg_net.
2137                                                 net_peer_tx_credits,
2138                                            &config->cfg_config_u.cfg_net.
2139                                                 net_peer_rtr_credits,
2140                                            &config->cfg_config_u.cfg_net.
2141                                                 net_max_tx_credits,
2142                                            net_config);
2143         }
2144
2145         case IOC_LIBCFS_GET_LNET_STATS:
2146         {
2147                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2148
2149                 lnet_counters_get(&lnet_stats->st_cntrs);
2150                 return 0;
2151         }
2152
2153 #if defined(__KERNEL__) && defined(LNET_ROUTER)
2154         case IOC_LIBCFS_CONFIG_RTR:
2155                 config = arg;
2156                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2157                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2158                         rc = lnet_rtrpools_enable();
2159                         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2160                         return rc;
2161                 }
2162                 lnet_rtrpools_disable();
2163                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2164                 return 0;
2165
2166         case IOC_LIBCFS_ADD_BUF:
2167                 config = arg;
2168                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2169                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2170                                                 buf_tiny,
2171                                           config->cfg_config_u.cfg_buffers.
2172                                                 buf_small,
2173                                           config->cfg_config_u.cfg_buffers.
2174                                                 buf_large);
2175                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2176                 return rc;
2177 #endif
2178
2179         case IOC_LIBCFS_GET_BUF: {
2180                 struct lnet_ioctl_pool_cfg *pool_cfg;
2181                 config = arg;
2182                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2183                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2184         }
2185
2186         case IOC_LIBCFS_GET_PEER_INFO: {
2187                 struct lnet_ioctl_peer *peer_info = arg;
2188                 return lnet_get_peer_info(
2189                    peer_info->pr_count,
2190                    &peer_info->pr_nid,
2191                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2192                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2193                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2194                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2195                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2196                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2197                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2198                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2199         }
2200
2201         case IOC_LIBCFS_NOTIFY_ROUTER:
2202                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2203                                    cfs_time_current() -
2204                                    cfs_time_seconds(cfs_time_current_sec() -
2205                                                     (time_t)data->ioc_u64[0]));
2206
2207         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
2208                 /* This can be removed once lustre stops calling it */
2209                 return 0;
2210
2211         case IOC_LIBCFS_LNET_DIST:
2212                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2213                 if (rc < 0 && rc != -EHOSTUNREACH)
2214                         return rc;
2215
2216                 data->ioc_u32[0] = rc;
2217                 return 0;
2218
2219         case IOC_LIBCFS_TESTPROTOCOMPAT:
2220                 lnet_net_lock(LNET_LOCK_EX);
2221                 the_lnet.ln_testprotocompat = data->ioc_flags;
2222                 lnet_net_unlock(LNET_LOCK_EX);
2223                 return 0;
2224
2225         case IOC_LIBCFS_LNET_FAULT:
2226                 return lnet_fault_ctl(data->ioc_flags, data);
2227
2228         case IOC_LIBCFS_PING:
2229                 id.nid = data->ioc_nid;
2230                 id.pid = data->ioc_u32[0];
2231                 rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
2232                                (lnet_process_id_t __user *)data->ioc_pbuf1,
2233                                data->ioc_plen1/sizeof(lnet_process_id_t));
2234                 if (rc < 0)
2235                         return rc;
2236                 data->ioc_count = rc;
2237                 return 0;
2238
2239         case IOC_LIBCFS_DEBUG_PEER: {
2240                 /* CAVEAT EMPTOR: this one designed for calling directly; not
2241                  * via an ioctl */
2242                 id = *((lnet_process_id_t *) arg);
2243
2244                 lnet_debug_peer(id.nid);
2245
2246                 ni = lnet_net2ni(LNET_NIDNET(id.nid));
2247                 if (ni == NULL) {
2248                         CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
2249                 } else {
2250                         if (ni->ni_lnd->lnd_ctl == NULL) {
2251                                 CDEBUG(D_WARNING, "No ctl for %s\n",
2252                                        libcfs_id2str(id));
2253                         } else {
2254                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2255                         }
2256
2257                         lnet_ni_decref(ni);
2258                 }
2259                 return 0;
2260         }
2261
2262         default:
2263                 ni = lnet_net2ni(data->ioc_net);
2264                 if (ni == NULL)
2265                         return -EINVAL;
2266
2267                 if (ni->ni_lnd->lnd_ctl == NULL)
2268                         rc = -EINVAL;
2269                 else
2270                         rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2271
2272                 lnet_ni_decref(ni);
2273                 return rc;
2274         }
2275         /* not reached */
2276 }
2277 EXPORT_SYMBOL(LNetCtl);
2278
2279 /**
2280  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
2281  * all interfaces share a same PID, as requested by LNetNIInit().
2282  *
2283  * \param index Index of the interface to look up.
2284  * \param id On successful return, this location will hold the
2285  * lnet_process_id_t ID of the interface.
2286  *
2287  * \retval 0 If an interface exists at \a index.
2288  * \retval -ENOENT If no interface has been found.
2289  */
2290 int
2291 LNetGetId(unsigned int index, lnet_process_id_t *id)
2292 {
2293         struct lnet_ni   *ni;
2294         struct list_head *tmp;
2295         int               cpt;
2296         int               rc = -ENOENT;
2297
2298         LASSERT(the_lnet.ln_init);
2299         LASSERT(the_lnet.ln_refcount > 0);
2300
2301         cpt = lnet_net_lock_current();
2302
2303         list_for_each(tmp, &the_lnet.ln_nis) {
2304                 if (index-- != 0)
2305                         continue;
2306
2307                 ni = list_entry(tmp, lnet_ni_t, ni_list);
2308
2309                 id->nid = ni->ni_nid;
2310                 id->pid = the_lnet.ln_pid;
2311                 rc = 0;
2312                 break;
2313         }
2314
2315         lnet_net_unlock(cpt);
2316         return rc;
2317 }
2318 EXPORT_SYMBOL(LNetGetId);
2319
2320 /**
2321  * Print a string representation of handle \a h into buffer \a str of
2322  * \a len bytes.
2323  */
2324 void
2325 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2326 {
2327         snprintf(str, len, LPX64, h.cookie);
2328 }
2329 EXPORT_SYMBOL(LNetSnprintHandle);
2330
2331 static int
2332 lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids,
2333           int n_ids)
2334 {
2335         lnet_handle_eq_t     eqh;
2336         lnet_handle_md_t     mdh;
2337         lnet_event_t         event;
2338         lnet_md_t            md = {0};
2339         int                  which;
2340         int                  unlinked = 0;
2341         int                  replied = 0;
2342         const int            a_long_time = 60000; /* mS */
2343         int                  infosz;
2344         lnet_ping_info_t    *info;
2345         lnet_process_id_t    tmpid;
2346         int                  i;
2347         int                  nob;
2348         int                  rc;
2349         int                  rc2;
2350         sigset_t         blocked;
2351
2352         infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
2353
2354         if (n_ids <= 0 ||
2355             id.nid == LNET_NID_ANY ||
2356             timeout_ms > 500000 ||              /* arbitrary limit! */
2357             n_ids > 20)                         /* arbitrary limit! */
2358                 return -EINVAL;
2359
2360         if (id.pid == LNET_PID_ANY)
2361                 id.pid = LNET_PID_LUSTRE;
2362
2363         LIBCFS_ALLOC(info, infosz);
2364         if (info == NULL)
2365                 return -ENOMEM;
2366
2367         /* NB 2 events max (including any unlink event) */
2368         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2369         if (rc != 0) {
2370                 CERROR("Can't allocate EQ: %d\n", rc);
2371                 goto out_0;
2372         }
2373
2374         /* initialize md content */
2375         md.start     = info;
2376         md.length    = infosz;
2377         md.threshold = 2; /*GET/REPLY*/
2378         md.max_size  = 0;
2379         md.options   = LNET_MD_TRUNCATE;
2380         md.user_ptr  = NULL;
2381         md.eq_handle = eqh;
2382
2383         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2384         if (rc != 0) {
2385                 CERROR("Can't bind MD: %d\n", rc);
2386                 goto out_1;
2387         }
2388
2389         rc = LNetGet(LNET_NID_ANY, mdh, id,
2390                      LNET_RESERVED_PORTAL,
2391                      LNET_PROTO_PING_MATCHBITS, 0);
2392
2393         if (rc != 0) {
2394                 /* Don't CERROR; this could be deliberate! */
2395
2396                 rc2 = LNetMDUnlink(mdh);
2397                 LASSERT(rc2 == 0);
2398
2399                 /* NB must wait for the UNLINK event below... */
2400                 unlinked = 1;
2401                 timeout_ms = a_long_time;
2402         }
2403
2404         do {
2405                 /* MUST block for unlink to complete */
2406                 if (unlinked)
2407                         blocked = cfs_block_allsigs();
2408
2409                 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
2410
2411                 if (unlinked)
2412                         cfs_restore_sigs(blocked);
2413
2414                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2415                        (rc2 <= 0) ? -1 : event.type,
2416                        (rc2 <= 0) ? -1 : event.status,
2417                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2418
2419                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2420
2421                 if (rc2 <= 0 || event.status != 0) {
2422                         /* timeout or error */
2423                         if (!replied && rc == 0)
2424                                 rc = (rc2 < 0) ? rc2 :
2425                                      (rc2 == 0) ? -ETIMEDOUT :
2426                                      event.status;
2427
2428                         if (!unlinked) {
2429                                 /* Ensure completion in finite time... */
2430                                 LNetMDUnlink(mdh);
2431                                 /* No assertion (racing with network) */
2432                                 unlinked = 1;
2433                                 timeout_ms = a_long_time;
2434                         } else if (rc2 == 0) {
2435                                 /* timed out waiting for unlink */
2436                                 CWARN("ping %s: late network completion\n",
2437                                       libcfs_id2str(id));
2438                         }
2439                 } else if (event.type == LNET_EVENT_REPLY) {
2440                         replied = 1;
2441                         rc = event.mlength;
2442                 }
2443
2444         } while (rc2 <= 0 || !event.unlinked);
2445
2446         if (!replied) {
2447                 if (rc >= 0)
2448                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2449                               libcfs_id2str(id));
2450                 rc = -EIO;
2451                 goto out_1;
2452         }
2453
2454         nob = rc;
2455         LASSERT(nob >= 0 && nob <= infosz);
2456
2457         rc = -EPROTO;                           /* if I can't parse... */
2458
2459         if (nob < 8) {
2460                 /* can't check magic/version */
2461                 CERROR("%s: ping info too short %d\n",
2462                        libcfs_id2str(id), nob);
2463                 goto out_1;
2464         }
2465
2466         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2467                 lnet_swap_pinginfo(info);
2468         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2469                 CERROR("%s: Unexpected magic %08x\n",
2470                        libcfs_id2str(id), info->pi_magic);
2471                 goto out_1;
2472         }
2473
2474         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2475                 CERROR("%s: ping w/o NI status: 0x%x\n",
2476                        libcfs_id2str(id), info->pi_features);
2477                 goto out_1;
2478         }
2479
2480         if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
2481                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2482                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
2483                 goto out_1;
2484         }
2485
2486         if (info->pi_nnis < n_ids)
2487                 n_ids = info->pi_nnis;
2488
2489         if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
2490                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2491                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
2492                 goto out_1;
2493         }
2494
2495         rc = -EFAULT;                           /* If I SEGV... */
2496
2497         for (i = 0; i < n_ids; i++) {
2498                 tmpid.pid = info->pi_pid;
2499                 tmpid.nid = info->pi_ni[i].ns_nid;
2500                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2501                         goto out_1;
2502         }
2503         rc = info->pi_nnis;
2504
2505  out_1:
2506         rc2 = LNetEQFree(eqh);
2507         if (rc2 != 0)
2508                 CERROR("rc2 %d\n", rc2);
2509         LASSERT(rc2 == 0);
2510
2511  out_0:
2512         LIBCFS_FREE(info, infosz);
2513         return rc;
2514 }