Whamcloud - gitweb
LU-5396 lnet/klnds: add sparse annotation __user wherever needed
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include <lnet/lib-lnet.h>
39 #include <lnet/lib-dlc.h>
40 #ifdef __KERNEL__
41 #include <linux/log2.h>
42 #endif
43
44 #ifdef __KERNEL__
45 #define D_LNI D_CONSOLE
46 #else
47 #define D_LNI D_CONFIG
48 #endif
49
50 lnet_t      the_lnet;                           /* THE state of the network */
51 EXPORT_SYMBOL(the_lnet);
52
53 #ifdef __KERNEL__
54
55 static char *ip2nets = "";
56 CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
57                 "LNET network <- IP table");
58
59 static char *networks = "";
60 CFS_MODULE_PARM(networks, "s", charp, 0444,
61                 "local networks");
62
63 static char *routes = "";
64 CFS_MODULE_PARM(routes, "s", charp, 0444,
65                 "routes to non-local networks");
66
67 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
68 CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
69                 "size of remote network hash table");
70
71 static void lnet_ping_target_fini(void);
72 static int lnet_ping(lnet_process_id_t id, int timeout_ms,
73                      lnet_process_id_t *ids, int n_ids);
74
75 static char *
76 lnet_get_routes(void)
77 {
78         return routes;
79 }
80
81 static char *
82 lnet_get_networks(void)
83 {
84         char   *nets;
85         int     rc;
86
87         if (*networks != 0 && *ip2nets != 0) {
88                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
89                                    "'ip2nets' but not both at once\n");
90                 return NULL;
91         }
92
93         if (*ip2nets != 0) {
94                 rc = lnet_parse_ip2nets(&nets, ip2nets);
95                 return (rc == 0) ? nets : NULL;
96         }
97
98         if (*networks != 0)
99                 return networks;
100
101         return "tcp";
102 }
103
104 static void
105 lnet_init_locks(void)
106 {
107         spin_lock_init(&the_lnet.ln_eq_wait_lock);
108         init_waitqueue_head(&the_lnet.ln_eq_waitq);
109         mutex_init(&the_lnet.ln_lnd_mutex);
110         mutex_init(&the_lnet.ln_api_mutex);
111 }
112
/* Kernel build: counterpart of lnet_init_locks(); there is nothing to undo. */
static void
lnet_fini_locks(void)
{
        /* intentionally empty */
}
117
118 #else
119
/*
 * Userspace build: routes come from the LNET_ROUTES environment variable;
 * an unset variable yields the empty string.
 */
static char *
lnet_get_routes(void)
{
        char *env = getenv("LNET_ROUTES");

        if (env != NULL)
                return env;

        return "";
}
127
128 static char *
129 lnet_get_networks (void)
130 {
131         static char       default_networks[256];
132         char             *networks = getenv("LNET_NETWORKS");
133         char             *str;
134         char             *sep;
135         int               len;
136         int               nob;
137         struct list_head *tmp;
138
139         if (networks != NULL)
140                 return networks;
141
142         /* In userland, the default 'networks=' is the list of known net types */
143         len = sizeof(default_networks);
144         str = default_networks;
145         *str = 0;
146         sep = "";
147
148         list_for_each(tmp, &the_lnet.ln_lnds) {
149                 lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
150
151                 nob = snprintf(str, len, "%s%s", sep,
152                                libcfs_lnd2str(lnd->lnd_type));
153                 if (nob >= len) {
154                         /* overflowed the string; leave it where it was */
155                         *str = 0;
156                         break;
157                 }
158                 len -= nob;
159                 str += nob;
160                 sep = ",";
161         }
162
163         return default_networks;
164 }
165
166 # ifndef HAVE_LIBPTHREAD
167
168 static void lnet_init_locks(void)
169 {
170         the_lnet.ln_eq_wait_lock = 0;
171         the_lnet.ln_lnd_mutex = 0;
172         the_lnet.ln_api_mutex = 0;
173 }
174
175 static void lnet_fini_locks(void)
176 {
177         LASSERT(the_lnet.ln_api_mutex == 0);
178         LASSERT(the_lnet.ln_lnd_mutex == 0);
179         LASSERT(the_lnet.ln_eq_wait_lock == 0);
180 }
181
182 # else
183
184 static void lnet_init_locks(void)
185 {
186         pthread_cond_init(&the_lnet.ln_eq_cond, NULL);
187         pthread_mutex_init(&the_lnet.ln_eq_wait_lock, NULL);
188         pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL);
189         pthread_mutex_init(&the_lnet.ln_api_mutex, NULL);
190 }
191
192 static void lnet_fini_locks(void)
193 {
194         pthread_mutex_destroy(&the_lnet.ln_api_mutex);
195         pthread_mutex_destroy(&the_lnet.ln_lnd_mutex);
196         pthread_mutex_destroy(&the_lnet.ln_eq_wait_lock);
197         pthread_cond_destroy(&the_lnet.ln_eq_cond);
198 }
199
200 # endif
201 #endif
202
203 static int
204 lnet_create_remote_nets_table(void)
205 {
206         int               i;
207         struct list_head *hash;
208
209         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
210         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
211         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
212         if (hash == NULL) {
213                 CERROR("Failed to create remote nets hash table\n");
214                 return -ENOMEM;
215         }
216
217         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
218                 INIT_LIST_HEAD(&hash[i]);
219         the_lnet.ln_remote_nets_hash = hash;
220         return 0;
221 }
222
223 static void
224 lnet_destroy_remote_nets_table(void)
225 {
226         int i;
227
228         if (the_lnet.ln_remote_nets_hash == NULL)
229                 return;
230
231         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
232                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
233
234         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
235                     LNET_REMOTE_NETS_HASH_SIZE *
236                     sizeof(the_lnet.ln_remote_nets_hash[0]));
237         the_lnet.ln_remote_nets_hash = NULL;
238 }
239
240 static void
241 lnet_destroy_locks(void)
242 {
243         if (the_lnet.ln_res_lock != NULL) {
244                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
245                 the_lnet.ln_res_lock = NULL;
246         }
247
248         if (the_lnet.ln_net_lock != NULL) {
249                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
250                 the_lnet.ln_net_lock = NULL;
251         }
252
253         lnet_fini_locks();
254 }
255
256 static int
257 lnet_create_locks(void)
258 {
259         lnet_init_locks();
260
261         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
262         if (the_lnet.ln_res_lock == NULL)
263                 goto failed;
264
265         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
266         if (the_lnet.ln_net_lock == NULL)
267                 goto failed;
268
269         return 0;
270
271  failed:
272         lnet_destroy_locks();
273         return -ENOMEM;
274 }
275
276 static void lnet_assert_wire_constants(void)
277 {
278         /* Wire protocol assertions generated by 'wirecheck'
279          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
280          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
281          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
282
283         /* Constants... */
284         CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
285         CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
286         CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
287         CLASSERT (LNET_MSG_ACK == 0);
288         CLASSERT (LNET_MSG_PUT == 1);
289         CLASSERT (LNET_MSG_GET == 2);
290         CLASSERT (LNET_MSG_REPLY == 3);
291         CLASSERT (LNET_MSG_HELLO == 4);
292
293         /* Checks for struct ptl_handle_wire_t */
294         CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
295         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
296         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
297         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
298         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
299
300         /* Checks for struct lnet_magicversion_t */
301         CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
302         CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
303         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
304         CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
305         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
306         CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
307         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
308
309         /* Checks for struct lnet_hdr_t */
310         CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
311         CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
312         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
313         CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
314         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
315         CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
316         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
317         CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
318         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
319         CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
320         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
321         CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
322         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
323         CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
324         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
325
326         /* Ack */
327         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
328         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
329         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
330         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
331         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
332         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
333
334         /* Put */
335         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
336         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
337         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
338         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
339         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
340         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
341         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
342         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
343         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
344         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
345
346         /* Get */
347         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
348         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
349         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
350         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
351         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
352         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
353         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
354         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
355         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
356         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
357
358         /* Reply */
359         CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
360         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
361
362         /* Hello */
363         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
364         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
365         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
366         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
367 }
368
369 static lnd_t *
370 lnet_find_lnd_by_type (int type)
371 {
372         lnd_t            *lnd;
373         struct list_head *tmp;
374
375         /* holding lnd mutex */
376         list_for_each(tmp, &the_lnet.ln_lnds) {
377                 lnd = list_entry(tmp, lnd_t, lnd_list);
378
379                 if ((int)lnd->lnd_type == type)
380                         return lnd;
381         }
382         return NULL;
383 }
384
385 void
386 lnet_register_lnd (lnd_t *lnd)
387 {
388         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
389
390         LASSERT(the_lnet.ln_init);
391         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
392         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
393
394         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
395         lnd->lnd_refcount = 0;
396
397         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
398
399         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
400 }
401 EXPORT_SYMBOL(lnet_register_lnd);
402
403 void
404 lnet_unregister_lnd (lnd_t *lnd)
405 {
406         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
407
408         LASSERT(the_lnet.ln_init);
409         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
410         LASSERT(lnd->lnd_refcount == 0);
411
412         list_del(&lnd->lnd_list);
413         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
414
415         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
416 }
417 EXPORT_SYMBOL(lnet_unregister_lnd);
418
419 void
420 lnet_counters_get(lnet_counters_t *counters)
421 {
422         lnet_counters_t *ctr;
423         int             i;
424
425         memset(counters, 0, sizeof(*counters));
426
427         lnet_net_lock(LNET_LOCK_EX);
428
429         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
430                 counters->msgs_max     += ctr->msgs_max;
431                 counters->msgs_alloc   += ctr->msgs_alloc;
432                 counters->errors       += ctr->errors;
433                 counters->send_count   += ctr->send_count;
434                 counters->recv_count   += ctr->recv_count;
435                 counters->route_count  += ctr->route_count;
436                 counters->drop_count   += ctr->drop_count;
437                 counters->send_length  += ctr->send_length;
438                 counters->recv_length  += ctr->recv_length;
439                 counters->route_length += ctr->route_length;
440                 counters->drop_length  += ctr->drop_length;
441
442         }
443         lnet_net_unlock(LNET_LOCK_EX);
444 }
445 EXPORT_SYMBOL(lnet_counters_get);
446
447 void
448 lnet_counters_reset(void)
449 {
450         lnet_counters_t *counters;
451         int             i;
452
453         lnet_net_lock(LNET_LOCK_EX);
454
455         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
456                 memset(counters, 0, sizeof(lnet_counters_t));
457
458         lnet_net_unlock(LNET_LOCK_EX);
459 }
460 EXPORT_SYMBOL(lnet_counters_reset);
461
462 #ifdef LNET_USE_LIB_FREELIST
463
464 int
465 lnet_freelist_init(lnet_freelist_t *fl, int n, int size)
466 {
467         char *space;
468
469         LASSERT (n > 0);
470
471         size += offsetof (lnet_freeobj_t, fo_contents);
472
473         LIBCFS_ALLOC(space, n * size);
474         if (space == NULL)
475                 return (-ENOMEM);
476
477         INIT_LIST_HEAD(&fl->fl_list);
478         fl->fl_objs = space;
479         fl->fl_nobjs = n;
480         fl->fl_objsize = size;
481
482         do {
483                 list_add((struct list_head *)space, &fl->fl_list);
484                 space += size;
485         } while (--n != 0);
486
487         return 0;
488 }
489
490 void
491 lnet_freelist_fini(lnet_freelist_t *fl)
492 {
493         struct list_head *el;
494         int               count;
495
496         if (fl->fl_nobjs == 0)
497                 return;
498
499         count = 0;
500         for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
501                 count++;
502
503         LASSERT (count == fl->fl_nobjs);
504
505         LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
506         memset (fl, 0, sizeof (*fl));
507 }
508
509 #endif /* LNET_USE_LIB_FREELIST */
510
511 static __u64 lnet_create_interface_cookie(void)
512 {
513         /* NB the interface cookie in wire handles guards against delayed
514          * replies and ACKs appearing valid after reboot. Initialisation time,
515          * even if it's only implemented to millisecond resolution is probably
516          * easily good enough. */
517         struct timeval tv;
518         __u64          cookie;
519 #ifndef __KERNEL__
520         int            rc = gettimeofday (&tv, NULL);
521         LASSERT (rc == 0);
522 #else
523         do_gettimeofday(&tv);
524 #endif
525         cookie = tv.tv_sec;
526         cookie *= 1000000;
527         cookie += tv.tv_usec;
528         return cookie;
529 }
530
531 static char *
532 lnet_res_type2str(int type)
533 {
534         switch (type) {
535         default:
536                 LBUG();
537         case LNET_COOKIE_TYPE_MD:
538                 return "MD";
539         case LNET_COOKIE_TYPE_ME:
540                 return "ME";
541         case LNET_COOKIE_TYPE_EQ:
542                 return "EQ";
543         }
544 }
545
546 static void
547 lnet_res_container_cleanup(struct lnet_res_container *rec)
548 {
549         int     count = 0;
550
551         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
552                 return;
553
554         while (!list_empty(&rec->rec_active)) {
555                 struct list_head *e = rec->rec_active.next;
556
557                 list_del_init(e);
558                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
559                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
560
561                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
562                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
563
564                 } else { /* NB: Active MEs should be attached on portals */
565                         LBUG();
566                 }
567                 count++;
568         }
569
570         if (count > 0) {
571                 /* Found alive MD/ME/EQ, user really should unlink/free
572                  * all of them before finalize LNet, but if someone didn't,
573                  * we have to recycle garbage for him */
574                 CERROR("%d active elements on exit of %s container\n",
575                        count, lnet_res_type2str(rec->rec_type));
576         }
577
578 #ifdef LNET_USE_LIB_FREELIST
579         lnet_freelist_fini(&rec->rec_freelist);
580 #endif
581         if (rec->rec_lh_hash != NULL) {
582                 LIBCFS_FREE(rec->rec_lh_hash,
583                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
584                 rec->rec_lh_hash = NULL;
585         }
586
587         rec->rec_type = 0; /* mark it as finalized */
588 }
589
590 static int
591 lnet_res_container_setup(struct lnet_res_container *rec,
592                          int cpt, int type, int objnum, int objsz)
593 {
594         int     rc = 0;
595         int     i;
596
597         LASSERT(rec->rec_type == 0);
598
599         rec->rec_type = type;
600         INIT_LIST_HEAD(&rec->rec_active);
601
602 #ifdef LNET_USE_LIB_FREELIST
603         memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist));
604         rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz);
605         if (rc != 0)
606                 goto out;
607 #endif
608         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
609
610         /* Arbitrary choice of hash table size */
611         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
612                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
613         if (rec->rec_lh_hash == NULL) {
614                 rc = -ENOMEM;
615                 goto out;
616         }
617
618         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
619                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
620
621         return 0;
622
623 out:
624         CERROR("Failed to setup %s resource container\n",
625                lnet_res_type2str(type));
626         lnet_res_container_cleanup(rec);
627         return rc;
628 }
629
630 static void
631 lnet_res_containers_destroy(struct lnet_res_container **recs)
632 {
633         struct lnet_res_container       *rec;
634         int                             i;
635
636         cfs_percpt_for_each(rec, i, recs)
637                 lnet_res_container_cleanup(rec);
638
639         cfs_percpt_free(recs);
640 }
641
642 static struct lnet_res_container **
643 lnet_res_containers_create(int type, int objnum, int objsz)
644 {
645         struct lnet_res_container       **recs;
646         struct lnet_res_container       *rec;
647         int                             rc;
648         int                             i;
649
650         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
651         if (recs == NULL) {
652                 CERROR("Failed to allocate %s resource containers\n",
653                        lnet_res_type2str(type));
654                 return NULL;
655         }
656
657         cfs_percpt_for_each(rec, i, recs) {
658                 rc = lnet_res_container_setup(rec, i, type, objnum, objsz);
659                 if (rc != 0) {
660                         lnet_res_containers_destroy(recs);
661                         return NULL;
662                 }
663         }
664
665         return recs;
666 }
667
668 lnet_libhandle_t *
669 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
670 {
671         /* ALWAYS called with lnet_res_lock held */
672         struct list_head        *head;
673         lnet_libhandle_t        *lh;
674         unsigned int            hash;
675
676         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
677                 return NULL;
678
679         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
680         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
681
682         list_for_each_entry(lh, head, lh_hash_chain) {
683                 if (lh->lh_cookie == cookie)
684                         return lh;
685         }
686
687         return NULL;
688 }
689
690 void
691 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
692 {
693         /* ALWAYS called with lnet_res_lock held */
694         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
695         unsigned int    hash;
696
697         lh->lh_cookie = rec->rec_lh_cookie;
698         rec->rec_lh_cookie += 1 << ibits;
699
700         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
701
702         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
703 }
704
705 #ifndef __KERNEL__
706 /**
707  * Reserved API - do not use.
708  * Temporary workaround to allow uOSS and test programs force server
709  * mode in userspace. See comments near ln_server_mode_flag in
710  * lnet/lib-types.h */
711
712 void
713 lnet_server_mode() {
714         the_lnet.ln_server_mode_flag = 1;
715 }
716 #endif
717
718 static int lnet_unprepare(void);
719
720 static int
721 lnet_prepare(lnet_pid_t requested_pid)
722 {
723         /* Prepare to bring up the network */
724         struct lnet_res_container **recs;
725         int                       rc = 0;
726
727         if (requested_pid == LNET_PID_ANY) {
728                 /* Don't instantiate LNET just for me */
729                 return -ENETDOWN;
730         }
731
732         LASSERT (the_lnet.ln_refcount == 0);
733
734         the_lnet.ln_routing = 0;
735
736 #ifdef __KERNEL__
737         LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
738         the_lnet.ln_pid = requested_pid;
739 #else
740         if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
741                 LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
742
743                 if (current_uid() != 0) /* Only root can run user-space server */
744                         return -EPERM;
745                 the_lnet.ln_pid = requested_pid;
746
747         } else {/* client case (liblustre) */
748
749                 /* My PID must be unique on this node and flag I'm userspace */
750                 the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
751         }
752 #endif
753
754         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
755         INIT_LIST_HEAD(&the_lnet.ln_nis);
756         INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
757         INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
758         INIT_LIST_HEAD(&the_lnet.ln_routers);
759
760         rc = lnet_create_remote_nets_table();
761         if (rc != 0)
762                 goto failed;
763
764         the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
765
766         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
767                                                 sizeof(lnet_counters_t));
768         if (the_lnet.ln_counters == NULL) {
769                 CERROR("Failed to allocate counters for LNet\n");
770                 rc = -ENOMEM;
771                 goto failed;
772         }
773
774         rc = lnet_peer_tables_create();
775         if (rc != 0)
776                 goto failed;
777
778         rc = lnet_msg_containers_create();
779         if (rc != 0)
780                 goto failed;
781
782         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
783                                       LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS,
784                                       sizeof(lnet_eq_t));
785         if (rc != 0)
786                 goto failed;
787
788         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
789                                           sizeof(lnet_me_t));
790         if (recs == NULL)
791                 goto failed;
792
793         the_lnet.ln_me_containers = recs;
794
795         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
796                                           sizeof(lnet_libmd_t));
797         if (recs == NULL)
798                 goto failed;
799
800         the_lnet.ln_md_containers = recs;
801
802         rc = lnet_portals_create();
803         if (rc != 0) {
804                 CERROR("Failed to create portals for LNet: %d\n", rc);
805                 goto failed;
806         }
807
808         return 0;
809
810  failed:
811         lnet_unprepare();
812         return rc;
813 }
814
815 static int
816 lnet_unprepare (void)
817 {
818         /* NB no LNET_LOCK since this is the last reference.  All LND instances
819          * have shut down already, so it is safe to unlink and free all
820          * descriptors, even those that appear committed to a network op (eg MD
821          * with non-zero pending count) */
822
823         lnet_fail_nid(LNET_NID_ANY, 0);
824
825         LASSERT(the_lnet.ln_refcount == 0);
826         LASSERT(list_empty(&the_lnet.ln_test_peers));
827         LASSERT(list_empty(&the_lnet.ln_nis));
828         LASSERT(list_empty(&the_lnet.ln_nis_cpt));
829         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
830
831         lnet_portals_destroy();
832
833         if (the_lnet.ln_md_containers != NULL) {
834                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
835                 the_lnet.ln_md_containers = NULL;
836         }
837
838         if (the_lnet.ln_me_containers != NULL) {
839                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
840                 the_lnet.ln_me_containers = NULL;
841         }
842
843         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
844
845         lnet_msg_containers_destroy();
846         lnet_peer_tables_destroy();
847         lnet_rtrpools_free(0);
848
849         if (the_lnet.ln_counters != NULL) {
850                 cfs_percpt_free(the_lnet.ln_counters);
851                 the_lnet.ln_counters = NULL;
852         }
853         lnet_destroy_remote_nets_table();
854
855         return 0;
856 }
857
858 lnet_ni_t  *
859 lnet_net2ni_locked(__u32 net, int cpt)
860 {
861         struct list_head *tmp;
862         lnet_ni_t        *ni;
863
864         LASSERT(cpt != LNET_LOCK_EX);
865
866         list_for_each(tmp, &the_lnet.ln_nis) {
867                 ni = list_entry(tmp, lnet_ni_t, ni_list);
868
869                 if (LNET_NIDNET(ni->ni_nid) == net) {
870                         lnet_ni_addref_locked(ni, cpt);
871                         return ni;
872                 }
873         }
874
875         return NULL;
876 }
877
878 lnet_ni_t *
879 lnet_net2ni(__u32 net)
880 {
881         lnet_ni_t *ni;
882
883         lnet_net_lock(0);
884         ni = lnet_net2ni_locked(net, 0);
885         lnet_net_unlock(0);
886
887         return ni;
888 }
889 EXPORT_SYMBOL(lnet_net2ni);
890
891 static unsigned int
892 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
893 {
894         __u64           key = nid;
895         unsigned int    val;
896
897         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
898
899         if (number == 1)
900                 return 0;
901
902         val = hash_long(key, LNET_CPT_BITS);
903         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
904         if (val < number)
905                 return val;
906
907         return (unsigned int)(key + val + (val >> 1)) % number;
908 }
909
910 int
911 lnet_cpt_of_nid_locked(lnet_nid_t nid)
912 {
913         struct lnet_ni *ni;
914
915         /* must called with hold of lnet_net_lock */
916         if (LNET_CPT_NUMBER == 1)
917                 return 0; /* the only one */
918
919         /* take lnet_net_lock(any) would be OK */
920         if (!list_empty(&the_lnet.ln_nis_cpt)) {
921                 list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
922                         if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
923                                 continue;
924
925                         LASSERT(ni->ni_cpts != NULL);
926                         return ni->ni_cpts[lnet_nid_cpt_hash
927                                            (nid, ni->ni_ncpts)];
928                 }
929         }
930
931         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
932 }
933
934 int
935 lnet_cpt_of_nid(lnet_nid_t nid)
936 {
937         int     cpt;
938         int     cpt2;
939
940         if (LNET_CPT_NUMBER == 1)
941                 return 0; /* the only one */
942
943         if (list_empty(&the_lnet.ln_nis_cpt))
944                 return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
945
946         cpt = lnet_net_lock_current();
947         cpt2 = lnet_cpt_of_nid_locked(nid);
948         lnet_net_unlock(cpt);
949
950         return cpt2;
951 }
952 EXPORT_SYMBOL(lnet_cpt_of_nid);
953
954 int
955 lnet_islocalnet(__u32 net)
956 {
957         struct lnet_ni  *ni;
958         int             cpt;
959
960         cpt = lnet_net_lock_current();
961
962         ni = lnet_net2ni_locked(net, cpt);
963         if (ni != NULL)
964                 lnet_ni_decref_locked(ni, cpt);
965
966         lnet_net_unlock(cpt);
967
968         return ni != NULL;
969 }
970
971 lnet_ni_t  *
972 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
973 {
974         struct lnet_ni   *ni;
975         struct list_head *tmp;
976
977         LASSERT(cpt != LNET_LOCK_EX);
978
979         list_for_each(tmp, &the_lnet.ln_nis) {
980                 ni = list_entry(tmp, lnet_ni_t, ni_list);
981
982                 if (ni->ni_nid == nid) {
983                         lnet_ni_addref_locked(ni, cpt);
984                         return ni;
985                 }
986         }
987
988         return NULL;
989 }
990
991 int
992 lnet_islocalnid(lnet_nid_t nid)
993 {
994         struct lnet_ni  *ni;
995         int             cpt;
996
997         cpt = lnet_net_lock_current();
998         ni = lnet_nid2ni_locked(nid, cpt);
999         if (ni != NULL)
1000                 lnet_ni_decref_locked(ni, cpt);
1001         lnet_net_unlock(cpt);
1002
1003         return ni != NULL;
1004 }
1005
/**
 * Count the NIs that need the acceptor, i.e. those whose LND provides an
 * lnd_accept method.
 *
 * Builds with neither __KERNEL__ nor HAVE_LIBPTHREAD always report 0.
 */
int
lnet_count_acceptor_nis (void)
{
        int              nacceptors = 0;
#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
        struct lnet_ni   *cur;
        int              lock_cpt;

        lock_cpt = lnet_net_lock_current();

        list_for_each_entry(cur, &the_lnet.ln_nis, ni_list) {
                if (cur->ni_lnd->lnd_accept != NULL)
                        nacceptors++;
        }

        lnet_net_unlock(lock_cpt);

#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
        return nacceptors;
}
1029
1030 static lnet_ping_info_t *
1031 lnet_ping_info_create(int num_ni)
1032 {
1033         lnet_ping_info_t *ping_info;
1034         unsigned int     infosz;
1035
1036         infosz = offsetof(lnet_ping_info_t, pi_ni[num_ni]);
1037         LIBCFS_ALLOC(ping_info, infosz);
1038         if (ping_info == NULL) {
1039                 CERROR("Can't allocate ping info[%d]\n", num_ni);
1040                 return NULL;
1041         }
1042
1043         ping_info->pi_nnis = num_ni;
1044         ping_info->pi_pid = the_lnet.ln_pid;
1045         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
1046         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
1047
1048         return ping_info;
1049 }
1050
1051 static inline int
1052 lnet_get_ni_count(void)
1053 {
1054         struct lnet_ni *ni;
1055         int            count = 0;
1056
1057         lnet_net_lock(0);
1058
1059         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
1060                 count++;
1061
1062         lnet_net_unlock(0);
1063
1064         return count;
1065 }
1066
1067 static inline void
1068 lnet_ping_info_free(lnet_ping_info_t *pinfo)
1069 {
1070         LIBCFS_FREE(pinfo,
1071                     offsetof(lnet_ping_info_t,
1072                              pi_ni[pinfo->pi_nnis]));
1073 }
1074
1075 static void
1076 lnet_ping_info_destroy(void)
1077 {
1078         struct lnet_ni  *ni;
1079
1080         lnet_net_lock(LNET_LOCK_EX);
1081
1082         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1083                 lnet_ni_lock(ni);
1084                 ni->ni_status = NULL;
1085                 lnet_ni_unlock(ni);
1086         }
1087
1088         lnet_ping_info_free(the_lnet.ln_ping_info);
1089         the_lnet.ln_ping_info = NULL;
1090
1091         lnet_net_unlock(LNET_LOCK_EX);
1092 }
1093
1094 static void
1095 lnet_ping_event_handler(lnet_event_t *event)
1096 {
1097         lnet_ping_info_t *pinfo = event->md.user_ptr;
1098
1099         if (event->unlinked)
1100                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
1101 }
1102
1103 static int
1104 lnet_ping_info_setup(lnet_ping_info_t **ppinfo, lnet_handle_md_t *md_handle,
1105                      int ni_count, bool set_eq)
1106 {
1107         lnet_handle_me_t  me_handle;
1108         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1109         lnet_md_t         md = {0};
1110         int               rc, rc2;
1111
1112         if (set_eq) {
1113                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
1114                                  &the_lnet.ln_ping_target_eq);
1115                 if (rc != 0) {
1116                         CERROR("Can't allocate ping EQ: %d\n", rc);
1117                         return rc;
1118                 }
1119         }
1120
1121         *ppinfo = lnet_ping_info_create(ni_count);
1122         if (*ppinfo == NULL) {
1123                 rc = -ENOMEM;
1124                 goto failed_0;
1125         }
1126
1127         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1128                           LNET_PROTO_PING_MATCHBITS, 0,
1129                           LNET_UNLINK, LNET_INS_AFTER,
1130                           &me_handle);
1131         if (rc != 0) {
1132                 CERROR("Can't create ping ME: %d\n", rc);
1133                 goto failed_1;
1134         }
1135
1136         /* initialize md content */
1137         md.start     = *ppinfo;
1138         md.length    = offsetof(lnet_ping_info_t,
1139                                 pi_ni[(*ppinfo)->pi_nnis]);
1140         md.threshold = LNET_MD_THRESH_INF;
1141         md.max_size  = 0;
1142         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1143                        LNET_MD_MANAGE_REMOTE;
1144         md.user_ptr  = NULL;
1145         md.eq_handle = the_lnet.ln_ping_target_eq;
1146         md.user_ptr = *ppinfo;
1147
1148         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
1149         if (rc != 0) {
1150                 CERROR("Can't attach ping MD: %d\n", rc);
1151                 goto failed_2;
1152         }
1153
1154         return 0;
1155
1156 failed_2:
1157         rc2 = LNetMEUnlink(me_handle);
1158         LASSERT(rc2 == 0);
1159 failed_1:
1160         lnet_ping_info_free(*ppinfo);
1161         *ppinfo = NULL;
1162 failed_0:
1163         if (set_eq)
1164                 LNetEQFree(the_lnet.ln_ping_target_eq);
1165         return rc;
1166 }
1167
1168 static void
1169 lnet_ping_md_unlink(lnet_ping_info_t *pinfo, lnet_handle_md_t *md_handle)
1170 {
1171         sigset_t        blocked = cfs_block_allsigs();
1172
1173         LNetMDUnlink(*md_handle);
1174         LNetInvalidateHandle(md_handle);
1175
1176         /* NB md could be busy; this just starts the unlink */
1177         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
1178                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
1179                 cfs_pause(cfs_time_seconds(1));
1180         }
1181
1182         cfs_restore_sigs(blocked);
1183 }
1184
1185 static void
1186 lnet_ping_info_install_locked(lnet_ping_info_t *ping_info)
1187 {
1188         int                     i;
1189         lnet_ni_t               *ni;
1190         lnet_ni_status_t        *ns;
1191
1192         i = 0;
1193         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1194                 LASSERT(i < ping_info->pi_nnis);
1195
1196                 ns = &ping_info->pi_ni[i];
1197
1198                 ns->ns_nid = ni->ni_nid;
1199
1200                 lnet_ni_lock(ni);
1201                 ns->ns_status = (ni->ni_status != NULL) ?
1202                                 ni->ni_status->ns_status : LNET_NI_STATUS_UP;
1203                 ni->ni_status = ns;
1204                 lnet_ni_unlock(ni);
1205
1206                 i++;
1207         }
1208 }
1209
1210 static void
1211 lnet_ping_target_update(lnet_ping_info_t *pinfo, lnet_handle_md_t md_handle)
1212 {
1213         lnet_ping_info_t *old_pinfo = NULL;
1214         lnet_handle_md_t old_md;
1215
1216         /* switch the NIs to point to the new ping info created */
1217         lnet_net_lock(LNET_LOCK_EX);
1218
1219         if (!the_lnet.ln_routing)
1220                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1221         lnet_ping_info_install_locked(pinfo);
1222
1223         if (the_lnet.ln_ping_info != NULL) {
1224                 old_pinfo = the_lnet.ln_ping_info;
1225                 old_md = the_lnet.ln_ping_target_md;
1226         }
1227         the_lnet.ln_ping_target_md = md_handle;
1228         the_lnet.ln_ping_info = pinfo;
1229
1230         lnet_net_unlock(LNET_LOCK_EX);
1231
1232         if (old_pinfo != NULL) {
1233                 /* unlink the old ping info */
1234                 lnet_ping_md_unlink(old_pinfo, &old_md);
1235                 lnet_ping_info_free(old_pinfo);
1236         }
1237 }
1238
1239 static void
1240 lnet_ping_target_fini(void)
1241 {
1242         int             rc;
1243
1244         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1245                             &the_lnet.ln_ping_target_md);
1246
1247         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1248         LASSERT(rc == 0);
1249
1250         lnet_ping_info_destroy();
1251 }
1252
1253 static int
1254 lnet_ni_tq_credits(lnet_ni_t *ni)
1255 {
1256         int     credits;
1257
1258         LASSERT(ni->ni_ncpts >= 1);
1259
1260         if (ni->ni_ncpts == 1)
1261                 return ni->ni_maxtxcredits;
1262
1263         credits = ni->ni_maxtxcredits / ni->ni_ncpts;
1264         credits = max(credits, 8 * ni->ni_peertxcredits);
1265         credits = min(credits, ni->ni_maxtxcredits);
1266
1267         return credits;
1268 }
1269
1270 static void
1271 lnet_clear_zombies_nis_locked(void)
1272 {
1273         int             i;
1274         int             islo;
1275         lnet_ni_t       *ni;
1276
1277         /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
1278          * and shut them down in guaranteed thread context */
1279         i = 2;
1280         while (!list_empty(&the_lnet.ln_nis_zombie)) {
1281                 int     *ref;
1282                 int     j;
1283
1284                 ni = list_entry(the_lnet.ln_nis_zombie.next,
1285                                 lnet_ni_t, ni_list);
1286                 list_del_init(&ni->ni_list);
1287                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1288                         if (*ref == 0)
1289                                 continue;
1290                         /* still busy, add it back to zombie list */
1291                         list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
1292                         break;
1293                 }
1294
1295                 if (!list_empty(&ni->ni_list)) {
1296                         lnet_net_unlock(LNET_LOCK_EX);
1297                         ++i;
1298                         if ((i & (-i)) == i) {
1299                                 CDEBUG(D_WARNING,
1300                                        "Waiting for zombie LNI %s\n",
1301                                        libcfs_nid2str(ni->ni_nid));
1302                         }
1303                         cfs_pause(cfs_time_seconds(1));
1304                         lnet_net_lock(LNET_LOCK_EX);
1305                         continue;
1306                 }
1307
1308                 ni->ni_lnd->lnd_refcount--;
1309                 lnet_net_unlock(LNET_LOCK_EX);
1310
1311                 islo = ni->ni_lnd->lnd_type == LOLND;
1312
1313                 LASSERT(!in_interrupt());
1314                 (ni->ni_lnd->lnd_shutdown)(ni);
1315
1316                 /* can't deref lnd anymore now; it might have unregistered
1317                  * itself...  */
1318
1319                 if (!islo)
1320                         CDEBUG(D_LNI, "Removed LNI %s\n",
1321                               libcfs_nid2str(ni->ni_nid));
1322
1323                 lnet_ni_free(ni);
1324                 i = 2;
1325                 lnet_net_lock(LNET_LOCK_EX);
1326         }
1327 }
1328
1329 static void
1330 lnet_shutdown_lndnis(void)
1331 {
1332         int             i;
1333         lnet_ni_t       *ni;
1334
1335         /* NB called holding the global mutex */
1336
1337         /* All quiet on the API front */
1338         LASSERT(!the_lnet.ln_shutdown);
1339         LASSERT(the_lnet.ln_refcount == 0);
1340         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
1341
1342         lnet_net_lock(LNET_LOCK_EX);
1343         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1344
1345         /* Unlink NIs from the global table */
1346         while (!list_empty(&the_lnet.ln_nis)) {
1347                 ni = list_entry(the_lnet.ln_nis.next,
1348                                 lnet_ni_t, ni_list);
1349                 /* move it to zombie list and nobody can find it anymore */
1350                 list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
1351                 lnet_ni_decref_locked(ni, 0);   /* drop ln_nis' ref */
1352
1353                 if (!list_empty(&ni->ni_cptlist)) {
1354                         list_del_init(&ni->ni_cptlist);
1355                         lnet_ni_decref_locked(ni, 0);
1356                 }
1357         }
1358
1359         /* Drop the cached eqwait NI. */
1360         if (the_lnet.ln_eq_waitni != NULL) {
1361                 lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0);
1362                 the_lnet.ln_eq_waitni = NULL;
1363         }
1364
1365         /* Drop the cached loopback NI. */
1366         if (the_lnet.ln_loni != NULL) {
1367                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1368                 the_lnet.ln_loni = NULL;
1369         }
1370
1371         lnet_net_unlock(LNET_LOCK_EX);
1372
1373         /* Clear lazy portals and drop delayed messages which hold refs
1374          * on their lnet_msg_t::msg_rxpeer */
1375         for (i = 0; i < the_lnet.ln_nportals; i++)
1376                 LNetClearLazyPortal(i);
1377
1378         /* Clear the peer table and wait for all peers to go (they hold refs on
1379          * their NIs) */
1380         lnet_peer_tables_cleanup(NULL);
1381
1382         lnet_net_lock(LNET_LOCK_EX);
1383
1384         lnet_clear_zombies_nis_locked();
1385         the_lnet.ln_shutdown = 0;
1386         lnet_net_unlock(LNET_LOCK_EX);
1387 }
1388
1389 int
1390 lnet_shutdown_lndni(__u32 net)
1391 {
1392         lnet_ping_info_t *pinfo;
1393         lnet_handle_md_t md_handle;
1394         lnet_ni_t       *found_ni = NULL;
1395         int             ni_count;
1396         int             rc;
1397
1398         if (LNET_NETTYP(net) == LOLND)
1399                 return -EINVAL;
1400
1401         ni_count = lnet_get_ni_count();
1402
1403         /* create and link a new ping info, before removing the old one */
1404         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count - 1, false);
1405         if (rc != 0)
1406                 return rc;
1407
1408         /* proceed with shutting down the NI */
1409         lnet_net_lock(LNET_LOCK_EX);
1410
1411         found_ni = lnet_net2ni_locked(net, 0);
1412         if (found_ni == NULL) {
1413                 lnet_net_unlock(LNET_LOCK_EX);
1414                 lnet_ping_md_unlink(pinfo, &md_handle);
1415                 lnet_ping_info_free(pinfo);
1416                 return -EINVAL;
1417         }
1418
1419         /* decrement the reference counter on found_ni which was
1420          * incremented when we called lnet_net2ni_locked() */
1421         lnet_ni_decref_locked(found_ni, 0);
1422
1423         /* Move ni to zombie list so nobody can find it anymore */
1424         list_move(&found_ni->ni_list, &the_lnet.ln_nis_zombie);
1425
1426         /* Drop the lock reference for the ln_nis ref. */
1427         lnet_ni_decref_locked(found_ni, 0);
1428
1429         if (!list_empty(&found_ni->ni_cptlist)) {
1430                 list_del_init(&found_ni->ni_cptlist);
1431                 lnet_ni_decref_locked(found_ni, 0);
1432         }
1433
1434         lnet_net_unlock(LNET_LOCK_EX);
1435
1436         /* Do peer table cleanup for this ni */
1437         lnet_peer_tables_cleanup(found_ni);
1438
1439         lnet_net_lock(LNET_LOCK_EX);
1440         lnet_clear_zombies_nis_locked();
1441         lnet_net_unlock(LNET_LOCK_EX);
1442
1443         lnet_ping_target_update(pinfo, md_handle);
1444
1445         return 0;
1446 }
1447
1448 static int
1449 lnet_startup_lndnis(struct list_head *nilist, __s32 peer_timeout,
1450                     __s32 peer_cr, __s32 peer_buf_cr, __s32 credits,
1451                     int *ni_count)
1452 {
1453         int                     rc = 0;
1454         struct lnet_ni          *ni;
1455         int                     lnd_type;
1456         lnd_t                   *lnd;
1457         struct lnet_tx_queue    *tq;
1458         int                     i;
1459
1460         while (!list_empty(nilist)) {
1461                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1462                 lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
1463
1464                 if (!libcfs_isknown_lnd(lnd_type))
1465                         goto failed;
1466
1467                 if (lnd_type == CIBLND    ||
1468                     lnd_type == OPENIBLND ||
1469                     lnd_type == IIBLND    ||
1470                     lnd_type == VIBLND) {
1471                         CERROR("LND %s obsoleted\n",
1472                                libcfs_lnd2str(lnd_type));
1473                         goto failed;
1474                 }
1475
1476                 /* Make sure this new NI is unique. */
1477                 lnet_net_lock(LNET_LOCK_EX);
1478                 if (!lnet_net_unique(LNET_NIDNET(ni->ni_nid),
1479                                      &the_lnet.ln_nis)) {
1480                         if (lnd_type == LOLND) {
1481                                 lnet_net_unlock(LNET_LOCK_EX);
1482                                 list_del(&ni->ni_list);
1483                                 lnet_ni_free(ni);
1484                                 continue;
1485                         }
1486
1487                         CERROR("Net %s is not unique\n",
1488                                libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
1489                         lnet_net_unlock(LNET_LOCK_EX);
1490                         goto failed;
1491                 }
1492                 lnet_net_unlock(LNET_LOCK_EX);
1493
1494                 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1495                 lnd = lnet_find_lnd_by_type(lnd_type);
1496
1497 #ifdef __KERNEL__
1498                 if (lnd == NULL) {
1499                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1500                         rc = request_module("%s",
1501                                                 libcfs_lnd2modname(lnd_type));
1502                         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1503
1504                         lnd = lnet_find_lnd_by_type(lnd_type);
1505                         if (lnd == NULL) {
1506                                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1507                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1508                                        libcfs_lnd2str(lnd_type),
1509                                        libcfs_lnd2modname(lnd_type), rc);
1510 #ifndef HAVE_MODULE_LOADING_SUPPORT
1511                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1512                                          "compiled with kernel module "
1513                                          "loading support.");
1514 #endif
1515                                 goto failed;
1516                         }
1517                 }
1518 #else
1519                 if (lnd == NULL) {
1520                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1521                         CERROR("LND %s not supported\n",
1522                                libcfs_lnd2str(lnd_type));
1523                         goto failed;
1524                 }
1525 #endif
1526
1527                 lnet_net_lock(LNET_LOCK_EX);
1528                 lnd->lnd_refcount++;
1529                 lnet_net_unlock(LNET_LOCK_EX);
1530
1531                 ni->ni_lnd = lnd;
1532
1533                 rc = (lnd->lnd_startup)(ni);
1534
1535                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1536
1537                 if (rc != 0) {
1538                         LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
1539                                            "\n",
1540                                            rc, libcfs_lnd2str(lnd->lnd_type));
1541                         lnet_net_lock(LNET_LOCK_EX);
1542                         lnd->lnd_refcount--;
1543                         lnet_net_unlock(LNET_LOCK_EX);
1544                         goto failed;
1545                 }
1546
1547                 /* If given some LND tunable parameters, parse those now to
1548                  * override the values in the NI structure. */
1549                 if (peer_buf_cr >= 0)
1550                         ni->ni_peerrtrcredits = peer_buf_cr;
1551                 if (peer_timeout >= 0)
1552                         ni->ni_peertimeout = peer_timeout;
1553                 /*
1554                  * TODO
1555                  * Note: For now, don't allow the user to change
1556                  * peertxcredits as this number is used in the
1557                  * IB LND to control queue depth.
1558                  * if (peer_cr != -1)
1559                  *      ni->ni_peertxcredits = peer_cr;
1560                  */
1561                 if (credits >= 0)
1562                         ni->ni_maxtxcredits = credits;
1563
1564                 LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
1565
1566                 list_del(&ni->ni_list);
1567
1568                 lnet_net_lock(LNET_LOCK_EX);
1569                 /* refcount for ln_nis */
1570                 lnet_ni_addref_locked(ni, 0);
1571                 list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1572                 if (ni->ni_cpts != NULL) {
1573                         list_add_tail(&ni->ni_cptlist,
1574                                       &the_lnet.ln_nis_cpt);
1575                         lnet_ni_addref_locked(ni, 0);
1576                 }
1577
1578                 lnet_net_unlock(LNET_LOCK_EX);
1579
1580                 /* increment the ni_count here to account for the LOLND as
1581                  * well.  If we increment past this point then the number
1582                  * of count will be missing the LOLND, and then ping and
1583                  * will not report the LOLND
1584                  */
1585                 if (ni_count != NULL)
1586                         (*ni_count)++;
1587
1588                 if (lnd->lnd_type == LOLND) {
1589                         lnet_ni_addref(ni);
1590                         LASSERT(the_lnet.ln_loni == NULL);
1591                         the_lnet.ln_loni = ni;
1592                         continue;
1593                 }
1594
1595 #ifndef __KERNEL__
1596                 if (lnd->lnd_wait != NULL) {
1597                         if (the_lnet.ln_eq_waitni == NULL) {
1598                                 lnet_ni_addref(ni);
1599                                 the_lnet.ln_eq_waitni = ni;
1600                         }
1601                 } else {
1602 # ifndef HAVE_LIBPTHREAD
1603                         LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a "
1604                                            "single-threaded runtime\n",
1605                                            libcfs_lnd2str(lnd_type));
1606                         goto failed;
1607 # endif
1608                 }
1609 #endif
1610                 if (ni->ni_peertxcredits == 0 ||
1611                     ni->ni_maxtxcredits == 0) {
1612                         LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1613                                            libcfs_lnd2str(lnd->lnd_type),
1614                                            ni->ni_peertxcredits == 0 ?
1615                                            "" : "per-peer ");
1616                         goto failed;
1617                 }
1618
1619                 cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1620                         tq->tq_credits_min =
1621                         tq->tq_credits_max =
1622                         tq->tq_credits = lnet_ni_tq_credits(ni);
1623                 }
1624
1625                 CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1626                        libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
1627                        lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1628                        ni->ni_peerrtrcredits, ni->ni_peertimeout);
1629         }
1630
1631         return 0;
1632 failed:
1633         while (!list_empty(nilist)) {
1634                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1635                 list_del(&ni->ni_list);
1636                 lnet_ni_free(ni);
1637         }
1638         return -EINVAL;
1639 }
1640
1641 /**
1642  * Initialize LNet library.
1643  *
1644  * Only userspace program needs to call this function - it's automatically
1645  * called in the kernel at module loading time. Caller has to call LNetFini()
1646  * after a call to LNetInit(), if and only if the latter returned 0. It must
1647  * be called exactly once.
1648  *
1649  * \return 0 on success, and -ve on failures.
1650  */
1651 int
1652 LNetInit(void)
1653 {
1654         int     rc;
1655
1656         lnet_assert_wire_constants();
1657         LASSERT(!the_lnet.ln_init);
1658
1659         memset(&the_lnet, 0, sizeof(the_lnet));
1660
1661         /* refer to global cfs_cpt_table for now */
1662         the_lnet.ln_cpt_table   = cfs_cpt_table;
1663         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1664
1665         LASSERT(the_lnet.ln_cpt_number > 0);
1666         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1667                 /* we are under risk of consuming all lh_cookie */
1668                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1669                        "please change setting of CPT-table and retry\n",
1670                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1671                 return -1;
1672         }
1673
1674         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1675                 the_lnet.ln_cpt_bits++;
1676
1677         rc = lnet_create_locks();
1678         if (rc != 0) {
1679                 CERROR("Can't create LNet global locks: %d\n", rc);
1680                 return -1;
1681         }
1682
1683         the_lnet.ln_refcount = 0;
1684         the_lnet.ln_init = 1;
1685         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1686         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1687         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1688         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1689
1690 #ifdef __KERNEL__
1691         /* The hash table size is the number of bits it takes to express the set
1692          * ln_num_routes, minus 1 (better to under estimate than over so we
1693          * don't waste memory). */
1694         if (rnet_htable_size <= 0)
1695                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1696         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1697                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1698         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1699                                            order_base_2(rnet_htable_size) - 1);
1700
1701         /* All LNDs apart from the LOLND are in separate modules.  They
1702          * register themselves when their module loads, and unregister
1703          * themselves when their module is unloaded. */
1704 #else
1705         the_lnet.ln_remote_nets_hbits = 8;
1706
1707         /* Register LNDs
1708          * NB the order here determines default 'networks=' order */
1709 # ifdef HAVE_LIBPTHREAD
1710         LNET_REGISTER_ULND(the_tcplnd);
1711 # endif
1712 #endif
1713         lnet_register_lnd(&the_lolnd);
1714         return 0;
1715 }
1716 EXPORT_SYMBOL(LNetInit);
1717
1718 /**
1719  * Finalize LNet library.
1720  *
1721  * Only userspace program needs to call this function. It can be called
1722  * at most once.
1723  *
1724  * \pre LNetInit() called with success.
1725  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1726  */
1727 void
1728 LNetFini(void)
1729 {
1730         LASSERT(the_lnet.ln_init);
1731         LASSERT(the_lnet.ln_refcount == 0);
1732
1733         while (!list_empty(&the_lnet.ln_lnds))
1734                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1735                                                lnd_t, lnd_list));
1736         lnet_destroy_locks();
1737
1738         the_lnet.ln_init = 0;
1739 }
1740 EXPORT_SYMBOL(LNetFini);
1741
1742 /**
1743  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1744  *
1745  * Userspace program should call this after a successful call to LNetInit().
1746  * Users must call this function at least once before any other functions.
1747  * For each successful call there must be a corresponding call to
1748  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1749  * ignored.
1750  *
1751  * The PID used by LNet may be different from the one requested.
1752  * See LNetGetId().
1753  *
1754  * \param requested_pid PID requested by the caller.
1755  *
1756  * \return >= 0 on success, and < 0 error code on failures.
1757  */
1758 int
1759 LNetNIInit(lnet_pid_t requested_pid)
1760 {
1761         int                     im_a_router = 0;
1762         int                     rc;
1763         int                     ni_count = 0;
1764         int                     lnd_type;
1765         struct lnet_ni          *ni;
1766         lnet_ping_info_t        *pinfo;
1767         lnet_handle_md_t        md_handle;
1768         struct list_head        net_head;
1769
1770         INIT_LIST_HEAD(&net_head);
1771
1772         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1773
1774         LASSERT(the_lnet.ln_init);
1775         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1776
1777         if (the_lnet.ln_refcount > 0) {
1778                 rc = the_lnet.ln_refcount++;
1779                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1780                 return rc;
1781         }
1782
1783         rc = lnet_prepare(requested_pid);
1784         if (rc != 0)
1785                 goto failed0;
1786
1787         rc = lnet_parse_networks(&net_head,
1788                                  !the_lnet.ln_nis_from_mod_params ?
1789                                    lnet_get_networks() : "");
1790         if (rc < 0)
1791                 goto failed1;
1792
1793         rc = lnet_startup_lndnis(&net_head, -1, -1, -1, -1, &ni_count);
1794         if (rc != 0)
1795                 goto failed1;
1796
1797         if (the_lnet.ln_eq_waitni != NULL && ni_count > 1) {
1798                 lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type;
1799                 LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
1800                                    "\n",
1801                                    libcfs_lnd2str(lnd_type));
1802                 goto failed2;
1803         }
1804
1805         rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1806         if (rc != 0)
1807                 goto failed2;
1808
1809         rc = lnet_check_routes();
1810         if (rc != 0)
1811                 goto failed2;
1812
1813         rc = lnet_rtrpools_alloc(im_a_router);
1814         if (rc != 0)
1815                 goto failed2;
1816
1817         rc = lnet_acceptor_start();
1818         if (rc != 0)
1819                 goto failed2;
1820         the_lnet.ln_refcount = 1;
1821         /* Now I may use my own API functions... */
1822
1823         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1824         if (rc != 0)
1825                 goto failed3;
1826
1827         lnet_ping_target_update(pinfo, md_handle);
1828
1829         rc = lnet_router_checker_start();
1830         if (rc != 0)
1831                 goto failed4;
1832
1833         lnet_proc_init();
1834
1835         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1836
1837         return 0;
1838
1839 failed4:
1840         the_lnet.ln_refcount = 0;
1841         lnet_ping_md_unlink(pinfo, &md_handle);
1842         lnet_ping_info_free(pinfo);
1843 failed3:
1844         lnet_acceptor_stop();
1845         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1846         LASSERT(rc == 0);
1847 failed2:
1848         lnet_destroy_routes();
1849         lnet_shutdown_lndnis();
1850 failed1:
1851         lnet_unprepare();
1852 failed0:
1853         LASSERT(rc < 0);
1854         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1855         while (!list_empty(&net_head)) {
1856                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
1857                 list_del_init(&ni->ni_list);
1858                 lnet_ni_free(ni);
1859         }
1860         return rc;
1861 }
1862 EXPORT_SYMBOL(LNetNIInit);
1863
1864 /**
1865  * Stop LNet interfaces, routing, and forwarding.
1866  *
1867  * Users must call this function once for each successful call to LNetNIInit().
1868  * Once the LNetNIFini() operation has been started, the results of pending
1869  * API operations are undefined.
1870  *
1871  * \return always 0 for current implementation.
1872  */
1873 int
1874 LNetNIFini()
1875 {
1876         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1877
1878         LASSERT (the_lnet.ln_init);
1879         LASSERT (the_lnet.ln_refcount > 0);
1880
1881         if (the_lnet.ln_refcount != 1) {
1882                 the_lnet.ln_refcount--;
1883         } else {
1884                 LASSERT (!the_lnet.ln_niinit_self);
1885
1886                 lnet_proc_fini();
1887                 lnet_router_checker_stop();
1888                 lnet_ping_target_fini();
1889
1890                 /* Teardown fns that use my own API functions BEFORE here */
1891                 the_lnet.ln_refcount = 0;
1892
1893                 lnet_acceptor_stop();
1894                 lnet_destroy_routes();
1895                 lnet_shutdown_lndnis();
1896                 lnet_unprepare();
1897         }
1898
1899         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1900         return 0;
1901 }
1902 EXPORT_SYMBOL(LNetNIFini);
1903
1904 /**
1905  * Grabs the ni data from the ni structure and fills the out
1906  * parameters
1907  *
1908  * \param[in] ni network        interface structure
1909  * \param[out] cpt_count        the number of cpts the ni is on
1910  * \param[out] nid              Network Interface ID
1911  * \param[out] peer_timeout     NI peer timeout
1912  * \param[out] peer_tx_crdits   NI peer transmit credits
1913  * \param[out] peer_rtr_credits NI peer router credits
1914  * \param[out] max_tx_credits   NI max transmit credit
1915  * \param[out] net_config       Network configuration
1916  */
1917 static void
1918 lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
1919                   int *peer_timeout, int *peer_tx_credits,
1920                   int *peer_rtr_credits, int *max_tx_credits,
1921                   struct lnet_ioctl_net_config *net_config)
1922 {
1923         int i;
1924
1925         if (ni == NULL)
1926                 return;
1927
1928         if (net_config == NULL)
1929                 return;
1930
1931         CLASSERT(ARRAY_SIZE(ni->ni_interfaces) ==
1932                  ARRAY_SIZE(net_config->ni_interfaces));
1933
1934         if (ni->ni_interfaces[0] != NULL) {
1935                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1936                         if (ni->ni_interfaces[i] != NULL) {
1937                                 strncpy(net_config->ni_interfaces[i],
1938                                         ni->ni_interfaces[i],
1939                                         sizeof(net_config->ni_interfaces[i]));
1940                         }
1941                 }
1942         }
1943
1944         *nid = ni->ni_nid;
1945         *peer_timeout = ni->ni_peertimeout;
1946         *peer_tx_credits = ni->ni_peertxcredits;
1947         *peer_rtr_credits = ni->ni_peerrtrcredits;
1948         *max_tx_credits = ni->ni_maxtxcredits;
1949
1950         net_config->ni_status = ni->ni_status->ns_status;
1951
1952         for (i = 0;
1953              ni->ni_cpts != NULL && i < ni->ni_ncpts &&
1954              i < LNET_MAX_SHOW_NUM_CPT;
1955              i++)
1956                 net_config->ni_cpts[i] = ni->ni_cpts[i];
1957
1958         *cpt_count = ni->ni_ncpts;
1959 }
1960
1961 int
1962 lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
1963                     int *peer_tx_credits, int *peer_rtr_credits,
1964                     int *max_tx_credits,
1965                     struct lnet_ioctl_net_config *net_config)
1966 {
1967         struct lnet_ni          *ni;
1968         struct list_head        *tmp;
1969         int                     cpt;
1970         int                     rc = -ENOENT;
1971
1972         cpt = lnet_net_lock_current();
1973
1974         list_for_each(tmp, &the_lnet.ln_nis) {
1975                 ni = list_entry(tmp, lnet_ni_t, ni_list);
1976                 if (idx-- == 0) {
1977                         rc = 0;
1978                         lnet_ni_lock(ni);
1979                         lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
1980                                           peer_tx_credits, peer_rtr_credits,
1981                                           max_tx_credits, net_config);
1982                         lnet_ni_unlock(ni);
1983                         break;
1984                 }
1985         }
1986
1987         lnet_net_unlock(cpt);
1988         return rc;
1989 }
1990
1991 int
1992 lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
1993                 __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
1994                 __s32 credits)
1995 {
1996         lnet_ping_info_t        *pinfo;
1997         lnet_handle_md_t        md_handle;
1998         struct lnet_ni          *ni;
1999         struct list_head        net_head;
2000         int                     rc;
2001
2002         INIT_LIST_HEAD(&net_head);
2003
2004         /* Create a ni structure for the network string */
2005         rc = lnet_parse_networks(&net_head, nets);
2006         if (rc < 0)
2007                 return rc;
2008
2009         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2010
2011         if (rc > 1) {
2012                 rc = -EINVAL; /* only add one interface per call */
2013                 goto failed0;
2014         }
2015
2016         rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
2017                                   false);
2018         if (rc != 0)
2019                 goto failed0;
2020
2021         rc = lnet_startup_lndnis(&net_head, peer_timeout, peer_cr,
2022                                  peer_buf_cr, credits, NULL);
2023         if (rc != 0)
2024                 goto failed1;
2025
2026         lnet_ping_target_update(pinfo, md_handle);
2027         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2028
2029         return 0;
2030
2031 failed1:
2032         lnet_ping_md_unlink(pinfo, &md_handle);
2033         lnet_ping_info_free(pinfo);
2034 failed0:
2035         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2036         while (!list_empty(&net_head)) {
2037                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
2038                 list_del_init(&ni->ni_list);
2039                 lnet_ni_free(ni);
2040         }
2041         return rc;
2042 }
2043
2044 int
2045 lnet_dyn_del_ni(__u32 net)
2046 {
2047         int rc;
2048
2049         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2050         rc = lnet_shutdown_lndni(net);
2051         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2052
2053         return rc;
2054 }
2055
2056 /**
2057  * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
2058  * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
2059  * internal ioctl handler.
2060  *
2061  * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
2062  *
2063  * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
2064  * The data will be printed to system console. Don't use it excessively.
2065  * \param arg A pointer to lnet_process_id_t, process ID of the peer.
2066  *
2067  * \return Always return 0 when called by users directly (i.e., not via ioctl).
2068  */
2069 int
2070 LNetCtl(unsigned int cmd, void *arg)
2071 {
2072         struct libcfs_ioctl_data *data = arg;
2073         struct lnet_ioctl_config_data *config;
2074         lnet_process_id_t         id = {0};
2075         lnet_ni_t                *ni;
2076         int                       rc;
2077
2078         LASSERT(the_lnet.ln_init);
2079
2080         switch (cmd) {
2081         case IOC_LIBCFS_GET_NI:
2082                 rc = LNetGetId(data->ioc_count, &id);
2083                 data->ioc_nid = id.nid;
2084                 return rc;
2085
2086         case IOC_LIBCFS_FAIL_NID:
2087                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2088
2089         case IOC_LIBCFS_ADD_ROUTE:
2090                 config = arg;
2091                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2092                 rc = lnet_add_route(config->cfg_net,
2093                                     config->cfg_config_u.cfg_route.rtr_hop,
2094                                     config->cfg_nid,
2095                                     config->cfg_config_u.cfg_route.
2096                                         rtr_priority);
2097                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2098                 return (rc != 0) ? rc : lnet_check_routes();
2099
2100         case IOC_LIBCFS_DEL_ROUTE:
2101                 config = arg;
2102                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2103                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2104                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2105                 return rc;
2106
2107         case IOC_LIBCFS_GET_ROUTE:
2108                 config = arg;
2109                 return lnet_get_route(config->cfg_count,
2110                                       &config->cfg_net,
2111                                       &config->cfg_config_u.cfg_route.rtr_hop,
2112                                       &config->cfg_nid,
2113                                       &config->cfg_config_u.cfg_route.rtr_flags,
2114                                       &config->cfg_config_u.cfg_route.
2115                                         rtr_priority);
2116
2117         case IOC_LIBCFS_GET_NET: {
2118                 struct lnet_ioctl_net_config *net_config;
2119                 config = arg;
2120                 net_config = (struct lnet_ioctl_net_config *)
2121                         config->cfg_bulk;
2122                 if (config == NULL || net_config == NULL)
2123                         return -1;
2124
2125                 return lnet_get_net_config(config->cfg_count,
2126                                            &config->cfg_ncpts,
2127                                            &config->cfg_nid,
2128                                            &config->cfg_config_u.
2129                                                 cfg_net.net_peer_timeout,
2130                                            &config->cfg_config_u.cfg_net.
2131                                                 net_peer_tx_credits,
2132                                            &config->cfg_config_u.cfg_net.
2133                                                 net_peer_rtr_credits,
2134                                            &config->cfg_config_u.cfg_net.
2135                                                 net_max_tx_credits,
2136                                            net_config);
2137         }
2138
2139         case IOC_LIBCFS_GET_LNET_STATS:
2140         {
2141                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2142
2143                 lnet_counters_get(&lnet_stats->st_cntrs);
2144                 return 0;
2145         }
2146
2147 #if defined(__KERNEL__) && defined(LNET_ROUTER)
2148         case IOC_LIBCFS_CONFIG_RTR:
2149                 config = arg;
2150                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2151                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2152                         rc = lnet_rtrpools_enable();
2153                         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2154                         return rc;
2155                 }
2156                 lnet_rtrpools_disable();
2157                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2158                 return 0;
2159
2160         case IOC_LIBCFS_ADD_BUF:
2161                 config = arg;
2162                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2163                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2164                                                 buf_tiny,
2165                                           config->cfg_config_u.cfg_buffers.
2166                                                 buf_small,
2167                                           config->cfg_config_u.cfg_buffers.
2168                                                 buf_large);
2169                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2170                 return rc;
2171 #endif
2172
2173         case IOC_LIBCFS_GET_BUF: {
2174                 struct lnet_ioctl_pool_cfg *pool_cfg;
2175                 config = arg;
2176                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2177                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2178         }
2179
2180         case IOC_LIBCFS_GET_PEER_INFO: {
2181                 struct lnet_ioctl_peer *peer_info = arg;
2182                 return lnet_get_peer_info(
2183                    peer_info->pr_count,
2184                    &peer_info->pr_nid,
2185                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2186                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2187                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2188                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2189                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2190                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2191                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2192                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2193         }
2194
2195         case IOC_LIBCFS_NOTIFY_ROUTER:
2196                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2197                                    cfs_time_current() -
2198                                    cfs_time_seconds(cfs_time_current_sec() -
2199                                                     (time_t)data->ioc_u64[0]));
2200
2201         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
2202                 /* This can be removed once lustre stops calling it */
2203                 return 0;
2204
2205         case IOC_LIBCFS_LNET_DIST:
2206                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2207                 if (rc < 0 && rc != -EHOSTUNREACH)
2208                         return rc;
2209
2210                 data->ioc_u32[0] = rc;
2211                 return 0;
2212
2213         case IOC_LIBCFS_TESTPROTOCOMPAT:
2214                 lnet_net_lock(LNET_LOCK_EX);
2215                 the_lnet.ln_testprotocompat = data->ioc_flags;
2216                 lnet_net_unlock(LNET_LOCK_EX);
2217                 return 0;
2218
2219         case IOC_LIBCFS_PING:
2220                 id.nid = data->ioc_nid;
2221                 id.pid = data->ioc_u32[0];
2222                 rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
2223                                (lnet_process_id_t *)data->ioc_pbuf1,
2224                                data->ioc_plen1/sizeof(lnet_process_id_t));
2225                 if (rc < 0)
2226                         return rc;
2227                 data->ioc_count = rc;
2228                 return 0;
2229
2230         case IOC_LIBCFS_DEBUG_PEER: {
2231                 /* CAVEAT EMPTOR: this one designed for calling directly; not
2232                  * via an ioctl */
2233                 id = *((lnet_process_id_t *) arg);
2234
2235                 lnet_debug_peer(id.nid);
2236
2237                 ni = lnet_net2ni(LNET_NIDNET(id.nid));
2238                 if (ni == NULL) {
2239                         CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
2240                 } else {
2241                         if (ni->ni_lnd->lnd_ctl == NULL) {
2242                                 CDEBUG(D_WARNING, "No ctl for %s\n",
2243                                        libcfs_id2str(id));
2244                         } else {
2245                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2246                         }
2247
2248                         lnet_ni_decref(ni);
2249                 }
2250                 return 0;
2251         }
2252
2253         default:
2254                 ni = lnet_net2ni(data->ioc_net);
2255                 if (ni == NULL)
2256                         return -EINVAL;
2257
2258                 if (ni->ni_lnd->lnd_ctl == NULL)
2259                         rc = -EINVAL;
2260                 else
2261                         rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2262
2263                 lnet_ni_decref(ni);
2264                 return rc;
2265         }
2266         /* not reached */
2267 }
2268 EXPORT_SYMBOL(LNetCtl);
2269
2270 /**
2271  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
2272  * all interfaces share a same PID, as requested by LNetNIInit().
2273  *
2274  * \param index Index of the interface to look up.
2275  * \param id On successful return, this location will hold the
2276  * lnet_process_id_t ID of the interface.
2277  *
2278  * \retval 0 If an interface exists at \a index.
2279  * \retval -ENOENT If no interface has been found.
2280  */
2281 int
2282 LNetGetId(unsigned int index, lnet_process_id_t *id)
2283 {
2284         struct lnet_ni   *ni;
2285         struct list_head *tmp;
2286         int               cpt;
2287         int               rc = -ENOENT;
2288
2289         LASSERT(the_lnet.ln_init);
2290         LASSERT(the_lnet.ln_refcount > 0);
2291
2292         cpt = lnet_net_lock_current();
2293
2294         list_for_each(tmp, &the_lnet.ln_nis) {
2295                 if (index-- != 0)
2296                         continue;
2297
2298                 ni = list_entry(tmp, lnet_ni_t, ni_list);
2299
2300                 id->nid = ni->ni_nid;
2301                 id->pid = the_lnet.ln_pid;
2302                 rc = 0;
2303                 break;
2304         }
2305
2306         lnet_net_unlock(cpt);
2307         return rc;
2308 }
2309 EXPORT_SYMBOL(LNetGetId);
2310
2311 /**
2312  * Print a string representation of handle \a h into buffer \a str of
2313  * \a len bytes.
2314  */
2315 void
2316 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2317 {
2318         snprintf(str, len, LPX64, h.cookie);
2319 }
2320 EXPORT_SYMBOL(LNetSnprintHandle);
2321
2322 static int
2323 lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids,
2324           int n_ids)
2325 {
2326         lnet_handle_eq_t     eqh;
2327         lnet_handle_md_t     mdh;
2328         lnet_event_t         event;
2329         lnet_md_t            md = {0};
2330         int                  which;
2331         int                  unlinked = 0;
2332         int                  replied = 0;
2333         const int            a_long_time = 60000; /* mS */
2334         int                  infosz;
2335         lnet_ping_info_t    *info;
2336         lnet_process_id_t    tmpid;
2337         int                  i;
2338         int                  nob;
2339         int                  rc;
2340         int                  rc2;
2341         sigset_t         blocked;
2342
2343         infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
2344
2345         if (n_ids <= 0 ||
2346             id.nid == LNET_NID_ANY ||
2347             timeout_ms > 500000 ||              /* arbitrary limit! */
2348             n_ids > 20)                         /* arbitrary limit! */
2349                 return -EINVAL;
2350
2351         if (id.pid == LNET_PID_ANY)
2352                 id.pid = LUSTRE_SRV_LNET_PID;
2353
2354         LIBCFS_ALLOC(info, infosz);
2355         if (info == NULL)
2356                 return -ENOMEM;
2357
2358         /* NB 2 events max (including any unlink event) */
2359         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2360         if (rc != 0) {
2361                 CERROR("Can't allocate EQ: %d\n", rc);
2362                 goto out_0;
2363         }
2364
2365         /* initialize md content */
2366         md.start     = info;
2367         md.length    = infosz;
2368         md.threshold = 2; /*GET/REPLY*/
2369         md.max_size  = 0;
2370         md.options   = LNET_MD_TRUNCATE;
2371         md.user_ptr  = NULL;
2372         md.eq_handle = eqh;
2373
2374         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2375         if (rc != 0) {
2376                 CERROR("Can't bind MD: %d\n", rc);
2377                 goto out_1;
2378         }
2379
2380         rc = LNetGet(LNET_NID_ANY, mdh, id,
2381                      LNET_RESERVED_PORTAL,
2382                      LNET_PROTO_PING_MATCHBITS, 0);
2383
2384         if (rc != 0) {
2385                 /* Don't CERROR; this could be deliberate! */
2386
2387                 rc2 = LNetMDUnlink(mdh);
2388                 LASSERT(rc2 == 0);
2389
2390                 /* NB must wait for the UNLINK event below... */
2391                 unlinked = 1;
2392                 timeout_ms = a_long_time;
2393         }
2394
2395         do {
2396                 /* MUST block for unlink to complete */
2397                 if (unlinked)
2398                         blocked = cfs_block_allsigs();
2399
2400                 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
2401
2402                 if (unlinked)
2403                         cfs_restore_sigs(blocked);
2404
2405                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2406                        (rc2 <= 0) ? -1 : event.type,
2407                        (rc2 <= 0) ? -1 : event.status,
2408                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2409
2410                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2411
2412                 if (rc2 <= 0 || event.status != 0) {
2413                         /* timeout or error */
2414                         if (!replied && rc == 0)
2415                                 rc = (rc2 < 0) ? rc2 :
2416                                      (rc2 == 0) ? -ETIMEDOUT :
2417                                      event.status;
2418
2419                         if (!unlinked) {
2420                                 /* Ensure completion in finite time... */
2421                                 LNetMDUnlink(mdh);
2422                                 /* No assertion (racing with network) */
2423                                 unlinked = 1;
2424                                 timeout_ms = a_long_time;
2425                         } else if (rc2 == 0) {
2426                                 /* timed out waiting for unlink */
2427                                 CWARN("ping %s: late network completion\n",
2428                                       libcfs_id2str(id));
2429                         }
2430                 } else if (event.type == LNET_EVENT_REPLY) {
2431                         replied = 1;
2432                         rc = event.mlength;
2433                 }
2434
2435         } while (rc2 <= 0 || !event.unlinked);
2436
2437         if (!replied) {
2438                 if (rc >= 0)
2439                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2440                               libcfs_id2str(id));
2441                 rc = -EIO;
2442                 goto out_1;
2443         }
2444
2445         nob = rc;
2446         LASSERT(nob >= 0 && nob <= infosz);
2447
2448         rc = -EPROTO;                           /* if I can't parse... */
2449
2450         if (nob < 8) {
2451                 /* can't check magic/version */
2452                 CERROR("%s: ping info too short %d\n",
2453                        libcfs_id2str(id), nob);
2454                 goto out_1;
2455         }
2456
2457         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2458                 lnet_swap_pinginfo(info);
2459         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2460                 CERROR("%s: Unexpected magic %08x\n",
2461                        libcfs_id2str(id), info->pi_magic);
2462                 goto out_1;
2463         }
2464
2465         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2466                 CERROR("%s: ping w/o NI status: 0x%x\n",
2467                        libcfs_id2str(id), info->pi_features);
2468                 goto out_1;
2469         }
2470
2471         if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
2472                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2473                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
2474                 goto out_1;
2475         }
2476
2477         if (info->pi_nnis < n_ids)
2478                 n_ids = info->pi_nnis;
2479
2480         if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
2481                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2482                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
2483                 goto out_1;
2484         }
2485
2486         rc = -EFAULT;                           /* If I SEGV... */
2487
2488         for (i = 0; i < n_ids; i++) {
2489                 tmpid.pid = info->pi_pid;
2490                 tmpid.nid = info->pi_ni[i].ns_nid;
2491                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2492                         goto out_1;
2493         }
2494         rc = info->pi_nnis;
2495
2496  out_1:
2497         rc2 = LNetEQFree(eqh);
2498         if (rc2 != 0)
2499                 CERROR("rc2 %d\n", rc2);
2500         LASSERT(rc2 == 0);
2501
2502  out_0:
2503         LIBCFS_FREE(info, infosz);
2504         return rc;
2505 }