Whamcloud - gitweb
LU-5568 lnet: fix kernel crash when network failed to start
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include <lnet/lib-lnet.h>
39 #include <lnet/lib-dlc.h>
40 #ifdef __KERNEL__
41 #include <linux/log2.h>
42 #endif
43
44 #ifdef __KERNEL__
45 #define D_LNI D_CONSOLE
46 #else
47 #define D_LNI D_CONFIG
48 #endif
49
50 lnet_t      the_lnet;                           /* THE state of the network */
51 EXPORT_SYMBOL(the_lnet);
52
53 #ifdef __KERNEL__
54
55 static char *ip2nets = "";
56 CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
57                 "LNET network <- IP table");
58
59 static char *networks = "";
60 CFS_MODULE_PARM(networks, "s", charp, 0444,
61                 "local networks");
62
63 static char *routes = "";
64 CFS_MODULE_PARM(routes, "s", charp, 0444,
65                 "routes to non-local networks");
66
67 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
68 CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
69                 "size of remote network hash table");
70
71 static void lnet_ping_target_fini(void);
72 static int lnet_ping(lnet_process_id_t id, int timeout_ms,
73                      lnet_process_id_t *ids, int n_ids);
74
75 static char *
76 lnet_get_routes(void)
77 {
78         return routes;
79 }
80
81 static char *
82 lnet_get_networks(void)
83 {
84         char   *nets;
85         int     rc;
86
87         if (*networks != 0 && *ip2nets != 0) {
88                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
89                                    "'ip2nets' but not both at once\n");
90                 return NULL;
91         }
92
93         if (*ip2nets != 0) {
94                 rc = lnet_parse_ip2nets(&nets, ip2nets);
95                 return (rc == 0) ? nets : NULL;
96         }
97
98         if (*networks != 0)
99                 return networks;
100
101         return "tcp";
102 }
103
104 static void
105 lnet_init_locks(void)
106 {
107         spin_lock_init(&the_lnet.ln_eq_wait_lock);
108         init_waitqueue_head(&the_lnet.ln_eq_waitq);
109         mutex_init(&the_lnet.ln_lnd_mutex);
110         mutex_init(&the_lnet.ln_api_mutex);
111 }
112
/* Kernel build: counterpart of lnet_init_locks(); there is nothing to do. */
static void
lnet_fini_locks(void)
{
        return;
}
117
118 #else
119
/*
 * Userspace build: routes come from the LNET_ROUTES environment variable;
 * an unset variable yields the empty string.
 */
static char *
lnet_get_routes(void)
{
        char *env = getenv("LNET_ROUTES");

        if (env != NULL)
                return env;

        return "";
}
127
128 static char *
129 lnet_get_networks (void)
130 {
131         static char       default_networks[256];
132         char             *networks = getenv("LNET_NETWORKS");
133         char             *str;
134         char             *sep;
135         int               len;
136         int               nob;
137         struct list_head *tmp;
138
139         if (networks != NULL)
140                 return networks;
141
142         /* In userland, the default 'networks=' is the list of known net types */
143         len = sizeof(default_networks);
144         str = default_networks;
145         *str = 0;
146         sep = "";
147
148         list_for_each(tmp, &the_lnet.ln_lnds) {
149                 lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
150
151                 nob = snprintf(str, len, "%s%s", sep,
152                                libcfs_lnd2str(lnd->lnd_type));
153                 if (nob >= len) {
154                         /* overflowed the string; leave it where it was */
155                         *str = 0;
156                         break;
157                 }
158                 len -= nob;
159                 str += nob;
160                 sep = ",";
161         }
162
163         return default_networks;
164 }
165
166 # ifndef HAVE_LIBPTHREAD
167
168 static void lnet_init_locks(void)
169 {
170         the_lnet.ln_eq_wait_lock = 0;
171         the_lnet.ln_lnd_mutex = 0;
172         the_lnet.ln_api_mutex = 0;
173 }
174
175 static void lnet_fini_locks(void)
176 {
177         LASSERT(the_lnet.ln_api_mutex == 0);
178         LASSERT(the_lnet.ln_lnd_mutex == 0);
179         LASSERT(the_lnet.ln_eq_wait_lock == 0);
180 }
181
182 # else
183
184 static void lnet_init_locks(void)
185 {
186         pthread_cond_init(&the_lnet.ln_eq_cond, NULL);
187         pthread_mutex_init(&the_lnet.ln_eq_wait_lock, NULL);
188         pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL);
189         pthread_mutex_init(&the_lnet.ln_api_mutex, NULL);
190 }
191
192 static void lnet_fini_locks(void)
193 {
194         pthread_mutex_destroy(&the_lnet.ln_api_mutex);
195         pthread_mutex_destroy(&the_lnet.ln_lnd_mutex);
196         pthread_mutex_destroy(&the_lnet.ln_eq_wait_lock);
197         pthread_cond_destroy(&the_lnet.ln_eq_cond);
198 }
199
200 # endif
201 #endif
202
203 static int
204 lnet_create_remote_nets_table(void)
205 {
206         int               i;
207         struct list_head *hash;
208
209         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
210         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
211         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
212         if (hash == NULL) {
213                 CERROR("Failed to create remote nets hash table\n");
214                 return -ENOMEM;
215         }
216
217         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
218                 INIT_LIST_HEAD(&hash[i]);
219         the_lnet.ln_remote_nets_hash = hash;
220         return 0;
221 }
222
223 static void
224 lnet_destroy_remote_nets_table(void)
225 {
226         int i;
227
228         if (the_lnet.ln_remote_nets_hash == NULL)
229                 return;
230
231         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
232                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
233
234         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
235                     LNET_REMOTE_NETS_HASH_SIZE *
236                     sizeof(the_lnet.ln_remote_nets_hash[0]));
237         the_lnet.ln_remote_nets_hash = NULL;
238 }
239
240 static void
241 lnet_destroy_locks(void)
242 {
243         if (the_lnet.ln_res_lock != NULL) {
244                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
245                 the_lnet.ln_res_lock = NULL;
246         }
247
248         if (the_lnet.ln_net_lock != NULL) {
249                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
250                 the_lnet.ln_net_lock = NULL;
251         }
252
253         lnet_fini_locks();
254 }
255
256 static int
257 lnet_create_locks(void)
258 {
259         lnet_init_locks();
260
261         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
262         if (the_lnet.ln_res_lock == NULL)
263                 goto failed;
264
265         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
266         if (the_lnet.ln_net_lock == NULL)
267                 goto failed;
268
269         return 0;
270
271  failed:
272         lnet_destroy_locks();
273         return -ENOMEM;
274 }
275
276 static void lnet_assert_wire_constants(void)
277 {
278         /* Wire protocol assertions generated by 'wirecheck'
279          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
280          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
281          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
282
283         /* Constants... */
284         CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
285         CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
286         CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
287         CLASSERT (LNET_MSG_ACK == 0);
288         CLASSERT (LNET_MSG_PUT == 1);
289         CLASSERT (LNET_MSG_GET == 2);
290         CLASSERT (LNET_MSG_REPLY == 3);
291         CLASSERT (LNET_MSG_HELLO == 4);
292
293         /* Checks for struct ptl_handle_wire_t */
294         CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
295         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
296         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
297         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
298         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
299
300         /* Checks for struct lnet_magicversion_t */
301         CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
302         CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
303         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
304         CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
305         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
306         CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
307         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
308
309         /* Checks for struct lnet_hdr_t */
310         CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
311         CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
312         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
313         CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
314         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
315         CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
316         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
317         CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
318         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
319         CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
320         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
321         CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
322         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
323         CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
324         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
325
326         /* Ack */
327         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
328         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
329         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
330         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
331         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
332         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
333
334         /* Put */
335         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
336         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
337         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
338         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
339         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
340         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
341         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
342         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
343         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
344         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
345
346         /* Get */
347         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
348         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
349         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
350         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
351         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
352         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
353         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
354         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
355         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
356         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
357
358         /* Reply */
359         CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
360         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
361
362         /* Hello */
363         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
364         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
365         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
366         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
367 }
368
369 static lnd_t *
370 lnet_find_lnd_by_type (int type)
371 {
372         lnd_t            *lnd;
373         struct list_head *tmp;
374
375         /* holding lnd mutex */
376         list_for_each(tmp, &the_lnet.ln_lnds) {
377                 lnd = list_entry(tmp, lnd_t, lnd_list);
378
379                 if ((int)lnd->lnd_type == type)
380                         return lnd;
381         }
382         return NULL;
383 }
384
385 void
386 lnet_register_lnd (lnd_t *lnd)
387 {
388         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
389
390         LASSERT(the_lnet.ln_init);
391         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
392         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
393
394         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
395         lnd->lnd_refcount = 0;
396
397         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
398
399         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
400 }
401 EXPORT_SYMBOL(lnet_register_lnd);
402
403 void
404 lnet_unregister_lnd (lnd_t *lnd)
405 {
406         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
407
408         LASSERT(the_lnet.ln_init);
409         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
410         LASSERT(lnd->lnd_refcount == 0);
411
412         list_del(&lnd->lnd_list);
413         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
414
415         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
416 }
417 EXPORT_SYMBOL(lnet_unregister_lnd);
418
419 void
420 lnet_counters_get(lnet_counters_t *counters)
421 {
422         lnet_counters_t *ctr;
423         int             i;
424
425         memset(counters, 0, sizeof(*counters));
426
427         lnet_net_lock(LNET_LOCK_EX);
428
429         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
430                 counters->msgs_max     += ctr->msgs_max;
431                 counters->msgs_alloc   += ctr->msgs_alloc;
432                 counters->errors       += ctr->errors;
433                 counters->send_count   += ctr->send_count;
434                 counters->recv_count   += ctr->recv_count;
435                 counters->route_count  += ctr->route_count;
436                 counters->drop_count   += ctr->drop_count;
437                 counters->send_length  += ctr->send_length;
438                 counters->recv_length  += ctr->recv_length;
439                 counters->route_length += ctr->route_length;
440                 counters->drop_length  += ctr->drop_length;
441
442         }
443         lnet_net_unlock(LNET_LOCK_EX);
444 }
445 EXPORT_SYMBOL(lnet_counters_get);
446
447 void
448 lnet_counters_reset(void)
449 {
450         lnet_counters_t *counters;
451         int             i;
452
453         lnet_net_lock(LNET_LOCK_EX);
454
455         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
456                 memset(counters, 0, sizeof(lnet_counters_t));
457
458         lnet_net_unlock(LNET_LOCK_EX);
459 }
460 EXPORT_SYMBOL(lnet_counters_reset);
461
462 #ifdef LNET_USE_LIB_FREELIST
463
464 int
465 lnet_freelist_init(lnet_freelist_t *fl, int n, int size)
466 {
467         char *space;
468
469         LASSERT (n > 0);
470
471         size += offsetof (lnet_freeobj_t, fo_contents);
472
473         LIBCFS_ALLOC(space, n * size);
474         if (space == NULL)
475                 return (-ENOMEM);
476
477         INIT_LIST_HEAD(&fl->fl_list);
478         fl->fl_objs = space;
479         fl->fl_nobjs = n;
480         fl->fl_objsize = size;
481
482         do {
483                 list_add((struct list_head *)space, &fl->fl_list);
484                 space += size;
485         } while (--n != 0);
486
487         return 0;
488 }
489
490 void
491 lnet_freelist_fini(lnet_freelist_t *fl)
492 {
493         struct list_head *el;
494         int               count;
495
496         if (fl->fl_nobjs == 0)
497                 return;
498
499         count = 0;
500         for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
501                 count++;
502
503         LASSERT (count == fl->fl_nobjs);
504
505         LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
506         memset (fl, 0, sizeof (*fl));
507 }
508
509 #endif /* LNET_USE_LIB_FREELIST */
510
511 static __u64 lnet_create_interface_cookie(void)
512 {
513         /* NB the interface cookie in wire handles guards against delayed
514          * replies and ACKs appearing valid after reboot. Initialisation time,
515          * even if it's only implemented to millisecond resolution is probably
516          * easily good enough. */
517         struct timeval tv;
518         __u64          cookie;
519 #ifndef __KERNEL__
520         int            rc = gettimeofday (&tv, NULL);
521         LASSERT (rc == 0);
522 #else
523         do_gettimeofday(&tv);
524 #endif
525         cookie = tv.tv_sec;
526         cookie *= 1000000;
527         cookie += tv.tv_usec;
528         return cookie;
529 }
530
531 static char *
532 lnet_res_type2str(int type)
533 {
534         switch (type) {
535         default:
536                 LBUG();
537         case LNET_COOKIE_TYPE_MD:
538                 return "MD";
539         case LNET_COOKIE_TYPE_ME:
540                 return "ME";
541         case LNET_COOKIE_TYPE_EQ:
542                 return "EQ";
543         }
544 }
545
546 static void
547 lnet_res_container_cleanup(struct lnet_res_container *rec)
548 {
549         int     count = 0;
550
551         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
552                 return;
553
554         while (!list_empty(&rec->rec_active)) {
555                 struct list_head *e = rec->rec_active.next;
556
557                 list_del_init(e);
558                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
559                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
560
561                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
562                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
563
564                 } else { /* NB: Active MEs should be attached on portals */
565                         LBUG();
566                 }
567                 count++;
568         }
569
570         if (count > 0) {
571                 /* Found alive MD/ME/EQ, user really should unlink/free
572                  * all of them before finalize LNet, but if someone didn't,
573                  * we have to recycle garbage for him */
574                 CERROR("%d active elements on exit of %s container\n",
575                        count, lnet_res_type2str(rec->rec_type));
576         }
577
578 #ifdef LNET_USE_LIB_FREELIST
579         lnet_freelist_fini(&rec->rec_freelist);
580 #endif
581         if (rec->rec_lh_hash != NULL) {
582                 LIBCFS_FREE(rec->rec_lh_hash,
583                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
584                 rec->rec_lh_hash = NULL;
585         }
586
587         rec->rec_type = 0; /* mark it as finalized */
588 }
589
590 static int
591 lnet_res_container_setup(struct lnet_res_container *rec,
592                          int cpt, int type, int objnum, int objsz)
593 {
594         int     rc = 0;
595         int     i;
596
597         LASSERT(rec->rec_type == 0);
598
599         rec->rec_type = type;
600         INIT_LIST_HEAD(&rec->rec_active);
601
602 #ifdef LNET_USE_LIB_FREELIST
603         memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist));
604         rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz);
605         if (rc != 0)
606                 goto out;
607 #endif
608         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
609
610         /* Arbitrary choice of hash table size */
611         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
612                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
613         if (rec->rec_lh_hash == NULL) {
614                 rc = -ENOMEM;
615                 goto out;
616         }
617
618         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
619                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
620
621         return 0;
622
623 out:
624         CERROR("Failed to setup %s resource container\n",
625                lnet_res_type2str(type));
626         lnet_res_container_cleanup(rec);
627         return rc;
628 }
629
630 static void
631 lnet_res_containers_destroy(struct lnet_res_container **recs)
632 {
633         struct lnet_res_container       *rec;
634         int                             i;
635
636         cfs_percpt_for_each(rec, i, recs)
637                 lnet_res_container_cleanup(rec);
638
639         cfs_percpt_free(recs);
640 }
641
642 static struct lnet_res_container **
643 lnet_res_containers_create(int type, int objnum, int objsz)
644 {
645         struct lnet_res_container       **recs;
646         struct lnet_res_container       *rec;
647         int                             rc;
648         int                             i;
649
650         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
651         if (recs == NULL) {
652                 CERROR("Failed to allocate %s resource containers\n",
653                        lnet_res_type2str(type));
654                 return NULL;
655         }
656
657         cfs_percpt_for_each(rec, i, recs) {
658                 rc = lnet_res_container_setup(rec, i, type, objnum, objsz);
659                 if (rc != 0) {
660                         lnet_res_containers_destroy(recs);
661                         return NULL;
662                 }
663         }
664
665         return recs;
666 }
667
668 lnet_libhandle_t *
669 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
670 {
671         /* ALWAYS called with lnet_res_lock held */
672         struct list_head        *head;
673         lnet_libhandle_t        *lh;
674         unsigned int            hash;
675
676         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
677                 return NULL;
678
679         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
680         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
681
682         list_for_each_entry(lh, head, lh_hash_chain) {
683                 if (lh->lh_cookie == cookie)
684                         return lh;
685         }
686
687         return NULL;
688 }
689
690 void
691 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
692 {
693         /* ALWAYS called with lnet_res_lock held */
694         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
695         unsigned int    hash;
696
697         lh->lh_cookie = rec->rec_lh_cookie;
698         rec->rec_lh_cookie += 1 << ibits;
699
700         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
701
702         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
703 }
704
705 #ifndef __KERNEL__
706 /**
707  * Reserved API - do not use.
708  * Temporary workaround to allow uOSS and test programs force server
709  * mode in userspace. See comments near ln_server_mode_flag in
710  * lnet/lib-types.h */
711
712 void
713 lnet_server_mode() {
714         the_lnet.ln_server_mode_flag = 1;
715 }
716 #endif
717
718 static int lnet_unprepare(void);
719
720 static int
721 lnet_prepare(lnet_pid_t requested_pid)
722 {
723         /* Prepare to bring up the network */
724         struct lnet_res_container **recs;
725         int                       rc = 0;
726
727         if (requested_pid == LNET_PID_ANY) {
728                 /* Don't instantiate LNET just for me */
729                 return -ENETDOWN;
730         }
731
732         LASSERT (the_lnet.ln_refcount == 0);
733
734         the_lnet.ln_routing = 0;
735
736 #ifdef __KERNEL__
737         LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
738         the_lnet.ln_pid = requested_pid;
739 #else
740         if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
741                 LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
742
743                 if (current_uid() != 0) /* Only root can run user-space server */
744                         return -EPERM;
745                 the_lnet.ln_pid = requested_pid;
746
747         } else {/* client case (liblustre) */
748
749                 /* My PID must be unique on this node and flag I'm userspace */
750                 the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
751         }
752 #endif
753
754         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
755         INIT_LIST_HEAD(&the_lnet.ln_nis);
756         INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
757         INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
758         INIT_LIST_HEAD(&the_lnet.ln_routers);
759
760         rc = lnet_create_remote_nets_table();
761         if (rc != 0)
762                 goto failed;
763
764         the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
765
766         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
767                                                 sizeof(lnet_counters_t));
768         if (the_lnet.ln_counters == NULL) {
769                 CERROR("Failed to allocate counters for LNet\n");
770                 rc = -ENOMEM;
771                 goto failed;
772         }
773
774         rc = lnet_peer_tables_create();
775         if (rc != 0)
776                 goto failed;
777
778         rc = lnet_msg_containers_create();
779         if (rc != 0)
780                 goto failed;
781
782         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
783                                       LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS,
784                                       sizeof(lnet_eq_t));
785         if (rc != 0)
786                 goto failed;
787
788         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
789                                           sizeof(lnet_me_t));
790         if (recs == NULL)
791                 goto failed;
792
793         the_lnet.ln_me_containers = recs;
794
795         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
796                                           sizeof(lnet_libmd_t));
797         if (recs == NULL)
798                 goto failed;
799
800         the_lnet.ln_md_containers = recs;
801
802         rc = lnet_portals_create();
803         if (rc != 0) {
804                 CERROR("Failed to create portals for LNet: %d\n", rc);
805                 goto failed;
806         }
807
808         return 0;
809
810  failed:
811         lnet_unprepare();
812         return rc;
813 }
814
815 static int
816 lnet_unprepare (void)
817 {
818         /* NB no LNET_LOCK since this is the last reference.  All LND instances
819          * have shut down already, so it is safe to unlink and free all
820          * descriptors, even those that appear committed to a network op (eg MD
821          * with non-zero pending count) */
822
823         lnet_fail_nid(LNET_NID_ANY, 0);
824
825         LASSERT(the_lnet.ln_refcount == 0);
826         LASSERT(list_empty(&the_lnet.ln_test_peers));
827         LASSERT(list_empty(&the_lnet.ln_nis));
828         LASSERT(list_empty(&the_lnet.ln_nis_cpt));
829         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
830
831         lnet_portals_destroy();
832
833         if (the_lnet.ln_md_containers != NULL) {
834                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
835                 the_lnet.ln_md_containers = NULL;
836         }
837
838         if (the_lnet.ln_me_containers != NULL) {
839                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
840                 the_lnet.ln_me_containers = NULL;
841         }
842
843         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
844
845         lnet_msg_containers_destroy();
846         lnet_peer_tables_destroy();
847         lnet_rtrpools_free(0);
848
849         if (the_lnet.ln_counters != NULL) {
850                 cfs_percpt_free(the_lnet.ln_counters);
851                 the_lnet.ln_counters = NULL;
852         }
853         lnet_destroy_remote_nets_table();
854
855         return 0;
856 }
857
858 lnet_ni_t  *
859 lnet_net2ni_locked(__u32 net, int cpt)
860 {
861         struct list_head *tmp;
862         lnet_ni_t        *ni;
863
864         LASSERT(cpt != LNET_LOCK_EX);
865
866         list_for_each(tmp, &the_lnet.ln_nis) {
867                 ni = list_entry(tmp, lnet_ni_t, ni_list);
868
869                 if (LNET_NIDNET(ni->ni_nid) == net) {
870                         lnet_ni_addref_locked(ni, cpt);
871                         return ni;
872                 }
873         }
874
875         return NULL;
876 }
877
878 lnet_ni_t *
879 lnet_net2ni(__u32 net)
880 {
881         lnet_ni_t *ni;
882
883         lnet_net_lock(0);
884         ni = lnet_net2ni_locked(net, 0);
885         lnet_net_unlock(0);
886
887         return ni;
888 }
889 EXPORT_SYMBOL(lnet_net2ni);
890
891 static unsigned int
892 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
893 {
894         __u64           key = nid;
895         unsigned int    val;
896
897         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
898
899         if (number == 1)
900                 return 0;
901
902         val = hash_long(key, LNET_CPT_BITS);
903         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
904         if (val < number)
905                 return val;
906
907         return (unsigned int)(key + val + (val >> 1)) % number;
908 }
909
910 int
911 lnet_cpt_of_nid_locked(lnet_nid_t nid)
912 {
913         struct lnet_ni *ni;
914
915         /* must called with hold of lnet_net_lock */
916         if (LNET_CPT_NUMBER == 1)
917                 return 0; /* the only one */
918
919         /* take lnet_net_lock(any) would be OK */
920         if (!list_empty(&the_lnet.ln_nis_cpt)) {
921                 list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
922                         if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
923                                 continue;
924
925                         LASSERT(ni->ni_cpts != NULL);
926                         return ni->ni_cpts[lnet_nid_cpt_hash
927                                            (nid, ni->ni_ncpts)];
928                 }
929         }
930
931         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
932 }
933
934 int
935 lnet_cpt_of_nid(lnet_nid_t nid)
936 {
937         int     cpt;
938         int     cpt2;
939
940         if (LNET_CPT_NUMBER == 1)
941                 return 0; /* the only one */
942
943         if (list_empty(&the_lnet.ln_nis_cpt))
944                 return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
945
946         cpt = lnet_net_lock_current();
947         cpt2 = lnet_cpt_of_nid_locked(nid);
948         lnet_net_unlock(cpt);
949
950         return cpt2;
951 }
952 EXPORT_SYMBOL(lnet_cpt_of_nid);
953
954 int
955 lnet_islocalnet(__u32 net)
956 {
957         struct lnet_ni  *ni;
958         int             cpt;
959
960         cpt = lnet_net_lock_current();
961
962         ni = lnet_net2ni_locked(net, cpt);
963         if (ni != NULL)
964                 lnet_ni_decref_locked(ni, cpt);
965
966         lnet_net_unlock(cpt);
967
968         return ni != NULL;
969 }
970
971 lnet_ni_t  *
972 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
973 {
974         struct lnet_ni   *ni;
975         struct list_head *tmp;
976
977         LASSERT(cpt != LNET_LOCK_EX);
978
979         list_for_each(tmp, &the_lnet.ln_nis) {
980                 ni = list_entry(tmp, lnet_ni_t, ni_list);
981
982                 if (ni->ni_nid == nid) {
983                         lnet_ni_addref_locked(ni, cpt);
984                         return ni;
985                 }
986         }
987
988         return NULL;
989 }
990
991 int
992 lnet_islocalnid(lnet_nid_t nid)
993 {
994         struct lnet_ni  *ni;
995         int             cpt;
996
997         cpt = lnet_net_lock_current();
998         ni = lnet_nid2ni_locked(nid, cpt);
999         if (ni != NULL)
1000                 lnet_ni_decref_locked(ni, cpt);
1001         lnet_net_unlock(cpt);
1002
1003         return ni != NULL;
1004 }
1005
/*
 * Return the # of NIs that need the acceptor, i.e. those whose LND provides
 * an lnd_accept method.  Always 0 when built without __KERNEL__ and without
 * HAVE_LIBPTHREAD.
 */
int
lnet_count_acceptor_nis (void)
{
        int              nacceptors = 0;
#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
        struct lnet_ni   *ni;
        int              cpt;

        cpt = lnet_net_lock_current();

        list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
                if (ni->ni_lnd->lnd_accept == NULL)
                        continue;

                nacceptors++;
        }

        lnet_net_unlock(cpt);

#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
        return nacceptors;
}
1029
1030 static lnet_ping_info_t *
1031 lnet_ping_info_create(int num_ni)
1032 {
1033         lnet_ping_info_t *ping_info;
1034         unsigned int     infosz;
1035
1036         infosz = offsetof(lnet_ping_info_t, pi_ni[num_ni]);
1037         LIBCFS_ALLOC(ping_info, infosz);
1038         if (ping_info == NULL) {
1039                 CERROR("Can't allocate ping info[%d]\n", num_ni);
1040                 return NULL;
1041         }
1042
1043         ping_info->pi_nnis = num_ni;
1044         ping_info->pi_pid = the_lnet.ln_pid;
1045         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
1046         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
1047
1048         return ping_info;
1049 }
1050
1051 static inline int
1052 lnet_get_ni_count(void)
1053 {
1054         struct lnet_ni *ni;
1055         int            count = 0;
1056
1057         lnet_net_lock(0);
1058
1059         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
1060                 count++;
1061
1062         lnet_net_unlock(0);
1063
1064         return count;
1065 }
1066
1067 static inline void
1068 lnet_ping_info_free(lnet_ping_info_t *pinfo)
1069 {
1070         LIBCFS_FREE(pinfo,
1071                     offsetof(lnet_ping_info_t,
1072                              pi_ni[pinfo->pi_nnis]));
1073 }
1074
1075 static void
1076 lnet_ping_info_destroy(void)
1077 {
1078         struct lnet_ni  *ni;
1079
1080         lnet_net_lock(LNET_LOCK_EX);
1081
1082         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1083                 lnet_ni_lock(ni);
1084                 ni->ni_status = NULL;
1085                 lnet_ni_unlock(ni);
1086         }
1087
1088         lnet_ping_info_free(the_lnet.ln_ping_info);
1089         the_lnet.ln_ping_info = NULL;
1090
1091         lnet_net_unlock(LNET_LOCK_EX);
1092 }
1093
1094 static void
1095 lnet_ping_event_handler(lnet_event_t *event)
1096 {
1097         lnet_ping_info_t *pinfo = event->md.user_ptr;
1098
1099         if (event->unlinked)
1100                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
1101 }
1102
1103 static int
1104 lnet_ping_info_setup(lnet_ping_info_t **ppinfo, lnet_handle_md_t *md_handle,
1105                      int ni_count, bool set_eq)
1106 {
1107         lnet_handle_me_t  me_handle;
1108         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1109         lnet_md_t         md = {0};
1110         int               rc, rc2;
1111
1112         if (set_eq) {
1113                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
1114                                  &the_lnet.ln_ping_target_eq);
1115                 if (rc != 0) {
1116                         CERROR("Can't allocate ping EQ: %d\n", rc);
1117                         return rc;
1118                 }
1119         }
1120
1121         *ppinfo = lnet_ping_info_create(ni_count);
1122         if (*ppinfo == NULL) {
1123                 rc = -ENOMEM;
1124                 goto failed_0;
1125         }
1126
1127         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1128                           LNET_PROTO_PING_MATCHBITS, 0,
1129                           LNET_UNLINK, LNET_INS_AFTER,
1130                           &me_handle);
1131         if (rc != 0) {
1132                 CERROR("Can't create ping ME: %d\n", rc);
1133                 goto failed_1;
1134         }
1135
1136         /* initialize md content */
1137         md.start     = *ppinfo;
1138         md.length    = offsetof(lnet_ping_info_t,
1139                                 pi_ni[(*ppinfo)->pi_nnis]);
1140         md.threshold = LNET_MD_THRESH_INF;
1141         md.max_size  = 0;
1142         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1143                        LNET_MD_MANAGE_REMOTE;
1144         md.user_ptr  = NULL;
1145         md.eq_handle = the_lnet.ln_ping_target_eq;
1146         md.user_ptr = *ppinfo;
1147
1148         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
1149         if (rc != 0) {
1150                 CERROR("Can't attach ping MD: %d\n", rc);
1151                 goto failed_2;
1152         }
1153
1154         return 0;
1155
1156 failed_2:
1157         rc2 = LNetMEUnlink(me_handle);
1158         LASSERT(rc2 == 0);
1159 failed_1:
1160         lnet_ping_info_free(*ppinfo);
1161         *ppinfo = NULL;
1162 failed_0:
1163         if (set_eq)
1164                 LNetEQFree(the_lnet.ln_ping_target_eq);
1165         return rc;
1166 }
1167
1168 static void
1169 lnet_ping_md_unlink(lnet_ping_info_t *pinfo, lnet_handle_md_t *md_handle)
1170 {
1171         sigset_t        blocked = cfs_block_allsigs();
1172
1173         LNetMDUnlink(*md_handle);
1174         LNetInvalidateHandle(md_handle);
1175
1176         /* NB md could be busy; this just starts the unlink */
1177         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
1178                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
1179                 cfs_pause(cfs_time_seconds(1));
1180         }
1181
1182         cfs_restore_sigs(blocked);
1183 }
1184
1185 static void
1186 lnet_ping_info_install_locked(lnet_ping_info_t *ping_info)
1187 {
1188         int                     i;
1189         lnet_ni_t               *ni;
1190         lnet_ni_status_t        *ns;
1191
1192         i = 0;
1193         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1194                 LASSERT(i < ping_info->pi_nnis);
1195
1196                 ns = &ping_info->pi_ni[i];
1197
1198                 ns->ns_nid = ni->ni_nid;
1199
1200                 lnet_ni_lock(ni);
1201                 ns->ns_status = (ni->ni_status != NULL) ?
1202                                 ni->ni_status->ns_status : LNET_NI_STATUS_UP;
1203                 ni->ni_status = ns;
1204                 lnet_ni_unlock(ni);
1205
1206                 i++;
1207         }
1208 }
1209
1210 static void
1211 lnet_ping_target_update(lnet_ping_info_t *pinfo, lnet_handle_md_t md_handle)
1212 {
1213         lnet_ping_info_t *old_pinfo = NULL;
1214         lnet_handle_md_t old_md;
1215
1216         /* switch the NIs to point to the new ping info created */
1217         lnet_net_lock(LNET_LOCK_EX);
1218
1219         if (!the_lnet.ln_routing)
1220                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1221         lnet_ping_info_install_locked(pinfo);
1222
1223         if (the_lnet.ln_ping_info != NULL) {
1224                 old_pinfo = the_lnet.ln_ping_info;
1225                 old_md = the_lnet.ln_ping_target_md;
1226         }
1227         the_lnet.ln_ping_target_md = md_handle;
1228         the_lnet.ln_ping_info = pinfo;
1229
1230         lnet_net_unlock(LNET_LOCK_EX);
1231
1232         if (old_pinfo != NULL) {
1233                 /* unlink the old ping info */
1234                 lnet_ping_md_unlink(old_pinfo, &old_md);
1235                 lnet_ping_info_free(old_pinfo);
1236         }
1237 }
1238
1239 static void
1240 lnet_ping_target_fini(void)
1241 {
1242         int             rc;
1243
1244         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1245                             &the_lnet.ln_ping_target_md);
1246
1247         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1248         LASSERT(rc == 0);
1249
1250         lnet_ping_info_destroy();
1251 }
1252
1253 static int
1254 lnet_ni_tq_credits(lnet_ni_t *ni)
1255 {
1256         int     credits;
1257
1258         LASSERT(ni->ni_ncpts >= 1);
1259
1260         if (ni->ni_ncpts == 1)
1261                 return ni->ni_maxtxcredits;
1262
1263         credits = ni->ni_maxtxcredits / ni->ni_ncpts;
1264         credits = max(credits, 8 * ni->ni_peertxcredits);
1265         credits = min(credits, ni->ni_maxtxcredits);
1266
1267         return credits;
1268 }
1269
1270 static void
1271 lnet_clear_zombies_nis_locked(void)
1272 {
1273         int             i;
1274         int             islo;
1275         lnet_ni_t       *ni;
1276
1277         /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
1278          * and shut them down in guaranteed thread context */
1279         i = 2;
1280         while (!list_empty(&the_lnet.ln_nis_zombie)) {
1281                 int     *ref;
1282                 int     j;
1283
1284                 ni = list_entry(the_lnet.ln_nis_zombie.next,
1285                                 lnet_ni_t, ni_list);
1286                 list_del_init(&ni->ni_list);
1287                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1288                         if (*ref == 0)
1289                                 continue;
1290                         /* still busy, add it back to zombie list */
1291                         list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
1292                         break;
1293                 }
1294
1295                 if (!list_empty(&ni->ni_list)) {
1296                         lnet_net_unlock(LNET_LOCK_EX);
1297                         ++i;
1298                         if ((i & (-i)) == i) {
1299                                 CDEBUG(D_WARNING,
1300                                        "Waiting for zombie LNI %s\n",
1301                                        libcfs_nid2str(ni->ni_nid));
1302                         }
1303                         cfs_pause(cfs_time_seconds(1));
1304                         lnet_net_lock(LNET_LOCK_EX);
1305                         continue;
1306                 }
1307
1308                 ni->ni_lnd->lnd_refcount--;
1309                 lnet_net_unlock(LNET_LOCK_EX);
1310
1311                 islo = ni->ni_lnd->lnd_type == LOLND;
1312
1313                 LASSERT(!in_interrupt());
1314                 (ni->ni_lnd->lnd_shutdown)(ni);
1315
1316                 /* can't deref lnd anymore now; it might have unregistered
1317                  * itself...  */
1318
1319                 if (!islo)
1320                         CDEBUG(D_LNI, "Removed LNI %s\n",
1321                               libcfs_nid2str(ni->ni_nid));
1322
1323                 lnet_ni_free(ni);
1324                 i = 2;
1325                 lnet_net_lock(LNET_LOCK_EX);
1326         }
1327 }
1328
1329 static void
1330 lnet_shutdown_lndnis(void)
1331 {
1332         int             i;
1333         lnet_ni_t       *ni;
1334
1335         /* NB called holding the global mutex */
1336
1337         /* All quiet on the API front */
1338         LASSERT(!the_lnet.ln_shutdown);
1339         LASSERT(the_lnet.ln_refcount == 0);
1340         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
1341
1342         lnet_net_lock(LNET_LOCK_EX);
1343         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1344
1345         /* Unlink NIs from the global table */
1346         while (!list_empty(&the_lnet.ln_nis)) {
1347                 ni = list_entry(the_lnet.ln_nis.next,
1348                                 lnet_ni_t, ni_list);
1349                 /* move it to zombie list and nobody can find it anymore */
1350                 list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
1351                 lnet_ni_decref_locked(ni, 0);   /* drop ln_nis' ref */
1352
1353                 if (!list_empty(&ni->ni_cptlist)) {
1354                         list_del_init(&ni->ni_cptlist);
1355                         lnet_ni_decref_locked(ni, 0);
1356                 }
1357         }
1358
1359         /* Drop the cached eqwait NI. */
1360         if (the_lnet.ln_eq_waitni != NULL) {
1361                 lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0);
1362                 the_lnet.ln_eq_waitni = NULL;
1363         }
1364
1365         /* Drop the cached loopback NI. */
1366         if (the_lnet.ln_loni != NULL) {
1367                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1368                 the_lnet.ln_loni = NULL;
1369         }
1370
1371         lnet_net_unlock(LNET_LOCK_EX);
1372
1373         /* Clear lazy portals and drop delayed messages which hold refs
1374          * on their lnet_msg_t::msg_rxpeer */
1375         for (i = 0; i < the_lnet.ln_nportals; i++)
1376                 LNetClearLazyPortal(i);
1377
1378         /* Clear the peer table and wait for all peers to go (they hold refs on
1379          * their NIs) */
1380         lnet_peer_tables_cleanup(NULL);
1381
1382         lnet_net_lock(LNET_LOCK_EX);
1383
1384         lnet_clear_zombies_nis_locked();
1385         the_lnet.ln_shutdown = 0;
1386         lnet_net_unlock(LNET_LOCK_EX);
1387 }
1388
1389 int
1390 lnet_shutdown_lndni(__u32 net)
1391 {
1392         lnet_ping_info_t *pinfo;
1393         lnet_handle_md_t md_handle;
1394         lnet_ni_t       *found_ni = NULL;
1395         int             ni_count;
1396         int             rc;
1397
1398         if (LNET_NETTYP(net) == LOLND)
1399                 return -EINVAL;
1400
1401         ni_count = lnet_get_ni_count();
1402
1403         /* create and link a new ping info, before removing the old one */
1404         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count - 1, false);
1405         if (rc != 0)
1406                 return rc;
1407
1408         /* proceed with shutting down the NI */
1409         lnet_net_lock(LNET_LOCK_EX);
1410
1411         found_ni = lnet_net2ni_locked(net, 0);
1412         if (found_ni == NULL) {
1413                 lnet_net_unlock(LNET_LOCK_EX);
1414                 lnet_ping_md_unlink(pinfo, &md_handle);
1415                 lnet_ping_info_free(pinfo);
1416                 return -EINVAL;
1417         }
1418
1419         /* decrement the reference counter on found_ni which was
1420          * incremented when we called lnet_net2ni_locked() */
1421         lnet_ni_decref_locked(found_ni, 0);
1422
1423         /* Move ni to zombie list so nobody can find it anymore */
1424         list_move(&found_ni->ni_list, &the_lnet.ln_nis_zombie);
1425
1426         /* Drop the lock reference for the ln_nis ref. */
1427         lnet_ni_decref_locked(found_ni, 0);
1428
1429         if (!list_empty(&found_ni->ni_cptlist)) {
1430                 list_del_init(&found_ni->ni_cptlist);
1431                 lnet_ni_decref_locked(found_ni, 0);
1432         }
1433
1434         lnet_net_unlock(LNET_LOCK_EX);
1435
1436         /* Do peer table cleanup for this ni */
1437         lnet_peer_tables_cleanup(found_ni);
1438
1439         lnet_net_lock(LNET_LOCK_EX);
1440         lnet_clear_zombies_nis_locked();
1441         lnet_net_unlock(LNET_LOCK_EX);
1442
1443         lnet_ping_target_update(pinfo, md_handle);
1444
1445         return 0;
1446 }
1447
1448 static int
1449 lnet_startup_lndni(struct lnet_ni *ni, __s32 peer_timeout,
1450                     __s32 peer_cr, __s32 peer_buf_cr, __s32 credits)
1451 {
1452         int                     rc = 0;
1453         int                     lnd_type;
1454         lnd_t                   *lnd;
1455         struct lnet_tx_queue    *tq;
1456         int                     i;
1457
1458         lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
1459         if (!libcfs_isknown_lnd(lnd_type))
1460                 return -EINVAL;
1461
1462         if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
1463             lnd_type == IIBLND || lnd_type == VIBLND) {
1464                 CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
1465                 return -EINVAL;
1466         }
1467
1468         /* Make sure this new NI is unique. */
1469         lnet_net_lock(LNET_LOCK_EX);
1470         if (!lnet_net_unique(LNET_NIDNET(ni->ni_nid),
1471                              &the_lnet.ln_nis)) {
1472                 if (lnd_type == LOLND) {
1473                         lnet_net_unlock(LNET_LOCK_EX);
1474                         lnet_ni_free(ni);
1475                         return 0;
1476                 }
1477
1478                 CERROR("Net %s is not unique\n",
1479                        libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
1480                 lnet_net_unlock(LNET_LOCK_EX);
1481                 return -EINVAL;
1482         }
1483         lnet_net_unlock(LNET_LOCK_EX);
1484
1485         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1486         lnd = lnet_find_lnd_by_type(lnd_type);
1487
1488 #ifdef __KERNEL__
1489         if (lnd == NULL) {
1490                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1491                 rc = request_module("%s",
1492                                         libcfs_lnd2modname(lnd_type));
1493                 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1494
1495                 lnd = lnet_find_lnd_by_type(lnd_type);
1496                 if (lnd == NULL) {
1497                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1498                         CERROR("Can't load LND %s, module %s, rc=%d\n",
1499                                libcfs_lnd2str(lnd_type),
1500                                libcfs_lnd2modname(lnd_type), rc);
1501 #ifndef HAVE_MODULE_LOADING_SUPPORT
1502                         LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1503                                  "compiled with kernel module "
1504                                  "loading support.");
1505 #endif
1506                         return -EINVAL;
1507                 }
1508         }
1509 #else
1510         if (lnd == NULL) {
1511                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1512                 CERROR("LND %s not supported\n",
1513                        libcfs_lnd2str(lnd_type));
1514                 return -EINVAL;
1515         }
1516 #endif
1517
1518         lnet_net_lock(LNET_LOCK_EX);
1519         lnd->lnd_refcount++;
1520         lnet_net_unlock(LNET_LOCK_EX);
1521
1522         ni->ni_lnd = lnd;
1523
1524         rc = (lnd->lnd_startup)(ni);
1525
1526         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1527
1528         if (rc != 0) {
1529                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
1530                                    "\n",
1531                                    rc, libcfs_lnd2str(lnd->lnd_type));
1532                 lnet_net_lock(LNET_LOCK_EX);
1533                 lnd->lnd_refcount--;
1534                 lnet_net_unlock(LNET_LOCK_EX);
1535                 lnet_ni_free(ni);
1536                 return -EINVAL;
1537         }
1538
1539         /* If given some LND tunable parameters, parse those now to
1540          * override the values in the NI structure. */
1541         if (peer_buf_cr >= 0)
1542                 ni->ni_peerrtrcredits = peer_buf_cr;
1543         if (peer_timeout >= 0)
1544                 ni->ni_peertimeout = peer_timeout;
1545         /*
1546          * TODO
1547          * Note: For now, don't allow the user to change
1548          * peertxcredits as this number is used in the
1549          * IB LND to control queue depth.
1550          * if (peer_cr != -1)
1551          *      ni->ni_peertxcredits = peer_cr;
1552          */
1553         if (credits >= 0)
1554                 ni->ni_maxtxcredits = credits;
1555
1556         LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
1557
1558         lnet_net_lock(LNET_LOCK_EX);
1559         /* refcount for ln_nis */
1560         lnet_ni_addref_locked(ni, 0);
1561         list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1562         if (ni->ni_cpts != NULL) {
1563                 list_add_tail(&ni->ni_cptlist,
1564                               &the_lnet.ln_nis_cpt);
1565                 lnet_ni_addref_locked(ni, 0);
1566         }
1567
1568         lnet_net_unlock(LNET_LOCK_EX);
1569         if (lnd->lnd_type == LOLND) {
1570                 lnet_ni_addref(ni);
1571                 LASSERT(the_lnet.ln_loni == NULL);
1572                 the_lnet.ln_loni = ni;
1573                 return 1;
1574         }
1575
1576 #ifndef __KERNEL__
1577         if (lnd->lnd_wait != NULL) {
1578                 if (the_lnet.ln_eq_waitni == NULL) {
1579                         lnet_ni_addref(ni);
1580                         the_lnet.ln_eq_waitni = ni;
1581                 }
1582         } else {
1583 # ifndef HAVE_LIBPTHREAD
1584                 LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a "
1585                                    "single-threaded runtime\n",
1586                                    libcfs_lnd2str(lnd_type));
1587                 goto failed;
1588 # endif
1589         }
1590 #endif
1591         if (ni->ni_peertxcredits == 0 ||
1592             ni->ni_maxtxcredits == 0) {
1593                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1594                                    libcfs_lnd2str(lnd->lnd_type),
1595                                    ni->ni_peertxcredits == 0 ?
1596                                    "" : "per-peer ");
1597                 goto failed;
1598         }
1599
1600         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1601                 tq->tq_credits_min =
1602                 tq->tq_credits_max =
1603                 tq->tq_credits = lnet_ni_tq_credits(ni);
1604         }
1605
1606         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1607                libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
1608                lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1609                ni->ni_peerrtrcredits, ni->ni_peertimeout);
1610
1611         return 1;
1612 failed:
1613         lnet_shutdown_lndni(LNET_NIDNET(ni->ni_nid));
1614         lnet_ni_free(ni);
1615         return -EINVAL;
1616 }
1617
1618 static int
1619 lnet_startup_lndnis(struct list_head *nilist, __s32 peer_timeout,
1620                     __s32 peer_cr, __s32 peer_buf_cr, __s32 credits,
1621                     int *ni_count)
1622 {
1623         int                     rc = 0;
1624         struct lnet_ni *ni;
1625
1626         while (!list_empty(nilist)) {
1627                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1628                 list_del(&ni->ni_list);
1629                 rc = lnet_startup_lndni(ni, peer_timeout, peer_cr,
1630                                         peer_buf_cr, credits);
1631                 if (rc < 0)
1632                         goto failed;
1633
1634                 if (ni_count != NULL && rc == 1)
1635                         (*ni_count)++;
1636         }
1637
1638         return 0;
1639 failed:
1640         while (!list_empty(nilist)) {
1641                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1642                 list_del(&ni->ni_list);
1643                 lnet_ni_free(ni);
1644         }
1645         lnet_shutdown_lndnis();
1646         return rc;
1647 }
1648
1649 /**
1650  * Initialize LNet library.
1651  *
1652  * Only userspace program needs to call this function - it's automatically
1653  * called in the kernel at module loading time. Caller has to call LNetFini()
1654  * after a call to LNetInit(), if and only if the latter returned 0. It must
1655  * be called exactly once.
1656  *
1657  * \return 0 on success, and -ve on failures.
1658  */
1659 int
1660 LNetInit(void)
1661 {
1662         int     rc;
1663
1664         lnet_assert_wire_constants();
1665         LASSERT(!the_lnet.ln_init);
1666
1667         memset(&the_lnet, 0, sizeof(the_lnet));
1668
1669         /* refer to global cfs_cpt_table for now */
1670         the_lnet.ln_cpt_table   = cfs_cpt_table;
1671         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1672
1673         LASSERT(the_lnet.ln_cpt_number > 0);
1674         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1675                 /* we are under risk of consuming all lh_cookie */
1676                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1677                        "please change setting of CPT-table and retry\n",
1678                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1679                 return -1;
1680         }
1681
1682         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1683                 the_lnet.ln_cpt_bits++;
1684
1685         rc = lnet_create_locks();
1686         if (rc != 0) {
1687                 CERROR("Can't create LNet global locks: %d\n", rc);
1688                 return -1;
1689         }
1690
1691         the_lnet.ln_refcount = 0;
1692         the_lnet.ln_init = 1;
1693         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1694         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1695         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1696         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1697
1698 #ifdef __KERNEL__
1699         /* The hash table size is the number of bits it takes to express the set
1700          * ln_num_routes, minus 1 (better to under estimate than over so we
1701          * don't waste memory). */
1702         if (rnet_htable_size <= 0)
1703                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1704         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1705                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1706         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1707                                            order_base_2(rnet_htable_size) - 1);
1708
1709         /* All LNDs apart from the LOLND are in separate modules.  They
1710          * register themselves when their module loads, and unregister
1711          * themselves when their module is unloaded. */
1712 #else
1713         the_lnet.ln_remote_nets_hbits = 8;
1714
1715         /* Register LNDs
1716          * NB the order here determines default 'networks=' order */
1717 # ifdef HAVE_LIBPTHREAD
1718         LNET_REGISTER_ULND(the_tcplnd);
1719 # endif
1720 #endif
1721         lnet_register_lnd(&the_lolnd);
1722         return 0;
1723 }
1724 EXPORT_SYMBOL(LNetInit);
1725
1726 /**
1727  * Finalize LNet library.
1728  *
1729  * Only userspace program needs to call this function. It can be called
1730  * at most once.
1731  *
1732  * \pre LNetInit() called with success.
1733  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1734  */
1735 void
1736 LNetFini(void)
1737 {
1738         LASSERT(the_lnet.ln_init);
1739         LASSERT(the_lnet.ln_refcount == 0);
1740
1741         while (!list_empty(&the_lnet.ln_lnds))
1742                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1743                                                lnd_t, lnd_list));
1744         lnet_destroy_locks();
1745
1746         the_lnet.ln_init = 0;
1747 }
1748 EXPORT_SYMBOL(LNetFini);
1749
1750 /**
1751  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1752  *
1753  * Userspace program should call this after a successful call to LNetInit().
1754  * Users must call this function at least once before any other functions.
1755  * For each successful call there must be a corresponding call to
1756  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1757  * ignored.
1758  *
1759  * The PID used by LNet may be different from the one requested.
1760  * See LNetGetId().
1761  *
1762  * \param requested_pid PID requested by the caller.
1763  *
1764  * \return >= 0 on success, and < 0 error code on failures.
1765  */
1766 int
1767 LNetNIInit(lnet_pid_t requested_pid)
1768 {
1769         int                     im_a_router = 0;
1770         int                     rc;
1771         int                     ni_count = 0;
1772         int                     lnd_type;
1773         struct lnet_ni          *ni;
1774         lnet_ping_info_t        *pinfo;
1775         lnet_handle_md_t        md_handle;
1776         struct list_head        net_head;
1777
1778         INIT_LIST_HEAD(&net_head);
1779
1780         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1781
1782         LASSERT(the_lnet.ln_init);
1783         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1784
1785         if (the_lnet.ln_refcount > 0) {
1786                 rc = the_lnet.ln_refcount++;
1787                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1788                 return rc;
1789         }
1790
1791         rc = lnet_prepare(requested_pid);
1792         if (rc != 0)
1793                 goto failed0;
1794
1795         rc = lnet_parse_networks(&net_head,
1796                                  !the_lnet.ln_nis_from_mod_params ?
1797                                    lnet_get_networks() : "");
1798         if (rc < 0)
1799                 goto failed1;
1800
1801         rc = lnet_startup_lndnis(&net_head, -1, -1, -1, -1, &ni_count);
1802         if (rc != 0)
1803                 goto failed1;
1804
1805         if (the_lnet.ln_eq_waitni != NULL && ni_count > 1) {
1806                 lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type;
1807                 LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
1808                                    "\n",
1809                                    libcfs_lnd2str(lnd_type));
1810                 goto failed1;
1811         }
1812
1813         rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1814         if (rc != 0)
1815                 goto failed2;
1816
1817         rc = lnet_check_routes();
1818         if (rc != 0)
1819                 goto failed2;
1820
1821         rc = lnet_rtrpools_alloc(im_a_router);
1822         if (rc != 0)
1823                 goto failed2;
1824
1825         rc = lnet_acceptor_start();
1826         if (rc != 0)
1827                 goto failed2;
1828         the_lnet.ln_refcount = 1;
1829         /* Now I may use my own API functions... */
1830
1831         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1832         if (rc != 0)
1833                 goto failed3;
1834
1835         lnet_ping_target_update(pinfo, md_handle);
1836
1837         rc = lnet_router_checker_start();
1838         if (rc != 0)
1839                 goto failed4;
1840
1841         lnet_proc_init();
1842
1843         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1844
1845         return 0;
1846
1847 failed4:
1848         the_lnet.ln_refcount = 0;
1849         lnet_ping_md_unlink(pinfo, &md_handle);
1850         lnet_ping_info_free(pinfo);
1851 failed3:
1852         lnet_acceptor_stop();
1853         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1854         LASSERT(rc == 0);
1855 failed2:
1856         lnet_destroy_routes();
1857         lnet_shutdown_lndnis();
1858 failed1:
1859         lnet_unprepare();
1860 failed0:
1861         LASSERT(rc < 0);
1862         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1863         while (!list_empty(&net_head)) {
1864                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
1865                 list_del_init(&ni->ni_list);
1866                 lnet_ni_free(ni);
1867         }
1868         return rc;
1869 }
1870 EXPORT_SYMBOL(LNetNIInit);
1871
1872 /**
1873  * Stop LNet interfaces, routing, and forwarding.
1874  *
1875  * Users must call this function once for each successful call to LNetNIInit().
1876  * Once the LNetNIFini() operation has been started, the results of pending
1877  * API operations are undefined.
1878  *
1879  * \return always 0 for current implementation.
1880  */
1881 int
1882 LNetNIFini()
1883 {
1884         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1885
1886         LASSERT (the_lnet.ln_init);
1887         LASSERT (the_lnet.ln_refcount > 0);
1888
1889         if (the_lnet.ln_refcount != 1) {
1890                 the_lnet.ln_refcount--;
1891         } else {
1892                 LASSERT (!the_lnet.ln_niinit_self);
1893
1894                 lnet_proc_fini();
1895                 lnet_router_checker_stop();
1896                 lnet_ping_target_fini();
1897
1898                 /* Teardown fns that use my own API functions BEFORE here */
1899                 the_lnet.ln_refcount = 0;
1900
1901                 lnet_acceptor_stop();
1902                 lnet_destroy_routes();
1903                 lnet_shutdown_lndnis();
1904                 lnet_unprepare();
1905         }
1906
1907         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1908         return 0;
1909 }
1910 EXPORT_SYMBOL(LNetNIFini);
1911
1912 /**
1913  * Grabs the ni data from the ni structure and fills the out
1914  * parameters
1915  *
1916  * \param[in] ni network        interface structure
1917  * \param[out] cpt_count        the number of cpts the ni is on
1918  * \param[out] nid              Network Interface ID
1919  * \param[out] peer_timeout     NI peer timeout
1920  * \param[out] peer_tx_crdits   NI peer transmit credits
1921  * \param[out] peer_rtr_credits NI peer router credits
1922  * \param[out] max_tx_credits   NI max transmit credit
1923  * \param[out] net_config       Network configuration
1924  */
1925 static void
1926 lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
1927                   int *peer_timeout, int *peer_tx_credits,
1928                   int *peer_rtr_credits, int *max_tx_credits,
1929                   struct lnet_ioctl_net_config *net_config)
1930 {
1931         int i;
1932
1933         if (ni == NULL)
1934                 return;
1935
1936         if (net_config == NULL)
1937                 return;
1938
1939         CLASSERT(ARRAY_SIZE(ni->ni_interfaces) ==
1940                  ARRAY_SIZE(net_config->ni_interfaces));
1941
1942         if (ni->ni_interfaces[0] != NULL) {
1943                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1944                         if (ni->ni_interfaces[i] != NULL) {
1945                                 strncpy(net_config->ni_interfaces[i],
1946                                         ni->ni_interfaces[i],
1947                                         sizeof(net_config->ni_interfaces[i]));
1948                         }
1949                 }
1950         }
1951
1952         *nid = ni->ni_nid;
1953         *peer_timeout = ni->ni_peertimeout;
1954         *peer_tx_credits = ni->ni_peertxcredits;
1955         *peer_rtr_credits = ni->ni_peerrtrcredits;
1956         *max_tx_credits = ni->ni_maxtxcredits;
1957
1958         net_config->ni_status = ni->ni_status->ns_status;
1959
1960         for (i = 0;
1961              ni->ni_cpts != NULL && i < ni->ni_ncpts &&
1962              i < LNET_MAX_SHOW_NUM_CPT;
1963              i++)
1964                 net_config->ni_cpts[i] = ni->ni_cpts[i];
1965
1966         *cpt_count = ni->ni_ncpts;
1967 }
1968
1969 int
1970 lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
1971                     int *peer_tx_credits, int *peer_rtr_credits,
1972                     int *max_tx_credits,
1973                     struct lnet_ioctl_net_config *net_config)
1974 {
1975         struct lnet_ni          *ni;
1976         struct list_head        *tmp;
1977         int                     cpt;
1978         int                     rc = -ENOENT;
1979
1980         cpt = lnet_net_lock_current();
1981
1982         list_for_each(tmp, &the_lnet.ln_nis) {
1983                 ni = list_entry(tmp, lnet_ni_t, ni_list);
1984                 if (idx-- == 0) {
1985                         rc = 0;
1986                         lnet_ni_lock(ni);
1987                         lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
1988                                           peer_tx_credits, peer_rtr_credits,
1989                                           max_tx_credits, net_config);
1990                         lnet_ni_unlock(ni);
1991                         break;
1992                 }
1993         }
1994
1995         lnet_net_unlock(cpt);
1996         return rc;
1997 }
1998
1999 int
2000 lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
2001                 __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
2002                 __s32 credits)
2003 {
2004         lnet_ping_info_t        *pinfo;
2005         lnet_handle_md_t        md_handle;
2006         struct lnet_ni          *ni;
2007         struct list_head        net_head;
2008         int                     rc;
2009
2010         INIT_LIST_HEAD(&net_head);
2011
2012         /* Create a ni structure for the network string */
2013         rc = lnet_parse_networks(&net_head, nets);
2014         if (rc < 0)
2015                 return rc;
2016
2017         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2018
2019         if (rc != 1) {
2020                 rc = -EINVAL; /* only add one interface per call */
2021                 goto failed0;
2022         }
2023
2024         rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
2025                                   false);
2026         if (rc != 0)
2027                 goto failed0;
2028
2029         /* pick up first one that is not LOLND */
2030         while (!list_empty(&net_head)) {
2031                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
2032                 list_del_init(&ni->ni_list);
2033                 if (LNET_NIDNET(ni->ni_nid) == LOLND) {
2034                         lnet_ni_free(ni);
2035                         continue;
2036                 }
2037                 rc = lnet_startup_lndni(ni, peer_timeout, peer_cr,
2038                                         peer_buf_cr, credits);
2039                 if (rc < 0)
2040                         goto failed1;
2041         }
2042
2043         lnet_ping_target_update(pinfo, md_handle);
2044         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2045
2046         return 0;
2047
2048 failed1:
2049         lnet_ping_md_unlink(pinfo, &md_handle);
2050         lnet_ping_info_free(pinfo);
2051 failed0:
2052         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2053         while (!list_empty(&net_head)) {
2054                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
2055                 list_del_init(&ni->ni_list);
2056                 lnet_ni_free(ni);
2057         }
2058         return rc;
2059 }
2060
2061 int
2062 lnet_dyn_del_ni(__u32 net)
2063 {
2064         int rc;
2065
2066         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2067         rc = lnet_shutdown_lndni(net);
2068         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2069
2070         return rc;
2071 }
2072
2073 /**
2074  * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
2075  * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
2076  * internal ioctl handler.
2077  *
2078  * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
2079  *
2080  * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
2081  * The data will be printed to system console. Don't use it excessively.
2082  * \param arg A pointer to lnet_process_id_t, process ID of the peer.
2083  *
2084  * \return Always return 0 when called by users directly (i.e., not via ioctl).
2085  */
2086 int
2087 LNetCtl(unsigned int cmd, void *arg)
2088 {
2089         struct libcfs_ioctl_data *data = arg;
2090         struct lnet_ioctl_config_data *config;
2091         lnet_process_id_t         id = {0};
2092         lnet_ni_t                *ni;
2093         int                       rc;
2094
2095         LASSERT(the_lnet.ln_init);
2096
2097         switch (cmd) {
2098         case IOC_LIBCFS_GET_NI:
2099                 rc = LNetGetId(data->ioc_count, &id);
2100                 data->ioc_nid = id.nid;
2101                 return rc;
2102
2103         case IOC_LIBCFS_FAIL_NID:
2104                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2105
2106         case IOC_LIBCFS_ADD_ROUTE:
2107                 config = arg;
2108                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2109                 rc = lnet_add_route(config->cfg_net,
2110                                     config->cfg_config_u.cfg_route.rtr_hop,
2111                                     config->cfg_nid,
2112                                     config->cfg_config_u.cfg_route.
2113                                         rtr_priority);
2114                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2115                 return (rc != 0) ? rc : lnet_check_routes();
2116
2117         case IOC_LIBCFS_DEL_ROUTE:
2118                 config = arg;
2119                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2120                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2121                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2122                 return rc;
2123
2124         case IOC_LIBCFS_GET_ROUTE:
2125                 config = arg;
2126                 return lnet_get_route(config->cfg_count,
2127                                       &config->cfg_net,
2128                                       &config->cfg_config_u.cfg_route.rtr_hop,
2129                                       &config->cfg_nid,
2130                                       &config->cfg_config_u.cfg_route.rtr_flags,
2131                                       &config->cfg_config_u.cfg_route.
2132                                         rtr_priority);
2133
2134         case IOC_LIBCFS_GET_NET: {
2135                 struct lnet_ioctl_net_config *net_config;
2136                 config = arg;
2137                 net_config = (struct lnet_ioctl_net_config *)
2138                         config->cfg_bulk;
2139                 if (config == NULL || net_config == NULL)
2140                         return -1;
2141
2142                 return lnet_get_net_config(config->cfg_count,
2143                                            &config->cfg_ncpts,
2144                                            &config->cfg_nid,
2145                                            &config->cfg_config_u.
2146                                                 cfg_net.net_peer_timeout,
2147                                            &config->cfg_config_u.cfg_net.
2148                                                 net_peer_tx_credits,
2149                                            &config->cfg_config_u.cfg_net.
2150                                                 net_peer_rtr_credits,
2151                                            &config->cfg_config_u.cfg_net.
2152                                                 net_max_tx_credits,
2153                                            net_config);
2154         }
2155
2156         case IOC_LIBCFS_GET_LNET_STATS:
2157         {
2158                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2159
2160                 lnet_counters_get(&lnet_stats->st_cntrs);
2161                 return 0;
2162         }
2163
2164 #if defined(__KERNEL__) && defined(LNET_ROUTER)
2165         case IOC_LIBCFS_CONFIG_RTR:
2166                 config = arg;
2167                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2168                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2169                         rc = lnet_rtrpools_enable();
2170                         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2171                         return rc;
2172                 }
2173                 lnet_rtrpools_disable();
2174                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2175                 return 0;
2176
2177         case IOC_LIBCFS_ADD_BUF:
2178                 config = arg;
2179                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2180                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2181                                                 buf_tiny,
2182                                           config->cfg_config_u.cfg_buffers.
2183                                                 buf_small,
2184                                           config->cfg_config_u.cfg_buffers.
2185                                                 buf_large);
2186                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2187                 return rc;
2188 #endif
2189
2190         case IOC_LIBCFS_GET_BUF: {
2191                 struct lnet_ioctl_pool_cfg *pool_cfg;
2192                 config = arg;
2193                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2194                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2195         }
2196
2197         case IOC_LIBCFS_GET_PEER_INFO: {
2198                 struct lnet_ioctl_peer *peer_info = arg;
2199                 return lnet_get_peer_info(
2200                    peer_info->pr_count,
2201                    &peer_info->pr_nid,
2202                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2203                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2204                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2205                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2206                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2207                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2208                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2209                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2210         }
2211
2212         case IOC_LIBCFS_NOTIFY_ROUTER:
2213                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2214                                    cfs_time_current() -
2215                                    cfs_time_seconds(cfs_time_current_sec() -
2216                                                     (time_t)data->ioc_u64[0]));
2217
2218         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
2219                 /* This can be removed once lustre stops calling it */
2220                 return 0;
2221
2222         case IOC_LIBCFS_LNET_DIST:
2223                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2224                 if (rc < 0 && rc != -EHOSTUNREACH)
2225                         return rc;
2226
2227                 data->ioc_u32[0] = rc;
2228                 return 0;
2229
2230         case IOC_LIBCFS_TESTPROTOCOMPAT:
2231                 lnet_net_lock(LNET_LOCK_EX);
2232                 the_lnet.ln_testprotocompat = data->ioc_flags;
2233                 lnet_net_unlock(LNET_LOCK_EX);
2234                 return 0;
2235
2236         case IOC_LIBCFS_PING:
2237                 id.nid = data->ioc_nid;
2238                 id.pid = data->ioc_u32[0];
2239                 rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
2240                                (lnet_process_id_t __user *)data->ioc_pbuf1,
2241                                data->ioc_plen1/sizeof(lnet_process_id_t));
2242                 if (rc < 0)
2243                         return rc;
2244                 data->ioc_count = rc;
2245                 return 0;
2246
2247         case IOC_LIBCFS_DEBUG_PEER: {
2248                 /* CAVEAT EMPTOR: this one designed for calling directly; not
2249                  * via an ioctl */
2250                 id = *((lnet_process_id_t *) arg);
2251
2252                 lnet_debug_peer(id.nid);
2253
2254                 ni = lnet_net2ni(LNET_NIDNET(id.nid));
2255                 if (ni == NULL) {
2256                         CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
2257                 } else {
2258                         if (ni->ni_lnd->lnd_ctl == NULL) {
2259                                 CDEBUG(D_WARNING, "No ctl for %s\n",
2260                                        libcfs_id2str(id));
2261                         } else {
2262                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2263                         }
2264
2265                         lnet_ni_decref(ni);
2266                 }
2267                 return 0;
2268         }
2269
2270         default:
2271                 ni = lnet_net2ni(data->ioc_net);
2272                 if (ni == NULL)
2273                         return -EINVAL;
2274
2275                 if (ni->ni_lnd->lnd_ctl == NULL)
2276                         rc = -EINVAL;
2277                 else
2278                         rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2279
2280                 lnet_ni_decref(ni);
2281                 return rc;
2282         }
2283         /* not reached */
2284 }
2285 EXPORT_SYMBOL(LNetCtl);
2286
2287 /**
2288  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
2289  * all interfaces share a same PID, as requested by LNetNIInit().
2290  *
2291  * \param index Index of the interface to look up.
2292  * \param id On successful return, this location will hold the
2293  * lnet_process_id_t ID of the interface.
2294  *
2295  * \retval 0 If an interface exists at \a index.
2296  * \retval -ENOENT If no interface has been found.
2297  */
2298 int
2299 LNetGetId(unsigned int index, lnet_process_id_t *id)
2300 {
2301         struct lnet_ni   *ni;
2302         struct list_head *tmp;
2303         int               cpt;
2304         int               rc = -ENOENT;
2305
2306         LASSERT(the_lnet.ln_init);
2307         LASSERT(the_lnet.ln_refcount > 0);
2308
2309         cpt = lnet_net_lock_current();
2310
2311         list_for_each(tmp, &the_lnet.ln_nis) {
2312                 if (index-- != 0)
2313                         continue;
2314
2315                 ni = list_entry(tmp, lnet_ni_t, ni_list);
2316
2317                 id->nid = ni->ni_nid;
2318                 id->pid = the_lnet.ln_pid;
2319                 rc = 0;
2320                 break;
2321         }
2322
2323         lnet_net_unlock(cpt);
2324         return rc;
2325 }
2326 EXPORT_SYMBOL(LNetGetId);
2327
2328 /**
2329  * Print a string representation of handle \a h into buffer \a str of
2330  * \a len bytes.
2331  */
2332 void
2333 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2334 {
2335         snprintf(str, len, LPX64, h.cookie);
2336 }
2337 EXPORT_SYMBOL(LNetSnprintHandle);
2338
2339 static int
2340 lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids,
2341           int n_ids)
2342 {
2343         lnet_handle_eq_t     eqh;
2344         lnet_handle_md_t     mdh;
2345         lnet_event_t         event;
2346         lnet_md_t            md = {0};
2347         int                  which;
2348         int                  unlinked = 0;
2349         int                  replied = 0;
2350         const int            a_long_time = 60000; /* mS */
2351         int                  infosz;
2352         lnet_ping_info_t    *info;
2353         lnet_process_id_t    tmpid;
2354         int                  i;
2355         int                  nob;
2356         int                  rc;
2357         int                  rc2;
2358         sigset_t         blocked;
2359
2360         infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
2361
2362         if (n_ids <= 0 ||
2363             id.nid == LNET_NID_ANY ||
2364             timeout_ms > 500000 ||              /* arbitrary limit! */
2365             n_ids > 20)                         /* arbitrary limit! */
2366                 return -EINVAL;
2367
2368         if (id.pid == LNET_PID_ANY)
2369                 id.pid = LNET_PID_LUSTRE;
2370
2371         LIBCFS_ALLOC(info, infosz);
2372         if (info == NULL)
2373                 return -ENOMEM;
2374
2375         /* NB 2 events max (including any unlink event) */
2376         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2377         if (rc != 0) {
2378                 CERROR("Can't allocate EQ: %d\n", rc);
2379                 goto out_0;
2380         }
2381
2382         /* initialize md content */
2383         md.start     = info;
2384         md.length    = infosz;
2385         md.threshold = 2; /*GET/REPLY*/
2386         md.max_size  = 0;
2387         md.options   = LNET_MD_TRUNCATE;
2388         md.user_ptr  = NULL;
2389         md.eq_handle = eqh;
2390
2391         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2392         if (rc != 0) {
2393                 CERROR("Can't bind MD: %d\n", rc);
2394                 goto out_1;
2395         }
2396
2397         rc = LNetGet(LNET_NID_ANY, mdh, id,
2398                      LNET_RESERVED_PORTAL,
2399                      LNET_PROTO_PING_MATCHBITS, 0);
2400
2401         if (rc != 0) {
2402                 /* Don't CERROR; this could be deliberate! */
2403
2404                 rc2 = LNetMDUnlink(mdh);
2405                 LASSERT(rc2 == 0);
2406
2407                 /* NB must wait for the UNLINK event below... */
2408                 unlinked = 1;
2409                 timeout_ms = a_long_time;
2410         }
2411
2412         do {
2413                 /* MUST block for unlink to complete */
2414                 if (unlinked)
2415                         blocked = cfs_block_allsigs();
2416
2417                 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
2418
2419                 if (unlinked)
2420                         cfs_restore_sigs(blocked);
2421
2422                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2423                        (rc2 <= 0) ? -1 : event.type,
2424                        (rc2 <= 0) ? -1 : event.status,
2425                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2426
2427                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2428
2429                 if (rc2 <= 0 || event.status != 0) {
2430                         /* timeout or error */
2431                         if (!replied && rc == 0)
2432                                 rc = (rc2 < 0) ? rc2 :
2433                                      (rc2 == 0) ? -ETIMEDOUT :
2434                                      event.status;
2435
2436                         if (!unlinked) {
2437                                 /* Ensure completion in finite time... */
2438                                 LNetMDUnlink(mdh);
2439                                 /* No assertion (racing with network) */
2440                                 unlinked = 1;
2441                                 timeout_ms = a_long_time;
2442                         } else if (rc2 == 0) {
2443                                 /* timed out waiting for unlink */
2444                                 CWARN("ping %s: late network completion\n",
2445                                       libcfs_id2str(id));
2446                         }
2447                 } else if (event.type == LNET_EVENT_REPLY) {
2448                         replied = 1;
2449                         rc = event.mlength;
2450                 }
2451
2452         } while (rc2 <= 0 || !event.unlinked);
2453
2454         if (!replied) {
2455                 if (rc >= 0)
2456                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2457                               libcfs_id2str(id));
2458                 rc = -EIO;
2459                 goto out_1;
2460         }
2461
2462         nob = rc;
2463         LASSERT(nob >= 0 && nob <= infosz);
2464
2465         rc = -EPROTO;                           /* if I can't parse... */
2466
2467         if (nob < 8) {
2468                 /* can't check magic/version */
2469                 CERROR("%s: ping info too short %d\n",
2470                        libcfs_id2str(id), nob);
2471                 goto out_1;
2472         }
2473
2474         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2475                 lnet_swap_pinginfo(info);
2476         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2477                 CERROR("%s: Unexpected magic %08x\n",
2478                        libcfs_id2str(id), info->pi_magic);
2479                 goto out_1;
2480         }
2481
2482         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2483                 CERROR("%s: ping w/o NI status: 0x%x\n",
2484                        libcfs_id2str(id), info->pi_features);
2485                 goto out_1;
2486         }
2487
2488         if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
2489                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2490                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
2491                 goto out_1;
2492         }
2493
2494         if (info->pi_nnis < n_ids)
2495                 n_ids = info->pi_nnis;
2496
2497         if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
2498                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2499                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
2500                 goto out_1;
2501         }
2502
2503         rc = -EFAULT;                           /* If I SEGV... */
2504
2505         for (i = 0; i < n_ids; i++) {
2506                 tmpid.pid = info->pi_pid;
2507                 tmpid.nid = info->pi_ni[i].ns_nid;
2508                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2509                         goto out_1;
2510         }
2511         rc = info->pi_nnis;
2512
2513  out_1:
2514         rc2 = LNetEQFree(eqh);
2515         if (rc2 != 0)
2516                 CERROR("rc2 %d\n", rc2);
2517         LASSERT(rc2 == 0);
2518
2519  out_0:
2520         LIBCFS_FREE(info, infosz);
2521         return rc;
2522 }