Whamcloud - gitweb
LU-5568 lnet: fix kernel crash when network failed to start
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include <lnet/lib-lnet.h>
39 #include <lnet/lib-dlc.h>
40 #ifdef __KERNEL__
41 #include <linux/log2.h>
42 #endif
43
/* D_LNI is an alias for a debug mask: D_CONSOLE when building for the
 * kernel, D_CONFIG otherwise. */
#ifdef __KERNEL__
#define D_LNI D_CONSOLE
#else
#define D_LNI D_CONFIG
#endif

/* The single global LNet state object; exported so that code outside this
 * file can reference it. */
lnet_t      the_lnet;                           /* THE state of the network */
EXPORT_SYMBOL(the_lnet);
52
53 #ifdef __KERNEL__
54
/* Module parameters (kernel build only).  The three strings default to ""
 * which the helpers below treat as "not set".
 * NOTE(review): 0444 is presumably the permission mask handed through to
 * the kernel module-parameter machinery - confirm against CFS_MODULE_PARM. */

/* ip2nets: parsed by lnet_parse_ip2nets() in lnet_get_networks(); mutually
 * exclusive with 'networks' */
static char *ip2nets = "";
CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
                "LNET network <- IP table");

/* networks: returned as-is by lnet_get_networks() when set */
static char *networks = "";
CFS_MODULE_PARM(networks, "s", charp, 0444,
                "local networks");

/* routes: returned as-is by lnet_get_routes() */
static char *routes = "";
CFS_MODULE_PARM(routes, "s", charp, 0444,
                "routes to non-local networks");

/* rnet_htable_size: its consumer is not in this part of the file */
static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
                "size of remote network hash table");

/* Forward declarations for functions defined elsewhere in this file */
static void lnet_ping_target_fini(void);
static int lnet_ping(lnet_process_id_t id, int timeout_ms,
                     lnet_process_id_t *ids, int n_ids);
74
75 static char *
76 lnet_get_routes(void)
77 {
78         return routes;
79 }
80
81 static char *
82 lnet_get_networks(void)
83 {
84         char   *nets;
85         int     rc;
86
87         if (*networks != 0 && *ip2nets != 0) {
88                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
89                                    "'ip2nets' but not both at once\n");
90                 return NULL;
91         }
92
93         if (*ip2nets != 0) {
94                 rc = lnet_parse_ip2nets(&nets, ip2nets);
95                 return (rc == 0) ? nets : NULL;
96         }
97
98         if (*networks != 0)
99                 return networks;
100
101         return "tcp";
102 }
103
104 static void
105 lnet_init_locks(void)
106 {
107         spin_lock_init(&the_lnet.ln_eq_wait_lock);
108         init_waitqueue_head(&the_lnet.ln_eq_waitq);
109         mutex_init(&the_lnet.ln_lnd_mutex);
110         mutex_init(&the_lnet.ln_api_mutex);
111 }
112
/* Kernel build: counterpart of lnet_init_locks(); there is nothing to
 * release here. */
static void lnet_fini_locks(void)
{
        /* intentionally empty */
}
117
118 #else
119
/* Userspace build: the route configuration comes from the LNET_ROUTES
 * environment variable; an unset variable yields the empty string. */
static char *lnet_get_routes(void)
{
        char *env_routes = getenv("LNET_ROUTES");

        if (env_routes != NULL)
                return env_routes;

        return "";
}
127
128 static char *
129 lnet_get_networks (void)
130 {
131         static char       default_networks[256];
132         char             *networks = getenv("LNET_NETWORKS");
133         char             *str;
134         char             *sep;
135         int               len;
136         int               nob;
137         struct list_head *tmp;
138
139         if (networks != NULL)
140                 return networks;
141
142         /* In userland, the default 'networks=' is the list of known net types */
143         len = sizeof(default_networks);
144         str = default_networks;
145         *str = 0;
146         sep = "";
147
148         list_for_each(tmp, &the_lnet.ln_lnds) {
149                 lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
150
151                 nob = snprintf(str, len, "%s%s", sep,
152                                libcfs_lnd2str(lnd->lnd_type));
153                 if (nob >= len) {
154                         /* overflowed the string; leave it where it was */
155                         *str = 0;
156                         break;
157                 }
158                 len -= nob;
159                 str += nob;
160                 sep = ",";
161         }
162
163         return default_networks;
164 }
165
166 # ifndef HAVE_LIBPTHREAD
167
168 static void lnet_init_locks(void)
169 {
170         the_lnet.ln_eq_wait_lock = 0;
171         the_lnet.ln_lnd_mutex = 0;
172         the_lnet.ln_api_mutex = 0;
173 }
174
175 static void lnet_fini_locks(void)
176 {
177         LASSERT(the_lnet.ln_api_mutex == 0);
178         LASSERT(the_lnet.ln_lnd_mutex == 0);
179         LASSERT(the_lnet.ln_eq_wait_lock == 0);
180 }
181
182 # else
183
184 static void lnet_init_locks(void)
185 {
186         pthread_cond_init(&the_lnet.ln_eq_cond, NULL);
187         pthread_mutex_init(&the_lnet.ln_eq_wait_lock, NULL);
188         pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL);
189         pthread_mutex_init(&the_lnet.ln_api_mutex, NULL);
190 }
191
192 static void lnet_fini_locks(void)
193 {
194         pthread_mutex_destroy(&the_lnet.ln_api_mutex);
195         pthread_mutex_destroy(&the_lnet.ln_lnd_mutex);
196         pthread_mutex_destroy(&the_lnet.ln_eq_wait_lock);
197         pthread_cond_destroy(&the_lnet.ln_eq_cond);
198 }
199
200 # endif
201 #endif
202
203 static int
204 lnet_create_remote_nets_table(void)
205 {
206         int               i;
207         struct list_head *hash;
208
209         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
210         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
211         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
212         if (hash == NULL) {
213                 CERROR("Failed to create remote nets hash table\n");
214                 return -ENOMEM;
215         }
216
217         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
218                 INIT_LIST_HEAD(&hash[i]);
219         the_lnet.ln_remote_nets_hash = hash;
220         return 0;
221 }
222
223 static void
224 lnet_destroy_remote_nets_table(void)
225 {
226         int i;
227
228         if (the_lnet.ln_remote_nets_hash == NULL)
229                 return;
230
231         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
232                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
233
234         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
235                     LNET_REMOTE_NETS_HASH_SIZE *
236                     sizeof(the_lnet.ln_remote_nets_hash[0]));
237         the_lnet.ln_remote_nets_hash = NULL;
238 }
239
240 static void
241 lnet_destroy_locks(void)
242 {
243         if (the_lnet.ln_res_lock != NULL) {
244                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
245                 the_lnet.ln_res_lock = NULL;
246         }
247
248         if (the_lnet.ln_net_lock != NULL) {
249                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
250                 the_lnet.ln_net_lock = NULL;
251         }
252
253         lnet_fini_locks();
254 }
255
256 static int
257 lnet_create_locks(void)
258 {
259         lnet_init_locks();
260
261         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
262         if (the_lnet.ln_res_lock == NULL)
263                 goto failed;
264
265         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
266         if (the_lnet.ln_net_lock == NULL)
267                 goto failed;
268
269         return 0;
270
271  failed:
272         lnet_destroy_locks();
273         return -ENOMEM;
274 }
275
/*
 * Pin the LNet wire format: protocol constants, and the size and field
 * offsets of lnet_handle_wire_t, lnet_magicversion_t and lnet_hdr_t
 * (including each member of the hdr 'msg' union).
 *
 * The body consists solely of CLASSERT() checks (presumably compile-time
 * assertions - confirm against the CLASSERT definition), so the function
 * has no runtime effect.  The list was generated by 'wirecheck'; keep it
 * in generator order so it can be regenerated and diffed.
 */
static void lnet_assert_wire_constants(void)
{
        /* Wire protocol assertions generated by 'wirecheck'
         * running on Linux robert.bartonsoftware.com 2.6.8-1.521
         * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
         * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */

        /* Constants... */
        CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
        CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
        CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
        CLASSERT (LNET_MSG_ACK == 0);
        CLASSERT (LNET_MSG_PUT == 1);
        CLASSERT (LNET_MSG_GET == 2);
        CLASSERT (LNET_MSG_REPLY == 3);
        CLASSERT (LNET_MSG_HELLO == 4);

        /* Checks for lnet_handle_wire_t: two 8-byte cookies, 16 bytes total */
        CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
        CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
        CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
        CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
        CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);

        /* Checks for struct lnet_magicversion_t */
        CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
        CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
        CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
        CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
        CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
        CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
        CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);

        /* Checks for struct lnet_hdr_t: 32 bytes of common header followed
         * by the 40-byte 'msg' union */
        CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
        CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);

        /* Ack */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);

        /* Put */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);

        /* Get */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);

        /* Reply */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);

        /* Hello */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
}
368
369 static lnd_t *
370 lnet_find_lnd_by_type (int type)
371 {
372         lnd_t            *lnd;
373         struct list_head *tmp;
374
375         /* holding lnd mutex */
376         list_for_each(tmp, &the_lnet.ln_lnds) {
377                 lnd = list_entry(tmp, lnd_t, lnd_list);
378
379                 if ((int)lnd->lnd_type == type)
380                         return lnd;
381         }
382         return NULL;
383 }
384
385 void
386 lnet_register_lnd (lnd_t *lnd)
387 {
388         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
389
390         LASSERT(the_lnet.ln_init);
391         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
392         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
393
394         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
395         lnd->lnd_refcount = 0;
396
397         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
398
399         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
400 }
401 EXPORT_SYMBOL(lnet_register_lnd);
402
403 void
404 lnet_unregister_lnd (lnd_t *lnd)
405 {
406         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
407
408         LASSERT(the_lnet.ln_init);
409         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
410         LASSERT(lnd->lnd_refcount == 0);
411
412         list_del(&lnd->lnd_list);
413         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
414
415         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
416 }
417 EXPORT_SYMBOL(lnet_unregister_lnd);
418
419 void
420 lnet_counters_get(lnet_counters_t *counters)
421 {
422         lnet_counters_t *ctr;
423         int             i;
424
425         memset(counters, 0, sizeof(*counters));
426
427         lnet_net_lock(LNET_LOCK_EX);
428
429         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
430                 counters->msgs_max     += ctr->msgs_max;
431                 counters->msgs_alloc   += ctr->msgs_alloc;
432                 counters->errors       += ctr->errors;
433                 counters->send_count   += ctr->send_count;
434                 counters->recv_count   += ctr->recv_count;
435                 counters->route_count  += ctr->route_count;
436                 counters->drop_count   += ctr->drop_count;
437                 counters->send_length  += ctr->send_length;
438                 counters->recv_length  += ctr->recv_length;
439                 counters->route_length += ctr->route_length;
440                 counters->drop_length  += ctr->drop_length;
441
442         }
443         lnet_net_unlock(LNET_LOCK_EX);
444 }
445 EXPORT_SYMBOL(lnet_counters_get);
446
447 void
448 lnet_counters_reset(void)
449 {
450         lnet_counters_t *counters;
451         int             i;
452
453         lnet_net_lock(LNET_LOCK_EX);
454
455         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
456                 memset(counters, 0, sizeof(lnet_counters_t));
457
458         lnet_net_unlock(LNET_LOCK_EX);
459 }
460 EXPORT_SYMBOL(lnet_counters_reset);
461
462 #ifdef LNET_USE_LIB_FREELIST
463
464 int
465 lnet_freelist_init(lnet_freelist_t *fl, int n, int size)
466 {
467         char *space;
468
469         LASSERT (n > 0);
470
471         size += offsetof (lnet_freeobj_t, fo_contents);
472
473         LIBCFS_ALLOC(space, n * size);
474         if (space == NULL)
475                 return (-ENOMEM);
476
477         INIT_LIST_HEAD(&fl->fl_list);
478         fl->fl_objs = space;
479         fl->fl_nobjs = n;
480         fl->fl_objsize = size;
481
482         do {
483                 list_add((struct list_head *)space, &fl->fl_list);
484                 space += size;
485         } while (--n != 0);
486
487         return 0;
488 }
489
490 void
491 lnet_freelist_fini(lnet_freelist_t *fl)
492 {
493         struct list_head *el;
494         int               count;
495
496         if (fl->fl_nobjs == 0)
497                 return;
498
499         count = 0;
500         for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
501                 count++;
502
503         LASSERT (count == fl->fl_nobjs);
504
505         LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
506         memset (fl, 0, sizeof (*fl));
507 }
508
509 #endif /* LNET_USE_LIB_FREELIST */
510
511 static __u64 lnet_create_interface_cookie(void)
512 {
513         /* NB the interface cookie in wire handles guards against delayed
514          * replies and ACKs appearing valid after reboot. Initialisation time,
515          * even if it's only implemented to millisecond resolution is probably
516          * easily good enough. */
517         struct timeval tv;
518         __u64          cookie;
519 #ifndef __KERNEL__
520         int            rc = gettimeofday (&tv, NULL);
521         LASSERT (rc == 0);
522 #else
523         do_gettimeofday(&tv);
524 #endif
525         cookie = tv.tv_sec;
526         cookie *= 1000000;
527         cookie += tv.tv_usec;
528         return cookie;
529 }
530
531 static char *
532 lnet_res_type2str(int type)
533 {
534         switch (type) {
535         default:
536                 LBUG();
537         case LNET_COOKIE_TYPE_MD:
538                 return "MD";
539         case LNET_COOKIE_TYPE_ME:
540                 return "ME";
541         case LNET_COOKIE_TYPE_EQ:
542                 return "EQ";
543         }
544 }
545
546 static void
547 lnet_res_container_cleanup(struct lnet_res_container *rec)
548 {
549         int     count = 0;
550
551         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
552                 return;
553
554         while (!list_empty(&rec->rec_active)) {
555                 struct list_head *e = rec->rec_active.next;
556
557                 list_del_init(e);
558                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
559                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
560
561                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
562                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
563
564                 } else { /* NB: Active MEs should be attached on portals */
565                         LBUG();
566                 }
567                 count++;
568         }
569
570         if (count > 0) {
571                 /* Found alive MD/ME/EQ, user really should unlink/free
572                  * all of them before finalize LNet, but if someone didn't,
573                  * we have to recycle garbage for him */
574                 CERROR("%d active elements on exit of %s container\n",
575                        count, lnet_res_type2str(rec->rec_type));
576         }
577
578 #ifdef LNET_USE_LIB_FREELIST
579         lnet_freelist_fini(&rec->rec_freelist);
580 #endif
581         if (rec->rec_lh_hash != NULL) {
582                 LIBCFS_FREE(rec->rec_lh_hash,
583                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
584                 rec->rec_lh_hash = NULL;
585         }
586
587         rec->rec_type = 0; /* mark it as finalized */
588 }
589
590 static int
591 lnet_res_container_setup(struct lnet_res_container *rec,
592                          int cpt, int type, int objnum, int objsz)
593 {
594         int     rc = 0;
595         int     i;
596
597         LASSERT(rec->rec_type == 0);
598
599         rec->rec_type = type;
600         INIT_LIST_HEAD(&rec->rec_active);
601
602 #ifdef LNET_USE_LIB_FREELIST
603         memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist));
604         rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz);
605         if (rc != 0)
606                 goto out;
607 #endif
608         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
609
610         /* Arbitrary choice of hash table size */
611         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
612                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
613         if (rec->rec_lh_hash == NULL) {
614                 rc = -ENOMEM;
615                 goto out;
616         }
617
618         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
619                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
620
621         return 0;
622
623 out:
624         CERROR("Failed to setup %s resource container\n",
625                lnet_res_type2str(type));
626         lnet_res_container_cleanup(rec);
627         return rc;
628 }
629
630 static void
631 lnet_res_containers_destroy(struct lnet_res_container **recs)
632 {
633         struct lnet_res_container       *rec;
634         int                             i;
635
636         cfs_percpt_for_each(rec, i, recs)
637                 lnet_res_container_cleanup(rec);
638
639         cfs_percpt_free(recs);
640 }
641
642 static struct lnet_res_container **
643 lnet_res_containers_create(int type, int objnum, int objsz)
644 {
645         struct lnet_res_container       **recs;
646         struct lnet_res_container       *rec;
647         int                             rc;
648         int                             i;
649
650         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
651         if (recs == NULL) {
652                 CERROR("Failed to allocate %s resource containers\n",
653                        lnet_res_type2str(type));
654                 return NULL;
655         }
656
657         cfs_percpt_for_each(rec, i, recs) {
658                 rc = lnet_res_container_setup(rec, i, type, objnum, objsz);
659                 if (rc != 0) {
660                         lnet_res_containers_destroy(recs);
661                         return NULL;
662                 }
663         }
664
665         return recs;
666 }
667
668 lnet_libhandle_t *
669 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
670 {
671         /* ALWAYS called with lnet_res_lock held */
672         struct list_head        *head;
673         lnet_libhandle_t        *lh;
674         unsigned int            hash;
675
676         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
677                 return NULL;
678
679         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
680         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
681
682         list_for_each_entry(lh, head, lh_hash_chain) {
683                 if (lh->lh_cookie == cookie)
684                         return lh;
685         }
686
687         return NULL;
688 }
689
690 void
691 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
692 {
693         /* ALWAYS called with lnet_res_lock held */
694         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
695         unsigned int    hash;
696
697         lh->lh_cookie = rec->rec_lh_cookie;
698         rec->rec_lh_cookie += 1 << ibits;
699
700         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
701
702         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
703 }
704
705 #ifndef __KERNEL__
706 /**
707  * Reserved API - do not use.
708  * Temporary workaround to allow uOSS and test programs force server
709  * mode in userspace. See comments near ln_server_mode_flag in
710  * lnet/lib-types.h */
711
712 void
713 lnet_server_mode() {
714         the_lnet.ln_server_mode_flag = 1;
715 }
716 #endif
717
718 static int lnet_unprepare(void);
719
720 static int
721 lnet_prepare(lnet_pid_t requested_pid)
722 {
723         /* Prepare to bring up the network */
724         struct lnet_res_container **recs;
725         int                       rc = 0;
726
727         if (requested_pid == LNET_PID_ANY) {
728                 /* Don't instantiate LNET just for me */
729                 return -ENETDOWN;
730         }
731
732         LASSERT (the_lnet.ln_refcount == 0);
733
734         the_lnet.ln_routing = 0;
735
736 #ifdef __KERNEL__
737         LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
738         the_lnet.ln_pid = requested_pid;
739 #else
740         if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
741                 LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
742
743                 if (current_uid() != 0) /* Only root can run user-space server */
744                         return -EPERM;
745                 the_lnet.ln_pid = requested_pid;
746
747         } else {/* client case (liblustre) */
748
749                 /* My PID must be unique on this node and flag I'm userspace */
750                 the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
751         }
752 #endif
753
754         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
755         INIT_LIST_HEAD(&the_lnet.ln_nis);
756         INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
757         INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
758         INIT_LIST_HEAD(&the_lnet.ln_routers);
759         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
760         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
761
762         rc = lnet_create_remote_nets_table();
763         if (rc != 0)
764                 goto failed;
765
766         the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
767
768         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
769                                                 sizeof(lnet_counters_t));
770         if (the_lnet.ln_counters == NULL) {
771                 CERROR("Failed to allocate counters for LNet\n");
772                 rc = -ENOMEM;
773                 goto failed;
774         }
775
776         rc = lnet_peer_tables_create();
777         if (rc != 0)
778                 goto failed;
779
780         rc = lnet_msg_containers_create();
781         if (rc != 0)
782                 goto failed;
783
784         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
785                                       LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS,
786                                       sizeof(lnet_eq_t));
787         if (rc != 0)
788                 goto failed;
789
790         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
791                                           sizeof(lnet_me_t));
792         if (recs == NULL)
793                 goto failed;
794
795         the_lnet.ln_me_containers = recs;
796
797         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
798                                           sizeof(lnet_libmd_t));
799         if (recs == NULL)
800                 goto failed;
801
802         the_lnet.ln_md_containers = recs;
803
804         rc = lnet_portals_create();
805         if (rc != 0) {
806                 CERROR("Failed to create portals for LNet: %d\n", rc);
807                 goto failed;
808         }
809
810         return 0;
811
812  failed:
813         lnet_unprepare();
814         return rc;
815 }
816
817 static int
818 lnet_unprepare (void)
819 {
820         /* NB no LNET_LOCK since this is the last reference.  All LND instances
821          * have shut down already, so it is safe to unlink and free all
822          * descriptors, even those that appear committed to a network op (eg MD
823          * with non-zero pending count) */
824
825         lnet_fail_nid(LNET_NID_ANY, 0);
826
827         LASSERT(the_lnet.ln_refcount == 0);
828         LASSERT(list_empty(&the_lnet.ln_test_peers));
829         LASSERT(list_empty(&the_lnet.ln_nis));
830         LASSERT(list_empty(&the_lnet.ln_nis_cpt));
831         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
832
833         lnet_portals_destroy();
834
835         if (the_lnet.ln_md_containers != NULL) {
836                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
837                 the_lnet.ln_md_containers = NULL;
838         }
839
840         if (the_lnet.ln_me_containers != NULL) {
841                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
842                 the_lnet.ln_me_containers = NULL;
843         }
844
845         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
846
847         lnet_msg_containers_destroy();
848         lnet_peer_tables_destroy();
849         lnet_rtrpools_free(0);
850
851         if (the_lnet.ln_counters != NULL) {
852                 cfs_percpt_free(the_lnet.ln_counters);
853                 the_lnet.ln_counters = NULL;
854         }
855         lnet_destroy_remote_nets_table();
856
857         return 0;
858 }
859
860 lnet_ni_t  *
861 lnet_net2ni_locked(__u32 net, int cpt)
862 {
863         struct list_head *tmp;
864         lnet_ni_t        *ni;
865
866         LASSERT(cpt != LNET_LOCK_EX);
867
868         list_for_each(tmp, &the_lnet.ln_nis) {
869                 ni = list_entry(tmp, lnet_ni_t, ni_list);
870
871                 if (LNET_NIDNET(ni->ni_nid) == net) {
872                         lnet_ni_addref_locked(ni, cpt);
873                         return ni;
874                 }
875         }
876
877         return NULL;
878 }
879
880 lnet_ni_t *
881 lnet_net2ni(__u32 net)
882 {
883         lnet_ni_t *ni;
884
885         lnet_net_lock(0);
886         ni = lnet_net2ni_locked(net, 0);
887         lnet_net_unlock(0);
888
889         return ni;
890 }
891 EXPORT_SYMBOL(lnet_net2ni);
892
893 static unsigned int
894 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
895 {
896         __u64           key = nid;
897         unsigned int    val;
898
899         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
900
901         if (number == 1)
902                 return 0;
903
904         val = hash_long(key, LNET_CPT_BITS);
905         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
906         if (val < number)
907                 return val;
908
909         return (unsigned int)(key + val + (val >> 1)) % number;
910 }
911
912 int
913 lnet_cpt_of_nid_locked(lnet_nid_t nid)
914 {
915         struct lnet_ni *ni;
916
917         /* must called with hold of lnet_net_lock */
918         if (LNET_CPT_NUMBER == 1)
919                 return 0; /* the only one */
920
921         /* take lnet_net_lock(any) would be OK */
922         if (!list_empty(&the_lnet.ln_nis_cpt)) {
923                 list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
924                         if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
925                                 continue;
926
927                         LASSERT(ni->ni_cpts != NULL);
928                         return ni->ni_cpts[lnet_nid_cpt_hash
929                                            (nid, ni->ni_ncpts)];
930                 }
931         }
932
933         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
934 }
935
936 int
937 lnet_cpt_of_nid(lnet_nid_t nid)
938 {
939         int     cpt;
940         int     cpt2;
941
942         if (LNET_CPT_NUMBER == 1)
943                 return 0; /* the only one */
944
945         if (list_empty(&the_lnet.ln_nis_cpt))
946                 return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
947
948         cpt = lnet_net_lock_current();
949         cpt2 = lnet_cpt_of_nid_locked(nid);
950         lnet_net_unlock(cpt);
951
952         return cpt2;
953 }
954 EXPORT_SYMBOL(lnet_cpt_of_nid);
955
956 int
957 lnet_islocalnet(__u32 net)
958 {
959         struct lnet_ni  *ni;
960         int             cpt;
961
962         cpt = lnet_net_lock_current();
963
964         ni = lnet_net2ni_locked(net, cpt);
965         if (ni != NULL)
966                 lnet_ni_decref_locked(ni, cpt);
967
968         lnet_net_unlock(cpt);
969
970         return ni != NULL;
971 }
972
973 lnet_ni_t  *
974 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
975 {
976         struct lnet_ni   *ni;
977         struct list_head *tmp;
978
979         LASSERT(cpt != LNET_LOCK_EX);
980
981         list_for_each(tmp, &the_lnet.ln_nis) {
982                 ni = list_entry(tmp, lnet_ni_t, ni_list);
983
984                 if (ni->ni_nid == nid) {
985                         lnet_ni_addref_locked(ni, cpt);
986                         return ni;
987                 }
988         }
989
990         return NULL;
991 }
992
993 int
994 lnet_islocalnid(lnet_nid_t nid)
995 {
996         struct lnet_ni  *ni;
997         int             cpt;
998
999         cpt = lnet_net_lock_current();
1000         ni = lnet_nid2ni_locked(nid, cpt);
1001         if (ni != NULL)
1002                 lnet_ni_decref_locked(ni, cpt);
1003         lnet_net_unlock(cpt);
1004
1005         return ni != NULL;
1006 }
1007
/*
 * Count the NIs that need the acceptor, i.e. those whose LND provides an
 * lnd_accept method.
 *
 * Without __KERNEL__ and without HAVE_LIBPTHREAD nothing is scanned and
 * the result is always 0.
 */
int
lnet_count_acceptor_nis (void)
{
        int              nacceptors = 0;
#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
        struct lnet_ni   *ni;
        int              cpt;

        cpt = lnet_net_lock_current();

        list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
                if (ni->ni_lnd->lnd_accept != NULL)
                        nacceptors++;
        }

        lnet_net_unlock(cpt);

#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
        return nacceptors;
}
1031
1032 static lnet_ping_info_t *
1033 lnet_ping_info_create(int num_ni)
1034 {
1035         lnet_ping_info_t *ping_info;
1036         unsigned int     infosz;
1037
1038         infosz = offsetof(lnet_ping_info_t, pi_ni[num_ni]);
1039         LIBCFS_ALLOC(ping_info, infosz);
1040         if (ping_info == NULL) {
1041                 CERROR("Can't allocate ping info[%d]\n", num_ni);
1042                 return NULL;
1043         }
1044
1045         ping_info->pi_nnis = num_ni;
1046         ping_info->pi_pid = the_lnet.ln_pid;
1047         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
1048         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
1049
1050         return ping_info;
1051 }
1052
1053 static inline int
1054 lnet_get_ni_count(void)
1055 {
1056         struct lnet_ni *ni;
1057         int            count = 0;
1058
1059         lnet_net_lock(0);
1060
1061         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
1062                 count++;
1063
1064         lnet_net_unlock(0);
1065
1066         return count;
1067 }
1068
1069 static inline void
1070 lnet_ping_info_free(lnet_ping_info_t *pinfo)
1071 {
1072         LIBCFS_FREE(pinfo,
1073                     offsetof(lnet_ping_info_t,
1074                              pi_ni[pinfo->pi_nnis]));
1075 }
1076
1077 static void
1078 lnet_ping_info_destroy(void)
1079 {
1080         struct lnet_ni  *ni;
1081
1082         lnet_net_lock(LNET_LOCK_EX);
1083
1084         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1085                 lnet_ni_lock(ni);
1086                 ni->ni_status = NULL;
1087                 lnet_ni_unlock(ni);
1088         }
1089
1090         lnet_ping_info_free(the_lnet.ln_ping_info);
1091         the_lnet.ln_ping_info = NULL;
1092
1093         lnet_net_unlock(LNET_LOCK_EX);
1094 }
1095
1096 static void
1097 lnet_ping_event_handler(lnet_event_t *event)
1098 {
1099         lnet_ping_info_t *pinfo = event->md.user_ptr;
1100
1101         if (event->unlinked)
1102                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
1103 }
1104
1105 static int
1106 lnet_ping_info_setup(lnet_ping_info_t **ppinfo, lnet_handle_md_t *md_handle,
1107                      int ni_count, bool set_eq)
1108 {
1109         lnet_handle_me_t  me_handle;
1110         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1111         lnet_md_t         md = {0};
1112         int               rc, rc2;
1113
1114         if (set_eq) {
1115                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
1116                                  &the_lnet.ln_ping_target_eq);
1117                 if (rc != 0) {
1118                         CERROR("Can't allocate ping EQ: %d\n", rc);
1119                         return rc;
1120                 }
1121         }
1122
1123         *ppinfo = lnet_ping_info_create(ni_count);
1124         if (*ppinfo == NULL) {
1125                 rc = -ENOMEM;
1126                 goto failed_0;
1127         }
1128
1129         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1130                           LNET_PROTO_PING_MATCHBITS, 0,
1131                           LNET_UNLINK, LNET_INS_AFTER,
1132                           &me_handle);
1133         if (rc != 0) {
1134                 CERROR("Can't create ping ME: %d\n", rc);
1135                 goto failed_1;
1136         }
1137
1138         /* initialize md content */
1139         md.start     = *ppinfo;
1140         md.length    = offsetof(lnet_ping_info_t,
1141                                 pi_ni[(*ppinfo)->pi_nnis]);
1142         md.threshold = LNET_MD_THRESH_INF;
1143         md.max_size  = 0;
1144         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1145                        LNET_MD_MANAGE_REMOTE;
1146         md.user_ptr  = NULL;
1147         md.eq_handle = the_lnet.ln_ping_target_eq;
1148         md.user_ptr = *ppinfo;
1149
1150         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
1151         if (rc != 0) {
1152                 CERROR("Can't attach ping MD: %d\n", rc);
1153                 goto failed_2;
1154         }
1155
1156         return 0;
1157
1158 failed_2:
1159         rc2 = LNetMEUnlink(me_handle);
1160         LASSERT(rc2 == 0);
1161 failed_1:
1162         lnet_ping_info_free(*ppinfo);
1163         *ppinfo = NULL;
1164 failed_0:
1165         if (set_eq)
1166                 LNetEQFree(the_lnet.ln_ping_target_eq);
1167         return rc;
1168 }
1169
1170 static void
1171 lnet_ping_md_unlink(lnet_ping_info_t *pinfo, lnet_handle_md_t *md_handle)
1172 {
1173         sigset_t        blocked = cfs_block_allsigs();
1174
1175         LNetMDUnlink(*md_handle);
1176         LNetInvalidateHandle(md_handle);
1177
1178         /* NB md could be busy; this just starts the unlink */
1179         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
1180                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
1181                 cfs_pause(cfs_time_seconds(1));
1182         }
1183
1184         cfs_restore_sigs(blocked);
1185 }
1186
1187 static void
1188 lnet_ping_info_install_locked(lnet_ping_info_t *ping_info)
1189 {
1190         int                     i;
1191         lnet_ni_t               *ni;
1192         lnet_ni_status_t        *ns;
1193
1194         i = 0;
1195         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1196                 LASSERT(i < ping_info->pi_nnis);
1197
1198                 ns = &ping_info->pi_ni[i];
1199
1200                 ns->ns_nid = ni->ni_nid;
1201
1202                 lnet_ni_lock(ni);
1203                 ns->ns_status = (ni->ni_status != NULL) ?
1204                                 ni->ni_status->ns_status : LNET_NI_STATUS_UP;
1205                 ni->ni_status = ns;
1206                 lnet_ni_unlock(ni);
1207
1208                 i++;
1209         }
1210 }
1211
1212 static void
1213 lnet_ping_target_update(lnet_ping_info_t *pinfo, lnet_handle_md_t md_handle)
1214 {
1215         lnet_ping_info_t *old_pinfo = NULL;
1216         lnet_handle_md_t old_md;
1217
1218         /* switch the NIs to point to the new ping info created */
1219         lnet_net_lock(LNET_LOCK_EX);
1220
1221         if (!the_lnet.ln_routing)
1222                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1223         lnet_ping_info_install_locked(pinfo);
1224
1225         if (the_lnet.ln_ping_info != NULL) {
1226                 old_pinfo = the_lnet.ln_ping_info;
1227                 old_md = the_lnet.ln_ping_target_md;
1228         }
1229         the_lnet.ln_ping_target_md = md_handle;
1230         the_lnet.ln_ping_info = pinfo;
1231
1232         lnet_net_unlock(LNET_LOCK_EX);
1233
1234         if (old_pinfo != NULL) {
1235                 /* unlink the old ping info */
1236                 lnet_ping_md_unlink(old_pinfo, &old_md);
1237                 lnet_ping_info_free(old_pinfo);
1238         }
1239 }
1240
1241 static void
1242 lnet_ping_target_fini(void)
1243 {
1244         int             rc;
1245
1246         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1247                             &the_lnet.ln_ping_target_md);
1248
1249         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1250         LASSERT(rc == 0);
1251
1252         lnet_ping_info_destroy();
1253 }
1254
1255 static int
1256 lnet_ni_tq_credits(lnet_ni_t *ni)
1257 {
1258         int     credits;
1259
1260         LASSERT(ni->ni_ncpts >= 1);
1261
1262         if (ni->ni_ncpts == 1)
1263                 return ni->ni_maxtxcredits;
1264
1265         credits = ni->ni_maxtxcredits / ni->ni_ncpts;
1266         credits = max(credits, 8 * ni->ni_peertxcredits);
1267         credits = min(credits, ni->ni_maxtxcredits);
1268
1269         return credits;
1270 }
1271
1272 static void
1273 lnet_clear_zombies_nis_locked(void)
1274 {
1275         int             i;
1276         int             islo;
1277         lnet_ni_t       *ni;
1278
1279         /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
1280          * and shut them down in guaranteed thread context */
1281         i = 2;
1282         while (!list_empty(&the_lnet.ln_nis_zombie)) {
1283                 int     *ref;
1284                 int     j;
1285
1286                 ni = list_entry(the_lnet.ln_nis_zombie.next,
1287                                 lnet_ni_t, ni_list);
1288                 list_del_init(&ni->ni_list);
1289                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1290                         if (*ref == 0)
1291                                 continue;
1292                         /* still busy, add it back to zombie list */
1293                         list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
1294                         break;
1295                 }
1296
1297                 if (!list_empty(&ni->ni_list)) {
1298                         lnet_net_unlock(LNET_LOCK_EX);
1299                         ++i;
1300                         if ((i & (-i)) == i) {
1301                                 CDEBUG(D_WARNING,
1302                                        "Waiting for zombie LNI %s\n",
1303                                        libcfs_nid2str(ni->ni_nid));
1304                         }
1305                         cfs_pause(cfs_time_seconds(1));
1306                         lnet_net_lock(LNET_LOCK_EX);
1307                         continue;
1308                 }
1309
1310                 ni->ni_lnd->lnd_refcount--;
1311                 lnet_net_unlock(LNET_LOCK_EX);
1312
1313                 islo = ni->ni_lnd->lnd_type == LOLND;
1314
1315                 LASSERT(!in_interrupt());
1316                 (ni->ni_lnd->lnd_shutdown)(ni);
1317
1318                 /* can't deref lnd anymore now; it might have unregistered
1319                  * itself...  */
1320
1321                 if (!islo)
1322                         CDEBUG(D_LNI, "Removed LNI %s\n",
1323                               libcfs_nid2str(ni->ni_nid));
1324
1325                 lnet_ni_free(ni);
1326                 i = 2;
1327                 lnet_net_lock(LNET_LOCK_EX);
1328         }
1329 }
1330
1331 static void
1332 lnet_shutdown_lndnis(void)
1333 {
1334         int             i;
1335         lnet_ni_t       *ni;
1336
1337         /* NB called holding the global mutex */
1338
1339         /* All quiet on the API front */
1340         LASSERT(!the_lnet.ln_shutdown);
1341         LASSERT(the_lnet.ln_refcount == 0);
1342         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
1343
1344         lnet_net_lock(LNET_LOCK_EX);
1345         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1346
1347         /* Unlink NIs from the global table */
1348         while (!list_empty(&the_lnet.ln_nis)) {
1349                 ni = list_entry(the_lnet.ln_nis.next,
1350                                 lnet_ni_t, ni_list);
1351                 /* move it to zombie list and nobody can find it anymore */
1352                 list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
1353                 lnet_ni_decref_locked(ni, 0);   /* drop ln_nis' ref */
1354
1355                 if (!list_empty(&ni->ni_cptlist)) {
1356                         list_del_init(&ni->ni_cptlist);
1357                         lnet_ni_decref_locked(ni, 0);
1358                 }
1359         }
1360
1361         /* Drop the cached eqwait NI. */
1362         if (the_lnet.ln_eq_waitni != NULL) {
1363                 lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0);
1364                 the_lnet.ln_eq_waitni = NULL;
1365         }
1366
1367         /* Drop the cached loopback NI. */
1368         if (the_lnet.ln_loni != NULL) {
1369                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1370                 the_lnet.ln_loni = NULL;
1371         }
1372
1373         lnet_net_unlock(LNET_LOCK_EX);
1374
1375         /* Clear lazy portals and drop delayed messages which hold refs
1376          * on their lnet_msg_t::msg_rxpeer */
1377         for (i = 0; i < the_lnet.ln_nportals; i++)
1378                 LNetClearLazyPortal(i);
1379
1380         /* Clear the peer table and wait for all peers to go (they hold refs on
1381          * their NIs) */
1382         lnet_peer_tables_cleanup(NULL);
1383
1384         lnet_net_lock(LNET_LOCK_EX);
1385
1386         lnet_clear_zombies_nis_locked();
1387         the_lnet.ln_shutdown = 0;
1388         lnet_net_unlock(LNET_LOCK_EX);
1389 }
1390
1391 int
1392 lnet_shutdown_lndni(__u32 net)
1393 {
1394         lnet_ping_info_t *pinfo;
1395         lnet_handle_md_t md_handle;
1396         lnet_ni_t       *found_ni = NULL;
1397         int             ni_count;
1398         int             rc;
1399
1400         if (LNET_NETTYP(net) == LOLND)
1401                 return -EINVAL;
1402
1403         ni_count = lnet_get_ni_count();
1404
1405         /* create and link a new ping info, before removing the old one */
1406         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count - 1, false);
1407         if (rc != 0)
1408                 return rc;
1409
1410         /* proceed with shutting down the NI */
1411         lnet_net_lock(LNET_LOCK_EX);
1412
1413         found_ni = lnet_net2ni_locked(net, 0);
1414         if (found_ni == NULL) {
1415                 lnet_net_unlock(LNET_LOCK_EX);
1416                 lnet_ping_md_unlink(pinfo, &md_handle);
1417                 lnet_ping_info_free(pinfo);
1418                 return -EINVAL;
1419         }
1420
1421         /* decrement the reference counter on found_ni which was
1422          * incremented when we called lnet_net2ni_locked() */
1423         lnet_ni_decref_locked(found_ni, 0);
1424
1425         /* Move ni to zombie list so nobody can find it anymore */
1426         list_move(&found_ni->ni_list, &the_lnet.ln_nis_zombie);
1427
1428         /* Drop the lock reference for the ln_nis ref. */
1429         lnet_ni_decref_locked(found_ni, 0);
1430
1431         if (!list_empty(&found_ni->ni_cptlist)) {
1432                 list_del_init(&found_ni->ni_cptlist);
1433                 lnet_ni_decref_locked(found_ni, 0);
1434         }
1435
1436         lnet_net_unlock(LNET_LOCK_EX);
1437
1438         /* Do peer table cleanup for this ni */
1439         lnet_peer_tables_cleanup(found_ni);
1440
1441         lnet_net_lock(LNET_LOCK_EX);
1442         lnet_clear_zombies_nis_locked();
1443         lnet_net_unlock(LNET_LOCK_EX);
1444
1445         lnet_ping_target_update(pinfo, md_handle);
1446
1447         return 0;
1448 }
1449
1450 /*
1451  * Callers of lnet_startup_lndnis need to clean up using
1452  * lnet_shutdown_lndnis if startup fails
1453  */
1454 static int
1455 lnet_startup_lndnis(struct list_head *nilist, __s32 peer_timeout,
1456                     __s32 peer_cr, __s32 peer_buf_cr, __s32 credits,
1457                     int *ni_count)
1458 {
1459         int                     rc = 0;
1460         struct lnet_ni          *ni;
1461         int                     lnd_type;
1462         lnd_t                   *lnd;
1463         struct lnet_tx_queue    *tq;
1464         int                     i;
1465
1466         while (!list_empty(nilist)) {
1467                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1468                 lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
1469
1470                 if (!libcfs_isknown_lnd(lnd_type))
1471                         goto failed;
1472
1473                 if (lnd_type == CIBLND    ||
1474                     lnd_type == OPENIBLND ||
1475                     lnd_type == IIBLND    ||
1476                     lnd_type == VIBLND) {
1477                         CERROR("LND %s obsoleted\n",
1478                                libcfs_lnd2str(lnd_type));
1479                         goto failed;
1480                 }
1481
1482                 /* Make sure this new NI is unique. */
1483                 lnet_net_lock(LNET_LOCK_EX);
1484                 if (!lnet_net_unique(LNET_NIDNET(ni->ni_nid),
1485                                      &the_lnet.ln_nis)) {
1486                         if (lnd_type == LOLND) {
1487                                 lnet_net_unlock(LNET_LOCK_EX);
1488                                 list_del(&ni->ni_list);
1489                                 lnet_ni_free(ni);
1490                                 continue;
1491                         }
1492
1493                         CERROR("Net %s is not unique\n",
1494                                libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
1495                         lnet_net_unlock(LNET_LOCK_EX);
1496                         goto failed;
1497                 }
1498                 lnet_net_unlock(LNET_LOCK_EX);
1499
1500                 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1501                 lnd = lnet_find_lnd_by_type(lnd_type);
1502
1503 #ifdef __KERNEL__
1504                 if (lnd == NULL) {
1505                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1506                         rc = request_module("%s",
1507                                                 libcfs_lnd2modname(lnd_type));
1508                         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1509
1510                         lnd = lnet_find_lnd_by_type(lnd_type);
1511                         if (lnd == NULL) {
1512                                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1513                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1514                                        libcfs_lnd2str(lnd_type),
1515                                        libcfs_lnd2modname(lnd_type), rc);
1516 #ifndef HAVE_MODULE_LOADING_SUPPORT
1517                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1518                                          "compiled with kernel module "
1519                                          "loading support.");
1520 #endif
1521                                 goto failed;
1522                         }
1523                 }
1524 #else
1525                 if (lnd == NULL) {
1526                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1527                         CERROR("LND %s not supported\n",
1528                                libcfs_lnd2str(lnd_type));
1529                         goto failed;
1530                 }
1531 #endif
1532
1533                 lnet_net_lock(LNET_LOCK_EX);
1534                 lnd->lnd_refcount++;
1535                 lnet_net_unlock(LNET_LOCK_EX);
1536
1537                 ni->ni_lnd = lnd;
1538
1539                 rc = (lnd->lnd_startup)(ni);
1540
1541                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1542
1543                 if (rc != 0) {
1544                         LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
1545                                            "\n",
1546                                            rc, libcfs_lnd2str(lnd->lnd_type));
1547                         lnet_net_lock(LNET_LOCK_EX);
1548                         lnd->lnd_refcount--;
1549                         lnet_net_unlock(LNET_LOCK_EX);
1550                         goto failed;
1551                 }
1552
1553                 /* If given some LND tunable parameters, parse those now to
1554                  * override the values in the NI structure. */
1555                 if (peer_buf_cr >= 0)
1556                         ni->ni_peerrtrcredits = peer_buf_cr;
1557                 if (peer_timeout >= 0)
1558                         ni->ni_peertimeout = peer_timeout;
1559                 /*
1560                  * TODO
1561                  * Note: For now, don't allow the user to change
1562                  * peertxcredits as this number is used in the
1563                  * IB LND to control queue depth.
1564                  * if (peer_cr != -1)
1565                  *      ni->ni_peertxcredits = peer_cr;
1566                  */
1567                 if (credits >= 0)
1568                         ni->ni_maxtxcredits = credits;
1569
1570                 LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
1571
1572                 list_del(&ni->ni_list);
1573
1574                 lnet_net_lock(LNET_LOCK_EX);
1575                 /* refcount for ln_nis */
1576                 lnet_ni_addref_locked(ni, 0);
1577                 list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1578                 if (ni->ni_cpts != NULL) {
1579                         list_add_tail(&ni->ni_cptlist,
1580                                       &the_lnet.ln_nis_cpt);
1581                         lnet_ni_addref_locked(ni, 0);
1582                 }
1583
1584                 lnet_net_unlock(LNET_LOCK_EX);
1585
1586                 /* increment the ni_count here to account for the LOLND as
1587                  * well.  If we increment past this point then the number
1588                  * of count will be missing the LOLND, and then ping and
1589                  * will not report the LOLND
1590                  */
1591                 if (ni_count != NULL)
1592                         (*ni_count)++;
1593
1594                 if (lnd->lnd_type == LOLND) {
1595                         lnet_ni_addref(ni);
1596                         LASSERT(the_lnet.ln_loni == NULL);
1597                         the_lnet.ln_loni = ni;
1598                         continue;
1599                 }
1600
1601 #ifndef __KERNEL__
1602                 if (lnd->lnd_wait != NULL) {
1603                         if (the_lnet.ln_eq_waitni == NULL) {
1604                                 lnet_ni_addref(ni);
1605                                 the_lnet.ln_eq_waitni = ni;
1606                         }
1607                 } else {
1608 # ifndef HAVE_LIBPTHREAD
1609                         LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a "
1610                                            "single-threaded runtime\n",
1611                                            libcfs_lnd2str(lnd_type));
1612                         goto failed;
1613 # endif
1614                 }
1615 #endif
1616                 if (ni->ni_peertxcredits == 0 ||
1617                     ni->ni_maxtxcredits == 0) {
1618                         LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1619                                            libcfs_lnd2str(lnd->lnd_type),
1620                                            ni->ni_peertxcredits == 0 ?
1621                                            "" : "per-peer ");
1622                         goto failed;
1623                 }
1624
1625                 cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1626                         tq->tq_credits_min =
1627                         tq->tq_credits_max =
1628                         tq->tq_credits = lnet_ni_tq_credits(ni);
1629                 }
1630
1631                 CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1632                        libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
1633                        lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1634                        ni->ni_peerrtrcredits, ni->ni_peertimeout);
1635         }
1636
1637         return 0;
1638 failed:
1639         while (!list_empty(nilist)) {
1640                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1641                 list_del(&ni->ni_list);
1642                 lnet_ni_free(ni);
1643         }
1644         return -EINVAL;
1645 }
1646
1647 /**
1648  * Initialize LNet library.
1649  *
1650  * Only userspace program needs to call this function - it's automatically
1651  * called in the kernel at module loading time. Caller has to call LNetFini()
1652  * after a call to LNetInit(), if and only if the latter returned 0. It must
1653  * be called exactly once.
1654  *
1655  * \return 0 on success, and -ve on failures.
1656  */
1657 int
1658 LNetInit(void)
1659 {
1660         int     rc;
1661
1662         lnet_assert_wire_constants();
1663         LASSERT(!the_lnet.ln_init);
1664
1665         memset(&the_lnet, 0, sizeof(the_lnet));
1666
1667         /* refer to global cfs_cpt_table for now */
1668         the_lnet.ln_cpt_table   = cfs_cpt_table;
1669         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1670
1671         LASSERT(the_lnet.ln_cpt_number > 0);
1672         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1673                 /* we are under risk of consuming all lh_cookie */
1674                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1675                        "please change setting of CPT-table and retry\n",
1676                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1677                 return -1;
1678         }
1679
1680         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1681                 the_lnet.ln_cpt_bits++;
1682
1683         rc = lnet_create_locks();
1684         if (rc != 0) {
1685                 CERROR("Can't create LNet global locks: %d\n", rc);
1686                 return -1;
1687         }
1688
1689         the_lnet.ln_refcount = 0;
1690         the_lnet.ln_init = 1;
1691         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1692         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1693         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1694         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1695
1696 #ifdef __KERNEL__
1697         /* The hash table size is the number of bits it takes to express the set
1698          * ln_num_routes, minus 1 (better to under estimate than over so we
1699          * don't waste memory). */
1700         if (rnet_htable_size <= 0)
1701                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1702         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1703                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1704         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1705                                            order_base_2(rnet_htable_size) - 1);
1706
1707         /* All LNDs apart from the LOLND are in separate modules.  They
1708          * register themselves when their module loads, and unregister
1709          * themselves when their module is unloaded. */
1710 #else
1711         the_lnet.ln_remote_nets_hbits = 8;
1712
1713         /* Register LNDs
1714          * NB the order here determines default 'networks=' order */
1715 # ifdef HAVE_LIBPTHREAD
1716         LNET_REGISTER_ULND(the_tcplnd);
1717 # endif
1718 #endif
1719         lnet_register_lnd(&the_lolnd);
1720         return 0;
1721 }
1722 EXPORT_SYMBOL(LNetInit);
1723
1724 /**
1725  * Finalize LNet library.
1726  *
1727  * Only userspace program needs to call this function. It can be called
1728  * at most once.
1729  *
1730  * \pre LNetInit() called with success.
1731  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1732  */
1733 void
1734 LNetFini(void)
1735 {
1736         LASSERT(the_lnet.ln_init);
1737         LASSERT(the_lnet.ln_refcount == 0);
1738
1739         while (!list_empty(&the_lnet.ln_lnds))
1740                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1741                                                lnd_t, lnd_list));
1742         lnet_destroy_locks();
1743
1744         the_lnet.ln_init = 0;
1745 }
1746 EXPORT_SYMBOL(LNetFini);
1747
1748 /**
1749  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1750  *
1751  * Userspace program should call this after a successful call to LNetInit().
1752  * Users must call this function at least once before any other functions.
1753  * For each successful call there must be a corresponding call to
1754  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1755  * ignored.
1756  *
1757  * The PID used by LNet may be different from the one requested.
1758  * See LNetGetId().
1759  *
1760  * \param requested_pid PID requested by the caller.
1761  *
1762  * \return >= 0 on success, and < 0 error code on failures.
1763  */
1764 int
1765 LNetNIInit(lnet_pid_t requested_pid)
1766 {
1767         int                     im_a_router = 0;
1768         int                     rc;
1769         int                     ni_count = 0;
1770         int                     lnd_type;
1771         struct lnet_ni          *ni;
1772         lnet_ping_info_t        *pinfo;
1773         lnet_handle_md_t        md_handle;
1774         struct list_head        net_head;
1775
1776         INIT_LIST_HEAD(&net_head);
1777
1778         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1779
1780         LASSERT(the_lnet.ln_init);
1781         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1782
1783         if (the_lnet.ln_refcount > 0) {
1784                 rc = the_lnet.ln_refcount++;
1785                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1786                 return rc;
1787         }
1788
1789         rc = lnet_prepare(requested_pid);
1790         if (rc != 0)
1791                 goto failed0;
1792
1793         rc = lnet_parse_networks(&net_head,
1794                                  !the_lnet.ln_nis_from_mod_params ?
1795                                    lnet_get_networks() : "");
1796         if (rc < 0)
1797                 goto failed1;
1798
1799         rc = lnet_startup_lndnis(&net_head, -1, -1, -1, -1, &ni_count);
1800         if (rc != 0)
1801                 goto failed2;
1802
1803         if (the_lnet.ln_eq_waitni != NULL && ni_count > 1) {
1804                 lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type;
1805                 LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
1806                                    "\n",
1807                                    libcfs_lnd2str(lnd_type));
1808                 goto failed2;
1809         }
1810
1811         rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1812         if (rc != 0)
1813                 goto failed2;
1814
1815         rc = lnet_check_routes();
1816         if (rc != 0)
1817                 goto failed2;
1818
1819         rc = lnet_rtrpools_alloc(im_a_router);
1820         if (rc != 0)
1821                 goto failed2;
1822
1823         rc = lnet_acceptor_start();
1824         if (rc != 0)
1825                 goto failed2;
1826         the_lnet.ln_refcount = 1;
1827         /* Now I may use my own API functions... */
1828
1829         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1830         if (rc != 0)
1831                 goto failed3;
1832
1833         lnet_ping_target_update(pinfo, md_handle);
1834
1835         rc = lnet_router_checker_start();
1836         if (rc != 0)
1837                 goto failed4;
1838
1839         lnet_fault_init();
1840         lnet_proc_init();
1841
1842         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1843
1844         return 0;
1845
1846 failed4:
1847         the_lnet.ln_refcount = 0;
1848         lnet_ping_md_unlink(pinfo, &md_handle);
1849         lnet_ping_info_free(pinfo);
1850 failed3:
1851         lnet_acceptor_stop();
1852         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1853         LASSERT(rc == 0);
1854 failed2:
1855         lnet_destroy_routes();
1856         lnet_shutdown_lndnis();
1857 failed1:
1858         lnet_unprepare();
1859 failed0:
1860         LASSERT(rc < 0);
1861         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1862         while (!list_empty(&net_head)) {
1863                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
1864                 list_del_init(&ni->ni_list);
1865                 lnet_ni_free(ni);
1866         }
1867         return rc;
1868 }
1869 EXPORT_SYMBOL(LNetNIInit);
1870
1871 /**
1872  * Stop LNet interfaces, routing, and forwarding.
1873  *
1874  * Users must call this function once for each successful call to LNetNIInit().
1875  * Once the LNetNIFini() operation has been started, the results of pending
1876  * API operations are undefined.
1877  *
1878  * \return always 0 for current implementation.
1879  */
1880 int
1881 LNetNIFini()
1882 {
1883         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1884
1885         LASSERT (the_lnet.ln_init);
1886         LASSERT (the_lnet.ln_refcount > 0);
1887
1888         if (the_lnet.ln_refcount != 1) {
1889                 the_lnet.ln_refcount--;
1890         } else {
1891                 LASSERT(!the_lnet.ln_niinit_self);
1892
1893                 lnet_fault_fini();
1894
1895                 lnet_proc_fini();
1896                 lnet_router_checker_stop();
1897                 lnet_ping_target_fini();
1898
1899                 /* Teardown fns that use my own API functions BEFORE here */
1900                 the_lnet.ln_refcount = 0;
1901
1902                 lnet_acceptor_stop();
1903                 lnet_destroy_routes();
1904                 lnet_shutdown_lndnis();
1905                 lnet_unprepare();
1906         }
1907
1908         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1909         return 0;
1910 }
1911 EXPORT_SYMBOL(LNetNIFini);
1912
1913 /**
1914  * Grabs the ni data from the ni structure and fills the out
1915  * parameters
1916  *
1917  * \param[in] ni network        interface structure
1918  * \param[out] cpt_count        the number of cpts the ni is on
1919  * \param[out] nid              Network Interface ID
1920  * \param[out] peer_timeout     NI peer timeout
1921  * \param[out] peer_tx_crdits   NI peer transmit credits
1922  * \param[out] peer_rtr_credits NI peer router credits
1923  * \param[out] max_tx_credits   NI max transmit credit
1924  * \param[out] net_config       Network configuration
1925  */
1926 static void
1927 lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
1928                   int *peer_timeout, int *peer_tx_credits,
1929                   int *peer_rtr_credits, int *max_tx_credits,
1930                   struct lnet_ioctl_net_config *net_config)
1931 {
1932         int i;
1933
1934         if (ni == NULL)
1935                 return;
1936
1937         if (net_config == NULL)
1938                 return;
1939
1940         CLASSERT(ARRAY_SIZE(ni->ni_interfaces) ==
1941                  ARRAY_SIZE(net_config->ni_interfaces));
1942
1943         if (ni->ni_interfaces[0] != NULL) {
1944                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1945                         if (ni->ni_interfaces[i] != NULL) {
1946                                 strncpy(net_config->ni_interfaces[i],
1947                                         ni->ni_interfaces[i],
1948                                         sizeof(net_config->ni_interfaces[i]));
1949                         }
1950                 }
1951         }
1952
1953         *nid = ni->ni_nid;
1954         *peer_timeout = ni->ni_peertimeout;
1955         *peer_tx_credits = ni->ni_peertxcredits;
1956         *peer_rtr_credits = ni->ni_peerrtrcredits;
1957         *max_tx_credits = ni->ni_maxtxcredits;
1958
1959         net_config->ni_status = ni->ni_status->ns_status;
1960
1961         for (i = 0;
1962              ni->ni_cpts != NULL && i < ni->ni_ncpts &&
1963              i < LNET_MAX_SHOW_NUM_CPT;
1964              i++)
1965                 net_config->ni_cpts[i] = ni->ni_cpts[i];
1966
1967         *cpt_count = ni->ni_ncpts;
1968 }
1969
1970 int
1971 lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
1972                     int *peer_tx_credits, int *peer_rtr_credits,
1973                     int *max_tx_credits,
1974                     struct lnet_ioctl_net_config *net_config)
1975 {
1976         struct lnet_ni          *ni;
1977         struct list_head        *tmp;
1978         int                     cpt;
1979         int                     rc = -ENOENT;
1980
1981         cpt = lnet_net_lock_current();
1982
1983         list_for_each(tmp, &the_lnet.ln_nis) {
1984                 ni = list_entry(tmp, lnet_ni_t, ni_list);
1985                 if (idx-- == 0) {
1986                         rc = 0;
1987                         lnet_ni_lock(ni);
1988                         lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
1989                                           peer_tx_credits, peer_rtr_credits,
1990                                           max_tx_credits, net_config);
1991                         lnet_ni_unlock(ni);
1992                         break;
1993                 }
1994         }
1995
1996         lnet_net_unlock(cpt);
1997         return rc;
1998 }
1999
2000 int
2001 lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
2002                 __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
2003                 __s32 credits)
2004 {
2005         lnet_ping_info_t        *pinfo;
2006         lnet_handle_md_t        md_handle;
2007         struct lnet_ni          *ni;
2008         struct list_head        net_head;
2009         int                     rc;
2010
2011         INIT_LIST_HEAD(&net_head);
2012
2013         /* Create a ni structure for the network string */
2014         rc = lnet_parse_networks(&net_head, nets);
2015         if (rc < 0)
2016                 return rc;
2017
2018         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2019
2020         if (rc > 1) {
2021                 rc = -EINVAL; /* only add one interface per call */
2022                 goto failed0;
2023         }
2024
2025         rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
2026                                   false);
2027         if (rc != 0)
2028                 goto failed0;
2029
2030         rc = lnet_startup_lndnis(&net_head, peer_timeout, peer_cr,
2031                                  peer_buf_cr, credits, NULL);
2032         if (rc != 0)
2033                 goto failed1;
2034
2035         lnet_ping_target_update(pinfo, md_handle);
2036         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2037
2038         return 0;
2039
2040 failed1:
2041         lnet_ping_md_unlink(pinfo, &md_handle);
2042         lnet_ping_info_free(pinfo);
2043 failed0:
2044         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2045         while (!list_empty(&net_head)) {
2046                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
2047                 list_del_init(&ni->ni_list);
2048                 lnet_ni_free(ni);
2049         }
2050         return rc;
2051 }
2052
2053 int
2054 lnet_dyn_del_ni(__u32 net)
2055 {
2056         int rc;
2057
2058         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2059         rc = lnet_shutdown_lndni(net);
2060         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2061
2062         return rc;
2063 }
2064
2065 /**
2066  * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
2067  * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
2068  * internal ioctl handler.
2069  *
2070  * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
2071  *
2072  * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
2073  * The data will be printed to system console. Don't use it excessively.
2074  * \param arg A pointer to lnet_process_id_t, process ID of the peer.
2075  *
2076  * \return Always return 0 when called by users directly (i.e., not via ioctl).
2077  */
2078 int
2079 LNetCtl(unsigned int cmd, void *arg)
2080 {
2081         struct libcfs_ioctl_data *data = arg;
2082         struct lnet_ioctl_config_data *config;
2083         lnet_process_id_t         id = {0};
2084         lnet_ni_t                *ni;
2085         int                       rc;
2086
2087         CLASSERT(LIBCFS_IOC_DATA_MAX >= sizeof(struct lnet_ioctl_net_config) +
2088                                         sizeof(struct lnet_ioctl_config_data));
2089         LASSERT(the_lnet.ln_init);
2090
2091         switch (cmd) {
2092         case IOC_LIBCFS_GET_NI:
2093                 rc = LNetGetId(data->ioc_count, &id);
2094                 data->ioc_nid = id.nid;
2095                 return rc;
2096
2097         case IOC_LIBCFS_FAIL_NID:
2098                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2099
2100         case IOC_LIBCFS_ADD_ROUTE:
2101                 config = arg;
2102                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2103                 rc = lnet_add_route(config->cfg_net,
2104                                     config->cfg_config_u.cfg_route.rtr_hop,
2105                                     config->cfg_nid,
2106                                     config->cfg_config_u.cfg_route.
2107                                         rtr_priority);
2108                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2109                 return (rc != 0) ? rc : lnet_check_routes();
2110
2111         case IOC_LIBCFS_DEL_ROUTE:
2112                 config = arg;
2113                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2114                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2115                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2116                 return rc;
2117
2118         case IOC_LIBCFS_GET_ROUTE:
2119                 config = arg;
2120                 return lnet_get_route(config->cfg_count,
2121                                       &config->cfg_net,
2122                                       &config->cfg_config_u.cfg_route.rtr_hop,
2123                                       &config->cfg_nid,
2124                                       &config->cfg_config_u.cfg_route.rtr_flags,
2125                                       &config->cfg_config_u.cfg_route.
2126                                         rtr_priority);
2127
2128         case IOC_LIBCFS_GET_NET: {
2129                 struct lnet_ioctl_net_config *net_config;
2130                 config = arg;
2131                 net_config = (struct lnet_ioctl_net_config *)
2132                         config->cfg_bulk;
2133                 if (config == NULL || net_config == NULL)
2134                         return -1;
2135
2136                 return lnet_get_net_config(config->cfg_count,
2137                                            &config->cfg_ncpts,
2138                                            &config->cfg_nid,
2139                                            &config->cfg_config_u.
2140                                                 cfg_net.net_peer_timeout,
2141                                            &config->cfg_config_u.cfg_net.
2142                                                 net_peer_tx_credits,
2143                                            &config->cfg_config_u.cfg_net.
2144                                                 net_peer_rtr_credits,
2145                                            &config->cfg_config_u.cfg_net.
2146                                                 net_max_tx_credits,
2147                                            net_config);
2148         }
2149
2150         case IOC_LIBCFS_GET_LNET_STATS:
2151         {
2152                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2153
2154                 lnet_counters_get(&lnet_stats->st_cntrs);
2155                 return 0;
2156         }
2157
2158 #if defined(__KERNEL__) && defined(LNET_ROUTER)
2159         case IOC_LIBCFS_CONFIG_RTR:
2160                 config = arg;
2161                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2162                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2163                         rc = lnet_rtrpools_enable();
2164                         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2165                         return rc;
2166                 }
2167                 lnet_rtrpools_disable();
2168                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2169                 return 0;
2170
2171         case IOC_LIBCFS_ADD_BUF:
2172                 config = arg;
2173                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2174                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2175                                                 buf_tiny,
2176                                           config->cfg_config_u.cfg_buffers.
2177                                                 buf_small,
2178                                           config->cfg_config_u.cfg_buffers.
2179                                                 buf_large);
2180                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2181                 return rc;
2182 #endif
2183
2184         case IOC_LIBCFS_GET_BUF: {
2185                 struct lnet_ioctl_pool_cfg *pool_cfg;
2186                 config = arg;
2187                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2188                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2189         }
2190
2191         case IOC_LIBCFS_GET_PEER_INFO: {
2192                 struct lnet_ioctl_peer *peer_info = arg;
2193                 return lnet_get_peer_info(
2194                    peer_info->pr_count,
2195                    &peer_info->pr_nid,
2196                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2197                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2198                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2199                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2200                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2201                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2202                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2203                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2204         }
2205
2206         case IOC_LIBCFS_NOTIFY_ROUTER:
2207                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2208                                    cfs_time_current() -
2209                                    cfs_time_seconds(cfs_time_current_sec() -
2210                                                     (time_t)data->ioc_u64[0]));
2211
2212         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
2213                 /* This can be removed once lustre stops calling it */
2214                 return 0;
2215
2216         case IOC_LIBCFS_LNET_DIST:
2217                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2218                 if (rc < 0 && rc != -EHOSTUNREACH)
2219                         return rc;
2220
2221                 data->ioc_u32[0] = rc;
2222                 return 0;
2223
2224         case IOC_LIBCFS_TESTPROTOCOMPAT:
2225                 lnet_net_lock(LNET_LOCK_EX);
2226                 the_lnet.ln_testprotocompat = data->ioc_flags;
2227                 lnet_net_unlock(LNET_LOCK_EX);
2228                 return 0;
2229
2230         case IOC_LIBCFS_LNET_FAULT:
2231                 return lnet_fault_ctl(data->ioc_flags, data);
2232
2233         case IOC_LIBCFS_PING:
2234                 id.nid = data->ioc_nid;
2235                 id.pid = data->ioc_u32[0];
2236                 rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
2237                                (lnet_process_id_t __user *)data->ioc_pbuf1,
2238                                data->ioc_plen1/sizeof(lnet_process_id_t));
2239                 if (rc < 0)
2240                         return rc;
2241                 data->ioc_count = rc;
2242                 return 0;
2243
2244         case IOC_LIBCFS_DEBUG_PEER: {
2245                 /* CAVEAT EMPTOR: this one designed for calling directly; not
2246                  * via an ioctl */
2247                 id = *((lnet_process_id_t *) arg);
2248
2249                 lnet_debug_peer(id.nid);
2250
2251                 ni = lnet_net2ni(LNET_NIDNET(id.nid));
2252                 if (ni == NULL) {
2253                         CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
2254                 } else {
2255                         if (ni->ni_lnd->lnd_ctl == NULL) {
2256                                 CDEBUG(D_WARNING, "No ctl for %s\n",
2257                                        libcfs_id2str(id));
2258                         } else {
2259                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2260                         }
2261
2262                         lnet_ni_decref(ni);
2263                 }
2264                 return 0;
2265         }
2266
2267         default:
2268                 ni = lnet_net2ni(data->ioc_net);
2269                 if (ni == NULL)
2270                         return -EINVAL;
2271
2272                 if (ni->ni_lnd->lnd_ctl == NULL)
2273                         rc = -EINVAL;
2274                 else
2275                         rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2276
2277                 lnet_ni_decref(ni);
2278                 return rc;
2279         }
2280         /* not reached */
2281 }
2282 EXPORT_SYMBOL(LNetCtl);
2283
2284 /**
2285  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
2286  * all interfaces share a same PID, as requested by LNetNIInit().
2287  *
2288  * \param index Index of the interface to look up.
2289  * \param id On successful return, this location will hold the
2290  * lnet_process_id_t ID of the interface.
2291  *
2292  * \retval 0 If an interface exists at \a index.
2293  * \retval -ENOENT If no interface has been found.
2294  */
2295 int
2296 LNetGetId(unsigned int index, lnet_process_id_t *id)
2297 {
2298         struct lnet_ni   *ni;
2299         struct list_head *tmp;
2300         int               cpt;
2301         int               rc = -ENOENT;
2302
2303         LASSERT(the_lnet.ln_init);
2304         LASSERT(the_lnet.ln_refcount > 0);
2305
2306         cpt = lnet_net_lock_current();
2307
2308         list_for_each(tmp, &the_lnet.ln_nis) {
2309                 if (index-- != 0)
2310                         continue;
2311
2312                 ni = list_entry(tmp, lnet_ni_t, ni_list);
2313
2314                 id->nid = ni->ni_nid;
2315                 id->pid = the_lnet.ln_pid;
2316                 rc = 0;
2317                 break;
2318         }
2319
2320         lnet_net_unlock(cpt);
2321         return rc;
2322 }
2323 EXPORT_SYMBOL(LNetGetId);
2324
2325 /**
2326  * Print a string representation of handle \a h into buffer \a str of
2327  * \a len bytes.
2328  */
2329 void
2330 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2331 {
2332         snprintf(str, len, LPX64, h.cookie);
2333 }
2334 EXPORT_SYMBOL(LNetSnprintHandle);
2335
2336 static int
2337 lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids,
2338           int n_ids)
2339 {
2340         lnet_handle_eq_t     eqh;
2341         lnet_handle_md_t     mdh;
2342         lnet_event_t         event;
2343         lnet_md_t            md = {0};
2344         int                  which;
2345         int                  unlinked = 0;
2346         int                  replied = 0;
2347         const int            a_long_time = 60000; /* mS */
2348         int                  infosz;
2349         lnet_ping_info_t    *info;
2350         lnet_process_id_t    tmpid;
2351         int                  i;
2352         int                  nob;
2353         int                  rc;
2354         int                  rc2;
2355         sigset_t         blocked;
2356
2357         infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
2358
2359         if (n_ids <= 0 ||
2360             id.nid == LNET_NID_ANY ||
2361             timeout_ms > 500000 ||              /* arbitrary limit! */
2362             n_ids > 20)                         /* arbitrary limit! */
2363                 return -EINVAL;
2364
2365         if (id.pid == LNET_PID_ANY)
2366                 id.pid = LNET_PID_LUSTRE;
2367
2368         LIBCFS_ALLOC(info, infosz);
2369         if (info == NULL)
2370                 return -ENOMEM;
2371
2372         /* NB 2 events max (including any unlink event) */
2373         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2374         if (rc != 0) {
2375                 CERROR("Can't allocate EQ: %d\n", rc);
2376                 goto out_0;
2377         }
2378
2379         /* initialize md content */
2380         md.start     = info;
2381         md.length    = infosz;
2382         md.threshold = 2; /*GET/REPLY*/
2383         md.max_size  = 0;
2384         md.options   = LNET_MD_TRUNCATE;
2385         md.user_ptr  = NULL;
2386         md.eq_handle = eqh;
2387
2388         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2389         if (rc != 0) {
2390                 CERROR("Can't bind MD: %d\n", rc);
2391                 goto out_1;
2392         }
2393
2394         rc = LNetGet(LNET_NID_ANY, mdh, id,
2395                      LNET_RESERVED_PORTAL,
2396                      LNET_PROTO_PING_MATCHBITS, 0);
2397
2398         if (rc != 0) {
2399                 /* Don't CERROR; this could be deliberate! */
2400
2401                 rc2 = LNetMDUnlink(mdh);
2402                 LASSERT(rc2 == 0);
2403
2404                 /* NB must wait for the UNLINK event below... */
2405                 unlinked = 1;
2406                 timeout_ms = a_long_time;
2407         }
2408
2409         do {
2410                 /* MUST block for unlink to complete */
2411                 if (unlinked)
2412                         blocked = cfs_block_allsigs();
2413
2414                 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
2415
2416                 if (unlinked)
2417                         cfs_restore_sigs(blocked);
2418
2419                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2420                        (rc2 <= 0) ? -1 : event.type,
2421                        (rc2 <= 0) ? -1 : event.status,
2422                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2423
2424                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2425
2426                 if (rc2 <= 0 || event.status != 0) {
2427                         /* timeout or error */
2428                         if (!replied && rc == 0)
2429                                 rc = (rc2 < 0) ? rc2 :
2430                                      (rc2 == 0) ? -ETIMEDOUT :
2431                                      event.status;
2432
2433                         if (!unlinked) {
2434                                 /* Ensure completion in finite time... */
2435                                 LNetMDUnlink(mdh);
2436                                 /* No assertion (racing with network) */
2437                                 unlinked = 1;
2438                                 timeout_ms = a_long_time;
2439                         } else if (rc2 == 0) {
2440                                 /* timed out waiting for unlink */
2441                                 CWARN("ping %s: late network completion\n",
2442                                       libcfs_id2str(id));
2443                         }
2444                 } else if (event.type == LNET_EVENT_REPLY) {
2445                         replied = 1;
2446                         rc = event.mlength;
2447                 }
2448
2449         } while (rc2 <= 0 || !event.unlinked);
2450
2451         if (!replied) {
2452                 if (rc >= 0)
2453                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2454                               libcfs_id2str(id));
2455                 rc = -EIO;
2456                 goto out_1;
2457         }
2458
2459         nob = rc;
2460         LASSERT(nob >= 0 && nob <= infosz);
2461
2462         rc = -EPROTO;                           /* if I can't parse... */
2463
2464         if (nob < 8) {
2465                 /* can't check magic/version */
2466                 CERROR("%s: ping info too short %d\n",
2467                        libcfs_id2str(id), nob);
2468                 goto out_1;
2469         }
2470
2471         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2472                 lnet_swap_pinginfo(info);
2473         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2474                 CERROR("%s: Unexpected magic %08x\n",
2475                        libcfs_id2str(id), info->pi_magic);
2476                 goto out_1;
2477         }
2478
2479         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2480                 CERROR("%s: ping w/o NI status: 0x%x\n",
2481                        libcfs_id2str(id), info->pi_features);
2482                 goto out_1;
2483         }
2484
2485         if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
2486                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2487                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
2488                 goto out_1;
2489         }
2490
2491         if (info->pi_nnis < n_ids)
2492                 n_ids = info->pi_nnis;
2493
2494         if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
2495                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2496                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
2497                 goto out_1;
2498         }
2499
2500         rc = -EFAULT;                           /* If I SEGV... */
2501
2502         for (i = 0; i < n_ids; i++) {
2503                 tmpid.pid = info->pi_pid;
2504                 tmpid.nid = info->pi_ni[i].ns_nid;
2505                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2506                         goto out_1;
2507         }
2508         rc = info->pi_nnis;
2509
2510  out_1:
2511         rc2 = LNetEQFree(eqh);
2512         if (rc2 != 0)
2513                 CERROR("rc2 %d\n", rc2);
2514         LASSERT(rc2 == 0);
2515
2516  out_0:
2517         LIBCFS_FREE(info, infosz);
2518         return rc;
2519 }