Whamcloud - gitweb
LU-5435 lnet: lustre network latency simulation
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2013, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include <lnet/lib-lnet.h>
39 #include <lnet/lib-dlc.h>
40 #ifdef __KERNEL__
41 #include <linux/log2.h>
42 #endif
43
44 #ifdef __KERNEL__
45 #define D_LNI D_CONSOLE
46 #else
47 #define D_LNI D_CONFIG
48 #endif
49
50 lnet_t      the_lnet;                           /* THE state of the network */
51 EXPORT_SYMBOL(the_lnet);
52
53 #ifdef __KERNEL__
54
55 static char *ip2nets = "";
56 CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
57                 "LNET network <- IP table");
58
59 static char *networks = "";
60 CFS_MODULE_PARM(networks, "s", charp, 0444,
61                 "local networks");
62
63 static char *routes = "";
64 CFS_MODULE_PARM(routes, "s", charp, 0444,
65                 "routes to non-local networks");
66
67 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
68 CFS_MODULE_PARM(rnet_htable_size, "i", int, 0444,
69                 "size of remote network hash table");
70
71 static void lnet_ping_target_fini(void);
72 static int lnet_ping(lnet_process_id_t id, int timeout_ms,
73                      lnet_process_id_t *ids, int n_ids);
74
75 static char *
76 lnet_get_routes(void)
77 {
78         return routes;
79 }
80
81 static char *
82 lnet_get_networks(void)
83 {
84         char   *nets;
85         int     rc;
86
87         if (*networks != 0 && *ip2nets != 0) {
88                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
89                                    "'ip2nets' but not both at once\n");
90                 return NULL;
91         }
92
93         if (*ip2nets != 0) {
94                 rc = lnet_parse_ip2nets(&nets, ip2nets);
95                 return (rc == 0) ? nets : NULL;
96         }
97
98         if (*networks != 0)
99                 return networks;
100
101         return "tcp";
102 }
103
104 static void
105 lnet_init_locks(void)
106 {
107         spin_lock_init(&the_lnet.ln_eq_wait_lock);
108         init_waitqueue_head(&the_lnet.ln_eq_waitq);
109         mutex_init(&the_lnet.ln_lnd_mutex);
110         mutex_init(&the_lnet.ln_api_mutex);
111 }
112
/* Kernel build: counterpart of lnet_init_locks(); currently a no-op. */
static void
lnet_fini_locks(void)
{
        /* nothing to do */
}
117
118 #else
119
/*
 * Userland build: routes come from the LNET_ROUTES environment variable;
 * an unset variable yields the empty string.
 */
static char *
lnet_get_routes(void)
{
        char *env = getenv("LNET_ROUTES");

        if (env != NULL)
                return env;

        return "";
}
127
128 static char *
129 lnet_get_networks (void)
130 {
131         static char       default_networks[256];
132         char             *networks = getenv("LNET_NETWORKS");
133         char             *str;
134         char             *sep;
135         int               len;
136         int               nob;
137         struct list_head *tmp;
138
139         if (networks != NULL)
140                 return networks;
141
142         /* In userland, the default 'networks=' is the list of known net types */
143         len = sizeof(default_networks);
144         str = default_networks;
145         *str = 0;
146         sep = "";
147
148         list_for_each(tmp, &the_lnet.ln_lnds) {
149                 lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
150
151                 nob = snprintf(str, len, "%s%s", sep,
152                                libcfs_lnd2str(lnd->lnd_type));
153                 if (nob >= len) {
154                         /* overflowed the string; leave it where it was */
155                         *str = 0;
156                         break;
157                 }
158                 len -= nob;
159                 str += nob;
160                 sep = ",";
161         }
162
163         return default_networks;
164 }
165
166 # ifndef HAVE_LIBPTHREAD
167
168 static void lnet_init_locks(void)
169 {
170         the_lnet.ln_eq_wait_lock = 0;
171         the_lnet.ln_lnd_mutex = 0;
172         the_lnet.ln_api_mutex = 0;
173 }
174
175 static void lnet_fini_locks(void)
176 {
177         LASSERT(the_lnet.ln_api_mutex == 0);
178         LASSERT(the_lnet.ln_lnd_mutex == 0);
179         LASSERT(the_lnet.ln_eq_wait_lock == 0);
180 }
181
182 # else
183
184 static void lnet_init_locks(void)
185 {
186         pthread_cond_init(&the_lnet.ln_eq_cond, NULL);
187         pthread_mutex_init(&the_lnet.ln_eq_wait_lock, NULL);
188         pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL);
189         pthread_mutex_init(&the_lnet.ln_api_mutex, NULL);
190 }
191
192 static void lnet_fini_locks(void)
193 {
194         pthread_mutex_destroy(&the_lnet.ln_api_mutex);
195         pthread_mutex_destroy(&the_lnet.ln_lnd_mutex);
196         pthread_mutex_destroy(&the_lnet.ln_eq_wait_lock);
197         pthread_cond_destroy(&the_lnet.ln_eq_cond);
198 }
199
200 # endif
201 #endif
202
203 static int
204 lnet_create_remote_nets_table(void)
205 {
206         int               i;
207         struct list_head *hash;
208
209         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
210         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
211         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
212         if (hash == NULL) {
213                 CERROR("Failed to create remote nets hash table\n");
214                 return -ENOMEM;
215         }
216
217         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
218                 INIT_LIST_HEAD(&hash[i]);
219         the_lnet.ln_remote_nets_hash = hash;
220         return 0;
221 }
222
223 static void
224 lnet_destroy_remote_nets_table(void)
225 {
226         int i;
227
228         if (the_lnet.ln_remote_nets_hash == NULL)
229                 return;
230
231         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
232                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
233
234         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
235                     LNET_REMOTE_NETS_HASH_SIZE *
236                     sizeof(the_lnet.ln_remote_nets_hash[0]));
237         the_lnet.ln_remote_nets_hash = NULL;
238 }
239
240 static void
241 lnet_destroy_locks(void)
242 {
243         if (the_lnet.ln_res_lock != NULL) {
244                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
245                 the_lnet.ln_res_lock = NULL;
246         }
247
248         if (the_lnet.ln_net_lock != NULL) {
249                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
250                 the_lnet.ln_net_lock = NULL;
251         }
252
253         lnet_fini_locks();
254 }
255
256 static int
257 lnet_create_locks(void)
258 {
259         lnet_init_locks();
260
261         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
262         if (the_lnet.ln_res_lock == NULL)
263                 goto failed;
264
265         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
266         if (the_lnet.ln_net_lock == NULL)
267                 goto failed;
268
269         return 0;
270
271  failed:
272         lnet_destroy_locks();
273         return -ENOMEM;
274 }
275
276 static void lnet_assert_wire_constants(void)
277 {
278         /* Wire protocol assertions generated by 'wirecheck'
279          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
280          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
281          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
282
283         /* Constants... */
284         CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
285         CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
286         CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
287         CLASSERT (LNET_MSG_ACK == 0);
288         CLASSERT (LNET_MSG_PUT == 1);
289         CLASSERT (LNET_MSG_GET == 2);
290         CLASSERT (LNET_MSG_REPLY == 3);
291         CLASSERT (LNET_MSG_HELLO == 4);
292
293         /* Checks for struct ptl_handle_wire_t */
294         CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
295         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
296         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
297         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
298         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
299
300         /* Checks for struct lnet_magicversion_t */
301         CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
302         CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
303         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
304         CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
305         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
306         CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
307         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
308
309         /* Checks for struct lnet_hdr_t */
310         CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
311         CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
312         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
313         CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
314         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
315         CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
316         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
317         CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
318         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
319         CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
320         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
321         CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
322         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
323         CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
324         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
325
326         /* Ack */
327         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
328         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
329         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
330         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
331         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
332         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
333
334         /* Put */
335         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
336         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
337         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
338         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
339         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
340         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
341         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
342         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
343         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
344         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
345
346         /* Get */
347         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
348         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
349         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
350         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
351         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
352         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
353         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
354         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
355         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
356         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
357
358         /* Reply */
359         CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
360         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
361
362         /* Hello */
363         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
364         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
365         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
366         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
367 }
368
369 static lnd_t *
370 lnet_find_lnd_by_type (int type)
371 {
372         lnd_t            *lnd;
373         struct list_head *tmp;
374
375         /* holding lnd mutex */
376         list_for_each(tmp, &the_lnet.ln_lnds) {
377                 lnd = list_entry(tmp, lnd_t, lnd_list);
378
379                 if ((int)lnd->lnd_type == type)
380                         return lnd;
381         }
382         return NULL;
383 }
384
385 void
386 lnet_register_lnd (lnd_t *lnd)
387 {
388         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
389
390         LASSERT(the_lnet.ln_init);
391         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
392         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
393
394         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
395         lnd->lnd_refcount = 0;
396
397         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
398
399         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
400 }
401 EXPORT_SYMBOL(lnet_register_lnd);
402
403 void
404 lnet_unregister_lnd (lnd_t *lnd)
405 {
406         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
407
408         LASSERT(the_lnet.ln_init);
409         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
410         LASSERT(lnd->lnd_refcount == 0);
411
412         list_del(&lnd->lnd_list);
413         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
414
415         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
416 }
417 EXPORT_SYMBOL(lnet_unregister_lnd);
418
419 void
420 lnet_counters_get(lnet_counters_t *counters)
421 {
422         lnet_counters_t *ctr;
423         int             i;
424
425         memset(counters, 0, sizeof(*counters));
426
427         lnet_net_lock(LNET_LOCK_EX);
428
429         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
430                 counters->msgs_max     += ctr->msgs_max;
431                 counters->msgs_alloc   += ctr->msgs_alloc;
432                 counters->errors       += ctr->errors;
433                 counters->send_count   += ctr->send_count;
434                 counters->recv_count   += ctr->recv_count;
435                 counters->route_count  += ctr->route_count;
436                 counters->drop_count   += ctr->drop_count;
437                 counters->send_length  += ctr->send_length;
438                 counters->recv_length  += ctr->recv_length;
439                 counters->route_length += ctr->route_length;
440                 counters->drop_length  += ctr->drop_length;
441
442         }
443         lnet_net_unlock(LNET_LOCK_EX);
444 }
445 EXPORT_SYMBOL(lnet_counters_get);
446
447 void
448 lnet_counters_reset(void)
449 {
450         lnet_counters_t *counters;
451         int             i;
452
453         lnet_net_lock(LNET_LOCK_EX);
454
455         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
456                 memset(counters, 0, sizeof(lnet_counters_t));
457
458         lnet_net_unlock(LNET_LOCK_EX);
459 }
460 EXPORT_SYMBOL(lnet_counters_reset);
461
462 #ifdef LNET_USE_LIB_FREELIST
463
464 int
465 lnet_freelist_init(lnet_freelist_t *fl, int n, int size)
466 {
467         char *space;
468
469         LASSERT (n > 0);
470
471         size += offsetof (lnet_freeobj_t, fo_contents);
472
473         LIBCFS_ALLOC(space, n * size);
474         if (space == NULL)
475                 return (-ENOMEM);
476
477         INIT_LIST_HEAD(&fl->fl_list);
478         fl->fl_objs = space;
479         fl->fl_nobjs = n;
480         fl->fl_objsize = size;
481
482         do {
483                 list_add((struct list_head *)space, &fl->fl_list);
484                 space += size;
485         } while (--n != 0);
486
487         return 0;
488 }
489
490 void
491 lnet_freelist_fini(lnet_freelist_t *fl)
492 {
493         struct list_head *el;
494         int               count;
495
496         if (fl->fl_nobjs == 0)
497                 return;
498
499         count = 0;
500         for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
501                 count++;
502
503         LASSERT (count == fl->fl_nobjs);
504
505         LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
506         memset (fl, 0, sizeof (*fl));
507 }
508
509 #endif /* LNET_USE_LIB_FREELIST */
510
511 static __u64 lnet_create_interface_cookie(void)
512 {
513         /* NB the interface cookie in wire handles guards against delayed
514          * replies and ACKs appearing valid after reboot. Initialisation time,
515          * even if it's only implemented to millisecond resolution is probably
516          * easily good enough. */
517         struct timeval tv;
518         __u64          cookie;
519 #ifndef __KERNEL__
520         int            rc = gettimeofday (&tv, NULL);
521         LASSERT (rc == 0);
522 #else
523         do_gettimeofday(&tv);
524 #endif
525         cookie = tv.tv_sec;
526         cookie *= 1000000;
527         cookie += tv.tv_usec;
528         return cookie;
529 }
530
531 static char *
532 lnet_res_type2str(int type)
533 {
534         switch (type) {
535         default:
536                 LBUG();
537         case LNET_COOKIE_TYPE_MD:
538                 return "MD";
539         case LNET_COOKIE_TYPE_ME:
540                 return "ME";
541         case LNET_COOKIE_TYPE_EQ:
542                 return "EQ";
543         }
544 }
545
546 static void
547 lnet_res_container_cleanup(struct lnet_res_container *rec)
548 {
549         int     count = 0;
550
551         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
552                 return;
553
554         while (!list_empty(&rec->rec_active)) {
555                 struct list_head *e = rec->rec_active.next;
556
557                 list_del_init(e);
558                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
559                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
560
561                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
562                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
563
564                 } else { /* NB: Active MEs should be attached on portals */
565                         LBUG();
566                 }
567                 count++;
568         }
569
570         if (count > 0) {
571                 /* Found alive MD/ME/EQ, user really should unlink/free
572                  * all of them before finalize LNet, but if someone didn't,
573                  * we have to recycle garbage for him */
574                 CERROR("%d active elements on exit of %s container\n",
575                        count, lnet_res_type2str(rec->rec_type));
576         }
577
578 #ifdef LNET_USE_LIB_FREELIST
579         lnet_freelist_fini(&rec->rec_freelist);
580 #endif
581         if (rec->rec_lh_hash != NULL) {
582                 LIBCFS_FREE(rec->rec_lh_hash,
583                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
584                 rec->rec_lh_hash = NULL;
585         }
586
587         rec->rec_type = 0; /* mark it as finalized */
588 }
589
590 static int
591 lnet_res_container_setup(struct lnet_res_container *rec,
592                          int cpt, int type, int objnum, int objsz)
593 {
594         int     rc = 0;
595         int     i;
596
597         LASSERT(rec->rec_type == 0);
598
599         rec->rec_type = type;
600         INIT_LIST_HEAD(&rec->rec_active);
601
602 #ifdef LNET_USE_LIB_FREELIST
603         memset(&rec->rec_freelist, 0, sizeof(rec->rec_freelist));
604         rc = lnet_freelist_init(&rec->rec_freelist, objnum, objsz);
605         if (rc != 0)
606                 goto out;
607 #endif
608         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
609
610         /* Arbitrary choice of hash table size */
611         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
612                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
613         if (rec->rec_lh_hash == NULL) {
614                 rc = -ENOMEM;
615                 goto out;
616         }
617
618         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
619                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
620
621         return 0;
622
623 out:
624         CERROR("Failed to setup %s resource container\n",
625                lnet_res_type2str(type));
626         lnet_res_container_cleanup(rec);
627         return rc;
628 }
629
630 static void
631 lnet_res_containers_destroy(struct lnet_res_container **recs)
632 {
633         struct lnet_res_container       *rec;
634         int                             i;
635
636         cfs_percpt_for_each(rec, i, recs)
637                 lnet_res_container_cleanup(rec);
638
639         cfs_percpt_free(recs);
640 }
641
642 static struct lnet_res_container **
643 lnet_res_containers_create(int type, int objnum, int objsz)
644 {
645         struct lnet_res_container       **recs;
646         struct lnet_res_container       *rec;
647         int                             rc;
648         int                             i;
649
650         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
651         if (recs == NULL) {
652                 CERROR("Failed to allocate %s resource containers\n",
653                        lnet_res_type2str(type));
654                 return NULL;
655         }
656
657         cfs_percpt_for_each(rec, i, recs) {
658                 rc = lnet_res_container_setup(rec, i, type, objnum, objsz);
659                 if (rc != 0) {
660                         lnet_res_containers_destroy(recs);
661                         return NULL;
662                 }
663         }
664
665         return recs;
666 }
667
668 lnet_libhandle_t *
669 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
670 {
671         /* ALWAYS called with lnet_res_lock held */
672         struct list_head        *head;
673         lnet_libhandle_t        *lh;
674         unsigned int            hash;
675
676         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
677                 return NULL;
678
679         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
680         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
681
682         list_for_each_entry(lh, head, lh_hash_chain) {
683                 if (lh->lh_cookie == cookie)
684                         return lh;
685         }
686
687         return NULL;
688 }
689
690 void
691 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
692 {
693         /* ALWAYS called with lnet_res_lock held */
694         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
695         unsigned int    hash;
696
697         lh->lh_cookie = rec->rec_lh_cookie;
698         rec->rec_lh_cookie += 1 << ibits;
699
700         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
701
702         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
703 }
704
705 #ifndef __KERNEL__
706 /**
707  * Reserved API - do not use.
708  * Temporary workaround to allow uOSS and test programs force server
709  * mode in userspace. See comments near ln_server_mode_flag in
710  * lnet/lib-types.h */
711
712 void
713 lnet_server_mode() {
714         the_lnet.ln_server_mode_flag = 1;
715 }
716 #endif
717
718 static int lnet_unprepare(void);
719
720 static int
721 lnet_prepare(lnet_pid_t requested_pid)
722 {
723         /* Prepare to bring up the network */
724         struct lnet_res_container **recs;
725         int                       rc = 0;
726
727         if (requested_pid == LNET_PID_ANY) {
728                 /* Don't instantiate LNET just for me */
729                 return -ENETDOWN;
730         }
731
732         LASSERT (the_lnet.ln_refcount == 0);
733
734         the_lnet.ln_routing = 0;
735
736 #ifdef __KERNEL__
737         LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
738         the_lnet.ln_pid = requested_pid;
739 #else
740         if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
741                 LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
742
743                 if (current_uid() != 0) /* Only root can run user-space server */
744                         return -EPERM;
745                 the_lnet.ln_pid = requested_pid;
746
747         } else {/* client case (liblustre) */
748
749                 /* My PID must be unique on this node and flag I'm userspace */
750                 the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
751         }
752 #endif
753
754         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
755         INIT_LIST_HEAD(&the_lnet.ln_nis);
756         INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
757         INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
758         INIT_LIST_HEAD(&the_lnet.ln_routers);
759         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
760         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
761
762         rc = lnet_create_remote_nets_table();
763         if (rc != 0)
764                 goto failed;
765
766         the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
767
768         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
769                                                 sizeof(lnet_counters_t));
770         if (the_lnet.ln_counters == NULL) {
771                 CERROR("Failed to allocate counters for LNet\n");
772                 rc = -ENOMEM;
773                 goto failed;
774         }
775
776         rc = lnet_peer_tables_create();
777         if (rc != 0)
778                 goto failed;
779
780         rc = lnet_msg_containers_create();
781         if (rc != 0)
782                 goto failed;
783
784         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
785                                       LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS,
786                                       sizeof(lnet_eq_t));
787         if (rc != 0)
788                 goto failed;
789
790         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES,
791                                           sizeof(lnet_me_t));
792         if (recs == NULL)
793                 goto failed;
794
795         the_lnet.ln_me_containers = recs;
796
797         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS,
798                                           sizeof(lnet_libmd_t));
799         if (recs == NULL)
800                 goto failed;
801
802         the_lnet.ln_md_containers = recs;
803
804         rc = lnet_portals_create();
805         if (rc != 0) {
806                 CERROR("Failed to create portals for LNet: %d\n", rc);
807                 goto failed;
808         }
809
810         return 0;
811
812  failed:
813         lnet_unprepare();
814         return rc;
815 }
816
817 static int
818 lnet_unprepare (void)
819 {
820         /* NB no LNET_LOCK since this is the last reference.  All LND instances
821          * have shut down already, so it is safe to unlink and free all
822          * descriptors, even those that appear committed to a network op (eg MD
823          * with non-zero pending count) */
824
825         lnet_fail_nid(LNET_NID_ANY, 0);
826
827         LASSERT(the_lnet.ln_refcount == 0);
828         LASSERT(list_empty(&the_lnet.ln_test_peers));
829         LASSERT(list_empty(&the_lnet.ln_nis));
830         LASSERT(list_empty(&the_lnet.ln_nis_cpt));
831         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
832
833         lnet_portals_destroy();
834
835         if (the_lnet.ln_md_containers != NULL) {
836                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
837                 the_lnet.ln_md_containers = NULL;
838         }
839
840         if (the_lnet.ln_me_containers != NULL) {
841                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
842                 the_lnet.ln_me_containers = NULL;
843         }
844
845         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
846
847         lnet_msg_containers_destroy();
848         lnet_peer_tables_destroy();
849         lnet_rtrpools_free(0);
850
851         if (the_lnet.ln_counters != NULL) {
852                 cfs_percpt_free(the_lnet.ln_counters);
853                 the_lnet.ln_counters = NULL;
854         }
855         lnet_destroy_remote_nets_table();
856
857         return 0;
858 }
859
860 lnet_ni_t  *
861 lnet_net2ni_locked(__u32 net, int cpt)
862 {
863         struct list_head *tmp;
864         lnet_ni_t        *ni;
865
866         LASSERT(cpt != LNET_LOCK_EX);
867
868         list_for_each(tmp, &the_lnet.ln_nis) {
869                 ni = list_entry(tmp, lnet_ni_t, ni_list);
870
871                 if (LNET_NIDNET(ni->ni_nid) == net) {
872                         lnet_ni_addref_locked(ni, cpt);
873                         return ni;
874                 }
875         }
876
877         return NULL;
878 }
879
880 lnet_ni_t *
881 lnet_net2ni(__u32 net)
882 {
883         lnet_ni_t *ni;
884
885         lnet_net_lock(0);
886         ni = lnet_net2ni_locked(net, 0);
887         lnet_net_unlock(0);
888
889         return ni;
890 }
891 EXPORT_SYMBOL(lnet_net2ni);
892
893 static unsigned int
894 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
895 {
896         __u64           key = nid;
897         unsigned int    val;
898
899         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
900
901         if (number == 1)
902                 return 0;
903
904         val = hash_long(key, LNET_CPT_BITS);
905         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
906         if (val < number)
907                 return val;
908
909         return (unsigned int)(key + val + (val >> 1)) % number;
910 }
911
912 int
913 lnet_cpt_of_nid_locked(lnet_nid_t nid)
914 {
915         struct lnet_ni *ni;
916
917         /* must called with hold of lnet_net_lock */
918         if (LNET_CPT_NUMBER == 1)
919                 return 0; /* the only one */
920
921         /* take lnet_net_lock(any) would be OK */
922         if (!list_empty(&the_lnet.ln_nis_cpt)) {
923                 list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
924                         if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
925                                 continue;
926
927                         LASSERT(ni->ni_cpts != NULL);
928                         return ni->ni_cpts[lnet_nid_cpt_hash
929                                            (nid, ni->ni_ncpts)];
930                 }
931         }
932
933         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
934 }
935
936 int
937 lnet_cpt_of_nid(lnet_nid_t nid)
938 {
939         int     cpt;
940         int     cpt2;
941
942         if (LNET_CPT_NUMBER == 1)
943                 return 0; /* the only one */
944
945         if (list_empty(&the_lnet.ln_nis_cpt))
946                 return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
947
948         cpt = lnet_net_lock_current();
949         cpt2 = lnet_cpt_of_nid_locked(nid);
950         lnet_net_unlock(cpt);
951
952         return cpt2;
953 }
954 EXPORT_SYMBOL(lnet_cpt_of_nid);
955
956 int
957 lnet_islocalnet(__u32 net)
958 {
959         struct lnet_ni  *ni;
960         int             cpt;
961
962         cpt = lnet_net_lock_current();
963
964         ni = lnet_net2ni_locked(net, cpt);
965         if (ni != NULL)
966                 lnet_ni_decref_locked(ni, cpt);
967
968         lnet_net_unlock(cpt);
969
970         return ni != NULL;
971 }
972
973 lnet_ni_t  *
974 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
975 {
976         struct lnet_ni   *ni;
977         struct list_head *tmp;
978
979         LASSERT(cpt != LNET_LOCK_EX);
980
981         list_for_each(tmp, &the_lnet.ln_nis) {
982                 ni = list_entry(tmp, lnet_ni_t, ni_list);
983
984                 if (ni->ni_nid == nid) {
985                         lnet_ni_addref_locked(ni, cpt);
986                         return ni;
987                 }
988         }
989
990         return NULL;
991 }
992
993 int
994 lnet_islocalnid(lnet_nid_t nid)
995 {
996         struct lnet_ni  *ni;
997         int             cpt;
998
999         cpt = lnet_net_lock_current();
1000         ni = lnet_nid2ni_locked(nid, cpt);
1001         if (ni != NULL)
1002                 lnet_ni_decref_locked(ni, cpt);
1003         lnet_net_unlock(cpt);
1004
1005         return ni != NULL;
1006 }
1007
/*
 * Count the NIs whose LND provides an lnd_accept callback, i.e. the NIs
 * that need the acceptor.
 *
 * Always 0 when built for userspace without pthread support.
 */
int
lnet_count_acceptor_nis (void)
{
        int              nacceptors = 0;
#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
        struct lnet_ni   *cur;
        int              lock_cpt;

        lock_cpt = lnet_net_lock_current();

        list_for_each_entry(cur, &the_lnet.ln_nis, ni_list) {
                if (cur->ni_lnd->lnd_accept != NULL)
                        nacceptors++;
        }

        lnet_net_unlock(lock_cpt);

#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
        return nacceptors;
}
1031
1032 static lnet_ping_info_t *
1033 lnet_ping_info_create(int num_ni)
1034 {
1035         lnet_ping_info_t *ping_info;
1036         unsigned int     infosz;
1037
1038         infosz = offsetof(lnet_ping_info_t, pi_ni[num_ni]);
1039         LIBCFS_ALLOC(ping_info, infosz);
1040         if (ping_info == NULL) {
1041                 CERROR("Can't allocate ping info[%d]\n", num_ni);
1042                 return NULL;
1043         }
1044
1045         ping_info->pi_nnis = num_ni;
1046         ping_info->pi_pid = the_lnet.ln_pid;
1047         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
1048         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
1049
1050         return ping_info;
1051 }
1052
1053 static inline int
1054 lnet_get_ni_count(void)
1055 {
1056         struct lnet_ni *ni;
1057         int            count = 0;
1058
1059         lnet_net_lock(0);
1060
1061         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
1062                 count++;
1063
1064         lnet_net_unlock(0);
1065
1066         return count;
1067 }
1068
1069 static inline void
1070 lnet_ping_info_free(lnet_ping_info_t *pinfo)
1071 {
1072         LIBCFS_FREE(pinfo,
1073                     offsetof(lnet_ping_info_t,
1074                              pi_ni[pinfo->pi_nnis]));
1075 }
1076
1077 static void
1078 lnet_ping_info_destroy(void)
1079 {
1080         struct lnet_ni  *ni;
1081
1082         lnet_net_lock(LNET_LOCK_EX);
1083
1084         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1085                 lnet_ni_lock(ni);
1086                 ni->ni_status = NULL;
1087                 lnet_ni_unlock(ni);
1088         }
1089
1090         lnet_ping_info_free(the_lnet.ln_ping_info);
1091         the_lnet.ln_ping_info = NULL;
1092
1093         lnet_net_unlock(LNET_LOCK_EX);
1094 }
1095
1096 static void
1097 lnet_ping_event_handler(lnet_event_t *event)
1098 {
1099         lnet_ping_info_t *pinfo = event->md.user_ptr;
1100
1101         if (event->unlinked)
1102                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
1103 }
1104
1105 static int
1106 lnet_ping_info_setup(lnet_ping_info_t **ppinfo, lnet_handle_md_t *md_handle,
1107                      int ni_count, bool set_eq)
1108 {
1109         lnet_handle_me_t  me_handle;
1110         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1111         lnet_md_t         md = {0};
1112         int               rc, rc2;
1113
1114         if (set_eq) {
1115                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
1116                                  &the_lnet.ln_ping_target_eq);
1117                 if (rc != 0) {
1118                         CERROR("Can't allocate ping EQ: %d\n", rc);
1119                         return rc;
1120                 }
1121         }
1122
1123         *ppinfo = lnet_ping_info_create(ni_count);
1124         if (*ppinfo == NULL) {
1125                 rc = -ENOMEM;
1126                 goto failed_0;
1127         }
1128
1129         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1130                           LNET_PROTO_PING_MATCHBITS, 0,
1131                           LNET_UNLINK, LNET_INS_AFTER,
1132                           &me_handle);
1133         if (rc != 0) {
1134                 CERROR("Can't create ping ME: %d\n", rc);
1135                 goto failed_1;
1136         }
1137
1138         /* initialize md content */
1139         md.start     = *ppinfo;
1140         md.length    = offsetof(lnet_ping_info_t,
1141                                 pi_ni[(*ppinfo)->pi_nnis]);
1142         md.threshold = LNET_MD_THRESH_INF;
1143         md.max_size  = 0;
1144         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1145                        LNET_MD_MANAGE_REMOTE;
1146         md.user_ptr  = NULL;
1147         md.eq_handle = the_lnet.ln_ping_target_eq;
1148         md.user_ptr = *ppinfo;
1149
1150         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
1151         if (rc != 0) {
1152                 CERROR("Can't attach ping MD: %d\n", rc);
1153                 goto failed_2;
1154         }
1155
1156         return 0;
1157
1158 failed_2:
1159         rc2 = LNetMEUnlink(me_handle);
1160         LASSERT(rc2 == 0);
1161 failed_1:
1162         lnet_ping_info_free(*ppinfo);
1163         *ppinfo = NULL;
1164 failed_0:
1165         if (set_eq)
1166                 LNetEQFree(the_lnet.ln_ping_target_eq);
1167         return rc;
1168 }
1169
1170 static void
1171 lnet_ping_md_unlink(lnet_ping_info_t *pinfo, lnet_handle_md_t *md_handle)
1172 {
1173         sigset_t        blocked = cfs_block_allsigs();
1174
1175         LNetMDUnlink(*md_handle);
1176         LNetInvalidateHandle(md_handle);
1177
1178         /* NB md could be busy; this just starts the unlink */
1179         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
1180                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
1181                 cfs_pause(cfs_time_seconds(1));
1182         }
1183
1184         cfs_restore_sigs(blocked);
1185 }
1186
1187 static void
1188 lnet_ping_info_install_locked(lnet_ping_info_t *ping_info)
1189 {
1190         int                     i;
1191         lnet_ni_t               *ni;
1192         lnet_ni_status_t        *ns;
1193
1194         i = 0;
1195         list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
1196                 LASSERT(i < ping_info->pi_nnis);
1197
1198                 ns = &ping_info->pi_ni[i];
1199
1200                 ns->ns_nid = ni->ni_nid;
1201
1202                 lnet_ni_lock(ni);
1203                 ns->ns_status = (ni->ni_status != NULL) ?
1204                                 ni->ni_status->ns_status : LNET_NI_STATUS_UP;
1205                 ni->ni_status = ns;
1206                 lnet_ni_unlock(ni);
1207
1208                 i++;
1209         }
1210 }
1211
1212 static void
1213 lnet_ping_target_update(lnet_ping_info_t *pinfo, lnet_handle_md_t md_handle)
1214 {
1215         lnet_ping_info_t *old_pinfo = NULL;
1216         lnet_handle_md_t old_md;
1217
1218         /* switch the NIs to point to the new ping info created */
1219         lnet_net_lock(LNET_LOCK_EX);
1220
1221         if (!the_lnet.ln_routing)
1222                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1223         lnet_ping_info_install_locked(pinfo);
1224
1225         if (the_lnet.ln_ping_info != NULL) {
1226                 old_pinfo = the_lnet.ln_ping_info;
1227                 old_md = the_lnet.ln_ping_target_md;
1228         }
1229         the_lnet.ln_ping_target_md = md_handle;
1230         the_lnet.ln_ping_info = pinfo;
1231
1232         lnet_net_unlock(LNET_LOCK_EX);
1233
1234         if (old_pinfo != NULL) {
1235                 /* unlink the old ping info */
1236                 lnet_ping_md_unlink(old_pinfo, &old_md);
1237                 lnet_ping_info_free(old_pinfo);
1238         }
1239 }
1240
1241 static void
1242 lnet_ping_target_fini(void)
1243 {
1244         int             rc;
1245
1246         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1247                             &the_lnet.ln_ping_target_md);
1248
1249         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1250         LASSERT(rc == 0);
1251
1252         lnet_ping_info_destroy();
1253 }
1254
1255 static int
1256 lnet_ni_tq_credits(lnet_ni_t *ni)
1257 {
1258         int     credits;
1259
1260         LASSERT(ni->ni_ncpts >= 1);
1261
1262         if (ni->ni_ncpts == 1)
1263                 return ni->ni_maxtxcredits;
1264
1265         credits = ni->ni_maxtxcredits / ni->ni_ncpts;
1266         credits = max(credits, 8 * ni->ni_peertxcredits);
1267         credits = min(credits, ni->ni_maxtxcredits);
1268
1269         return credits;
1270 }
1271
1272 static void
1273 lnet_clear_zombies_nis_locked(void)
1274 {
1275         int             i;
1276         int             islo;
1277         lnet_ni_t       *ni;
1278
1279         /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
1280          * and shut them down in guaranteed thread context */
1281         i = 2;
1282         while (!list_empty(&the_lnet.ln_nis_zombie)) {
1283                 int     *ref;
1284                 int     j;
1285
1286                 ni = list_entry(the_lnet.ln_nis_zombie.next,
1287                                 lnet_ni_t, ni_list);
1288                 list_del_init(&ni->ni_list);
1289                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1290                         if (*ref == 0)
1291                                 continue;
1292                         /* still busy, add it back to zombie list */
1293                         list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
1294                         break;
1295                 }
1296
1297                 if (!list_empty(&ni->ni_list)) {
1298                         lnet_net_unlock(LNET_LOCK_EX);
1299                         ++i;
1300                         if ((i & (-i)) == i) {
1301                                 CDEBUG(D_WARNING,
1302                                        "Waiting for zombie LNI %s\n",
1303                                        libcfs_nid2str(ni->ni_nid));
1304                         }
1305                         cfs_pause(cfs_time_seconds(1));
1306                         lnet_net_lock(LNET_LOCK_EX);
1307                         continue;
1308                 }
1309
1310                 ni->ni_lnd->lnd_refcount--;
1311                 lnet_net_unlock(LNET_LOCK_EX);
1312
1313                 islo = ni->ni_lnd->lnd_type == LOLND;
1314
1315                 LASSERT(!in_interrupt());
1316                 (ni->ni_lnd->lnd_shutdown)(ni);
1317
1318                 /* can't deref lnd anymore now; it might have unregistered
1319                  * itself...  */
1320
1321                 if (!islo)
1322                         CDEBUG(D_LNI, "Removed LNI %s\n",
1323                               libcfs_nid2str(ni->ni_nid));
1324
1325                 lnet_ni_free(ni);
1326                 i = 2;
1327                 lnet_net_lock(LNET_LOCK_EX);
1328         }
1329 }
1330
1331 static void
1332 lnet_shutdown_lndnis(void)
1333 {
1334         int             i;
1335         lnet_ni_t       *ni;
1336
1337         /* NB called holding the global mutex */
1338
1339         /* All quiet on the API front */
1340         LASSERT(!the_lnet.ln_shutdown);
1341         LASSERT(the_lnet.ln_refcount == 0);
1342         LASSERT(list_empty(&the_lnet.ln_nis_zombie));
1343
1344         lnet_net_lock(LNET_LOCK_EX);
1345         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1346
1347         /* Unlink NIs from the global table */
1348         while (!list_empty(&the_lnet.ln_nis)) {
1349                 ni = list_entry(the_lnet.ln_nis.next,
1350                                 lnet_ni_t, ni_list);
1351                 /* move it to zombie list and nobody can find it anymore */
1352                 list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
1353                 lnet_ni_decref_locked(ni, 0);   /* drop ln_nis' ref */
1354
1355                 if (!list_empty(&ni->ni_cptlist)) {
1356                         list_del_init(&ni->ni_cptlist);
1357                         lnet_ni_decref_locked(ni, 0);
1358                 }
1359         }
1360
1361         /* Drop the cached eqwait NI. */
1362         if (the_lnet.ln_eq_waitni != NULL) {
1363                 lnet_ni_decref_locked(the_lnet.ln_eq_waitni, 0);
1364                 the_lnet.ln_eq_waitni = NULL;
1365         }
1366
1367         /* Drop the cached loopback NI. */
1368         if (the_lnet.ln_loni != NULL) {
1369                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1370                 the_lnet.ln_loni = NULL;
1371         }
1372
1373         lnet_net_unlock(LNET_LOCK_EX);
1374
1375         /* Clear lazy portals and drop delayed messages which hold refs
1376          * on their lnet_msg_t::msg_rxpeer */
1377         for (i = 0; i < the_lnet.ln_nportals; i++)
1378                 LNetClearLazyPortal(i);
1379
1380         /* Clear the peer table and wait for all peers to go (they hold refs on
1381          * their NIs) */
1382         lnet_peer_tables_cleanup(NULL);
1383
1384         lnet_net_lock(LNET_LOCK_EX);
1385
1386         lnet_clear_zombies_nis_locked();
1387         the_lnet.ln_shutdown = 0;
1388         lnet_net_unlock(LNET_LOCK_EX);
1389 }
1390
1391 int
1392 lnet_shutdown_lndni(__u32 net)
1393 {
1394         lnet_ping_info_t *pinfo;
1395         lnet_handle_md_t md_handle;
1396         lnet_ni_t       *found_ni = NULL;
1397         int             ni_count;
1398         int             rc;
1399
1400         if (LNET_NETTYP(net) == LOLND)
1401                 return -EINVAL;
1402
1403         ni_count = lnet_get_ni_count();
1404
1405         /* create and link a new ping info, before removing the old one */
1406         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count - 1, false);
1407         if (rc != 0)
1408                 return rc;
1409
1410         /* proceed with shutting down the NI */
1411         lnet_net_lock(LNET_LOCK_EX);
1412
1413         found_ni = lnet_net2ni_locked(net, 0);
1414         if (found_ni == NULL) {
1415                 lnet_net_unlock(LNET_LOCK_EX);
1416                 lnet_ping_md_unlink(pinfo, &md_handle);
1417                 lnet_ping_info_free(pinfo);
1418                 return -EINVAL;
1419         }
1420
1421         /* decrement the reference counter on found_ni which was
1422          * incremented when we called lnet_net2ni_locked() */
1423         lnet_ni_decref_locked(found_ni, 0);
1424
1425         /* Move ni to zombie list so nobody can find it anymore */
1426         list_move(&found_ni->ni_list, &the_lnet.ln_nis_zombie);
1427
1428         /* Drop the lock reference for the ln_nis ref. */
1429         lnet_ni_decref_locked(found_ni, 0);
1430
1431         if (!list_empty(&found_ni->ni_cptlist)) {
1432                 list_del_init(&found_ni->ni_cptlist);
1433                 lnet_ni_decref_locked(found_ni, 0);
1434         }
1435
1436         lnet_net_unlock(LNET_LOCK_EX);
1437
1438         /* Do peer table cleanup for this ni */
1439         lnet_peer_tables_cleanup(found_ni);
1440
1441         lnet_net_lock(LNET_LOCK_EX);
1442         lnet_clear_zombies_nis_locked();
1443         lnet_net_unlock(LNET_LOCK_EX);
1444
1445         lnet_ping_target_update(pinfo, md_handle);
1446
1447         return 0;
1448 }
1449
1450 static int
1451 lnet_startup_lndnis(struct list_head *nilist, __s32 peer_timeout,
1452                     __s32 peer_cr, __s32 peer_buf_cr, __s32 credits,
1453                     int *ni_count)
1454 {
1455         int                     rc = 0;
1456         struct lnet_ni          *ni;
1457         int                     lnd_type;
1458         lnd_t                   *lnd;
1459         struct lnet_tx_queue    *tq;
1460         int                     i;
1461
1462         while (!list_empty(nilist)) {
1463                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1464                 lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
1465
1466                 if (!libcfs_isknown_lnd(lnd_type))
1467                         goto failed;
1468
1469                 if (lnd_type == CIBLND    ||
1470                     lnd_type == OPENIBLND ||
1471                     lnd_type == IIBLND    ||
1472                     lnd_type == VIBLND) {
1473                         CERROR("LND %s obsoleted\n",
1474                                libcfs_lnd2str(lnd_type));
1475                         goto failed;
1476                 }
1477
1478                 /* Make sure this new NI is unique. */
1479                 lnet_net_lock(LNET_LOCK_EX);
1480                 if (!lnet_net_unique(LNET_NIDNET(ni->ni_nid),
1481                                      &the_lnet.ln_nis)) {
1482                         if (lnd_type == LOLND) {
1483                                 lnet_net_unlock(LNET_LOCK_EX);
1484                                 list_del(&ni->ni_list);
1485                                 lnet_ni_free(ni);
1486                                 continue;
1487                         }
1488
1489                         CERROR("Net %s is not unique\n",
1490                                libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
1491                         lnet_net_unlock(LNET_LOCK_EX);
1492                         goto failed;
1493                 }
1494                 lnet_net_unlock(LNET_LOCK_EX);
1495
1496                 LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1497                 lnd = lnet_find_lnd_by_type(lnd_type);
1498
1499 #ifdef __KERNEL__
1500                 if (lnd == NULL) {
1501                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1502                         rc = request_module("%s",
1503                                                 libcfs_lnd2modname(lnd_type));
1504                         LNET_MUTEX_LOCK(&the_lnet.ln_lnd_mutex);
1505
1506                         lnd = lnet_find_lnd_by_type(lnd_type);
1507                         if (lnd == NULL) {
1508                                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1509                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1510                                        libcfs_lnd2str(lnd_type),
1511                                        libcfs_lnd2modname(lnd_type), rc);
1512 #ifndef HAVE_MODULE_LOADING_SUPPORT
1513                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1514                                          "compiled with kernel module "
1515                                          "loading support.");
1516 #endif
1517                                 goto failed;
1518                         }
1519                 }
1520 #else
1521                 if (lnd == NULL) {
1522                         LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1523                         CERROR("LND %s not supported\n",
1524                                libcfs_lnd2str(lnd_type));
1525                         goto failed;
1526                 }
1527 #endif
1528
1529                 lnet_net_lock(LNET_LOCK_EX);
1530                 lnd->lnd_refcount++;
1531                 lnet_net_unlock(LNET_LOCK_EX);
1532
1533                 ni->ni_lnd = lnd;
1534
1535                 rc = (lnd->lnd_startup)(ni);
1536
1537                 LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex);
1538
1539                 if (rc != 0) {
1540                         LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
1541                                            "\n",
1542                                            rc, libcfs_lnd2str(lnd->lnd_type));
1543                         lnet_net_lock(LNET_LOCK_EX);
1544                         lnd->lnd_refcount--;
1545                         lnet_net_unlock(LNET_LOCK_EX);
1546                         goto failed;
1547                 }
1548
1549                 /* If given some LND tunable parameters, parse those now to
1550                  * override the values in the NI structure. */
1551                 if (peer_buf_cr >= 0)
1552                         ni->ni_peerrtrcredits = peer_buf_cr;
1553                 if (peer_timeout >= 0)
1554                         ni->ni_peertimeout = peer_timeout;
1555                 /*
1556                  * TODO
1557                  * Note: For now, don't allow the user to change
1558                  * peertxcredits as this number is used in the
1559                  * IB LND to control queue depth.
1560                  * if (peer_cr != -1)
1561                  *      ni->ni_peertxcredits = peer_cr;
1562                  */
1563                 if (credits >= 0)
1564                         ni->ni_maxtxcredits = credits;
1565
1566                 LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
1567
1568                 list_del(&ni->ni_list);
1569
1570                 lnet_net_lock(LNET_LOCK_EX);
1571                 /* refcount for ln_nis */
1572                 lnet_ni_addref_locked(ni, 0);
1573                 list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1574                 if (ni->ni_cpts != NULL) {
1575                         list_add_tail(&ni->ni_cptlist,
1576                                       &the_lnet.ln_nis_cpt);
1577                         lnet_ni_addref_locked(ni, 0);
1578                 }
1579
1580                 lnet_net_unlock(LNET_LOCK_EX);
1581
1582                 /* increment the ni_count here to account for the LOLND as
1583                  * well.  If we increment past this point then the number
1584                  * of count will be missing the LOLND, and then ping and
1585                  * will not report the LOLND
1586                  */
1587                 if (ni_count != NULL)
1588                         (*ni_count)++;
1589
1590                 if (lnd->lnd_type == LOLND) {
1591                         lnet_ni_addref(ni);
1592                         LASSERT(the_lnet.ln_loni == NULL);
1593                         the_lnet.ln_loni = ni;
1594                         continue;
1595                 }
1596
1597 #ifndef __KERNEL__
1598                 if (lnd->lnd_wait != NULL) {
1599                         if (the_lnet.ln_eq_waitni == NULL) {
1600                                 lnet_ni_addref(ni);
1601                                 the_lnet.ln_eq_waitni = ni;
1602                         }
1603                 } else {
1604 # ifndef HAVE_LIBPTHREAD
1605                         LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a "
1606                                            "single-threaded runtime\n",
1607                                            libcfs_lnd2str(lnd_type));
1608                         goto failed;
1609 # endif
1610                 }
1611 #endif
1612                 if (ni->ni_peertxcredits == 0 ||
1613                     ni->ni_maxtxcredits == 0) {
1614                         LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1615                                            libcfs_lnd2str(lnd->lnd_type),
1616                                            ni->ni_peertxcredits == 0 ?
1617                                            "" : "per-peer ");
1618                         goto failed;
1619                 }
1620
1621                 cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1622                         tq->tq_credits_min =
1623                         tq->tq_credits_max =
1624                         tq->tq_credits = lnet_ni_tq_credits(ni);
1625                 }
1626
1627                 CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1628                        libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
1629                        lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1630                        ni->ni_peerrtrcredits, ni->ni_peertimeout);
1631         }
1632
1633         return 0;
1634 failed:
1635         while (!list_empty(nilist)) {
1636                 ni = list_entry(nilist->next, lnet_ni_t, ni_list);
1637                 list_del(&ni->ni_list);
1638                 lnet_ni_free(ni);
1639         }
1640         return -EINVAL;
1641 }
1642
1643 /**
1644  * Initialize LNet library.
1645  *
1646  * Only userspace program needs to call this function - it's automatically
1647  * called in the kernel at module loading time. Caller has to call LNetFini()
1648  * after a call to LNetInit(), if and only if the latter returned 0. It must
1649  * be called exactly once.
1650  *
1651  * \return 0 on success, and -ve on failures.
1652  */
1653 int
1654 LNetInit(void)
1655 {
1656         int     rc;
1657
1658         lnet_assert_wire_constants();
1659         LASSERT(!the_lnet.ln_init);
1660
1661         memset(&the_lnet, 0, sizeof(the_lnet));
1662
1663         /* refer to global cfs_cpt_table for now */
1664         the_lnet.ln_cpt_table   = cfs_cpt_table;
1665         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1666
1667         LASSERT(the_lnet.ln_cpt_number > 0);
1668         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1669                 /* we are under risk of consuming all lh_cookie */
1670                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1671                        "please change setting of CPT-table and retry\n",
1672                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1673                 return -1;
1674         }
1675
1676         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1677                 the_lnet.ln_cpt_bits++;
1678
1679         rc = lnet_create_locks();
1680         if (rc != 0) {
1681                 CERROR("Can't create LNet global locks: %d\n", rc);
1682                 return -1;
1683         }
1684
1685         the_lnet.ln_refcount = 0;
1686         the_lnet.ln_init = 1;
1687         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1688         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1689         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1690         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1691
1692 #ifdef __KERNEL__
1693         /* The hash table size is the number of bits it takes to express the set
1694          * ln_num_routes, minus 1 (better to under estimate than over so we
1695          * don't waste memory). */
1696         if (rnet_htable_size <= 0)
1697                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1698         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1699                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1700         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1701                                            order_base_2(rnet_htable_size) - 1);
1702
1703         /* All LNDs apart from the LOLND are in separate modules.  They
1704          * register themselves when their module loads, and unregister
1705          * themselves when their module is unloaded. */
1706 #else
1707         the_lnet.ln_remote_nets_hbits = 8;
1708
1709         /* Register LNDs
1710          * NB the order here determines default 'networks=' order */
1711 # ifdef HAVE_LIBPTHREAD
1712         LNET_REGISTER_ULND(the_tcplnd);
1713 # endif
1714 #endif
1715         lnet_register_lnd(&the_lolnd);
1716         return 0;
1717 }
1718 EXPORT_SYMBOL(LNetInit);
1719
1720 /**
1721  * Finalize LNet library.
1722  *
1723  * Only userspace program needs to call this function. It can be called
1724  * at most once.
1725  *
1726  * \pre LNetInit() called with success.
1727  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1728  */
1729 void
1730 LNetFini(void)
1731 {
1732         LASSERT(the_lnet.ln_init);
1733         LASSERT(the_lnet.ln_refcount == 0);
1734
1735         while (!list_empty(&the_lnet.ln_lnds))
1736                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1737                                                lnd_t, lnd_list));
1738         lnet_destroy_locks();
1739
1740         the_lnet.ln_init = 0;
1741 }
1742 EXPORT_SYMBOL(LNetFini);
1743
1744 /**
1745  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1746  *
1747  * Userspace program should call this after a successful call to LNetInit().
1748  * Users must call this function at least once before any other functions.
1749  * For each successful call there must be a corresponding call to
1750  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1751  * ignored.
1752  *
1753  * The PID used by LNet may be different from the one requested.
1754  * See LNetGetId().
1755  *
1756  * \param requested_pid PID requested by the caller.
1757  *
1758  * \return >= 0 on success, and < 0 error code on failures.
1759  */
1760 int
1761 LNetNIInit(lnet_pid_t requested_pid)
1762 {
1763         int                     im_a_router = 0;
1764         int                     rc;
1765         int                     ni_count = 0;
1766         int                     lnd_type;
1767         struct lnet_ni          *ni;
1768         lnet_ping_info_t        *pinfo;
1769         lnet_handle_md_t        md_handle;
1770         struct list_head        net_head;
1771
1772         INIT_LIST_HEAD(&net_head);
1773
1774         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1775
1776         LASSERT(the_lnet.ln_init);
1777         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1778
1779         if (the_lnet.ln_refcount > 0) {
1780                 rc = the_lnet.ln_refcount++;
1781                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1782                 return rc;
1783         }
1784
1785         rc = lnet_prepare(requested_pid);
1786         if (rc != 0)
1787                 goto failed0;
1788
1789         rc = lnet_parse_networks(&net_head,
1790                                  !the_lnet.ln_nis_from_mod_params ?
1791                                    lnet_get_networks() : "");
1792         if (rc < 0)
1793                 goto failed1;
1794
1795         rc = lnet_startup_lndnis(&net_head, -1, -1, -1, -1, &ni_count);
1796         if (rc != 0)
1797                 goto failed1;
1798
1799         if (the_lnet.ln_eq_waitni != NULL && ni_count > 1) {
1800                 lnd_type = the_lnet.ln_eq_waitni->ni_lnd->lnd_type;
1801                 LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
1802                                    "\n",
1803                                    libcfs_lnd2str(lnd_type));
1804                 goto failed2;
1805         }
1806
1807         rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1808         if (rc != 0)
1809                 goto failed2;
1810
1811         rc = lnet_check_routes();
1812         if (rc != 0)
1813                 goto failed2;
1814
1815         rc = lnet_rtrpools_alloc(im_a_router);
1816         if (rc != 0)
1817                 goto failed2;
1818
1819         rc = lnet_acceptor_start();
1820         if (rc != 0)
1821                 goto failed2;
1822         the_lnet.ln_refcount = 1;
1823         /* Now I may use my own API functions... */
1824
1825         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1826         if (rc != 0)
1827                 goto failed3;
1828
1829         lnet_ping_target_update(pinfo, md_handle);
1830
1831         rc = lnet_router_checker_start();
1832         if (rc != 0)
1833                 goto failed4;
1834
1835         lnet_fault_init();
1836         lnet_proc_init();
1837
1838         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1839
1840         return 0;
1841
1842 failed4:
1843         the_lnet.ln_refcount = 0;
1844         lnet_ping_md_unlink(pinfo, &md_handle);
1845         lnet_ping_info_free(pinfo);
1846 failed3:
1847         lnet_acceptor_stop();
1848         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1849         LASSERT(rc == 0);
1850 failed2:
1851         lnet_destroy_routes();
1852         lnet_shutdown_lndnis();
1853 failed1:
1854         lnet_unprepare();
1855 failed0:
1856         LASSERT(rc < 0);
1857         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1858         while (!list_empty(&net_head)) {
1859                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
1860                 list_del_init(&ni->ni_list);
1861                 lnet_ni_free(ni);
1862         }
1863         return rc;
1864 }
1865 EXPORT_SYMBOL(LNetNIInit);
1866
1867 /**
1868  * Stop LNet interfaces, routing, and forwarding.
1869  *
1870  * Users must call this function once for each successful call to LNetNIInit().
1871  * Once the LNetNIFini() operation has been started, the results of pending
1872  * API operations are undefined.
1873  *
1874  * \return always 0 for current implementation.
1875  */
1876 int
1877 LNetNIFini()
1878 {
1879         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
1880
1881         LASSERT (the_lnet.ln_init);
1882         LASSERT (the_lnet.ln_refcount > 0);
1883
1884         if (the_lnet.ln_refcount != 1) {
1885                 the_lnet.ln_refcount--;
1886         } else {
1887                 LASSERT(!the_lnet.ln_niinit_self);
1888
1889                 lnet_fault_fini();
1890
1891                 lnet_proc_fini();
1892                 lnet_router_checker_stop();
1893                 lnet_ping_target_fini();
1894
1895                 /* Teardown fns that use my own API functions BEFORE here */
1896                 the_lnet.ln_refcount = 0;
1897
1898                 lnet_acceptor_stop();
1899                 lnet_destroy_routes();
1900                 lnet_shutdown_lndnis();
1901                 lnet_unprepare();
1902         }
1903
1904         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
1905         return 0;
1906 }
1907 EXPORT_SYMBOL(LNetNIFini);
1908
1909 /**
1910  * Grabs the ni data from the ni structure and fills the out
1911  * parameters
1912  *
1913  * \param[in] ni network        interface structure
1914  * \param[out] cpt_count        the number of cpts the ni is on
1915  * \param[out] nid              Network Interface ID
1916  * \param[out] peer_timeout     NI peer timeout
1917  * \param[out] peer_tx_crdits   NI peer transmit credits
1918  * \param[out] peer_rtr_credits NI peer router credits
1919  * \param[out] max_tx_credits   NI max transmit credit
1920  * \param[out] net_config       Network configuration
1921  */
1922 static void
1923 lnet_fill_ni_info(struct lnet_ni *ni, __u32 *cpt_count, __u64 *nid,
1924                   int *peer_timeout, int *peer_tx_credits,
1925                   int *peer_rtr_credits, int *max_tx_credits,
1926                   struct lnet_ioctl_net_config *net_config)
1927 {
1928         int i;
1929
1930         if (ni == NULL)
1931                 return;
1932
1933         if (net_config == NULL)
1934                 return;
1935
1936         CLASSERT(ARRAY_SIZE(ni->ni_interfaces) ==
1937                  ARRAY_SIZE(net_config->ni_interfaces));
1938
1939         if (ni->ni_interfaces[0] != NULL) {
1940                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1941                         if (ni->ni_interfaces[i] != NULL) {
1942                                 strncpy(net_config->ni_interfaces[i],
1943                                         ni->ni_interfaces[i],
1944                                         sizeof(net_config->ni_interfaces[i]));
1945                         }
1946                 }
1947         }
1948
1949         *nid = ni->ni_nid;
1950         *peer_timeout = ni->ni_peertimeout;
1951         *peer_tx_credits = ni->ni_peertxcredits;
1952         *peer_rtr_credits = ni->ni_peerrtrcredits;
1953         *max_tx_credits = ni->ni_maxtxcredits;
1954
1955         net_config->ni_status = ni->ni_status->ns_status;
1956
1957         for (i = 0;
1958              ni->ni_cpts != NULL && i < ni->ni_ncpts &&
1959              i < LNET_MAX_SHOW_NUM_CPT;
1960              i++)
1961                 net_config->ni_cpts[i] = ni->ni_cpts[i];
1962
1963         *cpt_count = ni->ni_ncpts;
1964 }
1965
1966 int
1967 lnet_get_net_config(int idx, __u32 *cpt_count, __u64 *nid, int *peer_timeout,
1968                     int *peer_tx_credits, int *peer_rtr_credits,
1969                     int *max_tx_credits,
1970                     struct lnet_ioctl_net_config *net_config)
1971 {
1972         struct lnet_ni          *ni;
1973         struct list_head        *tmp;
1974         int                     cpt;
1975         int                     rc = -ENOENT;
1976
1977         cpt = lnet_net_lock_current();
1978
1979         list_for_each(tmp, &the_lnet.ln_nis) {
1980                 ni = list_entry(tmp, lnet_ni_t, ni_list);
1981                 if (idx-- == 0) {
1982                         rc = 0;
1983                         lnet_ni_lock(ni);
1984                         lnet_fill_ni_info(ni, cpt_count, nid, peer_timeout,
1985                                           peer_tx_credits, peer_rtr_credits,
1986                                           max_tx_credits, net_config);
1987                         lnet_ni_unlock(ni);
1988                         break;
1989                 }
1990         }
1991
1992         lnet_net_unlock(cpt);
1993         return rc;
1994 }
1995
1996 int
1997 lnet_dyn_add_ni(lnet_pid_t requested_pid, char *nets,
1998                 __s32 peer_timeout, __s32 peer_cr, __s32 peer_buf_cr,
1999                 __s32 credits)
2000 {
2001         lnet_ping_info_t        *pinfo;
2002         lnet_handle_md_t        md_handle;
2003         struct lnet_ni          *ni;
2004         struct list_head        net_head;
2005         int                     rc;
2006
2007         INIT_LIST_HEAD(&net_head);
2008
2009         /* Create a ni structure for the network string */
2010         rc = lnet_parse_networks(&net_head, nets);
2011         if (rc < 0)
2012                 return rc;
2013
2014         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2015
2016         if (rc > 1) {
2017                 rc = -EINVAL; /* only add one interface per call */
2018                 goto failed0;
2019         }
2020
2021         rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
2022                                   false);
2023         if (rc != 0)
2024                 goto failed0;
2025
2026         rc = lnet_startup_lndnis(&net_head, peer_timeout, peer_cr,
2027                                  peer_buf_cr, credits, NULL);
2028         if (rc != 0)
2029                 goto failed1;
2030
2031         lnet_ping_target_update(pinfo, md_handle);
2032         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2033
2034         return 0;
2035
2036 failed1:
2037         lnet_ping_md_unlink(pinfo, &md_handle);
2038         lnet_ping_info_free(pinfo);
2039 failed0:
2040         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2041         while (!list_empty(&net_head)) {
2042                 ni = list_entry(net_head.next, struct lnet_ni, ni_list);
2043                 list_del_init(&ni->ni_list);
2044                 lnet_ni_free(ni);
2045         }
2046         return rc;
2047 }
2048
2049 int
2050 lnet_dyn_del_ni(__u32 net)
2051 {
2052         int rc;
2053
2054         LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2055         rc = lnet_shutdown_lndni(net);
2056         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2057
2058         return rc;
2059 }
2060
2061 /**
2062  * This is an ugly hack to export IOC_LIBCFS_DEBUG_PEER and
2063  * IOC_LIBCFS_PORTALS_COMPATIBILITY commands to users, by tweaking the LNet
2064  * internal ioctl handler.
2065  *
2066  * IOC_LIBCFS_PORTALS_COMPATIBILITY is now deprecated, don't use it.
2067  *
2068  * \param cmd IOC_LIBCFS_DEBUG_PEER to print debugging data about a peer.
2069  * The data will be printed to system console. Don't use it excessively.
2070  * \param arg A pointer to lnet_process_id_t, process ID of the peer.
2071  *
2072  * \return Always return 0 when called by users directly (i.e., not via ioctl).
2073  */
2074 int
2075 LNetCtl(unsigned int cmd, void *arg)
2076 {
2077         struct libcfs_ioctl_data *data = arg;
2078         struct lnet_ioctl_config_data *config;
2079         lnet_process_id_t         id = {0};
2080         lnet_ni_t                *ni;
2081         int                       rc;
2082
2083         CLASSERT(LIBCFS_IOC_DATA_MAX >= sizeof(struct lnet_ioctl_net_config) +
2084                                         sizeof(struct lnet_ioctl_config_data));
2085         LASSERT(the_lnet.ln_init);
2086
2087         switch (cmd) {
2088         case IOC_LIBCFS_GET_NI:
2089                 rc = LNetGetId(data->ioc_count, &id);
2090                 data->ioc_nid = id.nid;
2091                 return rc;
2092
2093         case IOC_LIBCFS_FAIL_NID:
2094                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2095
2096         case IOC_LIBCFS_ADD_ROUTE:
2097                 config = arg;
2098                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2099                 rc = lnet_add_route(config->cfg_net,
2100                                     config->cfg_config_u.cfg_route.rtr_hop,
2101                                     config->cfg_nid,
2102                                     config->cfg_config_u.cfg_route.
2103                                         rtr_priority);
2104                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2105                 return (rc != 0) ? rc : lnet_check_routes();
2106
2107         case IOC_LIBCFS_DEL_ROUTE:
2108                 config = arg;
2109                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2110                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2111                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2112                 return rc;
2113
2114         case IOC_LIBCFS_GET_ROUTE:
2115                 config = arg;
2116                 return lnet_get_route(config->cfg_count,
2117                                       &config->cfg_net,
2118                                       &config->cfg_config_u.cfg_route.rtr_hop,
2119                                       &config->cfg_nid,
2120                                       &config->cfg_config_u.cfg_route.rtr_flags,
2121                                       &config->cfg_config_u.cfg_route.
2122                                         rtr_priority);
2123
2124         case IOC_LIBCFS_GET_NET: {
2125                 struct lnet_ioctl_net_config *net_config;
2126                 config = arg;
2127                 net_config = (struct lnet_ioctl_net_config *)
2128                         config->cfg_bulk;
2129                 if (config == NULL || net_config == NULL)
2130                         return -1;
2131
2132                 return lnet_get_net_config(config->cfg_count,
2133                                            &config->cfg_ncpts,
2134                                            &config->cfg_nid,
2135                                            &config->cfg_config_u.
2136                                                 cfg_net.net_peer_timeout,
2137                                            &config->cfg_config_u.cfg_net.
2138                                                 net_peer_tx_credits,
2139                                            &config->cfg_config_u.cfg_net.
2140                                                 net_peer_rtr_credits,
2141                                            &config->cfg_config_u.cfg_net.
2142                                                 net_max_tx_credits,
2143                                            net_config);
2144         }
2145
2146         case IOC_LIBCFS_GET_LNET_STATS:
2147         {
2148                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2149
2150                 lnet_counters_get(&lnet_stats->st_cntrs);
2151                 return 0;
2152         }
2153
2154 #if defined(__KERNEL__) && defined(LNET_ROUTER)
2155         case IOC_LIBCFS_CONFIG_RTR:
2156                 config = arg;
2157                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2158                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2159                         rc = lnet_rtrpools_enable();
2160                         LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2161                         return rc;
2162                 }
2163                 lnet_rtrpools_disable();
2164                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2165                 return 0;
2166
2167         case IOC_LIBCFS_ADD_BUF:
2168                 config = arg;
2169                 LNET_MUTEX_LOCK(&the_lnet.ln_api_mutex);
2170                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2171                                                 buf_tiny,
2172                                           config->cfg_config_u.cfg_buffers.
2173                                                 buf_small,
2174                                           config->cfg_config_u.cfg_buffers.
2175                                                 buf_large);
2176                 LNET_MUTEX_UNLOCK(&the_lnet.ln_api_mutex);
2177                 return rc;
2178 #endif
2179
2180         case IOC_LIBCFS_GET_BUF: {
2181                 struct lnet_ioctl_pool_cfg *pool_cfg;
2182                 config = arg;
2183                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2184                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2185         }
2186
2187         case IOC_LIBCFS_GET_PEER_INFO: {
2188                 struct lnet_ioctl_peer *peer_info = arg;
2189                 return lnet_get_peer_info(
2190                    peer_info->pr_count,
2191                    &peer_info->pr_nid,
2192                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2193                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2194                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2195                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2196                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2197                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2198                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2199                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2200         }
2201
2202         case IOC_LIBCFS_NOTIFY_ROUTER:
2203                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2204                                    cfs_time_current() -
2205                                    cfs_time_seconds(cfs_time_current_sec() -
2206                                                     (time_t)data->ioc_u64[0]));
2207
2208         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
2209                 /* This can be removed once lustre stops calling it */
2210                 return 0;
2211
2212         case IOC_LIBCFS_LNET_DIST:
2213                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2214                 if (rc < 0 && rc != -EHOSTUNREACH)
2215                         return rc;
2216
2217                 data->ioc_u32[0] = rc;
2218                 return 0;
2219
2220         case IOC_LIBCFS_TESTPROTOCOMPAT:
2221                 lnet_net_lock(LNET_LOCK_EX);
2222                 the_lnet.ln_testprotocompat = data->ioc_flags;
2223                 lnet_net_unlock(LNET_LOCK_EX);
2224                 return 0;
2225
2226         case IOC_LIBCFS_LNET_FAULT:
2227                 return lnet_fault_ctl(data->ioc_flags, data);
2228
2229         case IOC_LIBCFS_PING:
2230                 id.nid = data->ioc_nid;
2231                 id.pid = data->ioc_u32[0];
2232                 rc = lnet_ping(id, data->ioc_u32[1], /* timeout */
2233                                (lnet_process_id_t __user *)data->ioc_pbuf1,
2234                                data->ioc_plen1/sizeof(lnet_process_id_t));
2235                 if (rc < 0)
2236                         return rc;
2237                 data->ioc_count = rc;
2238                 return 0;
2239
2240         case IOC_LIBCFS_DEBUG_PEER: {
2241                 /* CAVEAT EMPTOR: this one designed for calling directly; not
2242                  * via an ioctl */
2243                 id = *((lnet_process_id_t *) arg);
2244
2245                 lnet_debug_peer(id.nid);
2246
2247                 ni = lnet_net2ni(LNET_NIDNET(id.nid));
2248                 if (ni == NULL) {
2249                         CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(id));
2250                 } else {
2251                         if (ni->ni_lnd->lnd_ctl == NULL) {
2252                                 CDEBUG(D_WARNING, "No ctl for %s\n",
2253                                        libcfs_id2str(id));
2254                         } else {
2255                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2256                         }
2257
2258                         lnet_ni_decref(ni);
2259                 }
2260                 return 0;
2261         }
2262
2263         default:
2264                 ni = lnet_net2ni(data->ioc_net);
2265                 if (ni == NULL)
2266                         return -EINVAL;
2267
2268                 if (ni->ni_lnd->lnd_ctl == NULL)
2269                         rc = -EINVAL;
2270                 else
2271                         rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
2272
2273                 lnet_ni_decref(ni);
2274                 return rc;
2275         }
2276         /* not reached */
2277 }
2278 EXPORT_SYMBOL(LNetCtl);
2279
2280 /**
2281  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
2282  * all interfaces share a same PID, as requested by LNetNIInit().
2283  *
2284  * \param index Index of the interface to look up.
2285  * \param id On successful return, this location will hold the
2286  * lnet_process_id_t ID of the interface.
2287  *
2288  * \retval 0 If an interface exists at \a index.
2289  * \retval -ENOENT If no interface has been found.
2290  */
2291 int
2292 LNetGetId(unsigned int index, lnet_process_id_t *id)
2293 {
2294         struct lnet_ni   *ni;
2295         struct list_head *tmp;
2296         int               cpt;
2297         int               rc = -ENOENT;
2298
2299         LASSERT(the_lnet.ln_init);
2300         LASSERT(the_lnet.ln_refcount > 0);
2301
2302         cpt = lnet_net_lock_current();
2303
2304         list_for_each(tmp, &the_lnet.ln_nis) {
2305                 if (index-- != 0)
2306                         continue;
2307
2308                 ni = list_entry(tmp, lnet_ni_t, ni_list);
2309
2310                 id->nid = ni->ni_nid;
2311                 id->pid = the_lnet.ln_pid;
2312                 rc = 0;
2313                 break;
2314         }
2315
2316         lnet_net_unlock(cpt);
2317         return rc;
2318 }
2319 EXPORT_SYMBOL(LNetGetId);
2320
2321 /**
2322  * Print a string representation of handle \a h into buffer \a str of
2323  * \a len bytes.
2324  */
2325 void
2326 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2327 {
2328         snprintf(str, len, LPX64, h.cookie);
2329 }
2330 EXPORT_SYMBOL(LNetSnprintHandle);
2331
2332 static int
2333 lnet_ping(lnet_process_id_t id, int timeout_ms, lnet_process_id_t __user *ids,
2334           int n_ids)
2335 {
2336         lnet_handle_eq_t     eqh;
2337         lnet_handle_md_t     mdh;
2338         lnet_event_t         event;
2339         lnet_md_t            md = {0};
2340         int                  which;
2341         int                  unlinked = 0;
2342         int                  replied = 0;
2343         const int            a_long_time = 60000; /* mS */
2344         int                  infosz;
2345         lnet_ping_info_t    *info;
2346         lnet_process_id_t    tmpid;
2347         int                  i;
2348         int                  nob;
2349         int                  rc;
2350         int                  rc2;
2351         sigset_t         blocked;
2352
2353         infosz = offsetof(lnet_ping_info_t, pi_ni[n_ids]);
2354
2355         if (n_ids <= 0 ||
2356             id.nid == LNET_NID_ANY ||
2357             timeout_ms > 500000 ||              /* arbitrary limit! */
2358             n_ids > 20)                         /* arbitrary limit! */
2359                 return -EINVAL;
2360
2361         if (id.pid == LNET_PID_ANY)
2362                 id.pid = LNET_PID_LUSTRE;
2363
2364         LIBCFS_ALLOC(info, infosz);
2365         if (info == NULL)
2366                 return -ENOMEM;
2367
2368         /* NB 2 events max (including any unlink event) */
2369         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2370         if (rc != 0) {
2371                 CERROR("Can't allocate EQ: %d\n", rc);
2372                 goto out_0;
2373         }
2374
2375         /* initialize md content */
2376         md.start     = info;
2377         md.length    = infosz;
2378         md.threshold = 2; /*GET/REPLY*/
2379         md.max_size  = 0;
2380         md.options   = LNET_MD_TRUNCATE;
2381         md.user_ptr  = NULL;
2382         md.eq_handle = eqh;
2383
2384         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2385         if (rc != 0) {
2386                 CERROR("Can't bind MD: %d\n", rc);
2387                 goto out_1;
2388         }
2389
2390         rc = LNetGet(LNET_NID_ANY, mdh, id,
2391                      LNET_RESERVED_PORTAL,
2392                      LNET_PROTO_PING_MATCHBITS, 0);
2393
2394         if (rc != 0) {
2395                 /* Don't CERROR; this could be deliberate! */
2396
2397                 rc2 = LNetMDUnlink(mdh);
2398                 LASSERT(rc2 == 0);
2399
2400                 /* NB must wait for the UNLINK event below... */
2401                 unlinked = 1;
2402                 timeout_ms = a_long_time;
2403         }
2404
2405         do {
2406                 /* MUST block for unlink to complete */
2407                 if (unlinked)
2408                         blocked = cfs_block_allsigs();
2409
2410                 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
2411
2412                 if (unlinked)
2413                         cfs_restore_sigs(blocked);
2414
2415                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2416                        (rc2 <= 0) ? -1 : event.type,
2417                        (rc2 <= 0) ? -1 : event.status,
2418                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2419
2420                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2421
2422                 if (rc2 <= 0 || event.status != 0) {
2423                         /* timeout or error */
2424                         if (!replied && rc == 0)
2425                                 rc = (rc2 < 0) ? rc2 :
2426                                      (rc2 == 0) ? -ETIMEDOUT :
2427                                      event.status;
2428
2429                         if (!unlinked) {
2430                                 /* Ensure completion in finite time... */
2431                                 LNetMDUnlink(mdh);
2432                                 /* No assertion (racing with network) */
2433                                 unlinked = 1;
2434                                 timeout_ms = a_long_time;
2435                         } else if (rc2 == 0) {
2436                                 /* timed out waiting for unlink */
2437                                 CWARN("ping %s: late network completion\n",
2438                                       libcfs_id2str(id));
2439                         }
2440                 } else if (event.type == LNET_EVENT_REPLY) {
2441                         replied = 1;
2442                         rc = event.mlength;
2443                 }
2444
2445         } while (rc2 <= 0 || !event.unlinked);
2446
2447         if (!replied) {
2448                 if (rc >= 0)
2449                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2450                               libcfs_id2str(id));
2451                 rc = -EIO;
2452                 goto out_1;
2453         }
2454
2455         nob = rc;
2456         LASSERT(nob >= 0 && nob <= infosz);
2457
2458         rc = -EPROTO;                           /* if I can't parse... */
2459
2460         if (nob < 8) {
2461                 /* can't check magic/version */
2462                 CERROR("%s: ping info too short %d\n",
2463                        libcfs_id2str(id), nob);
2464                 goto out_1;
2465         }
2466
2467         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2468                 lnet_swap_pinginfo(info);
2469         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2470                 CERROR("%s: Unexpected magic %08x\n",
2471                        libcfs_id2str(id), info->pi_magic);
2472                 goto out_1;
2473         }
2474
2475         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2476                 CERROR("%s: ping w/o NI status: 0x%x\n",
2477                        libcfs_id2str(id), info->pi_features);
2478                 goto out_1;
2479         }
2480
2481         if (nob < offsetof(lnet_ping_info_t, pi_ni[0])) {
2482                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2483                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[0]));
2484                 goto out_1;
2485         }
2486
2487         if (info->pi_nnis < n_ids)
2488                 n_ids = info->pi_nnis;
2489
2490         if (nob < offsetof(lnet_ping_info_t, pi_ni[n_ids])) {
2491                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2492                        nob, (int)offsetof(lnet_ping_info_t, pi_ni[n_ids]));
2493                 goto out_1;
2494         }
2495
2496         rc = -EFAULT;                           /* If I SEGV... */
2497
2498         for (i = 0; i < n_ids; i++) {
2499                 tmpid.pid = info->pi_pid;
2500                 tmpid.nid = info->pi_ni[i].ns_nid;
2501                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2502                         goto out_1;
2503         }
2504         rc = info->pi_nnis;
2505
2506  out_1:
2507         rc2 = LNetEQFree(eqh);
2508         if (rc2 != 0)
2509                 CERROR("rc2 %d\n", rc2);
2510         LASSERT(rc2 == 0);
2511
2512  out_0:
2513         LIBCFS_FREE(info, infosz);
2514         return rc;
2515 }