Whamcloud - gitweb
b=13139,i=liangzhen,i=maxim:
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see [sun.com URL with a
20  * copy of GPLv2].
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_LNET
38 #include <lnet/lib-lnet.h>
39
40 #ifdef __KERNEL__
41 #define D_LNI D_CONSOLE
42 #else
43 #define D_LNI D_CONFIG
44 #endif
45
46 lnet_t      the_lnet;                           /* THE state of the network */
47
48 #ifdef __KERNEL__
49
/* Module parameter: IP-to-network table.  An empty string means "not set".
 * lnet_get_networks() refuses to run when both this and 'networks' are set. */
static char *ip2nets = "";
CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
                "LNET network <- IP table");

/* Module parameter: explicit list of local networks.  An empty string means
 * "not set"; lnet_get_networks() then falls back to ip2nets or to "tcp". */
static char *networks = "";
CFS_MODULE_PARM(networks, "s", charp, 0444,
                "local networks");

/* Module parameter: routes to non-local networks.  Returned unmodified by
 * lnet_get_routes(). */
static char *routes = "";
CFS_MODULE_PARM(routes, "s", charp, 0444,
                "routes to non-local networks");
61
62 char *
63 lnet_get_routes(void)
64 {
65         return routes;
66 }
67
68 char *
69 lnet_get_networks(void)
70 {
71         char   *nets;
72         int     rc;
73
74         if (*networks != 0 && *ip2nets != 0) {
75                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
76                                    "'ip2nets' but not both at once\n");
77                 return NULL;
78         }
79
80         if (*ip2nets != 0) {
81                 rc = lnet_parse_ip2nets(&nets, ip2nets);
82                 return (rc == 0) ? nets : NULL;
83         }
84
85         if (*networks != 0)
86                 return networks;
87
88         return "tcp";
89 }
90
91 void
92 lnet_init_locks(void)
93 {
94         spin_lock_init (&the_lnet.ln_lock);
95         cfs_waitq_init (&the_lnet.ln_waitq);
96         init_mutex(&the_lnet.ln_lnd_mutex);
97         init_mutex(&the_lnet.ln_api_mutex);
98 }
99
/* Kernel build: counterpart of lnet_init_locks(); nothing is torn down here. */
void
lnet_fini_locks(void)
{
        /* intentionally empty */
}
104
105 #else
106
/* Userspace build: routes come from the LNET_ROUTES environment variable.
 * Returns the variable's value, or the empty string when it is unset. */
char *
lnet_get_routes(void)
{
        char *env = getenv("LNET_ROUTES");

        if (env != NULL)
                return env;

        return "";
}
114
115 char *
116 lnet_get_networks (void)
117 {
118         static char       default_networks[256];
119         char             *networks = getenv ("LNET_NETWORKS");
120         char             *ip2nets  = getenv ("LNET_IP2NETS");
121         char             *str;
122         char             *sep;
123         int               len;
124         int               nob;
125         int               rc;
126         struct list_head *tmp;
127
128 #ifdef NOT_YET
129         if (networks != NULL && ip2nets != NULL) {
130                 LCONSOLE_ERROR_MSG(0x103, "Please set EITHER 'LNET_NETWORKS' or"
131                                    " 'LNET_IP2NETS' but not both at once\n");
132                 return NULL;
133         }
134
135         if (ip2nets != NULL) {
136                 rc = lnet_parse_ip2nets(&networks, ip2nets);
137                 return (rc == 0) ? networks : NULL;
138         }
139 #else
140         ip2nets = NULL;
141         rc = 0;
142 #endif
143         if (networks != NULL)
144                 return networks;
145
146         /* In userland, the default 'networks=' is the list of known net types */
147
148         len = sizeof(default_networks);
149         str = default_networks;
150         *str = 0;
151         sep = "";
152
153         list_for_each (tmp, &the_lnet.ln_lnds) {
154                 lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
155
156                 nob = snprintf(str, len, "%s%s", sep,
157                                libcfs_lnd2str(lnd->lnd_type));
158                 len -= nob;
159                 if (len < 0) {
160                         /* overflowed the string; leave it where it was */
161                         *str = 0;
162                         break;
163                 }
164
165                 str += nob;
166                 sep = ",";
167         }
168
169         return default_networks;
170 }
171
172 # ifndef HAVE_LIBPTHREAD
173
174 void lnet_init_locks(void)
175 {
176         the_lnet.ln_lock = 0;
177         the_lnet.ln_lnd_mutex = 0;
178         the_lnet.ln_api_mutex = 0;
179 }
180
181 void lnet_fini_locks(void)
182 {
183         LASSERT (the_lnet.ln_api_mutex == 0);
184         LASSERT (the_lnet.ln_lnd_mutex == 0);
185         LASSERT (the_lnet.ln_lock == 0);
186 }
187
188 # else
189
190 void lnet_init_locks(void)
191 {
192         pthread_cond_init(&the_lnet.ln_cond, NULL);
193         pthread_mutex_init(&the_lnet.ln_lock, NULL);
194         pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL);
195         pthread_mutex_init(&the_lnet.ln_api_mutex, NULL);
196 }
197
198 void lnet_fini_locks(void)
199 {
200         pthread_mutex_destroy(&the_lnet.ln_api_mutex);
201         pthread_mutex_destroy(&the_lnet.ln_lnd_mutex);
202         pthread_mutex_destroy(&the_lnet.ln_lock);
203         pthread_cond_destroy(&the_lnet.ln_cond);
204 }
205
206 # endif
207 #endif
208
/* Pin down the LNET wire protocol: the values of the protocol constants and
 * the size and offset of every field of the on-the-wire structures.
 *
 * The function has no runtime logic; it is only a container for the
 * CLASSERT() checks below (presumably evaluated at compile time -- see the
 * CLASSERT definition).  Any change that alters a constant or a structure
 * layout listed here changes the wire format. */
void lnet_assert_wire_constants (void)
{
        /* Wire protocol assertions generated by 'wirecheck'
         * running on Linux robert.bartonsoftware.com 2.6.8-1.521
         * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
         * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */

        /* Constants... */
        CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
        CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
        CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
        CLASSERT (LNET_MSG_ACK == 0);
        CLASSERT (LNET_MSG_PUT == 1);
        CLASSERT (LNET_MSG_GET == 2);
        CLASSERT (LNET_MSG_REPLY == 3);
        CLASSERT (LNET_MSG_HELLO == 4);

        /* Checks for struct lnet_handle_wire_t */
        CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
        CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
        CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
        CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
        CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);

        /* Checks for struct lnet_magicversion_t */
        CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
        CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
        CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
        CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
        CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
        CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
        CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);

        /* Checks for struct lnet_hdr_t */
        CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
        CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);

        /* The per-message-type members below all start at offset 32, i.e.
         * they overlay one another inside lnet_hdr_t::msg. */

        /* Ack */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);

        /* Put */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);

        /* Get */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);

        /* Reply */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);

        /* Hello */
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
        CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
        CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
}
301
302 lnd_t *
303 lnet_find_lnd_by_type (int type)
304 {
305         lnd_t              *lnd;
306         struct list_head   *tmp;
307
308         /* holding lnd mutex */
309         list_for_each (tmp, &the_lnet.ln_lnds) {
310                 lnd = list_entry(tmp, lnd_t, lnd_list);
311
312                 if (lnd->lnd_type == type)
313                         return lnd;
314         }
315
316         return NULL;
317 }
318
319 void
320 lnet_register_lnd (lnd_t *lnd)
321 {
322         LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
323
324         LASSERT (the_lnet.ln_init);
325         LASSERT (libcfs_isknown_lnd(lnd->lnd_type));
326         LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
327
328         list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds);
329         lnd->lnd_refcount = 0;
330
331         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
332
333         LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
334 }
335
336 void
337 lnet_unregister_lnd (lnd_t *lnd)
338 {
339         LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
340
341         LASSERT (the_lnet.ln_init);
342         LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
343         LASSERT (lnd->lnd_refcount == 0);
344
345         list_del (&lnd->lnd_list);
346         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
347
348         LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
349 }
350
351 #ifndef LNET_USE_LIB_FREELIST
352
/* Build without LNET_USE_LIB_FREELIST: there are no descriptor freelists to
 * set up, so this always succeeds (returns 0). */
int
lnet_descriptor_setup (void)
{
        int rc = 0;

        return rc;
}
358
/* Build without LNET_USE_LIB_FREELIST: nothing was set up, so there is
 * nothing to release. */
void
lnet_descriptor_cleanup (void)
{
        /* intentionally empty */
}
363
364 #else
365
366 int
367 lnet_freelist_init (lnet_freelist_t *fl, int n, int size)
368 {
369         char *space;
370
371         LASSERT (n > 0);
372
373         size += offsetof (lnet_freeobj_t, fo_contents);
374
375         LIBCFS_ALLOC(space, n * size);
376         if (space == NULL)
377                 return (-ENOMEM);
378
379         CFS_INIT_LIST_HEAD (&fl->fl_list);
380         fl->fl_objs = space;
381         fl->fl_nobjs = n;
382         fl->fl_objsize = size;
383
384         do
385         {
386                 memset (space, 0, size);
387                 list_add ((struct list_head *)space, &fl->fl_list);
388                 space += size;
389         } while (--n != 0);
390
391         return (0);
392 }
393
394 void
395 lnet_freelist_fini (lnet_freelist_t *fl)
396 {
397         struct list_head *el;
398         int               count;
399
400         if (fl->fl_nobjs == 0)
401                 return;
402
403         count = 0;
404         for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
405                 count++;
406
407         LASSERT (count == fl->fl_nobjs);
408
409         LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
410         memset (fl, 0, sizeof (fl));
411 }
412
413 int
414 lnet_descriptor_setup (void)
415 {
416         /* NB on failure caller must still call lnet_descriptor_cleanup */
417         /*               ******                                         */
418         int        rc;
419
420         memset (&the_lnet.ln_free_mes,  0, sizeof (the_lnet.ln_free_mes));
421         memset (&the_lnet.ln_free_msgs, 0, sizeof (the_lnet.ln_free_msgs));
422         memset (&the_lnet.ln_free_mds,  0, sizeof (the_lnet.ln_free_mds));
423         memset (&the_lnet.ln_free_eqs,  0, sizeof (the_lnet.ln_free_eqs));
424
425         rc = lnet_freelist_init(&the_lnet.ln_free_mes,
426                                 MAX_MES, sizeof (lnet_me_t));
427         if (rc != 0)
428                 return (rc);
429
430         rc = lnet_freelist_init(&the_lnet.ln_free_msgs,
431                                 MAX_MSGS, sizeof (lnet_msg_t));
432         if (rc != 0)
433                 return (rc);
434
435         rc = lnet_freelist_init(&the_lnet.ln_free_mds,
436                                 MAX_MDS, sizeof (lnet_libmd_t));
437         if (rc != 0)
438                 return (rc);
439
440         rc = lnet_freelist_init(&the_lnet.ln_free_eqs,
441                                 MAX_EQS, sizeof (lnet_eq_t));
442         return (rc);
443 }
444
445 void
446 lnet_descriptor_cleanup (void)
447 {
448         lnet_freelist_fini (&the_lnet.ln_free_mes);
449         lnet_freelist_fini (&the_lnet.ln_free_msgs);
450         lnet_freelist_fini (&the_lnet.ln_free_mds);
451         lnet_freelist_fini (&the_lnet.ln_free_eqs);
452 }
453
454 #endif
455
456 __u64
457 lnet_create_interface_cookie (void)
458 {
459         /* NB the interface cookie in wire handles guards against delayed
460          * replies and ACKs appearing valid after reboot. Initialisation time,
461          * even if it's only implemented to millisecond resolution is probably
462          * easily good enough. */
463         struct timeval tv;
464         __u64          cookie;
465 #ifndef __KERNEL__
466         int            rc = gettimeofday (&tv, NULL);
467         LASSERT (rc == 0);
468 #else
469         do_gettimeofday(&tv);
470 #endif
471         cookie = tv.tv_sec;
472         cookie *= 1000000;
473         cookie += tv.tv_usec;
474         return cookie;
475 }
476
477 int
478 lnet_setup_handle_hash (void)
479 {
480         int       i;
481
482         /* Arbitrary choice of hash table size */
483 #ifdef __KERNEL__
484         the_lnet.ln_lh_hash_size = CFS_PAGE_SIZE / sizeof (struct list_head);
485 #else
486         the_lnet.ln_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
487 #endif
488         LIBCFS_ALLOC(the_lnet.ln_lh_hash_table,
489                      the_lnet.ln_lh_hash_size * sizeof (struct list_head));
490         if (the_lnet.ln_lh_hash_table == NULL)
491                 return (-ENOMEM);
492
493         for (i = 0; i < the_lnet.ln_lh_hash_size; i++)
494                 CFS_INIT_LIST_HEAD (&the_lnet.ln_lh_hash_table[i]);
495
496         the_lnet.ln_next_object_cookie = LNET_COOKIE_TYPES;
497
498         return (0);
499 }
500
501 void
502 lnet_cleanup_handle_hash (void)
503 {
504         if (the_lnet.ln_lh_hash_table == NULL)
505                 return;
506
507         LIBCFS_FREE(the_lnet.ln_lh_hash_table,
508                     the_lnet.ln_lh_hash_size * sizeof (struct list_head));
509 }
510
511 lnet_libhandle_t *
512 lnet_lookup_cookie (__u64 cookie, int type)
513 {
514         /* ALWAYS called with LNET_LOCK held */
515         struct list_head    *list;
516         struct list_head    *el;
517         unsigned int         hash;
518
519         if ((cookie & (LNET_COOKIE_TYPES - 1)) != type)
520                 return (NULL);
521
522         hash = ((unsigned int)cookie) % the_lnet.ln_lh_hash_size;
523         list = &the_lnet.ln_lh_hash_table[hash];
524
525         list_for_each (el, list) {
526                 lnet_libhandle_t *lh = list_entry (el, lnet_libhandle_t,
527                                                   lh_hash_chain);
528
529                 if (lh->lh_cookie == cookie)
530                         return (lh);
531         }
532
533         return (NULL);
534 }
535
536 void
537 lnet_initialise_handle (lnet_libhandle_t *lh, int type)
538 {
539         /* ALWAYS called with LNET_LOCK held */
540         unsigned int    hash;
541
542         LASSERT (type >= 0 && type < LNET_COOKIE_TYPES);
543         lh->lh_cookie = the_lnet.ln_next_object_cookie | type;
544         the_lnet.ln_next_object_cookie += LNET_COOKIE_TYPES;
545
546         hash = ((unsigned int)lh->lh_cookie) % the_lnet.ln_lh_hash_size;
547         list_add (&lh->lh_hash_chain, &the_lnet.ln_lh_hash_table[hash]);
548 }
549
550 void
551 lnet_invalidate_handle (lnet_libhandle_t *lh)
552 {
553         /* ALWAYS called with LNET_LOCK held */
554         list_del (&lh->lh_hash_chain);
555 }
556
557 int
558 lnet_init_finalizers(void)
559 {
560 #ifdef __KERNEL__
561         int    i;
562
563         the_lnet.ln_nfinalizers = num_online_cpus();
564
565         LIBCFS_ALLOC(the_lnet.ln_finalizers,
566                      the_lnet.ln_nfinalizers *
567                      sizeof(*the_lnet.ln_finalizers));
568         if (the_lnet.ln_finalizers == NULL) {
569                 CERROR("Can't allocate ln_finalizers\n");
570                 return -ENOMEM;
571         }
572
573         for (i = 0; i < the_lnet.ln_nfinalizers; i++)
574                 the_lnet.ln_finalizers[i] = NULL;
575 #else
576         the_lnet.ln_finalizing = 0;
577 #endif
578
579         CFS_INIT_LIST_HEAD(&the_lnet.ln_finalizeq);
580         return 0;
581 }
582
583 void
584 lnet_fini_finalizers(void)
585 {
586 #ifdef __KERNEL__
587         int    i;
588
589         for (i = 0; i < the_lnet.ln_nfinalizers; i++)
590                 LASSERT (the_lnet.ln_finalizers[i] == NULL);
591
592         LIBCFS_FREE(the_lnet.ln_finalizers,
593                     the_lnet.ln_nfinalizers *
594                     sizeof(*the_lnet.ln_finalizers));
595 #else
596         LASSERT (!the_lnet.ln_finalizing);
597 #endif
598         LASSERT (list_empty(&the_lnet.ln_finalizeq));
599 }
600
601 #ifndef __KERNEL__
/* Temporary workaround that lets uOSS and test programs force server
 * mode in userspace. See the comments near ln_server_mode_flag in
 * lnet/lib-types.h */
605
606 void
607 lnet_server_mode() {
608         the_lnet.ln_server_mode_flag = 1;
609 }
610 #endif
611
612 int
613 lnet_prepare(lnet_pid_t requested_pid)
614 {
615         /* Prepare to bring up the network */
616         int               rc = 0;
617         int               i;
618
619         LASSERT (the_lnet.ln_refcount == 0);
620
621         the_lnet.ln_routing = 0;
622
623 #ifdef __KERNEL__
624         LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
625         the_lnet.ln_pid = requested_pid;
626 #else
627         if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
628                 LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
629
630                 if (cfs_curproc_uid())/* Only root can run user-space server */
631                         return -EPERM;
632                 the_lnet.ln_pid = requested_pid;
633
634         } else {/* client case (liblustre) */
635
636                 /* My PID must be unique on this node and flag I'm userspace */
637                 the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
638         }
639 #endif
640
641         rc = lnet_descriptor_setup();
642         if (rc != 0)
643                 goto failed0;
644
645         memset(&the_lnet.ln_counters, 0,
646                sizeof(the_lnet.ln_counters));
647
648         CFS_INIT_LIST_HEAD (&the_lnet.ln_active_msgs);
649         CFS_INIT_LIST_HEAD (&the_lnet.ln_active_mds);
650         CFS_INIT_LIST_HEAD (&the_lnet.ln_active_eqs);
651         CFS_INIT_LIST_HEAD (&the_lnet.ln_test_peers);
652         CFS_INIT_LIST_HEAD (&the_lnet.ln_nis);
653         CFS_INIT_LIST_HEAD (&the_lnet.ln_zombie_nis);
654         CFS_INIT_LIST_HEAD (&the_lnet.ln_remote_nets);
655         CFS_INIT_LIST_HEAD (&the_lnet.ln_routers);
656
657         the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
658
659         lnet_init_rtrpools();
660
661         rc = lnet_setup_handle_hash ();
662         if (rc != 0)
663                 goto failed0;
664
665         rc = lnet_create_peer_table();
666         if (rc != 0)
667                 goto failed1;
668
669         rc = lnet_init_finalizers();
670         if (rc != 0)
671                 goto failed2;
672
673         the_lnet.ln_nportals = MAX_PORTALS;
674         LIBCFS_ALLOC(the_lnet.ln_portals,
675                      the_lnet.ln_nportals *
676                      sizeof(*the_lnet.ln_portals));
677         if (the_lnet.ln_portals == NULL) {
678                 rc = -ENOMEM;
679                 goto failed3;
680         }
681
682         for (i = 0; i < the_lnet.ln_nportals; i++) {
683                 CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_ml));
684                 CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_msgq));
685                 the_lnet.ln_portals[i].ptl_options = 0;
686         }
687
688         return 0;
689
690  failed3:
691         lnet_fini_finalizers();
692  failed2:
693         lnet_destroy_peer_table();
694  failed1:
695         lnet_cleanup_handle_hash();
696  failed0:
697         lnet_descriptor_cleanup();
698         return rc;
699 }
700
701 int
702 lnet_unprepare (void)
703 {
704         int       idx;
705
706         /* NB no LNET_LOCK since this is the last reference.  All LND instances
707          * have shut down already, so it is safe to unlink and free all
708          * descriptors, even those that appear committed to a network op (eg MD
709          * with non-zero pending count) */
710
711         lnet_fail_nid(LNET_NID_ANY, 0);
712
713         LASSERT (list_empty(&the_lnet.ln_test_peers));
714         LASSERT (the_lnet.ln_refcount == 0);
715         LASSERT (list_empty(&the_lnet.ln_nis));
716         LASSERT (list_empty(&the_lnet.ln_zombie_nis));
717         LASSERT (the_lnet.ln_nzombie_nis == 0);
718
719         for (idx = 0; idx < the_lnet.ln_nportals; idx++) {
720                 LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq));
721
722                 while (!list_empty (&the_lnet.ln_portals[idx].ptl_ml)) {
723                         lnet_me_t *me = list_entry (the_lnet.ln_portals[idx].ptl_ml.next,
724                                                     lnet_me_t, me_list);
725
726                         CERROR ("Active me %p on exit\n", me);
727                         list_del (&me->me_list);
728                         lnet_me_free (me);
729                 }
730         }
731
732         while (!list_empty (&the_lnet.ln_active_mds)) {
733                 lnet_libmd_t *md = list_entry (the_lnet.ln_active_mds.next,
734                                                lnet_libmd_t, md_list);
735
736                 CERROR ("Active md %p on exit\n", md);
737                 list_del (&md->md_list);
738                 lnet_md_free (md);
739         }
740
741         while (!list_empty (&the_lnet.ln_active_eqs)) {
742                 lnet_eq_t *eq = list_entry (the_lnet.ln_active_eqs.next,
743                                             lnet_eq_t, eq_list);
744
745                 CERROR ("Active eq %p on exit\n", eq);
746                 list_del (&eq->eq_list);
747                 lnet_eq_free (eq);
748         }
749
750         while (!list_empty (&the_lnet.ln_active_msgs)) {
751                 lnet_msg_t *msg = list_entry (the_lnet.ln_active_msgs.next,
752                                               lnet_msg_t, msg_activelist);
753
754                 CERROR ("Active msg %p on exit\n", msg);
755                 LASSERT (msg->msg_onactivelist);
756                 msg->msg_onactivelist = 0;
757                 list_del (&msg->msg_activelist);
758                 lnet_msg_free (msg);
759         }
760
761         LIBCFS_FREE(the_lnet.ln_portals,  
762                     the_lnet.ln_nportals * sizeof(*the_lnet.ln_portals));
763
764         lnet_free_rtrpools();
765         lnet_fini_finalizers();
766         lnet_destroy_peer_table();
767         lnet_cleanup_handle_hash();
768         lnet_descriptor_cleanup();
769
770         return (0);
771 }
772
773 lnet_ni_t  *
774 lnet_net2ni_locked (__u32 net)
775 {
776         struct list_head *tmp;
777         lnet_ni_t        *ni;
778
779         list_for_each (tmp, &the_lnet.ln_nis) {
780                 ni = list_entry(tmp, lnet_ni_t, ni_list);
781
782                 if (LNET_NIDNET(ni->ni_nid) == net) {
783                         lnet_ni_addref_locked(ni);
784                         return ni;
785                 }
786         }
787
788         return NULL;
789 }
790
791 int
792 lnet_islocalnet (__u32 net)
793 {
794         lnet_ni_t        *ni;
795
796         LNET_LOCK();
797         ni = lnet_net2ni_locked(net);
798         if (ni != NULL)
799                 lnet_ni_decref_locked(ni);
800         LNET_UNLOCK();
801
802         return ni != NULL;
803 }
804
805 lnet_ni_t  *
806 lnet_nid2ni_locked (lnet_nid_t nid)
807 {
808         struct list_head *tmp;
809         lnet_ni_t        *ni;
810
811         list_for_each (tmp, &the_lnet.ln_nis) {
812                 ni = list_entry(tmp, lnet_ni_t, ni_list);
813
814                 if (ni->ni_nid == nid) {
815                         lnet_ni_addref_locked(ni);
816                         return ni;
817                 }
818         }
819
820         return NULL;
821 }
822
823 int
824 lnet_islocalnid (lnet_nid_t nid)
825 {
826         lnet_ni_t     *ni;
827
828         LNET_LOCK();
829         ni = lnet_nid2ni_locked(nid);
830         if (ni != NULL)
831                 lnet_ni_decref_locked(ni);
832         LNET_UNLOCK();
833
834         return ni != NULL;
835 }
836
/* Return the number of local NIs that need the acceptor, i.e. whose LND
 * provides an lnd_accept method.  The NI list is walked under LNET_LOCK.
 * In a userspace build without pthreads the result is always 0. */
int
lnet_count_acceptor_nis (void)
{
        int                count = 0;
#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
        struct list_head  *pos;
        lnet_ni_t         *ni;

        LNET_LOCK();

        list_for_each (pos, &the_lnet.ln_nis) {
                ni = list_entry(pos, lnet_ni_t, ni_list);

                if (ni->ni_lnd->lnd_accept == NULL)
                        continue;

                count++;
        }

        LNET_UNLOCK();

#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
        return count;
}
859
/* Shut down and free every local network interface (NI).
 *
 * Phases:
 *  1. Under LNET_LOCK: set ln_shutdown, unlink every NI from ln_nis,
 *     counting each in ln_nzombie_nis and dropping the list's reference,
 *     and drop the cached eqwait and loopback NI references.
 *  2. Without the lock: clear lazy portals and the peer table, which hold
 *     references on NIs.
 *  3. Under LNET_LOCK again: wait for each counted NI to appear on
 *     ln_zombie_nis, then (with the lock dropped) call its LND's
 *     lnd_shutdown method and free it.
 *  4. Clear ln_shutdown and free the network tokens buffer, if any.
 *
 * Preconditions (asserted): not already shutting down, ln_refcount == 0, no
 * zombie NIs outstanding, and no remote nets configured. */
void
lnet_shutdown_lndnis (void)
{
        int                i;
        int                islo;
        lnet_ni_t         *ni;

        /* NB called holding the global mutex */

        /* All quiet on the API front */
        LASSERT (!the_lnet.ln_shutdown);
        LASSERT (the_lnet.ln_refcount == 0);
        LASSERT (list_empty(&the_lnet.ln_zombie_nis));
        LASSERT (the_lnet.ln_nzombie_nis == 0);
        LASSERT (list_empty(&the_lnet.ln_remote_nets));

        LNET_LOCK();
        the_lnet.ln_shutdown = 1;               /* flag shutdown */

        /* Unlink NIs from the global table */
        while (!list_empty(&the_lnet.ln_nis)) {
                ni = list_entry(the_lnet.ln_nis.next,
                                lnet_ni_t, ni_list);
                list_del (&ni->ni_list);

                /* count it now; the wait loop below runs until this count
                 * is back to zero */
                the_lnet.ln_nzombie_nis++;
                lnet_ni_decref_locked(ni); /* drop the ln_nis list's ref */
        }

        /* Drop the cached eqwait NI. */
        if (the_lnet.ln_eqwaitni != NULL) {
                lnet_ni_decref_locked(the_lnet.ln_eqwaitni);
                the_lnet.ln_eqwaitni = NULL;
        }

        /* Drop the cached loopback NI. */
        if (the_lnet.ln_loni != NULL) {
                lnet_ni_decref_locked(the_lnet.ln_loni);
                the_lnet.ln_loni = NULL;
        }

        LNET_UNLOCK();

        /* Clear lazy portals and drop delayed messages which hold refs
         * on their lnet_msg_t::msg_rxpeer */
        for (i = 0; i < the_lnet.ln_nportals; i++)
                LNetClearLazyPortal(i);

        /* Clear the peer table and wait for all peers to go (they hold refs on
         * their NIs) */
        lnet_clear_peer_table();

        LNET_LOCK();
        /* Now wait for the NIs I just nuked to show up on ln_zombie_nis
         * and shut them down in guaranteed thread context */
        i = 2;
        while (the_lnet.ln_nzombie_nis != 0) {

                while (list_empty(&the_lnet.ln_zombie_nis)) {
                        /* nothing to reap yet: sleep a second with the lock
                         * dropped, then look again */
                        LNET_UNLOCK();
                        ++i;
                        /* true only when i is a power of two, so the warning
                         * is printed less and less often as the wait goes on */
                        if ((i & (-i)) == i)
                                CDEBUG(D_WARNING,"Waiting for %d zombie NIs\n",
                                       the_lnet.ln_nzombie_nis);
                        cfs_pause(cfs_time_seconds(1));
                        LNET_LOCK();
                }

                ni = list_entry(the_lnet.ln_zombie_nis.next,
                                lnet_ni_t, ni_list);
                list_del(&ni->ni_list);
                ni->ni_lnd->lnd_refcount--;

                LNET_UNLOCK();

                /* remember whether this is the loopback LND before calling
                 * into it; ni_lnd must not be touched afterwards */
                islo = ni->ni_lnd->lnd_type == LOLND;

                LASSERT (!in_interrupt ());
                (ni->ni_lnd->lnd_shutdown)(ni);

                /* can't deref lnd anymore now; it might have unregistered
                 * itself...  */

                /* removal of the loopback NI is not logged */
                if (!islo)
                        CDEBUG(D_LNI, "Removed LNI %s\n",
                               libcfs_nid2str(ni->ni_nid));

                LIBCFS_FREE(ni, sizeof(*ni));

                LNET_LOCK();
                the_lnet.ln_nzombie_nis--;
        }

        the_lnet.ln_shutdown = 0;
        LNET_UNLOCK();

        /* release the network tokens buffer, if one was allocated */
        if (the_lnet.ln_network_tokens != NULL) {
                LIBCFS_FREE(the_lnet.ln_network_tokens,
                            the_lnet.ln_network_tokens_nob);
                the_lnet.ln_network_tokens = NULL;
        }
}
962
963 int
964 lnet_startup_lndnis (void)
965 {
966         lnd_t             *lnd;
967         lnet_ni_t         *ni;
968         struct list_head   nilist;
969         int                rc = 0;
970         int                lnd_type;
971         int                nicount = 0;
972         char              *nets = lnet_get_networks();
973
974         CFS_INIT_LIST_HEAD(&nilist);
975
976         if (nets == NULL)
977                 goto failed;
978
979         rc = lnet_parse_networks(&nilist, nets);
980         if (rc != 0)
981                 goto failed;
982
983         while (!list_empty(&nilist)) {
984                 ni = list_entry(nilist.next, lnet_ni_t, ni_list);
985                 lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
986
987                 LASSERT (libcfs_isknown_lnd(lnd_type));
988
989                 LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
990                 lnd = lnet_find_lnd_by_type(lnd_type);
991
992 #ifdef __KERNEL__
993                 if (lnd == NULL) {
994                         LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
995                         rc = request_module(libcfs_lnd2modname(lnd_type));
996                         LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
997
998                         lnd = lnet_find_lnd_by_type(lnd_type);
999                         if (lnd == NULL) {
1000                                 LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
1001                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1002                                        libcfs_lnd2str(lnd_type),
1003                                        libcfs_lnd2modname(lnd_type), rc);
1004 #ifndef CONFIG_KMOD
1005                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1006                                          "compiled with CONFIG_KMOD set for "
1007                                          "automatic module loading.");
1008 #endif
1009                                 goto failed;
1010                         }
1011                 }
1012 #else
1013                 if (lnd == NULL) {
1014                         LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
1015                         CERROR("LND %s not supported\n",
1016                                libcfs_lnd2str(lnd_type));
1017                         goto failed;
1018                 }
1019 #endif
1020
1021                 ni->ni_refcount = 1;
1022
1023                 LNET_LOCK();
1024                 lnd->lnd_refcount++;
1025                 LNET_UNLOCK();
1026
1027                 ni->ni_lnd = lnd;
1028
1029                 rc = (lnd->lnd_startup)(ni);
1030
1031                 LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
1032
1033                 if (rc != 0) {
1034                         LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
1035                                            "\n",
1036                                            rc, libcfs_lnd2str(lnd->lnd_type));
1037                         LNET_LOCK();
1038                         lnd->lnd_refcount--;
1039                         LNET_UNLOCK();
1040                         goto failed;
1041                 }
1042
1043                 list_del(&ni->ni_list);
1044
1045                 LNET_LOCK();
1046                 list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1047                 LNET_UNLOCK();
1048
1049                 if (lnd->lnd_type == LOLND) {
1050                         lnet_ni_addref(ni);
1051                         LASSERT (the_lnet.ln_loni == NULL);
1052                         the_lnet.ln_loni = ni;
1053                         continue;
1054                 }
1055
1056 #ifndef __KERNEL__
1057                 if (lnd->lnd_wait != NULL) {
1058                         if (the_lnet.ln_eqwaitni == NULL) {
1059                                 lnet_ni_addref(ni);
1060                                 the_lnet.ln_eqwaitni = ni;
1061                         }
1062                 } else {
1063 # ifndef HAVE_LIBPTHREAD
1064                         LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a "
1065                                            "single-threaded runtime\n",
1066                                            libcfs_lnd2str(lnd_type));
1067                         goto failed;
1068 # endif
1069                 }
1070 #endif
1071                 if (ni->ni_peertxcredits == 0 ||
1072                     ni->ni_maxtxcredits == 0) {
1073                         LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1074                                            libcfs_lnd2str(lnd->lnd_type),
1075                                            ni->ni_peertxcredits == 0 ?
1076                                            "" : "per-peer ");
1077                         goto failed;
1078                 }
1079
1080                 ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits;
1081
1082                 CDEBUG(D_LNI, "Added LNI %s [%d/%d]\n",
1083                        libcfs_nid2str(ni->ni_nid),
1084                        ni->ni_peertxcredits, ni->ni_txcredits);
1085
1086                 nicount++;
1087         }
1088
1089         if (the_lnet.ln_eqwaitni != NULL && nicount > 1) {
1090                 lnd_type = the_lnet.ln_eqwaitni->ni_lnd->lnd_type;
1091                 LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
1092                                    "\n",
1093                                    libcfs_lnd2str(lnd_type));
1094                 goto failed;
1095         }
1096
1097         return 0;
1098
1099  failed:
1100         lnet_shutdown_lndnis();
1101
1102         while (!list_empty(&nilist)) {
1103                 ni = list_entry(nilist.next, lnet_ni_t, ni_list);
1104                 list_del(&ni->ni_list);
1105                 LIBCFS_FREE(ni, sizeof(*ni));
1106         }
1107
1108         return -ENETDOWN;
1109 }
1110
1111 int
1112 LNetInit(void)
1113 {
1114         lnet_assert_wire_constants ();
1115         LASSERT (!the_lnet.ln_init);
1116
1117         memset(&the_lnet, 0, sizeof(the_lnet));
1118
1119         lnet_init_locks();
1120         CFS_INIT_LIST_HEAD(&the_lnet.ln_lnds);
1121         the_lnet.ln_refcount = 0;
1122         the_lnet.ln_init = 1;
1123
1124 #ifdef __KERNEL__
1125         /* All LNDs apart from the LOLND are in separate modules.  They
1126          * register themselves when their module loads, and unregister
1127          * themselves when their module is unloaded. */
1128 #else
1129         /* Register LNDs
1130          * NB the order here determines default 'networks=' order */
1131 # ifdef CRAY_XT3
1132         LNET_REGISTER_ULND(the_ptllnd);
1133 # endif
1134 # ifdef HAVE_LIBPTHREAD
1135         LNET_REGISTER_ULND(the_tcplnd);
1136 # endif
1137 #endif
1138         lnet_register_lnd(&the_lolnd);
1139         return 0;
1140 }
1141
1142 void
1143 LNetFini(void)
1144 {
1145         LASSERT (the_lnet.ln_init);
1146         LASSERT (the_lnet.ln_refcount == 0);
1147
1148         while (!list_empty(&the_lnet.ln_lnds))
1149                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1150                                                lnd_t, lnd_list));
1151         lnet_fini_locks();
1152
1153         the_lnet.ln_init = 0;
1154 }
1155
1156 int
1157 LNetNIInit(lnet_pid_t requested_pid)
1158 {
1159         int         im_a_router = 0;
1160         int         rc;
1161
1162         LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex);
1163
1164         LASSERT (the_lnet.ln_init);
1165         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1166
1167         if (the_lnet.ln_refcount > 0) {
1168                 rc = the_lnet.ln_refcount++;
1169                 goto out;
1170         }
1171
1172         if (requested_pid == LNET_PID_ANY) {
1173                 /* Don't instantiate LNET just for me */
1174                 rc = -ENETDOWN;
1175                 goto failed0;
1176         }
1177
1178         rc = lnet_prepare(requested_pid);
1179         if (rc != 0)
1180                 goto failed0;
1181
1182         rc = lnet_startup_lndnis();
1183         if (rc != 0)
1184                 goto failed1;
1185
1186         rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1187         if (rc != 0)
1188                 goto failed2;
1189
1190         rc = lnet_check_routes();
1191         if (rc != 0)
1192                 goto failed2;
1193
1194         rc = lnet_alloc_rtrpools(im_a_router);
1195         if (rc != 0)
1196                 goto failed2;
1197
1198         rc = lnet_acceptor_start();
1199         if (rc != 0)
1200                 goto failed2;
1201
1202         the_lnet.ln_refcount = 1;
1203         /* Now I may use my own API functions... */
1204
1205         rc = lnet_router_checker_start();
1206         if (rc != 0)
1207                 goto failed3;
1208
1209         rc = lnet_ping_target_init();
1210         if (rc != 0)
1211                 goto failed4;
1212
1213         lnet_proc_init();
1214         goto out;
1215
1216  failed4:
1217         lnet_router_checker_stop();
1218  failed3:
1219         the_lnet.ln_refcount = 0;
1220         lnet_acceptor_stop();
1221  failed2:
1222         lnet_destroy_routes();
1223         lnet_shutdown_lndnis();
1224  failed1:
1225         lnet_unprepare();
1226  failed0:
1227         LASSERT (rc < 0);
1228  out:
1229         LNET_MUTEX_UP(&the_lnet.ln_api_mutex);
1230         return rc;
1231 }
1232
1233 int
1234 LNetNIFini()
1235 {
1236         LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex);
1237
1238         LASSERT (the_lnet.ln_init);
1239         LASSERT (the_lnet.ln_refcount > 0);
1240
1241         if (the_lnet.ln_refcount != 1) {
1242                 the_lnet.ln_refcount--;
1243         } else {
1244                 LASSERT (!the_lnet.ln_niinit_self);
1245
1246                 lnet_proc_fini();
1247                 lnet_ping_target_fini();
1248                 lnet_router_checker_stop();
1249
1250                 /* Teardown fns that use my own API functions BEFORE here */
1251                 the_lnet.ln_refcount = 0;
1252
1253                 lnet_acceptor_stop();
1254                 lnet_destroy_routes();
1255                 lnet_shutdown_lndnis();
1256                 lnet_unprepare();
1257         }
1258
1259         LNET_MUTEX_UP(&the_lnet.ln_api_mutex);
1260         return 0;
1261 }
1262
1263 int
1264 LNetCtl(unsigned int cmd, void *arg)
1265 {
1266         struct libcfs_ioctl_data *data = arg;
1267         lnet_process_id_t         id;
1268         lnet_ni_t                *ni;
1269         int                       rc;
1270
1271         LASSERT (the_lnet.ln_init);
1272         LASSERT (the_lnet.ln_refcount > 0);
1273
1274         switch (cmd) {
1275         case IOC_LIBCFS_GET_NI:
1276                 rc = LNetGetId(data->ioc_count, &id);
1277                 data->ioc_nid = id.nid;
1278                 return rc;
1279
1280         case IOC_LIBCFS_FAIL_NID:
1281                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
1282
1283         case IOC_LIBCFS_ADD_ROUTE:
1284                 rc = lnet_add_route(data->ioc_net, data->ioc_count,
1285                                     data->ioc_nid);
1286                 return (rc != 0) ? rc : lnet_check_routes();
1287
1288         case IOC_LIBCFS_DEL_ROUTE:
1289                 return lnet_del_route(data->ioc_net, data->ioc_nid);
1290
1291         case IOC_LIBCFS_GET_ROUTE:
1292                 return lnet_get_route(data->ioc_count,
1293                                       &data->ioc_net, &data->ioc_count,
1294                                       &data->ioc_nid, &data->ioc_flags);
1295         case IOC_LIBCFS_NOTIFY_ROUTER:
1296                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
1297                                    (time_t)data->ioc_u64[0]);
1298
1299         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
1300                 /* This can be removed once lustre stops calling it */
1301                 return 0;
1302
1303         case IOC_LIBCFS_LNET_DIST:
1304                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
1305                 if (rc < 0 && rc != -EHOSTUNREACH)
1306                         return rc;
1307
1308                 data->ioc_u32[0] = rc;
1309                 return 0;
1310
1311         case IOC_LIBCFS_TESTPROTOCOMPAT:
1312                 LNET_LOCK();
1313                 the_lnet.ln_testprotocompat = data->ioc_flags;
1314                 LNET_UNLOCK();
1315                 return 0;
1316
1317         case IOC_LIBCFS_PING:
1318                 rc = lnet_ping((lnet_process_id_t) {.nid = data->ioc_nid,
1319                                                     .pid = data->ioc_u32[0]},
1320                                data->ioc_u32[1], /* timeout */
1321                                (lnet_process_id_t *)data->ioc_pbuf1,
1322                                data->ioc_plen1/sizeof(lnet_process_id_t));
1323                 if (rc < 0)
1324                         return rc;
1325                 data->ioc_count = rc;
1326                 return 0;
1327
1328         case IOC_LIBCFS_DEBUG_PEER: {
1329                 /* CAVEAT EMPTOR: this one designed for calling directly; not
1330                  * via an ioctl */
1331                 lnet_process_id_t *id = arg;
1332
1333                 lnet_debug_peer(id->nid);
1334
1335                 ni = lnet_net2ni(LNET_NIDNET(id->nid));
1336                 if (ni == NULL) {
1337                         CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(*id));
1338                 } else {
1339                         if (ni->ni_lnd->lnd_ctl == NULL) {
1340                                 CDEBUG(D_WARNING, "No ctl for %s\n",
1341                                        libcfs_id2str(*id));
1342                         } else {
1343                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
1344                         }
1345
1346                         lnet_ni_decref(ni);
1347                 }
1348                 return 0;
1349         }
1350
1351         default:
1352                 ni = lnet_net2ni(data->ioc_net);
1353                 if (ni == NULL)
1354                         return -EINVAL;
1355
1356                 if (ni->ni_lnd->lnd_ctl == NULL)
1357                         rc = -EINVAL;
1358                 else
1359                         rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
1360
1361                 lnet_ni_decref(ni);
1362                 return rc;
1363         }
1364         /* not reached */
1365 }
1366
1367 int
1368 LNetGetId(unsigned int index, lnet_process_id_t *id)
1369 {
1370         lnet_ni_t        *ni;
1371         struct list_head *tmp;
1372         int               rc = -ENOENT;
1373
1374         LASSERT (the_lnet.ln_init);
1375         LASSERT (the_lnet.ln_refcount > 0);
1376
1377         LNET_LOCK();
1378
1379         list_for_each(tmp, &the_lnet.ln_nis) {
1380                 if (index-- != 0)
1381                         continue;
1382
1383                 ni = list_entry(tmp, lnet_ni_t, ni_list);
1384
1385                 id->nid = ni->ni_nid;
1386                 id->pid = the_lnet.ln_pid;
1387                 rc = 0;
1388                 break;
1389         }
1390
1391         LNET_UNLOCK();
1392
1393         return rc;
1394 }
1395
1396 void
1397 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
1398 {
1399         snprintf(str, len, LPX64, h.cookie);
1400 }
1401
1402
1403 int
1404 lnet_ping_target_init(void)
1405 {
1406         lnet_handle_me_t  meh;
1407         lnet_process_id_t id;
1408         int               rc;
1409         int               rc2;
1410         int               n;
1411         int               infosz;
1412         int               i;
1413
1414         for (n = 0; ; n++) {
1415                 rc = LNetGetId(n, &id);
1416                 if (rc == -ENOENT)
1417                         break;
1418
1419                 LASSERT (rc == 0);
1420         }
1421
1422         infosz = offsetof(lnet_ping_info_t, pi_nid[n]);
1423         LIBCFS_ALLOC(the_lnet.ln_ping_info, infosz);
1424         if (the_lnet.ln_ping_info == NULL) {
1425                 CERROR("Can't allocate ping info[%d]\n", n);
1426                 return -ENOMEM;
1427         }
1428
1429         the_lnet.ln_ping_info->pi_magic   = LNET_PROTO_PING_MAGIC;
1430         the_lnet.ln_ping_info->pi_version = LNET_PROTO_PING_VERSION;
1431         the_lnet.ln_ping_info->pi_pid     = the_lnet.ln_pid;
1432         the_lnet.ln_ping_info->pi_nnids   = n;
1433
1434         for (i = 0; i < n; i++) {
1435                 rc = LNetGetId(i, &id);
1436                 LASSERT (rc == 0);
1437                 the_lnet.ln_ping_info->pi_nid[i] = id.nid;
1438         }
1439
1440         /* We can have a tiny EQ since we only need to see the unlink event on
1441          * teardown, which by definition is the last one! */
1442         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq);
1443         if (rc != 0) {
1444                 CERROR("Can't allocate ping EQ: %d\n", rc);
1445                 goto failed_0;
1446         }
1447
1448         rc = LNetMEAttach(LNET_RESERVED_PORTAL,
1449                           (lnet_process_id_t){.nid = LNET_NID_ANY,
1450                                               .pid = LNET_PID_ANY},
1451                           LNET_PROTO_PING_MATCHBITS, 0LL,
1452                           LNET_UNLINK, LNET_INS_AFTER,
1453                           &meh);
1454         if (rc != 0) {
1455                 CERROR("Can't create ping ME: %d\n", rc);
1456                 goto failed_1;
1457         }
1458
1459         rc = LNetMDAttach(meh,
1460                           (lnet_md_t){.start = the_lnet.ln_ping_info,
1461                                       .length = infosz,
1462                                       .threshold = LNET_MD_THRESH_INF,
1463                                       .options = (LNET_MD_OP_GET |
1464                                                   LNET_MD_TRUNCATE |
1465                                                   LNET_MD_MANAGE_REMOTE),
1466                                       .eq_handle = the_lnet.ln_ping_target_eq},
1467                           LNET_RETAIN,
1468                           &the_lnet.ln_ping_target_md);
1469         if (rc != 0) {
1470                 CERROR("Can't attach ping MD: %d\n", rc);
1471                 goto failed_2;
1472         }
1473
1474         return 0;
1475
1476  failed_2:
1477         rc2 = LNetMEUnlink(meh);
1478         LASSERT (rc2 == 0);
1479  failed_1:
1480         rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
1481         LASSERT (rc2 == 0);
1482  failed_0:
1483         LIBCFS_FREE(the_lnet.ln_ping_info, infosz);
1484
1485         return rc;
1486 }
1487
1488 void
1489 lnet_ping_target_fini(void)
1490 {
1491         lnet_event_t    event;
1492         int             rc;
1493         int             which;
1494         int             timeout_ms = 1000;
1495         cfs_sigset_t    blocked = cfs_block_allsigs();
1496
1497         LNetMDUnlink(the_lnet.ln_ping_target_md);
1498         /* NB md could be busy; this just starts the unlink */
1499
1500         for (;;) {
1501                 rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1,
1502                                 timeout_ms, &event, &which);
1503
1504                 /* I expect overflow... */
1505                 LASSERT (rc >= 0 || rc == -EOVERFLOW);
1506
1507                 if (rc == 0) {
1508                         /* timed out: provide a diagnostic */
1509                         CWARN("Still waiting for ping MD to unlink\n");
1510                         timeout_ms *= 2;
1511                         continue;
1512                 }
1513
1514                 /* Got a valid event */
1515                 if (event.unlinked)
1516                         break;
1517         }
1518
1519         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1520         LASSERT (rc == 0);
1521
1522         LIBCFS_FREE(the_lnet.ln_ping_info,
1523                     offsetof(lnet_ping_info_t,
1524                              pi_nid[the_lnet.ln_ping_info->pi_nnids]));
1525
1526         cfs_restore_sigs(blocked);
1527 }
1528
1529 int
1530 lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids)
1531 {
1532         lnet_handle_eq_t     eqh;
1533         lnet_handle_md_t     mdh;
1534         lnet_event_t         event;
1535         int                  which;
1536         int                  unlinked = 0;
1537         int                  replied = 0;
1538         const int            a_long_time = 60000; /* mS */
1539         int                  infosz = offsetof(lnet_ping_info_t, pi_nid[n_ids]);
1540         lnet_ping_info_t    *info;
1541         lnet_process_id_t    tmpid;
1542         int                  i;
1543         int                  nob;
1544         int                  rc;
1545         int                  rc2;
1546         cfs_sigset_t         blocked;
1547
1548         if (n_ids <= 0 ||
1549             id.nid == LNET_NID_ANY ||
1550             timeout_ms > 500000 ||              /* arbitrary limit! */
1551             n_ids > 20)                         /* arbitrary limit! */
1552                 return -EINVAL;
1553
1554         if (id.pid == LNET_PID_ANY)
1555                 id.pid = LUSTRE_SRV_LNET_PID;
1556
1557         LIBCFS_ALLOC(info, infosz);
1558         if (info == NULL)
1559                 return -ENOMEM;
1560
1561         /* NB 2 events max (including any unlink event) */
1562         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
1563         if (rc != 0) {
1564                 CERROR("Can't allocate EQ: %d\n", rc);
1565                 goto out_0;
1566         }
1567
1568         rc = LNetMDBind((lnet_md_t){.start = info,
1569                                     .length = infosz,
1570                                     .threshold = 2, /* GET/REPLY */
1571                                     .options = LNET_MD_TRUNCATE,
1572                                     .eq_handle = eqh},
1573                         LNET_UNLINK,
1574                         &mdh);
1575         if (rc != 0) {
1576                 CERROR("Can't bind MD: %d\n", rc);
1577                 goto out_1;
1578         }
1579
1580         rc = LNetGet(LNET_NID_ANY, mdh, id,
1581                      LNET_RESERVED_PORTAL,
1582                      LNET_PROTO_PING_MATCHBITS, 0);
1583
1584         if (rc != 0) {
1585                 /* Don't CERROR; this could be deliberate! */
1586
1587                 rc2 = LNetMDUnlink(mdh);
1588                 LASSERT (rc2 == 0);
1589
1590                 /* NB must wait for the UNLINK event below... */
1591                 unlinked = 1;
1592                 timeout_ms = a_long_time;
1593         }
1594
1595         do {
1596                 /* MUST block for unlink to complete */
1597                 if (unlinked)
1598                         blocked = cfs_block_allsigs();
1599
1600                 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
1601
1602                 if (unlinked)
1603                         cfs_restore_sigs(blocked);
1604
1605                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
1606                        (rc2 <= 0) ? -1 : event.type,
1607                        (rc2 <= 0) ? -1 : event.status,
1608                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
1609
1610                 LASSERT (rc2 != -EOVERFLOW);     /* can't miss anything */
1611
1612                 if (rc2 <= 0 || event.status != 0) {
1613                         /* timeout or error */
1614                         if (!replied && rc == 0)
1615                                 rc = (rc2 < 0) ? rc2 :
1616                                      (rc2 == 0) ? -ETIMEDOUT :
1617                                      event.status;
1618
1619                         if (!unlinked) {
1620                                 /* Ensure completion in finite time... */
1621                                 LNetMDUnlink(mdh);
1622                                 /* No assertion (racing with network) */
1623                                 unlinked = 1;
1624                                 timeout_ms = a_long_time;
1625                         } else if (rc2 == 0) {
1626                                 /* timed out waiting for unlink */
1627                                 CWARN("ping %s: late network completion\n",
1628                                       libcfs_id2str(id));
1629                         }
1630
1631                 } else if (event.type == LNET_EVENT_REPLY) {
1632                         replied = 1;
1633                         rc = event.mlength;
1634                 }
1635
1636         } while (rc2 <= 0 || !event.unlinked);
1637
1638         if (!replied) {
1639                 if (rc >= 0)
1640                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
1641                               libcfs_id2str(id));
1642                 rc = -EIO;
1643                 goto out_1;
1644         }
1645
1646         nob = rc;
1647         LASSERT (nob >= 0 && nob <= infosz);
1648
1649         rc = -EPROTO;                           /* if I can't parse... */
1650
1651         if (nob < 8) {
1652                 /* can't check magic/version */
1653                 CERROR("%s: ping info too short %d\n",
1654                        libcfs_id2str(id), nob);
1655                 goto out_1;
1656         }
1657
1658         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
1659                 /* NB I might be swabbing garbage until I check below, but it
1660                  * doesn't matter */
1661                 __swab32s(&info->pi_version);
1662                 __swab32s(&info->pi_pid);
1663                 __swab32s(&info->pi_nnids);
1664                 for (i = 0; i < info->pi_nnids && i < n_ids; i++)
1665                         __swab64s(&info->pi_nid[i]);
1666
1667         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
1668                 CERROR("%s: Unexpected magic %08x\n", 
1669                        libcfs_id2str(id), info->pi_magic);
1670                 goto out_1;
1671         }
1672
1673         if (info->pi_version != LNET_PROTO_PING_VERSION) {
1674                 CERROR("%s: Unexpected version 0x%x\n",
1675                        libcfs_id2str(id), info->pi_version);
1676                 goto out_1;
1677         }
1678
1679         if (nob < offsetof(lnet_ping_info_t, pi_nid[0])) {
1680                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
1681                        nob, (int)offsetof(lnet_ping_info_t, pi_nid[0]));
1682                 goto out_1;
1683         }
1684
1685         if (info->pi_nnids < n_ids)
1686                 n_ids = info->pi_nnids;
1687
1688         if (nob < offsetof(lnet_ping_info_t, pi_nid[n_ids])) {
1689                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
1690                        nob, (int)offsetof(lnet_ping_info_t, pi_nid[n_ids]));
1691                 goto out_1;
1692         }
1693
1694         rc = -EFAULT;                           /* If I SEGV... */
1695
1696         for (i = 0; i < n_ids; i++) {
1697                 tmpid.pid = info->pi_pid;
1698                 tmpid.nid = info->pi_nid[i];
1699 #ifdef __KERNEL__
1700                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
1701                         goto out_1;
1702 #else
1703                 ids[i] = tmpid;
1704 #endif
1705         }
1706         rc = info->pi_nnids;
1707
1708  out_1:
1709         rc2 = LNetEQFree(eqh);
1710         if (rc2 != 0)
1711                 CERROR("rc2 %d\n", rc2);
1712         LASSERT (rc2 == 0);
1713
1714  out_0:
1715         LIBCFS_FREE(info, infosz);
1716         return rc;
1717 }