Whamcloud - gitweb
b=15272
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #define DEBUG_SUBSYSTEM S_LNET
23 #include <lnet/lib-lnet.h>
24
25 #ifdef __KERNEL__
26 #define D_LNI D_CONSOLE
27 #else
28 #define D_LNI D_CONFIG
29 #endif
30
31 lnet_t      the_lnet;                           /* THE state of the network */
32
33 #ifdef __KERNEL__
34
35 static char *ip2nets = "";
36 CFS_MODULE_PARM(ip2nets, "s", charp, 0444,
37                 "LNET network <- IP table");
38
39 static char *networks = "";
40 CFS_MODULE_PARM(networks, "s", charp, 0444,
41                 "local networks");
42
43 static char *routes = "";
44 CFS_MODULE_PARM(routes, "s", charp, 0444,
45                 "routes to non-local networks");
46
47 static char *portals_compatibility = "none";
48 CFS_MODULE_PARM(portals_compatibility, "s", charp, 0444,
49                 "wire protocol compatibility: 'strong'|'weak'|'none'");
50
51 char *
52 lnet_get_routes(void)
53 {
54         return routes;
55 }
56
57 char *
58 lnet_get_networks(void)
59 {
60         char   *nets;
61         int     rc;
62
63         if (*networks != 0 && *ip2nets != 0) {
64                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
65                                    "'ip2nets' but not both at once\n");
66                 return NULL;
67         }
68         
69         if (*ip2nets != 0) {
70                 rc = lnet_parse_ip2nets(&nets, ip2nets);
71                 return (rc == 0) ? nets : NULL;
72         }
73
74         if (*networks != 0)
75                 return networks;
76
77         return "tcp";
78 }
79
80 int
81 lnet_get_portals_compatibility(void)
82 {
83         if (!strcmp(portals_compatibility, "none")) {
84                 return 0;
85         }
86
87         if (!strcmp(portals_compatibility, "weak")) {
88                 return 1;
89                 LCONSOLE_WARN("Starting in weak portals-compatible mode\n");
90         }
91
92         if (!strcmp(portals_compatibility, "strong")) {
93                 return 2;
94                 LCONSOLE_WARN("Starting in strong portals-compatible mode\n");
95         } 
96
97         LCONSOLE_ERROR_MSG(0x102, "portals_compatibility=\"%s\" not supported\n",
98                            portals_compatibility);
99         return -EINVAL;
100 }
101
102 void
103 lnet_init_locks(void)
104 {
105         spin_lock_init (&the_lnet.ln_lock);
106         cfs_waitq_init (&the_lnet.ln_waitq);
107         init_mutex(&the_lnet.ln_lnd_mutex);
108         init_mutex(&the_lnet.ln_api_mutex);
109 }
110
/* Counterpart of lnet_init_locks(); nothing to tear down in the kernel
 * build. */
void
lnet_fini_locks (void)
{
}
115
116 #else
117
/* Return the routes string from the LNET_ROUTES environment variable, or
 * an empty string when it is not set. */
char *
lnet_get_routes (void)
{
        char *env = getenv("LNET_ROUTES");

        if (env != NULL)
                return env;

        return "";
}
125
126 char *
127 lnet_get_networks (void)
128 {
129         static char       default_networks[256];
130         char             *networks = getenv ("LNET_NETWORKS");
131         char             *ip2nets  = getenv ("LNET_IP2NETS");
132         char             *str;
133         char             *sep;
134         int               len;
135         int               nob;
136         int               rc;
137         struct list_head *tmp;
138
139 #ifdef NOT_YET
140         if (networks != NULL && ip2nets != NULL) {
141                 LCONSOLE_ERROR_MSG(0x103, "Please set EITHER 'LNET_NETWORKS' or"
142                                    " 'LNET_IP2NETS' but not both at once\n");
143                 return NULL;
144         }
145
146         if (ip2nets != NULL) {
147                 rc = lnet_parse_ip2nets(&networks, ip2nets);
148                 return (rc == 0) ? networks : NULL;
149         }
150 #else
151         ip2nets = NULL;
152         rc = 0;
153 #endif
154         if (networks != NULL)
155                 return networks;
156
157         /* In userland, the default 'networks=' is the list of known net types */
158
159         len = sizeof(default_networks);
160         str = default_networks;
161         *str = 0;
162         sep = "";
163                 
164         list_for_each (tmp, &the_lnet.ln_lnds) {
165                         lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
166                         
167                         nob = snprintf(str, len, "%s%s", sep,
168                                        libcfs_lnd2str(lnd->lnd_type));
169                         len -= nob;
170                         if (len < 0) {
171                                 /* overflowed the string; leave it where it was */
172                                 *str = 0;
173                                 break;
174                         }
175                         
176                         str += nob;
177                         sep = ",";
178         }
179
180         return default_networks;
181 }
182
/* Userland build: always reports compatibility level 0. */
int
lnet_get_portals_compatibility (void)
{
        return 0;
}
188
189 # ifndef HAVE_LIBPTHREAD
190
191 void lnet_init_locks(void)
192 {
193         the_lnet.ln_lock = 0;
194         the_lnet.ln_lnd_mutex = 0;
195         the_lnet.ln_api_mutex = 0;
196 }
197
198 void lnet_fini_locks(void)
199 {
200         LASSERT (the_lnet.ln_api_mutex == 0);
201         LASSERT (the_lnet.ln_lnd_mutex == 0);
202         LASSERT (the_lnet.ln_lock == 0);
203 }
204
205 # else
206
207 void lnet_init_locks(void)
208 {
209         pthread_cond_init(&the_lnet.ln_cond, NULL);
210         pthread_mutex_init(&the_lnet.ln_lock, NULL);
211         pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL);
212         pthread_mutex_init(&the_lnet.ln_api_mutex, NULL);
213 }
214
215 void lnet_fini_locks(void)
216 {
217         pthread_mutex_destroy(&the_lnet.ln_api_mutex);
218         pthread_mutex_destroy(&the_lnet.ln_lnd_mutex);
219         pthread_mutex_destroy(&the_lnet.ln_lock);
220         pthread_cond_destroy(&the_lnet.ln_cond);
221 }
222
223 # endif
224 #endif
225
226 void lnet_assert_wire_constants (void)
227 {
228         /* Wire protocol assertions generated by 'wirecheck'
229          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
230          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
231          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
232
233         /* Constants... */
234         CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
235         CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1);
236         CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0);
237         CLASSERT (LNET_MSG_ACK == 0);
238         CLASSERT (LNET_MSG_PUT == 1);
239         CLASSERT (LNET_MSG_GET == 2);
240         CLASSERT (LNET_MSG_REPLY == 3);
241         CLASSERT (LNET_MSG_HELLO == 4);
242
243         /* Checks for struct ptl_handle_wire_t */
244         CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16);
245         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0);
246         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8);
247         CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8);
248         CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8);
249
250         /* Checks for struct lnet_magicversion_t */
251         CLASSERT ((int)sizeof(lnet_magicversion_t) == 8);
252         CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0);
253         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
254         CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4);
255         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
256         CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6);
257         CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
258
259         /* Checks for struct lnet_hdr_t */
260         CLASSERT ((int)sizeof(lnet_hdr_t) == 72);
261         CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0);
262         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
263         CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8);
264         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
265         CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16);
266         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
267         CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) == 20);
268         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
269         CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24);
270         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
271         CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28);
272         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
273         CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32);
274         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
275
276         /* Ack */
277         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
278         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
279         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
280         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
281         CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
282         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
283
284         /* Put */
285         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
286         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
287         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
288         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
289         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
290         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
291         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
292         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
293         CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
294         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
295
296         /* Get */
297         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
298         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
299         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
300         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
301         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
302         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
303         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
304         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
305         CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
306         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
307
308         /* Reply */
309         CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
310         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
311
312         /* Hello */
313         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
314         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
315         CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
316         CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
317 }
318
319 lnd_t *
320 lnet_find_lnd_by_type (int type) 
321 {
322         lnd_t              *lnd;
323         struct list_head   *tmp;
324
325         /* holding lnd mutex */
326         list_for_each (tmp, &the_lnet.ln_lnds) {
327                 lnd = list_entry(tmp, lnd_t, lnd_list);
328
329                 if (lnd->lnd_type == type)
330                         return lnd;
331         }
332         
333         return NULL;
334 }
335
336 void
337 lnet_register_lnd (lnd_t *lnd)
338 {
339         LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
340
341         LASSERT (the_lnet.ln_init);
342         LASSERT (libcfs_isknown_lnd(lnd->lnd_type));
343         LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
344         
345         list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds);
346         lnd->lnd_refcount = 0;
347
348         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
349
350         LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
351 }
352
353 void
354 lnet_unregister_lnd (lnd_t *lnd)
355 {
356         LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
357
358         LASSERT (the_lnet.ln_init);
359         LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
360         LASSERT (lnd->lnd_refcount == 0);
361         
362         list_del (&lnd->lnd_list);
363         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
364
365         LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
366 }
367
368 #ifndef LNET_USE_LIB_FREELIST
369
/* No freelists in this configuration: nothing to set up, always succeeds. */
int
lnet_descriptor_setup(void)
{
        return 0;
}
375
/* No freelists in this configuration: nothing to clean up. */
void
lnet_descriptor_cleanup(void)
{
}
380
381 #else
382
383 int
384 lnet_freelist_init (lnet_freelist_t *fl, int n, int size)
385 {
386         char *space;
387
388         LASSERT (n > 0);
389
390         size += offsetof (lnet_freeobj_t, fo_contents);
391
392         LIBCFS_ALLOC(space, n * size);
393         if (space == NULL)
394                 return (-ENOMEM);
395
396         CFS_INIT_LIST_HEAD (&fl->fl_list);
397         fl->fl_objs = space;
398         fl->fl_nobjs = n;
399         fl->fl_objsize = size;
400
401         do
402         {
403                 memset (space, 0, size);
404                 list_add ((struct list_head *)space, &fl->fl_list);
405                 space += size;
406         } while (--n != 0);
407
408         return (0);
409 }
410
411 void
412 lnet_freelist_fini (lnet_freelist_t *fl)
413 {
414         struct list_head *el;
415         int               count;
416
417         if (fl->fl_nobjs == 0)
418                 return;
419
420         count = 0;
421         for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
422                 count++;
423
424         LASSERT (count == fl->fl_nobjs);
425
426         LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
427         memset (fl, 0, sizeof (fl));
428 }
429
430 int
431 lnet_descriptor_setup (void)
432 {
433         /* NB on failure caller must still call lnet_descriptor_cleanup */
434         /*               ******                                         */
435         int        rc;
436
437         memset (&the_lnet.ln_free_mes,  0, sizeof (the_lnet.ln_free_mes));
438         memset (&the_lnet.ln_free_msgs, 0, sizeof (the_lnet.ln_free_msgs));
439         memset (&the_lnet.ln_free_mds,  0, sizeof (the_lnet.ln_free_mds));
440         memset (&the_lnet.ln_free_eqs,  0, sizeof (the_lnet.ln_free_eqs));
441
442         rc = lnet_freelist_init(&the_lnet.ln_free_mes,
443                                 MAX_MES, sizeof (lnet_me_t));
444         if (rc != 0)
445                 return (rc);
446
447         rc = lnet_freelist_init(&the_lnet.ln_free_msgs,
448                                 MAX_MSGS, sizeof (lnet_msg_t));
449         if (rc != 0)
450                 return (rc);
451
452         rc = lnet_freelist_init(&the_lnet.ln_free_mds,
453                                 MAX_MDS, sizeof (lnet_libmd_t));
454         if (rc != 0)
455                 return (rc);
456
457         rc = lnet_freelist_init(&the_lnet.ln_free_eqs,
458                                 MAX_EQS, sizeof (lnet_eq_t));
459         return (rc);
460 }
461
462 void
463 lnet_descriptor_cleanup (void)
464 {
465         lnet_freelist_fini (&the_lnet.ln_free_mes);
466         lnet_freelist_fini (&the_lnet.ln_free_msgs);
467         lnet_freelist_fini (&the_lnet.ln_free_mds);
468         lnet_freelist_fini (&the_lnet.ln_free_eqs);
469 }
470
471 #endif
472
473 __u64
474 lnet_create_interface_cookie (void)
475 {
476         /* NB the interface cookie in wire handles guards against delayed
477          * replies and ACKs appearing valid after reboot. Initialisation time,
478          * even if it's only implemented to millisecond resolution is probably
479          * easily good enough. */
480         struct timeval tv;
481         __u64          cookie;
482 #ifndef __KERNEL__
483         int            rc = gettimeofday (&tv, NULL);
484         LASSERT (rc == 0);
485 #else
486         do_gettimeofday(&tv);
487 #endif
488         cookie = tv.tv_sec;
489         cookie *= 1000000;
490         cookie += tv.tv_usec;
491         return cookie;
492 }
493
494 int
495 lnet_setup_handle_hash (void) 
496 {
497         int       i;
498         
499         /* Arbitrary choice of hash table size */
500 #ifdef __KERNEL__
501         the_lnet.ln_lh_hash_size = CFS_PAGE_SIZE / sizeof (struct list_head);
502 #else
503         the_lnet.ln_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
504 #endif
505         LIBCFS_ALLOC(the_lnet.ln_lh_hash_table,
506                      the_lnet.ln_lh_hash_size * sizeof (struct list_head));
507         if (the_lnet.ln_lh_hash_table == NULL)
508                 return (-ENOMEM);
509         
510         for (i = 0; i < the_lnet.ln_lh_hash_size; i++)
511                 CFS_INIT_LIST_HEAD (&the_lnet.ln_lh_hash_table[i]);
512
513         the_lnet.ln_next_object_cookie = LNET_COOKIE_TYPES;
514         
515         return (0);
516 }
517
518 void
519 lnet_cleanup_handle_hash (void)
520 {
521         if (the_lnet.ln_lh_hash_table == NULL)
522                 return;
523         
524         LIBCFS_FREE(the_lnet.ln_lh_hash_table,
525                     the_lnet.ln_lh_hash_size * sizeof (struct list_head));
526 }
527
528 lnet_libhandle_t *
529 lnet_lookup_cookie (__u64 cookie, int type) 
530 {
531         /* ALWAYS called with LNET_LOCK held */
532         struct list_head    *list;
533         struct list_head    *el;
534         unsigned int         hash;
535
536         if ((cookie & (LNET_COOKIE_TYPES - 1)) != type)
537                 return (NULL);
538         
539         hash = ((unsigned int)cookie) % the_lnet.ln_lh_hash_size;
540         list = &the_lnet.ln_lh_hash_table[hash];
541         
542         list_for_each (el, list) {
543                 lnet_libhandle_t *lh = list_entry (el, lnet_libhandle_t,
544                                                   lh_hash_chain);
545                 
546                 if (lh->lh_cookie == cookie)
547                         return (lh);
548         }
549         
550         return (NULL);
551 }
552
553 void
554 lnet_initialise_handle (lnet_libhandle_t *lh, int type) 
555 {
556         /* ALWAYS called with LNET_LOCK held */
557         unsigned int    hash;
558
559         LASSERT (type >= 0 && type < LNET_COOKIE_TYPES);
560         lh->lh_cookie = the_lnet.ln_next_object_cookie | type;
561         the_lnet.ln_next_object_cookie += LNET_COOKIE_TYPES;
562         
563         hash = ((unsigned int)lh->lh_cookie) % the_lnet.ln_lh_hash_size;
564         list_add (&lh->lh_hash_chain, &the_lnet.ln_lh_hash_table[hash]);
565 }
566
567 void
568 lnet_invalidate_handle (lnet_libhandle_t *lh)
569 {
570         /* ALWAYS called with LNET_LOCK held */
571         list_del (&lh->lh_hash_chain);
572 }
573
574 int
575 lnet_init_finalizers(void)
576 {
577 #ifdef __KERNEL__
578         int    i;
579
580         the_lnet.ln_nfinalizers = num_online_cpus();
581
582         LIBCFS_ALLOC(the_lnet.ln_finalizers,
583                      the_lnet.ln_nfinalizers * 
584                      sizeof(*the_lnet.ln_finalizers));
585         if (the_lnet.ln_finalizers == NULL) {
586                 CERROR("Can't allocate ln_finalizers\n");
587                 return -ENOMEM;
588         }
589
590         for (i = 0; i < the_lnet.ln_nfinalizers; i++)
591                 the_lnet.ln_finalizers[i] = NULL;
592 #else
593         the_lnet.ln_finalizing = 0;
594 #endif
595
596         CFS_INIT_LIST_HEAD(&the_lnet.ln_finalizeq);
597         return 0;
598 }
599
600 void
601 lnet_fini_finalizers(void)
602 {
603 #ifdef __KERNEL__
604         int    i;
605         
606         for (i = 0; i < the_lnet.ln_nfinalizers; i++)
607                 LASSERT (the_lnet.ln_finalizers[i] == NULL);
608
609         LIBCFS_FREE(the_lnet.ln_finalizers,
610                     the_lnet.ln_nfinalizers *
611                     sizeof(*the_lnet.ln_finalizers));
612 #else
613         LASSERT (!the_lnet.ln_finalizing);
614 #endif
615         LASSERT (list_empty(&the_lnet.ln_finalizeq));
616 }
617
618 #ifndef __KERNEL__
619 /* Temporary workaround to allow uOSS and test programs force server
620  * mode in userspace. See comments near ln_server_mode_flag in
621  * lnet/lib-types.h */
622
623 void
624 lnet_server_mode() {
625         the_lnet.ln_server_mode_flag = 1;
626 }
627 #endif        
628
629 int
630 lnet_prepare(lnet_pid_t requested_pid)
631 {
632         /* Prepare to bring up the network */
633         int               rc = 0;
634         int               i;
635
636         LASSERT (the_lnet.ln_refcount == 0);
637
638         the_lnet.ln_routing = 0;
639
640 #ifdef __KERNEL__
641         LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
642         the_lnet.ln_pid = requested_pid;
643 #else
644         if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
645                 LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
646                 
647                 if (cfs_curproc_uid())/* Only root can run user-space server */
648                         return -EPERM;
649                 the_lnet.ln_pid = requested_pid;
650
651         } else {/* client case (liblustre) */
652
653                 /* My PID must be unique on this node and flag I'm userspace */
654                 the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
655         }        
656 #endif
657
658         rc = lnet_descriptor_setup();
659         if (rc != 0)
660                 goto failed0;
661
662         memset(&the_lnet.ln_counters, 0, 
663                sizeof(the_lnet.ln_counters));
664
665         CFS_INIT_LIST_HEAD (&the_lnet.ln_active_msgs);
666         CFS_INIT_LIST_HEAD (&the_lnet.ln_active_mds);
667         CFS_INIT_LIST_HEAD (&the_lnet.ln_active_eqs);
668         CFS_INIT_LIST_HEAD (&the_lnet.ln_test_peers);
669         CFS_INIT_LIST_HEAD (&the_lnet.ln_nis);
670         CFS_INIT_LIST_HEAD (&the_lnet.ln_zombie_nis);
671         CFS_INIT_LIST_HEAD (&the_lnet.ln_remote_nets);
672         CFS_INIT_LIST_HEAD (&the_lnet.ln_routers);
673
674         the_lnet.ln_interface_cookie = lnet_create_interface_cookie();
675
676         lnet_init_rtrpools();
677
678         rc = lnet_setup_handle_hash ();
679         if (rc != 0)
680                 goto failed0;
681
682         rc = lnet_create_peer_table();
683         if (rc != 0)
684                 goto failed1;
685
686         rc = lnet_init_finalizers();
687         if (rc != 0)
688                 goto failed2;
689
690         the_lnet.ln_nportals = MAX_PORTALS;
691         LIBCFS_ALLOC(the_lnet.ln_portals, 
692                      the_lnet.ln_nportals * 
693                      sizeof(*the_lnet.ln_portals));
694         if (the_lnet.ln_portals == NULL) {
695                 rc = -ENOMEM;
696                 goto failed3;
697         }
698
699         for (i = 0; i < the_lnet.ln_nportals; i++) {
700                 CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_ml));
701                 CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_msgq));
702                 the_lnet.ln_portals[i].ptl_options = 0;
703         }
704
705         return 0;
706         
707  failed3:
708         lnet_fini_finalizers();
709  failed2:
710         lnet_destroy_peer_table();
711  failed1:
712         lnet_cleanup_handle_hash();
713  failed0:
714         lnet_descriptor_cleanup();
715         return rc;
716 }
717
718 int
719 lnet_unprepare (void)
720 {
721         int       idx;
722         
723         /* NB no LNET_LOCK since this is the last reference.  All LND instances
724          * have shut down already, so it is safe to unlink and free all
725          * descriptors, even those that appear committed to a network op (eg MD
726          * with non-zero pending count) */
727
728         lnet_fail_nid(LNET_NID_ANY, 0);
729
730         LASSERT (list_empty(&the_lnet.ln_test_peers));
731         LASSERT (the_lnet.ln_refcount == 0);
732         LASSERT (list_empty(&the_lnet.ln_nis));
733         LASSERT (list_empty(&the_lnet.ln_zombie_nis));
734         LASSERT (the_lnet.ln_nzombie_nis == 0);
735                
736         for (idx = 0; idx < the_lnet.ln_nportals; idx++) {
737                 LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq));
738
739                 while (!list_empty (&the_lnet.ln_portals[idx].ptl_ml)) {
740                         lnet_me_t *me = list_entry (the_lnet.ln_portals[idx].ptl_ml.next,
741                                                     lnet_me_t, me_list);
742
743                         CERROR ("Active me %p on exit\n", me);
744                         list_del (&me->me_list);
745                         lnet_me_free (me);
746                 }
747         }
748
749         while (!list_empty (&the_lnet.ln_active_mds)) {
750                 lnet_libmd_t *md = list_entry (the_lnet.ln_active_mds.next,
751                                                lnet_libmd_t, md_list);
752
753                 CERROR ("Active md %p on exit\n", md);
754                 list_del (&md->md_list);
755                 lnet_md_free (md);
756         }
757
758         while (!list_empty (&the_lnet.ln_active_eqs)) {
759                 lnet_eq_t *eq = list_entry (the_lnet.ln_active_eqs.next,
760                                             lnet_eq_t, eq_list);
761
762                 CERROR ("Active eq %p on exit\n", eq);
763                 list_del (&eq->eq_list);
764                 lnet_eq_free (eq);
765         }
766
767         while (!list_empty (&the_lnet.ln_active_msgs)) {
768                 lnet_msg_t *msg = list_entry (the_lnet.ln_active_msgs.next,
769                                               lnet_msg_t, msg_activelist);
770
771                 CERROR ("Active msg %p on exit\n", msg);
772                 LASSERT (msg->msg_onactivelist);
773                 msg->msg_onactivelist = 0;
774                 list_del (&msg->msg_activelist);
775                 lnet_msg_free (msg);
776         }
777
778         LIBCFS_FREE(the_lnet.ln_portals,  
779                     the_lnet.ln_nportals * sizeof(*the_lnet.ln_portals));
780
781         lnet_free_rtrpools();
782         lnet_fini_finalizers();
783         lnet_destroy_peer_table();
784         lnet_cleanup_handle_hash();
785         lnet_descriptor_cleanup();
786
787         return (0);
788 }
789
790 lnet_ni_t  *
791 lnet_net2ni_locked (__u32 net)
792 {
793         struct list_head *tmp;
794         lnet_ni_t        *ni;
795
796         list_for_each (tmp, &the_lnet.ln_nis) {
797                 ni = list_entry(tmp, lnet_ni_t, ni_list);
798
799                 if (lnet_ptlcompat_matchnet(LNET_NIDNET(ni->ni_nid), net)) {
800                         lnet_ni_addref_locked(ni);
801                         return ni;
802                 }
803         }
804         
805         return NULL;
806 }
807
808 int
809 lnet_islocalnet (__u32 net)
810 {
811         lnet_ni_t        *ni;
812         
813         LNET_LOCK();
814         ni = lnet_net2ni_locked(net);
815         if (ni != NULL)
816                 lnet_ni_decref_locked(ni);
817         LNET_UNLOCK();
818
819         return ni != NULL;
820 }
821
822 lnet_ni_t  *
823 lnet_nid2ni_locked (lnet_nid_t nid)
824 {
825         struct list_head *tmp;
826         lnet_ni_t        *ni;
827
828         list_for_each (tmp, &the_lnet.ln_nis) {
829                 ni = list_entry(tmp, lnet_ni_t, ni_list);
830
831                 if (lnet_ptlcompat_matchnid(ni->ni_nid, nid)) {
832                         lnet_ni_addref_locked(ni);
833                         return ni;
834                 }
835         }
836         
837         return NULL;
838 }
839
840 int
841 lnet_islocalnid (lnet_nid_t nid)
842 {
843         lnet_ni_t     *ni;
844         
845         LNET_LOCK();
846         ni = lnet_nid2ni_locked(nid);
847         if (ni != NULL)
848                 lnet_ni_decref_locked(ni);
849         LNET_UNLOCK();
850
851         return ni != NULL;
852 }
853
854 int
855 lnet_count_acceptor_nis (lnet_ni_t **first_ni)
856 {
857         /* Return the # of NIs that need the acceptor.  Return the first one in
858          * *first_ni so the acceptor can pass it connections "blind" to retain
859          * binary compatibility. */
860         int                count = 0;
861 #if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
862         struct list_head  *tmp;
863         lnet_ni_t         *ni;
864
865         LNET_LOCK();
866         list_for_each (tmp, &the_lnet.ln_nis) {
867                 ni = list_entry(tmp, lnet_ni_t, ni_list);
868
869                 if (ni->ni_lnd->lnd_accept != NULL) {
870                         /* This LND uses the acceptor */
871                         if (count == 0 && first_ni != NULL) {
872                                 lnet_ni_addref_locked(ni);
873                                 *first_ni = ni;
874                         }
875                         count++;
876                 }
877         }
878         
879         LNET_UNLOCK();
880
881 #endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
882         return count;
883 }
884
885 void
886 lnet_shutdown_lndnis (void)
887 {
888         int                i;
889         int                islo;
890         lnet_ni_t         *ni;
891
892         /* NB called holding the global mutex */
893
894         /* All quiet on the API front */
895         LASSERT (!the_lnet.ln_shutdown);
896         LASSERT (the_lnet.ln_refcount == 0);
897         LASSERT (list_empty(&the_lnet.ln_zombie_nis));
898         LASSERT (the_lnet.ln_nzombie_nis == 0);
899         LASSERT (list_empty(&the_lnet.ln_remote_nets));
900
901         LNET_LOCK();
902         the_lnet.ln_shutdown = 1;               /* flag shutdown */
903
904         /* Unlink NIs from the global table */
905         while (!list_empty(&the_lnet.ln_nis)) {
906                 ni = list_entry(the_lnet.ln_nis.next,
907                                 lnet_ni_t, ni_list);
908                 list_del (&ni->ni_list);
909
910                 the_lnet.ln_nzombie_nis++;
911                 lnet_ni_decref_locked(ni); /* drop apini's ref */
912         }
913
914         /* Drop the cached eqwait NI. */
915         if (the_lnet.ln_eqwaitni != NULL) {
916                 lnet_ni_decref_locked(the_lnet.ln_eqwaitni);
917                 the_lnet.ln_eqwaitni = NULL;
918         }
919
920         /* Drop the cached loopback NI. */
921         if (the_lnet.ln_loni != NULL) {
922                 lnet_ni_decref_locked(the_lnet.ln_loni);
923                 the_lnet.ln_loni = NULL;
924         }
925
926         LNET_UNLOCK();
927
928         /* Clear lazy portals and drop delayed messages which hold refs
929          * on their lnet_msg_t::msg_rxpeer */
930         for (i = 0; i < the_lnet.ln_nportals; i++)
931                 LNetClearLazyPortal(i);
932
933         /* Clear the peer table and wait for all peers to go (they hold refs on
934          * their NIs) */
935         lnet_clear_peer_table();
936
937         LNET_LOCK();
938         /* Now wait for the NI's I just nuked to show up on apini_zombie_nis
939          * and shut them down in guaranteed thread context */
940         i = 2;
941         while (the_lnet.ln_nzombie_nis != 0) {
942
943                 while (list_empty(&the_lnet.ln_zombie_nis)) {
944                         LNET_UNLOCK();
945                         ++i;
946                         if ((i & (-i)) == i)
947                                 CDEBUG(D_WARNING,"Waiting for %d zombie NIs\n",
948                                        the_lnet.ln_nzombie_nis);
949                         cfs_pause(cfs_time_seconds(1));
950                         LNET_LOCK();
951                 }
952
953                 ni = list_entry(the_lnet.ln_zombie_nis.next,
954                                 lnet_ni_t, ni_list);
955                 list_del(&ni->ni_list);
956                 ni->ni_lnd->lnd_refcount--;
957
958                 LNET_UNLOCK();
959
960                 islo = ni->ni_lnd->lnd_type == LOLND;
961
962                 LASSERT (!in_interrupt ());
963                 (ni->ni_lnd->lnd_shutdown)(ni);
964
965                 /* can't deref lnd anymore now; it might have unregistered
966                  * itself...  */
967
968                 if (!islo)
969                         CDEBUG(D_LNI, "Removed LNI %s\n",
970                                libcfs_nid2str(ni->ni_nid));
971
972                 LIBCFS_FREE(ni, sizeof(*ni));
973
974                 LNET_LOCK();
975                 the_lnet.ln_nzombie_nis--;
976         }
977
978         the_lnet.ln_shutdown = 0;
979         LNET_UNLOCK();
980
981         if (the_lnet.ln_network_tokens != NULL) {
982                 LIBCFS_FREE(the_lnet.ln_network_tokens,
983                             the_lnet.ln_network_tokens_nob);
984                 the_lnet.ln_network_tokens = NULL;
985         }
986 }
987
988 int
989 lnet_startup_lndnis (void)
990 {
991         lnd_t             *lnd;
992         lnet_ni_t         *ni;
993         struct list_head   nilist;
994         int                rc = 0;
995         int                lnd_type;
996         int                nicount = 0;
997         char              *nets = lnet_get_networks();
998
999         CFS_INIT_LIST_HEAD(&nilist);
1000
1001         if (nets == NULL)
1002                 goto failed;
1003
1004         rc = lnet_parse_networks(&nilist, nets);
1005         if (rc != 0)
1006                 goto failed;
1007
1008         while (!list_empty(&nilist)) {
1009                 ni = list_entry(nilist.next, lnet_ni_t, ni_list);
1010                 lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
1011
1012                 LASSERT (libcfs_isknown_lnd(lnd_type));
1013
1014                 LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
1015                 lnd = lnet_find_lnd_by_type(lnd_type);
1016
1017 #ifdef __KERNEL__
1018                 if (lnd == NULL) {
1019                         LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
1020                         rc = request_module(libcfs_lnd2modname(lnd_type));
1021                         LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex);
1022
1023                         lnd = lnet_find_lnd_by_type(lnd_type);
1024                         if (lnd == NULL) {
1025                                 LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
1026                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1027                                        libcfs_lnd2str(lnd_type),
1028                                        libcfs_lnd2modname(lnd_type), rc);
1029 #ifndef CONFIG_KMOD
1030                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1031                                          "compiled with CONFIG_KMOD set for "
1032                                          "automatic module loading.");
1033 #endif
1034                                 goto failed;
1035                         }
1036                 }
1037 #else
1038                 if (lnd == NULL) {
1039                         LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
1040                         CERROR("LND %s not supported\n",
1041                                libcfs_lnd2str(lnd_type));
1042                         goto failed;
1043                 }
1044 #endif
1045
1046                 ni->ni_refcount = 1;
1047
1048                 LNET_LOCK();
1049                 lnd->lnd_refcount++;
1050                 LNET_UNLOCK();
1051
1052                 ni->ni_lnd = lnd;
1053
1054                 rc = (lnd->lnd_startup)(ni);
1055
1056                 LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex);
1057
1058                 if (rc != 0) {
1059                         LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s"
1060                                            "\n",
1061                                            rc, libcfs_lnd2str(lnd->lnd_type));
1062                         LNET_LOCK();
1063                         lnd->lnd_refcount--;
1064                         LNET_UNLOCK();
1065                         goto failed;
1066                 }
1067
1068                 list_del(&ni->ni_list);
1069
1070                 LNET_LOCK();
1071                 list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
1072                 LNET_UNLOCK();
1073
1074                 if (lnd->lnd_type == LOLND) {
1075                         lnet_ni_addref(ni);
1076                         LASSERT (the_lnet.ln_loni == NULL);
1077                         the_lnet.ln_loni = ni;
1078                         continue;
1079                 }
1080
1081 #ifndef __KERNEL__
1082                 if (lnd->lnd_wait != NULL) {
1083                         if (the_lnet.ln_eqwaitni == NULL) {
1084                                 lnet_ni_addref(ni);
1085                                 the_lnet.ln_eqwaitni = ni;
1086                         }
1087                 } else {
1088 # ifndef HAVE_LIBPTHREAD
1089                         LCONSOLE_ERROR_MSG(0x106, "LND %s not supported in a "
1090                                            "single-threaded runtime\n",
1091                                            libcfs_lnd2str(lnd_type));
1092                         goto failed;
1093 # endif
1094                 }
1095 #endif
1096                 if (ni->ni_peertxcredits == 0 ||
1097                     ni->ni_maxtxcredits == 0) {
1098                         LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1099                                            libcfs_lnd2str(lnd->lnd_type),
1100                                            ni->ni_peertxcredits == 0 ?
1101                                            "" : "per-peer ");
1102                         goto failed;
1103                 }
1104
1105                 ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits;
1106
1107                 CDEBUG(D_LNI, "Added LNI %s [%d/%d]\n",
1108                        libcfs_nid2str(ni->ni_nid),
1109                        ni->ni_peertxcredits, ni->ni_txcredits);
1110
1111                 /* Handle nidstrings for network 0 just like this one */
1112                 if (the_lnet.ln_ptlcompat > 0) {
1113                         if (nicount > 0) {
1114                                 LCONSOLE_ERROR_MSG(0x108, "Can't run > 1 "
1115                                        "network when portals_compatibility is "
1116                                        "set\n");
1117                                 goto failed;
1118                         }
1119                         libcfs_setnet0alias(lnd->lnd_type);
1120                 }
1121                 
1122                 nicount++;
1123         }
1124
1125         if (the_lnet.ln_eqwaitni != NULL && nicount > 1) {
1126                 lnd_type = the_lnet.ln_eqwaitni->ni_lnd->lnd_type;
1127                 LCONSOLE_ERROR_MSG(0x109, "LND %s can only run single-network"
1128                                    "\n",
1129                                    libcfs_lnd2str(lnd_type));
1130                 goto failed;
1131         }
1132
1133         return 0;
1134
1135  failed:
1136         lnet_shutdown_lndnis();
1137
1138         while (!list_empty(&nilist)) {
1139                 ni = list_entry(nilist.next, lnet_ni_t, ni_list);
1140                 list_del(&ni->ni_list);
1141                 LIBCFS_FREE(ni, sizeof(*ni));
1142         }
1143
1144         return -ENETDOWN;
1145 }
1146
1147 int
1148 LNetInit(void)
1149 {
1150         int    rc;
1151
1152         lnet_assert_wire_constants ();
1153         LASSERT (!the_lnet.ln_init);
1154
1155         memset(&the_lnet, 0, sizeof(the_lnet));
1156
1157         rc = lnet_get_portals_compatibility();
1158         if (rc < 0)
1159                 return rc;
1160
1161         lnet_init_locks();
1162         CFS_INIT_LIST_HEAD(&the_lnet.ln_lnds);
1163         the_lnet.ln_ptlcompat = rc;
1164         the_lnet.ln_refcount = 0;
1165         the_lnet.ln_init = 1;
1166
1167 #ifdef __KERNEL__
1168         /* All LNDs apart from the LOLND are in separate modules.  They
1169          * register themselves when their module loads, and unregister
1170          * themselves when their module is unloaded. */
1171 #else
1172         /* Register LNDs
1173          * NB the order here determines default 'networks=' order */
1174 # ifdef CRAY_XT3
1175         LNET_REGISTER_ULND(the_ptllnd);
1176 # endif
1177 # ifdef HAVE_LIBPTHREAD
1178         LNET_REGISTER_ULND(the_tcplnd);
1179 # endif
1180 #endif
1181         lnet_register_lnd(&the_lolnd);
1182         return 0;
1183 }
1184
1185 void
1186 LNetFini(void)
1187 {
1188         LASSERT (the_lnet.ln_init);
1189         LASSERT (the_lnet.ln_refcount == 0);
1190
1191         while (!list_empty(&the_lnet.ln_lnds))
1192                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1193                                                lnd_t, lnd_list));
1194         lnet_fini_locks();
1195
1196         the_lnet.ln_init = 0;
1197 }
1198
1199 int
1200 LNetNIInit(lnet_pid_t requested_pid)
1201 {
1202         int         im_a_router = 0;
1203         int         rc;
1204
1205         LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex);
1206
1207         LASSERT (the_lnet.ln_init);
1208         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1209
1210         if (the_lnet.ln_refcount > 0) {
1211                 rc = the_lnet.ln_refcount++;
1212                 goto out;
1213         }
1214
1215         if (requested_pid == LNET_PID_ANY) {
1216                 /* Don't instantiate LNET just for me */
1217                 rc = -ENETDOWN;
1218                 goto failed0;
1219         }
1220
1221         rc = lnet_prepare(requested_pid);
1222         if (rc != 0)
1223                 goto failed0;
1224
1225         rc = lnet_startup_lndnis();
1226         if (rc != 0)
1227                 goto failed1;
1228
1229         rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1230         if (rc != 0)
1231                 goto failed2;
1232
1233         rc = lnet_check_routes();
1234         if (rc != 0)
1235                 goto failed2;
1236
1237         rc = lnet_alloc_rtrpools(im_a_router);
1238         if (rc != 0)
1239                 goto failed2;
1240
1241         rc = lnet_acceptor_start();
1242         if (rc != 0)
1243                 goto failed2;
1244
1245         the_lnet.ln_refcount = 1;
1246         /* Now I may use my own API functions... */
1247
1248         rc = lnet_router_checker_start();
1249         if (rc != 0)
1250                 goto failed3;
1251
1252         rc = lnet_ping_target_init();
1253         if (rc != 0)
1254                 goto failed4;
1255
1256         lnet_proc_init();
1257         goto out;
1258
1259  failed4:
1260         lnet_router_checker_stop();
1261  failed3:
1262         the_lnet.ln_refcount = 0;
1263         lnet_acceptor_stop();
1264  failed2:
1265         lnet_destroy_routes();
1266         lnet_shutdown_lndnis();
1267  failed1:
1268         lnet_unprepare();
1269  failed0:
1270         LASSERT (rc < 0);
1271  out:
1272         LNET_MUTEX_UP(&the_lnet.ln_api_mutex);
1273         return rc;
1274 }
1275
1276 int
1277 LNetNIFini()
1278 {
1279         LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex);
1280
1281         LASSERT (the_lnet.ln_init);
1282         LASSERT (the_lnet.ln_refcount > 0);
1283
1284         if (the_lnet.ln_refcount != 1) {
1285                 the_lnet.ln_refcount--;
1286         } else {
1287                 LASSERT (!the_lnet.ln_niinit_self);
1288
1289                 lnet_proc_fini();
1290                 lnet_ping_target_fini();
1291                 lnet_router_checker_stop();
1292
1293                 /* Teardown fns that use my own API functions BEFORE here */
1294                 the_lnet.ln_refcount = 0;
1295
1296                 lnet_acceptor_stop();
1297                 lnet_destroy_routes();
1298                 lnet_shutdown_lndnis();
1299                 lnet_unprepare();
1300         }
1301
1302         LNET_MUTEX_UP(&the_lnet.ln_api_mutex);
1303         return 0;
1304 }
1305
1306 int
1307 LNetCtl(unsigned int cmd, void *arg)
1308 {
1309         struct libcfs_ioctl_data *data = arg;
1310         lnet_process_id_t         id;
1311         lnet_ni_t                *ni;
1312         int                       rc;
1313
1314         LASSERT (the_lnet.ln_init);
1315         LASSERT (the_lnet.ln_refcount > 0);
1316
1317         switch (cmd) {
1318         case IOC_LIBCFS_GET_NI:
1319                 rc = LNetGetId(data->ioc_count, &id);
1320                 data->ioc_nid = id.nid;
1321                 return rc;
1322
1323         case IOC_LIBCFS_FAIL_NID:
1324                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
1325                 
1326         case IOC_LIBCFS_ADD_ROUTE:
1327                 rc = lnet_add_route(data->ioc_net, data->ioc_count, 
1328                                     data->ioc_nid);
1329                 return (rc != 0) ? rc : lnet_check_routes();
1330                 
1331         case IOC_LIBCFS_DEL_ROUTE:
1332                 return lnet_del_route(data->ioc_net, data->ioc_nid);
1333
1334         case IOC_LIBCFS_GET_ROUTE:
1335                 return lnet_get_route(data->ioc_count, 
1336                                       &data->ioc_net, &data->ioc_count, 
1337                                       &data->ioc_nid, &data->ioc_flags);
1338         case IOC_LIBCFS_NOTIFY_ROUTER:
1339                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, 
1340                                    (time_t)data->ioc_u64[0]);
1341
1342         case IOC_LIBCFS_PORTALS_COMPATIBILITY:
1343                 return the_lnet.ln_ptlcompat;
1344
1345         case IOC_LIBCFS_LNET_DIST:
1346                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
1347                 if (rc < 0 && rc != -EHOSTUNREACH)
1348                         return rc;
1349                 
1350                 data->ioc_u32[0] = rc;
1351                 return 0;
1352
1353         case IOC_LIBCFS_TESTPROTOCOMPAT:
1354                 LNET_LOCK();
1355                 the_lnet.ln_testprotocompat = data->ioc_flags;
1356                 LNET_UNLOCK();
1357                 return 0;
1358
1359         case IOC_LIBCFS_PING:
1360                 rc = lnet_ping((lnet_process_id_t) {.nid = data->ioc_nid,
1361                                                     .pid = data->ioc_u32[0]},
1362                                data->ioc_u32[1], /* timeout */
1363                                (lnet_process_id_t *)data->ioc_pbuf1,
1364                                data->ioc_plen1/sizeof(lnet_process_id_t));
1365                 if (rc < 0)
1366                         return rc;
1367                 data->ioc_count = rc;
1368                 return 0;
1369
1370         case IOC_LIBCFS_DEBUG_PEER: {
1371                 /* CAVEAT EMPTOR: this one designed for calling directly; not
1372                  * via an ioctl */
1373                 lnet_process_id_t *id = arg;
1374
1375                 lnet_debug_peer(id->nid);
1376
1377                 ni = lnet_net2ni(LNET_NIDNET(id->nid));
1378                 if (ni == NULL) {
1379                         CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(*id));
1380                 } else {
1381                         if (ni->ni_lnd->lnd_ctl == NULL) {
1382                                 CDEBUG(D_WARNING, "No ctl for %s\n",
1383                                        libcfs_id2str(*id));
1384                         } else {
1385                                 (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
1386                         }
1387                         
1388                         lnet_ni_decref(ni);
1389                 }
1390                 return 0;
1391         }
1392                 
1393         default:
1394                 ni = lnet_net2ni(data->ioc_net);
1395                 if (ni == NULL)
1396                         return -EINVAL;
1397
1398                 if (ni->ni_lnd->lnd_ctl == NULL)
1399                         rc = -EINVAL;
1400                 else
1401                         rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
1402
1403                 lnet_ni_decref(ni);
1404                 return rc;
1405         }
1406         /* not reached */
1407 }
1408
1409 int
1410 LNetGetId(unsigned int index, lnet_process_id_t *id)
1411 {
1412         lnet_ni_t        *ni;
1413         struct list_head *tmp;
1414         int               rc = -ENOENT;
1415
1416         LASSERT (the_lnet.ln_init);
1417         LASSERT (the_lnet.ln_refcount > 0);
1418
1419         LNET_LOCK();
1420
1421         list_for_each(tmp, &the_lnet.ln_nis) {
1422                 if (index-- != 0)
1423                         continue;
1424                 
1425                 ni = list_entry(tmp, lnet_ni_t, ni_list);
1426
1427                 id->nid = ni->ni_nid;
1428                 id->pid = the_lnet.ln_pid;
1429                 rc = 0;
1430                 break;
1431         }
1432
1433         LNET_UNLOCK();
1434
1435         return rc;
1436 }
1437
1438 void
1439 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
1440 {
1441         snprintf(str, len, LPX64, h.cookie);
1442 }
1443
1444
1445 int
1446 lnet_ping_target_init(void)
1447 {
1448         lnet_handle_me_t  meh;
1449         lnet_process_id_t id;
1450         int               rc;
1451         int               rc2;
1452         int               n;
1453         int               infosz;
1454         int               i;
1455         
1456         for (n = 0; ; n++) {
1457                 rc = LNetGetId(n, &id);
1458                 if (rc == -ENOENT)
1459                         break;
1460
1461                 LASSERT (rc == 0);
1462         }
1463
1464         infosz = offsetof(lnet_ping_info_t, pi_nid[n]);
1465         LIBCFS_ALLOC(the_lnet.ln_ping_info, infosz);
1466         if (the_lnet.ln_ping_info == NULL) {
1467                 CERROR("Can't allocate ping info[%d]\n", n);
1468                 return -ENOMEM;
1469         }
1470
1471         the_lnet.ln_ping_info->pi_magic   = LNET_PROTO_PING_MAGIC;
1472         the_lnet.ln_ping_info->pi_version = LNET_PROTO_PING_VERSION;
1473         the_lnet.ln_ping_info->pi_pid     = the_lnet.ln_pid;
1474         the_lnet.ln_ping_info->pi_nnids   = n;
1475
1476         for (i = 0; i < n; i++) {
1477                 rc = LNetGetId(i, &id);
1478                 LASSERT (rc == 0);
1479                 the_lnet.ln_ping_info->pi_nid[i] = id.nid;
1480         }
1481         
1482         /* We can have a tiny EQ since we only need to see the unlink event on
1483          * teardown, which by definition is the last one! */
1484         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq);
1485         if (rc != 0) {
1486                 CERROR("Can't allocate ping EQ: %d\n", rc);
1487                 goto failed_0;
1488         }
1489
1490         rc = LNetMEAttach(LNET_RESERVED_PORTAL,
1491                           (lnet_process_id_t){.nid = LNET_NID_ANY,
1492                                               .pid = LNET_PID_ANY},
1493                           LNET_PROTO_PING_MATCHBITS, 0LL,
1494                           LNET_UNLINK, LNET_INS_AFTER,
1495                           &meh);
1496         if (rc != 0) {
1497                 CERROR("Can't create ping ME: %d\n", rc);
1498                 goto failed_1;
1499         }
1500
1501         rc = LNetMDAttach(meh,
1502                           (lnet_md_t){.start = the_lnet.ln_ping_info,
1503                                       .length = infosz,
1504                                       .threshold = LNET_MD_THRESH_INF,
1505                                       .options = (LNET_MD_OP_GET |
1506                                                   LNET_MD_TRUNCATE |
1507                                                   LNET_MD_MANAGE_REMOTE),
1508                                       .eq_handle = the_lnet.ln_ping_target_eq},
1509                           LNET_RETAIN,
1510                           &the_lnet.ln_ping_target_md);
1511         if (rc != 0) {
1512                 CERROR("Can't attach ping MD: %d\n", rc);
1513                 goto failed_2;
1514         }
1515
1516         return 0;
1517
1518  failed_2:
1519         rc2 = LNetMEUnlink(meh);
1520         LASSERT (rc2 == 0);
1521  failed_1:
1522         rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
1523         LASSERT (rc2 == 0);
1524  failed_0:
1525         LIBCFS_FREE(the_lnet.ln_ping_info, infosz);
1526
1527         return rc;
1528 }
1529
1530 void
1531 lnet_ping_target_fini(void)
1532 {
1533         lnet_event_t    event;
1534         int             rc;
1535         int             which;
1536         int             timeout_ms = 1000;
1537         cfs_sigset_t    blocked = cfs_block_allsigs();
1538
1539         LNetMDUnlink(the_lnet.ln_ping_target_md);
1540         /* NB md could be busy; this just starts the unlink */
1541
1542         for (;;) {
1543                 rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1,
1544                                 timeout_ms, &event, &which);
1545
1546                 /* I expect overflow... */
1547                 LASSERT (rc >= 0 || rc == -EOVERFLOW);
1548
1549                 if (rc == 0) {
1550                         /* timed out: provide a diagnostic */
1551                         CWARN("Still waiting for ping MD to unlink\n");
1552                         timeout_ms *= 2;
1553                         continue;
1554                 }
1555
1556                 /* Got a valid event */
1557                 if (event.unlinked)
1558                         break;
1559         }
1560
1561         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1562         LASSERT (rc == 0);
1563
1564         LIBCFS_FREE(the_lnet.ln_ping_info,
1565                     offsetof(lnet_ping_info_t,
1566                              pi_nid[the_lnet.ln_ping_info->pi_nnids]));
1567
1568         cfs_restore_sigs(blocked);
1569 }
1570
1571 int
1572 lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids)
1573 {
1574         lnet_handle_eq_t     eqh;
1575         lnet_handle_md_t     mdh;
1576         lnet_event_t         event;
1577         int                  which;
1578         int                  unlinked = 0;
1579         int                  replied = 0;
1580         const int            a_long_time = 60000; /* mS */
1581         int                  infosz = offsetof(lnet_ping_info_t, pi_nid[n_ids]);
1582         lnet_ping_info_t    *info;
1583         lnet_process_id_t    tmpid;
1584         int                  i;
1585         int                  nob;
1586         int                  rc;
1587         int                  rc2;
1588         cfs_sigset_t         blocked;
1589
1590         if (n_ids <= 0 ||
1591             id.nid == LNET_NID_ANY ||
1592             timeout_ms > 500000 ||              /* arbitrary limit! */
1593             n_ids > 20)                         /* arbitrary limit! */
1594                 return -EINVAL;
1595
1596         if (id.pid == LNET_PID_ANY)
1597                 id.pid = LUSTRE_SRV_LNET_PID;
1598
1599         LIBCFS_ALLOC(info, infosz);
1600         if (info == NULL)
1601                 return -ENOMEM;
1602
1603         /* NB 2 events max (including any unlink event) */
1604         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
1605         if (rc != 0) {
1606                 CERROR("Can't allocate EQ: %d\n", rc);
1607                 goto out_0;
1608         }
1609
1610         rc = LNetMDBind((lnet_md_t){.start = info,
1611                                     .length = infosz,
1612                                     .threshold = 2, /* GET/REPLY */
1613                                     .options = LNET_MD_TRUNCATE,
1614                                     .eq_handle = eqh},
1615                         LNET_UNLINK,
1616                         &mdh);
1617         if (rc != 0) {
1618                 CERROR("Can't bind MD: %d\n", rc);
1619                 goto out_1;
1620         }
1621
1622         rc = LNetGet(LNET_NID_ANY, mdh, id,
1623                      LNET_RESERVED_PORTAL,
1624                      LNET_PROTO_PING_MATCHBITS, 0);
1625
1626         if (rc != 0) {
1627                 /* Don't CERROR; this could be deliberate! */
1628
1629                 rc2 = LNetMDUnlink(mdh);
1630                 LASSERT (rc2 == 0);
1631
1632                 /* NB must wait for the UNLINK event below... */
1633                 unlinked = 1;
1634                 timeout_ms = a_long_time;
1635         }
1636
1637         do {
1638                 /* MUST block for unlink to complete */
1639                 if (unlinked)
1640                         blocked = cfs_block_allsigs();
1641
1642                 rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which);
1643
1644                 if (unlinked)
1645                         cfs_restore_sigs(blocked);
1646
1647                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
1648                        (rc2 <= 0) ? -1 : event.type,
1649                        (rc2 <= 0) ? -1 : event.status,
1650                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
1651
1652                 LASSERT (rc2 != -EOVERFLOW);     /* can't miss anything */
1653
1654                 if (rc2 <= 0 || event.status != 0) {
1655                         /* timeout or error */
1656                         if (!replied && rc == 0)
1657                                 rc = (rc2 < 0) ? rc2 :
1658                                      (rc2 == 0) ? -ETIMEDOUT :
1659                                      event.status;
1660
1661                         if (!unlinked) {
1662                                 /* Ensure completion in finite time... */
1663                                 LNetMDUnlink(mdh);
1664                                 /* No assertion (racing with network) */
1665                                 unlinked = 1;
1666                                 timeout_ms = a_long_time;
1667                         } else if (rc2 == 0) {
1668                                 /* timed out waiting for unlink */
1669                                 CWARN("ping %s: late network completion\n",
1670                                       libcfs_id2str(id));
1671                         }
1672
1673                 } else if (event.type == LNET_EVENT_REPLY) {
1674                         replied = 1;
1675                         rc = event.mlength;
1676                 }
1677
1678         } while (rc2 <= 0 || !event.unlinked);
1679
1680         if (!replied) {
1681                 if (rc >= 0)
1682                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
1683                               libcfs_id2str(id));
1684                 rc = -EIO;
1685                 goto out_1;
1686         }
1687
1688         nob = rc;
1689         LASSERT (nob >= 0 && nob <= infosz);
1690
1691         rc = -EPROTO;                           /* if I can't parse... */
1692
1693         if (nob < 8) {
1694                 /* can't check magic/version */
1695                 CERROR("%s: ping info too short %d\n",
1696                        libcfs_id2str(id), nob);
1697                 goto out_1;
1698         }
1699
1700         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
1701                 /* NB I might be swabbing garbage until I check below, but it
1702                  * doesn't matter */
1703                 __swab32s(&info->pi_version);
1704                 __swab32s(&info->pi_pid);
1705                 __swab32s(&info->pi_nnids);
1706                 for (i = 0; i < info->pi_nnids && i < n_ids; i++)
1707                         __swab64s(&info->pi_nid[i]);
1708
1709         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
1710                 CERROR("%s: Unexpected magic %08x\n", 
1711                        libcfs_id2str(id), info->pi_magic);
1712                 goto out_1;
1713         }
1714
1715         if (info->pi_version != LNET_PROTO_PING_VERSION) {
1716                 CERROR("%s: Unexpected version 0x%x\n",
1717                        libcfs_id2str(id), info->pi_version);
1718                 goto out_1;
1719         }
1720
1721         if (nob < offsetof(lnet_ping_info_t, pi_nid[0])) {
1722                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), 
1723                        nob, (int)offsetof(lnet_ping_info_t, pi_nid[0]));
1724                 goto out_1;
1725         }
1726
1727         if (info->pi_nnids < n_ids)
1728                 n_ids = info->pi_nnids;
1729
1730         if (nob < offsetof(lnet_ping_info_t, pi_nid[n_ids])) {
1731                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), 
1732                        nob, (int)offsetof(lnet_ping_info_t, pi_nid[n_ids]));
1733                 goto out_1;
1734         }
1735
1736         rc = -EFAULT;                           /* If I SEGV... */
1737
1738         for (i = 0; i < n_ids; i++) {
1739                 tmpid.pid = info->pi_pid;
1740                 tmpid.nid = info->pi_nid[i];
1741 #ifdef __KERNEL__
1742                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
1743                         goto out_1;
1744 #else
1745                 ids[i] = tmpid;
1746 #endif
1747         }
1748         rc = info->pi_nnids;
1749
1750  out_1:
1751         rc2 = LNetEQFree(eqh);
1752         if (rc2 != 0)
1753                 CERROR("rc2 %d\n", rc2);
1754         LASSERT (rc2 == 0);
1755
1756  out_0:
1757         LIBCFS_FREE(info, infosz);
1758         return rc;
1759 }