Whamcloud - gitweb
75116adbddb7e129d93ae2e193373f06bce03f33
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_LNET
34 #include <linux/log2.h>
35 #include <linux/ktime.h>
36
37 #include <lnet/lib-lnet.h>
38
39 #define D_LNI D_CONSOLE
40
41 lnet_t      the_lnet;                           /* THE state of the network */
42 EXPORT_SYMBOL(the_lnet);
43
44 static char *ip2nets = "";
45 module_param(ip2nets, charp, 0444);
46 MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
47
48 static char *networks = "";
49 module_param(networks, charp, 0444);
50 MODULE_PARM_DESC(networks, "local networks");
51
52 static char *routes = "";
53 module_param(routes, charp, 0444);
54 MODULE_PARM_DESC(routes, "routes to non-local networks");
55
56 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
57 module_param(rnet_htable_size, int, 0444);
58 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
59
60 static int use_tcp_bonding = false;
61 module_param(use_tcp_bonding, int, 0444);
62 MODULE_PARM_DESC(use_tcp_bonding,
63                  "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
64
65 static int lnet_ping(lnet_process_id_t id, signed long timeout,
66                      lnet_process_id_t __user *ids, int n_ids);
67
68 static char *
69 lnet_get_routes(void)
70 {
71         return routes;
72 }
73
74 static char *
75 lnet_get_networks(void)
76 {
77         char   *nets;
78         int     rc;
79
80         if (*networks != 0 && *ip2nets != 0) {
81                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
82                                    "'ip2nets' but not both at once\n");
83                 return NULL;
84         }
85
86         if (*ip2nets != 0) {
87                 rc = lnet_parse_ip2nets(&nets, ip2nets);
88                 return (rc == 0) ? nets : NULL;
89         }
90
91         if (*networks != 0)
92                 return networks;
93
94         return "tcp";
95 }
96
97 static void
98 lnet_init_locks(void)
99 {
100         spin_lock_init(&the_lnet.ln_eq_wait_lock);
101         init_waitqueue_head(&the_lnet.ln_eq_waitq);
102         init_waitqueue_head(&the_lnet.ln_rc_waitq);
103         mutex_init(&the_lnet.ln_lnd_mutex);
104         mutex_init(&the_lnet.ln_api_mutex);
105 }
106
/*
 * Counterpart of lnet_init_locks().  Intentionally empty: nothing set up
 * there is torn down here.
 */
static void
lnet_fini_locks(void)
{
        /* nothing to do */
}
111
/* kmem_cache for MEs */
struct kmem_cache *lnet_mes_cachep;
/* kmem_cache for MDs of at most LNET_SMALL_MD_SIZE bytes */
struct kmem_cache *lnet_small_mds_cachep;
115
116 static int
117 lnet_descriptor_setup(void)
118 {
119         /* create specific kmem_cache for MEs and small MDs (i.e., originally
120          * allocated in <size-xxx> kmem_cache).
121          */
122         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(lnet_me_t),
123                                             0, 0, NULL);
124         if (!lnet_mes_cachep)
125                 return -ENOMEM;
126
127         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
128                                                   LNET_SMALL_MD_SIZE, 0, 0,
129                                                   NULL);
130         if (!lnet_small_mds_cachep)
131                 return -ENOMEM;
132
133         return 0;
134 }
135
136 static void
137 lnet_descriptor_cleanup(void)
138 {
139
140         if (lnet_small_mds_cachep) {
141                 kmem_cache_destroy(lnet_small_mds_cachep);
142                 lnet_small_mds_cachep = NULL;
143         }
144
145         if (lnet_mes_cachep) {
146                 kmem_cache_destroy(lnet_mes_cachep);
147                 lnet_mes_cachep = NULL;
148         }
149 }
150
151 static int
152 lnet_create_remote_nets_table(void)
153 {
154         int               i;
155         struct list_head *hash;
156
157         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
158         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
159         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
160         if (hash == NULL) {
161                 CERROR("Failed to create remote nets hash table\n");
162                 return -ENOMEM;
163         }
164
165         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
166                 INIT_LIST_HEAD(&hash[i]);
167         the_lnet.ln_remote_nets_hash = hash;
168         return 0;
169 }
170
171 static void
172 lnet_destroy_remote_nets_table(void)
173 {
174         int i;
175
176         if (the_lnet.ln_remote_nets_hash == NULL)
177                 return;
178
179         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
180                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
181
182         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
183                     LNET_REMOTE_NETS_HASH_SIZE *
184                     sizeof(the_lnet.ln_remote_nets_hash[0]));
185         the_lnet.ln_remote_nets_hash = NULL;
186 }
187
188 static void
189 lnet_destroy_locks(void)
190 {
191         if (the_lnet.ln_res_lock != NULL) {
192                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
193                 the_lnet.ln_res_lock = NULL;
194         }
195
196         if (the_lnet.ln_net_lock != NULL) {
197                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
198                 the_lnet.ln_net_lock = NULL;
199         }
200
201         lnet_fini_locks();
202 }
203
204 static int
205 lnet_create_locks(void)
206 {
207         lnet_init_locks();
208
209         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
210         if (the_lnet.ln_res_lock == NULL)
211                 goto failed;
212
213         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
214         if (the_lnet.ln_net_lock == NULL)
215                 goto failed;
216
217         return 0;
218
219  failed:
220         lnet_destroy_locks();
221         return -ENOMEM;
222 }
223
224 static void lnet_assert_wire_constants(void)
225 {
226         /* Wire protocol assertions generated by 'wirecheck'
227          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
228          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
229          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
230
231         /* Constants... */
232         CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
233         CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1);
234         CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0);
235         CLASSERT(LNET_MSG_ACK == 0);
236         CLASSERT(LNET_MSG_PUT == 1);
237         CLASSERT(LNET_MSG_GET == 2);
238         CLASSERT(LNET_MSG_REPLY == 3);
239         CLASSERT(LNET_MSG_HELLO == 4);
240
241         /* Checks for struct lnet_handle_wire */
242         CLASSERT((int)sizeof(struct lnet_handle_wire) == 16);
243         CLASSERT((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) == 0);
244         CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) == 8);
245         CLASSERT((int)offsetof(struct lnet_handle_wire, wh_object_cookie) == 8);
246         CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) == 8);
247
248         /* Checks for struct lnet_magicversion_t */
249         CLASSERT((int)sizeof(lnet_magicversion_t) == 8);
250         CLASSERT((int)offsetof(lnet_magicversion_t, magic) == 0);
251         CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
252         CLASSERT((int)offsetof(lnet_magicversion_t, version_major) == 4);
253         CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
254         CLASSERT((int)offsetof(lnet_magicversion_t, version_minor) == 6);
255         CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
256
257         /* Checks for struct lnet_hdr_t */
258         CLASSERT((int)sizeof(lnet_hdr_t) == 72);
259         CLASSERT((int)offsetof(lnet_hdr_t, dest_nid) == 0);
260         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
261         CLASSERT((int)offsetof(lnet_hdr_t, src_nid) == 8);
262         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
263         CLASSERT((int)offsetof(lnet_hdr_t, dest_pid) == 16);
264         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
265         CLASSERT((int)offsetof(lnet_hdr_t, src_pid) == 20);
266         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
267         CLASSERT((int)offsetof(lnet_hdr_t, type) == 24);
268         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
269         CLASSERT((int)offsetof(lnet_hdr_t, payload_length) == 28);
270         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
271         CLASSERT((int)offsetof(lnet_hdr_t, msg) == 32);
272         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
273
274         /* Ack */
275         CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
276         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
277         CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
278         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
279         CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
280         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
281
282         /* Put */
283         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
284         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
285         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
286         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
287         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
288         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
289         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
290         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
291         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
292         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
293
294         /* Get */
295         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
296         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
297         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
298         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
299         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
300         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
301         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
302         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
303         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
304         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
305
306         /* Reply */
307         CLASSERT((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
308         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
309
310         /* Hello */
311         CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
312         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
313         CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
314         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
315 }
316
317 static lnd_t *lnet_find_lnd_by_type(__u32 type)
318 {
319         lnd_t            *lnd;
320         struct list_head *tmp;
321
322         /* holding lnd mutex */
323         list_for_each(tmp, &the_lnet.ln_lnds) {
324                 lnd = list_entry(tmp, lnd_t, lnd_list);
325
326                 if (lnd->lnd_type == type)
327                         return lnd;
328         }
329         return NULL;
330 }
331
332 void
333 lnet_register_lnd (lnd_t *lnd)
334 {
335         mutex_lock(&the_lnet.ln_lnd_mutex);
336
337         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
338         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
339
340         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
341         lnd->lnd_refcount = 0;
342
343         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
344
345         mutex_unlock(&the_lnet.ln_lnd_mutex);
346 }
347 EXPORT_SYMBOL(lnet_register_lnd);
348
349 void
350 lnet_unregister_lnd (lnd_t *lnd)
351 {
352         mutex_lock(&the_lnet.ln_lnd_mutex);
353
354         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
355         LASSERT(lnd->lnd_refcount == 0);
356
357         list_del(&lnd->lnd_list);
358         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
359
360         mutex_unlock(&the_lnet.ln_lnd_mutex);
361 }
362 EXPORT_SYMBOL(lnet_unregister_lnd);
363
364 void
365 lnet_counters_get(lnet_counters_t *counters)
366 {
367         lnet_counters_t *ctr;
368         int             i;
369
370         memset(counters, 0, sizeof(*counters));
371
372         lnet_net_lock(LNET_LOCK_EX);
373
374         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
375                 counters->msgs_max     += ctr->msgs_max;
376                 counters->msgs_alloc   += ctr->msgs_alloc;
377                 counters->errors       += ctr->errors;
378                 counters->send_count   += ctr->send_count;
379                 counters->recv_count   += ctr->recv_count;
380                 counters->route_count  += ctr->route_count;
381                 counters->drop_count   += ctr->drop_count;
382                 counters->send_length  += ctr->send_length;
383                 counters->recv_length  += ctr->recv_length;
384                 counters->route_length += ctr->route_length;
385                 counters->drop_length  += ctr->drop_length;
386
387         }
388         lnet_net_unlock(LNET_LOCK_EX);
389 }
390 EXPORT_SYMBOL(lnet_counters_get);
391
392 void
393 lnet_counters_reset(void)
394 {
395         lnet_counters_t *counters;
396         int             i;
397
398         lnet_net_lock(LNET_LOCK_EX);
399
400         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
401                 memset(counters, 0, sizeof(lnet_counters_t));
402
403         lnet_net_unlock(LNET_LOCK_EX);
404 }
405
406 static char *
407 lnet_res_type2str(int type)
408 {
409         switch (type) {
410         default:
411                 LBUG();
412         case LNET_COOKIE_TYPE_MD:
413                 return "MD";
414         case LNET_COOKIE_TYPE_ME:
415                 return "ME";
416         case LNET_COOKIE_TYPE_EQ:
417                 return "EQ";
418         }
419 }
420
421 static void
422 lnet_res_container_cleanup(struct lnet_res_container *rec)
423 {
424         int     count = 0;
425
426         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
427                 return;
428
429         while (!list_empty(&rec->rec_active)) {
430                 struct list_head *e = rec->rec_active.next;
431
432                 list_del_init(e);
433                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
434                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
435
436                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
437                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
438
439                 } else { /* NB: Active MEs should be attached on portals */
440                         LBUG();
441                 }
442                 count++;
443         }
444
445         if (count > 0) {
446                 /* Found alive MD/ME/EQ, user really should unlink/free
447                  * all of them before finalize LNet, but if someone didn't,
448                  * we have to recycle garbage for him */
449                 CERROR("%d active elements on exit of %s container\n",
450                        count, lnet_res_type2str(rec->rec_type));
451         }
452
453         if (rec->rec_lh_hash != NULL) {
454                 LIBCFS_FREE(rec->rec_lh_hash,
455                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
456                 rec->rec_lh_hash = NULL;
457         }
458
459         rec->rec_type = 0; /* mark it as finalized */
460 }
461
462 static int
463 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
464 {
465         int     rc = 0;
466         int     i;
467
468         LASSERT(rec->rec_type == 0);
469
470         rec->rec_type = type;
471         INIT_LIST_HEAD(&rec->rec_active);
472
473         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
474
475         /* Arbitrary choice of hash table size */
476         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
477                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
478         if (rec->rec_lh_hash == NULL) {
479                 rc = -ENOMEM;
480                 goto out;
481         }
482
483         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
484                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
485
486         return 0;
487
488 out:
489         CERROR("Failed to setup %s resource container\n",
490                lnet_res_type2str(type));
491         lnet_res_container_cleanup(rec);
492         return rc;
493 }
494
495 static void
496 lnet_res_containers_destroy(struct lnet_res_container **recs)
497 {
498         struct lnet_res_container       *rec;
499         int                             i;
500
501         cfs_percpt_for_each(rec, i, recs)
502                 lnet_res_container_cleanup(rec);
503
504         cfs_percpt_free(recs);
505 }
506
507 static struct lnet_res_container **
508 lnet_res_containers_create(int type)
509 {
510         struct lnet_res_container       **recs;
511         struct lnet_res_container       *rec;
512         int                             rc;
513         int                             i;
514
515         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
516         if (recs == NULL) {
517                 CERROR("Failed to allocate %s resource containers\n",
518                        lnet_res_type2str(type));
519                 return NULL;
520         }
521
522         cfs_percpt_for_each(rec, i, recs) {
523                 rc = lnet_res_container_setup(rec, i, type);
524                 if (rc != 0) {
525                         lnet_res_containers_destroy(recs);
526                         return NULL;
527                 }
528         }
529
530         return recs;
531 }
532
533 lnet_libhandle_t *
534 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
535 {
536         /* ALWAYS called with lnet_res_lock held */
537         struct list_head        *head;
538         lnet_libhandle_t        *lh;
539         unsigned int            hash;
540
541         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
542                 return NULL;
543
544         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
545         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
546
547         list_for_each_entry(lh, head, lh_hash_chain) {
548                 if (lh->lh_cookie == cookie)
549                         return lh;
550         }
551
552         return NULL;
553 }
554
555 void
556 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
557 {
558         /* ALWAYS called with lnet_res_lock held */
559         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
560         unsigned int    hash;
561
562         lh->lh_cookie = rec->rec_lh_cookie;
563         rec->rec_lh_cookie += 1 << ibits;
564
565         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
566
567         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
568 }
569
570 static int lnet_unprepare(void);
571
572 static int
573 lnet_prepare(lnet_pid_t requested_pid)
574 {
575         /* Prepare to bring up the network */
576         struct lnet_res_container **recs;
577         int                       rc = 0;
578
579         if (requested_pid == LNET_PID_ANY) {
580                 /* Don't instantiate LNET just for me */
581                 return -ENETDOWN;
582         }
583
584         LASSERT(the_lnet.ln_refcount == 0);
585
586         the_lnet.ln_routing = 0;
587
588         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
589         the_lnet.ln_pid = requested_pid;
590
591         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
592         INIT_LIST_HEAD(&the_lnet.ln_peers);
593         INIT_LIST_HEAD(&the_lnet.ln_nets);
594         INIT_LIST_HEAD(&the_lnet.ln_routers);
595         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
596         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
597
598         rc = lnet_descriptor_setup();
599         if (rc != 0)
600                 goto failed;
601
602         rc = lnet_create_remote_nets_table();
603         if (rc != 0)
604                 goto failed;
605
606         /*
607          * NB the interface cookie in wire handles guards against delayed
608          * replies and ACKs appearing valid after reboot.
609          */
610         the_lnet.ln_interface_cookie = ktime_get_real_ns();
611
612         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
613                                                 sizeof(lnet_counters_t));
614         if (the_lnet.ln_counters == NULL) {
615                 CERROR("Failed to allocate counters for LNet\n");
616                 rc = -ENOMEM;
617                 goto failed;
618         }
619
620         rc = lnet_peer_tables_create();
621         if (rc != 0)
622                 goto failed;
623
624         rc = lnet_msg_containers_create();
625         if (rc != 0)
626                 goto failed;
627
628         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
629                                       LNET_COOKIE_TYPE_EQ);
630         if (rc != 0)
631                 goto failed;
632
633         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
634         if (recs == NULL) {
635                 rc = -ENOMEM;
636                 goto failed;
637         }
638
639         the_lnet.ln_me_containers = recs;
640
641         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
642         if (recs == NULL) {
643                 rc = -ENOMEM;
644                 goto failed;
645         }
646
647         the_lnet.ln_md_containers = recs;
648
649         rc = lnet_portals_create();
650         if (rc != 0) {
651                 CERROR("Failed to create portals for LNet: %d\n", rc);
652                 goto failed;
653         }
654
655         return 0;
656
657  failed:
658         lnet_unprepare();
659         return rc;
660 }
661
662 static int
663 lnet_unprepare (void)
664 {
665         /* NB no LNET_LOCK since this is the last reference.  All LND instances
666          * have shut down already, so it is safe to unlink and free all
667          * descriptors, even those that appear committed to a network op (eg MD
668          * with non-zero pending count) */
669
670         lnet_fail_nid(LNET_NID_ANY, 0);
671
672         LASSERT(the_lnet.ln_refcount == 0);
673         LASSERT(list_empty(&the_lnet.ln_test_peers));
674         LASSERT(list_empty(&the_lnet.ln_nets));
675
676         lnet_portals_destroy();
677
678         if (the_lnet.ln_md_containers != NULL) {
679                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
680                 the_lnet.ln_md_containers = NULL;
681         }
682
683         if (the_lnet.ln_me_containers != NULL) {
684                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
685                 the_lnet.ln_me_containers = NULL;
686         }
687
688         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
689
690         lnet_msg_containers_destroy();
691         lnet_peer_tables_destroy();
692         lnet_rtrpools_free(0);
693
694         if (the_lnet.ln_counters != NULL) {
695                 cfs_percpt_free(the_lnet.ln_counters);
696                 the_lnet.ln_counters = NULL;
697         }
698         lnet_destroy_remote_nets_table();
699         lnet_descriptor_cleanup();
700
701         return 0;
702 }
703
704 lnet_ni_t  *
705 lnet_net2ni_locked(__u32 net_id, int cpt)
706 {
707         struct lnet_ni   *ni;
708         struct lnet_net  *net;
709
710         LASSERT(cpt != LNET_LOCK_EX);
711
712         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
713                 if (net->net_id == net_id) {
714                         ni = list_entry(net->net_ni_list.next, struct lnet_ni,
715                                         ni_netlist);
716                         return ni;
717                 }
718         }
719
720         return NULL;
721 }
722
723 lnet_ni_t *
724 lnet_net2ni(__u32 net)
725 {
726         lnet_ni_t *ni;
727
728         lnet_net_lock(0);
729         ni = lnet_net2ni_locked(net, 0);
730         lnet_net_unlock(0);
731
732         return ni;
733 }
734 EXPORT_SYMBOL(lnet_net2ni);
735
736 struct lnet_net *
737 lnet_get_net_locked(__u32 net_id)
738 {
739         struct lnet_net  *net;
740
741         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
742                 if (net->net_id == net_id)
743                         return net;
744         }
745
746         return NULL;
747 }
748
749 unsigned int
750 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
751 {
752         __u64           key = nid;
753         unsigned int    val;
754
755         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
756
757         if (number == 1)
758                 return 0;
759
760         val = hash_long(key, LNET_CPT_BITS);
761         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
762         if (val < number)
763                 return val;
764
765         return (unsigned int)(key + val + (val >> 1)) % number;
766 }
767
768 int
769 lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
770 {
771         struct lnet_net *net;
772
773         /* must called with hold of lnet_net_lock */
774         if (LNET_CPT_NUMBER == 1)
775                 return 0; /* the only one */
776
777         /*
778          * If NI is provided then use the CPT identified in the NI cpt
779          * list if one exists. If one doesn't exist, then that NI is
780          * associated with all CPTs and it follows that the net it belongs
781          * to is implicitly associated with all CPTs, so just hash the nid
782          * and return that.
783          */
784         if (ni != NULL) {
785                 if (ni->ni_cpts != NULL)
786                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
787                                                              ni->ni_ncpts)];
788                 else
789                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
790         }
791
792         /* no NI provided so look at the net */
793         net = lnet_get_net_locked(LNET_NIDNET(nid));
794
795         if (net != NULL && net->net_cpts != NULL) {
796                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
797         }
798
799         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
800 }
801
802 int
803 lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
804 {
805         int     cpt;
806         int     cpt2;
807
808         if (LNET_CPT_NUMBER == 1)
809                 return 0; /* the only one */
810
811         cpt = lnet_net_lock_current();
812
813         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
814
815         lnet_net_unlock(cpt);
816
817         return cpt2;
818 }
819 EXPORT_SYMBOL(lnet_cpt_of_nid);
820
821 int
822 lnet_islocalnet(__u32 net_id)
823 {
824         struct lnet_net *net;
825         int             cpt;
826         bool            local;
827
828         cpt = lnet_net_lock_current();
829
830         net = lnet_get_net_locked(net_id);
831
832         local = net != NULL;
833
834         lnet_net_unlock(cpt);
835
836         return local;
837 }
838
839 bool
840 lnet_is_ni_healthy_locked(struct lnet_ni *ni)
841 {
842         if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
843             ni->ni_state == LNET_NI_STATE_DEGRADED)
844                 return true;
845
846         return false;
847 }
848
849 lnet_ni_t  *
850 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
851 {
852         struct lnet_net  *net;
853         struct lnet_ni   *ni;
854
855         LASSERT(cpt != LNET_LOCK_EX);
856
857         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
858                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
859                         if (ni->ni_nid == nid)
860                                 return ni;
861                 }
862         }
863
864         return NULL;
865 }
866
867 lnet_ni_t *
868 lnet_nid2ni_addref(lnet_nid_t nid)
869 {
870         lnet_ni_t *ni;
871
872         lnet_net_lock(0);
873         ni = lnet_nid2ni_locked(nid, 0);
874         if (ni)
875                 lnet_ni_addref_locked(ni, 0);
876         lnet_net_unlock(0);
877
878         return ni;
879 }
880 EXPORT_SYMBOL(lnet_nid2ni_addref);
881
882 int
883 lnet_islocalnid(lnet_nid_t nid)
884 {
885         struct lnet_ni  *ni;
886         int             cpt;
887
888         cpt = lnet_net_lock_current();
889         ni = lnet_nid2ni_locked(nid, cpt);
890         lnet_net_unlock(cpt);
891
892         return ni != NULL;
893 }
894
895 int
896 lnet_count_acceptor_nets(void)
897 {
898         /* Return the # of NIs that need the acceptor. */
899         int              count = 0;
900         struct lnet_net  *net;
901         int              cpt;
902
903         cpt = lnet_net_lock_current();
904         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
905                 /* all socklnd type networks should have the acceptor
906                  * thread started */
907                 if (net->net_lnd->lnd_accept != NULL)
908                         count++;
909         }
910
911         lnet_net_unlock(cpt);
912
913         return count;
914 }
915
916 static struct lnet_ping_info *
917 lnet_ping_info_create(int num_ni)
918 {
919         struct lnet_ping_info *ping_info;
920         unsigned int     infosz;
921
922         infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]);
923         LIBCFS_ALLOC(ping_info, infosz);
924         if (ping_info == NULL) {
925                 CERROR("Can't allocate ping info[%d]\n", num_ni);
926                 return NULL;
927         }
928
929         ping_info->pi_nnis = num_ni;
930         ping_info->pi_pid = the_lnet.ln_pid;
931         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
932         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
933
934         return ping_info;
935 }
936
937 static inline int
938 lnet_get_net_ni_count_locked(struct lnet_net *net)
939 {
940         struct lnet_ni  *ni;
941         int             count = 0;
942
943         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
944                 count++;
945
946         return count;
947 }
948
949 static inline int
950 lnet_get_ni_count(void)
951 {
952         struct lnet_ni  *ni;
953         struct lnet_net *net;
954         int             count = 0;
955
956         lnet_net_lock(0);
957
958         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
959                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
960                         count++;
961         }
962
963         lnet_net_unlock(0);
964
965         return count;
966 }
967
968 static inline void
969 lnet_ping_info_free(struct lnet_ping_info *pinfo)
970 {
971         LIBCFS_FREE(pinfo,
972                     offsetof(struct lnet_ping_info,
973                              pi_ni[pinfo->pi_nnis]));
974 }
975
976 static void
977 lnet_ping_info_destroy(void)
978 {
979         struct lnet_net *net;
980         struct lnet_ni  *ni;
981
982         lnet_net_lock(LNET_LOCK_EX);
983
984         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
985                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
986                         lnet_ni_lock(ni);
987                         ni->ni_status = NULL;
988                         lnet_ni_unlock(ni);
989                 }
990         }
991
992         lnet_ping_info_free(the_lnet.ln_ping_info);
993         the_lnet.ln_ping_info = NULL;
994
995         lnet_net_unlock(LNET_LOCK_EX);
996 }
997
998 static void
999 lnet_ping_event_handler(lnet_event_t *event)
1000 {
1001         struct lnet_ping_info *pinfo = event->md.user_ptr;
1002
1003         if (event->unlinked)
1004                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
1005 }
1006
1007 static int
1008 lnet_ping_info_setup(struct lnet_ping_info **ppinfo, lnet_handle_md_t *md_handle,
1009                      int ni_count, bool set_eq)
1010 {
1011         lnet_handle_me_t  me_handle;
1012         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1013         lnet_md_t         md = {NULL};
1014         int               rc, rc2;
1015
1016         if (set_eq) {
1017                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
1018                                  &the_lnet.ln_ping_target_eq);
1019                 if (rc != 0) {
1020                         CERROR("Can't allocate ping EQ: %d\n", rc);
1021                         return rc;
1022                 }
1023         }
1024
1025         *ppinfo = lnet_ping_info_create(ni_count);
1026         if (*ppinfo == NULL) {
1027                 rc = -ENOMEM;
1028                 goto failed_0;
1029         }
1030
1031         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1032                           LNET_PROTO_PING_MATCHBITS, 0,
1033                           LNET_UNLINK, LNET_INS_AFTER,
1034                           &me_handle);
1035         if (rc != 0) {
1036                 CERROR("Can't create ping ME: %d\n", rc);
1037                 goto failed_1;
1038         }
1039
1040         /* initialize md content */
1041         md.start     = *ppinfo;
1042         md.length    = offsetof(struct lnet_ping_info,
1043                                 pi_ni[(*ppinfo)->pi_nnis]);
1044         md.threshold = LNET_MD_THRESH_INF;
1045         md.max_size  = 0;
1046         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1047                        LNET_MD_MANAGE_REMOTE;
1048         md.user_ptr  = NULL;
1049         md.eq_handle = the_lnet.ln_ping_target_eq;
1050         md.user_ptr = *ppinfo;
1051
1052         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
1053         if (rc != 0) {
1054                 CERROR("Can't attach ping MD: %d\n", rc);
1055                 goto failed_2;
1056         }
1057
1058         return 0;
1059
1060 failed_2:
1061         rc2 = LNetMEUnlink(me_handle);
1062         LASSERT(rc2 == 0);
1063 failed_1:
1064         lnet_ping_info_free(*ppinfo);
1065         *ppinfo = NULL;
1066 failed_0:
1067         if (set_eq)
1068                 LNetEQFree(the_lnet.ln_ping_target_eq);
1069         return rc;
1070 }
1071
1072 static void
1073 lnet_ping_md_unlink(struct lnet_ping_info *pinfo, lnet_handle_md_t *md_handle)
1074 {
1075         sigset_t        blocked = cfs_block_allsigs();
1076
1077         LNetMDUnlink(*md_handle);
1078         LNetInvalidateHandle(md_handle);
1079
1080         /* NB md could be busy; this just starts the unlink */
1081         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
1082                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
1083                 set_current_state(TASK_UNINTERRUPTIBLE);
1084                 schedule_timeout(cfs_time_seconds(1));
1085         }
1086
1087         cfs_restore_sigs(blocked);
1088 }
1089
1090 static void
1091 lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
1092 {
1093         int                     i;
1094         struct lnet_ni          *ni;
1095         struct lnet_net         *net;
1096         struct lnet_ni_status *ns;
1097
1098         i = 0;
1099         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1100                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1101                         LASSERT(i < ping_info->pi_nnis);
1102
1103                         ns = &ping_info->pi_ni[i];
1104
1105                         ns->ns_nid = ni->ni_nid;
1106
1107                         lnet_ni_lock(ni);
1108                         ns->ns_status = (ni->ni_status != NULL) ?
1109                                         ni->ni_status->ns_status :
1110                                                 LNET_NI_STATUS_UP;
1111                         ni->ni_status = ns;
1112                         lnet_ni_unlock(ni);
1113
1114                         i++;
1115                 }
1116
1117         }
1118 }
1119
1120 static void
1121 lnet_ping_target_update(struct lnet_ping_info *pinfo, lnet_handle_md_t md_handle)
1122 {
1123         struct lnet_ping_info *old_pinfo = NULL;
1124         lnet_handle_md_t old_md;
1125
1126         /* switch the NIs to point to the new ping info created */
1127         lnet_net_lock(LNET_LOCK_EX);
1128
1129         if (!the_lnet.ln_routing)
1130                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1131         lnet_ping_info_install_locked(pinfo);
1132
1133         if (the_lnet.ln_ping_info != NULL) {
1134                 old_pinfo = the_lnet.ln_ping_info;
1135                 old_md = the_lnet.ln_ping_target_md;
1136         }
1137         the_lnet.ln_ping_target_md = md_handle;
1138         the_lnet.ln_ping_info = pinfo;
1139
1140         lnet_net_unlock(LNET_LOCK_EX);
1141
1142         if (old_pinfo != NULL) {
1143                 /* unlink the old ping info */
1144                 lnet_ping_md_unlink(old_pinfo, &old_md);
1145                 lnet_ping_info_free(old_pinfo);
1146         }
1147 }
1148
1149 static void
1150 lnet_ping_target_fini(void)
1151 {
1152         int             rc;
1153
1154         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1155                             &the_lnet.ln_ping_target_md);
1156
1157         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1158         LASSERT(rc == 0);
1159
1160         lnet_ping_info_destroy();
1161 }
1162
1163 static int
1164 lnet_ni_tq_credits(lnet_ni_t *ni)
1165 {
1166         int     credits;
1167
1168         LASSERT(ni->ni_ncpts >= 1);
1169
1170         if (ni->ni_ncpts == 1)
1171                 return ni->ni_net->net_tunables.lct_max_tx_credits;
1172
1173         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
1174         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
1175         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
1176
1177         return credits;
1178 }
1179
1180 static void
1181 lnet_ni_unlink_locked(lnet_ni_t *ni)
1182 {
1183         if (!list_empty(&ni->ni_cptlist)) {
1184                 list_del_init(&ni->ni_cptlist);
1185                 lnet_ni_decref_locked(ni, 0);
1186         }
1187
1188         /* move it to zombie list and nobody can find it anymore */
1189         LASSERT(!list_empty(&ni->ni_netlist));
1190         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
1191         lnet_ni_decref_locked(ni, 0);
1192 }
1193
1194 static void
1195 lnet_clear_zombies_nis_locked(struct lnet_net *net)
1196 {
1197         int             i;
1198         int             islo;
1199         lnet_ni_t       *ni;
1200         struct list_head *zombie_list = &net->net_ni_zombie;
1201
1202         /*
1203          * Now wait for the NIs I just nuked to show up on the zombie
1204          * list and shut them down in guaranteed thread context
1205          */
1206         i = 2;
1207         while (!list_empty(zombie_list)) {
1208                 int     *ref;
1209                 int     j;
1210
1211                 ni = list_entry(zombie_list->next,
1212                                 lnet_ni_t, ni_netlist);
1213                 list_del_init(&ni->ni_netlist);
1214                 /* the ni should be in deleting state. If it's not it's
1215                  * a bug */
1216                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
1217                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1218                         if (*ref == 0)
1219                                 continue;
1220                         /* still busy, add it back to zombie list */
1221                         list_add(&ni->ni_netlist, zombie_list);
1222                         break;
1223                 }
1224
1225                 if (!list_empty(&ni->ni_netlist)) {
1226                         lnet_net_unlock(LNET_LOCK_EX);
1227                         ++i;
1228                         if ((i & (-i)) == i) {
1229                                 CDEBUG(D_WARNING,
1230                                        "Waiting for zombie LNI %s\n",
1231                                        libcfs_nid2str(ni->ni_nid));
1232                         }
1233                         set_current_state(TASK_UNINTERRUPTIBLE);
1234                         schedule_timeout(cfs_time_seconds(1));
1235                         lnet_net_lock(LNET_LOCK_EX);
1236                         continue;
1237                 }
1238
1239                 lnet_net_unlock(LNET_LOCK_EX);
1240
1241                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
1242
1243                 LASSERT(!in_interrupt());
1244                 (net->net_lnd->lnd_shutdown)(ni);
1245
1246                 if (!islo)
1247                         CDEBUG(D_LNI, "Removed LNI %s\n",
1248                               libcfs_nid2str(ni->ni_nid));
1249
1250                 lnet_ni_free(ni);
1251                 i = 2;
1252                 lnet_net_lock(LNET_LOCK_EX);
1253         }
1254 }
1255
1256 /* shutdown down the NI and release refcount */
1257 static void
1258 lnet_shutdown_lndni(struct lnet_ni *ni)
1259 {
1260         int i;
1261         struct lnet_net *net = ni->ni_net;
1262
1263         lnet_net_lock(LNET_LOCK_EX);
1264         ni->ni_state = LNET_NI_STATE_DELETING;
1265         lnet_ni_unlink_locked(ni);
1266         lnet_net_unlock(LNET_LOCK_EX);
1267
1268         /* clear messages for this NI on the lazy portal */
1269         for (i = 0; i < the_lnet.ln_nportals; i++)
1270                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
1271
1272         /* Do peer table cleanup for this ni */
1273         lnet_peer_tables_cleanup(ni);
1274
1275         lnet_net_lock(LNET_LOCK_EX);
1276         lnet_clear_zombies_nis_locked(net);
1277         lnet_net_unlock(LNET_LOCK_EX);
1278 }
1279
1280 static void
1281 lnet_shutdown_lndnet(struct lnet_net *net)
1282 {
1283         struct lnet_ni *ni;
1284
1285         lnet_net_lock(LNET_LOCK_EX);
1286
1287         net->net_state = LNET_NET_STATE_DELETING;
1288
1289         list_del_init(&net->net_list);
1290
1291         while (!list_empty(&net->net_ni_list)) {
1292                 ni = list_entry(net->net_ni_list.next,
1293                                 lnet_ni_t, ni_netlist);
1294                 lnet_net_unlock(LNET_LOCK_EX);
1295                 lnet_shutdown_lndni(ni);
1296                 lnet_net_lock(LNET_LOCK_EX);
1297         }
1298
1299         /*
1300          * decrement ref count on lnd only when the entire network goes
1301          * away
1302          */
1303         net->net_lnd->lnd_refcount--;
1304
1305         lnet_net_unlock(LNET_LOCK_EX);
1306
1307         lnet_net_free(net);
1308 }
1309
1310 static void
1311 lnet_shutdown_lndnets(void)
1312 {
1313         struct lnet_net *net;
1314
1315         /* NB called holding the global mutex */
1316
1317         /* All quiet on the API front */
1318         LASSERT(!the_lnet.ln_shutdown);
1319         LASSERT(the_lnet.ln_refcount == 0);
1320
1321         lnet_net_lock(LNET_LOCK_EX);
1322         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1323
1324         while (!list_empty(&the_lnet.ln_nets)) {
1325                 /*
1326                  * move the nets to the zombie list to avoid them being
1327                  * picked up for new work. LONET is also included in the
1328                  * Nets that will be moved to the zombie list
1329                  */
1330                 net = list_entry(the_lnet.ln_nets.next,
1331                                  struct lnet_net, net_list);
1332                 list_move(&net->net_list, &the_lnet.ln_net_zombie);
1333         }
1334
1335         /* Drop the cached loopback Net. */
1336         if (the_lnet.ln_loni != NULL) {
1337                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1338                 the_lnet.ln_loni = NULL;
1339         }
1340         lnet_net_unlock(LNET_LOCK_EX);
1341
1342         /* iterate through the net zombie list and delete each net */
1343         while (!list_empty(&the_lnet.ln_net_zombie)) {
1344                 net = list_entry(the_lnet.ln_net_zombie.next,
1345                                  struct lnet_net, net_list);
1346                 lnet_shutdown_lndnet(net);
1347         }
1348
1349         lnet_net_lock(LNET_LOCK_EX);
1350         the_lnet.ln_shutdown = 0;
1351         lnet_net_unlock(LNET_LOCK_EX);
1352 }
1353
1354 static int
1355 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
1356 {
1357         int                     rc = -EINVAL;
1358         struct lnet_tx_queue    *tq;
1359         int                     i;
1360         struct lnet_net         *net = ni->ni_net;
1361
1362         mutex_lock(&the_lnet.ln_lnd_mutex);
1363
1364         if (tun) {
1365                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
1366                 ni->ni_lnd_tunables_set = true;
1367         }
1368
1369         rc = (net->net_lnd->lnd_startup)(ni);
1370
1371         mutex_unlock(&the_lnet.ln_lnd_mutex);
1372
1373         if (rc != 0) {
1374                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
1375                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
1376                 lnet_net_lock(LNET_LOCK_EX);
1377                 net->net_lnd->lnd_refcount--;
1378                 lnet_net_unlock(LNET_LOCK_EX);
1379                 goto failed0;
1380         }
1381
1382         ni->ni_state = LNET_NI_STATE_ACTIVE;
1383
1384         /* We keep a reference on the loopback net through the loopback NI */
1385         if (net->net_lnd->lnd_type == LOLND) {
1386                 lnet_ni_addref(ni);
1387                 LASSERT(the_lnet.ln_loni == NULL);
1388                 the_lnet.ln_loni = ni;
1389                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
1390                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
1391                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
1392                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
1393                 return 0;
1394         }
1395
1396         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
1397             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
1398                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1399                                    libcfs_lnd2str(net->net_lnd->lnd_type),
1400                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
1401                                         "" : "per-peer ");
1402                 /* shutdown the NI since if we get here then it must've already
1403                  * been started
1404                  */
1405                 lnet_shutdown_lndni(ni);
1406                 return -EINVAL;
1407         }
1408
1409         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1410                 tq->tq_credits_min =
1411                 tq->tq_credits_max =
1412                 tq->tq_credits = lnet_ni_tq_credits(ni);
1413         }
1414
1415         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1416                 libcfs_nid2str(ni->ni_nid),
1417                 ni->ni_net->net_tunables.lct_peer_tx_credits,
1418                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1419                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
1420                 ni->ni_net->net_tunables.lct_peer_timeout);
1421
1422         return 0;
1423 failed0:
1424         lnet_ni_free(ni);
1425         return rc;
1426 }
1427
1428 static int
1429 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
1430 {
1431         struct lnet_ni          *ni;
1432         struct lnet_net         *net_l = NULL;
1433         struct list_head        local_ni_list;
1434         int                     rc;
1435         int                     ni_count = 0;
1436         __u32                   lnd_type;
1437         lnd_t                   *lnd;
1438         int                     peer_timeout =
1439                 net->net_tunables.lct_peer_timeout;
1440         int                     maxtxcredits =
1441                 net->net_tunables.lct_max_tx_credits;
1442         int                     peerrtrcredits =
1443                 net->net_tunables.lct_peer_rtr_credits;
1444
1445         INIT_LIST_HEAD(&local_ni_list);
1446
1447         /*
1448          * make sure that this net is unique. If it isn't then
1449          * we are adding interfaces to an already existing network, and
1450          * 'net' is just a convenient way to pass in the list.
1451          * if it is unique we need to find the LND and load it if
1452          * necessary.
1453          */
1454         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
1455                 lnd_type = LNET_NETTYP(net->net_id);
1456
1457                 LASSERT(libcfs_isknown_lnd(lnd_type));
1458
1459                 if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
1460                     lnd_type == IIBLND || lnd_type == VIBLND) {
1461                         CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
1462                         rc = -EINVAL;
1463                         goto failed0;
1464                 }
1465
1466                 mutex_lock(&the_lnet.ln_lnd_mutex);
1467                 lnd = lnet_find_lnd_by_type(lnd_type);
1468
1469                 if (lnd == NULL) {
1470                         mutex_unlock(&the_lnet.ln_lnd_mutex);
1471                         rc = request_module("%s", libcfs_lnd2modname(lnd_type));
1472                         mutex_lock(&the_lnet.ln_lnd_mutex);
1473
1474                         lnd = lnet_find_lnd_by_type(lnd_type);
1475                         if (lnd == NULL) {
1476                                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1477                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1478                                 libcfs_lnd2str(lnd_type),
1479                                 libcfs_lnd2modname(lnd_type), rc);
1480 #ifndef HAVE_MODULE_LOADING_SUPPORT
1481                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1482                                                 "compiled with kernel module "
1483                                                 "loading support.");
1484 #endif
1485                                 rc = -EINVAL;
1486                                 goto failed0;
1487                         }
1488                 }
1489
1490                 lnet_net_lock(LNET_LOCK_EX);
1491                 lnd->lnd_refcount++;
1492                 lnet_net_unlock(LNET_LOCK_EX);
1493
1494                 net->net_lnd = lnd;
1495
1496                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1497
1498                 net_l = net;
1499         }
1500
1501         /*
1502          * net_l: if the network being added is unique then net_l
1503          *        will point to that network
1504          *        if the network being added is not unique then
1505          *        net_l points to the existing network.
1506          *
1507          * When we enter the loop below, we'll pick NIs off he
1508          * network beign added and start them up, then add them to
1509          * a local ni list. Once we've successfully started all
1510          * the NIs then we join the local NI list (of started up
1511          * networks) with the net_l->net_ni_list, which should
1512          * point to the correct network to add the new ni list to
1513          *
1514          * If any of the new NIs fail to start up, then we want to
1515          * iterate through the local ni list, which should include
1516          * any NIs which were successfully started up, and shut
1517          * them down.
1518          *
1519          * After than we want to delete the network being added,
1520          * to avoid a memory leak.
1521          */
1522
1523         /*
1524          * When a network uses TCP bonding then all its interfaces
1525          * must be specified when the network is first defined: the
1526          * TCP bonding code doesn't allow for interfaces to be added
1527          * or removed.
1528          */
1529         if (net_l != net && net_l != NULL && use_tcp_bonding &&
1530             LNET_NETTYP(net_l->net_id) == SOCKLND) {
1531                 rc = -EINVAL;
1532                 goto failed0;
1533         }
1534
1535         while (!list_empty(&net->net_ni_added)) {
1536                 ni = list_entry(net->net_ni_added.next, struct lnet_ni,
1537                                 ni_netlist);
1538                 list_del_init(&ni->ni_netlist);
1539
1540                 /* make sure that the the NI we're about to start
1541                  * up is actually unique. if it's not fail. */
1542                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
1543                                         ni->ni_interfaces[0])) {
1544                         rc = -EINVAL;
1545                         goto failed1;
1546                 }
1547
1548                 /* adjust the pointer the parent network, just in case it
1549                  * the net is a duplicate */
1550                 ni->ni_net = net_l;
1551
1552                 rc = lnet_startup_lndni(ni, tun);
1553
1554                 LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
1555                         ni->ni_net->net_lnd->lnd_query != NULL);
1556
1557                 if (rc < 0)
1558                         goto failed1;
1559
1560                 lnet_ni_addref(ni);
1561                 list_add_tail(&ni->ni_netlist, &local_ni_list);
1562
1563                 ni_count++;
1564         }
1565
1566         lnet_net_lock(LNET_LOCK_EX);
1567         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
1568         lnet_net_unlock(LNET_LOCK_EX);
1569
1570         /* if the network is not unique then we don't want to keep
1571          * it around after we're done. Free it. Otherwise add that
1572          * net to the global the_lnet.ln_nets */
1573         if (net_l != net && net_l != NULL) {
1574                 /*
1575                  * TODO - note. currently the tunables can not be updated
1576                  * once added
1577                  */
1578                 lnet_net_free(net);
1579         } else {
1580                 net->net_state = LNET_NET_STATE_ACTIVE;
1581                 /*
1582                  * restore tunables after it has been overwitten by the
1583                  * lnd
1584                  */
1585                 if (peer_timeout != -1)
1586                         net->net_tunables.lct_peer_timeout = peer_timeout;
1587                 if (maxtxcredits != -1)
1588                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
1589                 if (peerrtrcredits != -1)
1590                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
1591
1592                 lnet_net_lock(LNET_LOCK_EX);
1593                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
1594                 lnet_net_unlock(LNET_LOCK_EX);
1595         }
1596
1597         return ni_count;
1598
1599 failed1:
1600         /*
1601          * shutdown the new NIs that are being started up
1602          * free the NET being started
1603          */
1604         while (!list_empty(&local_ni_list)) {
1605                 ni = list_entry(local_ni_list.next, struct lnet_ni,
1606                                 ni_netlist);
1607
1608                 lnet_shutdown_lndni(ni);
1609         }
1610
1611 failed0:
1612         lnet_net_free(net);
1613
1614         return rc;
1615 }
1616
1617 static int
1618 lnet_startup_lndnets(struct list_head *netlist)
1619 {
1620         struct lnet_net         *net;
1621         int                     rc;
1622         int                     ni_count = 0;
1623
1624         while (!list_empty(netlist)) {
1625                 net = list_entry(netlist->next, struct lnet_net, net_list);
1626                 list_del_init(&net->net_list);
1627
1628                 rc = lnet_startup_lndnet(net, NULL);
1629
1630                 if (rc < 0)
1631                         goto failed;
1632
1633                 ni_count += rc;
1634         }
1635
1636         return ni_count;
1637 failed:
1638         lnet_shutdown_lndnets();
1639
1640         return rc;
1641 }
1642
1643 /**
1644  * Initialize LNet library.
1645  *
1646  * Automatically called at module loading time. Caller has to call
1647  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
1648  * latter returned 0. It must be called exactly once.
1649  *
1650  * \retval 0 on success
1651  * \retval -ve on failures.
1652  */
1653 int lnet_lib_init(void)
1654 {
1655         int rc;
1656
1657         lnet_assert_wire_constants();
1658
1659         memset(&the_lnet, 0, sizeof(the_lnet));
1660
1661         /* refer to global cfs_cpt_table for now */
1662         the_lnet.ln_cpt_table   = cfs_cpt_table;
1663         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1664
1665         LASSERT(the_lnet.ln_cpt_number > 0);
1666         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1667                 /* we are under risk of consuming all lh_cookie */
1668                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1669                        "please change setting of CPT-table and retry\n",
1670                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1671                 return -E2BIG;
1672         }
1673
1674         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1675                 the_lnet.ln_cpt_bits++;
1676
1677         rc = lnet_create_locks();
1678         if (rc != 0) {
1679                 CERROR("Can't create LNet global locks: %d\n", rc);
1680                 return rc;
1681         }
1682
1683         the_lnet.ln_refcount = 0;
1684         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1685         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1686         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
1687         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1688         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1689
1690         /* The hash table size is the number of bits it takes to express the set
1691          * ln_num_routes, minus 1 (better to under estimate than over so we
1692          * don't waste memory). */
1693         if (rnet_htable_size <= 0)
1694                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1695         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1696                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1697         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1698                                            order_base_2(rnet_htable_size) - 1);
1699
1700         /* All LNDs apart from the LOLND are in separate modules.  They
1701          * register themselves when their module loads, and unregister
1702          * themselves when their module is unloaded. */
1703         lnet_register_lnd(&the_lolnd);
1704         return 0;
1705 }
1706
1707 /**
1708  * Finalize LNet library.
1709  *
1710  * \pre lnet_lib_init() called with success.
1711  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1712  */
1713 void lnet_lib_exit(void)
1714 {
1715         LASSERT(the_lnet.ln_refcount == 0);
1716
1717         while (!list_empty(&the_lnet.ln_lnds))
1718                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1719                                                lnd_t, lnd_list));
1720         lnet_destroy_locks();
1721 }
1722
1723 /**
1724  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1725  *
1726  * Users must call this function at least once before any other functions.
1727  * For each successful call there must be a corresponding call to
1728  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1729  * ignored.
1730  *
1731  * The PID used by LNet may be different from the one requested.
1732  * See LNetGetId().
1733  *
1734  * \param requested_pid PID requested by the caller.
1735  *
1736  * \return >= 0 on success, and < 0 error code on failures.
1737  */
1738 int
1739 LNetNIInit(lnet_pid_t requested_pid)
1740 {
1741         int                     im_a_router = 0;
1742         int                     rc;
1743         int                     ni_count;
1744         struct lnet_ping_info   *pinfo;
1745         lnet_handle_md_t        md_handle;
1746         struct list_head        net_head;
1747         struct lnet_net         *net;
1748
1749         INIT_LIST_HEAD(&net_head);
1750
1751         mutex_lock(&the_lnet.ln_api_mutex);
1752
1753         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1754
1755         if (the_lnet.ln_refcount > 0) {
1756                 rc = the_lnet.ln_refcount++;
1757                 mutex_unlock(&the_lnet.ln_api_mutex);
1758                 return rc;
1759         }
1760
1761         rc = lnet_prepare(requested_pid);
1762         if (rc != 0) {
1763                 mutex_unlock(&the_lnet.ln_api_mutex);
1764                 return rc;
1765         }
1766
1767         /* create a network for Loopback network */
1768         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
1769         if (net == NULL) {
1770                 rc = -ENOMEM;
1771                 goto err_empty_list;
1772         }
1773
1774         /* Add in the loopback NI */
1775         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
1776                 rc = -ENOMEM;
1777                 goto err_empty_list;
1778         }
1779
1780         /* If LNet is being initialized via DLC it is possible
1781          * that the user requests not to load module parameters (ones which
1782          * are supported by DLC) on initialization.  Therefore, make sure not
1783          * to load networks, routes and forwarding from module parameters
1784          * in this case.  On cleanup in case of failure only clean up
1785          * routes if it has been loaded */
1786         if (!the_lnet.ln_nis_from_mod_params) {
1787                 rc = lnet_parse_networks(&net_head, lnet_get_networks(),
1788                                          use_tcp_bonding);
1789                 if (rc < 0)
1790                         goto err_empty_list;
1791         }
1792
1793         ni_count = lnet_startup_lndnets(&net_head);
1794         if (ni_count < 0) {
1795                 rc = ni_count;
1796                 goto err_empty_list;
1797         }
1798
1799         if (!the_lnet.ln_nis_from_mod_params) {
1800                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1801                 if (rc != 0)
1802                         goto err_shutdown_lndnis;
1803
1804                 rc = lnet_check_routes();
1805                 if (rc != 0)
1806                         goto err_destroy_routes;
1807
1808                 rc = lnet_rtrpools_alloc(im_a_router);
1809                 if (rc != 0)
1810                         goto err_destroy_routes;
1811         }
1812
1813         rc = lnet_acceptor_start();
1814         if (rc != 0)
1815                 goto err_destroy_routes;
1816
1817         the_lnet.ln_refcount = 1;
1818         /* Now I may use my own API functions... */
1819
1820         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1821         if (rc != 0)
1822                 goto err_acceptor_stop;
1823
1824         lnet_ping_target_update(pinfo, md_handle);
1825
1826         rc = lnet_router_checker_start();
1827         if (rc != 0)
1828                 goto err_stop_ping;
1829
1830         lnet_fault_init();
1831         lnet_proc_init();
1832
1833         mutex_unlock(&the_lnet.ln_api_mutex);
1834
1835         return 0;
1836
1837 err_stop_ping:
1838         lnet_ping_target_fini();
1839 err_acceptor_stop:
1840         the_lnet.ln_refcount = 0;
1841         lnet_acceptor_stop();
1842 err_destroy_routes:
1843         if (!the_lnet.ln_nis_from_mod_params)
1844                 lnet_destroy_routes();
1845 err_shutdown_lndnis:
1846         lnet_shutdown_lndnets();
1847 err_empty_list:
1848         lnet_unprepare();
1849         LASSERT(rc < 0);
1850         mutex_unlock(&the_lnet.ln_api_mutex);
1851         while (!list_empty(&net_head)) {
1852                 struct lnet_net *net;
1853
1854                 net = list_entry(net_head.next, struct lnet_net, net_list);
1855                 list_del_init(&net->net_list);
1856                 lnet_net_free(net);
1857         }
1858         return rc;
1859 }
1860 EXPORT_SYMBOL(LNetNIInit);
1861
1862 /**
1863  * Stop LNet interfaces, routing, and forwarding.
1864  *
1865  * Users must call this function once for each successful call to LNetNIInit().
1866  * Once the LNetNIFini() operation has been started, the results of pending
1867  * API operations are undefined.
1868  *
1869  * \return always 0 for current implementation.
1870  */
1871 int
1872 LNetNIFini()
1873 {
1874         mutex_lock(&the_lnet.ln_api_mutex);
1875
1876         LASSERT(the_lnet.ln_refcount > 0);
1877
1878         if (the_lnet.ln_refcount != 1) {
1879                 the_lnet.ln_refcount--;
1880         } else {
1881                 LASSERT(!the_lnet.ln_niinit_self);
1882
1883                 lnet_fault_fini();
1884
1885                 lnet_proc_fini();
1886                 lnet_router_checker_stop();
1887                 lnet_ping_target_fini();
1888
1889                 /* Teardown fns that use my own API functions BEFORE here */
1890                 the_lnet.ln_refcount = 0;
1891
1892                 lnet_acceptor_stop();
1893                 lnet_destroy_routes();
1894                 lnet_shutdown_lndnets();
1895                 lnet_unprepare();
1896         }
1897
1898         mutex_unlock(&the_lnet.ln_api_mutex);
1899         return 0;
1900 }
1901 EXPORT_SYMBOL(LNetNIFini);
1902
1903 /**
1904  * Grabs the ni data from the ni structure and fills the out
1905  * parameters
1906  *
1907  * \param[in] ni network        interface structure
1908  * \param[out] cpt_count        the number of cpts the ni is on
1909  * \param[out] nid              Network Interface ID
1910  * \param[out] peer_timeout     NI peer timeout
1911  * \param[out] peer_tx_crdits   NI peer transmit credits
1912  * \param[out] peer_rtr_credits NI peer router credits
1913  * \param[out] max_tx_credits   NI max transmit credit
1914  * \param[out] net_config       Network configuration
1915  */
1916 static void
1917 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
1918 {
1919         struct lnet_ioctl_net_config *net_config;
1920         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
1921         size_t min_size, tunable_size = 0;
1922         int i;
1923
1924         if (!ni || !config)
1925                 return;
1926
1927         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
1928         if (!net_config)
1929                 return;
1930
1931         BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
1932                      ARRAY_SIZE(net_config->ni_interfaces));
1933
1934         for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1935                 if (!ni->ni_interfaces[i])
1936                         break;
1937
1938                 strncpy(net_config->ni_interfaces[i],
1939                         ni->ni_interfaces[i],
1940                         sizeof(net_config->ni_interfaces[i]));
1941         }
1942
1943         config->cfg_nid = ni->ni_nid;
1944         config->cfg_config_u.cfg_net.net_peer_timeout =
1945                 ni->ni_net->net_tunables.lct_peer_timeout;
1946         config->cfg_config_u.cfg_net.net_max_tx_credits =
1947                 ni->ni_net->net_tunables.lct_max_tx_credits;
1948         config->cfg_config_u.cfg_net.net_peer_tx_credits =
1949                 ni->ni_net->net_tunables.lct_peer_tx_credits;
1950         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
1951                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
1952
1953         net_config->ni_status = ni->ni_status->ns_status;
1954
1955         if (ni->ni_cpts) {
1956                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
1957
1958                 for (i = 0; i < num_cpts; i++)
1959                         net_config->ni_cpts[i] = ni->ni_cpts[i];
1960
1961                 config->cfg_ncpts = num_cpts;
1962         }
1963
1964         /*
1965          * See if user land tools sent in a newer and larger version
1966          * of struct lnet_tunables than what the kernel uses.
1967          */
1968         min_size = sizeof(*config) + sizeof(*net_config);
1969
1970         if (config->cfg_hdr.ioc_len > min_size)
1971                 tunable_size = config->cfg_hdr.ioc_len - min_size;
1972
1973         /* Don't copy too much data to user space */
1974         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
1975         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
1976
1977         if (lnd_cfg && min_size) {
1978                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
1979                 config->cfg_config_u.cfg_net.net_interface_count = 1;
1980
1981                 /* Tell user land that kernel side has less data */
1982                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
1983                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
1984                         config->cfg_hdr.ioc_len -= min_size;
1985                 }
1986         }
1987 }
1988
1989 struct lnet_ni *
1990 lnet_get_ni_idx_locked(int idx)
1991 {
1992         struct lnet_ni          *ni;
1993         struct lnet_net         *net;
1994
1995         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1996                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1997                         if (idx-- == 0)
1998                                 return ni;
1999                 }
2000         }
2001
2002         return NULL;
2003 }
2004
2005 struct lnet_ni *
2006 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
2007 {
2008         struct lnet_ni          *ni;
2009         struct lnet_net         *net = mynet;
2010
2011         if (prev == NULL) {
2012                 if (net == NULL)
2013                         net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
2014                                         net_list);
2015                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2016                                 ni_netlist);
2017
2018                 return ni;
2019         }
2020
2021         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
2022                 /* if you reached the end of the ni list and the net is
2023                  * specified, then there are no more nis in that net */
2024                 if (net != NULL)
2025                         return NULL;
2026
2027                 /* we reached the end of this net ni list. move to the
2028                  * next net */
2029                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
2030                         /* no more nets and no more NIs. */
2031                         return NULL;
2032
2033                 /* get the next net */
2034                 net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
2035                                  net_list);
2036                 /* get the ni on it */
2037                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2038                                 ni_netlist);
2039
2040                 return ni;
2041         }
2042
2043         /* there are more nis left */
2044         ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
2045
2046         return ni;
2047 }
2048
2049 int
2050 lnet_get_net_config(struct lnet_ioctl_config_data *config)
2051 {
2052         struct lnet_ni *ni;
2053         int cpt;
2054         int rc = -ENOENT;
2055         int idx = config->cfg_count;
2056
2057         cpt = lnet_net_lock_current();
2058
2059         ni = lnet_get_ni_idx_locked(idx);
2060
2061         if (ni != NULL) {
2062                 rc = 0;
2063                 lnet_ni_lock(ni);
2064                 lnet_fill_ni_info(ni, config);
2065                 lnet_ni_unlock(ni);
2066         }
2067
2068         lnet_net_unlock(cpt);
2069         return rc;
2070 }
2071
2072 int
2073 lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
2074 {
2075         char                    *nets = conf->cfg_config_u.cfg_net.net_intf;
2076         struct lnet_ping_info   *pinfo;
2077         lnet_handle_md_t        md_handle;
2078         struct lnet_net         *net;
2079         struct list_head        net_head;
2080         int                     rc;
2081         lnet_remotenet_t        *rnet;
2082         int                     net_ni_count;
2083         int                     num_acceptor_nets;
2084         __u32                   net_type;
2085         struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
2086
2087         INIT_LIST_HEAD(&net_head);
2088
2089         if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
2090                 lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
2091
2092         /* Create a net/ni structures for the network string */
2093         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2094         if (rc <= 0)
2095                 return rc == 0 ? -EINVAL : rc;
2096
2097         mutex_lock(&the_lnet.ln_api_mutex);
2098
2099         if (rc > 1) {
2100                 rc = -EINVAL; /* only add one network per call */
2101                 goto failed0;
2102         }
2103
2104         net = list_entry(net_head.next, struct lnet_net, net_list);
2105
2106         lnet_net_lock(LNET_LOCK_EX);
2107         rnet = lnet_find_rnet_locked(net->net_id);
2108         lnet_net_unlock(LNET_LOCK_EX);
2109         /* make sure that the net added doesn't invalidate the current
2110          * configuration LNet is keeping */
2111         if (rnet != NULL) {
2112                 CERROR("Adding net %s will invalidate routing configuration\n",
2113                        nets);
2114                 rc = -EUSERS;
2115                 goto failed0;
2116         }
2117
2118         /*
2119          * make sure you calculate the correct number of slots in the ping
2120          * info. Since the ping info is a flattened list of all the NIs,
2121          * we should allocate enough slots to accomodate the number of NIs
2122          * which will be added.
2123          *
2124          * We can use lnet_get_net_ni_count_locked() since the net is not
2125          * on a public list yet, so locking is not a problem
2126          */
2127         net_ni_count = lnet_get_net_ni_count_locked(net);
2128
2129         rc = lnet_ping_info_setup(&pinfo, &md_handle,
2130                                   net_ni_count + lnet_get_ni_count(),
2131                                   false);
2132         if (rc != 0)
2133                 goto failed0;
2134
2135         list_del_init(&net->net_list);
2136
2137         if (lnd_tunables)
2138                 memcpy(&net->net_tunables,
2139                        &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
2140
2141         /*
2142          * before starting this network get a count of the current TCP
2143          * networks which require the acceptor thread running. If that
2144          * count is == 0 before we start up this network, then we'd want to
2145          * start up the acceptor thread after starting up this network
2146          */
2147         num_acceptor_nets = lnet_count_acceptor_nets();
2148
2149         /*
2150          * lnd_startup_lndnet() can deallocate 'net' even if it it returns
2151          * success, because we endded up adding interfaces to an existing
2152          * network. So grab the net_type now
2153          */
2154         net_type = LNET_NETTYP(net->net_id);
2155
2156         rc = lnet_startup_lndnet(net,
2157                                  (lnd_tunables) ? &lnd_tunables->lt_tun : NULL);
2158         if (rc < 0)
2159                 goto failed1;
2160
2161         /*
2162          * Start the acceptor thread if this is the first network
2163          * being added that requires the thread.
2164          */
2165         if (net_type == SOCKLND && num_acceptor_nets == 0)
2166         {
2167                 rc = lnet_acceptor_start();
2168                 if (rc < 0) {
2169                         /* shutdown the net that we just started */
2170                         CERROR("Failed to start up acceptor thread\n");
2171                         /*
2172                          * Note that if we needed to start the acceptor
2173                          * thread, then 'net' must have been the first TCP
2174                          * network, therefore was unique, and therefore
2175                          * wasn't deallocated by lnet_startup_lndnet()
2176                          */
2177                         lnet_shutdown_lndnet(net);
2178                         goto failed1;
2179                 }
2180         }
2181
2182         lnet_ping_target_update(pinfo, md_handle);
2183         mutex_unlock(&the_lnet.ln_api_mutex);
2184
2185         return 0;
2186
2187 failed1:
2188         lnet_ping_md_unlink(pinfo, &md_handle);
2189         lnet_ping_info_free(pinfo);
2190 failed0:
2191         mutex_unlock(&the_lnet.ln_api_mutex);
2192         while (!list_empty(&net_head)) {
2193                 net = list_entry(net_head.next, struct lnet_net, net_list);
2194                 list_del_init(&net->net_list);
2195                 lnet_net_free(net);
2196         }
2197         return rc;
2198 }
2199
2200 int
2201 lnet_dyn_del_ni(__u32 net_id)
2202 {
2203         struct lnet_net  *net;
2204         struct lnet_ping_info *pinfo;
2205         lnet_handle_md_t  md_handle;
2206         int               rc;
2207         int               net_ni_count;
2208
2209         /* don't allow userspace to shutdown the LOLND */
2210         if (LNET_NETTYP(net_id) == LOLND)
2211                 return -EINVAL;
2212
2213         mutex_lock(&the_lnet.ln_api_mutex);
2214
2215         lnet_net_lock(0);
2216
2217         net = lnet_get_net_locked(net_id);
2218         if (net == NULL) {
2219                 rc = -EINVAL;
2220                 goto out;
2221         }
2222
2223         net_ni_count = lnet_get_net_ni_count_locked(net);
2224
2225         lnet_net_unlock(0);
2226
2227         /* create and link a new ping info, before removing the old one */
2228         rc = lnet_ping_info_setup(&pinfo, &md_handle,
2229                                   lnet_get_ni_count() - net_ni_count, false);
2230         if (rc != 0)
2231                 goto out;
2232
2233         lnet_shutdown_lndnet(net);
2234
2235         if (lnet_count_acceptor_nets() == 0)
2236                 lnet_acceptor_stop();
2237
2238         lnet_ping_target_update(pinfo, md_handle);
2239
2240 out:
2241         mutex_unlock(&the_lnet.ln_api_mutex);
2242
2243         return rc;
2244 }
2245
2246 /**
2247  * LNet ioctl handler.
2248  *
2249  */
2250 int
2251 LNetCtl(unsigned int cmd, void *arg)
2252 {
2253         struct libcfs_ioctl_data *data = arg;
2254         struct lnet_ioctl_config_data *config;
2255         lnet_process_id_t         id = {0};
2256         lnet_ni_t                *ni;
2257         int                       rc;
2258
2259         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
2260                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
2261
2262         switch (cmd) {
2263         case IOC_LIBCFS_GET_NI:
2264                 rc = LNetGetId(data->ioc_count, &id);
2265                 data->ioc_nid = id.nid;
2266                 return rc;
2267
2268         case IOC_LIBCFS_FAIL_NID:
2269                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2270
2271         case IOC_LIBCFS_ADD_ROUTE:
2272                 config = arg;
2273
2274                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2275                         return -EINVAL;
2276
2277                 mutex_lock(&the_lnet.ln_api_mutex);
2278                 rc = lnet_add_route(config->cfg_net,
2279                                     config->cfg_config_u.cfg_route.rtr_hop,
2280                                     config->cfg_nid,
2281                                     config->cfg_config_u.cfg_route.
2282                                         rtr_priority);
2283                 if (rc == 0) {
2284                         rc = lnet_check_routes();
2285                         if (rc != 0)
2286                                 lnet_del_route(config->cfg_net,
2287                                                config->cfg_nid);
2288                 }
2289                 mutex_unlock(&the_lnet.ln_api_mutex);
2290                 return rc;
2291
2292         case IOC_LIBCFS_DEL_ROUTE:
2293                 config = arg;
2294
2295                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2296                         return -EINVAL;
2297
2298                 mutex_lock(&the_lnet.ln_api_mutex);
2299                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2300                 mutex_unlock(&the_lnet.ln_api_mutex);
2301                 return rc;
2302
2303         case IOC_LIBCFS_GET_ROUTE:
2304                 config = arg;
2305
2306                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2307                         return -EINVAL;
2308
2309                 return lnet_get_route(config->cfg_count,
2310                                       &config->cfg_net,
2311                                       &config->cfg_config_u.cfg_route.rtr_hop,
2312                                       &config->cfg_nid,
2313                                       &config->cfg_config_u.cfg_route.rtr_flags,
2314                                       &config->cfg_config_u.cfg_route.
2315                                         rtr_priority);
2316
2317         case IOC_LIBCFS_GET_NET: {
2318                 size_t total = sizeof(*config) +
2319                                sizeof(struct lnet_ioctl_net_config);
2320                 config = arg;
2321
2322                 if (config->cfg_hdr.ioc_len < total)
2323                         return -EINVAL;
2324
2325                 return lnet_get_net_config(config);
2326         }
2327
2328         case IOC_LIBCFS_GET_LNET_STATS:
2329         {
2330                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2331
2332                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
2333                         return -EINVAL;
2334
2335                 lnet_counters_get(&lnet_stats->st_cntrs);
2336                 return 0;
2337         }
2338
2339         case IOC_LIBCFS_CONFIG_RTR:
2340                 config = arg;
2341
2342                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2343                         return -EINVAL;
2344
2345                 mutex_lock(&the_lnet.ln_api_mutex);
2346                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2347                         rc = lnet_rtrpools_enable();
2348                         mutex_unlock(&the_lnet.ln_api_mutex);
2349                         return rc;
2350                 }
2351                 lnet_rtrpools_disable();
2352                 mutex_unlock(&the_lnet.ln_api_mutex);
2353                 return 0;
2354
2355         case IOC_LIBCFS_ADD_BUF:
2356                 config = arg;
2357
2358                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2359                         return -EINVAL;
2360
2361                 mutex_lock(&the_lnet.ln_api_mutex);
2362                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2363                                                 buf_tiny,
2364                                           config->cfg_config_u.cfg_buffers.
2365                                                 buf_small,
2366                                           config->cfg_config_u.cfg_buffers.
2367                                                 buf_large);
2368                 mutex_unlock(&the_lnet.ln_api_mutex);
2369                 return rc;
2370
2371         case IOC_LIBCFS_GET_BUF: {
2372                 struct lnet_ioctl_pool_cfg *pool_cfg;
2373                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
2374
2375                 config = arg;
2376
2377                 if (config->cfg_hdr.ioc_len < total)
2378                         return -EINVAL;
2379
2380                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2381                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2382         }
2383
2384         case IOC_LIBCFS_GET_PEER_INFO: {
2385                 struct lnet_ioctl_peer *peer_info = arg;
2386
2387                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
2388                         return -EINVAL;
2389
2390                 return lnet_get_peer_info(
2391                    peer_info->pr_count,
2392                    &peer_info->pr_nid,
2393                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2394                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2395                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2396                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2397                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2398                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2399                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2400                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2401         }
2402
2403         case IOC_LIBCFS_NOTIFY_ROUTER: {
2404                 unsigned long jiffies_passed;
2405
2406                 jiffies_passed = ktime_get_real_seconds() - data->ioc_u64[0];
2407                 jiffies_passed = cfs_time_seconds(jiffies_passed);
2408
2409                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2410                                    jiffies - jiffies_passed);
2411         }
2412
2413         case IOC_LIBCFS_LNET_DIST:
2414                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2415                 if (rc < 0 && rc != -EHOSTUNREACH)
2416                         return rc;
2417
2418                 data->ioc_u32[0] = rc;
2419                 return 0;
2420
2421         case IOC_LIBCFS_TESTPROTOCOMPAT:
2422                 lnet_net_lock(LNET_LOCK_EX);
2423                 the_lnet.ln_testprotocompat = data->ioc_flags;
2424                 lnet_net_unlock(LNET_LOCK_EX);
2425                 return 0;
2426
2427         case IOC_LIBCFS_LNET_FAULT:
2428                 return lnet_fault_ctl(data->ioc_flags, data);
2429
2430         case IOC_LIBCFS_PING: {
2431                 signed long timeout;
2432
2433                 id.nid = data->ioc_nid;
2434                 id.pid = data->ioc_u32[0];
2435
2436                 /* Don't block longer than 2 minutes */
2437                 if (data->ioc_u32[1] > 120 * MSEC_PER_SEC)
2438                         return -EINVAL;
2439
2440                 /* If timestamp is negative then disable timeout */
2441                 if ((s32)data->ioc_u32[1] < 0)
2442                         timeout = MAX_SCHEDULE_TIMEOUT;
2443                 else
2444                         timeout = msecs_to_jiffies(data->ioc_u32[1]);
2445
2446                 rc = lnet_ping(id, timeout, data->ioc_pbuf1,
2447                                data->ioc_plen1 / sizeof(lnet_process_id_t));
2448                 if (rc < 0)
2449                         return rc;
2450                 data->ioc_count = rc;
2451                 return 0;
2452         }
2453         default:
2454                 ni = lnet_net2ni(data->ioc_net);
2455                 if (ni == NULL)
2456                         return -EINVAL;
2457
2458                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
2459                         rc = -EINVAL;
2460                 else
2461                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
2462
2463                 return rc;
2464         }
2465         /* not reached */
2466 }
2467 EXPORT_SYMBOL(LNetCtl);
2468
2469 void LNetDebugPeer(lnet_process_id_t id)
2470 {
2471         lnet_debug_peer(id.nid);
2472 }
2473 EXPORT_SYMBOL(LNetDebugPeer);
2474
2475 /**
2476  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
2477  * all interfaces share a same PID, as requested by LNetNIInit().
2478  *
2479  * \param index Index of the interface to look up.
2480  * \param id On successful return, this location will hold the
2481  * lnet_process_id_t ID of the interface.
2482  *
2483  * \retval 0 If an interface exists at \a index.
2484  * \retval -ENOENT If no interface has been found.
2485  */
2486 int
2487 LNetGetId(unsigned int index, lnet_process_id_t *id)
2488 {
2489         struct lnet_ni   *ni;
2490         struct lnet_net  *net;
2491         int               cpt;
2492         int               rc = -ENOENT;
2493
2494         LASSERT(the_lnet.ln_refcount > 0);
2495
2496         cpt = lnet_net_lock_current();
2497
2498         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2499                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2500                         if (index-- != 0)
2501                                 continue;
2502
2503                         id->nid = ni->ni_nid;
2504                         id->pid = the_lnet.ln_pid;
2505                         rc = 0;
2506                         break;
2507                 }
2508         }
2509
2510         lnet_net_unlock(cpt);
2511         return rc;
2512 }
2513 EXPORT_SYMBOL(LNetGetId);
2514
2515 /**
2516  * Print a string representation of handle \a h into buffer \a str of
2517  * \a len bytes.
2518  */
2519 void
2520 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2521 {
2522         snprintf(str, len, "%#llx", h.cookie);
2523 }
2524 EXPORT_SYMBOL(LNetSnprintHandle);
2525
2526 static int lnet_ping(lnet_process_id_t id, signed long timeout,
2527                      lnet_process_id_t __user *ids, int n_ids)
2528 {
2529         lnet_handle_eq_t     eqh;
2530         lnet_handle_md_t     mdh;
2531         lnet_event_t         event;
2532         lnet_md_t            md = { NULL };
2533         int                  which;
2534         int                  unlinked = 0;
2535         int                  replied = 0;
2536         const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC);
2537         int                  infosz;
2538         struct lnet_ping_info    *info;
2539         lnet_process_id_t    tmpid;
2540         int                  i;
2541         int                  nob;
2542         int                  rc;
2543         int                  rc2;
2544         sigset_t         blocked;
2545
2546         infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]);
2547
2548         /* n_ids limit is arbitrary */
2549         if (n_ids <= 0 || n_ids > 20 || id.nid == LNET_NID_ANY)
2550                 return -EINVAL;
2551
2552         if (id.pid == LNET_PID_ANY)
2553                 id.pid = LNET_PID_LUSTRE;
2554
2555         LIBCFS_ALLOC(info, infosz);
2556         if (info == NULL)
2557                 return -ENOMEM;
2558
2559         /* NB 2 events max (including any unlink event) */
2560         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2561         if (rc != 0) {
2562                 CERROR("Can't allocate EQ: %d\n", rc);
2563                 goto out_0;
2564         }
2565
2566         /* initialize md content */
2567         md.start     = info;
2568         md.length    = infosz;
2569         md.threshold = 2; /*GET/REPLY*/
2570         md.max_size  = 0;
2571         md.options   = LNET_MD_TRUNCATE;
2572         md.user_ptr  = NULL;
2573         md.eq_handle = eqh;
2574
2575         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2576         if (rc != 0) {
2577                 CERROR("Can't bind MD: %d\n", rc);
2578                 goto out_1;
2579         }
2580
2581         rc = LNetGet(LNET_NID_ANY, mdh, id,
2582                      LNET_RESERVED_PORTAL,
2583                      LNET_PROTO_PING_MATCHBITS, 0);
2584
2585         if (rc != 0) {
2586                 /* Don't CERROR; this could be deliberate! */
2587
2588                 rc2 = LNetMDUnlink(mdh);
2589                 LASSERT(rc2 == 0);
2590
2591                 /* NB must wait for the UNLINK event below... */
2592                 unlinked = 1;
2593                 timeout = a_long_time;
2594         }
2595
2596         do {
2597                 /* MUST block for unlink to complete */
2598                 if (unlinked)
2599                         blocked = cfs_block_allsigs();
2600
2601                 rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which);
2602
2603                 if (unlinked)
2604                         cfs_restore_sigs(blocked);
2605
2606                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2607                        (rc2 <= 0) ? -1 : event.type,
2608                        (rc2 <= 0) ? -1 : event.status,
2609                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2610
2611                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2612
2613                 if (rc2 <= 0 || event.status != 0) {
2614                         /* timeout or error */
2615                         if (!replied && rc == 0)
2616                                 rc = (rc2 < 0) ? rc2 :
2617                                      (rc2 == 0) ? -ETIMEDOUT :
2618                                      event.status;
2619
2620                         if (!unlinked) {
2621                                 /* Ensure completion in finite time... */
2622                                 LNetMDUnlink(mdh);
2623                                 /* No assertion (racing with network) */
2624                                 unlinked = 1;
2625                                 timeout = a_long_time;
2626                         } else if (rc2 == 0) {
2627                                 /* timed out waiting for unlink */
2628                                 CWARN("ping %s: late network completion\n",
2629                                       libcfs_id2str(id));
2630                         }
2631                 } else if (event.type == LNET_EVENT_REPLY) {
2632                         replied = 1;
2633                         rc = event.mlength;
2634                 }
2635
2636         } while (rc2 <= 0 || !event.unlinked);
2637
2638         if (!replied) {
2639                 if (rc >= 0)
2640                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2641                               libcfs_id2str(id));
2642                 rc = -EIO;
2643                 goto out_1;
2644         }
2645
2646         nob = rc;
2647         LASSERT(nob >= 0 && nob <= infosz);
2648
2649         rc = -EPROTO;                           /* if I can't parse... */
2650
2651         if (nob < 8) {
2652                 /* can't check magic/version */
2653                 CERROR("%s: ping info too short %d\n",
2654                        libcfs_id2str(id), nob);
2655                 goto out_1;
2656         }
2657
2658         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2659                 lnet_swap_pinginfo(info);
2660         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2661                 CERROR("%s: Unexpected magic %08x\n",
2662                        libcfs_id2str(id), info->pi_magic);
2663                 goto out_1;
2664         }
2665
2666         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2667                 CERROR("%s: ping w/o NI status: 0x%x\n",
2668                        libcfs_id2str(id), info->pi_features);
2669                 goto out_1;
2670         }
2671
2672         if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) {
2673                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2674                        nob, (int)offsetof(struct lnet_ping_info, pi_ni[0]));
2675                 goto out_1;
2676         }
2677
2678         if (info->pi_nnis < n_ids)
2679                 n_ids = info->pi_nnis;
2680
2681         if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) {
2682                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2683                        nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids]));
2684                 goto out_1;
2685         }
2686
2687         rc = -EFAULT;                           /* If I SEGV... */
2688
2689         memset(&tmpid, 0, sizeof(tmpid));
2690         for (i = 0; i < n_ids; i++) {
2691                 tmpid.pid = info->pi_pid;
2692                 tmpid.nid = info->pi_ni[i].ns_nid;
2693                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2694                         goto out_1;
2695         }
2696         rc = info->pi_nnis;
2697
2698  out_1:
2699         rc2 = LNetEQFree(eqh);
2700         if (rc2 != 0)
2701                 CERROR("rc2 %d\n", rc2);
2702         LASSERT(rc2 == 0);
2703
2704  out_0:
2705         LIBCFS_FREE(info, infosz);
2706         return rc;
2707 }