LU-7734 lnet: Multi-Rail local_ni/peer_ni selection
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_LNET
34 #include <linux/log2.h>
35 #include <linux/ktime.h>
36
37 #include <lnet/lib-lnet.h>
38
39 #define D_LNI D_CONSOLE
40
41 lnet_t      the_lnet;                           /* THE state of the network */
42 EXPORT_SYMBOL(the_lnet);
43
44 static char *ip2nets = "";
45 module_param(ip2nets, charp, 0444);
46 MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");
47
48 static char *networks = "";
49 module_param(networks, charp, 0444);
50 MODULE_PARM_DESC(networks, "local networks");
51
52 static char *routes = "";
53 module_param(routes, charp, 0444);
54 MODULE_PARM_DESC(routes, "routes to non-local networks");
55
56 static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
57 module_param(rnet_htable_size, int, 0444);
58 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
59
60 static int use_tcp_bonding = false;
61 module_param(use_tcp_bonding, int, 0444);
62 MODULE_PARM_DESC(use_tcp_bonding,
63                  "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
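
/*
 * Illustrative use of the parameters above (a sketch only; the exact grammar
 * is defined by the LNet configuration parsers, e.g. lnet_parse_ip2nets()):
 *
 *   options lnet networks="tcp0(eth0),o2ib0(ib0)"
 *   options lnet ip2nets="tcp0(eth0) 192.168.1.*; o2ib0 10.10.10.*"
 *   options lnet routes="o2ib0 192.168.1.1@tcp0"
 *
 * 'networks' and 'ip2nets' are mutually exclusive, as enforced by
 * lnet_get_networks() below.
 */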
64
65 /*
66  * This sequence number keeps track of how many times DLC was used to
67  * update the configuration. It is incremented on any DLC update and
68  * checked when sending a message to determine if there is a need to
69  * re-run the selection algorithm to handle configuration change.
70  * Look at lnet_select_pathway() for more details on its usage.
71  */
72 static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);
73
74 static int lnet_ping(lnet_process_id_t id, signed long timeout,
75                      lnet_process_id_t __user *ids, int n_ids);
76
77 static char *
78 lnet_get_routes(void)
79 {
80         return routes;
81 }
82
83 static char *
84 lnet_get_networks(void)
85 {
86         char   *nets;
87         int     rc;
88
89         if (*networks != 0 && *ip2nets != 0) {
90                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
91                                    "'ip2nets' but not both at once\n");
92                 return NULL;
93         }
94
95         if (*ip2nets != 0) {
96                 rc = lnet_parse_ip2nets(&nets, ip2nets);
97                 return (rc == 0) ? nets : NULL;
98         }
99
100         if (*networks != 0)
101                 return networks;
102
103         return "tcp";
104 }
105
106 static void
107 lnet_init_locks(void)
108 {
109         spin_lock_init(&the_lnet.ln_eq_wait_lock);
110         init_waitqueue_head(&the_lnet.ln_eq_waitq);
111         init_waitqueue_head(&the_lnet.ln_rc_waitq);
112         mutex_init(&the_lnet.ln_lnd_mutex);
113         mutex_init(&the_lnet.ln_api_mutex);
114 }
115
116 static void
117 lnet_fini_locks(void)
118 {
119 }
120
121 struct kmem_cache *lnet_mes_cachep;        /* MEs kmem_cache */
122 struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
123                                             *  MDs kmem_cache */
124
125 static int
126 lnet_descriptor_setup(void)
127 {
128         /* create specific kmem_cache for MEs and small MDs (i.e., originally
129          * allocated in <size-xxx> kmem_cache).
130          */
131         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(lnet_me_t),
132                                             0, 0, NULL);
133         if (!lnet_mes_cachep)
134                 return -ENOMEM;
135
136         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
137                                                   LNET_SMALL_MD_SIZE, 0, 0,
138                                                   NULL);
139         if (!lnet_small_mds_cachep)
140                 return -ENOMEM;
141
142         return 0;
143 }
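
/*
 * Minimal sketch of how the caches above are meant to be consumed by the
 * ME/MD allocation helpers elsewhere in LNet (illustrative only, not the
 * exact helpers used by this tree):
 *
 *   lnet_me_t *me = kmem_cache_zalloc(lnet_mes_cachep, GFP_NOFS);
 *   if (me) {
 *           ...
 *           kmem_cache_free(lnet_mes_cachep, me);
 *   }
 */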
144
145 static void
146 lnet_descriptor_cleanup(void)
147 {
148
149         if (lnet_small_mds_cachep) {
150                 kmem_cache_destroy(lnet_small_mds_cachep);
151                 lnet_small_mds_cachep = NULL;
152         }
153
154         if (lnet_mes_cachep) {
155                 kmem_cache_destroy(lnet_mes_cachep);
156                 lnet_mes_cachep = NULL;
157         }
158 }
159
160 static int
161 lnet_create_remote_nets_table(void)
162 {
163         int               i;
164         struct list_head *hash;
165
166         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
167         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
168         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
169         if (hash == NULL) {
170                 CERROR("Failed to create remote nets hash table\n");
171                 return -ENOMEM;
172         }
173
174         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
175                 INIT_LIST_HEAD(&hash[i]);
176         the_lnet.ln_remote_nets_hash = hash;
177         return 0;
178 }
179
180 static void
181 lnet_destroy_remote_nets_table(void)
182 {
183         int i;
184
185         if (the_lnet.ln_remote_nets_hash == NULL)
186                 return;
187
188         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
189                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
190
191         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
192                     LNET_REMOTE_NETS_HASH_SIZE *
193                     sizeof(the_lnet.ln_remote_nets_hash[0]));
194         the_lnet.ln_remote_nets_hash = NULL;
195 }
196
197 static void
198 lnet_destroy_locks(void)
199 {
200         if (the_lnet.ln_res_lock != NULL) {
201                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
202                 the_lnet.ln_res_lock = NULL;
203         }
204
205         if (the_lnet.ln_net_lock != NULL) {
206                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
207                 the_lnet.ln_net_lock = NULL;
208         }
209
210         lnet_fini_locks();
211 }
212
213 static int
214 lnet_create_locks(void)
215 {
216         lnet_init_locks();
217
218         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
219         if (the_lnet.ln_res_lock == NULL)
220                 goto failed;
221
222         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
223         if (the_lnet.ln_net_lock == NULL)
224                 goto failed;
225
226         return 0;
227
228  failed:
229         lnet_destroy_locks();
230         return -ENOMEM;
231 }
232
233 static void lnet_assert_wire_constants(void)
234 {
235         /* Wire protocol assertions generated by 'wirecheck'
236          * running on Linux robert.bartonsoftware.com 2.6.8-1.521
237          * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
238          * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */
239
240         /* Constants... */
241         CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
242         CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1);
243         CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0);
244         CLASSERT(LNET_MSG_ACK == 0);
245         CLASSERT(LNET_MSG_PUT == 1);
246         CLASSERT(LNET_MSG_GET == 2);
247         CLASSERT(LNET_MSG_REPLY == 3);
248         CLASSERT(LNET_MSG_HELLO == 4);
249
250         /* Checks for struct lnet_handle_wire */
251         CLASSERT((int)sizeof(struct lnet_handle_wire) == 16);
252         CLASSERT((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) == 0);
253         CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) == 8);
254         CLASSERT((int)offsetof(struct lnet_handle_wire, wh_object_cookie) == 8);
255         CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) == 8);
256
257         /* Checks for struct lnet_magicversion_t */
258         CLASSERT((int)sizeof(lnet_magicversion_t) == 8);
259         CLASSERT((int)offsetof(lnet_magicversion_t, magic) == 0);
260         CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
261         CLASSERT((int)offsetof(lnet_magicversion_t, version_major) == 4);
262         CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
263         CLASSERT((int)offsetof(lnet_magicversion_t, version_minor) == 6);
264         CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);
265
266         /* Checks for struct lnet_hdr_t */
267         CLASSERT((int)sizeof(lnet_hdr_t) == 72);
268         CLASSERT((int)offsetof(lnet_hdr_t, dest_nid) == 0);
269         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
270         CLASSERT((int)offsetof(lnet_hdr_t, src_nid) == 8);
271         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
272         CLASSERT((int)offsetof(lnet_hdr_t, dest_pid) == 16);
273         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
274         CLASSERT((int)offsetof(lnet_hdr_t, src_pid) == 20);
275         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
276         CLASSERT((int)offsetof(lnet_hdr_t, type) == 24);
277         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
278         CLASSERT((int)offsetof(lnet_hdr_t, payload_length) == 28);
279         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
280         CLASSERT((int)offsetof(lnet_hdr_t, msg) == 32);
281         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);
282
283         /* Ack */
284         CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
285         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
286         CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
287         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
288         CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
289         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);
290
291         /* Put */
292         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
293         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
294         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
295         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
296         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
297         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
298         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
299         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
300         CLASSERT((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
301         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);
302
303         /* Get */
304         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
305         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
306         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
307         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
308         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
309         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
310         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
311         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
312         CLASSERT((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
313         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);
314
315         /* Reply */
316         CLASSERT((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
317         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);
318
319         /* Hello */
320         CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
321         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
322         CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
323         CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
324 }
325
326 static lnd_t *lnet_find_lnd_by_type(__u32 type)
327 {
328         lnd_t            *lnd;
329         struct list_head *tmp;
330
331         /* holding lnd mutex */
332         list_for_each(tmp, &the_lnet.ln_lnds) {
333                 lnd = list_entry(tmp, lnd_t, lnd_list);
334
335                 if (lnd->lnd_type == type)
336                         return lnd;
337         }
338         return NULL;
339 }
340
341 void
342 lnet_register_lnd (lnd_t *lnd)
343 {
344         mutex_lock(&the_lnet.ln_lnd_mutex);
345
346         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
347         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
348
349         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
350         lnd->lnd_refcount = 0;
351
352         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
353
354         mutex_unlock(&the_lnet.ln_lnd_mutex);
355 }
356 EXPORT_SYMBOL(lnet_register_lnd);
357
358 void
359 lnet_unregister_lnd (lnd_t *lnd)
360 {
361         mutex_lock(&the_lnet.ln_lnd_mutex);
362
363         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
364         LASSERT(lnd->lnd_refcount == 0);
365
366         list_del(&lnd->lnd_list);
367         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
368
369         mutex_unlock(&the_lnet.ln_lnd_mutex);
370 }
371 EXPORT_SYMBOL(lnet_unregister_lnd);
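
/*
 * For illustration only (hypothetical LND, not part of this file): an LND
 * module is expected to register itself once at module-init time and to
 * unregister on module exit, roughly like this:
 *
 *   static lnd_t my_lnd = { .lnd_type = SOCKLND, ... };
 *
 *   static int __init my_lnd_init(void)
 *   {
 *           lnet_register_lnd(&my_lnd);
 *           return 0;
 *   }
 *
 *   static void __exit my_lnd_exit(void)
 *   {
 *           lnet_unregister_lnd(&my_lnd);
 *   }
 */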
372
373 void
374 lnet_counters_get(lnet_counters_t *counters)
375 {
376         lnet_counters_t *ctr;
377         int             i;
378
379         memset(counters, 0, sizeof(*counters));
380
381         lnet_net_lock(LNET_LOCK_EX);
382
383         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
384                 counters->msgs_max     += ctr->msgs_max;
385                 counters->msgs_alloc   += ctr->msgs_alloc;
386                 counters->errors       += ctr->errors;
387                 counters->send_count   += ctr->send_count;
388                 counters->recv_count   += ctr->recv_count;
389                 counters->route_count  += ctr->route_count;
390                 counters->drop_count   += ctr->drop_count;
391                 counters->send_length  += ctr->send_length;
392                 counters->recv_length  += ctr->recv_length;
393                 counters->route_length += ctr->route_length;
394                 counters->drop_length  += ctr->drop_length;
395
396         }
397         lnet_net_unlock(LNET_LOCK_EX);
398 }
399 EXPORT_SYMBOL(lnet_counters_get);
400
401 void
402 lnet_counters_reset(void)
403 {
404         lnet_counters_t *counters;
405         int             i;
406
407         lnet_net_lock(LNET_LOCK_EX);
408
409         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
410                 memset(counters, 0, sizeof(lnet_counters_t));
411
412         lnet_net_unlock(LNET_LOCK_EX);
413 }
414
415 static char *
416 lnet_res_type2str(int type)
417 {
418         switch (type) {
419         default:
420                 LBUG();
421         case LNET_COOKIE_TYPE_MD:
422                 return "MD";
423         case LNET_COOKIE_TYPE_ME:
424                 return "ME";
425         case LNET_COOKIE_TYPE_EQ:
426                 return "EQ";
427         }
428 }
429
430 static void
431 lnet_res_container_cleanup(struct lnet_res_container *rec)
432 {
433         int     count = 0;
434
435         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
436                 return;
437
438         while (!list_empty(&rec->rec_active)) {
439                 struct list_head *e = rec->rec_active.next;
440
441                 list_del_init(e);
442                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
443                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
444
445                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
446                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
447
448                 } else { /* NB: Active MEs should be attached on portals */
449                         LBUG();
450                 }
451                 count++;
452         }
453
454         if (count > 0) {
455                 /* Found alive MD/ME/EQ, user really should unlink/free
456                  * all of them before finalizing LNet, but if someone didn't,
457                  * we have to recycle the garbage for them */
458                 CERROR("%d active elements on exit of %s container\n",
459                        count, lnet_res_type2str(rec->rec_type));
460         }
461
462         if (rec->rec_lh_hash != NULL) {
463                 LIBCFS_FREE(rec->rec_lh_hash,
464                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
465                 rec->rec_lh_hash = NULL;
466         }
467
468         rec->rec_type = 0; /* mark it as finalized */
469 }
470
471 static int
472 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
473 {
474         int     rc = 0;
475         int     i;
476
477         LASSERT(rec->rec_type == 0);
478
479         rec->rec_type = type;
480         INIT_LIST_HEAD(&rec->rec_active);
481
482         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
483
484         /* Arbitrary choice of hash table size */
485         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
486                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
487         if (rec->rec_lh_hash == NULL) {
488                 rc = -ENOMEM;
489                 goto out;
490         }
491
492         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
493                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
494
495         return 0;
496
497 out:
498         CERROR("Failed to setup %s resource container\n",
499                lnet_res_type2str(type));
500         lnet_res_container_cleanup(rec);
501         return rc;
502 }
503
504 static void
505 lnet_res_containers_destroy(struct lnet_res_container **recs)
506 {
507         struct lnet_res_container       *rec;
508         int                             i;
509
510         cfs_percpt_for_each(rec, i, recs)
511                 lnet_res_container_cleanup(rec);
512
513         cfs_percpt_free(recs);
514 }
515
516 static struct lnet_res_container **
517 lnet_res_containers_create(int type)
518 {
519         struct lnet_res_container       **recs;
520         struct lnet_res_container       *rec;
521         int                             rc;
522         int                             i;
523
524         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
525         if (recs == NULL) {
526                 CERROR("Failed to allocate %s resource containers\n",
527                        lnet_res_type2str(type));
528                 return NULL;
529         }
530
531         cfs_percpt_for_each(rec, i, recs) {
532                 rc = lnet_res_container_setup(rec, i, type);
533                 if (rc != 0) {
534                         lnet_res_containers_destroy(recs);
535                         return NULL;
536                 }
537         }
538
539         return recs;
540 }
541
542 lnet_libhandle_t *
543 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
544 {
545         /* ALWAYS called with lnet_res_lock held */
546         struct list_head        *head;
547         lnet_libhandle_t        *lh;
548         unsigned int            hash;
549
550         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
551                 return NULL;
552
553         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
554         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
555
556         list_for_each_entry(lh, head, lh_hash_chain) {
557                 if (lh->lh_cookie == cookie)
558                         return lh;
559         }
560
561         return NULL;
562 }
563
564 void
565 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
566 {
567         /* ALWAYS called with lnet_res_lock held */
568         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
569         unsigned int    hash;
570
571         lh->lh_cookie = rec->rec_lh_cookie;
572         rec->rec_lh_cookie += 1 << ibits;
573
574         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
575
576         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
577 }
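
/*
 * Cookie layout implied by the two helpers above (low bits first):
 *
 *   [ LNET_COOKIE_TYPE_BITS : resource type (MD/ME/EQ)          ]
 *   [ LNET_CPT_BITS         : CPT the container lives on        ]
 *   [ remaining bits        : per-container sequence, also used ]
 *   [                         as the rec_lh_hash key            ]
 *
 * lnet_res_lh_lookup() rejects a cookie whose type bits don't match the
 * container's rec_type, then hashes the sequence bits into rec_lh_hash.
 */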
578
579 static int lnet_unprepare(void);
580
581 static int
582 lnet_prepare(lnet_pid_t requested_pid)
583 {
584         /* Prepare to bring up the network */
585         struct lnet_res_container **recs;
586         int                       rc = 0;
587
588         if (requested_pid == LNET_PID_ANY) {
589                 /* Don't instantiate LNET just for me */
590                 return -ENETDOWN;
591         }
592
593         LASSERT(the_lnet.ln_refcount == 0);
594
595         the_lnet.ln_routing = 0;
596
597         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
598         the_lnet.ln_pid = requested_pid;
599
600         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
601         INIT_LIST_HEAD(&the_lnet.ln_peers);
602         INIT_LIST_HEAD(&the_lnet.ln_nets);
603         INIT_LIST_HEAD(&the_lnet.ln_routers);
604         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
605         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
606
607         rc = lnet_descriptor_setup();
608         if (rc != 0)
609                 goto failed;
610
611         rc = lnet_create_remote_nets_table();
612         if (rc != 0)
613                 goto failed;
614
615         /*
616          * NB the interface cookie in wire handles guards against delayed
617          * replies and ACKs appearing valid after reboot.
618          */
619         the_lnet.ln_interface_cookie = ktime_get_real_ns();
620
621         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
622                                                 sizeof(lnet_counters_t));
623         if (the_lnet.ln_counters == NULL) {
624                 CERROR("Failed to allocate counters for LNet\n");
625                 rc = -ENOMEM;
626                 goto failed;
627         }
628
629         rc = lnet_peer_tables_create();
630         if (rc != 0)
631                 goto failed;
632
633         rc = lnet_msg_containers_create();
634         if (rc != 0)
635                 goto failed;
636
637         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
638                                       LNET_COOKIE_TYPE_EQ);
639         if (rc != 0)
640                 goto failed;
641
642         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
643         if (recs == NULL) {
644                 rc = -ENOMEM;
645                 goto failed;
646         }
647
648         the_lnet.ln_me_containers = recs;
649
650         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
651         if (recs == NULL) {
652                 rc = -ENOMEM;
653                 goto failed;
654         }
655
656         the_lnet.ln_md_containers = recs;
657
658         rc = lnet_portals_create();
659         if (rc != 0) {
660                 CERROR("Failed to create portals for LNet: %d\n", rc);
661                 goto failed;
662         }
663
664         return 0;
665
666  failed:
667         lnet_unprepare();
668         return rc;
669 }
670
671 static int
672 lnet_unprepare (void)
673 {
674         /* NB no LNET_LOCK since this is the last reference.  All LND instances
675          * have shut down already, so it is safe to unlink and free all
676          * descriptors, even those that appear committed to a network op (eg MD
677          * with non-zero pending count) */
678
679         lnet_fail_nid(LNET_NID_ANY, 0);
680
681         LASSERT(the_lnet.ln_refcount == 0);
682         LASSERT(list_empty(&the_lnet.ln_test_peers));
683         LASSERT(list_empty(&the_lnet.ln_nets));
684
685         lnet_portals_destroy();
686
687         if (the_lnet.ln_md_containers != NULL) {
688                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
689                 the_lnet.ln_md_containers = NULL;
690         }
691
692         if (the_lnet.ln_me_containers != NULL) {
693                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
694                 the_lnet.ln_me_containers = NULL;
695         }
696
697         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
698
699         lnet_msg_containers_destroy();
700         lnet_peer_tables_destroy();
701         lnet_rtrpools_free(0);
702
703         if (the_lnet.ln_counters != NULL) {
704                 cfs_percpt_free(the_lnet.ln_counters);
705                 the_lnet.ln_counters = NULL;
706         }
707         lnet_destroy_remote_nets_table();
708         lnet_descriptor_cleanup();
709
710         return 0;
711 }
712
713 lnet_ni_t  *
714 lnet_net2ni_locked(__u32 net_id, int cpt)
715 {
716         struct lnet_ni   *ni;
717         struct lnet_net  *net;
718
719         LASSERT(cpt != LNET_LOCK_EX);
720
721         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
722                 if (net->net_id == net_id) {
723                         ni = list_entry(net->net_ni_list.next, struct lnet_ni,
724                                         ni_netlist);
725                         return ni;
726                 }
727         }
728
729         return NULL;
730 }
731
732 lnet_ni_t *
733 lnet_net2ni(__u32 net)
734 {
735         lnet_ni_t *ni;
736
737         lnet_net_lock(0);
738         ni = lnet_net2ni_locked(net, 0);
739         lnet_net_unlock(0);
740
741         return ni;
742 }
743 EXPORT_SYMBOL(lnet_net2ni);
744
745 struct lnet_net *
746 lnet_get_net_locked(__u32 net_id)
747 {
748         struct lnet_net  *net;
749
750         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
751                 if (net->net_id == net_id)
752                         return net;
753         }
754
755         return NULL;
756 }
757
758 unsigned int
759 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
760 {
761         __u64           key = nid;
762         unsigned int    val;
763
764         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
765
766         if (number == 1)
767                 return 0;
768
769         val = hash_long(key, LNET_CPT_BITS);
770         /* NB: LNET_CPT_NUMBER doesn't have to be PO2 */
771         if (val < number)
772                 return val;
773
774         return (unsigned int)(key + val + (val >> 1)) % number;
775 }
776
777 int
778 lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
779 {
780         struct lnet_net *net;
781
782         /* must be called while holding lnet_net_lock */
783         if (LNET_CPT_NUMBER == 1)
784                 return 0; /* the only one */
785
786         /*
787          * If NI is provided then use the CPT identified in the NI cpt
788          * list if one exists. If one doesn't exist, then that NI is
789          * associated with all CPTs and it follows that the net it belongs
790          * to is implicitly associated with all CPTs, so just hash the nid
791          * and return that.
792          */
793         if (ni != NULL) {
794                 if (ni->ni_cpts != NULL)
795                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
796                                                              ni->ni_ncpts)];
797                 else
798                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
799         }
800
801         /* no NI provided so look at the net */
802         net = lnet_get_net_locked(LNET_NIDNET(nid));
803
804         if (net != NULL && net->net_cpts != NULL) {
805                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
806         }
807
808         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
809 }
810
811 int
812 lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
813 {
814         int     cpt;
815         int     cpt2;
816
817         if (LNET_CPT_NUMBER == 1)
818                 return 0; /* the only one */
819
820         cpt = lnet_net_lock_current();
821
822         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
823
824         lnet_net_unlock(cpt);
825
826         return cpt2;
827 }
828 EXPORT_SYMBOL(lnet_cpt_of_nid);
829
830 int
831 lnet_islocalnet(__u32 net_id)
832 {
833         struct lnet_net *net;
834         int             cpt;
835         bool            local;
836
837         cpt = lnet_net_lock_current();
838
839         net = lnet_get_net_locked(net_id);
840
841         local = net != NULL;
842
843         lnet_net_unlock(cpt);
844
845         return local;
846 }
847
848 bool
849 lnet_is_ni_healthy_locked(struct lnet_ni *ni)
850 {
851         if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
852             ni->ni_state == LNET_NI_STATE_DEGRADED)
853                 return true;
854
855         return false;
856 }
857
858 lnet_ni_t  *
859 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
860 {
861         struct lnet_net  *net;
862         struct lnet_ni   *ni;
863
864         LASSERT(cpt != LNET_LOCK_EX);
865
866         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
867                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
868                         if (ni->ni_nid == nid)
869                                 return ni;
870                 }
871         }
872
873         return NULL;
874 }
875
876 lnet_ni_t *
877 lnet_nid2ni_addref(lnet_nid_t nid)
878 {
879         lnet_ni_t *ni;
880
881         lnet_net_lock(0);
882         ni = lnet_nid2ni_locked(nid, 0);
883         if (ni)
884                 lnet_ni_addref_locked(ni, 0);
885         lnet_net_unlock(0);
886
887         return ni;
888 }
889 EXPORT_SYMBOL(lnet_nid2ni_addref);
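
/*
 * Note for callers (descriptive, not enforced here): the reference taken by
 * lnet_nid2ni_addref() must eventually be dropped again, e.g. via
 * lnet_ni_decref(), once the caller is done with the NI.
 */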
890
891 int
892 lnet_islocalnid(lnet_nid_t nid)
893 {
894         struct lnet_ni  *ni;
895         int             cpt;
896
897         cpt = lnet_net_lock_current();
898         ni = lnet_nid2ni_locked(nid, cpt);
899         lnet_net_unlock(cpt);
900
901         return ni != NULL;
902 }
903
904 int
905 lnet_count_acceptor_nets(void)
906 {
907         /* Return the # of NIs that need the acceptor. */
908         int              count = 0;
909         struct lnet_net  *net;
910         int              cpt;
911
912         cpt = lnet_net_lock_current();
913         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
914                 /* all socklnd type networks should have the acceptor
915                  * thread started */
916                 if (net->net_lnd->lnd_accept != NULL)
917                         count++;
918         }
919
920         lnet_net_unlock(cpt);
921
922         return count;
923 }
924
925 static struct lnet_ping_info *
926 lnet_ping_info_create(int num_ni)
927 {
928         struct lnet_ping_info *ping_info;
929         unsigned int     infosz;
930
931         infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]);
932         LIBCFS_ALLOC(ping_info, infosz);
933         if (ping_info == NULL) {
934                 CERROR("Can't allocate ping info[%d]\n", num_ni);
935                 return NULL;
936         }
937
938         ping_info->pi_nnis = num_ni;
939         ping_info->pi_pid = the_lnet.ln_pid;
940         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
941         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
942
943         return ping_info;
944 }
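
/*
 * Sizing note, derived from the offsetof() above: the allocation covers the
 * struct lnet_ping_info header plus num_ni trailing lnet_ni_status slots;
 * e.g. for num_ni == 2 the buffer ends right after pi_ni[1].
 */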
945
946 static inline int
947 lnet_get_net_ni_count_locked(struct lnet_net *net)
948 {
949         struct lnet_ni  *ni;
950         int             count = 0;
951
952         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
953                 count++;
954
955         return count;
956 }
957
958 static inline int
959 lnet_get_ni_count(void)
960 {
961         struct lnet_ni  *ni;
962         struct lnet_net *net;
963         int             count = 0;
964
965         lnet_net_lock(0);
966
967         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
968                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
969                         count++;
970         }
971
972         lnet_net_unlock(0);
973
974         return count;
975 }
976
977 static inline void
978 lnet_ping_info_free(struct lnet_ping_info *pinfo)
979 {
980         LIBCFS_FREE(pinfo,
981                     offsetof(struct lnet_ping_info,
982                              pi_ni[pinfo->pi_nnis]));
983 }
984
985 static void
986 lnet_ping_info_destroy(void)
987 {
988         struct lnet_net *net;
989         struct lnet_ni  *ni;
990
991         lnet_net_lock(LNET_LOCK_EX);
992
993         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
994                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
995                         lnet_ni_lock(ni);
996                         ni->ni_status = NULL;
997                         lnet_ni_unlock(ni);
998                 }
999         }
1000
1001         lnet_ping_info_free(the_lnet.ln_ping_info);
1002         the_lnet.ln_ping_info = NULL;
1003
1004         lnet_net_unlock(LNET_LOCK_EX);
1005 }
1006
1007 static void
1008 lnet_ping_event_handler(lnet_event_t *event)
1009 {
1010         struct lnet_ping_info *pinfo = event->md.user_ptr;
1011
1012         if (event->unlinked)
1013                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
1014 }
1015
1016 static int
1017 lnet_ping_info_setup(struct lnet_ping_info **ppinfo, lnet_handle_md_t *md_handle,
1018                      int ni_count, bool set_eq)
1019 {
1020         lnet_handle_me_t  me_handle;
1021         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1022         lnet_md_t         md = {NULL};
1023         int               rc, rc2;
1024
1025         if (set_eq) {
1026                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
1027                                  &the_lnet.ln_ping_target_eq);
1028                 if (rc != 0) {
1029                         CERROR("Can't allocate ping EQ: %d\n", rc);
1030                         return rc;
1031                 }
1032         }
1033
1034         *ppinfo = lnet_ping_info_create(ni_count);
1035         if (*ppinfo == NULL) {
1036                 rc = -ENOMEM;
1037                 goto failed_0;
1038         }
1039
1040         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1041                           LNET_PROTO_PING_MATCHBITS, 0,
1042                           LNET_UNLINK, LNET_INS_AFTER,
1043                           &me_handle);
1044         if (rc != 0) {
1045                 CERROR("Can't create ping ME: %d\n", rc);
1046                 goto failed_1;
1047         }
1048
1049         /* initialize md content */
1050         md.start     = *ppinfo;
1051         md.length    = offsetof(struct lnet_ping_info,
1052                                 pi_ni[(*ppinfo)->pi_nnis]);
1053         md.threshold = LNET_MD_THRESH_INF;
1054         md.max_size  = 0;
1055         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1056                        LNET_MD_MANAGE_REMOTE;
1057         md.user_ptr  = NULL;
1058         md.eq_handle = the_lnet.ln_ping_target_eq;
1059         md.user_ptr = *ppinfo;
1060
1061         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
1062         if (rc != 0) {
1063                 CERROR("Can't attach ping MD: %d\n", rc);
1064                 goto failed_2;
1065         }
1066
1067         return 0;
1068
1069 failed_2:
1070         rc2 = LNetMEUnlink(me_handle);
1071         LASSERT(rc2 == 0);
1072 failed_1:
1073         lnet_ping_info_free(*ppinfo);
1074         *ppinfo = NULL;
1075 failed_0:
1076         if (set_eq)
1077                 LNetEQFree(the_lnet.ln_ping_target_eq);
1078         return rc;
1079 }
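
/*
 * Summary (descriptive, not a spec): the MD attached above is the buffer a
 * remote lnet_ping() reads. The peer issues an LNetGet() against
 * LNET_RESERVED_PORTAL with LNET_PROTO_PING_MATCHBITS, and because the MD is
 * created with LNET_MD_MANAGE_REMOTE the remote side controls the offset of
 * what it pulls from lnet_ping_info.
 */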
1080
1081 static void
1082 lnet_ping_md_unlink(struct lnet_ping_info *pinfo, lnet_handle_md_t *md_handle)
1083 {
1084         sigset_t        blocked = cfs_block_allsigs();
1085
1086         LNetMDUnlink(*md_handle);
1087         LNetInvalidateHandle(md_handle);
1088
1089         /* NB md could be busy; this just starts the unlink */
1090         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
1091                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
1092                 set_current_state(TASK_UNINTERRUPTIBLE);
1093                 schedule_timeout(cfs_time_seconds(1));
1094         }
1095
1096         cfs_restore_sigs(blocked);
1097 }
1098
1099 static void
1100 lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
1101 {
1102         int                     i;
1103         struct lnet_ni          *ni;
1104         struct lnet_net         *net;
1105         struct lnet_ni_status *ns;
1106
1107         i = 0;
1108         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1109                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1110                         LASSERT(i < ping_info->pi_nnis);
1111
1112                         ns = &ping_info->pi_ni[i];
1113
1114                         ns->ns_nid = ni->ni_nid;
1115
1116                         lnet_ni_lock(ni);
1117                         ns->ns_status = (ni->ni_status != NULL) ?
1118                                         ni->ni_status->ns_status :
1119                                                 LNET_NI_STATUS_UP;
1120                         ni->ni_status = ns;
1121                         lnet_ni_unlock(ni);
1122
1123                         i++;
1124                 }
1125
1126         }
1127 }
1128
1129 static void
1130 lnet_ping_target_update(struct lnet_ping_info *pinfo, lnet_handle_md_t md_handle)
1131 {
1132         struct lnet_ping_info *old_pinfo = NULL;
1133         lnet_handle_md_t old_md;
1134
1135         /* switch the NIs to point to the newly created ping info */
1136         lnet_net_lock(LNET_LOCK_EX);
1137
1138         if (!the_lnet.ln_routing)
1139                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1140         lnet_ping_info_install_locked(pinfo);
1141
1142         if (the_lnet.ln_ping_info != NULL) {
1143                 old_pinfo = the_lnet.ln_ping_info;
1144                 old_md = the_lnet.ln_ping_target_md;
1145         }
1146         the_lnet.ln_ping_target_md = md_handle;
1147         the_lnet.ln_ping_info = pinfo;
1148
1149         lnet_net_unlock(LNET_LOCK_EX);
1150
1151         if (old_pinfo != NULL) {
1152                 /* unlink the old ping info */
1153                 lnet_ping_md_unlink(old_pinfo, &old_md);
1154                 lnet_ping_info_free(old_pinfo);
1155         }
1156 }
1157
1158 static void
1159 lnet_ping_target_fini(void)
1160 {
1161         int             rc;
1162
1163         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1164                             &the_lnet.ln_ping_target_md);
1165
1166         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1167         LASSERT(rc == 0);
1168
1169         lnet_ping_info_destroy();
1170 }
1171
1172 static int
1173 lnet_ni_tq_credits(lnet_ni_t *ni)
1174 {
1175         int     credits;
1176
1177         LASSERT(ni->ni_ncpts >= 1);
1178
1179         if (ni->ni_ncpts == 1)
1180                 return ni->ni_net->net_tunables.lct_max_tx_credits;
1181
1182         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
1183         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
1184         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
1185
1186         return credits;
1187 }
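
/*
 * Worked example of the clamping above (numbers are hypothetical): with
 * lct_max_tx_credits = 256, lct_peer_tx_credits = 8 and ni_ncpts = 8,
 * credits = 256 / 8 = 32, then max(32, 8 * 8) = 64, then min(64, 256) = 64,
 * so each of the 8 per-CPT TX queues is given 64 credits.
 */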
1188
1189 static void
1190 lnet_ni_unlink_locked(lnet_ni_t *ni)
1191 {
1192         if (!list_empty(&ni->ni_cptlist)) {
1193                 list_del_init(&ni->ni_cptlist);
1194                 lnet_ni_decref_locked(ni, 0);
1195         }
1196
1197         /* move it to the zombie list so nobody can find it anymore */
1198         LASSERT(!list_empty(&ni->ni_netlist));
1199         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
1200         lnet_ni_decref_locked(ni, 0);
1201 }
1202
1203 static void
1204 lnet_clear_zombies_nis_locked(struct lnet_net *net)
1205 {
1206         int             i;
1207         int             islo;
1208         lnet_ni_t       *ni;
1209         struct list_head *zombie_list = &net->net_ni_zombie;
1210
1211         /*
1212          * Now wait for the NIs I just nuked to show up on the zombie
1213          * list and shut them down in guaranteed thread context
1214          */
1215         i = 2;
1216         while (!list_empty(zombie_list)) {
1217                 int     *ref;
1218                 int     j;
1219
1220                 ni = list_entry(zombie_list->next,
1221                                 lnet_ni_t, ni_netlist);
1222                 list_del_init(&ni->ni_netlist);
1223                 /* the NI should be in the deleting state; if it's not,
1224                  * that's a bug */
1225                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
1226                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1227                         if (*ref == 0)
1228                                 continue;
1229                         /* still busy, add it back to zombie list */
1230                         list_add(&ni->ni_netlist, zombie_list);
1231                         break;
1232                 }
1233
1234                 if (!list_empty(&ni->ni_netlist)) {
1235                         lnet_net_unlock(LNET_LOCK_EX);
1236                         ++i;
1237                         if ((i & (-i)) == i) {
1238                                 CDEBUG(D_WARNING,
1239                                        "Waiting for zombie LNI %s\n",
1240                                        libcfs_nid2str(ni->ni_nid));
1241                         }
1242                         set_current_state(TASK_UNINTERRUPTIBLE);
1243                         schedule_timeout(cfs_time_seconds(1));
1244                         lnet_net_lock(LNET_LOCK_EX);
1245                         continue;
1246                 }
1247
1248                 lnet_net_unlock(LNET_LOCK_EX);
1249
1250                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
1251
1252                 LASSERT(!in_interrupt());
1253                 (net->net_lnd->lnd_shutdown)(ni);
1254
1255                 if (!islo)
1256                         CDEBUG(D_LNI, "Removed LNI %s\n",
1257                               libcfs_nid2str(ni->ni_nid));
1258
1259                 lnet_ni_free(ni);
1260                 i = 2;
1261                 lnet_net_lock(LNET_LOCK_EX);
1262         }
1263 }
1264
1265 /* shut down the NI and release its refcount */
1266 static void
1267 lnet_shutdown_lndni(struct lnet_ni *ni)
1268 {
1269         int i;
1270         struct lnet_net *net = ni->ni_net;
1271
1272         lnet_net_lock(LNET_LOCK_EX);
1273         ni->ni_state = LNET_NI_STATE_DELETING;
1274         lnet_ni_unlink_locked(ni);
1275         lnet_net_unlock(LNET_LOCK_EX);
1276
1277         /* clear messages for this NI on the lazy portal */
1278         for (i = 0; i < the_lnet.ln_nportals; i++)
1279                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
1280
1281         /* Do peer table cleanup for this ni */
1282         lnet_peer_tables_cleanup(ni);
1283
1284         lnet_net_lock(LNET_LOCK_EX);
1285         lnet_clear_zombies_nis_locked(net);
1286         lnet_net_unlock(LNET_LOCK_EX);
1287 }
1288
1289 static void
1290 lnet_shutdown_lndnet(struct lnet_net *net)
1291 {
1292         struct lnet_ni *ni;
1293
1294         lnet_net_lock(LNET_LOCK_EX);
1295
1296         net->net_state = LNET_NET_STATE_DELETING;
1297
1298         list_del_init(&net->net_list);
1299
1300         while (!list_empty(&net->net_ni_list)) {
1301                 ni = list_entry(net->net_ni_list.next,
1302                                 lnet_ni_t, ni_netlist);
1303                 lnet_net_unlock(LNET_LOCK_EX);
1304                 lnet_shutdown_lndni(ni);
1305                 lnet_net_lock(LNET_LOCK_EX);
1306         }
1307
1308         /*
1309          * decrement ref count on lnd only when the entire network goes
1310          * away
1311          */
1312         net->net_lnd->lnd_refcount--;
1313
1314         lnet_net_unlock(LNET_LOCK_EX);
1315
1316         lnet_net_free(net);
1317 }
1318
1319 static void
1320 lnet_shutdown_lndnets(void)
1321 {
1322         struct lnet_net *net;
1323
1324         /* NB called holding the global mutex */
1325
1326         /* All quiet on the API front */
1327         LASSERT(!the_lnet.ln_shutdown);
1328         LASSERT(the_lnet.ln_refcount == 0);
1329
1330         lnet_net_lock(LNET_LOCK_EX);
1331         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1332
1333         while (!list_empty(&the_lnet.ln_nets)) {
1334                 /*
1335                  * move the nets to the zombie list so they can't be
1336                  * picked up for new work. LONET is also among the
1337                  * nets that get moved to the zombie list.
1338                  */
1339                 net = list_entry(the_lnet.ln_nets.next,
1340                                  struct lnet_net, net_list);
1341                 list_move(&net->net_list, &the_lnet.ln_net_zombie);
1342         }
1343
1344         /* Drop the cached loopback Net. */
1345         if (the_lnet.ln_loni != NULL) {
1346                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1347                 the_lnet.ln_loni = NULL;
1348         }
1349         lnet_net_unlock(LNET_LOCK_EX);
1350
1351         /* iterate through the net zombie list and delete each net */
1352         while (!list_empty(&the_lnet.ln_net_zombie)) {
1353                 net = list_entry(the_lnet.ln_net_zombie.next,
1354                                  struct lnet_net, net_list);
1355                 lnet_shutdown_lndnet(net);
1356         }
1357
1358         lnet_net_lock(LNET_LOCK_EX);
1359         the_lnet.ln_shutdown = 0;
1360         lnet_net_unlock(LNET_LOCK_EX);
1361 }
1362
1363 static int
1364 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
1365 {
1366         int                     rc = -EINVAL;
1367         struct lnet_tx_queue    *tq;
1368         int                     i;
1369         struct lnet_net         *net = ni->ni_net;
1370
1371         mutex_lock(&the_lnet.ln_lnd_mutex);
1372
1373         if (tun) {
1374                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
1375                 ni->ni_lnd_tunables_set = true;
1376         }
1377
1378         rc = (net->net_lnd->lnd_startup)(ni);
1379
1380         mutex_unlock(&the_lnet.ln_lnd_mutex);
1381
1382         if (rc != 0) {
1383                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
1384                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
1385                 lnet_net_lock(LNET_LOCK_EX);
1386                 net->net_lnd->lnd_refcount--;
1387                 lnet_net_unlock(LNET_LOCK_EX);
1388                 goto failed0;
1389         }
1390
1391         ni->ni_state = LNET_NI_STATE_ACTIVE;
1392
1393         /* We keep a reference on the loopback net through the loopback NI */
1394         if (net->net_lnd->lnd_type == LOLND) {
1395                 lnet_ni_addref(ni);
1396                 LASSERT(the_lnet.ln_loni == NULL);
1397                 the_lnet.ln_loni = ni;
1398                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
1399                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
1400                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
1401                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
1402                 return 0;
1403         }
1404
1405         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
1406             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
1407                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1408                                    libcfs_lnd2str(net->net_lnd->lnd_type),
1409                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
1410                                         "" : "per-peer ");
1411                 /* shut down the NI, since if we get here it must
1412                  * already have been started
1413                  */
1414                 lnet_shutdown_lndni(ni);
1415                 return -EINVAL;
1416         }
1417
1418         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1419                 tq->tq_credits_min =
1420                 tq->tq_credits_max =
1421                 tq->tq_credits = lnet_ni_tq_credits(ni);
1422         }
1423
1424         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1425                 libcfs_nid2str(ni->ni_nid),
1426                 ni->ni_net->net_tunables.lct_peer_tx_credits,
1427                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1428                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
1429                 ni->ni_net->net_tunables.lct_peer_timeout);
1430
1431         return 0;
1432 failed0:
1433         lnet_ni_free(ni);
1434         return rc;
1435 }
1436
1437 static int
1438 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
1439 {
1440         struct lnet_ni          *ni;
1441         struct lnet_net         *net_l = NULL;
1442         struct list_head        local_ni_list;
1443         int                     rc;
1444         int                     ni_count = 0;
1445         __u32                   lnd_type;
1446         lnd_t                   *lnd;
1447         int                     peer_timeout =
1448                 net->net_tunables.lct_peer_timeout;
1449         int                     maxtxcredits =
1450                 net->net_tunables.lct_max_tx_credits;
1451         int                     peerrtrcredits =
1452                 net->net_tunables.lct_peer_rtr_credits;
1453
1454         INIT_LIST_HEAD(&local_ni_list);
1455
1456         /*
1457          * Make sure that this net is unique. If it isn't, then
1458          * we are adding interfaces to an already existing network, and
1459          * 'net' is just a convenient way to pass in the list.
1460          * If it is unique, we need to find the LND and load it if
1461          * necessary.
1462          */
1463         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
1464                 lnd_type = LNET_NETTYP(net->net_id);
1465
1466                 LASSERT(libcfs_isknown_lnd(lnd_type));
1467
1468                 if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
1469                     lnd_type == IIBLND || lnd_type == VIBLND) {
1470                         CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
1471                         rc = -EINVAL;
1472                         goto failed0;
1473                 }
1474
1475                 mutex_lock(&the_lnet.ln_lnd_mutex);
1476                 lnd = lnet_find_lnd_by_type(lnd_type);
1477
1478                 if (lnd == NULL) {
1479                         mutex_unlock(&the_lnet.ln_lnd_mutex);
1480                         rc = request_module("%s", libcfs_lnd2modname(lnd_type));
1481                         mutex_lock(&the_lnet.ln_lnd_mutex);
1482
1483                         lnd = lnet_find_lnd_by_type(lnd_type);
1484                         if (lnd == NULL) {
1485                                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1486                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1487                                        libcfs_lnd2str(lnd_type),
1488                                        libcfs_lnd2modname(lnd_type), rc);
1489 #ifndef HAVE_MODULE_LOADING_SUPPORT
1490                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1491                                                 "compiled with kernel module "
1492                                                 "loading support.");
1493 #endif
1494                                 rc = -EINVAL;
1495                                 goto failed0;
1496                         }
1497                 }
1498
1499                 lnet_net_lock(LNET_LOCK_EX);
1500                 lnd->lnd_refcount++;
1501                 lnet_net_unlock(LNET_LOCK_EX);
1502
1503                 net->net_lnd = lnd;
1504
1505                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1506
1507                 net_l = net;
1508         }
1509
1510         /*
1511          * net_l: if the network being added is unique then net_l
1512          *        will point to that network
1513          *        if the network being added is not unique then
1514          *        net_l points to the existing network.
1515          * When we enter the loop below, we'll pick NIs off the
1516          * network being added and start them up, then add them to
1517          * a local NI list. Once we've successfully started all
1518          * the NIs, we join the local NI list (of started-up
1519          * NIs) with net_l->net_ni_list, which should
1520          * point to the correct network to add the new NI list to.
1521          * point to the correct network to add the new ni list to
1522          *
1523          * If any of the new NIs fail to start up, then we want to
1524          * iterate through the local ni list, which should include
1525          * any NIs which were successfully started up, and shut
1526          * them down.
1527          * After that we want to delete the network being added,
1528          * After than we want to delete the network being added,
1529          * to avoid a memory leak.
1530          */
1531
1532         /*
1533          * When a network uses TCP bonding then all its interfaces
1534          * must be specified when the network is first defined: the
1535          * TCP bonding code doesn't allow for interfaces to be added
1536          * or removed.
1537          */
1538         if (net_l != net && net_l != NULL && use_tcp_bonding &&
1539             LNET_NETTYP(net_l->net_id) == SOCKLND) {
1540                 rc = -EINVAL;
1541                 goto failed0;
1542         }
1543
1544         while (!list_empty(&net->net_ni_added)) {
1545                 ni = list_entry(net->net_ni_added.next, struct lnet_ni,
1546                                 ni_netlist);
1547                 list_del_init(&ni->ni_netlist);
1548
1549                 /* make sure that the NI we're about to start
1550                  * up is actually unique. If it's not, fail. */
1551                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
1552                                         ni->ni_interfaces[0])) {
1553                         rc = -EINVAL;
1554                         goto failed1;
1555                 }
1556
1557                 /* adjust the pointer to the parent network, just in
1558                  * case the net is a duplicate */
1559                 ni->ni_net = net_l;
1560
1561                 rc = lnet_startup_lndni(ni, tun);
1562
1563                 LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
1564                         ni->ni_net->net_lnd->lnd_query != NULL);
1565
1566                 if (rc < 0)
1567                         goto failed1;
1568
1569                 lnet_ni_addref(ni);
1570                 list_add_tail(&ni->ni_netlist, &local_ni_list);
1571
1572                 ni_count++;
1573         }
1574
1575         lnet_net_lock(LNET_LOCK_EX);
1576         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
1577         lnet_incr_dlc_seq();
1578         lnet_net_unlock(LNET_LOCK_EX);
1579
1580         /* if the network is not unique then we don't want to keep
1581          * it around after we're done. Free it. Otherwise add that
1582          * net to the global the_lnet.ln_nets */
1583         if (net_l != net && net_l != NULL) {
1584                 /*
1585                  * TODO - note. currently the tunables can not be updated
1586                  * once added
1587                  */
1588                 lnet_net_free(net);
1589         } else {
1590                 net->net_state = LNET_NET_STATE_ACTIVE;
1591                 /*
1592                  * restore tunables after they have been overwritten by
1593                  * the LND
1594                  */
1595                 if (peer_timeout != -1)
1596                         net->net_tunables.lct_peer_timeout = peer_timeout;
1597                 if (maxtxcredits != -1)
1598                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
1599                 if (peerrtrcredits != -1)
1600                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
1601
1602                 lnet_net_lock(LNET_LOCK_EX);
1603                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
1604                 lnet_net_unlock(LNET_LOCK_EX);
1605         }
1606
1607         return ni_count;
1608
1609 failed1:
1610         /*
1611          * shut down the new NIs that were started up, then
1612          * free the net being added
1613          */
1614         while (!list_empty(&local_ni_list)) {
1615                 ni = list_entry(local_ni_list.next, struct lnet_ni,
1616                                 ni_netlist);
1617
1618                 lnet_shutdown_lndni(ni);
1619         }
1620
1621 failed0:
1622         lnet_net_free(net);
1623
1624         return rc;
1625 }
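
/*
 * Illustrative sketch (not part of the build): the pattern used by
 * lnet_startup_lndnet() above -- stage each entry on a private list,
 * splice the whole list into the live list only if everything started,
 * and otherwise unwind what did start.  struct demo_item, demo_start()
 * and demo_stop() are hypothetical names used only for this example.
 */
#if 0
static int demo_start_all(struct list_head *pending, struct list_head *live)
{
	struct list_head started;
	struct demo_item *item;
	int count = 0;
	int rc;

	INIT_LIST_HEAD(&started);

	while (!list_empty(pending)) {
		item = list_entry(pending->next, struct demo_item, di_list);
		list_del_init(&item->di_list);

		rc = demo_start(item);	/* may fail part way through */
		if (rc < 0)
			goto unwind;

		list_add_tail(&item->di_list, &started);
		count++;
	}

	/* every entry came up: publish them on the live list */
	list_splice_tail(&started, live);
	return count;

unwind:
	/* shut down only the entries that were successfully started */
	while (!list_empty(&started)) {
		item = list_entry(started.next, struct demo_item, di_list);
		list_del_init(&item->di_list);
		demo_stop(item);
	}
	return rc;
}
#endif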
1626
1627 static int
1628 lnet_startup_lndnets(struct list_head *netlist)
1629 {
1630         struct lnet_net         *net;
1631         int                     rc;
1632         int                     ni_count = 0;
1633
1634         while (!list_empty(netlist)) {
1635                 net = list_entry(netlist->next, struct lnet_net, net_list);
1636                 list_del_init(&net->net_list);
1637
1638                 rc = lnet_startup_lndnet(net, NULL);
1639
1640                 if (rc < 0)
1641                         goto failed;
1642
1643                 ni_count += rc;
1644         }
1645
1646         return ni_count;
1647 failed:
1648         lnet_shutdown_lndnets();
1649
1650         return rc;
1651 }
1652
1653 /**
1654  * Initialize LNet library.
1655  *
1656  * Automatically called at module loading time. Caller has to call
1657  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
1658  * latter returned 0. It must be called exactly once.
1659  *
1660  * \retval 0 on success
1661  * \retval -ve on failures.
1662  */
1663 int lnet_lib_init(void)
1664 {
1665         int rc;
1666
1667         lnet_assert_wire_constants();
1668
1669         memset(&the_lnet, 0, sizeof(the_lnet));
1670
1671         /* refer to global cfs_cpt_table for now */
1672         the_lnet.ln_cpt_table   = cfs_cpt_table;
1673         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1674
1675         LASSERT(the_lnet.ln_cpt_number > 0);
1676         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1677                 /* we are at risk of consuming all lh_cookie values */
1678                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1679                        "please change setting of CPT-table and retry\n",
1680                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1681                 return -E2BIG;
1682         }
1683
1684         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1685                 the_lnet.ln_cpt_bits++;
1686
1687         rc = lnet_create_locks();
1688         if (rc != 0) {
1689                 CERROR("Can't create LNet global locks: %d\n", rc);
1690                 return rc;
1691         }
1692
1693         the_lnet.ln_refcount = 0;
1694         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1695         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1696         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
1697         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1698         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1699
1700         /* The hash table size is the number of bits it takes to express
1701          * the requested rnet_htable_size, minus 1 (better to underestimate
1702          * than overestimate so we don't waste memory). */
1703         if (rnet_htable_size <= 0)
1704                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1705         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1706                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1707         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1708                                            order_base_2(rnet_htable_size) - 1);
1709
1710         /* All LNDs apart from the LOLND are in separate modules.  They
1711          * register themselves when their module loads, and unregister
1712          * themselves when their module is unloaded. */
1713         lnet_register_lnd(&the_lolnd);
1714         return 0;
1715 }
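
/*
 * Illustrative sketch (not part of the build): how the remote-net hash
 * sizing above turns a requested table size into a bit count.  A
 * standalone approximation of order_base_2() is used; the demo_* names
 * are hypothetical.
 */
#if 0
#include <stdio.h>

/* ceil(log2(n)) for n >= 1, mirroring the kernel's order_base_2() */
static int demo_order_base_2(unsigned int n)
{
	int order = 0;

	while ((1U << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned int sizes[] = { 2, 128, 200, 1024 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		int hbits = demo_order_base_2(sizes[i]) - 1;

		if (hbits < 1)
			hbits = 1;
		/* e.g. a requested size of 200 gives 7 bits -> 128 buckets */
		printf("requested %u -> %d bits -> %d buckets\n",
		       sizes[i], hbits, 1 << hbits);
	}
	return 0;
}
#endif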
1716
1717 /**
1718  * Finalize LNet library.
1719  *
1720  * \pre lnet_lib_init() called with success.
1721  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1722  */
1723 void lnet_lib_exit(void)
1724 {
1725         LASSERT(the_lnet.ln_refcount == 0);
1726
1727         while (!list_empty(&the_lnet.ln_lnds))
1728                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1729                                                lnd_t, lnd_list));
1730         lnet_destroy_locks();
1731 }
1732
1733 /**
1734  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1735  *
1736  * Users must call this function at least once before any other functions.
1737  * For each successful call there must be a corresponding call to
1738  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1739  * ignored.
1740  *
1741  * The PID used by LNet may be different from the one requested.
1742  * See LNetGetId().
1743  *
1744  * \param requested_pid PID requested by the caller.
1745  *
1746  * \return >= 0 on success, and < 0 error code on failures.
1747  */
1748 int
1749 LNetNIInit(lnet_pid_t requested_pid)
1750 {
1751         int                     im_a_router = 0;
1752         int                     rc;
1753         int                     ni_count;
1754         struct lnet_ping_info   *pinfo;
1755         lnet_handle_md_t        md_handle;
1756         struct list_head        net_head;
1757         struct lnet_net         *net;
1758
1759         INIT_LIST_HEAD(&net_head);
1760
1761         mutex_lock(&the_lnet.ln_api_mutex);
1762
1763         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1764
1765         if (the_lnet.ln_refcount > 0) {
1766                 rc = the_lnet.ln_refcount++;
1767                 mutex_unlock(&the_lnet.ln_api_mutex);
1768                 return rc;
1769         }
1770
1771         rc = lnet_prepare(requested_pid);
1772         if (rc != 0) {
1773                 mutex_unlock(&the_lnet.ln_api_mutex);
1774                 return rc;
1775         }
1776
1777         /* create a net structure for the loopback network */
1778         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
1779         if (net == NULL) {
1780                 rc = -ENOMEM;
1781                 goto err_empty_list;
1782         }
1783
1784         /* Add in the loopback NI */
1785         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
1786                 rc = -ENOMEM;
1787                 goto err_empty_list;
1788         }
1789
1790         /* If LNet is being initialized via DLC it is possible
1791          * that the user requests not to load module parameters (ones which
1792          * are supported by DLC) on initialization.  Therefore, make sure not
1793          * to load networks, routes and forwarding from module parameters
1794          * in this case.  On cleanup in case of failure, only clean up
1795          * routes if they have been loaded. */
1796         if (!the_lnet.ln_nis_from_mod_params) {
1797                 rc = lnet_parse_networks(&net_head, lnet_get_networks(),
1798                                          use_tcp_bonding);
1799                 if (rc < 0)
1800                         goto err_empty_list;
1801         }
1802
1803         ni_count = lnet_startup_lndnets(&net_head);
1804         if (ni_count < 0) {
1805                 rc = ni_count;
1806                 goto err_empty_list;
1807         }
1808
1809         if (!the_lnet.ln_nis_from_mod_params) {
1810                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1811                 if (rc != 0)
1812                         goto err_shutdown_lndnis;
1813
1814                 rc = lnet_check_routes();
1815                 if (rc != 0)
1816                         goto err_destroy_routes;
1817
1818                 rc = lnet_rtrpools_alloc(im_a_router);
1819                 if (rc != 0)
1820                         goto err_destroy_routes;
1821         }
1822
1823         rc = lnet_acceptor_start();
1824         if (rc != 0)
1825                 goto err_destroy_routes;
1826
1827         the_lnet.ln_refcount = 1;
1828         /* Now I may use my own API functions... */
1829
1830         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1831         if (rc != 0)
1832                 goto err_acceptor_stop;
1833
1834         lnet_ping_target_update(pinfo, md_handle);
1835
1836         rc = lnet_router_checker_start();
1837         if (rc != 0)
1838                 goto err_stop_ping;
1839
1840         lnet_fault_init();
1841         lnet_proc_init();
1842
1843         mutex_unlock(&the_lnet.ln_api_mutex);
1844
1845         return 0;
1846
1847 err_stop_ping:
1848         lnet_ping_target_fini();
1849 err_acceptor_stop:
1850         the_lnet.ln_refcount = 0;
1851         lnet_acceptor_stop();
1852 err_destroy_routes:
1853         if (!the_lnet.ln_nis_from_mod_params)
1854                 lnet_destroy_routes();
1855 err_shutdown_lndnis:
1856         lnet_shutdown_lndnets();
1857 err_empty_list:
1858         lnet_unprepare();
1859         LASSERT(rc < 0);
1860         mutex_unlock(&the_lnet.ln_api_mutex);
1861         while (!list_empty(&net_head)) {
1862                 struct lnet_net *net;
1863
1864                 net = list_entry(net_head.next, struct lnet_net, net_list);
1865                 list_del_init(&net->net_list);
1866                 lnet_net_free(net);
1867         }
1868         return rc;
1869 }
1870 EXPORT_SYMBOL(LNetNIInit);
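
/*
 * Usage sketch (hypothetical caller, not part of this file): pairing
 * LNetNIInit() with LNetNIFini() from a module's init/exit hooks, as the
 * comment block above requires.  demo_module_init/demo_module_exit are
 * made-up names and the requested PID shown is only an example.
 */
#if 0
static int demo_module_init(void)
{
	int rc;

	/* the PID actually used may differ from the one requested */
	rc = LNetNIInit(LNET_PID_LUSTRE);
	if (rc < 0)
		return rc;

	/* ... LNet may now be used, e.g. LNetGetId() ... */
	return 0;
}

static void demo_module_exit(void)
{
	/* one LNetNIFini() for each successful LNetNIInit() */
	LNetNIFini();
}
#endif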
1871
1872 /**
1873  * Stop LNet interfaces, routing, and forwarding.
1874  *
1875  * Users must call this function once for each successful call to LNetNIInit().
1876  * Once the LNetNIFini() operation has been started, the results of pending
1877  * API operations are undefined.
1878  *
1879  * \return always 0 for current implementation.
1880  */
1881 int
1882 LNetNIFini()
1883 {
1884         mutex_lock(&the_lnet.ln_api_mutex);
1885
1886         LASSERT(the_lnet.ln_refcount > 0);
1887
1888         if (the_lnet.ln_refcount != 1) {
1889                 the_lnet.ln_refcount--;
1890         } else {
1891                 LASSERT(!the_lnet.ln_niinit_self);
1892
1893                 lnet_fault_fini();
1894
1895                 lnet_proc_fini();
1896                 lnet_router_checker_stop();
1897                 lnet_ping_target_fini();
1898
1899                 /* Teardown fns that use my own API functions BEFORE here */
1900                 the_lnet.ln_refcount = 0;
1901
1902                 lnet_acceptor_stop();
1903                 lnet_destroy_routes();
1904                 lnet_shutdown_lndnets();
1905                 lnet_unprepare();
1906         }
1907
1908         mutex_unlock(&the_lnet.ln_api_mutex);
1909         return 0;
1910 }
1911 EXPORT_SYMBOL(LNetNIFini);
1912
1913 /**
1914  * Grabs the NI data from the ni structure and fills it into the
1915  * ioctl configuration structure.
1916  *
1917  * \param[in]  ni       network interface structure
1918  * \param[out] config   ioctl config data, filled in with the NID,
1919  *                      the CPTs the NI is bound to, the network
1920  *                      tunables (peer timeout, peer TX credits,
1921  *                      peer router credits, max TX credits), the
1922  *                      NI status, the interface names, and any
1923  *                      LND tunables that fit in the buffer
1924  *                      supplied by user space
1925  */
1926 static void
1927 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
1928 {
1929         struct lnet_ioctl_net_config *net_config;
1930         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
1931         size_t min_size, tunable_size = 0;
1932         int i;
1933
1934         if (!ni || !config)
1935                 return;
1936
1937         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
1938         if (!net_config)
1939                 return;
1940
1941         BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
1942                      ARRAY_SIZE(net_config->ni_interfaces));
1943
1944         for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1945                 if (!ni->ni_interfaces[i])
1946                         break;
1947
1948                 strncpy(net_config->ni_interfaces[i],
1949                         ni->ni_interfaces[i],
1950                         sizeof(net_config->ni_interfaces[i]));
1951         }
1952
1953         config->cfg_nid = ni->ni_nid;
1954         config->cfg_config_u.cfg_net.net_peer_timeout =
1955                 ni->ni_net->net_tunables.lct_peer_timeout;
1956         config->cfg_config_u.cfg_net.net_max_tx_credits =
1957                 ni->ni_net->net_tunables.lct_max_tx_credits;
1958         config->cfg_config_u.cfg_net.net_peer_tx_credits =
1959                 ni->ni_net->net_tunables.lct_peer_tx_credits;
1960         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
1961                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
1962
1963         net_config->ni_status = ni->ni_status->ns_status;
1964
1965         if (ni->ni_cpts) {
1966                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
1967
1968                 for (i = 0; i < num_cpts; i++)
1969                         net_config->ni_cpts[i] = ni->ni_cpts[i];
1970
1971                 config->cfg_ncpts = num_cpts;
1972         }
1973
1974         /*
1975          * See if user land tools sent in a newer and larger version
1976          * of struct lnet_tunables than what the kernel uses.
1977          */
1978         min_size = sizeof(*config) + sizeof(*net_config);
1979
1980         if (config->cfg_hdr.ioc_len > min_size)
1981                 tunable_size = config->cfg_hdr.ioc_len - min_size;
1982
1983         /* Don't copy too much data to user space */
1984         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
1985         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
1986
1987         if (lnd_cfg && min_size) {
1988                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
1989                 config->cfg_config_u.cfg_net.net_interface_count = 1;
1990
1991                 /* Tell user land that kernel side has less data */
1992                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
1993                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
1994                         config->cfg_hdr.ioc_len -= min_size;
1995                 }
1996         }
1997 }
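
/*
 * Illustrative sketch (not part of the build): the size negotiation
 * performed above when the user-space tool and the kernel disagree on
 * how large the LND tunables structure is.  Only the smaller of the two
 * sizes is copied, and the reported length is shrunk when the kernel has
 * less data than was asked for.  The demo_* names are hypothetical.
 */
#if 0
#include <string.h>

struct demo_hdr {
	unsigned int ioc_len;	/* total length passed in by user space */
};

static size_t demo_copy_tunables(struct demo_hdr *hdr, size_t fixed_size,
				 void *dst, const void *kernel_tun,
				 size_t kernel_tun_size)
{
	size_t user_tun_size = 0;
	size_t copy_size;

	if (hdr->ioc_len > fixed_size)
		user_tun_size = hdr->ioc_len - fixed_size;

	/* copy no more than what both sides understand */
	copy_size = user_tun_size < kernel_tun_size ?
		    user_tun_size : kernel_tun_size;
	memcpy(dst, kernel_tun, copy_size);

	/* tell user space that the kernel side had less data */
	if (user_tun_size > kernel_tun_size)
		hdr->ioc_len -= user_tun_size - kernel_tun_size;

	return copy_size;
}
#endif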
1998
1999 struct lnet_ni *
2000 lnet_get_ni_idx_locked(int idx)
2001 {
2002         struct lnet_ni          *ni;
2003         struct lnet_net         *net;
2004
2005         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2006                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2007                         if (idx-- == 0)
2008                                 return ni;
2009                 }
2010         }
2011
2012         return NULL;
2013 }
2014
2015 struct lnet_ni *
2016 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
2017 {
2018         struct lnet_ni          *ni;
2019         struct lnet_net         *net = mynet;
2020
2021         if (prev == NULL) {
2022                 if (net == NULL)
2023                         net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
2024                                         net_list);
2025                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2026                                 ni_netlist);
2027
2028                 return ni;
2029         }
2030
2031         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
2032                 /* if you reached the end of the ni list and the net is
2033                  * specified, then there are no more nis in that net */
2034                 if (net != NULL)
2035                         return NULL;
2036
2037                 /* we reached the end of this net ni list. move to the
2038                  * next net */
2039                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
2040                         /* no more nets and no more NIs. */
2041                         return NULL;
2042
2043                 /* get the next net */
2044                 net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
2045                                  net_list);
2046                 /* get the ni on it */
2047                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2048                                 ni_netlist);
2049
2050                 return ni;
2051         }
2052
2053         /* there are more nis left */
2054         ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
2055
2056         return ni;
2057 }
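
/*
 * Usage sketch (hypothetical, not part of this file): walking every NI
 * with the cursor-style helper above.  Passing a NULL net iterates across
 * all nets; passing NULL as the previous NI starts at the beginning.  As
 * the _locked suffix suggests, the net lock is assumed to be held around
 * the walk; demo_visit() is a made-up callback.
 */
#if 0
static void demo_walk_all_nis(void)
{
	struct lnet_ni *ni = NULL;

	while ((ni = lnet_get_next_ni_locked(NULL, ni)) != NULL)
		demo_visit(ni);
}
#endif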
2058
2059 int
2060 lnet_get_net_config(struct lnet_ioctl_config_data *config)
2061 {
2062         struct lnet_ni *ni;
2063         int cpt;
2064         int rc = -ENOENT;
2065         int idx = config->cfg_count;
2066
2067         cpt = lnet_net_lock_current();
2068
2069         ni = lnet_get_ni_idx_locked(idx);
2070
2071         if (ni != NULL) {
2072                 rc = 0;
2073                 lnet_ni_lock(ni);
2074                 lnet_fill_ni_info(ni, config);
2075                 lnet_ni_unlock(ni);
2076         }
2077
2078         lnet_net_unlock(cpt);
2079         return rc;
2080 }
2081
2082 int
2083 lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
2084 {
2085         char                    *nets = conf->cfg_config_u.cfg_net.net_intf;
2086         struct lnet_ping_info   *pinfo;
2087         lnet_handle_md_t        md_handle;
2088         struct lnet_net         *net;
2089         struct list_head        net_head;
2090         int                     rc;
2091         lnet_remotenet_t        *rnet;
2092         int                     net_ni_count;
2093         int                     num_acceptor_nets;
2094         __u32                   net_type;
2095         struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
2096
2097         INIT_LIST_HEAD(&net_head);
2098
2099         if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
2100                 lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
2101
2102         /* Create net/ni structures for the network string */
2103         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2104         if (rc <= 0)
2105                 return rc == 0 ? -EINVAL : rc;
2106
2107         mutex_lock(&the_lnet.ln_api_mutex);
2108
2109         if (rc > 1) {
2110                 rc = -EINVAL; /* only add one network per call */
2111                 goto failed0;
2112         }
2113
2114         net = list_entry(net_head.next, struct lnet_net, net_list);
2115
2116         lnet_net_lock(LNET_LOCK_EX);
2117         rnet = lnet_find_rnet_locked(net->net_id);
2118         lnet_net_unlock(LNET_LOCK_EX);
2119         /* make sure that the net added doesn't invalidate the current
2120          * configuration LNet is keeping */
2121         if (rnet != NULL) {
2122                 CERROR("Adding net %s will invalidate routing configuration\n",
2123                        nets);
2124                 rc = -EUSERS;
2125                 goto failed0;
2126         }
2127
2128         /*
2129          * make sure you calculate the correct number of slots in the ping
2130          * info. Since the ping info is a flattened list of all the NIs,
2131          * we should allocate enough slots to accommodate the number of NIs
2132          * which will be added.
2133          *
2134          * We can use lnet_get_net_ni_count_locked() since the net is not
2135          * on a public list yet, so locking is not a problem
2136          */
2137         net_ni_count = lnet_get_net_ni_count_locked(net);
2138
2139         rc = lnet_ping_info_setup(&pinfo, &md_handle,
2140                                   net_ni_count + lnet_get_ni_count(),
2141                                   false);
2142         if (rc != 0)
2143                 goto failed0;
2144
2145         list_del_init(&net->net_list);
2146
2147         if (lnd_tunables)
2148                 memcpy(&net->net_tunables,
2149                        &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
2150
2151         /*
2152          * Before starting this network, get a count of the current TCP
2153          * networks which require the acceptor thread to be running. If
2154          * that count is 0, then the acceptor thread will need to be
2155          * started after this network has been started up.
2156          */
2157         num_acceptor_nets = lnet_count_acceptor_nets();
2158
2159         /*
2160          * lnet_startup_lndnet() can deallocate 'net' even if it returns
2161          * success, because we ended up adding interfaces to an existing
2162          * network. So grab the net_type now.
2163          */
2164         net_type = LNET_NETTYP(net->net_id);
2165
2166         rc = lnet_startup_lndnet(net,
2167                                  (lnd_tunables) ? &lnd_tunables->lt_tun : NULL);
2168         if (rc < 0)
2169                 goto failed1;
2170
2171         /*
2172          * Start the acceptor thread if this is the first network
2173          * being added that requires the thread.
2174          */
2175         if (net_type == SOCKLND && num_acceptor_nets == 0) {
2177                 rc = lnet_acceptor_start();
2178                 if (rc < 0) {
2179                         /* shutdown the net that we just started */
2180                         CERROR("Failed to start up acceptor thread\n");
2181                         /*
2182                          * Note that if we needed to start the acceptor
2183                          * thread, then 'net' must have been the first TCP
2184                          * network, therefore was unique, and therefore
2185                          * wasn't deallocated by lnet_startup_lndnet()
2186                          */
2187                         lnet_shutdown_lndnet(net);
2188                         goto failed1;
2189                 }
2190         }
2191
2192         lnet_ping_target_update(pinfo, md_handle);
2193         mutex_unlock(&the_lnet.ln_api_mutex);
2194
2195         return 0;
2196
2197 failed1:
2198         lnet_ping_md_unlink(pinfo, &md_handle);
2199         lnet_ping_info_free(pinfo);
2200 failed0:
2201         mutex_unlock(&the_lnet.ln_api_mutex);
2202         while (!list_empty(&net_head)) {
2203                 net = list_entry(net_head.next, struct lnet_net, net_list);
2204                 list_del_init(&net->net_list);
2205                 lnet_net_free(net);
2206         }
2207         return rc;
2208 }
2209
2210 int
2211 lnet_dyn_del_ni(__u32 net_id)
2212 {
2213         struct lnet_net  *net;
2214         struct lnet_ping_info *pinfo;
2215         lnet_handle_md_t  md_handle;
2216         int               rc;
2217         int               net_ni_count;
2218
2219         /* don't allow userspace to shutdown the LOLND */
2220         if (LNET_NETTYP(net_id) == LOLND)
2221                 return -EINVAL;
2222
2223         mutex_lock(&the_lnet.ln_api_mutex);
2224
2225         lnet_net_lock(0);
2226
2227         net = lnet_get_net_locked(net_id);
2228         if (net == NULL) {
2229                 rc = -EINVAL;
2230                 goto out;
2231         }
2232
2233         net_ni_count = lnet_get_net_ni_count_locked(net);
2234
2235         lnet_net_unlock(0);
2236
2237         /* create and link a new ping info, before removing the old one */
2238         rc = lnet_ping_info_setup(&pinfo, &md_handle,
2239                                   lnet_get_ni_count() - net_ni_count, false);
2240         if (rc != 0)
2241                 goto out;
2242
2243         lnet_shutdown_lndnet(net);
2244
2245         if (lnet_count_acceptor_nets() == 0)
2246                 lnet_acceptor_stop();
2247
2248         lnet_ping_target_update(pinfo, md_handle);
2249
2250 out:
2251         mutex_unlock(&the_lnet.ln_api_mutex);
2252
2253         return rc;
2254 }
2255
2256 void lnet_incr_dlc_seq(void)
2257 {
2258         atomic_inc(&lnet_dlc_seq_no);
2259 }
2260
2261 __u32 lnet_get_dlc_seq_locked(void)
2262 {
2263         return atomic_read(&lnet_dlc_seq_no);
2264 }
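
/*
 * Illustrative sketch (not part of the build): how a sequence number like
 * lnet_dlc_seq_no can be used by a caller to notice that the configuration
 * changed while it was working and that any cached choice should be
 * recomputed.  Plain C11 atomics stand in for the kernel primitives; the
 * demo_* names are hypothetical.
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint demo_seq;

static void demo_config_changed(void)
{
	atomic_fetch_add(&demo_seq, 1);		/* like lnet_incr_dlc_seq() */
}

/* returns false if the configuration changed and the work must be redone */
static bool demo_do_selection(void)
{
	unsigned int seq = atomic_load(&demo_seq);

	/* ... make a selection based on the current configuration ... */

	return seq == atomic_load(&demo_seq);
}
#endif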
2265
2266 /**
2267  * LNet ioctl handler.
2268  *
2269  */
2270 int
2271 LNetCtl(unsigned int cmd, void *arg)
2272 {
2273         struct libcfs_ioctl_data *data = arg;
2274         struct lnet_ioctl_config_data *config;
2275         lnet_process_id_t         id = {0};
2276         lnet_ni_t                *ni;
2277         int                       rc;
2278
2279         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
2280                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
2281
2282         switch (cmd) {
2283         case IOC_LIBCFS_GET_NI:
2284                 rc = LNetGetId(data->ioc_count, &id);
2285                 data->ioc_nid = id.nid;
2286                 return rc;
2287
2288         case IOC_LIBCFS_FAIL_NID:
2289                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2290
2291         case IOC_LIBCFS_ADD_ROUTE:
2292                 config = arg;
2293
2294                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2295                         return -EINVAL;
2296
2297                 mutex_lock(&the_lnet.ln_api_mutex);
2298                 rc = lnet_add_route(config->cfg_net,
2299                                     config->cfg_config_u.cfg_route.rtr_hop,
2300                                     config->cfg_nid,
2301                                     config->cfg_config_u.cfg_route.
2302                                         rtr_priority);
2303                 if (rc == 0) {
2304                         rc = lnet_check_routes();
2305                         if (rc != 0)
2306                                 lnet_del_route(config->cfg_net,
2307                                                config->cfg_nid);
2308                 }
2309                 mutex_unlock(&the_lnet.ln_api_mutex);
2310                 return rc;
2311
2312         case IOC_LIBCFS_DEL_ROUTE:
2313                 config = arg;
2314
2315                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2316                         return -EINVAL;
2317
2318                 mutex_lock(&the_lnet.ln_api_mutex);
2319                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2320                 mutex_unlock(&the_lnet.ln_api_mutex);
2321                 return rc;
2322
2323         case IOC_LIBCFS_GET_ROUTE:
2324                 config = arg;
2325
2326                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2327                         return -EINVAL;
2328
2329                 return lnet_get_route(config->cfg_count,
2330                                       &config->cfg_net,
2331                                       &config->cfg_config_u.cfg_route.rtr_hop,
2332                                       &config->cfg_nid,
2333                                       &config->cfg_config_u.cfg_route.rtr_flags,
2334                                       &config->cfg_config_u.cfg_route.
2335                                         rtr_priority);
2336
2337         case IOC_LIBCFS_GET_NET: {
2338                 size_t total = sizeof(*config) +
2339                                sizeof(struct lnet_ioctl_net_config);
2340                 config = arg;
2341
2342                 if (config->cfg_hdr.ioc_len < total)
2343                         return -EINVAL;
2344
2345                 return lnet_get_net_config(config);
2346         }
2347
2348         case IOC_LIBCFS_GET_LNET_STATS:
2349         {
2350                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2351
2352                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
2353                         return -EINVAL;
2354
2355                 lnet_counters_get(&lnet_stats->st_cntrs);
2356                 return 0;
2357         }
2358
2359         case IOC_LIBCFS_CONFIG_RTR:
2360                 config = arg;
2361
2362                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2363                         return -EINVAL;
2364
2365                 mutex_lock(&the_lnet.ln_api_mutex);
2366                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2367                         rc = lnet_rtrpools_enable();
2368                         mutex_unlock(&the_lnet.ln_api_mutex);
2369                         return rc;
2370                 }
2371                 lnet_rtrpools_disable();
2372                 mutex_unlock(&the_lnet.ln_api_mutex);
2373                 return 0;
2374
2375         case IOC_LIBCFS_ADD_BUF:
2376                 config = arg;
2377
2378                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2379                         return -EINVAL;
2380
2381                 mutex_lock(&the_lnet.ln_api_mutex);
2382                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2383                                                 buf_tiny,
2384                                           config->cfg_config_u.cfg_buffers.
2385                                                 buf_small,
2386                                           config->cfg_config_u.cfg_buffers.
2387                                                 buf_large);
2388                 mutex_unlock(&the_lnet.ln_api_mutex);
2389                 return rc;
2390
2391         case IOC_LIBCFS_GET_BUF: {
2392                 struct lnet_ioctl_pool_cfg *pool_cfg;
2393                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
2394
2395                 config = arg;
2396
2397                 if (config->cfg_hdr.ioc_len < total)
2398                         return -EINVAL;
2399
2400                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2401                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2402         }
2403
2404         case IOC_LIBCFS_GET_PEER_INFO: {
2405                 struct lnet_ioctl_peer *peer_info = arg;
2406
2407                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
2408                         return -EINVAL;
2409
2410                 return lnet_get_peer_info(
2411                    peer_info->pr_count,
2412                    &peer_info->pr_nid,
2413                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2414                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2415                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2416                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2417                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2418                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2419                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2420                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2421         }
2422
2423         case IOC_LIBCFS_NOTIFY_ROUTER: {
2424                 unsigned long jiffies_passed;
2425
2426                 jiffies_passed = ktime_get_real_seconds() - data->ioc_u64[0];
2427                 jiffies_passed = cfs_time_seconds(jiffies_passed);
2428
2429                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2430                                    jiffies - jiffies_passed);
2431         }
2432
2433         case IOC_LIBCFS_LNET_DIST:
2434                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2435                 if (rc < 0 && rc != -EHOSTUNREACH)
2436                         return rc;
2437
2438                 data->ioc_u32[0] = rc;
2439                 return 0;
2440
2441         case IOC_LIBCFS_TESTPROTOCOMPAT:
2442                 lnet_net_lock(LNET_LOCK_EX);
2443                 the_lnet.ln_testprotocompat = data->ioc_flags;
2444                 lnet_net_unlock(LNET_LOCK_EX);
2445                 return 0;
2446
2447         case IOC_LIBCFS_LNET_FAULT:
2448                 return lnet_fault_ctl(data->ioc_flags, data);
2449
2450         case IOC_LIBCFS_PING: {
2451                 signed long timeout;
2452
2453                 id.nid = data->ioc_nid;
2454                 id.pid = data->ioc_u32[0];
2455
2456                 /* Don't block longer than 2 minutes */
2457                 if (data->ioc_u32[1] > 120 * MSEC_PER_SEC)
2458                         return -EINVAL;
2459
2460                 /* If the timeout is negative then disable it */
2461                 if ((s32)data->ioc_u32[1] < 0)
2462                         timeout = MAX_SCHEDULE_TIMEOUT;
2463                 else
2464                         timeout = msecs_to_jiffies(data->ioc_u32[1]);
2465
2466                 rc = lnet_ping(id, timeout, data->ioc_pbuf1,
2467                                data->ioc_plen1 / sizeof(lnet_process_id_t));
2468                 if (rc < 0)
2469                         return rc;
2470                 data->ioc_count = rc;
2471                 return 0;
2472         }
2473         default:
2474                 ni = lnet_net2ni(data->ioc_net);
2475                 if (ni == NULL)
2476                         return -EINVAL;
2477
2478                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
2479                         rc = -EINVAL;
2480                 else
2481                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
2482
2483                 return rc;
2484         }
2485         /* not reached */
2486 }
2487 EXPORT_SYMBOL(LNetCtl);
2488
2489 void LNetDebugPeer(lnet_process_id_t id)
2490 {
2491         lnet_debug_peer(id.nid);
2492 }
2493 EXPORT_SYMBOL(LNetDebugPeer);
2494
2495 /**
2496  * Retrieve the lnet_process_id_t ID of the LNet interface at \a index. Note
2497  * that all interfaces share the same PID, as requested by LNetNIInit().
2498  *
2499  * \param index Index of the interface to look up.
2500  * \param id On successful return, this location will hold the
2501  * lnet_process_id_t ID of the interface.
2502  *
2503  * \retval 0 If an interface exists at \a index.
2504  * \retval -ENOENT If no interface has been found.
2505  */
2506 int
2507 LNetGetId(unsigned int index, lnet_process_id_t *id)
2508 {
2509         struct lnet_ni   *ni;
2510         struct lnet_net  *net;
2511         int               cpt;
2512         int               rc = -ENOENT;
2513
2514         LASSERT(the_lnet.ln_refcount > 0);
2515
2516         cpt = lnet_net_lock_current();
2517
2518         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2519                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2520                         if (index-- != 0)
2521                                 continue;
2522
2523                         id->nid = ni->ni_nid;
2524                         id->pid = the_lnet.ln_pid;
2525                         rc = 0;
2526                         break;
2527                 }
2528         }
2529
2530         lnet_net_unlock(cpt);
2531         return rc;
2532 }
2533 EXPORT_SYMBOL(LNetGetId);
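
/*
 * Usage sketch (hypothetical, not part of this file): enumerating every
 * local interface by calling LNetGetId() with increasing indices until it
 * returns -ENOENT, as the return values documented above allow.  Assumes
 * LNetNIInit() has already succeeded.
 */
#if 0
static void demo_list_interfaces(void)
{
	lnet_process_id_t id;
	unsigned int i;

	for (i = 0; LNetGetId(i, &id) == 0; i++)
		CDEBUG(D_NET, "interface %u: %s\n", i, libcfs_id2str(id));
}
#endif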
2534
2535 /**
2536  * Print a string representation of handle \a h into buffer \a str of
2537  * \a len bytes.
2538  */
2539 void
2540 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2541 {
2542         snprintf(str, len, "%#llx", h.cookie);
2543 }
2544 EXPORT_SYMBOL(LNetSnprintHandle);
2545
2546 static int lnet_ping(lnet_process_id_t id, signed long timeout,
2547                      lnet_process_id_t __user *ids, int n_ids)
2548 {
2549         lnet_handle_eq_t     eqh;
2550         lnet_handle_md_t     mdh;
2551         lnet_event_t         event;
2552         lnet_md_t            md = { NULL };
2553         int                  which;
2554         int                  unlinked = 0;
2555         int                  replied = 0;
2556         const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC);
2557         int                  infosz;
2558         struct lnet_ping_info    *info;
2559         lnet_process_id_t    tmpid;
2560         int                  i;
2561         int                  nob;
2562         int                  rc;
2563         int                  rc2;
2564         sigset_t             blocked;
2565
2566         infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]);
2567
2568         /* n_ids limit is arbitrary */
2569         if (n_ids <= 0 || n_ids > 20 || id.nid == LNET_NID_ANY)
2570                 return -EINVAL;
2571
2572         if (id.pid == LNET_PID_ANY)
2573                 id.pid = LNET_PID_LUSTRE;
2574
2575         LIBCFS_ALLOC(info, infosz);
2576         if (info == NULL)
2577                 return -ENOMEM;
2578
2579         /* NB 2 events max (including any unlink event) */
2580         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2581         if (rc != 0) {
2582                 CERROR("Can't allocate EQ: %d\n", rc);
2583                 goto out_0;
2584         }
2585
2586         /* initialize md content */
2587         md.start     = info;
2588         md.length    = infosz;
2589         md.threshold = 2; /*GET/REPLY*/
2590         md.max_size  = 0;
2591         md.options   = LNET_MD_TRUNCATE;
2592         md.user_ptr  = NULL;
2593         md.eq_handle = eqh;
2594
2595         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2596         if (rc != 0) {
2597                 CERROR("Can't bind MD: %d\n", rc);
2598                 goto out_1;
2599         }
2600
2601         rc = LNetGet(LNET_NID_ANY, mdh, id,
2602                      LNET_RESERVED_PORTAL,
2603                      LNET_PROTO_PING_MATCHBITS, 0);
2604
2605         if (rc != 0) {
2606                 /* Don't CERROR; this could be deliberate! */
2607
2608                 rc2 = LNetMDUnlink(mdh);
2609                 LASSERT(rc2 == 0);
2610
2611                 /* NB must wait for the UNLINK event below... */
2612                 unlinked = 1;
2613                 timeout = a_long_time;
2614         }
2615
2616         do {
2617                 /* MUST block for unlink to complete */
2618                 if (unlinked)
2619                         blocked = cfs_block_allsigs();
2620
2621                 rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which);
2622
2623                 if (unlinked)
2624                         cfs_restore_sigs(blocked);
2625
2626                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2627                        (rc2 <= 0) ? -1 : event.type,
2628                        (rc2 <= 0) ? -1 : event.status,
2629                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2630
2631                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2632
2633                 if (rc2 <= 0 || event.status != 0) {
2634                         /* timeout or error */
2635                         if (!replied && rc == 0)
2636                                 rc = (rc2 < 0) ? rc2 :
2637                                      (rc2 == 0) ? -ETIMEDOUT :
2638                                      event.status;
2639
2640                         if (!unlinked) {
2641                                 /* Ensure completion in finite time... */
2642                                 LNetMDUnlink(mdh);
2643                                 /* No assertion (racing with network) */
2644                                 unlinked = 1;
2645                                 timeout = a_long_time;
2646                         } else if (rc2 == 0) {
2647                                 /* timed out waiting for unlink */
2648                                 CWARN("ping %s: late network completion\n",
2649                                       libcfs_id2str(id));
2650                         }
2651                 } else if (event.type == LNET_EVENT_REPLY) {
2652                         replied = 1;
2653                         rc = event.mlength;
2654                 }
2655
2656         } while (rc2 <= 0 || !event.unlinked);
2657
2658         if (!replied) {
2659                 if (rc >= 0)
2660                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2661                               libcfs_id2str(id));
2662                 rc = -EIO;
2663                 goto out_1;
2664         }
2665
2666         nob = rc;
2667         LASSERT(nob >= 0 && nob <= infosz);
2668
2669         rc = -EPROTO;                           /* if I can't parse... */
2670
2671         if (nob < 8) {
2672                 /* can't check magic/version */
2673                 CERROR("%s: ping info too short %d\n",
2674                        libcfs_id2str(id), nob);
2675                 goto out_1;
2676         }
2677
2678         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2679                 lnet_swap_pinginfo(info);
2680         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2681                 CERROR("%s: Unexpected magic %08x\n",
2682                        libcfs_id2str(id), info->pi_magic);
2683                 goto out_1;
2684         }
2685
2686         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2687                 CERROR("%s: ping w/o NI status: 0x%x\n",
2688                        libcfs_id2str(id), info->pi_features);
2689                 goto out_1;
2690         }
2691
2692         if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) {
2693                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2694                        nob, (int)offsetof(struct lnet_ping_info, pi_ni[0]));
2695                 goto out_1;
2696         }
2697
2698         if (info->pi_nnis < n_ids)
2699                 n_ids = info->pi_nnis;
2700
2701         if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) {
2702                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2703                        nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids]));
2704                 goto out_1;
2705         }
2706
2707         rc = -EFAULT;                           /* If I SEGV... */
2708
2709         memset(&tmpid, 0, sizeof(tmpid));
2710         for (i = 0; i < n_ids; i++) {
2711                 tmpid.pid = info->pi_pid;
2712                 tmpid.nid = info->pi_ni[i].ns_nid;
2713                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2714                         goto out_1;
2715         }
2716         rc = info->pi_nnis;
2717
2718  out_1:
2719         rc2 = LNetEQFree(eqh);
2720         if (rc2 != 0)
2721                 CERROR("rc2 %d\n", rc2);
2722         LASSERT(rc2 == 0);
2723
2724  out_0:
2725         LIBCFS_FREE(info, infosz);
2726         return rc;
2727 }
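
/*
 * Illustrative sketch (not part of the build): the order of checks used by
 * lnet_ping() above when validating a reply -- is there enough data to
 * look at the magic, is the magic recognised (possibly byte-swapped), and
 * does the buffer cover every entry that will actually be read.  The
 * demo_* names and structure are hypothetical simplifications of
 * struct lnet_ping_info.
 */
#if 0
#include <errno.h>
#include <stddef.h>
#include <stdint.h>

#define DEMO_PING_MAGIC	0x70696e67U

struct demo_ping_info {
	uint32_t pi_magic;
	uint32_t pi_nnis;
	uint64_t pi_nid[];
};

static uint32_t demo_swab32(uint32_t v)
{
	return ((v & 0x000000ffU) << 24) | ((v & 0x0000ff00U) << 8) |
	       ((v & 0x00ff0000U) >> 8)  | ((v & 0xff000000U) >> 24);
}

/* returns the usable entry count, or a negative errno-style value */
static int demo_check_reply(struct demo_ping_info *info, size_t nob,
			    unsigned int max_ids)
{
	if (nob < sizeof(*info))
		return -EPROTO;		/* can't even check the magic */

	if (info->pi_magic == demo_swab32(DEMO_PING_MAGIC)) {
		/* sender had the opposite endianness: swap the fields */
		info->pi_magic = demo_swab32(info->pi_magic);
		info->pi_nnis = demo_swab32(info->pi_nnis);
	} else if (info->pi_magic != DEMO_PING_MAGIC) {
		return -EPROTO;		/* unrecognised magic */
	}

	/* never read more entries than the sender says it filled in */
	if (info->pi_nnis < max_ids)
		max_ids = info->pi_nnis;

	/* the reply must cover every entry we are about to read */
	if (nob < sizeof(*info) + (size_t)max_ids * sizeof(info->pi_nid[0]))
		return -EPROTO;

	return max_ids;
}
#endif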