Whamcloud - gitweb
LU-7734 lnet: configure peers from DLC
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_LNET
34 #include <linux/log2.h>
35 #include <linux/ktime.h>
36
37 #include <lnet/lib-lnet.h>
38
39 #define D_LNI D_CONSOLE
40
lnet_t      the_lnet;                           /* THE state of the network */
EXPORT_SYMBOL(the_lnet);

/*
 * 'ip2nets' and 'networks' are alternative ways of describing the local
 * networks; lnet_get_networks() rejects a configuration that sets both.
 */
static char *ip2nets = "";
module_param(ip2nets, charp, 0444);
MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");

static char *networks = "";
module_param(networks, charp, 0444);
MODULE_PARM_DESC(networks, "local networks");

/* returned verbatim by lnet_get_routes() */
static char *routes = "";
module_param(routes, charp, 0444);
MODULE_PARM_DESC(routes, "routes to non-local networks");

static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
module_param(rnet_htable_size, int, 0444);
MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");

/* integer flag: 1 selects socklnd bonding, 0 selects Multi-Rail */
static int use_tcp_bonding = false;
module_param(use_tcp_bonding, int, 0444);
MODULE_PARM_DESC(use_tcp_bonding,
                 "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");

/*
 * This sequence number keeps track of how many times DLC was used to
 * update the configuration. It is incremented on any DLC update and
 * checked when sending a message to determine if there is a need to
 * re-run the selection algorithm to handle configuration change.
 * Look at lnet_select_pathway() for more details on its usage.
 */
static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);

/* forward declaration; the definition is not in this part of the file */
static int lnet_ping(lnet_process_id_t id, signed long timeout,
                     lnet_process_id_t __user *ids, int n_ids);
76
/*
 * Return the 'routes' module parameter string as-is (no copy is made).
 */
static char *
lnet_get_routes(void)
{
        return routes;
}
82
83 static char *
84 lnet_get_networks(void)
85 {
86         char   *nets;
87         int     rc;
88
89         if (*networks != 0 && *ip2nets != 0) {
90                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
91                                    "'ip2nets' but not both at once\n");
92                 return NULL;
93         }
94
95         if (*ip2nets != 0) {
96                 rc = lnet_parse_ip2nets(&nets, ip2nets);
97                 return (rc == 0) ? nets : NULL;
98         }
99
100         if (*networks != 0)
101                 return networks;
102
103         return "tcp";
104 }
105
106 static void
107 lnet_init_locks(void)
108 {
109         spin_lock_init(&the_lnet.ln_eq_wait_lock);
110         init_waitqueue_head(&the_lnet.ln_eq_waitq);
111         init_waitqueue_head(&the_lnet.ln_rc_waitq);
112         mutex_init(&the_lnet.ln_lnd_mutex);
113         mutex_init(&the_lnet.ln_api_mutex);
114 }
115
/*
 * Counterpart of lnet_init_locks().  Intentionally empty: nothing is
 * torn down here.  Called from lnet_destroy_locks().
 */
static void
lnet_fini_locks(void)
{
}
120
121 struct kmem_cache *lnet_mes_cachep;        /* MEs kmem_cache */
122 struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
123                                             *  MDs kmem_cache */
124
125 static int
126 lnet_descriptor_setup(void)
127 {
128         /* create specific kmem_cache for MEs and small MDs (i.e., originally
129          * allocated in <size-xxx> kmem_cache).
130          */
131         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(lnet_me_t),
132                                             0, 0, NULL);
133         if (!lnet_mes_cachep)
134                 return -ENOMEM;
135
136         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
137                                                   LNET_SMALL_MD_SIZE, 0, 0,
138                                                   NULL);
139         if (!lnet_small_mds_cachep)
140                 return -ENOMEM;
141
142         return 0;
143 }
144
145 static void
146 lnet_descriptor_cleanup(void)
147 {
148
149         if (lnet_small_mds_cachep) {
150                 kmem_cache_destroy(lnet_small_mds_cachep);
151                 lnet_small_mds_cachep = NULL;
152         }
153
154         if (lnet_mes_cachep) {
155                 kmem_cache_destroy(lnet_mes_cachep);
156                 lnet_mes_cachep = NULL;
157         }
158 }
159
160 static int
161 lnet_create_remote_nets_table(void)
162 {
163         int               i;
164         struct list_head *hash;
165
166         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
167         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
168         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
169         if (hash == NULL) {
170                 CERROR("Failed to create remote nets hash table\n");
171                 return -ENOMEM;
172         }
173
174         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
175                 INIT_LIST_HEAD(&hash[i]);
176         the_lnet.ln_remote_nets_hash = hash;
177         return 0;
178 }
179
180 static void
181 lnet_destroy_remote_nets_table(void)
182 {
183         int i;
184
185         if (the_lnet.ln_remote_nets_hash == NULL)
186                 return;
187
188         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
189                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
190
191         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
192                     LNET_REMOTE_NETS_HASH_SIZE *
193                     sizeof(the_lnet.ln_remote_nets_hash[0]));
194         the_lnet.ln_remote_nets_hash = NULL;
195 }
196
197 static void
198 lnet_destroy_locks(void)
199 {
200         if (the_lnet.ln_res_lock != NULL) {
201                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
202                 the_lnet.ln_res_lock = NULL;
203         }
204
205         if (the_lnet.ln_net_lock != NULL) {
206                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
207                 the_lnet.ln_net_lock = NULL;
208         }
209
210         lnet_fini_locks();
211 }
212
213 static int
214 lnet_create_locks(void)
215 {
216         lnet_init_locks();
217
218         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
219         if (the_lnet.ln_res_lock == NULL)
220                 goto failed;
221
222         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
223         if (the_lnet.ln_net_lock == NULL)
224                 goto failed;
225
226         return 0;
227
228  failed:
229         lnet_destroy_locks();
230         return -ENOMEM;
231 }
232
/*
 * Compile-time verification of the LNet wire protocol: protocol magic
 * and version constants, message type values, and the size and offset
 * of every field of the on-wire structures.  The function consists
 * solely of CLASSERT() statements.
 */
static void lnet_assert_wire_constants(void)
{
        /* Wire protocol assertions generated by 'wirecheck'
         * running on Linux robert.bartonsoftware.com 2.6.8-1.521
         * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
         * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */

        /* Constants... */
        CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
        CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1);
        CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0);
        CLASSERT(LNET_MSG_ACK == 0);
        CLASSERT(LNET_MSG_PUT == 1);
        CLASSERT(LNET_MSG_GET == 2);
        CLASSERT(LNET_MSG_REPLY == 3);
        CLASSERT(LNET_MSG_HELLO == 4);

        /* Checks for struct lnet_handle_wire */
        CLASSERT((int)sizeof(struct lnet_handle_wire) == 16);
        CLASSERT((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) == 0);
        CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) == 8);
        CLASSERT((int)offsetof(struct lnet_handle_wire, wh_object_cookie) == 8);
        CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) == 8);

        /* Checks for struct lnet_magicversion_t */
        CLASSERT((int)sizeof(lnet_magicversion_t) == 8);
        CLASSERT((int)offsetof(lnet_magicversion_t, magic) == 0);
        CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4);
        CLASSERT((int)offsetof(lnet_magicversion_t, version_major) == 4);
        CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2);
        CLASSERT((int)offsetof(lnet_magicversion_t, version_minor) == 6);
        CLASSERT((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2);

        /* Checks for struct lnet_hdr_t */
        CLASSERT((int)sizeof(lnet_hdr_t) == 72);
        CLASSERT((int)offsetof(lnet_hdr_t, dest_nid) == 0);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8);
        CLASSERT((int)offsetof(lnet_hdr_t, src_nid) == 8);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8);
        CLASSERT((int)offsetof(lnet_hdr_t, dest_pid) == 16);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4);
        CLASSERT((int)offsetof(lnet_hdr_t, src_pid) == 20);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4);
        CLASSERT((int)offsetof(lnet_hdr_t, type) == 24);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->type) == 4);
        CLASSERT((int)offsetof(lnet_hdr_t, payload_length) == 28);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4);
        CLASSERT((int)offsetof(lnet_hdr_t, msg) == 32);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg) == 40);

        /* Ack */
        CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4);

        /* Put */
        CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.put.offset) == 68);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4);

        /* Get */
        CLASSERT((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4);

        /* Reply */
        CLASSERT((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16);

        /* Hello */
        CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8);
        CLASSERT((int)offsetof(lnet_hdr_t, msg.hello.type) == 40);
        CLASSERT((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4);
}
325
326 static lnd_t *lnet_find_lnd_by_type(__u32 type)
327 {
328         lnd_t            *lnd;
329         struct list_head *tmp;
330
331         /* holding lnd mutex */
332         list_for_each(tmp, &the_lnet.ln_lnds) {
333                 lnd = list_entry(tmp, lnd_t, lnd_list);
334
335                 if (lnd->lnd_type == type)
336                         return lnd;
337         }
338         return NULL;
339 }
340
341 void
342 lnet_register_lnd (lnd_t *lnd)
343 {
344         mutex_lock(&the_lnet.ln_lnd_mutex);
345
346         LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
347         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
348
349         list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
350         lnd->lnd_refcount = 0;
351
352         CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));
353
354         mutex_unlock(&the_lnet.ln_lnd_mutex);
355 }
356 EXPORT_SYMBOL(lnet_register_lnd);
357
358 void
359 lnet_unregister_lnd (lnd_t *lnd)
360 {
361         mutex_lock(&the_lnet.ln_lnd_mutex);
362
363         LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
364         LASSERT(lnd->lnd_refcount == 0);
365
366         list_del(&lnd->lnd_list);
367         CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
368
369         mutex_unlock(&the_lnet.ln_lnd_mutex);
370 }
371 EXPORT_SYMBOL(lnet_unregister_lnd);
372
373 void
374 lnet_counters_get(lnet_counters_t *counters)
375 {
376         lnet_counters_t *ctr;
377         int             i;
378
379         memset(counters, 0, sizeof(*counters));
380
381         lnet_net_lock(LNET_LOCK_EX);
382
383         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
384                 counters->msgs_max     += ctr->msgs_max;
385                 counters->msgs_alloc   += ctr->msgs_alloc;
386                 counters->errors       += ctr->errors;
387                 counters->send_count   += ctr->send_count;
388                 counters->recv_count   += ctr->recv_count;
389                 counters->route_count  += ctr->route_count;
390                 counters->drop_count   += ctr->drop_count;
391                 counters->send_length  += ctr->send_length;
392                 counters->recv_length  += ctr->recv_length;
393                 counters->route_length += ctr->route_length;
394                 counters->drop_length  += ctr->drop_length;
395
396         }
397         lnet_net_unlock(LNET_LOCK_EX);
398 }
399 EXPORT_SYMBOL(lnet_counters_get);
400
401 void
402 lnet_counters_reset(void)
403 {
404         lnet_counters_t *counters;
405         int             i;
406
407         lnet_net_lock(LNET_LOCK_EX);
408
409         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
410                 memset(counters, 0, sizeof(lnet_counters_t));
411
412         lnet_net_unlock(LNET_LOCK_EX);
413 }
414
415 static char *
416 lnet_res_type2str(int type)
417 {
418         switch (type) {
419         default:
420                 LBUG();
421         case LNET_COOKIE_TYPE_MD:
422                 return "MD";
423         case LNET_COOKIE_TYPE_ME:
424                 return "ME";
425         case LNET_COOKIE_TYPE_EQ:
426                 return "EQ";
427         }
428 }
429
430 static void
431 lnet_res_container_cleanup(struct lnet_res_container *rec)
432 {
433         int     count = 0;
434
435         if (rec->rec_type == 0) /* not set yet, it's uninitialized */
436                 return;
437
438         while (!list_empty(&rec->rec_active)) {
439                 struct list_head *e = rec->rec_active.next;
440
441                 list_del_init(e);
442                 if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
443                         lnet_eq_free(list_entry(e, lnet_eq_t, eq_list));
444
445                 } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
446                         lnet_md_free(list_entry(e, lnet_libmd_t, md_list));
447
448                 } else { /* NB: Active MEs should be attached on portals */
449                         LBUG();
450                 }
451                 count++;
452         }
453
454         if (count > 0) {
455                 /* Found alive MD/ME/EQ, user really should unlink/free
456                  * all of them before finalize LNet, but if someone didn't,
457                  * we have to recycle garbage for him */
458                 CERROR("%d active elements on exit of %s container\n",
459                        count, lnet_res_type2str(rec->rec_type));
460         }
461
462         if (rec->rec_lh_hash != NULL) {
463                 LIBCFS_FREE(rec->rec_lh_hash,
464                             LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
465                 rec->rec_lh_hash = NULL;
466         }
467
468         rec->rec_type = 0; /* mark it as finalized */
469 }
470
471 static int
472 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
473 {
474         int     rc = 0;
475         int     i;
476
477         LASSERT(rec->rec_type == 0);
478
479         rec->rec_type = type;
480         INIT_LIST_HEAD(&rec->rec_active);
481
482         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
483
484         /* Arbitrary choice of hash table size */
485         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
486                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
487         if (rec->rec_lh_hash == NULL) {
488                 rc = -ENOMEM;
489                 goto out;
490         }
491
492         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
493                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
494
495         return 0;
496
497 out:
498         CERROR("Failed to setup %s resource container\n",
499                lnet_res_type2str(type));
500         lnet_res_container_cleanup(rec);
501         return rc;
502 }
503
504 static void
505 lnet_res_containers_destroy(struct lnet_res_container **recs)
506 {
507         struct lnet_res_container       *rec;
508         int                             i;
509
510         cfs_percpt_for_each(rec, i, recs)
511                 lnet_res_container_cleanup(rec);
512
513         cfs_percpt_free(recs);
514 }
515
516 static struct lnet_res_container **
517 lnet_res_containers_create(int type)
518 {
519         struct lnet_res_container       **recs;
520         struct lnet_res_container       *rec;
521         int                             rc;
522         int                             i;
523
524         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
525         if (recs == NULL) {
526                 CERROR("Failed to allocate %s resource containers\n",
527                        lnet_res_type2str(type));
528                 return NULL;
529         }
530
531         cfs_percpt_for_each(rec, i, recs) {
532                 rc = lnet_res_container_setup(rec, i, type);
533                 if (rc != 0) {
534                         lnet_res_containers_destroy(recs);
535                         return NULL;
536                 }
537         }
538
539         return recs;
540 }
541
542 lnet_libhandle_t *
543 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
544 {
545         /* ALWAYS called with lnet_res_lock held */
546         struct list_head        *head;
547         lnet_libhandle_t        *lh;
548         unsigned int            hash;
549
550         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
551                 return NULL;
552
553         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
554         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
555
556         list_for_each_entry(lh, head, lh_hash_chain) {
557                 if (lh->lh_cookie == cookie)
558                         return lh;
559         }
560
561         return NULL;
562 }
563
564 void
565 lnet_res_lh_initialize(struct lnet_res_container *rec, lnet_libhandle_t *lh)
566 {
567         /* ALWAYS called with lnet_res_lock held */
568         unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
569         unsigned int    hash;
570
571         lh->lh_cookie = rec->rec_lh_cookie;
572         rec->rec_lh_cookie += 1 << ibits;
573
574         hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;
575
576         list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
577 }
578
579 static int lnet_unprepare(void);
580
581 static int
582 lnet_prepare(lnet_pid_t requested_pid)
583 {
584         /* Prepare to bring up the network */
585         struct lnet_res_container **recs;
586         int                       rc = 0;
587
588         if (requested_pid == LNET_PID_ANY) {
589                 /* Don't instantiate LNET just for me */
590                 return -ENETDOWN;
591         }
592
593         LASSERT(the_lnet.ln_refcount == 0);
594
595         the_lnet.ln_routing = 0;
596
597         LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
598         the_lnet.ln_pid = requested_pid;
599
600         INIT_LIST_HEAD(&the_lnet.ln_test_peers);
601         INIT_LIST_HEAD(&the_lnet.ln_peers);
602         INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
603         INIT_LIST_HEAD(&the_lnet.ln_nets);
604         INIT_LIST_HEAD(&the_lnet.ln_routers);
605         INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
606         INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
607
608         rc = lnet_descriptor_setup();
609         if (rc != 0)
610                 goto failed;
611
612         rc = lnet_create_remote_nets_table();
613         if (rc != 0)
614                 goto failed;
615
616         /*
617          * NB the interface cookie in wire handles guards against delayed
618          * replies and ACKs appearing valid after reboot.
619          */
620         the_lnet.ln_interface_cookie = ktime_get_real_ns();
621
622         the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
623                                                 sizeof(lnet_counters_t));
624         if (the_lnet.ln_counters == NULL) {
625                 CERROR("Failed to allocate counters for LNet\n");
626                 rc = -ENOMEM;
627                 goto failed;
628         }
629
630         rc = lnet_peer_tables_create();
631         if (rc != 0)
632                 goto failed;
633
634         rc = lnet_msg_containers_create();
635         if (rc != 0)
636                 goto failed;
637
638         rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
639                                       LNET_COOKIE_TYPE_EQ);
640         if (rc != 0)
641                 goto failed;
642
643         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
644         if (recs == NULL) {
645                 rc = -ENOMEM;
646                 goto failed;
647         }
648
649         the_lnet.ln_me_containers = recs;
650
651         recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
652         if (recs == NULL) {
653                 rc = -ENOMEM;
654                 goto failed;
655         }
656
657         the_lnet.ln_md_containers = recs;
658
659         rc = lnet_portals_create();
660         if (rc != 0) {
661                 CERROR("Failed to create portals for LNet: %d\n", rc);
662                 goto failed;
663         }
664
665         return 0;
666
667  failed:
668         lnet_unprepare();
669         return rc;
670 }
671
672 static int
673 lnet_unprepare (void)
674 {
675         /* NB no LNET_LOCK since this is the last reference.  All LND instances
676          * have shut down already, so it is safe to unlink and free all
677          * descriptors, even those that appear committed to a network op (eg MD
678          * with non-zero pending count) */
679
680         lnet_fail_nid(LNET_NID_ANY, 0);
681
682         LASSERT(the_lnet.ln_refcount == 0);
683         LASSERT(list_empty(&the_lnet.ln_test_peers));
684         LASSERT(list_empty(&the_lnet.ln_nets));
685
686         lnet_portals_destroy();
687
688         if (the_lnet.ln_md_containers != NULL) {
689                 lnet_res_containers_destroy(the_lnet.ln_md_containers);
690                 the_lnet.ln_md_containers = NULL;
691         }
692
693         if (the_lnet.ln_me_containers != NULL) {
694                 lnet_res_containers_destroy(the_lnet.ln_me_containers);
695                 the_lnet.ln_me_containers = NULL;
696         }
697
698         lnet_res_container_cleanup(&the_lnet.ln_eq_container);
699
700         lnet_msg_containers_destroy();
701         lnet_peer_uninit();
702         lnet_rtrpools_free(0);
703
704         if (the_lnet.ln_counters != NULL) {
705                 cfs_percpt_free(the_lnet.ln_counters);
706                 the_lnet.ln_counters = NULL;
707         }
708         lnet_destroy_remote_nets_table();
709         lnet_descriptor_cleanup();
710
711         return 0;
712 }
713
714 lnet_ni_t  *
715 lnet_net2ni_locked(__u32 net_id, int cpt)
716 {
717         struct lnet_ni   *ni;
718         struct lnet_net  *net;
719
720         LASSERT(cpt != LNET_LOCK_EX);
721
722         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
723                 if (net->net_id == net_id) {
724                         ni = list_entry(net->net_ni_list.next, struct lnet_ni,
725                                         ni_netlist);
726                         return ni;
727                 }
728         }
729
730         return NULL;
731 }
732
733 lnet_ni_t *
734 lnet_net2ni(__u32 net)
735 {
736         lnet_ni_t *ni;
737
738         lnet_net_lock(0);
739         ni = lnet_net2ni_locked(net, 0);
740         lnet_net_unlock(0);
741
742         return ni;
743 }
744 EXPORT_SYMBOL(lnet_net2ni);
745
746 struct lnet_net *
747 lnet_get_net_locked(__u32 net_id)
748 {
749         struct lnet_net  *net;
750
751         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
752                 if (net->net_id == net_id)
753                         return net;
754         }
755
756         return NULL;
757 }
758
759 unsigned int
760 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
761 {
762         __u64           key = nid;
763         unsigned int    val;
764
765         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
766
767         if (number == 1)
768                 return 0;
769
770         val = hash_long(key, LNET_CPT_BITS);
771         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
772         if (val < number)
773                 return val;
774
775         return (unsigned int)(key + val + (val >> 1)) % number;
776 }
777
778 int
779 lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
780 {
781         struct lnet_net *net;
782
783         /* must called with hold of lnet_net_lock */
784         if (LNET_CPT_NUMBER == 1)
785                 return 0; /* the only one */
786
787         /*
788          * If NI is provided then use the CPT identified in the NI cpt
789          * list if one exists. If one doesn't exist, then that NI is
790          * associated with all CPTs and it follows that the net it belongs
791          * to is implicitly associated with all CPTs, so just hash the nid
792          * and return that.
793          */
794         if (ni != NULL) {
795                 if (ni->ni_cpts != NULL)
796                         return ni->ni_cpts[lnet_nid_cpt_hash(nid,
797                                                              ni->ni_ncpts)];
798                 else
799                         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
800         }
801
802         /* no NI provided so look at the net */
803         net = lnet_get_net_locked(LNET_NIDNET(nid));
804
805         if (net != NULL && net->net_cpts != NULL) {
806                 return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
807         }
808
809         return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
810 }
811
812 int
813 lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
814 {
815         int     cpt;
816         int     cpt2;
817
818         if (LNET_CPT_NUMBER == 1)
819                 return 0; /* the only one */
820
821         cpt = lnet_net_lock_current();
822
823         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
824
825         lnet_net_unlock(cpt);
826
827         return cpt2;
828 }
829 EXPORT_SYMBOL(lnet_cpt_of_nid);
830
831 int
832 lnet_islocalnet(__u32 net_id)
833 {
834         struct lnet_net *net;
835         int             cpt;
836         bool            local;
837
838         cpt = lnet_net_lock_current();
839
840         net = lnet_get_net_locked(net_id);
841
842         local = net != NULL;
843
844         lnet_net_unlock(cpt);
845
846         return local;
847 }
848
849 bool
850 lnet_is_ni_healthy_locked(struct lnet_ni *ni)
851 {
852         if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
853             ni->ni_state == LNET_NI_STATE_DEGRADED)
854                 return true;
855
856         return false;
857 }
858
859 lnet_ni_t  *
860 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
861 {
862         struct lnet_net  *net;
863         struct lnet_ni   *ni;
864
865         LASSERT(cpt != LNET_LOCK_EX);
866
867         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
868                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
869                         if (ni->ni_nid == nid)
870                                 return ni;
871                 }
872         }
873
874         return NULL;
875 }
876
877 lnet_ni_t *
878 lnet_nid2ni_addref(lnet_nid_t nid)
879 {
880         lnet_ni_t *ni;
881
882         lnet_net_lock(0);
883         ni = lnet_nid2ni_locked(nid, 0);
884         if (ni)
885                 lnet_ni_addref_locked(ni, 0);
886         lnet_net_unlock(0);
887
888         return ni;
889 }
890 EXPORT_SYMBOL(lnet_nid2ni_addref);
891
892 int
893 lnet_islocalnid(lnet_nid_t nid)
894 {
895         struct lnet_ni  *ni;
896         int             cpt;
897
898         cpt = lnet_net_lock_current();
899         ni = lnet_nid2ni_locked(nid, cpt);
900         lnet_net_unlock(cpt);
901
902         return ni != NULL;
903 }
904
905 int
906 lnet_count_acceptor_nets(void)
907 {
908         /* Return the # of NIs that need the acceptor. */
909         int              count = 0;
910         struct lnet_net  *net;
911         int              cpt;
912
913         cpt = lnet_net_lock_current();
914         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
915                 /* all socklnd type networks should have the acceptor
916                  * thread started */
917                 if (net->net_lnd->lnd_accept != NULL)
918                         count++;
919         }
920
921         lnet_net_unlock(cpt);
922
923         return count;
924 }
925
926 static struct lnet_ping_info *
927 lnet_ping_info_create(int num_ni)
928 {
929         struct lnet_ping_info *ping_info;
930         unsigned int     infosz;
931
932         infosz = offsetof(struct lnet_ping_info, pi_ni[num_ni]);
933         LIBCFS_ALLOC(ping_info, infosz);
934         if (ping_info == NULL) {
935                 CERROR("Can't allocate ping info[%d]\n", num_ni);
936                 return NULL;
937         }
938
939         ping_info->pi_nnis = num_ni;
940         ping_info->pi_pid = the_lnet.ln_pid;
941         ping_info->pi_magic = LNET_PROTO_PING_MAGIC;
942         ping_info->pi_features = LNET_PING_FEAT_NI_STATUS;
943
944         return ping_info;
945 }
946
947 static inline int
948 lnet_get_net_ni_count_locked(struct lnet_net *net)
949 {
950         struct lnet_ni  *ni;
951         int             count = 0;
952
953         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
954                 count++;
955
956         return count;
957 }
958
959 static inline int
960 lnet_get_ni_count(void)
961 {
962         struct lnet_ni  *ni;
963         struct lnet_net *net;
964         int             count = 0;
965
966         lnet_net_lock(0);
967
968         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
969                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
970                         count++;
971         }
972
973         lnet_net_unlock(0);
974
975         return count;
976 }
977
978 static inline void
979 lnet_ping_info_free(struct lnet_ping_info *pinfo)
980 {
981         LIBCFS_FREE(pinfo,
982                     offsetof(struct lnet_ping_info,
983                              pi_ni[pinfo->pi_nnis]));
984 }
985
986 static void
987 lnet_ping_info_destroy(void)
988 {
989         struct lnet_net *net;
990         struct lnet_ni  *ni;
991
992         lnet_net_lock(LNET_LOCK_EX);
993
994         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
995                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
996                         lnet_ni_lock(ni);
997                         ni->ni_status = NULL;
998                         lnet_ni_unlock(ni);
999                 }
1000         }
1001
1002         lnet_ping_info_free(the_lnet.ln_ping_info);
1003         the_lnet.ln_ping_info = NULL;
1004
1005         lnet_net_unlock(LNET_LOCK_EX);
1006 }
1007
1008 static void
1009 lnet_ping_event_handler(lnet_event_t *event)
1010 {
1011         struct lnet_ping_info *pinfo = event->md.user_ptr;
1012
1013         if (event->unlinked)
1014                 pinfo->pi_features = LNET_PING_FEAT_INVAL;
1015 }
1016
1017 static int
1018 lnet_ping_info_setup(struct lnet_ping_info **ppinfo, lnet_handle_md_t *md_handle,
1019                      int ni_count, bool set_eq)
1020 {
1021         lnet_handle_me_t  me_handle;
1022         lnet_process_id_t id = {LNET_NID_ANY, LNET_PID_ANY};
1023         lnet_md_t         md = {NULL};
1024         int               rc, rc2;
1025
1026         if (set_eq) {
1027                 rc = LNetEQAlloc(0, lnet_ping_event_handler,
1028                                  &the_lnet.ln_ping_target_eq);
1029                 if (rc != 0) {
1030                         CERROR("Can't allocate ping EQ: %d\n", rc);
1031                         return rc;
1032                 }
1033         }
1034
1035         *ppinfo = lnet_ping_info_create(ni_count);
1036         if (*ppinfo == NULL) {
1037                 rc = -ENOMEM;
1038                 goto failed_0;
1039         }
1040
1041         rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
1042                           LNET_PROTO_PING_MATCHBITS, 0,
1043                           LNET_UNLINK, LNET_INS_AFTER,
1044                           &me_handle);
1045         if (rc != 0) {
1046                 CERROR("Can't create ping ME: %d\n", rc);
1047                 goto failed_1;
1048         }
1049
1050         /* initialize md content */
1051         md.start     = *ppinfo;
1052         md.length    = offsetof(struct lnet_ping_info,
1053                                 pi_ni[(*ppinfo)->pi_nnis]);
1054         md.threshold = LNET_MD_THRESH_INF;
1055         md.max_size  = 0;
1056         md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
1057                        LNET_MD_MANAGE_REMOTE;
1058         md.user_ptr  = NULL;
1059         md.eq_handle = the_lnet.ln_ping_target_eq;
1060         md.user_ptr = *ppinfo;
1061
1062         rc = LNetMDAttach(me_handle, md, LNET_RETAIN, md_handle);
1063         if (rc != 0) {
1064                 CERROR("Can't attach ping MD: %d\n", rc);
1065                 goto failed_2;
1066         }
1067
1068         return 0;
1069
1070 failed_2:
1071         rc2 = LNetMEUnlink(me_handle);
1072         LASSERT(rc2 == 0);
1073 failed_1:
1074         lnet_ping_info_free(*ppinfo);
1075         *ppinfo = NULL;
1076 failed_0:
1077         if (set_eq)
1078                 LNetEQFree(the_lnet.ln_ping_target_eq);
1079         return rc;
1080 }
1081
1082 static void
1083 lnet_ping_md_unlink(struct lnet_ping_info *pinfo, lnet_handle_md_t *md_handle)
1084 {
1085         sigset_t        blocked = cfs_block_allsigs();
1086
1087         LNetMDUnlink(*md_handle);
1088         LNetInvalidateHandle(md_handle);
1089
1090         /* NB md could be busy; this just starts the unlink */
1091         while (pinfo->pi_features != LNET_PING_FEAT_INVAL) {
1092                 CDEBUG(D_NET, "Still waiting for ping MD to unlink\n");
1093                 set_current_state(TASK_UNINTERRUPTIBLE);
1094                 schedule_timeout(cfs_time_seconds(1));
1095         }
1096
1097         cfs_restore_sigs(blocked);
1098 }
1099
1100 static void
1101 lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
1102 {
1103         int                     i;
1104         struct lnet_ni          *ni;
1105         struct lnet_net         *net;
1106         struct lnet_ni_status *ns;
1107
1108         i = 0;
1109         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1110                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
1111                         LASSERT(i < ping_info->pi_nnis);
1112
1113                         ns = &ping_info->pi_ni[i];
1114
1115                         ns->ns_nid = ni->ni_nid;
1116
1117                         lnet_ni_lock(ni);
1118                         ns->ns_status = (ni->ni_status != NULL) ?
1119                                         ni->ni_status->ns_status :
1120                                                 LNET_NI_STATUS_UP;
1121                         ni->ni_status = ns;
1122                         lnet_ni_unlock(ni);
1123
1124                         i++;
1125                 }
1126
1127         }
1128 }
1129
1130 static void
1131 lnet_ping_target_update(struct lnet_ping_info *pinfo, lnet_handle_md_t md_handle)
1132 {
1133         struct lnet_ping_info *old_pinfo = NULL;
1134         lnet_handle_md_t old_md;
1135
1136         /* switch the NIs to point to the new ping info created */
1137         lnet_net_lock(LNET_LOCK_EX);
1138
1139         if (!the_lnet.ln_routing)
1140                 pinfo->pi_features |= LNET_PING_FEAT_RTE_DISABLED;
1141         lnet_ping_info_install_locked(pinfo);
1142
1143         if (the_lnet.ln_ping_info != NULL) {
1144                 old_pinfo = the_lnet.ln_ping_info;
1145                 old_md = the_lnet.ln_ping_target_md;
1146         }
1147         the_lnet.ln_ping_target_md = md_handle;
1148         the_lnet.ln_ping_info = pinfo;
1149
1150         lnet_net_unlock(LNET_LOCK_EX);
1151
1152         if (old_pinfo != NULL) {
1153                 /* unlink the old ping info */
1154                 lnet_ping_md_unlink(old_pinfo, &old_md);
1155                 lnet_ping_info_free(old_pinfo);
1156         }
1157 }
1158
1159 static void
1160 lnet_ping_target_fini(void)
1161 {
1162         int             rc;
1163
1164         lnet_ping_md_unlink(the_lnet.ln_ping_info,
1165                             &the_lnet.ln_ping_target_md);
1166
1167         rc = LNetEQFree(the_lnet.ln_ping_target_eq);
1168         LASSERT(rc == 0);
1169
1170         lnet_ping_info_destroy();
1171 }
1172
1173 static int
1174 lnet_ni_tq_credits(lnet_ni_t *ni)
1175 {
1176         int     credits;
1177
1178         LASSERT(ni->ni_ncpts >= 1);
1179
1180         if (ni->ni_ncpts == 1)
1181                 return ni->ni_net->net_tunables.lct_max_tx_credits;
1182
1183         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
1184         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
1185         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
1186
1187         return credits;
1188 }
1189
1190 static void
1191 lnet_ni_unlink_locked(lnet_ni_t *ni)
1192 {
1193         if (!list_empty(&ni->ni_cptlist)) {
1194                 list_del_init(&ni->ni_cptlist);
1195                 lnet_ni_decref_locked(ni, 0);
1196         }
1197
1198         /* move it to zombie list and nobody can find it anymore */
1199         LASSERT(!list_empty(&ni->ni_netlist));
1200         list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
1201         lnet_ni_decref_locked(ni, 0);
1202 }
1203
1204 static void
1205 lnet_clear_zombies_nis_locked(struct lnet_net *net)
1206 {
1207         int             i;
1208         int             islo;
1209         lnet_ni_t       *ni;
1210         struct list_head *zombie_list = &net->net_ni_zombie;
1211
1212         /*
1213          * Now wait for the NIs I just nuked to show up on the zombie
1214          * list and shut them down in guaranteed thread context
1215          */
1216         i = 2;
1217         while (!list_empty(zombie_list)) {
1218                 int     *ref;
1219                 int     j;
1220
1221                 ni = list_entry(zombie_list->next,
1222                                 lnet_ni_t, ni_netlist);
1223                 list_del_init(&ni->ni_netlist);
1224                 /* the ni should be in deleting state. If it's not it's
1225                  * a bug */
1226                 LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
1227                 cfs_percpt_for_each(ref, j, ni->ni_refs) {
1228                         if (*ref == 0)
1229                                 continue;
1230                         /* still busy, add it back to zombie list */
1231                         list_add(&ni->ni_netlist, zombie_list);
1232                         break;
1233                 }
1234
1235                 if (!list_empty(&ni->ni_netlist)) {
1236                         lnet_net_unlock(LNET_LOCK_EX);
1237                         ++i;
1238                         if ((i & (-i)) == i) {
1239                                 CDEBUG(D_WARNING,
1240                                        "Waiting for zombie LNI %s\n",
1241                                        libcfs_nid2str(ni->ni_nid));
1242                         }
1243                         set_current_state(TASK_UNINTERRUPTIBLE);
1244                         schedule_timeout(cfs_time_seconds(1));
1245                         lnet_net_lock(LNET_LOCK_EX);
1246                         continue;
1247                 }
1248
1249                 lnet_net_unlock(LNET_LOCK_EX);
1250
1251                 islo = ni->ni_net->net_lnd->lnd_type == LOLND;
1252
1253                 LASSERT(!in_interrupt());
1254                 (net->net_lnd->lnd_shutdown)(ni);
1255
1256                 if (!islo)
1257                         CDEBUG(D_LNI, "Removed LNI %s\n",
1258                               libcfs_nid2str(ni->ni_nid));
1259
1260                 lnet_ni_free(ni);
1261                 i = 2;
1262                 lnet_net_lock(LNET_LOCK_EX);
1263         }
1264 }
1265
1266 /* shutdown down the NI and release refcount */
1267 static void
1268 lnet_shutdown_lndni(struct lnet_ni *ni)
1269 {
1270         int i;
1271         struct lnet_net *net = ni->ni_net;
1272
1273         lnet_net_lock(LNET_LOCK_EX);
1274         ni->ni_state = LNET_NI_STATE_DELETING;
1275         lnet_ni_unlink_locked(ni);
1276         lnet_net_unlock(LNET_LOCK_EX);
1277
1278         /* clear messages for this NI on the lazy portal */
1279         for (i = 0; i < the_lnet.ln_nportals; i++)
1280                 lnet_clear_lazy_portal(ni, i, "Shutting down NI");
1281
1282         /* Do peer table cleanup for this ni */
1283         lnet_peer_tables_cleanup(ni);
1284
1285         lnet_net_lock(LNET_LOCK_EX);
1286         lnet_clear_zombies_nis_locked(net);
1287         lnet_net_unlock(LNET_LOCK_EX);
1288 }
1289
1290 static void
1291 lnet_shutdown_lndnet(struct lnet_net *net)
1292 {
1293         struct lnet_ni *ni;
1294
1295         lnet_net_lock(LNET_LOCK_EX);
1296
1297         net->net_state = LNET_NET_STATE_DELETING;
1298
1299         list_del_init(&net->net_list);
1300
1301         while (!list_empty(&net->net_ni_list)) {
1302                 ni = list_entry(net->net_ni_list.next,
1303                                 lnet_ni_t, ni_netlist);
1304                 lnet_net_unlock(LNET_LOCK_EX);
1305                 lnet_shutdown_lndni(ni);
1306                 lnet_net_lock(LNET_LOCK_EX);
1307         }
1308
1309         /*
1310          * decrement ref count on lnd only when the entire network goes
1311          * away
1312          */
1313         net->net_lnd->lnd_refcount--;
1314
1315         lnet_net_unlock(LNET_LOCK_EX);
1316
1317         lnet_net_free(net);
1318 }
1319
1320 static void
1321 lnet_shutdown_lndnets(void)
1322 {
1323         struct lnet_net *net;
1324
1325         /* NB called holding the global mutex */
1326
1327         /* All quiet on the API front */
1328         LASSERT(!the_lnet.ln_shutdown);
1329         LASSERT(the_lnet.ln_refcount == 0);
1330
1331         lnet_net_lock(LNET_LOCK_EX);
1332         the_lnet.ln_shutdown = 1;       /* flag shutdown */
1333
1334         while (!list_empty(&the_lnet.ln_nets)) {
1335                 /*
1336                  * move the nets to the zombie list to avoid them being
1337                  * picked up for new work. LONET is also included in the
1338                  * Nets that will be moved to the zombie list
1339                  */
1340                 net = list_entry(the_lnet.ln_nets.next,
1341                                  struct lnet_net, net_list);
1342                 list_move(&net->net_list, &the_lnet.ln_net_zombie);
1343         }
1344
1345         /* Drop the cached loopback Net. */
1346         if (the_lnet.ln_loni != NULL) {
1347                 lnet_ni_decref_locked(the_lnet.ln_loni, 0);
1348                 the_lnet.ln_loni = NULL;
1349         }
1350         lnet_net_unlock(LNET_LOCK_EX);
1351
1352         /* iterate through the net zombie list and delete each net */
1353         while (!list_empty(&the_lnet.ln_net_zombie)) {
1354                 net = list_entry(the_lnet.ln_net_zombie.next,
1355                                  struct lnet_net, net_list);
1356                 lnet_shutdown_lndnet(net);
1357         }
1358
1359         lnet_net_lock(LNET_LOCK_EX);
1360         the_lnet.ln_shutdown = 0;
1361         lnet_net_unlock(LNET_LOCK_EX);
1362 }
1363
1364 static int
1365 lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
1366 {
1367         int                     rc = -EINVAL;
1368         struct lnet_tx_queue    *tq;
1369         int                     i;
1370         struct lnet_net         *net = ni->ni_net;
1371
1372         mutex_lock(&the_lnet.ln_lnd_mutex);
1373
1374         if (tun) {
1375                 memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
1376                 ni->ni_lnd_tunables_set = true;
1377         }
1378
1379         rc = (net->net_lnd->lnd_startup)(ni);
1380
1381         mutex_unlock(&the_lnet.ln_lnd_mutex);
1382
1383         if (rc != 0) {
1384                 LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
1385                                    rc, libcfs_lnd2str(net->net_lnd->lnd_type));
1386                 lnet_net_lock(LNET_LOCK_EX);
1387                 net->net_lnd->lnd_refcount--;
1388                 lnet_net_unlock(LNET_LOCK_EX);
1389                 goto failed0;
1390         }
1391
1392         ni->ni_state = LNET_NI_STATE_ACTIVE;
1393
1394         /* We keep a reference on the loopback net through the loopback NI */
1395         if (net->net_lnd->lnd_type == LOLND) {
1396                 lnet_ni_addref(ni);
1397                 LASSERT(the_lnet.ln_loni == NULL);
1398                 the_lnet.ln_loni = ni;
1399                 ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
1400                 ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
1401                 ni->ni_net->net_tunables.lct_max_tx_credits = 0;
1402                 ni->ni_net->net_tunables.lct_peer_timeout = 0;
1403                 return 0;
1404         }
1405
1406         if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
1407             ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
1408                 LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
1409                                    libcfs_lnd2str(net->net_lnd->lnd_type),
1410                                    ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
1411                                         "" : "per-peer ");
1412                 /* shutdown the NI since if we get here then it must've already
1413                  * been started
1414                  */
1415                 lnet_shutdown_lndni(ni);
1416                 return -EINVAL;
1417         }
1418
1419         cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
1420                 tq->tq_credits_min =
1421                 tq->tq_credits_max =
1422                 tq->tq_credits = lnet_ni_tq_credits(ni);
1423         }
1424
1425         CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
1426                 libcfs_nid2str(ni->ni_nid),
1427                 ni->ni_net->net_tunables.lct_peer_tx_credits,
1428                 lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
1429                 ni->ni_net->net_tunables.lct_peer_rtr_credits,
1430                 ni->ni_net->net_tunables.lct_peer_timeout);
1431
1432         return 0;
1433 failed0:
1434         lnet_ni_free(ni);
1435         return rc;
1436 }
1437
1438 static int
1439 lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
1440 {
1441         struct lnet_ni          *ni;
1442         struct lnet_net         *net_l = NULL;
1443         struct list_head        local_ni_list;
1444         int                     rc;
1445         int                     ni_count = 0;
1446         __u32                   lnd_type;
1447         lnd_t                   *lnd;
1448         int                     peer_timeout =
1449                 net->net_tunables.lct_peer_timeout;
1450         int                     maxtxcredits =
1451                 net->net_tunables.lct_max_tx_credits;
1452         int                     peerrtrcredits =
1453                 net->net_tunables.lct_peer_rtr_credits;
1454
1455         INIT_LIST_HEAD(&local_ni_list);
1456
1457         /*
1458          * make sure that this net is unique. If it isn't then
1459          * we are adding interfaces to an already existing network, and
1460          * 'net' is just a convenient way to pass in the list.
1461          * if it is unique we need to find the LND and load it if
1462          * necessary.
1463          */
1464         if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
1465                 lnd_type = LNET_NETTYP(net->net_id);
1466
1467                 LASSERT(libcfs_isknown_lnd(lnd_type));
1468
1469                 if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
1470                     lnd_type == IIBLND || lnd_type == VIBLND) {
1471                         CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
1472                         rc = -EINVAL;
1473                         goto failed0;
1474                 }
1475
1476                 mutex_lock(&the_lnet.ln_lnd_mutex);
1477                 lnd = lnet_find_lnd_by_type(lnd_type);
1478
1479                 if (lnd == NULL) {
1480                         mutex_unlock(&the_lnet.ln_lnd_mutex);
1481                         rc = request_module("%s", libcfs_lnd2modname(lnd_type));
1482                         mutex_lock(&the_lnet.ln_lnd_mutex);
1483
1484                         lnd = lnet_find_lnd_by_type(lnd_type);
1485                         if (lnd == NULL) {
1486                                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1487                                 CERROR("Can't load LND %s, module %s, rc=%d\n",
1488                                 libcfs_lnd2str(lnd_type),
1489                                 libcfs_lnd2modname(lnd_type), rc);
1490 #ifndef HAVE_MODULE_LOADING_SUPPORT
1491                                 LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
1492                                                 "compiled with kernel module "
1493                                                 "loading support.");
1494 #endif
1495                                 rc = -EINVAL;
1496                                 goto failed0;
1497                         }
1498                 }
1499
1500                 lnet_net_lock(LNET_LOCK_EX);
1501                 lnd->lnd_refcount++;
1502                 lnet_net_unlock(LNET_LOCK_EX);
1503
1504                 net->net_lnd = lnd;
1505
1506                 mutex_unlock(&the_lnet.ln_lnd_mutex);
1507
1508                 net_l = net;
1509         }
1510
1511         /*
1512          * net_l: if the network being added is unique then net_l
1513          *        will point to that network
1514          *        if the network being added is not unique then
1515          *        net_l points to the existing network.
1516          *
1517          * When we enter the loop below, we'll pick NIs off he
1518          * network beign added and start them up, then add them to
1519          * a local ni list. Once we've successfully started all
1520          * the NIs then we join the local NI list (of started up
1521          * networks) with the net_l->net_ni_list, which should
1522          * point to the correct network to add the new ni list to
1523          *
1524          * If any of the new NIs fail to start up, then we want to
1525          * iterate through the local ni list, which should include
1526          * any NIs which were successfully started up, and shut
1527          * them down.
1528          *
1529          * After than we want to delete the network being added,
1530          * to avoid a memory leak.
1531          */
1532
1533         /*
1534          * When a network uses TCP bonding then all its interfaces
1535          * must be specified when the network is first defined: the
1536          * TCP bonding code doesn't allow for interfaces to be added
1537          * or removed.
1538          */
1539         if (net_l != net && net_l != NULL && use_tcp_bonding &&
1540             LNET_NETTYP(net_l->net_id) == SOCKLND) {
1541                 rc = -EINVAL;
1542                 goto failed0;
1543         }
1544
1545         while (!list_empty(&net->net_ni_added)) {
1546                 ni = list_entry(net->net_ni_added.next, struct lnet_ni,
1547                                 ni_netlist);
1548                 list_del_init(&ni->ni_netlist);
1549
1550                 /* make sure that the the NI we're about to start
1551                  * up is actually unique. if it's not fail. */
1552                 if (!lnet_ni_unique_net(&net_l->net_ni_list,
1553                                         ni->ni_interfaces[0])) {
1554                         rc = -EINVAL;
1555                         goto failed1;
1556                 }
1557
1558                 /* adjust the pointer the parent network, just in case it
1559                  * the net is a duplicate */
1560                 ni->ni_net = net_l;
1561
1562                 rc = lnet_startup_lndni(ni, tun);
1563
1564                 LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
1565                         ni->ni_net->net_lnd->lnd_query != NULL);
1566
1567                 if (rc < 0)
1568                         goto failed1;
1569
1570                 lnet_ni_addref(ni);
1571                 list_add_tail(&ni->ni_netlist, &local_ni_list);
1572
1573                 ni_count++;
1574         }
1575
1576         lnet_net_lock(LNET_LOCK_EX);
1577         list_splice_tail(&local_ni_list, &net_l->net_ni_list);
1578         lnet_incr_dlc_seq();
1579         lnet_net_unlock(LNET_LOCK_EX);
1580
1581         /* if the network is not unique then we don't want to keep
1582          * it around after we're done. Free it. Otherwise add that
1583          * net to the global the_lnet.ln_nets */
1584         if (net_l != net && net_l != NULL) {
1585                 /*
1586                  * TODO - note. currently the tunables can not be updated
1587                  * once added
1588                  */
1589                 lnet_net_free(net);
1590         } else {
1591                 net->net_state = LNET_NET_STATE_ACTIVE;
1592                 /*
1593                  * restore tunables after it has been overwitten by the
1594                  * lnd
1595                  */
1596                 if (peer_timeout != -1)
1597                         net->net_tunables.lct_peer_timeout = peer_timeout;
1598                 if (maxtxcredits != -1)
1599                         net->net_tunables.lct_max_tx_credits = maxtxcredits;
1600                 if (peerrtrcredits != -1)
1601                         net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
1602
1603                 lnet_net_lock(LNET_LOCK_EX);
1604                 list_add_tail(&net->net_list, &the_lnet.ln_nets);
1605                 lnet_net_unlock(LNET_LOCK_EX);
1606         }
1607
1608         return ni_count;
1609
1610 failed1:
1611         /*
1612          * shutdown the new NIs that are being started up
1613          * free the NET being started
1614          */
1615         while (!list_empty(&local_ni_list)) {
1616                 ni = list_entry(local_ni_list.next, struct lnet_ni,
1617                                 ni_netlist);
1618
1619                 lnet_shutdown_lndni(ni);
1620         }
1621
1622 failed0:
1623         lnet_net_free(net);
1624
1625         return rc;
1626 }
1627
1628 static int
1629 lnet_startup_lndnets(struct list_head *netlist)
1630 {
1631         struct lnet_net         *net;
1632         int                     rc;
1633         int                     ni_count = 0;
1634
1635         while (!list_empty(netlist)) {
1636                 net = list_entry(netlist->next, struct lnet_net, net_list);
1637                 list_del_init(&net->net_list);
1638
1639                 rc = lnet_startup_lndnet(net, NULL);
1640
1641                 if (rc < 0)
1642                         goto failed;
1643
1644                 ni_count += rc;
1645         }
1646
1647         return ni_count;
1648 failed:
1649         lnet_shutdown_lndnets();
1650
1651         return rc;
1652 }
1653
1654 /**
1655  * Initialize LNet library.
1656  *
1657  * Automatically called at module loading time. Caller has to call
1658  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
1659  * latter returned 0. It must be called exactly once.
1660  *
1661  * \retval 0 on success
1662  * \retval -ve on failures.
1663  */
1664 int lnet_lib_init(void)
1665 {
1666         int rc;
1667
1668         lnet_assert_wire_constants();
1669
1670         memset(&the_lnet, 0, sizeof(the_lnet));
1671
1672         /* refer to global cfs_cpt_table for now */
1673         the_lnet.ln_cpt_table   = cfs_cpt_table;
1674         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1675
1676         LASSERT(the_lnet.ln_cpt_number > 0);
1677         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1678                 /* we are under risk of consuming all lh_cookie */
1679                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1680                        "please change setting of CPT-table and retry\n",
1681                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1682                 return -E2BIG;
1683         }
1684
1685         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1686                 the_lnet.ln_cpt_bits++;
1687
1688         rc = lnet_create_locks();
1689         if (rc != 0) {
1690                 CERROR("Can't create LNet global locks: %d\n", rc);
1691                 return rc;
1692         }
1693
1694         the_lnet.ln_refcount = 0;
1695         LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
1696         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1697         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
1698         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1699         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1700
1701         /* The hash table size is the number of bits it takes to express the set
1702          * ln_num_routes, minus 1 (better to under estimate than over so we
1703          * don't waste memory). */
1704         if (rnet_htable_size <= 0)
1705                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1706         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1707                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1708         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1709                                            order_base_2(rnet_htable_size) - 1);
1710
1711         /* All LNDs apart from the LOLND are in separate modules.  They
1712          * register themselves when their module loads, and unregister
1713          * themselves when their module is unloaded. */
1714         lnet_register_lnd(&the_lolnd);
1715         return 0;
1716 }
1717
1718 /**
1719  * Finalize LNet library.
1720  *
1721  * \pre lnet_lib_init() called with success.
1722  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1723  */
1724 void lnet_lib_exit(void)
1725 {
1726         LASSERT(the_lnet.ln_refcount == 0);
1727
1728         while (!list_empty(&the_lnet.ln_lnds))
1729                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1730                                                lnd_t, lnd_list));
1731         lnet_destroy_locks();
1732 }
1733
1734 /**
1735  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1736  *
1737  * Users must call this function at least once before any other functions.
1738  * For each successful call there must be a corresponding call to
1739  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1740  * ignored.
1741  *
1742  * The PID used by LNet may be different from the one requested.
1743  * See LNetGetId().
1744  *
1745  * \param requested_pid PID requested by the caller.
1746  *
1747  * \return >= 0 on success, and < 0 error code on failures.
1748  */
1749 int
1750 LNetNIInit(lnet_pid_t requested_pid)
1751 {
1752         int                     im_a_router = 0;
1753         int                     rc;
1754         int                     ni_count;
1755         struct lnet_ping_info   *pinfo;
1756         lnet_handle_md_t        md_handle;
1757         struct list_head        net_head;
1758         struct lnet_net         *net;
1759
1760         INIT_LIST_HEAD(&net_head);
1761
1762         mutex_lock(&the_lnet.ln_api_mutex);
1763
1764         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1765
1766         if (the_lnet.ln_refcount > 0) {
1767                 rc = the_lnet.ln_refcount++;
1768                 mutex_unlock(&the_lnet.ln_api_mutex);
1769                 return rc;
1770         }
1771
1772         rc = lnet_prepare(requested_pid);
1773         if (rc != 0) {
1774                 mutex_unlock(&the_lnet.ln_api_mutex);
1775                 return rc;
1776         }
1777
1778         /* create a network for Loopback network */
1779         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
1780         if (net == NULL) {
1781                 rc = -ENOMEM;
1782                 goto err_empty_list;
1783         }
1784
1785         /* Add in the loopback NI */
1786         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
1787                 rc = -ENOMEM;
1788                 goto err_empty_list;
1789         }
1790
1791         /* If LNet is being initialized via DLC it is possible
1792          * that the user requests not to load module parameters (ones which
1793          * are supported by DLC) on initialization.  Therefore, make sure not
1794          * to load networks, routes and forwarding from module parameters
1795          * in this case.  On cleanup in case of failure only clean up
1796          * routes if it has been loaded */
1797         if (!the_lnet.ln_nis_from_mod_params) {
1798                 rc = lnet_parse_networks(&net_head, lnet_get_networks(),
1799                                          use_tcp_bonding);
1800                 if (rc < 0)
1801                         goto err_empty_list;
1802         }
1803
1804         ni_count = lnet_startup_lndnets(&net_head);
1805         if (ni_count < 0) {
1806                 rc = ni_count;
1807                 goto err_empty_list;
1808         }
1809
1810         if (!the_lnet.ln_nis_from_mod_params) {
1811                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1812                 if (rc != 0)
1813                         goto err_shutdown_lndnis;
1814
1815                 rc = lnet_check_routes();
1816                 if (rc != 0)
1817                         goto err_destroy_routes;
1818
1819                 rc = lnet_rtrpools_alloc(im_a_router);
1820                 if (rc != 0)
1821                         goto err_destroy_routes;
1822         }
1823
1824         rc = lnet_acceptor_start();
1825         if (rc != 0)
1826                 goto err_destroy_routes;
1827
1828         the_lnet.ln_refcount = 1;
1829         /* Now I may use my own API functions... */
1830
1831         rc = lnet_ping_info_setup(&pinfo, &md_handle, ni_count, true);
1832         if (rc != 0)
1833                 goto err_acceptor_stop;
1834
1835         lnet_ping_target_update(pinfo, md_handle);
1836
1837         rc = lnet_router_checker_start();
1838         if (rc != 0)
1839                 goto err_stop_ping;
1840
1841         lnet_fault_init();
1842         lnet_proc_init();
1843
1844         mutex_unlock(&the_lnet.ln_api_mutex);
1845
1846         return 0;
1847
1848 err_stop_ping:
1849         lnet_ping_target_fini();
1850 err_acceptor_stop:
1851         the_lnet.ln_refcount = 0;
1852         lnet_acceptor_stop();
1853 err_destroy_routes:
1854         if (!the_lnet.ln_nis_from_mod_params)
1855                 lnet_destroy_routes();
1856 err_shutdown_lndnis:
1857         lnet_shutdown_lndnets();
1858 err_empty_list:
1859         lnet_unprepare();
1860         LASSERT(rc < 0);
1861         mutex_unlock(&the_lnet.ln_api_mutex);
1862         while (!list_empty(&net_head)) {
1863                 struct lnet_net *net;
1864
1865                 net = list_entry(net_head.next, struct lnet_net, net_list);
1866                 list_del_init(&net->net_list);
1867                 lnet_net_free(net);
1868         }
1869         return rc;
1870 }
1871 EXPORT_SYMBOL(LNetNIInit);
1872
1873 /**
1874  * Stop LNet interfaces, routing, and forwarding.
1875  *
1876  * Users must call this function once for each successful call to LNetNIInit().
1877  * Once the LNetNIFini() operation has been started, the results of pending
1878  * API operations are undefined.
1879  *
1880  * \return always 0 for current implementation.
1881  */
1882 int
1883 LNetNIFini()
1884 {
1885         mutex_lock(&the_lnet.ln_api_mutex);
1886
1887         LASSERT(the_lnet.ln_refcount > 0);
1888
1889         if (the_lnet.ln_refcount != 1) {
1890                 the_lnet.ln_refcount--;
1891         } else {
1892                 LASSERT(!the_lnet.ln_niinit_self);
1893
1894                 lnet_fault_fini();
1895
1896                 lnet_proc_fini();
1897                 lnet_router_checker_stop();
1898                 lnet_ping_target_fini();
1899
1900                 /* Teardown fns that use my own API functions BEFORE here */
1901                 the_lnet.ln_refcount = 0;
1902
1903                 lnet_acceptor_stop();
1904                 lnet_destroy_routes();
1905                 lnet_shutdown_lndnets();
1906                 lnet_unprepare();
1907         }
1908
1909         mutex_unlock(&the_lnet.ln_api_mutex);
1910         return 0;
1911 }
1912 EXPORT_SYMBOL(LNetNIFini);
1913
1914 /**
1915  * Grabs the ni data from the ni structure and fills the out
1916  * parameters
1917  *
1918  * \param[in] ni network        interface structure
1919  * \param[out] cpt_count        the number of cpts the ni is on
1920  * \param[out] nid              Network Interface ID
1921  * \param[out] peer_timeout     NI peer timeout
1922  * \param[out] peer_tx_crdits   NI peer transmit credits
1923  * \param[out] peer_rtr_credits NI peer router credits
1924  * \param[out] max_tx_credits   NI max transmit credit
1925  * \param[out] net_config       Network configuration
1926  */
1927 static void
1928 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
1929 {
1930         struct lnet_ioctl_net_config *net_config;
1931         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
1932         size_t min_size, tunable_size = 0;
1933         int i;
1934
1935         if (!ni || !config)
1936                 return;
1937
1938         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
1939         if (!net_config)
1940                 return;
1941
1942         BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
1943                      ARRAY_SIZE(net_config->ni_interfaces));
1944
1945         for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
1946                 if (!ni->ni_interfaces[i])
1947                         break;
1948
1949                 strncpy(net_config->ni_interfaces[i],
1950                         ni->ni_interfaces[i],
1951                         sizeof(net_config->ni_interfaces[i]));
1952         }
1953
1954         config->cfg_nid = ni->ni_nid;
1955         config->cfg_config_u.cfg_net.net_peer_timeout =
1956                 ni->ni_net->net_tunables.lct_peer_timeout;
1957         config->cfg_config_u.cfg_net.net_max_tx_credits =
1958                 ni->ni_net->net_tunables.lct_max_tx_credits;
1959         config->cfg_config_u.cfg_net.net_peer_tx_credits =
1960                 ni->ni_net->net_tunables.lct_peer_tx_credits;
1961         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
1962                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
1963
1964         net_config->ni_status = ni->ni_status->ns_status;
1965
1966         if (ni->ni_cpts) {
1967                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
1968
1969                 for (i = 0; i < num_cpts; i++)
1970                         net_config->ni_cpts[i] = ni->ni_cpts[i];
1971
1972                 config->cfg_ncpts = num_cpts;
1973         }
1974
1975         /*
1976          * See if user land tools sent in a newer and larger version
1977          * of struct lnet_tunables than what the kernel uses.
1978          */
1979         min_size = sizeof(*config) + sizeof(*net_config);
1980
1981         if (config->cfg_hdr.ioc_len > min_size)
1982                 tunable_size = config->cfg_hdr.ioc_len - min_size;
1983
1984         /* Don't copy too much data to user space */
1985         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
1986         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
1987
1988         if (lnd_cfg && min_size) {
1989                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
1990                 config->cfg_config_u.cfg_net.net_interface_count = 1;
1991
1992                 /* Tell user land that kernel side has less data */
1993                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
1994                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
1995                         config->cfg_hdr.ioc_len -= min_size;
1996                 }
1997         }
1998 }
1999
2000 struct lnet_ni *
2001 lnet_get_ni_idx_locked(int idx)
2002 {
2003         struct lnet_ni          *ni;
2004         struct lnet_net         *net;
2005
2006         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2007                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2008                         if (idx-- == 0)
2009                                 return ni;
2010                 }
2011         }
2012
2013         return NULL;
2014 }
2015
2016 struct lnet_ni *
2017 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
2018 {
2019         struct lnet_ni          *ni;
2020         struct lnet_net         *net = mynet;
2021
2022         if (prev == NULL) {
2023                 if (net == NULL)
2024                         net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
2025                                         net_list);
2026                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2027                                 ni_netlist);
2028
2029                 return ni;
2030         }
2031
2032         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
2033                 /* if you reached the end of the ni list and the net is
2034                  * specified, then there are no more nis in that net */
2035                 if (net != NULL)
2036                         return NULL;
2037
2038                 /* we reached the end of this net ni list. move to the
2039                  * next net */
2040                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
2041                         /* no more nets and no more NIs. */
2042                         return NULL;
2043
2044                 /* get the next net */
2045                 net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
2046                                  net_list);
2047                 /* get the ni on it */
2048                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2049                                 ni_netlist);
2050
2051                 return ni;
2052         }
2053
2054         /* there are more nis left */
2055         ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
2056
2057         return ni;
2058 }
2059
2060 int
2061 lnet_get_net_config(struct lnet_ioctl_config_data *config)
2062 {
2063         struct lnet_ni *ni;
2064         int cpt;
2065         int rc = -ENOENT;
2066         int idx = config->cfg_count;
2067
2068         cpt = lnet_net_lock_current();
2069
2070         ni = lnet_get_ni_idx_locked(idx);
2071
2072         if (ni != NULL) {
2073                 rc = 0;
2074                 lnet_ni_lock(ni);
2075                 lnet_fill_ni_info(ni, config);
2076                 lnet_ni_unlock(ni);
2077         }
2078
2079         lnet_net_unlock(cpt);
2080         return rc;
2081 }
2082
2083 int
2084 lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
2085 {
2086         char                    *nets = conf->cfg_config_u.cfg_net.net_intf;
2087         struct lnet_ping_info   *pinfo;
2088         lnet_handle_md_t        md_handle;
2089         struct lnet_net         *net;
2090         struct list_head        net_head;
2091         int                     rc;
2092         lnet_remotenet_t        *rnet;
2093         int                     net_ni_count;
2094         int                     num_acceptor_nets;
2095         __u32                   net_type;
2096         struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
2097
2098         INIT_LIST_HEAD(&net_head);
2099
2100         if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
2101                 lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
2102
2103         /* Create a net/ni structures for the network string */
2104         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2105         if (rc <= 0)
2106                 return rc == 0 ? -EINVAL : rc;
2107
2108         mutex_lock(&the_lnet.ln_api_mutex);
2109
2110         if (rc > 1) {
2111                 rc = -EINVAL; /* only add one network per call */
2112                 goto failed0;
2113         }
2114
2115         net = list_entry(net_head.next, struct lnet_net, net_list);
2116
2117         lnet_net_lock(LNET_LOCK_EX);
2118         rnet = lnet_find_rnet_locked(net->net_id);
2119         lnet_net_unlock(LNET_LOCK_EX);
2120         /* make sure that the net added doesn't invalidate the current
2121          * configuration LNet is keeping */
2122         if (rnet != NULL) {
2123                 CERROR("Adding net %s will invalidate routing configuration\n",
2124                        nets);
2125                 rc = -EUSERS;
2126                 goto failed0;
2127         }
2128
2129         /*
2130          * make sure you calculate the correct number of slots in the ping
2131          * info. Since the ping info is a flattened list of all the NIs,
2132          * we should allocate enough slots to accomodate the number of NIs
2133          * which will be added.
2134          *
2135          * We can use lnet_get_net_ni_count_locked() since the net is not
2136          * on a public list yet, so locking is not a problem
2137          */
2138         net_ni_count = lnet_get_net_ni_count_locked(net);
2139
2140         rc = lnet_ping_info_setup(&pinfo, &md_handle,
2141                                   net_ni_count + lnet_get_ni_count(),
2142                                   false);
2143         if (rc != 0)
2144                 goto failed0;
2145
2146         list_del_init(&net->net_list);
2147
2148         if (lnd_tunables)
2149                 memcpy(&net->net_tunables,
2150                        &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
2151
2152         /*
2153          * before starting this network get a count of the current TCP
2154          * networks which require the acceptor thread running. If that
2155          * count is == 0 before we start up this network, then we'd want to
2156          * start up the acceptor thread after starting up this network
2157          */
2158         num_acceptor_nets = lnet_count_acceptor_nets();
2159
2160         /*
2161          * lnd_startup_lndnet() can deallocate 'net' even if it it returns
2162          * success, because we endded up adding interfaces to an existing
2163          * network. So grab the net_type now
2164          */
2165         net_type = LNET_NETTYP(net->net_id);
2166
2167         rc = lnet_startup_lndnet(net,
2168                                  (lnd_tunables) ? &lnd_tunables->lt_tun : NULL);
2169         if (rc < 0)
2170                 goto failed1;
2171
2172         /*
2173          * Start the acceptor thread if this is the first network
2174          * being added that requires the thread.
2175          */
2176         if (net_type == SOCKLND && num_acceptor_nets == 0)
2177         {
2178                 rc = lnet_acceptor_start();
2179                 if (rc < 0) {
2180                         /* shutdown the net that we just started */
2181                         CERROR("Failed to start up acceptor thread\n");
2182                         /*
2183                          * Note that if we needed to start the acceptor
2184                          * thread, then 'net' must have been the first TCP
2185                          * network, therefore was unique, and therefore
2186                          * wasn't deallocated by lnet_startup_lndnet()
2187                          */
2188                         lnet_shutdown_lndnet(net);
2189                         goto failed1;
2190                 }
2191         }
2192
2193         lnet_ping_target_update(pinfo, md_handle);
2194         mutex_unlock(&the_lnet.ln_api_mutex);
2195
2196         return 0;
2197
2198 failed1:
2199         lnet_ping_md_unlink(pinfo, &md_handle);
2200         lnet_ping_info_free(pinfo);
2201 failed0:
2202         mutex_unlock(&the_lnet.ln_api_mutex);
2203         while (!list_empty(&net_head)) {
2204                 net = list_entry(net_head.next, struct lnet_net, net_list);
2205                 list_del_init(&net->net_list);
2206                 lnet_net_free(net);
2207         }
2208         return rc;
2209 }
2210
2211 int
2212 lnet_dyn_del_ni(__u32 net_id)
2213 {
2214         struct lnet_net  *net;
2215         struct lnet_ping_info *pinfo;
2216         lnet_handle_md_t  md_handle;
2217         int               rc;
2218         int               net_ni_count;
2219
2220         /* don't allow userspace to shutdown the LOLND */
2221         if (LNET_NETTYP(net_id) == LOLND)
2222                 return -EINVAL;
2223
2224         mutex_lock(&the_lnet.ln_api_mutex);
2225
2226         lnet_net_lock(0);
2227
2228         net = lnet_get_net_locked(net_id);
2229         if (net == NULL) {
2230                 rc = -EINVAL;
2231                 goto out;
2232         }
2233
2234         net_ni_count = lnet_get_net_ni_count_locked(net);
2235
2236         lnet_net_unlock(0);
2237
2238         /* create and link a new ping info, before removing the old one */
2239         rc = lnet_ping_info_setup(&pinfo, &md_handle,
2240                                   lnet_get_ni_count() - net_ni_count, false);
2241         if (rc != 0)
2242                 goto out;
2243
2244         lnet_shutdown_lndnet(net);
2245
2246         if (lnet_count_acceptor_nets() == 0)
2247                 lnet_acceptor_stop();
2248
2249         lnet_ping_target_update(pinfo, md_handle);
2250
2251 out:
2252         mutex_unlock(&the_lnet.ln_api_mutex);
2253
2254         return rc;
2255 }
2256
2257 void lnet_incr_dlc_seq(void)
2258 {
2259         atomic_inc(&lnet_dlc_seq_no);
2260 }
2261
2262 __u32 lnet_get_dlc_seq_locked(void)
2263 {
2264         return atomic_read(&lnet_dlc_seq_no);
2265 }
2266
2267 /**
2268  * LNet ioctl handler.
2269  *
2270  */
2271 int
2272 LNetCtl(unsigned int cmd, void *arg)
2273 {
2274         struct libcfs_ioctl_data *data = arg;
2275         struct lnet_ioctl_config_data *config;
2276         lnet_process_id_t         id = {0};
2277         lnet_ni_t                *ni;
2278         int                       rc;
2279
2280         BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
2281                      sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);
2282
2283         switch (cmd) {
2284         case IOC_LIBCFS_GET_NI:
2285                 rc = LNetGetId(data->ioc_count, &id);
2286                 data->ioc_nid = id.nid;
2287                 return rc;
2288
2289         case IOC_LIBCFS_FAIL_NID:
2290                 return lnet_fail_nid(data->ioc_nid, data->ioc_count);
2291
2292         case IOC_LIBCFS_ADD_ROUTE:
2293                 config = arg;
2294
2295                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2296                         return -EINVAL;
2297
2298                 mutex_lock(&the_lnet.ln_api_mutex);
2299                 rc = lnet_add_route(config->cfg_net,
2300                                     config->cfg_config_u.cfg_route.rtr_hop,
2301                                     config->cfg_nid,
2302                                     config->cfg_config_u.cfg_route.
2303                                         rtr_priority);
2304                 if (rc == 0) {
2305                         rc = lnet_check_routes();
2306                         if (rc != 0)
2307                                 lnet_del_route(config->cfg_net,
2308                                                config->cfg_nid);
2309                 }
2310                 mutex_unlock(&the_lnet.ln_api_mutex);
2311                 return rc;
2312
2313         case IOC_LIBCFS_DEL_ROUTE:
2314                 config = arg;
2315
2316                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2317                         return -EINVAL;
2318
2319                 mutex_lock(&the_lnet.ln_api_mutex);
2320                 rc = lnet_del_route(config->cfg_net, config->cfg_nid);
2321                 mutex_unlock(&the_lnet.ln_api_mutex);
2322                 return rc;
2323
2324         case IOC_LIBCFS_GET_ROUTE:
2325                 config = arg;
2326
2327                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2328                         return -EINVAL;
2329
2330                 return lnet_get_route(config->cfg_count,
2331                                       &config->cfg_net,
2332                                       &config->cfg_config_u.cfg_route.rtr_hop,
2333                                       &config->cfg_nid,
2334                                       &config->cfg_config_u.cfg_route.rtr_flags,
2335                                       &config->cfg_config_u.cfg_route.
2336                                         rtr_priority);
2337
2338         case IOC_LIBCFS_GET_NET: {
2339                 size_t total = sizeof(*config) +
2340                                sizeof(struct lnet_ioctl_net_config);
2341                 config = arg;
2342
2343                 if (config->cfg_hdr.ioc_len < total)
2344                         return -EINVAL;
2345
2346                 return lnet_get_net_config(config);
2347         }
2348
2349         case IOC_LIBCFS_GET_LNET_STATS:
2350         {
2351                 struct lnet_ioctl_lnet_stats *lnet_stats = arg;
2352
2353                 if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
2354                         return -EINVAL;
2355
2356                 lnet_counters_get(&lnet_stats->st_cntrs);
2357                 return 0;
2358         }
2359
2360         case IOC_LIBCFS_CONFIG_RTR:
2361                 config = arg;
2362
2363                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2364                         return -EINVAL;
2365
2366                 mutex_lock(&the_lnet.ln_api_mutex);
2367                 if (config->cfg_config_u.cfg_buffers.buf_enable) {
2368                         rc = lnet_rtrpools_enable();
2369                         mutex_unlock(&the_lnet.ln_api_mutex);
2370                         return rc;
2371                 }
2372                 lnet_rtrpools_disable();
2373                 mutex_unlock(&the_lnet.ln_api_mutex);
2374                 return 0;
2375
2376         case IOC_LIBCFS_ADD_BUF:
2377                 config = arg;
2378
2379                 if (config->cfg_hdr.ioc_len < sizeof(*config))
2380                         return -EINVAL;
2381
2382                 mutex_lock(&the_lnet.ln_api_mutex);
2383                 rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
2384                                                 buf_tiny,
2385                                           config->cfg_config_u.cfg_buffers.
2386                                                 buf_small,
2387                                           config->cfg_config_u.cfg_buffers.
2388                                                 buf_large);
2389                 mutex_unlock(&the_lnet.ln_api_mutex);
2390                 return rc;
2391
2392         case IOC_LIBCFS_GET_BUF: {
2393                 struct lnet_ioctl_pool_cfg *pool_cfg;
2394                 size_t total = sizeof(*config) + sizeof(*pool_cfg);
2395
2396                 config = arg;
2397
2398                 if (config->cfg_hdr.ioc_len < total)
2399                         return -EINVAL;
2400
2401                 pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;
2402                 return lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
2403         }
2404
2405         case IOC_LIBCFS_ADD_PEER_NI: {
2406                 struct lnet_ioctl_peer_cfg *cfg = arg;
2407
2408                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
2409                         return -EINVAL;
2410
2411                 return lnet_add_peer_ni_to_peer(cfg->prcfg_key_nid,
2412                                                 cfg->prcfg_cfg_nid);
2413         }
2414
2415         case IOC_LIBCFS_DEL_PEER_NI: {
2416                 struct lnet_ioctl_peer_cfg *cfg = arg;
2417
2418                 if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
2419                         return -EINVAL;
2420
2421                 return lnet_del_peer_ni_from_peer(cfg->prcfg_key_nid,
2422                                                   cfg->prcfg_cfg_nid);
2423         }
2424
2425         case IOC_LIBCFS_GET_PEER_INFO: {
2426                 struct lnet_ioctl_peer *peer_info = arg;
2427
2428                 if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
2429                         return -EINVAL;
2430
2431                 return lnet_get_peer_ni_info(
2432                    peer_info->pr_count,
2433                    &peer_info->pr_nid,
2434                    peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
2435                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
2436                    &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
2437                    &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
2438                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
2439                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
2440                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_rtr_credits,
2441                    &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
2442         }
2443
2444         case IOC_LIBCFS_GET_PEER_NI: {
2445                 struct lnet_ioctl_peer_cfg *cfg = arg;
2446                 struct lnet_peer_ni_credit_info *lpni_cri;
2447                 size_t total = sizeof(*cfg) + sizeof(*lpni_cri);
2448
2449                 if (cfg->prcfg_hdr.ioc_len < total)
2450                         return -EINVAL;
2451
2452                 lpni_cri = (struct lnet_peer_ni_credit_info*) cfg->prcfg_bulk;
2453
2454                 return lnet_get_peer_info(cfg->prcfg_idx, &cfg->prcfg_key_nid,
2455                                           &cfg->prcfg_cfg_nid, lpni_cri);
2456         }
2457
2458         case IOC_LIBCFS_NOTIFY_ROUTER: {
2459                 unsigned long jiffies_passed;
2460
2461                 jiffies_passed = ktime_get_real_seconds() - data->ioc_u64[0];
2462                 jiffies_passed = cfs_time_seconds(jiffies_passed);
2463
2464                 return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
2465                                    jiffies - jiffies_passed);
2466         }
2467
2468         case IOC_LIBCFS_LNET_DIST:
2469                 rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
2470                 if (rc < 0 && rc != -EHOSTUNREACH)
2471                         return rc;
2472
2473                 data->ioc_u32[0] = rc;
2474                 return 0;
2475
2476         case IOC_LIBCFS_TESTPROTOCOMPAT:
2477                 lnet_net_lock(LNET_LOCK_EX);
2478                 the_lnet.ln_testprotocompat = data->ioc_flags;
2479                 lnet_net_unlock(LNET_LOCK_EX);
2480                 return 0;
2481
2482         case IOC_LIBCFS_LNET_FAULT:
2483                 return lnet_fault_ctl(data->ioc_flags, data);
2484
2485         case IOC_LIBCFS_PING: {
2486                 signed long timeout;
2487
2488                 id.nid = data->ioc_nid;
2489                 id.pid = data->ioc_u32[0];
2490
2491                 /* Don't block longer than 2 minutes */
2492                 if (data->ioc_u32[1] > 120 * MSEC_PER_SEC)
2493                         return -EINVAL;
2494
2495                 /* If timestamp is negative then disable timeout */
2496                 if ((s32)data->ioc_u32[1] < 0)
2497                         timeout = MAX_SCHEDULE_TIMEOUT;
2498                 else
2499                         timeout = msecs_to_jiffies(data->ioc_u32[1]);
2500
2501                 rc = lnet_ping(id, timeout, data->ioc_pbuf1,
2502                                data->ioc_plen1 / sizeof(lnet_process_id_t));
2503                 if (rc < 0)
2504                         return rc;
2505                 data->ioc_count = rc;
2506                 return 0;
2507         }
2508         default:
2509                 ni = lnet_net2ni(data->ioc_net);
2510                 if (ni == NULL)
2511                         return -EINVAL;
2512
2513                 if (ni->ni_net->net_lnd->lnd_ctl == NULL)
2514                         rc = -EINVAL;
2515                 else
2516                         rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
2517
2518                 return rc;
2519         }
2520         /* not reached */
2521 }
2522 EXPORT_SYMBOL(LNetCtl);
2523
2524 void LNetDebugPeer(lnet_process_id_t id)
2525 {
2526         lnet_debug_peer(id.nid);
2527 }
2528 EXPORT_SYMBOL(LNetDebugPeer);
2529
2530 /**
2531  * Retrieve the lnet_process_id_t ID of LNet interface at \a index. Note that
2532  * all interfaces share a same PID, as requested by LNetNIInit().
2533  *
2534  * \param index Index of the interface to look up.
2535  * \param id On successful return, this location will hold the
2536  * lnet_process_id_t ID of the interface.
2537  *
2538  * \retval 0 If an interface exists at \a index.
2539  * \retval -ENOENT If no interface has been found.
2540  */
2541 int
2542 LNetGetId(unsigned int index, lnet_process_id_t *id)
2543 {
2544         struct lnet_ni   *ni;
2545         struct lnet_net  *net;
2546         int               cpt;
2547         int               rc = -ENOENT;
2548
2549         LASSERT(the_lnet.ln_refcount > 0);
2550
2551         cpt = lnet_net_lock_current();
2552
2553         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2554                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2555                         if (index-- != 0)
2556                                 continue;
2557
2558                         id->nid = ni->ni_nid;
2559                         id->pid = the_lnet.ln_pid;
2560                         rc = 0;
2561                         break;
2562                 }
2563         }
2564
2565         lnet_net_unlock(cpt);
2566         return rc;
2567 }
2568 EXPORT_SYMBOL(LNetGetId);
2569
2570 /**
2571  * Print a string representation of handle \a h into buffer \a str of
2572  * \a len bytes.
2573  */
2574 void
2575 LNetSnprintHandle(char *str, int len, lnet_handle_any_t h)
2576 {
2577         snprintf(str, len, "%#llx", h.cookie);
2578 }
2579 EXPORT_SYMBOL(LNetSnprintHandle);
2580
2581 static int lnet_ping(lnet_process_id_t id, signed long timeout,
2582                      lnet_process_id_t __user *ids, int n_ids)
2583 {
2584         lnet_handle_eq_t     eqh;
2585         lnet_handle_md_t     mdh;
2586         lnet_event_t         event;
2587         lnet_md_t            md = { NULL };
2588         int                  which;
2589         int                  unlinked = 0;
2590         int                  replied = 0;
2591         const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC);
2592         int                  infosz;
2593         struct lnet_ping_info    *info;
2594         lnet_process_id_t    tmpid;
2595         int                  i;
2596         int                  nob;
2597         int                  rc;
2598         int                  rc2;
2599         sigset_t         blocked;
2600
2601         infosz = offsetof(struct lnet_ping_info, pi_ni[n_ids]);
2602
2603         /* n_ids limit is arbitrary */
2604         if (n_ids <= 0 || n_ids > 20 || id.nid == LNET_NID_ANY)
2605                 return -EINVAL;
2606
2607         if (id.pid == LNET_PID_ANY)
2608                 id.pid = LNET_PID_LUSTRE;
2609
2610         LIBCFS_ALLOC(info, infosz);
2611         if (info == NULL)
2612                 return -ENOMEM;
2613
2614         /* NB 2 events max (including any unlink event) */
2615         rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
2616         if (rc != 0) {
2617                 CERROR("Can't allocate EQ: %d\n", rc);
2618                 goto out_0;
2619         }
2620
2621         /* initialize md content */
2622         md.start     = info;
2623         md.length    = infosz;
2624         md.threshold = 2; /*GET/REPLY*/
2625         md.max_size  = 0;
2626         md.options   = LNET_MD_TRUNCATE;
2627         md.user_ptr  = NULL;
2628         md.eq_handle = eqh;
2629
2630         rc = LNetMDBind(md, LNET_UNLINK, &mdh);
2631         if (rc != 0) {
2632                 CERROR("Can't bind MD: %d\n", rc);
2633                 goto out_1;
2634         }
2635
2636         rc = LNetGet(LNET_NID_ANY, mdh, id,
2637                      LNET_RESERVED_PORTAL,
2638                      LNET_PROTO_PING_MATCHBITS, 0);
2639
2640         if (rc != 0) {
2641                 /* Don't CERROR; this could be deliberate! */
2642
2643                 rc2 = LNetMDUnlink(mdh);
2644                 LASSERT(rc2 == 0);
2645
2646                 /* NB must wait for the UNLINK event below... */
2647                 unlinked = 1;
2648                 timeout = a_long_time;
2649         }
2650
2651         do {
2652                 /* MUST block for unlink to complete */
2653                 if (unlinked)
2654                         blocked = cfs_block_allsigs();
2655
2656                 rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which);
2657
2658                 if (unlinked)
2659                         cfs_restore_sigs(blocked);
2660
2661                 CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
2662                        (rc2 <= 0) ? -1 : event.type,
2663                        (rc2 <= 0) ? -1 : event.status,
2664                        (rc2 > 0 && event.unlinked) ? " unlinked" : "");
2665
2666                 LASSERT(rc2 != -EOVERFLOW);     /* can't miss anything */
2667
2668                 if (rc2 <= 0 || event.status != 0) {
2669                         /* timeout or error */
2670                         if (!replied && rc == 0)
2671                                 rc = (rc2 < 0) ? rc2 :
2672                                      (rc2 == 0) ? -ETIMEDOUT :
2673                                      event.status;
2674
2675                         if (!unlinked) {
2676                                 /* Ensure completion in finite time... */
2677                                 LNetMDUnlink(mdh);
2678                                 /* No assertion (racing with network) */
2679                                 unlinked = 1;
2680                                 timeout = a_long_time;
2681                         } else if (rc2 == 0) {
2682                                 /* timed out waiting for unlink */
2683                                 CWARN("ping %s: late network completion\n",
2684                                       libcfs_id2str(id));
2685                         }
2686                 } else if (event.type == LNET_EVENT_REPLY) {
2687                         replied = 1;
2688                         rc = event.mlength;
2689                 }
2690
2691         } while (rc2 <= 0 || !event.unlinked);
2692
2693         if (!replied) {
2694                 if (rc >= 0)
2695                         CWARN("%s: Unexpected rc >= 0 but no reply!\n",
2696                               libcfs_id2str(id));
2697                 rc = -EIO;
2698                 goto out_1;
2699         }
2700
2701         nob = rc;
2702         LASSERT(nob >= 0 && nob <= infosz);
2703
2704         rc = -EPROTO;                           /* if I can't parse... */
2705
2706         if (nob < 8) {
2707                 /* can't check magic/version */
2708                 CERROR("%s: ping info too short %d\n",
2709                        libcfs_id2str(id), nob);
2710                 goto out_1;
2711         }
2712
2713         if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
2714                 lnet_swap_pinginfo(info);
2715         } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
2716                 CERROR("%s: Unexpected magic %08x\n",
2717                        libcfs_id2str(id), info->pi_magic);
2718                 goto out_1;
2719         }
2720
2721         if ((info->pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
2722                 CERROR("%s: ping w/o NI status: 0x%x\n",
2723                        libcfs_id2str(id), info->pi_features);
2724                 goto out_1;
2725         }
2726
2727         if (nob < offsetof(struct lnet_ping_info, pi_ni[0])) {
2728                 CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
2729                        nob, (int)offsetof(struct lnet_ping_info, pi_ni[0]));
2730                 goto out_1;
2731         }
2732
2733         if (info->pi_nnis < n_ids)
2734                 n_ids = info->pi_nnis;
2735
2736         if (nob < offsetof(struct lnet_ping_info, pi_ni[n_ids])) {
2737                 CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
2738                        nob, (int)offsetof(struct lnet_ping_info, pi_ni[n_ids]));
2739                 goto out_1;
2740         }
2741
2742         rc = -EFAULT;                           /* If I SEGV... */
2743
2744         memset(&tmpid, 0, sizeof(tmpid));
2745         for (i = 0; i < n_ids; i++) {
2746                 tmpid.pid = info->pi_pid;
2747                 tmpid.nid = info->pi_ni[i].ns_nid;
2748                 if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
2749                         goto out_1;
2750         }
2751         rc = info->pi_nnis;
2752
2753  out_1:
2754         rc2 = LNetEQFree(eqh);
2755         if (rc2 != 0)
2756                 CERROR("rc2 %d\n", rc2);
2757         LASSERT(rc2 == 0);
2758
2759  out_0:
2760         LIBCFS_FREE(info, infosz);
2761         return rc;
2762 }