Whamcloud - gitweb
LU-9480 lnet: rename lnet_add/del_peer_ni_to/from_peer()
[fs/lustre-release.git] / lnet / lnet / api-ni.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32
33 #define DEBUG_SUBSYSTEM S_LNET
34 #include <linux/log2.h>
35 #include <linux/ktime.h>
36 #include <linux/moduleparam.h>
37
38 #include <lnet/lib-lnet.h>
39
/* LNet initialization messages go to the console debug mask */
#define D_LNI D_CONSOLE

struct lnet the_lnet;           /* THE state of the network */
EXPORT_SYMBOL(the_lnet);

/* "ip2nets" rules mapping local IP addresses to LNet networks */
static char *ip2nets = "";
module_param(ip2nets, charp, 0444);
MODULE_PARM_DESC(ip2nets, "LNET network <- IP table");

/* explicit list of local networks to configure (exclusive with ip2nets) */
static char *networks = "";
module_param(networks, charp, 0444);
MODULE_PARM_DESC(networks, "local networks");

/* static routes for reaching non-local networks */
static char *routes = "";
module_param(routes, charp, 0444);
MODULE_PARM_DESC(routes, "routes to non-local networks");

/* number of buckets in the remote networks hash table */
static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
module_param(rnet_htable_size, int, 0444);
MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");

static int use_tcp_bonding = false;
module_param(use_tcp_bonding, int, 0444);
MODULE_PARM_DESC(use_tcp_bonding,
                 "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");

unsigned int lnet_numa_range = 0;
module_param(lnet_numa_range, uint, 0444);
MODULE_PARM_DESC(lnet_numa_range,
                "NUMA range to consider during Multi-Rail selection");

/* runtime-writable through intf_max_set(), which clamps the value to
 * at least LNET_INTERFACES_MIN */
static int lnet_interfaces_max = LNET_INTERFACES_MAX_DEFAULT;
static int intf_max_set(const char *val, struct kernel_param *kp);
module_param_call(lnet_interfaces_max, intf_max_set, param_get_int,
                  &lnet_interfaces_max, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(lnet_interfaces_max,
                "Maximum number of interfaces in a node.");

/*
 * This sequence number keeps track of how many times DLC was used to
 * update the local NIs. It is incremented when a NI is added or
 * removed and checked when sending a message to determine if there is
 * a need to re-run the selection algorithm. See lnet_select_pathway()
 * for more details on its usage.
 */
static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0);

/* forward declaration; defined later in this file */
static int lnet_ping(struct lnet_process_id id, signed long timeout,
                     struct lnet_process_id __user *ids, int n_ids);
90 static int
91 intf_max_set(const char *val, struct kernel_param *kp)
92 {
93         int value, rc;
94
95         rc = kstrtoint(val, 0, &value);
96         if (rc) {
97                 CERROR("Invalid module parameter value for 'lnet_interfaces_max'\n");
98                 return rc;
99         }
100
101         if (value < LNET_INTERFACES_MIN) {
102                 CWARN("max interfaces provided are too small, setting to %d\n",
103                       LNET_INTERFACES_MIN);
104                 value = LNET_INTERFACES_MIN;
105         }
106
107         *(int *)kp->arg = value;
108
109         return 0;
110 }
111
/* Return the "routes" module parameter string (may be empty). */
static char *
lnet_get_routes(void)
{
        return routes;
}
117
118 static char *
119 lnet_get_networks(void)
120 {
121         char   *nets;
122         int     rc;
123
124         if (*networks != 0 && *ip2nets != 0) {
125                 LCONSOLE_ERROR_MSG(0x101, "Please specify EITHER 'networks' or "
126                                    "'ip2nets' but not both at once\n");
127                 return NULL;
128         }
129
130         if (*ip2nets != 0) {
131                 rc = lnet_parse_ip2nets(&nets, ip2nets);
132                 return (rc == 0) ? nets : NULL;
133         }
134
135         if (*networks != 0)
136                 return networks;
137
138         return "tcp";
139 }
140
/* Initialize the global LNet waitqueues, spinlock and mutexes. */
static void
lnet_init_locks(void)
{
        spin_lock_init(&the_lnet.ln_eq_wait_lock);
        init_waitqueue_head(&the_lnet.ln_eq_waitq);
        init_waitqueue_head(&the_lnet.ln_rc_waitq);
        mutex_init(&the_lnet.ln_lnd_mutex);
        mutex_init(&the_lnet.ln_api_mutex);
}
150
/* Counterpart of lnet_init_locks(); currently nothing to tear down. */
static void
lnet_fini_locks(void)
{
}
155
/* dedicated kmem caches for frequently allocated LNet descriptors */
struct kmem_cache *lnet_mes_cachep;        /* MEs kmem_cache */
struct kmem_cache *lnet_small_mds_cachep;  /* <= LNET_SMALL_MD_SIZE bytes
                                            *  MDs kmem_cache */
159
160 static int
161 lnet_descriptor_setup(void)
162 {
163         /* create specific kmem_cache for MEs and small MDs (i.e., originally
164          * allocated in <size-xxx> kmem_cache).
165          */
166         lnet_mes_cachep = kmem_cache_create("lnet_MEs", sizeof(struct lnet_me),
167                                             0, 0, NULL);
168         if (!lnet_mes_cachep)
169                 return -ENOMEM;
170
171         lnet_small_mds_cachep = kmem_cache_create("lnet_small_MDs",
172                                                   LNET_SMALL_MD_SIZE, 0, 0,
173                                                   NULL);
174         if (!lnet_small_mds_cachep)
175                 return -ENOMEM;
176
177         return 0;
178 }
179
180 static void
181 lnet_descriptor_cleanup(void)
182 {
183
184         if (lnet_small_mds_cachep) {
185                 kmem_cache_destroy(lnet_small_mds_cachep);
186                 lnet_small_mds_cachep = NULL;
187         }
188
189         if (lnet_mes_cachep) {
190                 kmem_cache_destroy(lnet_mes_cachep);
191                 lnet_mes_cachep = NULL;
192         }
193 }
194
195 static int
196 lnet_create_remote_nets_table(void)
197 {
198         int               i;
199         struct list_head *hash;
200
201         LASSERT(the_lnet.ln_remote_nets_hash == NULL);
202         LASSERT(the_lnet.ln_remote_nets_hbits > 0);
203         LIBCFS_ALLOC(hash, LNET_REMOTE_NETS_HASH_SIZE * sizeof(*hash));
204         if (hash == NULL) {
205                 CERROR("Failed to create remote nets hash table\n");
206                 return -ENOMEM;
207         }
208
209         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
210                 INIT_LIST_HEAD(&hash[i]);
211         the_lnet.ln_remote_nets_hash = hash;
212         return 0;
213 }
214
215 static void
216 lnet_destroy_remote_nets_table(void)
217 {
218         int i;
219
220         if (the_lnet.ln_remote_nets_hash == NULL)
221                 return;
222
223         for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++)
224                 LASSERT(list_empty(&the_lnet.ln_remote_nets_hash[i]));
225
226         LIBCFS_FREE(the_lnet.ln_remote_nets_hash,
227                     LNET_REMOTE_NETS_HASH_SIZE *
228                     sizeof(the_lnet.ln_remote_nets_hash[0]));
229         the_lnet.ln_remote_nets_hash = NULL;
230 }
231
232 static void
233 lnet_destroy_locks(void)
234 {
235         if (the_lnet.ln_res_lock != NULL) {
236                 cfs_percpt_lock_free(the_lnet.ln_res_lock);
237                 the_lnet.ln_res_lock = NULL;
238         }
239
240         if (the_lnet.ln_net_lock != NULL) {
241                 cfs_percpt_lock_free(the_lnet.ln_net_lock);
242                 the_lnet.ln_net_lock = NULL;
243         }
244
245         lnet_fini_locks();
246 }
247
248 static int
249 lnet_create_locks(void)
250 {
251         lnet_init_locks();
252
253         the_lnet.ln_res_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
254         if (the_lnet.ln_res_lock == NULL)
255                 goto failed;
256
257         the_lnet.ln_net_lock = cfs_percpt_lock_alloc(lnet_cpt_table());
258         if (the_lnet.ln_net_lock == NULL)
259                 goto failed;
260
261         return 0;
262
263  failed:
264         lnet_destroy_locks();
265         return -ENOMEM;
266 }
267
/*
 * Compile-time checks that the sizes and offsets of on-the-wire
 * structures, and the protocol constants, match the wire protocol.
 * Any mismatch would break interoperability between nodes, so these
 * values must never change incompatibly.
 */
static void lnet_assert_wire_constants(void)
{
        /* Wire protocol assertions generated by 'wirecheck'
         * running on Linux robert.bartonsoftware.com 2.6.8-1.521
         * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux
         * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */

        /* Constants... */
        CLASSERT(LNET_PROTO_TCP_MAGIC == 0xeebc0ded);
        CLASSERT(LNET_PROTO_TCP_VERSION_MAJOR == 1);
        CLASSERT(LNET_PROTO_TCP_VERSION_MINOR == 0);
        CLASSERT(LNET_MSG_ACK == 0);
        CLASSERT(LNET_MSG_PUT == 1);
        CLASSERT(LNET_MSG_GET == 2);
        CLASSERT(LNET_MSG_REPLY == 3);
        CLASSERT(LNET_MSG_HELLO == 4);

        /* Checks for struct lnet_handle_wire */
        CLASSERT((int)sizeof(struct lnet_handle_wire) == 16);
        CLASSERT((int)offsetof(struct lnet_handle_wire, wh_interface_cookie) == 0);
        CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_interface_cookie) == 8);
        CLASSERT((int)offsetof(struct lnet_handle_wire, wh_object_cookie) == 8);
        CLASSERT((int)sizeof(((struct lnet_handle_wire *)0)->wh_object_cookie) == 8);

        /* Checks for struct lnet_magicversion */
        CLASSERT((int)sizeof(struct lnet_magicversion) == 8);
        CLASSERT((int)offsetof(struct lnet_magicversion, magic) == 0);
        CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->magic) == 4);
        CLASSERT((int)offsetof(struct lnet_magicversion, version_major) == 4);
        CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->version_major) == 2);
        CLASSERT((int)offsetof(struct lnet_magicversion, version_minor) == 6);
        CLASSERT((int)sizeof(((struct lnet_magicversion *)0)->version_minor) == 2);

        /* Checks for struct lnet_hdr */
        CLASSERT((int)sizeof(struct lnet_hdr) == 72);
        CLASSERT((int)offsetof(struct lnet_hdr, dest_nid) == 0);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->dest_nid) == 8);
        CLASSERT((int)offsetof(struct lnet_hdr, src_nid) == 8);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->src_nid) == 8);
        CLASSERT((int)offsetof(struct lnet_hdr, dest_pid) == 16);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->dest_pid) == 4);
        CLASSERT((int)offsetof(struct lnet_hdr, src_pid) == 20);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->src_pid) == 4);
        CLASSERT((int)offsetof(struct lnet_hdr, type) == 24);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->type) == 4);
        CLASSERT((int)offsetof(struct lnet_hdr, payload_length) == 28);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->payload_length) == 4);
        CLASSERT((int)offsetof(struct lnet_hdr, msg) == 32);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg) == 40);

        /* Ack */
        CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.dst_wmd) == 32);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.dst_wmd) == 16);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.match_bits) == 48);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.match_bits) == 8);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.ack.mlength) == 56);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.ack.mlength) == 4);

        /* Put */
        CLASSERT((int)offsetof(struct lnet_hdr, msg.put.ack_wmd) == 32);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.ack_wmd) == 16);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.put.match_bits) == 48);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.match_bits) == 8);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.put.hdr_data) == 56);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.hdr_data) == 8);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.put.ptl_index) == 64);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.ptl_index) == 4);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.put.offset) == 68);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.put.offset) == 4);

        /* Get */
        CLASSERT((int)offsetof(struct lnet_hdr, msg.get.return_wmd) == 32);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.return_wmd) == 16);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.get.match_bits) == 48);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.match_bits) == 8);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.get.ptl_index) == 56);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.ptl_index) == 4);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.get.src_offset) == 60);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.src_offset) == 4);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.get.sink_length) == 64);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.get.sink_length) == 4);

        /* Reply */
        CLASSERT((int)offsetof(struct lnet_hdr, msg.reply.dst_wmd) == 32);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.reply.dst_wmd) == 16);

        /* Hello */
        CLASSERT((int)offsetof(struct lnet_hdr, msg.hello.incarnation) == 32);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.hello.incarnation) == 8);
        CLASSERT((int)offsetof(struct lnet_hdr, msg.hello.type) == 40);
        CLASSERT((int)sizeof(((struct lnet_hdr *)0)->msg.hello.type) == 4);

        /* Checks for struct lnet_ni_status and related constants */
        CLASSERT(LNET_NI_STATUS_INVALID == 0x00000000);
        CLASSERT(LNET_NI_STATUS_UP == 0x15aac0de);
        CLASSERT(LNET_NI_STATUS_DOWN == 0xdeadface);

        /* Checks for struct lnet_ni_status */
        CLASSERT((int)sizeof(struct lnet_ni_status) == 16);
        CLASSERT((int)offsetof(struct lnet_ni_status, ns_nid) == 0);
        CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) == 8);
        CLASSERT((int)offsetof(struct lnet_ni_status, ns_status) == 8);
        CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_status) == 4);
        CLASSERT((int)offsetof(struct lnet_ni_status, ns_unused) == 12);
        CLASSERT((int)sizeof(((struct lnet_ni_status *)0)->ns_unused) == 4);

        /* Checks for struct lnet_ping_info and related constants */
        CLASSERT(LNET_PROTO_PING_MAGIC == 0x70696E67);
        CLASSERT(LNET_PING_FEAT_INVAL == 0);
        CLASSERT(LNET_PING_FEAT_BASE == 1);
        CLASSERT(LNET_PING_FEAT_NI_STATUS == 2);
        CLASSERT(LNET_PING_FEAT_RTE_DISABLED == 4);
        CLASSERT(LNET_PING_FEAT_MULTI_RAIL == 8);
        CLASSERT(LNET_PING_FEAT_DISCOVERY == 16);
        CLASSERT(LNET_PING_FEAT_BITS == 31);

        /* Checks for struct lnet_ping_info */
        CLASSERT((int)sizeof(struct lnet_ping_info) == 16);
        CLASSERT((int)offsetof(struct lnet_ping_info, pi_magic) == 0);
        CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_magic) == 4);
        CLASSERT((int)offsetof(struct lnet_ping_info, pi_features) == 4);
        CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_features) == 4);
        CLASSERT((int)offsetof(struct lnet_ping_info, pi_pid) == 8);
        CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_pid) == 4);
        CLASSERT((int)offsetof(struct lnet_ping_info, pi_nnis) == 12);
        CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_nnis) == 4);
        CLASSERT((int)offsetof(struct lnet_ping_info, pi_ni) == 16);
        CLASSERT((int)sizeof(((struct lnet_ping_info *)0)->pi_ni) == 0);
}
397
398 static struct lnet_lnd *lnet_find_lnd_by_type(__u32 type)
399 {
400         struct lnet_lnd *lnd;
401         struct list_head *tmp;
402
403         /* holding lnd mutex */
404         list_for_each(tmp, &the_lnet.ln_lnds) {
405                 lnd = list_entry(tmp, struct lnet_lnd, lnd_list);
406
407                 if (lnd->lnd_type == type)
408                         return lnd;
409         }
410         return NULL;
411 }
412
/*
 * Register an LND so networks of its type can be brought up.
 * The type must be a known LND type and must not already be
 * registered; the refcount starts at zero.
 */
void
lnet_register_lnd(struct lnet_lnd *lnd)
{
        mutex_lock(&the_lnet.ln_lnd_mutex);

        LASSERT(libcfs_isknown_lnd(lnd->lnd_type));
        LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == NULL);

        list_add_tail(&lnd->lnd_list, &the_lnet.ln_lnds);
        lnd->lnd_refcount = 0;

        CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type));

        mutex_unlock(&the_lnet.ln_lnd_mutex);
}
EXPORT_SYMBOL(lnet_register_lnd);
429
/*
 * Unregister an LND.  The LND must currently be registered and its
 * refcount must have dropped to zero (no networks still using it).
 */
void
lnet_unregister_lnd(struct lnet_lnd *lnd)
{
        mutex_lock(&the_lnet.ln_lnd_mutex);

        LASSERT(lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
        LASSERT(lnd->lnd_refcount == 0);

        list_del(&lnd->lnd_list);
        CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));

        mutex_unlock(&the_lnet.ln_lnd_mutex);
}
EXPORT_SYMBOL(lnet_unregister_lnd);
444
445 void
446 lnet_counters_get(struct lnet_counters *counters)
447 {
448         struct lnet_counters *ctr;
449         int             i;
450
451         memset(counters, 0, sizeof(*counters));
452
453         lnet_net_lock(LNET_LOCK_EX);
454
455         cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
456                 counters->msgs_max     += ctr->msgs_max;
457                 counters->msgs_alloc   += ctr->msgs_alloc;
458                 counters->errors       += ctr->errors;
459                 counters->send_count   += ctr->send_count;
460                 counters->recv_count   += ctr->recv_count;
461                 counters->route_count  += ctr->route_count;
462                 counters->drop_count   += ctr->drop_count;
463                 counters->send_length  += ctr->send_length;
464                 counters->recv_length  += ctr->recv_length;
465                 counters->route_length += ctr->route_length;
466                 counters->drop_length  += ctr->drop_length;
467
468         }
469         lnet_net_unlock(LNET_LOCK_EX);
470 }
471 EXPORT_SYMBOL(lnet_counters_get);
472
473 void
474 lnet_counters_reset(void)
475 {
476         struct lnet_counters *counters;
477         int             i;
478
479         lnet_net_lock(LNET_LOCK_EX);
480
481         cfs_percpt_for_each(counters, i, the_lnet.ln_counters)
482                 memset(counters, 0, sizeof(struct lnet_counters));
483
484         lnet_net_unlock(LNET_LOCK_EX);
485 }
486
487 static char *
488 lnet_res_type2str(int type)
489 {
490         switch (type) {
491         default:
492                 LBUG();
493         case LNET_COOKIE_TYPE_MD:
494                 return "MD";
495         case LNET_COOKIE_TYPE_ME:
496                 return "ME";
497         case LNET_COOKIE_TYPE_EQ:
498                 return "EQ";
499         }
500 }
501
/*
 * Tear down a resource container: free any EQs/MDs still on the active
 * list (complaining if there were any), release the cookie hash table
 * and mark the container finalized (rec_type == 0).  Active MEs are
 * never expected here -- they live on portals -- hence the LBUG().
 */
static void
lnet_res_container_cleanup(struct lnet_res_container *rec)
{
        int     count = 0;

        if (rec->rec_type == 0) /* not set yet, it's uninitialized */
                return;

        while (!list_empty(&rec->rec_active)) {
                struct list_head *e = rec->rec_active.next;

                list_del_init(e);
                if (rec->rec_type == LNET_COOKIE_TYPE_EQ) {
                        lnet_eq_free(list_entry(e, struct lnet_eq, eq_list));

                } else if (rec->rec_type == LNET_COOKIE_TYPE_MD) {
                        lnet_md_free(list_entry(e, struct lnet_libmd, md_list));

                } else { /* NB: Active MEs should be attached on portals */
                        LBUG();
                }
                count++;
        }

        if (count > 0) {
                /* Found alive MD/ME/EQ, user really should unlink/free
                 * all of them before finalize LNet, but if someone didn't,
                 * we have to recycle garbage for him */
                CERROR("%d active elements on exit of %s container\n",
                       count, lnet_res_type2str(rec->rec_type));
        }

        if (rec->rec_lh_hash != NULL) {
                LIBCFS_FREE(rec->rec_lh_hash,
                            LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
                rec->rec_lh_hash = NULL;
        }

        rec->rec_type = 0; /* mark it as finalized */
}
542
543 static int
544 lnet_res_container_setup(struct lnet_res_container *rec, int cpt, int type)
545 {
546         int     rc = 0;
547         int     i;
548
549         LASSERT(rec->rec_type == 0);
550
551         rec->rec_type = type;
552         INIT_LIST_HEAD(&rec->rec_active);
553
554         rec->rec_lh_cookie = (cpt << LNET_COOKIE_TYPE_BITS) | type;
555
556         /* Arbitrary choice of hash table size */
557         LIBCFS_CPT_ALLOC(rec->rec_lh_hash, lnet_cpt_table(), cpt,
558                          LNET_LH_HASH_SIZE * sizeof(rec->rec_lh_hash[0]));
559         if (rec->rec_lh_hash == NULL) {
560                 rc = -ENOMEM;
561                 goto out;
562         }
563
564         for (i = 0; i < LNET_LH_HASH_SIZE; i++)
565                 INIT_LIST_HEAD(&rec->rec_lh_hash[i]);
566
567         return 0;
568
569 out:
570         CERROR("Failed to setup %s resource container\n",
571                lnet_res_type2str(type));
572         lnet_res_container_cleanup(rec);
573         return rc;
574 }
575
576 static void
577 lnet_res_containers_destroy(struct lnet_res_container **recs)
578 {
579         struct lnet_res_container       *rec;
580         int                             i;
581
582         cfs_percpt_for_each(rec, i, recs)
583                 lnet_res_container_cleanup(rec);
584
585         cfs_percpt_free(recs);
586 }
587
588 static struct lnet_res_container **
589 lnet_res_containers_create(int type)
590 {
591         struct lnet_res_container       **recs;
592         struct lnet_res_container       *rec;
593         int                             rc;
594         int                             i;
595
596         recs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*rec));
597         if (recs == NULL) {
598                 CERROR("Failed to allocate %s resource containers\n",
599                        lnet_res_type2str(type));
600                 return NULL;
601         }
602
603         cfs_percpt_for_each(rec, i, recs) {
604                 rc = lnet_res_container_setup(rec, i, type);
605                 if (rc != 0) {
606                         lnet_res_containers_destroy(recs);
607                         return NULL;
608                 }
609         }
610
611         return recs;
612 }
613
614 struct lnet_libhandle *
615 lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie)
616 {
617         /* ALWAYS called with lnet_res_lock held */
618         struct list_head        *head;
619         struct lnet_libhandle   *lh;
620         unsigned int            hash;
621
622         if ((cookie & LNET_COOKIE_MASK) != rec->rec_type)
623                 return NULL;
624
625         hash = cookie >> (LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS);
626         head = &rec->rec_lh_hash[hash & LNET_LH_HASH_MASK];
627
628         list_for_each_entry(lh, head, lh_hash_chain) {
629                 if (lh->lh_cookie == cookie)
630                         return lh;
631         }
632
633         return NULL;
634 }
635
/*
 * Assign a fresh cookie to \a lh and insert it into \a rec's cookie
 * hash table.
 */
void
lnet_res_lh_initialize(struct lnet_res_container *rec,
                       struct lnet_libhandle *lh)
{
        /* ALWAYS called with lnet_res_lock held */
        unsigned int    ibits = LNET_COOKIE_TYPE_BITS + LNET_CPT_BITS;
        unsigned int    hash;

        /* cookies advance in steps of 1 << ibits so the low bits keep
         * encoding the container's CPT and resource type */
        lh->lh_cookie = rec->rec_lh_cookie;
        rec->rec_lh_cookie += 1 << ibits;

        hash = (lh->lh_cookie >> ibits) & LNET_LH_HASH_MASK;

        list_add(&lh->lh_hash_chain, &rec->rec_lh_hash[hash]);
}
651
652 static int lnet_unprepare(void);
653
/*
 * Allocate and initialize all of LNet's global state ahead of bringing
 * networks up: list heads, descriptor caches, remote nets table,
 * counters, peer/message/resource containers and portals.
 *
 * \param requested_pid  pid to run as; must not carry LNET_PID_USERFLAG
 *
 * \retval 0          success
 * \retval -ENETDOWN  requested_pid is LNET_PID_ANY
 * \retval -ENOMEM    (or other errno from a helper) on failure; all
 *                    partial state is undone via lnet_unprepare()
 */
static int
lnet_prepare(lnet_pid_t requested_pid)
{
        /* Prepare to bring up the network */
        struct lnet_res_container **recs;
        int                       rc = 0;

        if (requested_pid == LNET_PID_ANY) {
                /* Don't instantiate LNET just for me */
                return -ENETDOWN;
        }

        LASSERT(the_lnet.ln_refcount == 0);

        the_lnet.ln_routing = 0;

        LASSERT((requested_pid & LNET_PID_USERFLAG) == 0);
        the_lnet.ln_pid = requested_pid;

        INIT_LIST_HEAD(&the_lnet.ln_test_peers);
        INIT_LIST_HEAD(&the_lnet.ln_peers);
        INIT_LIST_HEAD(&the_lnet.ln_remote_peer_ni_list);
        INIT_LIST_HEAD(&the_lnet.ln_nets);
        INIT_LIST_HEAD(&the_lnet.ln_routers);
        INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
        INIT_LIST_HEAD(&the_lnet.ln_delay_rules);

        rc = lnet_descriptor_setup();
        if (rc != 0)
                goto failed;

        rc = lnet_create_remote_nets_table();
        if (rc != 0)
                goto failed;

        /*
         * NB the interface cookie in wire handles guards against delayed
         * replies and ACKs appearing valid after reboot.
         */
        the_lnet.ln_interface_cookie = ktime_get_real_ns();

        the_lnet.ln_counters = cfs_percpt_alloc(lnet_cpt_table(),
                                                sizeof(struct lnet_counters));
        if (the_lnet.ln_counters == NULL) {
                CERROR("Failed to allocate counters for LNet\n");
                rc = -ENOMEM;
                goto failed;
        }

        rc = lnet_peer_tables_create();
        if (rc != 0)
                goto failed;

        rc = lnet_msg_containers_create();
        if (rc != 0)
                goto failed;

        /* the EQ container is global (CPT 0), unlike MEs/MDs below */
        rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0,
                                      LNET_COOKIE_TYPE_EQ);
        if (rc != 0)
                goto failed;

        recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME);
        if (recs == NULL) {
                rc = -ENOMEM;
                goto failed;
        }

        the_lnet.ln_me_containers = recs;

        recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD);
        if (recs == NULL) {
                rc = -ENOMEM;
                goto failed;
        }

        the_lnet.ln_md_containers = recs;

        rc = lnet_portals_create();
        if (rc != 0) {
                CERROR("Failed to create portals for LNet: %d\n", rc);
                goto failed;
        }

        return 0;

 failed:
        lnet_unprepare();
        return rc;
}
744
/*
 * Tear down everything set up by lnet_prepare(), in roughly reverse
 * order.  Also called on a partially-initialized state from
 * lnet_prepare()'s failure path, so every step tolerates state that
 * was never created.  Always returns 0.
 */
static int
lnet_unprepare (void)
{
        /* NB no LNET_LOCK since this is the last reference.  All LND instances
         * have shut down already, so it is safe to unlink and free all
         * descriptors, even those that appear committed to a network op (eg MD
         * with non-zero pending count) */

        lnet_fail_nid(LNET_NID_ANY, 0);

        LASSERT(the_lnet.ln_refcount == 0);
        LASSERT(list_empty(&the_lnet.ln_test_peers));
        LASSERT(list_empty(&the_lnet.ln_nets));

        lnet_portals_destroy();

        if (the_lnet.ln_md_containers != NULL) {
                lnet_res_containers_destroy(the_lnet.ln_md_containers);
                the_lnet.ln_md_containers = NULL;
        }

        if (the_lnet.ln_me_containers != NULL) {
                lnet_res_containers_destroy(the_lnet.ln_me_containers);
                the_lnet.ln_me_containers = NULL;
        }

        lnet_res_container_cleanup(&the_lnet.ln_eq_container);

        lnet_msg_containers_destroy();
        lnet_peer_uninit();
        lnet_rtrpools_free(0);

        if (the_lnet.ln_counters != NULL) {
                cfs_percpt_free(the_lnet.ln_counters);
                the_lnet.ln_counters = NULL;
        }
        lnet_destroy_remote_nets_table();
        lnet_descriptor_cleanup();

        return 0;
}
786
787 struct lnet_ni  *
788 lnet_net2ni_locked(__u32 net_id, int cpt)
789 {
790         struct lnet_ni   *ni;
791         struct lnet_net  *net;
792
793         LASSERT(cpt != LNET_LOCK_EX);
794
795         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
796                 if (net->net_id == net_id) {
797                         ni = list_entry(net->net_ni_list.next, struct lnet_ni,
798                                         ni_netlist);
799                         return ni;
800                 }
801         }
802
803         return NULL;
804 }
805
806 struct lnet_ni *
807 lnet_net2ni_addref(__u32 net)
808 {
809         struct lnet_ni *ni;
810
811         lnet_net_lock(0);
812         ni = lnet_net2ni_locked(net, 0);
813         if (ni)
814                 lnet_ni_addref_locked(ni, 0);
815         lnet_net_unlock(0);
816
817         return ni;
818 }
819 EXPORT_SYMBOL(lnet_net2ni_addref);
820
821 struct lnet_net *
822 lnet_get_net_locked(__u32 net_id)
823 {
824         struct lnet_net  *net;
825
826         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
827                 if (net->net_id == net_id)
828                         return net;
829         }
830
831         return NULL;
832 }
833
834 unsigned int
835 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
836 {
837         __u64           key = nid;
838         unsigned int    val;
839
840         LASSERT(number >= 1 && number <= LNET_CPT_NUMBER);
841
842         if (number == 1)
843                 return 0;
844
845         val = hash_long(key, LNET_CPT_BITS);
846         /* NB: LNET_CP_NUMBER doesn't have to be PO2 */
847         if (val < number)
848                 return val;
849
850         return (unsigned int)(key + val + (val >> 1)) % number;
851 }
852
/*
 * Map @nid to a CPT. Preference order: the NI's own CPT list if @ni is
 * given, then the net's CPT list, then a plain hash over all CPTs.
 * Caller must hold the lnet net lock.
 */
int
lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
{
        struct lnet_net *net;

        /* must be called with hold of lnet_net_lock */
        if (LNET_CPT_NUMBER == 1)
                return 0; /* the only one */

        /*
         * If NI is provided then use the CPT identified in the NI cpt
         * list if one exists. If one doesn't exist, then that NI is
         * associated with all CPTs and it follows that the net it belongs
         * to is implicitly associated with all CPTs, so just hash the nid
         * and return that.
         */
        if (ni != NULL) {
                if (ni->ni_cpts != NULL)
                        return ni->ni_cpts[lnet_nid_cpt_hash(nid,
                                                             ni->ni_ncpts)];
                else
                        return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
        }

        /* no NI provided so look at the net */
        net = lnet_get_net_locked(LNET_NIDNET(nid));

        if (net != NULL && net->net_cpts != NULL) {
                return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
        }

        /* net unknown or bound to all CPTs: hash across everything */
        return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
}
886
887 int
888 lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
889 {
890         int     cpt;
891         int     cpt2;
892
893         if (LNET_CPT_NUMBER == 1)
894                 return 0; /* the only one */
895
896         cpt = lnet_net_lock_current();
897
898         cpt2 = lnet_cpt_of_nid_locked(nid, ni);
899
900         lnet_net_unlock(cpt);
901
902         return cpt2;
903 }
904 EXPORT_SYMBOL(lnet_cpt_of_nid);
905
906 int
907 lnet_islocalnet(__u32 net_id)
908 {
909         struct lnet_net *net;
910         int             cpt;
911         bool            local;
912
913         cpt = lnet_net_lock_current();
914
915         net = lnet_get_net_locked(net_id);
916
917         local = net != NULL;
918
919         lnet_net_unlock(cpt);
920
921         return local;
922 }
923
924 bool
925 lnet_is_ni_healthy_locked(struct lnet_ni *ni)
926 {
927         if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
928             ni->ni_state == LNET_NI_STATE_DEGRADED)
929                 return true;
930
931         return false;
932 }
933
934 struct lnet_ni  *
935 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
936 {
937         struct lnet_net  *net;
938         struct lnet_ni   *ni;
939
940         LASSERT(cpt != LNET_LOCK_EX);
941
942         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
943                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
944                         if (ni->ni_nid == nid)
945                                 return ni;
946                 }
947         }
948
949         return NULL;
950 }
951
952 struct lnet_ni *
953 lnet_nid2ni_addref(lnet_nid_t nid)
954 {
955         struct lnet_ni *ni;
956
957         lnet_net_lock(0);
958         ni = lnet_nid2ni_locked(nid, 0);
959         if (ni)
960                 lnet_ni_addref_locked(ni, 0);
961         lnet_net_unlock(0);
962
963         return ni;
964 }
965 EXPORT_SYMBOL(lnet_nid2ni_addref);
966
967 int
968 lnet_islocalnid(lnet_nid_t nid)
969 {
970         struct lnet_ni  *ni;
971         int             cpt;
972
973         cpt = lnet_net_lock_current();
974         ni = lnet_nid2ni_locked(nid, cpt);
975         lnet_net_unlock(cpt);
976
977         return ni != NULL;
978 }
979
980 int
981 lnet_count_acceptor_nets(void)
982 {
983         /* Return the # of NIs that need the acceptor. */
984         int              count = 0;
985         struct lnet_net  *net;
986         int              cpt;
987
988         cpt = lnet_net_lock_current();
989         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
990                 /* all socklnd type networks should have the acceptor
991                  * thread started */
992                 if (net->net_lnd->lnd_accept != NULL)
993                         count++;
994         }
995
996         lnet_net_unlock(cpt);
997
998         return count;
999 }
1000
1001 struct lnet_ping_buffer *
1002 lnet_ping_buffer_alloc(int nnis, gfp_t gfp)
1003 {
1004         struct lnet_ping_buffer *pbuf;
1005
1006         LIBCFS_ALLOC_GFP(pbuf, LNET_PING_BUFFER_SIZE(nnis), gfp);
1007         if (pbuf) {
1008                 pbuf->pb_nnis = nnis;
1009                 atomic_set(&pbuf->pb_refcnt, 1);
1010         }
1011
1012         return pbuf;
1013 }
1014
/*
 * Free a ping buffer. The final reference must already have been
 * dropped (refcount is asserted to be 0).
 */
void
lnet_ping_buffer_free(struct lnet_ping_buffer *pbuf)
{
        LASSERT(lnet_ping_buffer_numref(pbuf) == 0);
        LIBCFS_FREE(pbuf, LNET_PING_BUFFER_SIZE(pbuf->pb_nnis));
}
1021
1022 static struct lnet_ping_buffer *
1023 lnet_ping_target_create(int nnis)
1024 {
1025         struct lnet_ping_buffer *pbuf;
1026
1027         pbuf = lnet_ping_buffer_alloc(nnis, GFP_NOFS);
1028         if (pbuf == NULL) {
1029                 CERROR("Can't allocate ping source [%d]\n", nnis);
1030                 return NULL;
1031         }
1032
1033         pbuf->pb_info.pi_nnis = nnis;
1034         pbuf->pb_info.pi_pid = the_lnet.ln_pid;
1035         pbuf->pb_info.pi_magic = LNET_PROTO_PING_MAGIC;
1036         pbuf->pb_info.pi_features = LNET_PING_FEAT_NI_STATUS;
1037
1038         return pbuf;
1039 }
1040
1041 static inline int
1042 lnet_get_net_ni_count_locked(struct lnet_net *net)
1043 {
1044         struct lnet_ni  *ni;
1045         int             count = 0;
1046
1047         list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1048                 count++;
1049
1050         return count;
1051 }
1052
1053 static inline int
1054 lnet_get_net_ni_count_pre(struct lnet_net *net)
1055 {
1056         struct lnet_ni  *ni;
1057         int             count = 0;
1058
1059         list_for_each_entry(ni, &net->net_ni_added, ni_netlist)
1060                 count++;
1061
1062         return count;
1063 }
1064
1065 static inline int
1066 lnet_get_ni_count(void)
1067 {
1068         struct lnet_ni  *ni;
1069         struct lnet_net *net;
1070         int             count = 0;
1071
1072         lnet_net_lock(0);
1073
1074         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
1075                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
1076                         count++;
1077         }
1078
1079         lnet_net_unlock(0);
1080
1081         return count;
1082 }
1083
/*
 * Sanity-check a ping info block.
 *
 * \retval 0        valid
 * \retval -EINVAL  NULL info
 * \retval -EPROTO  bad magic, NI-status feature missing, or the first
 *                  NI is not the loopback NID
 * \retval -ERANGE  NI count outside [1, lnet_interfaces_max]
 */
int
lnet_ping_info_validate(struct lnet_ping_info *pinfo)
{
        if (!pinfo)
                return -EINVAL;
        if (pinfo->pi_magic != LNET_PROTO_PING_MAGIC)
                return -EPROTO;
        if (!(pinfo->pi_features & LNET_PING_FEAT_NI_STATUS))
                return -EPROTO;
        /* Loopback is guaranteed to be present */
        if (pinfo->pi_nnis < 1 || pinfo->pi_nnis > lnet_interfaces_max)
                return -ERANGE;
        if (LNET_NETTYP(LNET_NIDNET(LNET_PING_INFO_LONI(pinfo))) != LOLND)
                return -EPROTO;
        return 0;
}
1100
/*
 * Tear down the current ping target buffer. Every NI's ni_status
 * pointer points into that buffer, so detach them all (under each
 * NI's lock) before dropping the buffer reference.
 */
static void
lnet_ping_target_destroy(void)
{
        struct lnet_net *net;
        struct lnet_ni  *ni;

        lnet_net_lock(LNET_LOCK_EX);

        list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
                list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
                        lnet_ni_lock(ni);
                        /* ni_status points into the buffer being destroyed */
                        ni->ni_status = NULL;
                        lnet_ni_unlock(ni);
                }
        }

        lnet_ping_buffer_decref(the_lnet.ln_ping_target);
        the_lnet.ln_ping_target = NULL;

        lnet_net_unlock(LNET_LOCK_EX);
}
1122
1123 static void
1124 lnet_ping_target_event_handler(struct lnet_event *event)
1125 {
1126         struct lnet_ping_buffer *pbuf = event->md.user_ptr;
1127
1128         if (event->unlinked)
1129                 lnet_ping_buffer_decref(pbuf);
1130 }
1131
/*
 * Create and publish a ping target buffer of @ni_count NI slots.
 *
 * Allocates the buffer, attaches an ME/MD on the reserved portal so
 * peers can GET the ping info, and returns the buffer plus the MD
 * handle. The MD takes its own reference on the buffer.
 *
 * \param ppbuf    out: the new ping buffer
 * \param ping_mdh out: handle of the attached MD
 * \param ni_count number of NI slots to allocate
 * \param set_eq   true to also allocate the ping target EQ (first call)
 *
 * \retval 0 on success; negative errno on failure, with any partial
 *         state torn down via the fail_* unwind labels.
 */
static int
lnet_ping_target_setup(struct lnet_ping_buffer **ppbuf,
                       struct lnet_handle_md *ping_mdh,
                       int ni_count, bool set_eq)
{
        struct lnet_process_id id = {
                .nid = LNET_NID_ANY,
                .pid = LNET_PID_ANY
        };
        struct lnet_handle_me me_handle;
        struct lnet_md md = { NULL };
        int rc, rc2;

        if (set_eq) {
                rc = LNetEQAlloc(0, lnet_ping_target_event_handler,
                                 &the_lnet.ln_ping_target_eq);
                if (rc != 0) {
                        CERROR("Can't allocate ping buffer EQ: %d\n", rc);
                        return rc;
                }
        }

        *ppbuf = lnet_ping_target_create(ni_count);
        if (*ppbuf == NULL) {
                rc = -ENOMEM;
                goto fail_free_eq;
        }

        /* Ping target ME/MD */
        rc = LNetMEAttach(LNET_RESERVED_PORTAL, id,
                          LNET_PROTO_PING_MATCHBITS, 0,
                          LNET_UNLINK, LNET_INS_AFTER,
                          &me_handle);
        if (rc != 0) {
                CERROR("Can't create ping target ME: %d\n", rc);
                goto fail_decref_ping_buffer;
        }

        /* initialize md content */
        md.start     = &(*ppbuf)->pb_info;
        md.length    = LNET_PING_INFO_SIZE((*ppbuf)->pb_nnis);
        md.threshold = LNET_MD_THRESH_INF;
        md.max_size  = 0;
        md.options   = LNET_MD_OP_GET | LNET_MD_TRUNCATE |
                       LNET_MD_MANAGE_REMOTE;
        md.eq_handle = the_lnet.ln_ping_target_eq;
        md.user_ptr  = *ppbuf;

        rc = LNetMDAttach(me_handle, md, LNET_RETAIN, ping_mdh);
        if (rc != 0) {
                CERROR("Can't attach ping target MD: %d\n", rc);
                goto fail_unlink_ping_me;
        }
        /* the attached MD now holds its own reference on the buffer */
        lnet_ping_buffer_addref(*ppbuf);

        return 0;

fail_unlink_ping_me:
        rc2 = LNetMEUnlink(me_handle);
        LASSERT(rc2 == 0);
fail_decref_ping_buffer:
        LASSERT(lnet_ping_buffer_numref(*ppbuf) == 1);
        lnet_ping_buffer_decref(*ppbuf);
        *ppbuf = NULL;
fail_free_eq:
        if (set_eq) {
                rc2 = LNetEQFree(the_lnet.ln_ping_target_eq);
                LASSERT(rc2 == 0);
        }
        return rc;
}
1203
/*
 * Start unlinking the ping MD and wait until its unlink event has
 * dropped all but the final reference on @pbuf, i.e. the MD is truly
 * gone. Blocks with all signals masked, polling once per second.
 */
static void
lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
                    struct lnet_handle_md *ping_mdh)
{
        sigset_t        blocked = cfs_block_allsigs();

        LNetMDUnlink(*ping_mdh);
        LNetInvalidateMDHandle(ping_mdh);

        /* NB the MD could be busy; this just starts the unlink */
        while (lnet_ping_buffer_numref(pbuf) > 1) {
                CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(cfs_time_seconds(1));
        }

        cfs_restore_sigs(blocked);
}
1222
/*
 * Fill @pbuf with one status slot per local NI and point each NI's
 * ni_status at its slot, carrying over the NI's previous status value
 * (or UP if it had none). Finally validate the result and stamp the
 * buffer with a fresh sequence number. Caller holds lnet_net_lock/EX.
 */
static void
lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
{
        struct lnet_ni          *ni;
        struct lnet_net         *net;
        struct lnet_ni_status *ns;
        int                     i;
        int                     rc;

        i = 0;
        list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
                list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
                        /* the buffer must have been sized for every NI */
                        LASSERT(i < pbuf->pb_nnis);

                        ns = &pbuf->pb_info.pi_ni[i];

                        ns->ns_nid = ni->ni_nid;

                        lnet_ni_lock(ni);
                        /* preserve the NI's current status if it had one */
                        ns->ns_status = (ni->ni_status != NULL) ?
                                         ni->ni_status->ns_status :
                                                LNET_NI_STATUS_UP;
                        ni->ni_status = ns;
                        lnet_ni_unlock(ni);

                        i++;
                }
        }
        /*
         * We (ab)use the ns_status of the loopback interface to
         * transmit the sequence number. The first interface listed
         * must be the loopback interface.
         */
        rc = lnet_ping_info_validate(&pbuf->pb_info);
        if (rc) {
                LCONSOLE_EMERG("Invalid ping target: %d\n", rc);
                LBUG();
        }
        LNET_PING_BUFFER_SEQNO(pbuf) =
                atomic_inc_return(&the_lnet.ln_ping_target_seqno);
}
1264
/*
 * Atomically swap in a new ping target buffer/MD pair, then unlink and
 * release the previous pair (if any) outside the lock.
 *
 * \param pbuf     the new ping buffer (already attached to @ping_mdh)
 * \param ping_mdh MD handle publishing @pbuf
 */
static void
lnet_ping_target_update(struct lnet_ping_buffer *pbuf,
                        struct lnet_handle_md ping_mdh)
{
        struct lnet_ping_buffer *old_pbuf = NULL;
        struct lnet_handle_md old_ping_md;

        /* switch the NIs to point to the new ping info created */
        lnet_net_lock(LNET_LOCK_EX);

        if (!the_lnet.ln_routing)
                pbuf->pb_info.pi_features |= LNET_PING_FEAT_RTE_DISABLED;

        /* Ensure only known feature bits have been set. */
        LASSERT(pbuf->pb_info.pi_features & LNET_PING_FEAT_BITS);
        LASSERT(!(pbuf->pb_info.pi_features & ~LNET_PING_FEAT_BITS));

        lnet_ping_target_install_locked(pbuf);

        /* remember the old pair so it can be torn down after unlock */
        if (the_lnet.ln_ping_target) {
                old_pbuf = the_lnet.ln_ping_target;
                old_ping_md = the_lnet.ln_ping_target_md;
        }
        the_lnet.ln_ping_target_md = ping_mdh;
        the_lnet.ln_ping_target = pbuf;

        lnet_net_unlock(LNET_LOCK_EX);

        if (old_pbuf) {
                /* unlink and free the old ping info */
                lnet_ping_md_unlink(old_pbuf, &old_ping_md);
                lnet_ping_buffer_decref(old_pbuf);
        }
}
1299
/*
 * Final teardown of the ping target: unlink its MD, free the ping
 * target EQ, then destroy the buffer itself.
 */
static void
lnet_ping_target_fini(void)
{
        int             rc;

        lnet_ping_md_unlink(the_lnet.ln_ping_target,
                            &the_lnet.ln_ping_target_md);

        rc = LNetEQFree(the_lnet.ln_ping_target_eq);
        LASSERT(rc == 0);

        lnet_ping_target_destroy();
}
1313
1314 static int
1315 lnet_ni_tq_credits(struct lnet_ni *ni)
1316 {
1317         int     credits;
1318
1319         LASSERT(ni->ni_ncpts >= 1);
1320
1321         if (ni->ni_ncpts == 1)
1322                 return ni->ni_net->net_tunables.lct_max_tx_credits;
1323
1324         credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
1325         credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
1326         credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
1327
1328         return credits;
1329 }
1330
/*
 * Remove @ni from the lists that make it discoverable and park it on
 * its net's zombie list. Each list removal drops the reference that
 * list held on the NI. Caller holds lnet_net_lock/EX.
 */
static void
lnet_ni_unlink_locked(struct lnet_ni *ni)
{
        if (!list_empty(&ni->ni_cptlist)) {
                list_del_init(&ni->ni_cptlist);
                lnet_ni_decref_locked(ni, 0);
        }

        /* move it to zombie list and nobody can find it anymore */
        LASSERT(!list_empty(&ni->ni_netlist));
        list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
        lnet_ni_decref_locked(ni, 0);
}
1344
/*
 * Drain @net's zombie NI list: wait for each NI's per-CPT refcounts to
 * reach zero, then call the LND's shutdown and free the NI. Entered
 * and exited with lnet_net_lock/EX held; the lock is dropped while
 * sleeping and while calling into the LND.
 */
static void
lnet_clear_zombies_nis_locked(struct lnet_net *net)
{
        int             i;
        int             islo;
        struct lnet_ni  *ni;
        struct list_head *zombie_list = &net->net_ni_zombie;

        /*
         * Now wait for the NIs I just nuked to show up on the zombie
         * list and shut them down in guaranteed thread context
         */
        i = 2;
        while (!list_empty(zombie_list)) {
                int     *ref;
                int     j;

                ni = list_entry(zombie_list->next,
                                struct lnet_ni, ni_netlist);
                list_del_init(&ni->ni_netlist);
                /* the ni should be in deleting state. If it's not it's
                 * a bug */
                LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
                cfs_percpt_for_each(ref, j, ni->ni_refs) {
                        if (*ref == 0)
                                continue;
                        /* still busy, add it back to zombie list */
                        list_add(&ni->ni_netlist, zombie_list);
                        break;
                }

                if (!list_empty(&ni->ni_netlist)) {
                        /* NI still referenced: drop the lock, maybe warn
                         * (only when i is a power of two, to throttle the
                         * messages), sleep a second and retry */
                        lnet_net_unlock(LNET_LOCK_EX);
                        ++i;
                        if ((i & (-i)) == i) {
                                CDEBUG(D_WARNING,
                                       "Waiting for zombie LNI %s\n",
                                       libcfs_nid2str(ni->ni_nid));
                        }
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        schedule_timeout(cfs_time_seconds(1));
                        lnet_net_lock(LNET_LOCK_EX);
                        continue;
                }

                lnet_net_unlock(LNET_LOCK_EX);

                islo = ni->ni_net->net_lnd->lnd_type == LOLND;

                /* LND shutdown must run in thread context, unlocked */
                LASSERT(!in_interrupt());
                (net->net_lnd->lnd_shutdown)(ni);

                if (!islo)
                        CDEBUG(D_LNI, "Removed LNI %s\n",
                              libcfs_nid2str(ni->ni_nid));

                lnet_ni_free(ni);
                i = 2;
                lnet_net_lock(LNET_LOCK_EX);
        }
}
1406
/*
 * Shut down a single NI and release its refcounts: mark it DELETING,
 * unlink it onto the zombie list, flush its lazy-portal messages, then
 * drain the zombie list (which calls the LND shutdown and frees it).
 */
static void
lnet_shutdown_lndni(struct lnet_ni *ni)
{
        int i;
        struct lnet_net *net = ni->ni_net;

        lnet_net_lock(LNET_LOCK_EX);
        ni->ni_state = LNET_NI_STATE_DELETING;
        lnet_ni_unlink_locked(ni);
        lnet_incr_dlc_seq();
        lnet_net_unlock(LNET_LOCK_EX);

        /* clear messages for this NI on the lazy portal */
        for (i = 0; i < the_lnet.ln_nportals; i++)
                lnet_clear_lazy_portal(ni, i, "Shutting down NI");

        lnet_net_lock(LNET_LOCK_EX);
        lnet_clear_zombies_nis_locked(net);
        lnet_net_unlock(LNET_LOCK_EX);
}
1428
/*
 * Shut down an entire network: mark it DELETING, take it off its list,
 * shut down every NI on it, clean up its peer tables, drop the last
 * reference on its LND, and free the net structure.
 */
static void
lnet_shutdown_lndnet(struct lnet_net *net)
{
        struct lnet_ni *ni;

        lnet_net_lock(LNET_LOCK_EX);

        net->net_state = LNET_NET_STATE_DELETING;

        list_del_init(&net->net_list);

        while (!list_empty(&net->net_ni_list)) {
                ni = list_entry(net->net_ni_list.next,
                                struct lnet_ni, ni_netlist);
                /* drop the lock: lnet_shutdown_lndni() sleeps and calls
                 * into the LND */
                lnet_net_unlock(LNET_LOCK_EX);
                lnet_shutdown_lndni(ni);
                lnet_net_lock(LNET_LOCK_EX);
        }

        lnet_net_unlock(LNET_LOCK_EX);

        /* Do peer table cleanup for this net */
        lnet_peer_tables_cleanup(net);

        lnet_net_lock(LNET_LOCK_EX);
        /*
         * decrement ref count on lnd only when the entire network goes
         * away
         */
        net->net_lnd->lnd_refcount--;

        lnet_net_unlock(LNET_LOCK_EX);

        lnet_net_free(net);
}
1464
/*
 * Shut down all configured networks. Moves every net onto the global
 * zombie list first (so nothing new can find them), drops the cached
 * loopback NI reference, then tears each net down in turn. Transitions
 * LNet state RUNNING -> STOPPING -> SHUTDOWN.
 */
static void
lnet_shutdown_lndnets(void)
{
        struct lnet_net *net;

        /* NB called holding the global mutex */

        /* All quiet on the API front */
        LASSERT(the_lnet.ln_state == LNET_STATE_RUNNING);
        LASSERT(the_lnet.ln_refcount == 0);

        lnet_net_lock(LNET_LOCK_EX);
        the_lnet.ln_state = LNET_STATE_STOPPING;

        while (!list_empty(&the_lnet.ln_nets)) {
                /*
                 * move the nets to the zombie list to avoid them being
                 * picked up for new work. LONET is also included in the
                 * Nets that will be moved to the zombie list
                 */
                net = list_entry(the_lnet.ln_nets.next,
                                 struct lnet_net, net_list);
                list_move(&net->net_list, &the_lnet.ln_net_zombie);
        }

        /* Drop the cached loopback Net. */
        if (the_lnet.ln_loni != NULL) {
                lnet_ni_decref_locked(the_lnet.ln_loni, 0);
                the_lnet.ln_loni = NULL;
        }
        lnet_net_unlock(LNET_LOCK_EX);

        /* iterate through the net zombie list and delete each net */
        while (!list_empty(&the_lnet.ln_net_zombie)) {
                net = list_entry(the_lnet.ln_net_zombie.next,
                                 struct lnet_net, net_list);
                lnet_shutdown_lndnet(net);
        }

        lnet_net_lock(LNET_LOCK_EX);
        the_lnet.ln_state = LNET_STATE_SHUTDOWN;
        lnet_net_unlock(LNET_LOCK_EX);
}
1508
/*
 * Start up a single NI via its LND's lnd_startup() and finish setting
 * up its TX credits.
 *
 * The loopback NI is special-cased: an extra reference is cached in
 * the_lnet.ln_loni and all its credit tunables are zeroed.
 *
 * \param ni  the NI to start; freed (or shut down) on failure
 * \param tun optional LND tunables to copy into the NI first
 *
 * \retval 0 on success; negative errno on failure. On LND startup
 *         failure the LND refcount is dropped and the NI is freed, so
 *         the caller must not touch @ni after an error return.
 */
static int
lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
{
        int                     rc = -EINVAL;
        struct lnet_tx_queue    *tq;
        int                     i;
        struct lnet_net         *net = ni->ni_net;

        mutex_lock(&the_lnet.ln_lnd_mutex);

        if (tun) {
                memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
                ni->ni_lnd_tunables_set = true;
        }

        rc = (net->net_lnd->lnd_startup)(ni);

        mutex_unlock(&the_lnet.ln_lnd_mutex);

        if (rc != 0) {
                LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
                                   rc, libcfs_lnd2str(net->net_lnd->lnd_type));
                /* undo the LND reference the caller took for this NI */
                lnet_net_lock(LNET_LOCK_EX);
                net->net_lnd->lnd_refcount--;
                lnet_net_unlock(LNET_LOCK_EX);
                goto failed0;
        }

        ni->ni_state = LNET_NI_STATE_ACTIVE;

        /* We keep a reference on the loopback net through the loopback NI */
        if (net->net_lnd->lnd_type == LOLND) {
                lnet_ni_addref(ni);
                LASSERT(the_lnet.ln_loni == NULL);
                the_lnet.ln_loni = ni;
                ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
                ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
                ni->ni_net->net_tunables.lct_max_tx_credits = 0;
                ni->ni_net->net_tunables.lct_peer_timeout = 0;
                return 0;
        }

        if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
            ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
                LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
                                   libcfs_lnd2str(net->net_lnd->lnd_type),
                                   ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
                                        "" : "per-peer ");
                /* shutdown the NI since if we get here then it must've already
                 * been started
                 */
                lnet_shutdown_lndni(ni);
                return -EINVAL;
        }

        /* size each per-CPT TX queue's credits */
        cfs_percpt_for_each(tq, i, ni->ni_tx_queues) {
                tq->tq_credits_min =
                tq->tq_credits_max =
                tq->tq_credits = lnet_ni_tq_credits(ni);
        }

        atomic_set(&ni->ni_tx_credits,
                   lnet_ni_tq_credits(ni) * ni->ni_ncpts);

        CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
                libcfs_nid2str(ni->ni_nid),
                ni->ni_net->net_tunables.lct_peer_tx_credits,
                lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
                ni->ni_net->net_tunables.lct_peer_rtr_credits,
                ni->ni_net->net_tunables.lct_peer_timeout);

        return 0;
failed0:
        lnet_ni_free(ni);
        return rc;
}
1585
/*
 * Start up all NIs queued on @net->net_ni_added.
 *
 * If @net is a brand-new network its LND is located (loading the
 * module on demand) and referenced; if a net with the same id already
 * exists, the new NIs are attached to that existing network and @net
 * itself is freed afterwards.
 *
 * \param net the network (possibly a duplicate) carrying NIs to start
 * \param tun optional LND tunables passed to each NI startup
 *
 * \retval number of NIs started on success; negative errno on failure
 *         (all NIs already started are shut down and @net is freed).
 */
static int
lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
{
        struct lnet_ni *ni;
        struct lnet_net *net_l = NULL;
        struct list_head        local_ni_list;
        int                     rc;
        int                     ni_count = 0;
        __u32                   lnd_type;
        struct lnet_lnd *lnd;
        int                     peer_timeout =
                net->net_tunables.lct_peer_timeout;
        int                     maxtxcredits =
                net->net_tunables.lct_max_tx_credits;
        int                     peerrtrcredits =
                net->net_tunables.lct_peer_rtr_credits;

        INIT_LIST_HEAD(&local_ni_list);

        /*
         * make sure that this net is unique. If it isn't then
         * we are adding interfaces to an already existing network, and
         * 'net' is just a convenient way to pass in the list.
         * if it is unique we need to find the LND and load it if
         * necessary.
         */
        if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
                lnd_type = LNET_NETTYP(net->net_id);

                LASSERT(libcfs_isknown_lnd(lnd_type));

                mutex_lock(&the_lnet.ln_lnd_mutex);
                lnd = lnet_find_lnd_by_type(lnd_type);

                if (lnd == NULL) {
                        /* LND not registered yet: try loading its module */
                        mutex_unlock(&the_lnet.ln_lnd_mutex);
                        rc = request_module("%s", libcfs_lnd2modname(lnd_type));
                        mutex_lock(&the_lnet.ln_lnd_mutex);

                        lnd = lnet_find_lnd_by_type(lnd_type);
                        if (lnd == NULL) {
                                mutex_unlock(&the_lnet.ln_lnd_mutex);
                                CERROR("Can't load LND %s, module %s, rc=%d\n",
                                libcfs_lnd2str(lnd_type),
                                libcfs_lnd2modname(lnd_type), rc);
#ifndef HAVE_MODULE_LOADING_SUPPORT
                                LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
                                                "compiled with kernel module "
                                                "loading support.");
#endif
                                rc = -EINVAL;
                                goto failed0;
                        }
                }

                lnet_net_lock(LNET_LOCK_EX);
                lnd->lnd_refcount++;
                lnet_net_unlock(LNET_LOCK_EX);

                net->net_lnd = lnd;

                mutex_unlock(&the_lnet.ln_lnd_mutex);

                net_l = net;
        }

        /*
         * net_l: if the network being added is unique then net_l
         *        will point to that network
         *        if the network being added is not unique then
         *        net_l points to the existing network.
         *
         * When we enter the loop below, we'll pick NIs off the
         * network being added and start them up, then add them to
         * a local ni list. Once we've successfully started all
         * the NIs then we join the local NI list (of started up
         * networks) with the net_l->net_ni_list, which should
         * point to the correct network to add the new ni list to
         *
         * If any of the new NIs fail to start up, then we want to
         * iterate through the local ni list, which should include
         * any NIs which were successfully started up, and shut
         * them down.
         *
         * After that we want to delete the network being added,
         * to avoid a memory leak.
         */

        /*
         * When a network uses TCP bonding then all its interfaces
         * must be specified when the network is first defined: the
         * TCP bonding code doesn't allow for interfaces to be added
         * or removed.
         */
        if (net_l != net && net_l != NULL && use_tcp_bonding &&
            LNET_NETTYP(net_l->net_id) == SOCKLND) {
                rc = -EINVAL;
                goto failed0;
        }

        while (!list_empty(&net->net_ni_added)) {
                ni = list_entry(net->net_ni_added.next, struct lnet_ni,
                                ni_netlist);
                list_del_init(&ni->ni_netlist);

                /* make sure that the NI we're about to start
                 * up is actually unique. if it's not fail. */
                if (!lnet_ni_unique_net(&net_l->net_ni_list,
                                        ni->ni_interfaces[0])) {
                        rc = -EINVAL;
                        goto failed1;
                }

                /* adjust the pointer to the parent network, just in case
                 * the net is a duplicate */
                ni->ni_net = net_l;

                rc = lnet_startup_lndni(ni, tun);

                /* LNDs with a peer timeout must provide a query handler */
                LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
                        ni->ni_net->net_lnd->lnd_query != NULL);

                if (rc < 0)
                        goto failed1;

                lnet_ni_addref(ni);
                list_add_tail(&ni->ni_netlist, &local_ni_list);

                ni_count++;
        }

        /* all NIs started: publish them on the (possibly existing) net */
        lnet_net_lock(LNET_LOCK_EX);
        list_splice_tail(&local_ni_list, &net_l->net_ni_list);
        lnet_incr_dlc_seq();
        lnet_net_unlock(LNET_LOCK_EX);

        /* if the network is not unique then we don't want to keep
         * it around after we're done. Free it. Otherwise add that
         * net to the global the_lnet.ln_nets */
        if (net_l != net && net_l != NULL) {
                /*
                 * TODO - note. currently the tunables can not be updated
                 * once added
                 */
                lnet_net_free(net);
        } else {
                net->net_state = LNET_NET_STATE_ACTIVE;
                /*
                 * restore tunables after it has been overwritten by the
                 * lnd
                 */
                if (peer_timeout != -1)
                        net->net_tunables.lct_peer_timeout = peer_timeout;
                if (maxtxcredits != -1)
                        net->net_tunables.lct_max_tx_credits = maxtxcredits;
                if (peerrtrcredits != -1)
                        net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;

                lnet_net_lock(LNET_LOCK_EX);
                list_add_tail(&net->net_list, &the_lnet.ln_nets);
                lnet_net_unlock(LNET_LOCK_EX);
        }

        return ni_count;

failed1:
        /*
         * shutdown the new NIs that are being started up
         * free the NET being started
         */
        while (!list_empty(&local_ni_list)) {
                ni = list_entry(local_ni_list.next, struct lnet_ni,
                                ni_netlist);

                lnet_shutdown_lndni(ni);
        }

failed0:
        lnet_net_free(net);

        return rc;
}
1768
1769 static int
1770 lnet_startup_lndnets(struct list_head *netlist)
1771 {
1772         struct lnet_net         *net;
1773         int                     rc;
1774         int                     ni_count = 0;
1775
1776         /*
1777          * Change to running state before bringing up the LNDs. This
1778          * allows lnet_shutdown_lndnets() to assert that we've passed
1779          * through here.
1780          */
1781         lnet_net_lock(LNET_LOCK_EX);
1782         the_lnet.ln_state = LNET_STATE_RUNNING;
1783         lnet_net_unlock(LNET_LOCK_EX);
1784
1785         while (!list_empty(netlist)) {
1786                 net = list_entry(netlist->next, struct lnet_net, net_list);
1787                 list_del_init(&net->net_list);
1788
1789                 rc = lnet_startup_lndnet(net, NULL);
1790
1791                 if (rc < 0)
1792                         goto failed;
1793
1794                 ni_count += rc;
1795         }
1796
1797         return ni_count;
1798 failed:
1799         lnet_shutdown_lndnets();
1800
1801         return rc;
1802 }
1803
1804 /**
1805  * Initialize LNet library.
1806  *
1807  * Automatically called at module loading time. Caller has to call
1808  * lnet_lib_exit() after a call to lnet_lib_init(), if and only if the
1809  * latter returned 0. It must be called exactly once.
1810  *
1811  * \retval 0 on success
1812  * \retval -ve on failures.
1813  */
1814 int lnet_lib_init(void)
1815 {
1816         int rc;
1817
1818         lnet_assert_wire_constants();
1819
1820         memset(&the_lnet, 0, sizeof(the_lnet));
1821
1822         /* refer to global cfs_cpt_table for now */
1823         the_lnet.ln_cpt_table   = cfs_cpt_table;
1824         the_lnet.ln_cpt_number  = cfs_cpt_number(cfs_cpt_table);
1825
1826         LASSERT(the_lnet.ln_cpt_number > 0);
1827         if (the_lnet.ln_cpt_number > LNET_CPT_MAX) {
1828                 /* we are under risk of consuming all lh_cookie */
1829                 CERROR("Can't have %d CPTs for LNet (max allowed is %d), "
1830                        "please change setting of CPT-table and retry\n",
1831                        the_lnet.ln_cpt_number, LNET_CPT_MAX);
1832                 return -E2BIG;
1833         }
1834
1835         while ((1 << the_lnet.ln_cpt_bits) < the_lnet.ln_cpt_number)
1836                 the_lnet.ln_cpt_bits++;
1837
1838         rc = lnet_create_locks();
1839         if (rc != 0) {
1840                 CERROR("Can't create LNet global locks: %d\n", rc);
1841                 return rc;
1842         }
1843
1844         the_lnet.ln_refcount = 0;
1845         LNetInvalidateEQHandle(&the_lnet.ln_rc_eqh);
1846         INIT_LIST_HEAD(&the_lnet.ln_lnds);
1847         INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
1848         INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
1849         INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
1850
1851         /* The hash table size is the number of bits it takes to express the set
1852          * ln_num_routes, minus 1 (better to under estimate than over so we
1853          * don't waste memory). */
1854         if (rnet_htable_size <= 0)
1855                 rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
1856         else if (rnet_htable_size > LNET_REMOTE_NETS_HASH_MAX)
1857                 rnet_htable_size = LNET_REMOTE_NETS_HASH_MAX;
1858         the_lnet.ln_remote_nets_hbits = max_t(int, 1,
1859                                            order_base_2(rnet_htable_size) - 1);
1860
1861         /* All LNDs apart from the LOLND are in separate modules.  They
1862          * register themselves when their module loads, and unregister
1863          * themselves when their module is unloaded. */
1864         lnet_register_lnd(&the_lolnd);
1865         return 0;
1866 }
1867
1868 /**
1869  * Finalize LNet library.
1870  *
1871  * \pre lnet_lib_init() called with success.
1872  * \pre All LNet users called LNetNIFini() for matching LNetNIInit() calls.
1873  */
1874 void lnet_lib_exit(void)
1875 {
1876         LASSERT(the_lnet.ln_refcount == 0);
1877
1878         while (!list_empty(&the_lnet.ln_lnds))
1879                 lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next,
1880                                                struct lnet_lnd, lnd_list));
1881         lnet_destroy_locks();
1882 }
1883
1884 /**
1885  * Set LNet PID and start LNet interfaces, routing, and forwarding.
1886  *
1887  * Users must call this function at least once before any other functions.
1888  * For each successful call there must be a corresponding call to
1889  * LNetNIFini(). For subsequent calls to LNetNIInit(), \a requested_pid is
1890  * ignored.
1891  *
1892  * The PID used by LNet may be different from the one requested.
1893  * See LNetGetId().
1894  *
1895  * \param requested_pid PID requested by the caller.
1896  *
1897  * \return >= 0 on success, and < 0 error code on failures.
1898  */
1899 int
1900 LNetNIInit(lnet_pid_t requested_pid)
1901 {
1902         int                     im_a_router = 0;
1903         int                     rc;
1904         int                     ni_count;
1905         struct lnet_ping_buffer *pbuf;
1906         struct lnet_handle_md   ping_mdh;
1907         struct list_head        net_head;
1908         struct lnet_net         *net;
1909
1910         INIT_LIST_HEAD(&net_head);
1911
1912         mutex_lock(&the_lnet.ln_api_mutex);
1913
1914         CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount);
1915
1916         if (the_lnet.ln_refcount > 0) {
1917                 rc = the_lnet.ln_refcount++;
1918                 mutex_unlock(&the_lnet.ln_api_mutex);
1919                 return rc;
1920         }
1921
1922         rc = lnet_prepare(requested_pid);
1923         if (rc != 0) {
1924                 mutex_unlock(&the_lnet.ln_api_mutex);
1925                 return rc;
1926         }
1927
1928         /* create a network for Loopback network */
1929         net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
1930         if (net == NULL) {
1931                 rc = -ENOMEM;
1932                 goto err_empty_list;
1933         }
1934
1935         /* Add in the loopback NI */
1936         if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
1937                 rc = -ENOMEM;
1938                 goto err_empty_list;
1939         }
1940
1941         /* If LNet is being initialized via DLC it is possible
1942          * that the user requests not to load module parameters (ones which
1943          * are supported by DLC) on initialization.  Therefore, make sure not
1944          * to load networks, routes and forwarding from module parameters
1945          * in this case.  On cleanup in case of failure only clean up
1946          * routes if it has been loaded */
1947         if (!the_lnet.ln_nis_from_mod_params) {
1948                 rc = lnet_parse_networks(&net_head, lnet_get_networks(),
1949                                          use_tcp_bonding);
1950                 if (rc < 0)
1951                         goto err_empty_list;
1952         }
1953
1954         ni_count = lnet_startup_lndnets(&net_head);
1955         if (ni_count < 0) {
1956                 rc = ni_count;
1957                 goto err_empty_list;
1958         }
1959
1960         if (!the_lnet.ln_nis_from_mod_params) {
1961                 rc = lnet_parse_routes(lnet_get_routes(), &im_a_router);
1962                 if (rc != 0)
1963                         goto err_shutdown_lndnis;
1964
1965                 rc = lnet_check_routes();
1966                 if (rc != 0)
1967                         goto err_destroy_routes;
1968
1969                 rc = lnet_rtrpools_alloc(im_a_router);
1970                 if (rc != 0)
1971                         goto err_destroy_routes;
1972         }
1973
1974         rc = lnet_acceptor_start();
1975         if (rc != 0)
1976                 goto err_destroy_routes;
1977
1978         the_lnet.ln_refcount = 1;
1979         /* Now I may use my own API functions... */
1980
1981         rc = lnet_ping_target_setup(&pbuf, &ping_mdh, ni_count, true);
1982         if (rc != 0)
1983                 goto err_acceptor_stop;
1984
1985         lnet_ping_target_update(pbuf, ping_mdh);
1986
1987         rc = lnet_router_checker_start();
1988         if (rc != 0)
1989                 goto err_stop_ping;
1990
1991         lnet_fault_init();
1992         lnet_proc_init();
1993
1994         mutex_unlock(&the_lnet.ln_api_mutex);
1995
1996         return 0;
1997
1998 err_stop_ping:
1999         lnet_ping_target_fini();
2000 err_acceptor_stop:
2001         the_lnet.ln_refcount = 0;
2002         lnet_acceptor_stop();
2003 err_destroy_routes:
2004         if (!the_lnet.ln_nis_from_mod_params)
2005                 lnet_destroy_routes();
2006 err_shutdown_lndnis:
2007         lnet_shutdown_lndnets();
2008 err_empty_list:
2009         lnet_unprepare();
2010         LASSERT(rc < 0);
2011         mutex_unlock(&the_lnet.ln_api_mutex);
2012         while (!list_empty(&net_head)) {
2013                 struct lnet_net *net;
2014
2015                 net = list_entry(net_head.next, struct lnet_net, net_list);
2016                 list_del_init(&net->net_list);
2017                 lnet_net_free(net);
2018         }
2019         return rc;
2020 }
2021 EXPORT_SYMBOL(LNetNIInit);
2022
2023 /**
2024  * Stop LNet interfaces, routing, and forwarding.
2025  *
2026  * Users must call this function once for each successful call to LNetNIInit().
2027  * Once the LNetNIFini() operation has been started, the results of pending
2028  * API operations are undefined.
2029  *
2030  * \return always 0 for current implementation.
2031  */
2032 int
2033 LNetNIFini()
2034 {
2035         mutex_lock(&the_lnet.ln_api_mutex);
2036
2037         LASSERT(the_lnet.ln_refcount > 0);
2038
2039         if (the_lnet.ln_refcount != 1) {
2040                 the_lnet.ln_refcount--;
2041         } else {
2042                 LASSERT(!the_lnet.ln_niinit_self);
2043
2044                 lnet_fault_fini();
2045
2046                 lnet_proc_fini();
2047                 lnet_router_checker_stop();
2048                 lnet_ping_target_fini();
2049
2050                 /* Teardown fns that use my own API functions BEFORE here */
2051                 the_lnet.ln_refcount = 0;
2052
2053                 lnet_acceptor_stop();
2054                 lnet_destroy_routes();
2055                 lnet_shutdown_lndnets();
2056                 lnet_unprepare();
2057         }
2058
2059         mutex_unlock(&the_lnet.ln_api_mutex);
2060         return 0;
2061 }
2062 EXPORT_SYMBOL(LNetNIFini);
2063
2064 /**
2065  * Grabs the ni data from the ni structure and fills the out
2066  * parameters
2067  *
2068  * \param[in] ni network        interface structure
2069  * \param[out] cfg_ni           NI config information
2070  * \param[out] tun              network and LND tunables
2071  */
2072 static void
2073 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
2074                    struct lnet_ioctl_config_lnd_tunables *tun,
2075                    struct lnet_ioctl_element_stats *stats,
2076                    __u32 tun_size)
2077 {
2078         size_t min_size = 0;
2079         int i;
2080
2081         if (!ni || !cfg_ni || !tun)
2082                 return;
2083
2084         if (ni->ni_interfaces[0] != NULL) {
2085                 for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
2086                         if (ni->ni_interfaces[i] != NULL) {
2087                                 strncpy(cfg_ni->lic_ni_intf[i],
2088                                         ni->ni_interfaces[i],
2089                                         sizeof(cfg_ni->lic_ni_intf[i]));
2090                         }
2091                 }
2092         }
2093
2094         cfg_ni->lic_nid = ni->ni_nid;
2095         if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
2096                 cfg_ni->lic_status = LNET_NI_STATUS_UP;
2097         else
2098                 cfg_ni->lic_status = ni->ni_status->ns_status;
2099         cfg_ni->lic_tcp_bonding = use_tcp_bonding;
2100         cfg_ni->lic_dev_cpt = ni->ni_dev_cpt;
2101
2102         memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
2103
2104         if (stats) {
2105                 stats->iel_send_count = atomic_read(&ni->ni_stats.send_count);
2106                 stats->iel_recv_count = atomic_read(&ni->ni_stats.recv_count);
2107         }
2108
2109         /*
2110          * tun->lt_tun will always be present, but in order to be
2111          * backwards compatible, we need to deal with the cases when
2112          * tun->lt_tun is smaller than what the kernel has, because it
2113          * comes from an older version of a userspace program, then we'll
2114          * need to copy as much information as we have available space.
2115          */
2116         min_size = tun_size - sizeof(tun->lt_cmn);
2117         memcpy(&tun->lt_tun, &ni->ni_lnd_tunables, min_size);
2118
2119         /* copy over the cpts */
2120         if (ni->ni_ncpts == LNET_CPT_NUMBER &&
2121             ni->ni_cpts == NULL)  {
2122                 for (i = 0; i < ni->ni_ncpts; i++)
2123                         cfg_ni->lic_cpts[i] = i;
2124         } else {
2125                 for (i = 0;
2126                      ni->ni_cpts != NULL && i < ni->ni_ncpts &&
2127                      i < LNET_MAX_SHOW_NUM_CPT;
2128                      i++)
2129                         cfg_ni->lic_cpts[i] = ni->ni_cpts[i];
2130         }
2131         cfg_ni->lic_ncpts = ni->ni_ncpts;
2132 }
2133
2134 /**
2135  * NOTE: This is a legacy function left in the code to be backwards
2136  * compatible with older userspace programs. It should eventually be
2137  * removed.
2138  *
2139  * Grabs the ni data from the ni structure and fills the out
2140  * parameters
2141  *
2142  * \param[in] ni network        interface structure
2143  * \param[out] config           config information
2144  */
2145 static void
2146 lnet_fill_ni_info_legacy(struct lnet_ni *ni,
2147                          struct lnet_ioctl_config_data *config)
2148 {
2149         struct lnet_ioctl_net_config *net_config;
2150         struct lnet_ioctl_config_lnd_tunables *lnd_cfg = NULL;
2151         size_t min_size, tunable_size = 0;
2152         int i;
2153
2154         if (!ni || !config)
2155                 return;
2156
2157         net_config = (struct lnet_ioctl_net_config *) config->cfg_bulk;
2158         if (!net_config)
2159                 return;
2160
2161         BUILD_BUG_ON(ARRAY_SIZE(ni->ni_interfaces) !=
2162                      ARRAY_SIZE(net_config->ni_interfaces));
2163
2164         for (i = 0; i < ARRAY_SIZE(ni->ni_interfaces); i++) {
2165                 if (!ni->ni_interfaces[i])
2166                         break;
2167
2168                 strncpy(net_config->ni_interfaces[i],
2169                         ni->ni_interfaces[i],
2170                         sizeof(net_config->ni_interfaces[i]));
2171         }
2172
2173         config->cfg_nid = ni->ni_nid;
2174         config->cfg_config_u.cfg_net.net_peer_timeout =
2175                 ni->ni_net->net_tunables.lct_peer_timeout;
2176         config->cfg_config_u.cfg_net.net_max_tx_credits =
2177                 ni->ni_net->net_tunables.lct_max_tx_credits;
2178         config->cfg_config_u.cfg_net.net_peer_tx_credits =
2179                 ni->ni_net->net_tunables.lct_peer_tx_credits;
2180         config->cfg_config_u.cfg_net.net_peer_rtr_credits =
2181                 ni->ni_net->net_tunables.lct_peer_rtr_credits;
2182
2183         if (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND)
2184                 net_config->ni_status = LNET_NI_STATUS_UP;
2185         else
2186                 net_config->ni_status = ni->ni_status->ns_status;
2187
2188         if (ni->ni_cpts) {
2189                 int num_cpts = min(ni->ni_ncpts, LNET_MAX_SHOW_NUM_CPT);
2190
2191                 for (i = 0; i < num_cpts; i++)
2192                         net_config->ni_cpts[i] = ni->ni_cpts[i];
2193
2194                 config->cfg_ncpts = num_cpts;
2195         }
2196
2197         /*
2198          * See if user land tools sent in a newer and larger version
2199          * of struct lnet_tunables than what the kernel uses.
2200          */
2201         min_size = sizeof(*config) + sizeof(*net_config);
2202
2203         if (config->cfg_hdr.ioc_len > min_size)
2204                 tunable_size = config->cfg_hdr.ioc_len - min_size;
2205
2206         /* Don't copy too much data to user space */
2207         min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
2208         lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
2209
2210         if (lnd_cfg && min_size) {
2211                 memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
2212                 config->cfg_config_u.cfg_net.net_interface_count = 1;
2213
2214                 /* Tell user land that kernel side has less data */
2215                 if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
2216                         min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
2217                         config->cfg_hdr.ioc_len -= min_size;
2218                 }
2219         }
2220 }
2221
2222 struct lnet_ni *
2223 lnet_get_ni_idx_locked(int idx)
2224 {
2225         struct lnet_ni          *ni;
2226         struct lnet_net         *net;
2227
2228         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
2229                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
2230                         if (idx-- == 0)
2231                                 return ni;
2232                 }
2233         }
2234
2235         return NULL;
2236 }
2237
2238 struct lnet_ni *
2239 lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
2240 {
2241         struct lnet_ni          *ni;
2242         struct lnet_net         *net = mynet;
2243
2244         if (prev == NULL) {
2245                 if (net == NULL)
2246                         net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
2247                                         net_list);
2248                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2249                                 ni_netlist);
2250
2251                 return ni;
2252         }
2253
2254         if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
2255                 /* if you reached the end of the ni list and the net is
2256                  * specified, then there are no more nis in that net */
2257                 if (net != NULL)
2258                         return NULL;
2259
2260                 /* we reached the end of this net ni list. move to the
2261                  * next net */
2262                 if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
2263                         /* no more nets and no more NIs. */
2264                         return NULL;
2265
2266                 /* get the next net */
2267                 net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
2268                                  net_list);
2269                 /* get the ni on it */
2270                 ni = list_entry(net->net_ni_list.next, struct lnet_ni,
2271                                 ni_netlist);
2272
2273                 return ni;
2274         }
2275
2276         /* there are more nis left */
2277         ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
2278
2279         return ni;
2280 }
2281
2282 int
2283 lnet_get_net_config(struct lnet_ioctl_config_data *config)
2284 {
2285         struct lnet_ni *ni;
2286         int cpt;
2287         int rc = -ENOENT;
2288         int idx = config->cfg_count;
2289
2290         cpt = lnet_net_lock_current();
2291
2292         ni = lnet_get_ni_idx_locked(idx);
2293
2294         if (ni != NULL) {
2295                 rc = 0;
2296                 lnet_ni_lock(ni);
2297                 lnet_fill_ni_info_legacy(ni, config);
2298                 lnet_ni_unlock(ni);
2299         }
2300
2301         lnet_net_unlock(cpt);
2302         return rc;
2303 }
2304
2305 int
2306 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
2307                    struct lnet_ioctl_config_lnd_tunables *tun,
2308                    struct lnet_ioctl_element_stats *stats,
2309                    __u32 tun_size)
2310 {
2311         struct lnet_ni          *ni;
2312         int                     cpt;
2313         int                     rc = -ENOENT;
2314
2315         if (!cfg_ni || !tun || !stats)
2316                 return -EINVAL;
2317
2318         cpt = lnet_net_lock_current();
2319
2320         ni = lnet_get_ni_idx_locked(cfg_ni->lic_idx);
2321
2322         if (ni) {
2323                 rc = 0;
2324                 lnet_ni_lock(ni);
2325                 lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
2326                 lnet_ni_unlock(ni);
2327         }
2328
2329         lnet_net_unlock(cpt);
2330         return rc;
2331 }
2332
/**
 * Common helper to start a dynamically-added network.
 *
 * Takes ownership of \a net: on every failure path it is freed (either
 * here or inside lnet_startup_lndnet()); on success it is linked into
 * the global LNet state.
 *
 * \param[in] net       network to start; must not collide with an
 *                      existing remote-net (routing) entry
 * \param[in] tun       tunables to apply, or NULL
 *
 * \return 0 on success, negative errno on failure.
 */
static int lnet_add_net_common(struct lnet_net *net,
                               struct lnet_ioctl_config_lnd_tunables *tun)
{
        __u32                   net_id;
        struct lnet_ping_buffer *pbuf;
        struct lnet_handle_md   ping_mdh;
        int                     rc;
        struct lnet_remotenet *rnet;
        int                     net_ni_count;
        int                     num_acceptor_nets;

        lnet_net_lock(LNET_LOCK_EX);
        rnet = lnet_find_rnet_locked(net->net_id);
        lnet_net_unlock(LNET_LOCK_EX);
        /*
         * make sure that the net added doesn't invalidate the current
         * configuration LNet is keeping
         */
        if (rnet) {
                CERROR("Adding net %s will invalidate routing configuration\n",
                       libcfs_net2str(net->net_id));
                lnet_net_free(net);
                return -EUSERS;
        }

        /*
         * make sure you calculate the correct number of slots in the ping
         * buffer. Since the ping info is a flattened list of all the NIs,
         * we should allocate enough slots to accomodate the number of NIs
         * which will be added.
         *
         * since ni hasn't been configured yet, use
         * lnet_get_net_ni_count_pre() which checks the net_ni_added list
         */
        net_ni_count = lnet_get_net_ni_count_pre(net);

        rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
                                    net_ni_count + lnet_get_ni_count(),
                                    false);
        if (rc < 0) {
                lnet_net_free(net);
                return rc;
        }

        if (tun)
                memcpy(&net->net_tunables,
                       &tun->lt_cmn, sizeof(net->net_tunables));
        else
                /* fill every tunable with -1; presumably the LND treats
                 * that as "unset" during startup (see the tunable-restore
                 * logic in lnet_startup_lndnet()) — confirm */
                memset(&net->net_tunables, -1, sizeof(net->net_tunables));

        /*
         * before starting this network get a count of the current TCP
         * networks which require the acceptor thread running. If that
         * count is == 0 before we start up this network, then we'd want to
         * start up the acceptor thread after starting up this network
         */
        num_acceptor_nets = lnet_count_acceptor_nets();

        /* remember the ID: the net pointer may be freed by startup below */
        net_id = net->net_id;

        rc = lnet_startup_lndnet(net,
                                 (tun) ? &tun->lt_tun : NULL);
        if (rc < 0)
                goto failed;

        /* re-look up the net: lnet_startup_lndnet() frees the structure
         * passed in when a net with the same ID already existed */
        lnet_net_lock(LNET_LOCK_EX);
        net = lnet_get_net_locked(net_id);
        lnet_net_unlock(LNET_LOCK_EX);

        LASSERT(net);

        /*
         * Start the acceptor thread if this is the first network
         * being added that requires the thread.
         */
        if (net->net_lnd->lnd_accept && num_acceptor_nets == 0) {
                rc = lnet_acceptor_start();
                if (rc < 0) {
                        /* shutdown the net that we just started */
                        CERROR("Failed to start up acceptor thread\n");
                        lnet_shutdown_lndnet(net);
                        goto failed;
                }
        }

        lnet_net_lock(LNET_LOCK_EX);
        lnet_peer_net_added(net);
        lnet_net_unlock(LNET_LOCK_EX);

        /* publish the new ping buffer, which includes the added NIs */
        lnet_ping_target_update(pbuf, ping_mdh);

        return 0;

failed:
        /* undo the ping target setup performed above */
        lnet_ping_md_unlink(pbuf, &ping_mdh);
        lnet_ping_buffer_decref(pbuf);
        return rc;
}
2431
2432 static int lnet_handle_legacy_ip2nets(char *ip2nets,
2433                                       struct lnet_ioctl_config_lnd_tunables *tun)
2434 {
2435         struct lnet_net *net;
2436         char *nets;
2437         int rc;
2438         struct list_head net_head;
2439
2440         INIT_LIST_HEAD(&net_head);
2441
2442         rc = lnet_parse_ip2nets(&nets, ip2nets);
2443         if (rc < 0)
2444                 return rc;
2445
2446         rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
2447         if (rc < 0)
2448                 return rc;
2449
2450         mutex_lock(&the_lnet.ln_api_mutex);
2451         while (!list_empty(&net_head)) {
2452                 net = list_entry(net_head.next, struct lnet_net, net_list);
2453                 list_del_init(&net->net_list);
2454                 rc = lnet_add_net_common(net, tun);
2455                 if (rc < 0)
2456                         goto out;
2457         }
2458
2459 out:
2460         mutex_unlock(&the_lnet.ln_api_mutex);
2461
2462         while (!list_empty(&net_head)) {
2463                 net = list_entry(net_head.next, struct lnet_net, net_list);
2464                 list_del_init(&net->net_list);
2465                 lnet_net_free(net);
2466         }
2467         return rc;
2468 }
2469
2470 int lnet_dyn_add_ni(struct lnet_ioctl_config_ni *conf)
2471 {
2472         struct lnet_net *net;
2473         struct lnet_ni *ni;
2474         struct lnet_ioctl_config_lnd_tunables *tun = NULL;
2475         int rc, i;
2476         __u32 net_id;
2477
2478         /* get the tunables if they are available */
2479         if (conf->lic_cfg_hdr.ioc_len >=
2480             sizeof(*conf) + sizeof(*tun))
2481                 tun = (struct lnet_ioctl_config_lnd_tunables *)
2482                         conf->lic_bulk;
2483
2484         /* handle legacy ip2nets from DLC */
2485         if (conf->lic_legacy_ip2nets[0] != '\0')
2486                 return lnet_handle_legacy_ip2nets(conf->lic_legacy_ip2nets,
2487                                                   tun);
2488
2489         net_id = LNET_NIDNET(conf->lic_nid);
2490
2491         net = lnet_net_alloc(net_id, NULL);
2492         if (!net)
2493                 return -ENOMEM;
2494
2495         for (i = 0; i < conf->lic_ncpts; i++) {
2496                 if (conf->lic_cpts[i] >= LNET_CPT_NUMBER)
2497                         return -EINVAL;
2498         }
2499
2500         ni = lnet_ni_alloc_w_cpt_array(net, conf->lic_cpts, conf->lic_ncpts,
2501                                        conf->lic_ni_intf[0]);
2502         if (!ni)
2503                 return -ENOMEM;
2504
2505         mutex_lock(&the_lnet.ln_api_mutex);
2506
2507         rc = lnet_add_net_common(net, tun);
2508
2509         mutex_unlock(&the_lnet.ln_api_mutex);
2510
2511         return rc;
2512 }
2513
/**
 * Dynamically remove the NI identified by conf->lic_nid, or the whole
 * network when the NID's address part is 0.
 *
 * The ping target is rebuilt (without the departing NIs) before the
 * shutdown, the acceptor thread is stopped if no remaining net needs
 * it, and a net left empty by a single-NI removal is shut down too.
 *
 * \return 0 on success, negative errno on failure.
 */
int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf)
{
        struct lnet_net  *net;
        struct lnet_ni *ni;
        __u32 net_id = LNET_NIDNET(conf->lic_nid);
        struct lnet_ping_buffer *pbuf;
        struct lnet_handle_md  ping_mdh;
        int               rc;
        int               net_count;
        __u32             addr;

        /* don't allow userspace to shutdown the LOLND */
        if (LNET_NETTYP(net_id) == LOLND)
                return -EINVAL;

        mutex_lock(&the_lnet.ln_api_mutex);

        lnet_net_lock(0);

        net = lnet_get_net_locked(net_id);
        if (!net) {
                CERROR("net %s not found\n",
                       libcfs_net2str(net_id));
                rc = -ENOENT;
                goto unlock_net;
        }

        addr = LNET_NIDADDR(conf->lic_nid);
        if (addr == 0) {
                /* remove the entire net */
                net_count = lnet_get_net_ni_count_locked(net);

                lnet_net_unlock(0);

                /* create and link a new ping info, before removing the old one */
                rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
                                        lnet_get_ni_count() - net_count,
                                        false);
                if (rc != 0)
                        goto unlock_api_mutex;

                lnet_shutdown_lndnet(net);

                /* stop the acceptor if no remaining net requires it */
                if (lnet_count_acceptor_nets() == 0)
                        lnet_acceptor_stop();

                lnet_ping_target_update(pbuf, ping_mdh);

                goto unlock_api_mutex;
        }

        /* single-NI removal: find the exact NI for this NID */
        ni = lnet_nid2ni_locked(conf->lic_nid, 0);
        if (!ni) {
                CERROR("nid %s not found\n",
                       libcfs_nid2str(conf->lic_nid));
                rc = -ENOENT;
                goto unlock_net;
        }

        /* NI count before removal, used below to detect an emptied net */
        net_count = lnet_get_net_ni_count_locked(net);

        lnet_net_unlock(0);

        /* create and link a new ping info, before removing the old one */
        rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
                                  lnet_get_ni_count() - 1, false);
        if (rc != 0)
                goto unlock_api_mutex;

        lnet_shutdown_lndni(ni);

        if (lnet_count_acceptor_nets() == 0)
                lnet_acceptor_stop();

        lnet_ping_target_update(pbuf, ping_mdh);

        /* check if the net is empty and remove it if it is */
        if (net_count == 1)
                lnet_shutdown_lndnet(net);

        goto unlock_api_mutex;

unlock_net:
        lnet_net_unlock(0);
unlock_api_mutex:
        mutex_unlock(&the_lnet.ln_api_mutex);

        return rc;
}
2603
2604 /*
2605  * lnet_dyn_add_net and lnet_dyn_del_net are now deprecated.
2606  * They are only expected to be called for unique networks.
2607  * That can be as a result of older DLC library
2608  * calls. Multi-Rail DLC and beyond no longer uses these APIs.
2609  */
/**
 * Add one network described by the legacy ioctl config format.
 *
 * Deprecated: only expected for unique networks configured by older DLC
 * libraries; Multi-Rail DLC uses lnet_dyn_add_ni() instead.
 *
 * \return 0 on success, negative errno on failure.
 */
int
lnet_dyn_add_net(struct lnet_ioctl_config_data *conf)
{
        struct lnet_net         *net;
        struct list_head        net_head;
        int                     rc;
        struct lnet_ioctl_config_lnd_tunables tun;
        char *nets = conf->cfg_config_u.cfg_net.net_intf;

        INIT_LIST_HEAD(&net_head);

        /* Create a net/ni structures for the network string */
        rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
        if (rc <= 0)
                return rc == 0 ? -EINVAL : rc;

        mutex_lock(&the_lnet.ln_api_mutex);

        /* rc is the number of nets parsed */
        if (rc > 1) {
                rc = -EINVAL; /* only add one network per call */
                goto out_unlock_clean;
        }

        net = list_entry(net_head.next, struct lnet_net, net_list);
        list_del_init(&net->net_list);

        LASSERT(lnet_net_unique(net->net_id, &the_lnet.ln_nets, NULL));

        /* build common tunables from the legacy per-net fields */
        memset(&tun, 0, sizeof(tun));

        tun.lt_cmn.lct_peer_timeout =
          conf->cfg_config_u.cfg_net.net_peer_timeout;
        tun.lt_cmn.lct_peer_tx_credits =
          conf->cfg_config_u.cfg_net.net_peer_tx_credits;
        tun.lt_cmn.lct_peer_rtr_credits =
          conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
        tun.lt_cmn.lct_max_tx_credits =
          conf->cfg_config_u.cfg_net.net_max_tx_credits;

        /* lnet_add_net_common() takes ownership of 'net' */
        rc = lnet_add_net_common(net, &tun);

out_unlock_clean:
        mutex_unlock(&the_lnet.ln_api_mutex);
        while (!list_empty(&net_head)) {
                /* net_head list is empty in success case */
                net = list_entry(net_head.next, struct lnet_net, net_list);
                list_del_init(&net->net_list);
                lnet_net_free(net);
        }
        return rc;
}
2661
/*
 * Dynamically remove the network identified by \a net_id (deprecated
 * companion to lnet_dyn_add_net(); see the note above lnet_dyn_add_net).
 *
 * \param net_id	network to tear down; the loopback net is refused.
 *
 * \retval 0	on success
 * \retval -EINVAL if net_id is the LOLND net or is not configured
 * \retval negative errno if the replacement ping target cannot be set up
 */
int
lnet_dyn_del_net(__u32 net_id)
{
	struct lnet_net	 *net;
	struct lnet_ping_buffer *pbuf;
	struct lnet_handle_md ping_mdh;
	int		  rc;
	int		  net_ni_count;

	/* don't allow userspace to shutdown the LOLND */
	if (LNET_NETTYP(net_id) == LOLND)
		return -EINVAL;

	mutex_lock(&the_lnet.ln_api_mutex);

	lnet_net_lock(0);

	net = lnet_get_net_locked(net_id);
	if (net == NULL) {
		lnet_net_unlock(0);
		rc = -EINVAL;
		goto out;
	}

	/* count the NIs about to disappear so the new ping target can be
	 * sized for the remaining interfaces */
	net_ni_count = lnet_get_net_ni_count_locked(net);

	lnet_net_unlock(0);

	/* create and link a new ping info, before removing the old one */
	rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
				    lnet_get_ni_count() - net_ni_count, false);
	if (rc != 0)
		goto out;

	lnet_shutdown_lndnet(net);

	/* stop the acceptor thread if no remaining net needs it */
	if (lnet_count_acceptor_nets() == 0)
		lnet_acceptor_stop();

	lnet_ping_target_update(pbuf, ping_mdh);

out:
	mutex_unlock(&the_lnet.ln_api_mutex);

	return rc;
}
2708
2709 void lnet_incr_dlc_seq(void)
2710 {
2711         atomic_inc(&lnet_dlc_seq_no);
2712 }
2713
2714 __u32 lnet_get_dlc_seq_locked(void)
2715 {
2716         return atomic_read(&lnet_dlc_seq_no);
2717 }
2718
/**
 * LNet ioctl handler.
 *
 * Dispatches LNet configuration and query ioctls.  Commands that read or
 * modify global state are serialized under the_lnet.ln_api_mutex; simple
 * per-command handlers validate the caller-supplied header length before
 * touching the payload.  Unrecognized commands are forwarded to the LND
 * of the net named in the request, if it provides an lnd_ctl hook.
 *
 * \param cmd	IOC_LIBCFS_* command code
 * \param arg	ioctl payload (type depends on \a cmd)
 *
 * \retval 0 or a command-specific non-negative value on success
 * \retval negative errno on failure
 */
int
LNetCtl(unsigned int cmd, void *arg)
{
	struct libcfs_ioctl_data *data = arg;
	struct lnet_ioctl_config_data *config;
	struct lnet_process_id    id = {0};
	struct lnet_ni           *ni;
	int                       rc;

	/* the largest payload used here must fit the ioctl data limit */
	BUILD_BUG_ON(sizeof(struct lnet_ioctl_net_config) +
		     sizeof(struct lnet_ioctl_config_data) > LIBCFS_IOC_DATA_MAX);

	switch (cmd) {
	case IOC_LIBCFS_GET_NI:
		/* look up the NID of the local interface at index ioc_count */
		rc = LNetGetId(data->ioc_count, &id);
		data->ioc_nid = id.nid;
		return rc;

	case IOC_LIBCFS_FAIL_NID:
		return lnet_fail_nid(data->ioc_nid, data->ioc_count);

	case IOC_LIBCFS_ADD_ROUTE:
		config = arg;

		if (config->cfg_hdr.ioc_len < sizeof(*config))
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_add_route(config->cfg_net,
				    config->cfg_config_u.cfg_route.rtr_hop,
				    config->cfg_nid,
				    config->cfg_config_u.cfg_route.
					rtr_priority);
		if (rc == 0) {
			/* roll the route back if it makes the table invalid */
			rc = lnet_check_routes();
			if (rc != 0)
				lnet_del_route(config->cfg_net,
					       config->cfg_nid);
		}
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;

	case IOC_LIBCFS_DEL_ROUTE:
		config = arg;

		if (config->cfg_hdr.ioc_len < sizeof(*config))
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_del_route(config->cfg_net, config->cfg_nid);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;

	case IOC_LIBCFS_GET_ROUTE:
		config = arg;

		if (config->cfg_hdr.ioc_len < sizeof(*config))
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_get_route(config->cfg_count,
				    &config->cfg_net,
				    &config->cfg_config_u.cfg_route.rtr_hop,
				    &config->cfg_nid,
				    &config->cfg_config_u.cfg_route.rtr_flags,
				    &config->cfg_config_u.cfg_route.
					rtr_priority);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;

	case IOC_LIBCFS_GET_LOCAL_NI: {
		struct lnet_ioctl_config_ni *cfg_ni;
		struct lnet_ioctl_config_lnd_tunables *tun = NULL;
		struct lnet_ioctl_element_stats *stats;
		__u32 tun_size;

		cfg_ni = arg;
		/* get the tunables if they are available */
		if (cfg_ni->lic_cfg_hdr.ioc_len <
		    sizeof(*cfg_ni) + sizeof(*stats)+ sizeof(*tun))
			return -EINVAL;

		/* lic_bulk carries the stats block followed by the tunables */
		stats = (struct lnet_ioctl_element_stats *)
			cfg_ni->lic_bulk;
		tun = (struct lnet_ioctl_config_lnd_tunables *)
				(cfg_ni->lic_bulk + sizeof(*stats));

		tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
			sizeof(*stats);

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_GET_NET: {
		size_t total = sizeof(*config) +
			       sizeof(struct lnet_ioctl_net_config);
		config = arg;

		if (config->cfg_hdr.ioc_len < total)
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_get_net_config(config);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_GET_LNET_STATS:
	{
		struct lnet_ioctl_lnet_stats *lnet_stats = arg;

		if (lnet_stats->st_hdr.ioc_len < sizeof(*lnet_stats))
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		lnet_counters_get(&lnet_stats->st_cntrs);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return 0;
	}

	case IOC_LIBCFS_CONFIG_RTR:
		config = arg;

		if (config->cfg_hdr.ioc_len < sizeof(*config))
			return -EINVAL;

		/* buf_enable selects enabling vs disabling the router pools */
		mutex_lock(&the_lnet.ln_api_mutex);
		if (config->cfg_config_u.cfg_buffers.buf_enable) {
			rc = lnet_rtrpools_enable();
			mutex_unlock(&the_lnet.ln_api_mutex);
			return rc;
		}
		lnet_rtrpools_disable();
		mutex_unlock(&the_lnet.ln_api_mutex);
		return 0;

	case IOC_LIBCFS_ADD_BUF:
		config = arg;

		if (config->cfg_hdr.ioc_len < sizeof(*config))
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_rtrpools_adjust(config->cfg_config_u.cfg_buffers.
						buf_tiny,
					  config->cfg_config_u.cfg_buffers.
						buf_small,
					  config->cfg_config_u.cfg_buffers.
						buf_large);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;

	case IOC_LIBCFS_SET_NUMA_RANGE: {
		struct lnet_ioctl_set_value *numa;
		numa = arg;
		if (numa->sv_hdr.ioc_len != sizeof(*numa))
			return -EINVAL;
		/* writers take the exclusive net lock */
		lnet_net_lock(LNET_LOCK_EX);
		lnet_numa_range = numa->sv_value;
		lnet_net_unlock(LNET_LOCK_EX);
		return 0;
	}

	case IOC_LIBCFS_GET_NUMA_RANGE: {
		struct lnet_ioctl_set_value *numa;
		numa = arg;
		if (numa->sv_hdr.ioc_len != sizeof(*numa))
			return -EINVAL;
		/* lockless read of a single value */
		numa->sv_value = lnet_numa_range;
		return 0;
	}

	case IOC_LIBCFS_GET_BUF: {
		struct lnet_ioctl_pool_cfg *pool_cfg;
		size_t total = sizeof(*config) + sizeof(*pool_cfg);

		config = arg;

		if (config->cfg_hdr.ioc_len < total)
			return -EINVAL;

		pool_cfg = (struct lnet_ioctl_pool_cfg *)config->cfg_bulk;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_get_rtr_pool_cfg(config->cfg_count, pool_cfg);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_ADD_PEER_NI: {
		struct lnet_ioctl_peer_cfg *cfg = arg;

		if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_add_peer_ni(cfg->prcfg_prim_nid,
				      cfg->prcfg_cfg_nid,
				      cfg->prcfg_mr);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_DEL_PEER_NI: {
		struct lnet_ioctl_peer_cfg *cfg = arg;

		if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_del_peer_ni(cfg->prcfg_prim_nid,
				      cfg->prcfg_cfg_nid);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_GET_PEER_INFO: {
		struct lnet_ioctl_peer *peer_info = arg;

		if (peer_info->pr_hdr.ioc_len < sizeof(*peer_info))
			return -EINVAL;

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_get_peer_ni_info(
		   peer_info->pr_count,
		   &peer_info->pr_nid,
		   peer_info->pr_lnd_u.pr_peer_credits.cr_aliveness,
		   &peer_info->pr_lnd_u.pr_peer_credits.cr_ncpt,
		   &peer_info->pr_lnd_u.pr_peer_credits.cr_refcount,
		   &peer_info->pr_lnd_u.pr_peer_credits.cr_ni_peer_tx_credits,
		   &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_credits,
		   &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_rtr_credits,
		   &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_min_tx_credits,
		   &peer_info->pr_lnd_u.pr_peer_credits.cr_peer_tx_qnob);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_GET_PEER_NI: {
		struct lnet_ioctl_peer_cfg *cfg = arg;
		struct lnet_peer_ni_credit_info __user *lpni_cri;
		struct lnet_ioctl_element_stats __user *lpni_stats;
		size_t usr_size = sizeof(*lpni_cri) + sizeof(*lpni_stats);

		/* prcfg_bulk points at user memory sized exactly usr_size */
		if ((cfg->prcfg_hdr.ioc_len != sizeof(*cfg)) ||
		    (cfg->prcfg_size != usr_size))
			return -EINVAL;

		lpni_cri = cfg->prcfg_bulk;
		lpni_stats = cfg->prcfg_bulk + sizeof(*lpni_cri);

		mutex_lock(&the_lnet.ln_api_mutex);
		rc = lnet_get_peer_info(cfg->prcfg_count, &cfg->prcfg_prim_nid,
					&cfg->prcfg_cfg_nid, &cfg->prcfg_mr,
					lpni_cri, lpni_stats);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_NOTIFY_ROUTER: {
		unsigned long jiffies_passed;

		/* ioc_u64[0] is a wall-clock timestamp in seconds; convert
		 * the elapsed seconds to jiffies before rebasing on the
		 * current jiffies clock */
		jiffies_passed = ktime_get_real_seconds() - data->ioc_u64[0];
		jiffies_passed = cfs_time_seconds(jiffies_passed);

		return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
				   jiffies - jiffies_passed);
	}

	case IOC_LIBCFS_LNET_DIST:
		rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
		if (rc < 0 && rc != -EHOSTUNREACH)
			return rc;

		/* distance (or -EHOSTUNREACH) is reported in-band */
		data->ioc_u32[0] = rc;
		return 0;

	case IOC_LIBCFS_TESTPROTOCOMPAT:
		lnet_net_lock(LNET_LOCK_EX);
		the_lnet.ln_testprotocompat = data->ioc_flags;
		lnet_net_unlock(LNET_LOCK_EX);
		return 0;

	case IOC_LIBCFS_LNET_FAULT:
		return lnet_fault_ctl(data->ioc_flags, data);

	case IOC_LIBCFS_PING: {
		signed long timeout;

		id.nid = data->ioc_nid;
		id.pid = data->ioc_u32[0];

		/* Don't block longer than 2 minutes */
		if (data->ioc_u32[1] > 120 * MSEC_PER_SEC)
			return -EINVAL;

		/* If timestamp is negative then disable timeout
		 * NOTE(review): any u32 with the sign bit set already
		 * exceeds 120 * MSEC_PER_SEC, so the check above returns
		 * -EINVAL first and this branch looks unreachable —
		 * confirm intended ordering of the two checks */
		if ((s32)data->ioc_u32[1] < 0)
			timeout = MAX_SCHEDULE_TIMEOUT;
		else
			timeout = msecs_to_jiffies(data->ioc_u32[1]);

		rc = lnet_ping(id, timeout, data->ioc_pbuf1,
			       data->ioc_plen1 / sizeof(struct lnet_process_id));
		if (rc < 0)
			return rc;
		/* rc is the number of NIs the peer reported */
		data->ioc_count = rc;
		return 0;
	}

	default:
		/* unknown command: hand it to the LND of the target net */
		ni = lnet_net2ni_addref(data->ioc_net);
		if (ni == NULL)
			return -EINVAL;

		if (ni->ni_net->net_lnd->lnd_ctl == NULL)
			rc = -EINVAL;
		else
			rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);

		lnet_ni_decref(ni);
		return rc;
	}
	/* not reached */
}
EXPORT_SYMBOL(LNetCtl);
3052
3053 void LNetDebugPeer(struct lnet_process_id id)
3054 {
3055         lnet_debug_peer(id.nid);
3056 }
3057 EXPORT_SYMBOL(LNetDebugPeer);
3058
3059 /**
3060  * Determine if the specified peer \a nid is on the local node.
3061  *
3062  * \param nid   peer nid to check
3063  *
3064  * \retval true         If peer NID is on the local node.
3065  * \retval false        If peer NID is not on the local node.
3066  */
3067 bool LNetIsPeerLocal(lnet_nid_t nid)
3068 {
3069         struct lnet_net *net;
3070         struct lnet_ni *ni;
3071         int cpt;
3072
3073         cpt = lnet_net_lock_current();
3074         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3075                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3076                         if (ni->ni_nid == nid) {
3077                                 lnet_net_unlock(cpt);
3078                                 return true;
3079                         }
3080                 }
3081         }
3082         lnet_net_unlock(cpt);
3083
3084         return false;
3085 }
3086 EXPORT_SYMBOL(LNetIsPeerLocal);
3087
3088 /**
3089  * Retrieve the struct lnet_process_id ID of LNet interface at \a index.
3090  * Note that all interfaces share a same PID, as requested by LNetNIInit().
3091  *
3092  * \param index Index of the interface to look up.
3093  * \param id On successful return, this location will hold the
3094  * struct lnet_process_id ID of the interface.
3095  *
3096  * \retval 0 If an interface exists at \a index.
3097  * \retval -ENOENT If no interface has been found.
3098  */
3099 int
3100 LNetGetId(unsigned int index, struct lnet_process_id *id)
3101 {
3102         struct lnet_ni   *ni;
3103         struct lnet_net  *net;
3104         int               cpt;
3105         int               rc = -ENOENT;
3106
3107         LASSERT(the_lnet.ln_refcount > 0);
3108
3109         cpt = lnet_net_lock_current();
3110
3111         list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
3112                 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
3113                         if (index-- != 0)
3114                                 continue;
3115
3116                         id->nid = ni->ni_nid;
3117                         id->pid = the_lnet.ln_pid;
3118                         rc = 0;
3119                         break;
3120                 }
3121         }
3122
3123         lnet_net_unlock(cpt);
3124         return rc;
3125 }
3126 EXPORT_SYMBOL(LNetGetId);
3127
/*
 * Ping the peer \a id and copy the interface IDs it reports into the
 * user-space array \a ids.
 *
 * Allocates a ping buffer and a 2-slot EQ, binds an MD over the buffer,
 * issues an LNetGet on the reserved ping portal, then polls the EQ until
 * both the REPLY (or failure) and the MD unlink event have been seen.
 * The reply payload is byte-swapped if needed, validated, and copied out.
 *
 * \param id		peer to ping (LNET_PID_ANY maps to LNET_PID_LUSTRE)
 * \param timeout	poll timeout in jiffies
 * \param ids		user-space buffer receiving lnet_process_id entries
 * \param n_ids		capacity of \a ids (bounded by lnet_interfaces_max)
 *
 * \retval >= 0		number of NIs the peer reported (pi_nnis)
 * \retval negative	errno on failure
 */
static int lnet_ping(struct lnet_process_id id, signed long timeout,
		     struct lnet_process_id __user *ids, int n_ids)
{
	struct lnet_handle_eq eqh;
	struct lnet_handle_md mdh;
	struct lnet_event event;
	struct lnet_md md = { NULL };
	int which;
	int unlinked = 0;
	int replied = 0;
	const signed long a_long_time = msecs_to_jiffies(60 * MSEC_PER_SEC);
	struct lnet_ping_buffer *pbuf;
	struct lnet_process_id tmpid;
	int i;
	int nob;
	int rc;
	int rc2;
	sigset_t blocked;

	/* n_ids limit is arbitrary */
	if (n_ids <= 0 || n_ids > lnet_interfaces_max || id.nid == LNET_NID_ANY)
		return -EINVAL;

	if (id.pid == LNET_PID_ANY)
		id.pid = LNET_PID_LUSTRE;

	pbuf = lnet_ping_buffer_alloc(n_ids, GFP_NOFS);
	if (!pbuf)
		return -ENOMEM;

	/* NB 2 events max (including any unlink event) */
	rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh);
	if (rc != 0) {
		CERROR("Can't allocate EQ: %d\n", rc);
		goto fail_ping_buffer_decref;
	}

	/* initialize md content */
	md.start     = &pbuf->pb_info;
	md.length    = LNET_PING_INFO_SIZE(n_ids);
	md.threshold = 2; /*GET/REPLY*/
	md.max_size  = 0;
	md.options   = LNET_MD_TRUNCATE;
	md.user_ptr  = NULL;
	md.eq_handle = eqh;

	rc = LNetMDBind(md, LNET_UNLINK, &mdh);
	if (rc != 0) {
		CERROR("Can't bind MD: %d\n", rc);
		goto fail_free_eq;
	}

	rc = LNetGet(LNET_NID_ANY, mdh, id,
		     LNET_RESERVED_PORTAL,
		     LNET_PROTO_PING_MATCHBITS, 0);

	if (rc != 0) {
		/* Don't CERROR; this could be deliberate! */

		rc2 = LNetMDUnlink(mdh);
		LASSERT(rc2 == 0);

		/* NB must wait for the UNLINK event below... */
		unlinked = 1;
		timeout = a_long_time;
	}

	do {
		/* MUST block for unlink to complete */
		if (unlinked)
			blocked = cfs_block_allsigs();

		rc2 = LNetEQPoll(&eqh, 1, timeout, &event, &which);

		if (unlinked)
			cfs_restore_sigs(blocked);

		CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2,
		       (rc2 <= 0) ? -1 : event.type,
		       (rc2 <= 0) ? -1 : event.status,
		       (rc2 > 0 && event.unlinked) ? " unlinked" : "");

		LASSERT(rc2 != -EOVERFLOW);	/* can't miss anything */

		if (rc2 <= 0 || event.status != 0) {
			/* timeout or error */
			if (!replied && rc == 0)
				rc = (rc2 < 0) ? rc2 :
				     (rc2 == 0) ? -ETIMEDOUT :
				     event.status;

			if (!unlinked) {
				/* Ensure completion in finite time... */
				LNetMDUnlink(mdh);
				/* No assertion (racing with network) */
				unlinked = 1;
				timeout = a_long_time;
			} else if (rc2 == 0) {
				/* timed out waiting for unlink */
				CWARN("ping %s: late network completion\n",
				      libcfs_id2str(id));
			}
		} else if (event.type == LNET_EVENT_REPLY) {
			/* rc becomes the number of reply bytes received */
			replied = 1;
			rc = event.mlength;
		}

	} while (rc2 <= 0 || !event.unlinked);

	if (!replied) {
		if (rc >= 0)
			CWARN("%s: Unexpected rc >= 0 but no reply!\n",
			      libcfs_id2str(id));
		rc = -EIO;
		goto fail_free_eq;
	}

	nob = rc;
	LASSERT(nob >= 0 && nob <= LNET_PING_INFO_SIZE(n_ids));

	rc = -EPROTO;				/* if I can't parse... */

	if (nob < 8) {
		/* can't check magic/version */
		CERROR("%s: ping info too short %d\n",
		       libcfs_id2str(id), nob);
		goto fail_free_eq;
	}

	/* a byte-swapped magic means the peer has opposite endianness */
	if (pbuf->pb_info.pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) {
		lnet_swap_pinginfo(pbuf);
	} else if (pbuf->pb_info.pi_magic != LNET_PROTO_PING_MAGIC) {
		CERROR("%s: Unexpected magic %08x\n",
		       libcfs_id2str(id), pbuf->pb_info.pi_magic);
		goto fail_free_eq;
	}

	if ((pbuf->pb_info.pi_features & LNET_PING_FEAT_NI_STATUS) == 0) {
		CERROR("%s: ping w/o NI status: 0x%x\n",
		       libcfs_id2str(id), pbuf->pb_info.pi_features);
		goto fail_free_eq;
	}

	if (nob < LNET_PING_INFO_SIZE(0)) {
		CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
		       nob, (int)LNET_PING_INFO_SIZE(0));
		goto fail_free_eq;
	}

	/* only copy out as many entries as the peer actually reported */
	if (pbuf->pb_info.pi_nnis < n_ids)
		n_ids = pbuf->pb_info.pi_nnis;

	if (nob < LNET_PING_INFO_SIZE(n_ids)) {
		CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
		       nob, (int)LNET_PING_INFO_SIZE(n_ids));
		goto fail_free_eq;
	}

	rc = -EFAULT;				/* If I SEGV... */

	memset(&tmpid, 0, sizeof(tmpid));
	for (i = 0; i < n_ids; i++) {
		tmpid.pid = pbuf->pb_info.pi_pid;
		tmpid.nid = pbuf->pb_info.pi_ni[i].ns_nid;
		if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid)))
			goto fail_free_eq;
	}
	rc = pbuf->pb_info.pi_nnis;

 fail_free_eq:
	rc2 = LNetEQFree(eqh);
	if (rc2 != 0)
		CERROR("rc2 %d\n", rc2);
	LASSERT(rc2 == 0);

 fail_ping_buffer_decref:
	lnet_ping_buffer_decref(pbuf);
	return rc;
}