1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2014, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lnet/klnds/socklnd/socklnd.c
37  *
38  * Author: Zach Brown <zab@zabbo.net>
39  * Author: Peter J. Braam <braam@clusterfs.com>
40  * Author: Phil Schwan <phil@clusterfs.com>
41  * Author: Eric Barton <eric@bartonsoftware.com>
42  */
43
44 #include "socklnd.h"
45
46 static lnd_t                   the_ksocklnd;
47 ksock_nal_data_t        ksocknal_data;
48
49 static ksock_interface_t *
50 ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip)
51 {
52         ksock_net_t       *net = ni->ni_data;
53         int                i;
54         ksock_interface_t *iface;
55
56         for (i = 0; i < net->ksnn_ninterfaces; i++) {
57                 LASSERT(i < LNET_MAX_INTERFACES);
58                 iface = &net->ksnn_interfaces[i];
59
60                 if (iface->ksni_ipaddr == ip)
61                         return (iface);
62         }
63
64         return (NULL);
65 }
66
67 static ksock_route_t *
68 ksocknal_create_route (__u32 ipaddr, int port)
69 {
70         ksock_route_t *route;
71
72         LIBCFS_ALLOC (route, sizeof (*route));
73         if (route == NULL)
74                 return (NULL);
75
76         atomic_set (&route->ksnr_refcount, 1);
77         route->ksnr_peer = NULL;
78         route->ksnr_retry_interval = 0;         /* OK to connect at any time */
79         route->ksnr_ipaddr = ipaddr;
80         route->ksnr_port = port;
81         route->ksnr_scheduled = 0;
82         route->ksnr_connecting = 0;
83         route->ksnr_connected = 0;
84         route->ksnr_deleted = 0;
85         route->ksnr_conn_count = 0;
86         route->ksnr_share_count = 0;
87
88         return (route);
89 }
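/*
 * Note on ownership: the route returned above carries a single reference
 * owned by the caller.  ksocknal_add_route_locked() hands that reference
 * over to the peer's route list ("peer's routelist takes over my ref"),
 * so a caller that ends up not adding the route must drop it itself with
 * ksocknal_route_decref(), as ksocknal_add_peer() and
 * ksocknal_create_routes() below do.
 */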
90
91 void
92 ksocknal_destroy_route (ksock_route_t *route)
93 {
94         LASSERT (atomic_read(&route->ksnr_refcount) == 0);
95
96         if (route->ksnr_peer != NULL)
97                 ksocknal_peer_decref(route->ksnr_peer);
98
99         LIBCFS_FREE (route, sizeof (*route));
100 }
101
102 static int
103 ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
104 {
105         ksock_net_t   *net = ni->ni_data;
106         ksock_peer_t  *peer;
107
108         LASSERT (id.nid != LNET_NID_ANY);
109         LASSERT (id.pid != LNET_PID_ANY);
110         LASSERT (!in_interrupt());
111
112         LIBCFS_ALLOC (peer, sizeof (*peer));
113         if (peer == NULL)
114                 return -ENOMEM;
115
116         memset (peer, 0, sizeof (*peer));       /* NULL pointers/clear flags etc */
117
118         peer->ksnp_ni = ni;
119         peer->ksnp_id = id;
120         atomic_set (&peer->ksnp_refcount, 1);   /* 1 ref for caller */
121         peer->ksnp_closing = 0;
122         peer->ksnp_accepting = 0;
123         peer->ksnp_proto = NULL;
124         peer->ksnp_last_alive = 0;
125         peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
126
127         INIT_LIST_HEAD(&peer->ksnp_conns);
128         INIT_LIST_HEAD(&peer->ksnp_routes);
129         INIT_LIST_HEAD(&peer->ksnp_tx_queue);
130         INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
131         spin_lock_init(&peer->ksnp_lock);
132
133         spin_lock_bh(&net->ksnn_lock);
134
135         if (net->ksnn_shutdown) {
136                 spin_unlock_bh(&net->ksnn_lock);
137
138                 LIBCFS_FREE(peer, sizeof(*peer));
139                 CERROR("Can't create peer: network shutdown\n");
140                 return -ESHUTDOWN;
141         }
142
143         net->ksnn_npeers++;
144
145         spin_unlock_bh(&net->ksnn_lock);
146
147         *peerp = peer;
148         return 0;
149 }
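/*
 * The ksnn_npeers count bumped above is dropped again in
 * ksocknal_destroy_peer(); together with the ksnn_shutdown check this
 * presumably lets the network shutdown path refuse new peers and wait
 * until every peer on the net has been torn down.
 */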
150
151 void
152 ksocknal_destroy_peer (ksock_peer_t *peer)
153 {
154         ksock_net_t    *net = peer->ksnp_ni->ni_data;
155
156         CDEBUG (D_NET, "peer %s %p deleted\n",
157                 libcfs_id2str(peer->ksnp_id), peer);
158
159         LASSERT(atomic_read(&peer->ksnp_refcount) == 0);
160         LASSERT(peer->ksnp_accepting == 0);
161         LASSERT(list_empty(&peer->ksnp_conns));
162         LASSERT(list_empty(&peer->ksnp_routes));
163         LASSERT(list_empty(&peer->ksnp_tx_queue));
164         LASSERT(list_empty(&peer->ksnp_zc_req_list));
165
166         LIBCFS_FREE(peer, sizeof(*peer));
167
168         /* NB a peer's connections and routes keep a reference on their peer
169          * until they are destroyed, so we can be assured that _all_ state to
170          * do with this peer has been cleaned up when its refcount drops to
171          * zero. */
172         spin_lock_bh(&net->ksnn_lock);
173         net->ksnn_npeers--;
174         spin_unlock_bh(&net->ksnn_lock);
175 }
176
177 ksock_peer_t *
178 ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id)
179 {
180         struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
181         struct list_head *tmp;
182         ksock_peer_t     *peer;
183
184         list_for_each(tmp, peer_list) {
185
186                 peer = list_entry(tmp, ksock_peer_t, ksnp_list);
187
188                 LASSERT(!peer->ksnp_closing);
189
190                 if (peer->ksnp_ni != ni)
191                         continue;
192
193                 if (peer->ksnp_id.nid != id.nid ||
194                     peer->ksnp_id.pid != id.pid)
195                         continue;
196
197                 CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
198                        peer, libcfs_id2str(id),
199                        atomic_read(&peer->ksnp_refcount));
200                 return peer;
201         }
202         return NULL;
203 }
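/*
 * Peer lookup is a simple hash: ksocknal_nid2peerlist() maps a NID to one
 * of the ksnd_peers[0..ksnd_peer_hash_size-1] chains and the search above
 * walks just that chain; callers must hold ksnd_global_lock.
 */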
204
205 ksock_peer_t *
206 ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
207 {
208         ksock_peer_t     *peer;
209
210         read_lock(&ksocknal_data.ksnd_global_lock);
211         peer = ksocknal_find_peer_locked(ni, id);
212         if (peer != NULL)                       /* +1 ref for caller */
213                 ksocknal_peer_addref(peer);
214         read_unlock(&ksocknal_data.ksnd_global_lock);
215
216         return (peer);
217 }
218
219 static void
220 ksocknal_unlink_peer_locked (ksock_peer_t *peer)
221 {
222         int                i;
223         __u32              ip;
224         ksock_interface_t *iface;
225
226         for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
227                 LASSERT (i < LNET_MAX_INTERFACES);
228                 ip = peer->ksnp_passive_ips[i];
229
230                 iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
231                 /* All IPs in peer->ksnp_passive_ips[] come from the
232                  * interface list, therefore the call must succeed. */
233                 LASSERT (iface != NULL);
234
235                 CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
236                        peer, iface, iface->ksni_nroutes);
237                 iface->ksni_npeers--;
238         }
239
240         LASSERT(list_empty(&peer->ksnp_conns));
241         LASSERT(list_empty(&peer->ksnp_routes));
242         LASSERT(!peer->ksnp_closing);
243         peer->ksnp_closing = 1;
244         list_del(&peer->ksnp_list);
245         /* lose peerlist's ref */
246         ksocknal_peer_decref(peer);
247 }
248
249 static int
250 ksocknal_get_peer_info (lnet_ni_t *ni, int index,
251                         lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip,
252                         int *port, int *conn_count, int *share_count)
253 {
254         ksock_peer_t      *peer;
255         struct list_head  *ptmp;
256         ksock_route_t     *route;
257         struct list_head  *rtmp;
258         int                i;
259         int                j;
260         int                rc = -ENOENT;
261
262         read_lock(&ksocknal_data.ksnd_global_lock);
263
264         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
265                 list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
266                         peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
267
268                         if (peer->ksnp_ni != ni)
269                                 continue;
270
271                         if (peer->ksnp_n_passive_ips == 0 &&
272                             list_empty(&peer->ksnp_routes)) {
273                                 if (index-- > 0)
274                                         continue;
275
276                                 *id = peer->ksnp_id;
277                                 *myip = 0;
278                                 *peer_ip = 0;
279                                 *port = 0;
280                                 *conn_count = 0;
281                                 *share_count = 0;
282                                 rc = 0;
283                                 goto out;
284                         }
285
286                         for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
287                                 if (index-- > 0)
288                                         continue;
289
290                                 *id = peer->ksnp_id;
291                                 *myip = peer->ksnp_passive_ips[j];
292                                 *peer_ip = 0;
293                                 *port = 0;
294                                 *conn_count = 0;
295                                 *share_count = 0;
296                                 rc = 0;
297                                 goto out;
298                         }
299
300                         list_for_each(rtmp, &peer->ksnp_routes) {
301                                 if (index-- > 0)
302                                         continue;
303
304                                 route = list_entry(rtmp, ksock_route_t,
305                                                    ksnr_list);
306
307                                 *id = peer->ksnp_id;
308                                 *myip = route->ksnr_myipaddr;
309                                 *peer_ip = route->ksnr_ipaddr;
310                                 *port = route->ksnr_port;
311                                 *conn_count = route->ksnr_conn_count;
312                                 *share_count = route->ksnr_share_count;
313                                 rc = 0;
314                                 goto out;
315                         }
316                 }
317         }
318 out:
319         read_unlock(&ksocknal_data.ksnd_global_lock);
320         return rc;
321 }
322
323 static void
324 ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
325 {
326         ksock_peer_t      *peer = route->ksnr_peer;
327         int                type = conn->ksnc_type;
328         ksock_interface_t *iface;
329
330         conn->ksnc_route = route;
331         ksocknal_route_addref(route);
332
333         if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
334                 if (route->ksnr_myipaddr == 0) {
335                         /* route wasn't bound locally yet (the initial route) */
336                         CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
337                                libcfs_id2str(peer->ksnp_id),
338                                &route->ksnr_ipaddr,
339                                &conn->ksnc_myipaddr);
340                 } else {
341                         CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h "
342                                "to %pI4h\n", libcfs_id2str(peer->ksnp_id),
343                                &route->ksnr_ipaddr,
344                                &route->ksnr_myipaddr,
345                                &conn->ksnc_myipaddr);
346
347                         iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
348                                                   route->ksnr_myipaddr);
349                         if (iface != NULL)
350                                 iface->ksni_nroutes--;
351                 }
352                 route->ksnr_myipaddr = conn->ksnc_myipaddr;
353                 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
354                                           route->ksnr_myipaddr);
355                 if (iface != NULL)
356                         iface->ksni_nroutes++;
357         }
358
359         route->ksnr_connected |= (1<<type);
360         route->ksnr_conn_count++;
361
362         /* Successful connection => further attempts can
363          * proceed immediately */
364         route->ksnr_retry_interval = 0;
365 }
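/*
 * ksnr_connected is a small bitmask indexed by connection type (one bit
 * per typed connection the route currently has established); the matching
 * bit is set above and cleared again in ksocknal_close_conn_locked() when
 * the last connection of that type on the route goes away.
 */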
366
367 static void
368 ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
369 {
370         struct list_head *tmp;
371         ksock_conn_t     *conn;
372         ksock_route_t    *route2;
373
374         LASSERT(!peer->ksnp_closing);
375         LASSERT(route->ksnr_peer == NULL);
376         LASSERT(!route->ksnr_scheduled);
377         LASSERT(!route->ksnr_connecting);
378         LASSERT(route->ksnr_connected == 0);
379
380         /* LASSERT(unique) */
381         list_for_each(tmp, &peer->ksnp_routes) {
382                 route2 = list_entry(tmp, ksock_route_t, ksnr_list);
383
384                 if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
385                         CERROR("Duplicate route %s %pI4h\n",
386                                libcfs_id2str(peer->ksnp_id),
387                                &route->ksnr_ipaddr);
388                         LBUG();
389                 }
390         }
391
392         route->ksnr_peer = peer;
393         ksocknal_peer_addref(peer);
394         /* peer's routelist takes over my ref on 'route' */
395         list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
396
397         list_for_each(tmp, &peer->ksnp_conns) {
398                 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
399
400                 if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
401                         continue;
402
403                 ksocknal_associate_route_conn_locked(route, conn);
404                 /* keep going (typed routes) */
405         }
406 }
407
408 static void
409 ksocknal_del_route_locked (ksock_route_t *route)
410 {
411         ksock_peer_t      *peer = route->ksnr_peer;
412         ksock_interface_t *iface;
413         ksock_conn_t      *conn;
414         struct list_head  *ctmp;
415         struct list_head  *cnxt;
416
417         LASSERT(!route->ksnr_deleted);
418
419         /* Close associated conns */
420         list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
421                 conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
422
423                 if (conn->ksnc_route != route)
424                         continue;
425
426                 ksocknal_close_conn_locked(conn, 0);
427         }
428
429         if (route->ksnr_myipaddr != 0) {
430                 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
431                                           route->ksnr_myipaddr);
432                 if (iface != NULL)
433                         iface->ksni_nroutes--;
434         }
435
436         route->ksnr_deleted = 1;
437         list_del(&route->ksnr_list);
438         ksocknal_route_decref(route);           /* drop peer's ref */
439
440         if (list_empty(&peer->ksnp_routes) &&
441             list_empty(&peer->ksnp_conns)) {
442                 /* I've just removed the last route to a peer with no active
443                  * connections */
444                 ksocknal_unlink_peer_locked(peer);
445         }
446 }
447
448 int
449 ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
450 {
451         struct list_head *tmp;
452         ksock_peer_t     *peer;
453         ksock_peer_t     *peer2;
454         ksock_route_t    *route;
455         ksock_route_t    *route2;
456         int               rc;
457
458         if (id.nid == LNET_NID_ANY ||
459             id.pid == LNET_PID_ANY)
460                 return (-EINVAL);
461
462         /* Have a brand new peer ready... */
463         rc = ksocknal_create_peer(&peer, ni, id);
464         if (rc != 0)
465                 return rc;
466
467         route = ksocknal_create_route (ipaddr, port);
468         if (route == NULL) {
469                 ksocknal_peer_decref(peer);
470                 return (-ENOMEM);
471         }
472
473         write_lock_bh(&ksocknal_data.ksnd_global_lock);
474
475         /* always called with a ref on ni, so shutdown can't have started */
476         LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
477
478         peer2 = ksocknal_find_peer_locked(ni, id);
479         if (peer2 != NULL) {
480                 ksocknal_peer_decref(peer);
481                 peer = peer2;
482         } else {
483                 /* peer table takes my ref on peer */
484                 list_add_tail(&peer->ksnp_list,
485                               ksocknal_nid2peerlist(id.nid));
486         }
487
488         route2 = NULL;
489         list_for_each(tmp, &peer->ksnp_routes) {
490                 route2 = list_entry(tmp, ksock_route_t, ksnr_list);
491
492                 if (route2->ksnr_ipaddr == ipaddr)
493                         break;
494
495                 route2 = NULL;
496         }
497         if (route2 == NULL) {
498                 ksocknal_add_route_locked(peer, route);
499                 route->ksnr_share_count++;
500         } else {
501                 ksocknal_route_decref(route);
502                 route2->ksnr_share_count++;
503         }
504
505         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
506
507         return 0;
508 }
509
510 static void
511 ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
512 {
513         ksock_conn_t     *conn;
514         ksock_route_t    *route;
515         struct list_head *tmp;
516         struct list_head *nxt;
517         int               nshared;
518
519         LASSERT(!peer->ksnp_closing);
520
521         /* Extra ref prevents peer disappearing until I'm done with it */
522         ksocknal_peer_addref(peer);
523
524         list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
525                 route = list_entry(tmp, ksock_route_t, ksnr_list);
526
527                 /* no match */
528                 if (!(ip == 0 || route->ksnr_ipaddr == ip))
529                         continue;
530
531                 route->ksnr_share_count = 0;
532                 /* This deletes associated conns too */
533                 ksocknal_del_route_locked(route);
534         }
535
536         nshared = 0;
537         list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
538                 route = list_entry(tmp, ksock_route_t, ksnr_list);
539                 nshared += route->ksnr_share_count;
540         }
541
542         if (nshared == 0) {
543                 /* remove everything else if there are no explicit entries
544                  * left */
545
546                 list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
547                         route = list_entry(tmp, ksock_route_t, ksnr_list);
548
549                         /* we should only be removing auto-entries */
550                         LASSERT(route->ksnr_share_count == 0);
551                         ksocknal_del_route_locked(route);
552                 }
553
554                 list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
555                         conn = list_entry(tmp, ksock_conn_t, ksnc_list);
556
557                         ksocknal_close_conn_locked(conn, 0);
558                 }
559         }
560
561         ksocknal_peer_decref(peer);
562         /* NB peer unlinks itself when last conn/route is removed */
563 }
564
565 static int
566 ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
567 {
568         struct list_head  zombies = LIST_HEAD_INIT(zombies);
569         struct list_head *ptmp;
570         struct list_head *pnxt;
571         ksock_peer_t     *peer;
572         int               lo;
573         int               hi;
574         int               i;
575         int               rc = -ENOENT;
576
577         write_lock_bh(&ksocknal_data.ksnd_global_lock);
578
579         if (id.nid != LNET_NID_ANY) {
580                 hi = (int)(ksocknal_nid2peerlist(id.nid) -
581                            ksocknal_data.ksnd_peers);
582                 lo = hi;
583         } else {
584                 lo = 0;
585                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
586         }
587
588         for (i = lo; i <= hi; i++) {
589                 list_for_each_safe(ptmp, pnxt,
590                                    &ksocknal_data.ksnd_peers[i]) {
591                         peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
592
593                         if (peer->ksnp_ni != ni)
594                                 continue;
595
596                         if (!((id.nid == LNET_NID_ANY ||
597                                peer->ksnp_id.nid == id.nid) &&
598                               (id.pid == LNET_PID_ANY ||
599                                peer->ksnp_id.pid == id.pid)))
600                                 continue;
601
602                         ksocknal_peer_addref(peer);     /* a ref for me... */
603
604                         ksocknal_del_peer_locked(peer, ip);
605
606                         if (peer->ksnp_closing &&
607                             !list_empty(&peer->ksnp_tx_queue)) {
608                                 LASSERT(list_empty(&peer->ksnp_conns));
609                                 LASSERT(list_empty(&peer->ksnp_routes));
610
611                                 list_splice_init(&peer->ksnp_tx_queue,
612                                                  &zombies);
613                         }
614
615                         ksocknal_peer_decref(peer);     /* ...till here */
616
617                         rc = 0;                         /* matched! */
618                 }
619         }
620
621         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
622
623         ksocknal_txlist_done(ni, &zombies, 1);
624
625         return rc;
626 }
627
628 static ksock_conn_t *
629 ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
630 {
631         ksock_peer_t     *peer;
632         struct list_head *ptmp;
633         ksock_conn_t     *conn;
634         struct list_head *ctmp;
635         int               i;
636
637         read_lock(&ksocknal_data.ksnd_global_lock);
638
639         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
640                 list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
641                         peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
642
643                         LASSERT(!peer->ksnp_closing);
644
645                         if (peer->ksnp_ni != ni)
646                                 continue;
647
648                         list_for_each(ctmp, &peer->ksnp_conns) {
649                                 if (index-- > 0)
650                                         continue;
651
652                                 conn = list_entry(ctmp, ksock_conn_t,
653                                                   ksnc_list);
654                                 ksocknal_conn_addref(conn);
655                                 read_unlock(
656                                             &ksocknal_data.ksnd_global_lock);
657                                 return conn;
658                         }
659                 }
660         }
661
662         read_unlock(&ksocknal_data.ksnd_global_lock);
663         return NULL;
664 }
665
666 static ksock_sched_t *
667 ksocknal_choose_scheduler_locked(unsigned int cpt)
668 {
669         struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt];
670         ksock_sched_t           *sched;
671         int                     i;
672
673         LASSERT(info->ksi_nthreads > 0);
674
675         sched = &info->ksi_scheds[0];
676         /*
677          * NB: it's safe so far, but info->ksi_nthreads could be changed
678          * at runtime when we have dynamic LNet configuration, then we
679          * need to take care of this.
680          */
681         for (i = 1; i < info->ksi_nthreads; i++) {
682                 if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
683                         sched = &info->ksi_scheds[i];
684         }
685
686         return sched;
687 }
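/*
 * Illustrative use, taken from ksocknal_create_conn() below: the caller
 * maps the peer NID to a CPT and, still under the global write lock,
 * attaches the new conn to the least-loaded scheduler in that CPT:
 *
 *      cpt = lnet_cpt_of_nid(peerid.nid);
 *      ...
 *      sched = ksocknal_choose_scheduler_locked(cpt);
 *      sched->kss_nconns++;
 *      conn->ksnc_scheduler = sched;
 */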
688
689 static int
690 ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
691 {
692         ksock_net_t       *net = ni->ni_data;
693         int                i;
694         int                nip;
695
696         read_lock(&ksocknal_data.ksnd_global_lock);
697
698         nip = net->ksnn_ninterfaces;
699         LASSERT (nip <= LNET_MAX_INTERFACES);
700
701         /* Only offer interfaces for additional connections if I have
702          * more than one. */
703         if (nip < 2) {
704                 read_unlock(&ksocknal_data.ksnd_global_lock);
705                 return 0;
706         }
707
708         for (i = 0; i < nip; i++) {
709                 ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
710                 LASSERT (ipaddrs[i] != 0);
711         }
712
713         read_unlock(&ksocknal_data.ksnd_global_lock);
714         return (nip);
715 }
716
717 static int
718 ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
719 {
720         int   best_netmatch = 0;
721         int   best_xor      = 0;
722         int   best          = -1;
723         int   this_xor;
724         int   this_netmatch;
725         int   i;
726
727         for (i = 0; i < nips; i++) {
728                 if (ips[i] == 0)
729                         continue;
730
731                 this_xor = (ips[i] ^ iface->ksni_ipaddr);
732                 this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
733
734                 if (!(best < 0 ||
735                       best_netmatch < this_netmatch ||
736                       (best_netmatch == this_netmatch &&
737                        best_xor > this_xor)))
738                         continue;
739
740                 best = i;
741                 best_netmatch = this_netmatch;
742                 best_xor = this_xor;
743         }
744
745         LASSERT (best >= 0);
746         return (best);
747 }
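/*
 * The loop above prefers a peer IP on the interface's own subnet (netmask
 * match) and breaks ties with the smallest XOR distance.  For illustration
 * only (hypothetical addresses): with iface 192.168.1.10/255.255.255.0 and
 * peer IPs { 10.0.0.5, 192.168.1.20 }, only 192.168.1.20 matches the
 * netmask, so index 1 is returned.
 */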
748
749 static int
750 ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
751 {
752         rwlock_t                *global_lock = &ksocknal_data.ksnd_global_lock;
753         ksock_net_t        *net = peer->ksnp_ni->ni_data;
754         ksock_interface_t  *iface;
755         ksock_interface_t  *best_iface;
756         int                 n_ips;
757         int                 i;
758         int                 j;
759         int                 k;
760         __u32               ip;
761         __u32               xor;
762         int                 this_netmatch;
763         int                 best_netmatch;
764         int                 best_npeers;
765
766         /* CAVEAT EMPTOR: We do all our interface matching with an
767          * exclusive hold of global lock at IRQ priority.  We're only
768          * expecting to be dealing with small numbers of interfaces, so the
769          * O(n**3)-ness shouldn't matter */
770
771         /* Also note that I'm not going to return more than n_peerips
772          * interfaces, even if I have more myself */
773
774         write_lock_bh(global_lock);
775
776         LASSERT (n_peerips <= LNET_MAX_INTERFACES);
777         LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
778
779         /* Only match interfaces for additional connections
780          * if I have > 1 interface */
781         n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
782                 MIN(n_peerips, net->ksnn_ninterfaces);
783
784         for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
785                 /*              ^ yes really... */
786
787                 /* If we have any new interfaces, first tick off all the
788                  * peer IPs that match old interfaces, then choose new
789                  * interfaces to match the remaining peer IPs.
790                  * We don't forget interfaces we've stopped using; we might
791                  * start using them again... */
792
793                 if (i < peer->ksnp_n_passive_ips) {
794                         /* Old interface. */
795                         ip = peer->ksnp_passive_ips[i];
796                         best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
797
798                         /* peer passive ips are kept up to date */
799                         LASSERT(best_iface != NULL);
800                 } else {
801                         /* choose a new interface */
802                         LASSERT (i == peer->ksnp_n_passive_ips);
803
804                         best_iface = NULL;
805                         best_netmatch = 0;
806                         best_npeers = 0;
807
808                         for (j = 0; j < net->ksnn_ninterfaces; j++) {
809                                 iface = &net->ksnn_interfaces[j];
810                                 ip = iface->ksni_ipaddr;
811
812                                 for (k = 0; k < peer->ksnp_n_passive_ips; k++)
813                                         if (peer->ksnp_passive_ips[k] == ip)
814                                                 break;
815
816                                 if (k < peer->ksnp_n_passive_ips) /* using it already */
817                                         continue;
818
819                                 k = ksocknal_match_peerip(iface, peerips, n_peerips);
820                                 xor = (ip ^ peerips[k]);
821                                 this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
822
823                                 if (!(best_iface == NULL ||
824                                       best_netmatch < this_netmatch ||
825                                       (best_netmatch == this_netmatch &&
826                                        best_npeers > iface->ksni_npeers)))
827                                         continue;
828
829                                 best_iface = iface;
830                                 best_netmatch = this_netmatch;
831                                 best_npeers = iface->ksni_npeers;
832                         }
833
834                         LASSERT(best_iface != NULL);
835
836                         best_iface->ksni_npeers++;
837                         ip = best_iface->ksni_ipaddr;
838                         peer->ksnp_passive_ips[i] = ip;
839                         peer->ksnp_n_passive_ips = i+1;
840                 }
841
842                 /* mark the best matching peer IP used */
843                 j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
844                 peerips[j] = 0;
845         }
846
847         /* Overwrite input peer IP addresses */
848         memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
849
850         write_unlock_bh(global_lock);
851
852         return (n_ips);
853 }
854
855 static void
856 ksocknal_create_routes(ksock_peer_t *peer, int port,
857                        __u32 *peer_ipaddrs, int npeer_ipaddrs)
858 {
859         ksock_route_t           *newroute = NULL;
860         rwlock_t                *global_lock = &ksocknal_data.ksnd_global_lock;
861         lnet_ni_t               *ni = peer->ksnp_ni;
862         ksock_net_t             *net = ni->ni_data;
863         struct list_head        *rtmp;
864         ksock_route_t           *route;
865         ksock_interface_t       *iface;
866         ksock_interface_t       *best_iface;
867         int                     best_netmatch;
868         int                     this_netmatch;
869         int                     best_nroutes;
870         int                     i;
871         int                     j;
872
873         /* CAVEAT EMPTOR: We do all our interface matching with an
874          * exclusive hold of global lock at IRQ priority.  We're only
875          * expecting to be dealing with small numbers of interfaces, so the
876          * O(n**3)-ness here shouldn't matter */
877
878         write_lock_bh(global_lock);
879
880         if (net->ksnn_ninterfaces < 2) {
881                 /* Only create additional connections
882                  * if I have > 1 interface */
883                 write_unlock_bh(global_lock);
884                 return;
885         }
886
887         LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES);
888
889         for (i = 0; i < npeer_ipaddrs; i++) {
890                 if (newroute != NULL) {
891                         newroute->ksnr_ipaddr = peer_ipaddrs[i];
892                 } else {
893                         write_unlock_bh(global_lock);
894
895                         newroute = ksocknal_create_route(peer_ipaddrs[i], port);
896                         if (newroute == NULL)
897                                 return;
898
899                         write_lock_bh(global_lock);
900                 }
901
902                 if (peer->ksnp_closing) {
903                         /* peer got closed under me */
904                         break;
905                 }
906
907                 /* Already got a route? */
908                 route = NULL;
909                 list_for_each(rtmp, &peer->ksnp_routes) {
910                         route = list_entry(rtmp, ksock_route_t, ksnr_list);
911
912                         if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
913                                 break;
914
915                         route = NULL;
916                 }
917                 if (route != NULL)
918                         continue;
919
920                 best_iface = NULL;
921                 best_nroutes = 0;
922                 best_netmatch = 0;
923
924                 LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
925
926                 /* Select interface to connect from */
927                 for (j = 0; j < net->ksnn_ninterfaces; j++) {
928                         iface = &net->ksnn_interfaces[j];
929
930                         /* Using this interface already? */
931                         list_for_each(rtmp, &peer->ksnp_routes) {
932                                 route = list_entry(rtmp, ksock_route_t,
933                                                    ksnr_list);
934
935                                 if (route->ksnr_myipaddr == iface->ksni_ipaddr)
936                                         break;
937
938                                 route = NULL;
939                         }
940                         if (route != NULL)
941                                 continue;
942
943                         this_netmatch = (((iface->ksni_ipaddr ^
944                                            newroute->ksnr_ipaddr) &
945                                            iface->ksni_netmask) == 0) ? 1 : 0;
946
947                         if (!(best_iface == NULL ||
948                               best_netmatch < this_netmatch ||
949                               (best_netmatch == this_netmatch &&
950                                best_nroutes > iface->ksni_nroutes)))
951                                 continue;
952
953                         best_iface = iface;
954                         best_netmatch = this_netmatch;
955                         best_nroutes = iface->ksni_nroutes;
956                 }
957
958                 if (best_iface == NULL)
959                         continue;
960
961                 newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
962                 best_iface->ksni_nroutes++;
963
964                 ksocknal_add_route_locked(peer, newroute);
965                 newroute = NULL;
966         }
967
968         write_unlock_bh(global_lock);
969         if (newroute != NULL)
970                 ksocknal_route_decref(newroute);
971 }
972
973 int
974 ksocknal_accept(lnet_ni_t *ni, struct socket *sock)
975 {
976         ksock_connreq_t *cr;
977         int              rc;
978         __u32            peer_ip;
979         int              peer_port;
980
981         rc = lnet_sock_getaddr(sock, true, &peer_ip, &peer_port);
982         LASSERT(rc == 0);               /* we succeeded before */
983
984         LIBCFS_ALLOC(cr, sizeof(*cr));
985         if (cr == NULL) {
986                 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from "
987                                    "%pI4h: memory exhausted\n", &peer_ip);
988                 return -ENOMEM;
989         }
990
991         lnet_ni_addref(ni);
992         cr->ksncr_ni   = ni;
993         cr->ksncr_sock = sock;
994
995         spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
996
997         list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
998         wake_up(&ksocknal_data.ksnd_connd_waitq);
999
1000         spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
1001         return 0;
1002 }
1003
1004 static int
1005 ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr)
1006 {
1007         ksock_route_t *route;
1008
1009         list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
1010                 if (route->ksnr_ipaddr == ipaddr)
1011                         return route->ksnr_connecting;
1012         }
1013         return 0;
1014 }
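/*
 * ksocknal_connecting() is used below to resolve simultaneous connection
 * attempts: on the passive side, if the incoming peer's NID is lower than
 * ours and we are already actively connecting to that address, the new
 * connection is refused with EALREADY so the connect initiated by the
 * higher NID wins the race.
 */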
1015
1016 int
1017 ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route,
1018                      struct socket *sock, int type)
1019 {
1020         rwlock_t                *global_lock = &ksocknal_data.ksnd_global_lock;
1021         struct list_head        zombies = LIST_HEAD_INIT(zombies);
1022         lnet_process_id_t       peerid;
1023         struct list_head        *tmp;
1024         __u64              incarnation;
1025         ksock_conn_t      *conn;
1026         ksock_conn_t      *conn2;
1027         ksock_peer_t      *peer = NULL;
1028         ksock_peer_t      *peer2;
1029         ksock_sched_t     *sched;
1030         ksock_hello_msg_t *hello;
1031         int                cpt;
1032         ksock_tx_t        *tx;
1033         ksock_tx_t        *txtmp;
1034         int                rc;
1035         int                active;
1036         char              *warn = NULL;
1037
1038         active = (route != NULL);
1039
1040         LASSERT (active == (type != SOCKLND_CONN_NONE));
1041
1042         LIBCFS_ALLOC(conn, sizeof(*conn));
1043         if (conn == NULL) {
1044                 rc = -ENOMEM;
1045                 goto failed_0;
1046         }
1047
1048         memset (conn, 0, sizeof (*conn));
1049
1050         conn->ksnc_peer = NULL;
1051         conn->ksnc_route = NULL;
1052         conn->ksnc_sock = sock;
1053         /* 2 refs: 1 for conn, plus an extra ref that prevents the socket
1054          * from being closed before the connection is established */
1055         atomic_set (&conn->ksnc_sock_refcount, 2);
1056         conn->ksnc_type = type;
1057         ksocknal_lib_save_callback(sock, conn);
1058         atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
1059
1060         conn->ksnc_rx_ready = 0;
1061         conn->ksnc_rx_scheduled = 0;
1062
1063         INIT_LIST_HEAD(&conn->ksnc_tx_queue);
1064         conn->ksnc_tx_ready = 0;
1065         conn->ksnc_tx_scheduled = 0;
1066         conn->ksnc_tx_carrier = NULL;
1067         atomic_set (&conn->ksnc_tx_nob, 0);
1068
1069         LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t,
1070                                      kshm_ips[LNET_MAX_INTERFACES]));
1071         if (hello == NULL) {
1072                 rc = -ENOMEM;
1073                 goto failed_1;
1074         }
1075
1076         /* stash conn's local and remote addrs */
1077         rc = ksocknal_lib_get_conn_addrs (conn);
1078         if (rc != 0)
1079                 goto failed_1;
1080
1081         /* Find out/confirm peer's NID and connection type and get the
1082          * vector of interfaces she's willing to let me connect to.
1083          * Passive connections use the listener timeout since the peer sends
1084          * eagerly */
1085
1086         if (active) {
1087                 peer = route->ksnr_peer;
1088                 LASSERT(ni == peer->ksnp_ni);
1089
1090                 /* Active connection sends HELLO eagerly */
1091                 hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
1092                 peerid = peer->ksnp_id;
1093
1094                 write_lock_bh(global_lock);
1095                 conn->ksnc_proto = peer->ksnp_proto;
1096                 write_unlock_bh(global_lock);
1097
1098                 if (conn->ksnc_proto == NULL) {
1099                          conn->ksnc_proto = &ksocknal_protocol_v3x;
1100 #if SOCKNAL_VERSION_DEBUG
1101                          if (*ksocknal_tunables.ksnd_protocol == 2)
1102                                  conn->ksnc_proto = &ksocknal_protocol_v2x;
1103                          else if (*ksocknal_tunables.ksnd_protocol == 1)
1104                                  conn->ksnc_proto = &ksocknal_protocol_v1x;
1105 #endif
1106                 }
1107
1108                 rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
1109                 if (rc != 0)
1110                         goto failed_1;
1111         } else {
1112                 peerid.nid = LNET_NID_ANY;
1113                 peerid.pid = LNET_PID_ANY;
1114
1115                 /* Passive, get protocol from peer */
1116                 conn->ksnc_proto = NULL;
1117         }
1118
1119         rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation);
1120         if (rc < 0)
1121                 goto failed_1;
1122
1123         LASSERT (rc == 0 || active);
1124         LASSERT (conn->ksnc_proto != NULL);
1125         LASSERT (peerid.nid != LNET_NID_ANY);
1126
1127         cpt = lnet_cpt_of_nid(peerid.nid);
1128
1129         if (active) {
1130                 ksocknal_peer_addref(peer);
1131                 write_lock_bh(global_lock);
1132         } else {
1133                 rc = ksocknal_create_peer(&peer, ni, peerid);
1134                 if (rc != 0)
1135                         goto failed_1;
1136
1137                 write_lock_bh(global_lock);
1138
1139                 /* called with a ref on ni, so shutdown can't have started */
1140                 LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
1141
1142                 peer2 = ksocknal_find_peer_locked(ni, peerid);
1143                 if (peer2 == NULL) {
1144                         /* NB this puts an "empty" peer in the peer
1145                          * table (which takes my ref) */
1146                         list_add_tail(&peer->ksnp_list,
1147                                       ksocknal_nid2peerlist(peerid.nid));
1148                 } else {
1149                         ksocknal_peer_decref(peer);
1150                         peer = peer2;
1151                 }
1152
1153                 /* +1 ref for me */
1154                 ksocknal_peer_addref(peer);
1155                 peer->ksnp_accepting++;
1156
1157                 /* Am I already connecting to this guy?  Resolve in
1158                  * favour of higher NID... */
1159                 if (peerid.nid < ni->ni_nid &&
1160                     ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
1161                         rc = EALREADY;
1162                         warn = "connection race resolution";
1163                         goto failed_2;
1164                 }
1165         }
1166
1167         if (peer->ksnp_closing ||
1168             (active && route->ksnr_deleted)) {
1169                 /* peer/route got closed under me */
1170                 rc = -ESTALE;
1171                 warn = "peer/route removed";
1172                 goto failed_2;
1173         }
1174
1175         if (peer->ksnp_proto == NULL) {
1176                 /* Never connected before.
1177                  * NB recv_hello may have returned EPROTO to signal my peer
1178                  * wants a different protocol than the one I asked for.
1179                  */
1180                 LASSERT(list_empty(&peer->ksnp_conns));
1181
1182                 peer->ksnp_proto = conn->ksnc_proto;
1183                 peer->ksnp_incarnation = incarnation;
1184         }
1185
1186         if (peer->ksnp_proto != conn->ksnc_proto ||
1187             peer->ksnp_incarnation != incarnation) {
1188                 /* Peer rebooted or I've got the wrong protocol version */
1189                 ksocknal_close_peer_conns_locked(peer, 0, 0);
1190
1191                 peer->ksnp_proto = NULL;
1192                 rc = ESTALE;
1193                 warn = peer->ksnp_incarnation != incarnation ?
1194                        "peer rebooted" :
1195                        "wrong proto version";
1196                 goto failed_2;
1197         }
1198
1199         switch (rc) {
1200         default:
1201                 LBUG();
1202         case 0:
1203                 break;
1204         case EALREADY:
1205                 warn = "lost conn race";
1206                 goto failed_2;
1207         case EPROTO:
1208                 warn = "retry with different protocol version";
1209                 goto failed_2;
1210         }
1211
1212         /* Refuse to duplicate an existing connection, unless this is a
1213          * loopback connection */
1214         if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
1215                 list_for_each(tmp, &peer->ksnp_conns) {
1216                         conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
1217
1218                         if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
1219                             conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
1220                             conn2->ksnc_type != conn->ksnc_type)
1221                                 continue;
1222
1223                         /* Reply on a passive connection attempt so the peer
1224                          * realises we're connected. */
1225                         LASSERT (rc == 0);
1226                         if (!active)
1227                                 rc = EALREADY;
1228
1229                         warn = "duplicate";
1230                         goto failed_2;
1231                 }
1232         }
1233
1234         /* If the connection created by this route didn't bind to the IP
1235          * address the route connected to, the connection/route matching
1236          * code below probably isn't going to work. */
1237         if (active &&
1238             route->ksnr_ipaddr != conn->ksnc_ipaddr) {
1239                 CERROR("Route %s %pI4h connected to %pI4h\n",
1240                        libcfs_id2str(peer->ksnp_id),
1241                        &route->ksnr_ipaddr,
1242                        &conn->ksnc_ipaddr);
1243         }
1244
1245         /* Search for a route corresponding to the new connection and
1246          * create an association.  This allows incoming connections created
1247          * by routes in my peer to match my own route entries so I don't
1248          * continually create duplicate routes. */
1249         list_for_each(tmp, &peer->ksnp_routes) {
1250                 route = list_entry(tmp, ksock_route_t, ksnr_list);
1251
1252                 if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
1253                         continue;
1254
1255                 ksocknal_associate_route_conn_locked(route, conn);
1256                 break;
1257         }
1258
1259         conn->ksnc_peer = peer;                 /* conn takes my ref on peer */
1260         peer->ksnp_last_alive = cfs_time_current();
1261         peer->ksnp_send_keepalive = 0;
1262         peer->ksnp_error = 0;
1263
1264         sched = ksocknal_choose_scheduler_locked(cpt);
1265         sched->kss_nconns++;
1266         conn->ksnc_scheduler = sched;
1267
1268         conn->ksnc_tx_last_post = cfs_time_current();
1269         /* Set the deadline for the outgoing HELLO to drain */
1270         conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
1271         conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
1272         smp_mb();   /* order with adding to peer's conn list */
1273
1274         list_add(&conn->ksnc_list, &peer->ksnp_conns);
1275         ksocknal_conn_addref(conn);
1276
1277         ksocknal_new_packet(conn, 0);
1278
1279         conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
1280
1281         /* Take packets blocking for this connection. */
1282         list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
1283                 if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) ==
1284                     SOCKNAL_MATCH_NO)
1285                         continue;
1286
1287                 list_del(&tx->tx_list);
1288                 ksocknal_queue_tx_locked(tx, conn);
1289         }
1290
1291         write_unlock_bh(global_lock);
1292
1293         /* We've now got a new connection.  Any errors from here on are just
1294          * like "normal" comms errors and we close the connection normally.
1295          * NB (a) we still have to send the reply HELLO for passive
1296          *        connections,
1297          *    (b) normal I/O on the conn is blocked until I setup and call the
1298          *        socket callbacks.
1299          */
1300
1301         CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d"
1302                " incarnation:%lld sched[%d:%d]\n",
1303                libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
1304                &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
1305                conn->ksnc_port, incarnation, cpt,
1306                (int)(sched - &sched->kss_info->ksi_scheds[0]));
1307
1308         if (active) {
1309                 /* additional routes after interface exchange? */
1310                 ksocknal_create_routes(peer, conn->ksnc_port,
1311                                        hello->kshm_ips, hello->kshm_nips);
1312         } else {
1313                 hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
1314                                                        hello->kshm_nips);
1315                 rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
1316         }
1317
1318         LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
1319                                     kshm_ips[LNET_MAX_INTERFACES]));
1320
1321         /* setup the socket AFTER I've received hello (it disables
1322          * SO_LINGER).  I might call back to the acceptor who may want
1323          * to send a protocol version response and then close the
1324          * socket; this ensures the socket only tears down after the
1325          * response has been sent. */
1326         if (rc == 0)
1327                 rc = ksocknal_lib_setup_sock(sock);
1328
1329         write_lock_bh(global_lock);
1330
1331         /* NB my callbacks block while I hold ksnd_global_lock */
1332         ksocknal_lib_set_callback(sock, conn);
1333
1334         if (!active)
1335                 peer->ksnp_accepting--;
1336
1337         write_unlock_bh(global_lock);
1338
1339         if (rc != 0) {
1340                 write_lock_bh(global_lock);
1341                 if (!conn->ksnc_closing) {
1342                         /* could be closed by another thread */
1343                         ksocknal_close_conn_locked(conn, rc);
1344                 }
1345                 write_unlock_bh(global_lock);
1346         } else if (ksocknal_connsock_addref(conn) == 0) {
1347                 /* Allow I/O to proceed. */
1348                 ksocknal_read_callback(conn);
1349                 ksocknal_write_callback(conn);
1350                 ksocknal_connsock_decref(conn);
1351         }
1352
1353         ksocknal_connsock_decref(conn);
1354         ksocknal_conn_decref(conn);
1355         return rc;
1356
1357 failed_2:
1358         if (!peer->ksnp_closing &&
1359             list_empty(&peer->ksnp_conns) &&
1360             list_empty(&peer->ksnp_routes)) {
1361                 list_add(&zombies, &peer->ksnp_tx_queue);
1362                 list_del_init(&peer->ksnp_tx_queue);
1363                 ksocknal_unlink_peer_locked(peer);
1364         }
1365
1366         write_unlock_bh(global_lock);
1367
1368         if (warn != NULL) {
1369                 if (rc < 0)
1370                         CERROR("Not creating conn %s type %d: %s\n",
1371                                libcfs_id2str(peerid), conn->ksnc_type, warn);
1372                 else
1373                         CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
1374                               libcfs_id2str(peerid), conn->ksnc_type, warn);
1375         }
1376
1377         if (!active) {
1378                 if (rc > 0) {
1379                         /* Request retry by replying with CONN_NONE;
1380                          * ksnc_proto has been set already */
1381                         conn->ksnc_type = SOCKLND_CONN_NONE;
1382                         hello->kshm_nips = 0;
1383                         ksocknal_send_hello(ni, conn, peerid.nid, hello);
1384                 }
1385
1386                 write_lock_bh(global_lock);
1387                 peer->ksnp_accepting--;
1388                 write_unlock_bh(global_lock);
1389         }
1390
1391         ksocknal_txlist_done(ni, &zombies, 1);
1392         ksocknal_peer_decref(peer);
1393
1394  failed_1:
1395         if (hello != NULL)
1396                 LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
1397                                             kshm_ips[LNET_MAX_INTERFACES]));
1398
1399         LIBCFS_FREE(conn, sizeof(*conn));
1400
1401 failed_0:
1402         sock_release(sock);
1403         return rc;
1404 }
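/*
 * Return-code convention in ksocknal_create_conn(): a negative errno is a
 * hard failure, while a positive EALREADY/EPROTO/ESTALE means the peer
 * should retry; on the passive side that retry is requested by replying
 * with a SOCKLND_CONN_NONE hello on the failed_2 path before the socket
 * is released.
 */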
1405
1406 void
1407 ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
1408 {
1409         /* This just does the immediate housekeeping, and queues the
1410          * connection for the reaper to terminate.
1411          * Caller holds ksnd_global_lock exclusively in irq context */
1412         ksock_peer_t      *peer = conn->ksnc_peer;
1413         ksock_route_t     *route;
1414         ksock_conn_t      *conn2;
1415         struct list_head  *tmp;
1416
1417         LASSERT(peer->ksnp_error == 0);
1418         LASSERT(!conn->ksnc_closing);
1419         conn->ksnc_closing = 1;
1420
1421         /* ksnd_deathrow_conns takes over peer's ref */
1422         list_del(&conn->ksnc_list);
1423
1424         route = conn->ksnc_route;
1425         if (route != NULL) {
1426                 /* dissociate conn from route... */
1427                 LASSERT(!route->ksnr_deleted);
1428                 LASSERT((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
1429
1430                 conn2 = NULL;
1431                 list_for_each(tmp, &peer->ksnp_conns) {
1432                         conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
1433
1434                         if (conn2->ksnc_route == route &&
1435                             conn2->ksnc_type == conn->ksnc_type)
1436                                 break;
1437
1438                         conn2 = NULL;
1439                 }
1440                 if (conn2 == NULL)
1441                         route->ksnr_connected &= ~(1 << conn->ksnc_type);
1442
1443                 conn->ksnc_route = NULL;
1444
1445 #if 0           /* irrelevant with only eager routes */
1446                 /* make route least favourite */
1447                 list_del(&route->ksnr_list);
1448                 list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
1449 #endif
1450                 ksocknal_route_decref(route);   /* drop conn's ref on route */
1451         }
1452
1453         if (list_empty(&peer->ksnp_conns)) {
1454                 /* No more connections to this peer */
1455
1456                 if (!list_empty(&peer->ksnp_tx_queue)) {
1457                         ksock_tx_t *tx;
1458
1459                         LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
1460
1461                         /* throw them to the last connection...;
1462                          * these TXs will be sent to /dev/null by the scheduler */
1463                         list_for_each_entry(tx, &peer->ksnp_tx_queue,
1464                                             tx_list)
1465                                 ksocknal_tx_prep(conn, tx);
1466
1467                         spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
1468                         list_splice_init(&peer->ksnp_tx_queue,
1469                                          &conn->ksnc_tx_queue);
1470                         spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
1471                 }
1472
1473                 /* renegotiate protocol version */
1474                 peer->ksnp_proto = NULL;
1475                 /* stash last conn close reason */
1476                 peer->ksnp_error = error;
1477
1478                 if (list_empty(&peer->ksnp_routes)) {
1479                         /* I've just closed last conn belonging to a
1480                          * peer with no routes to it */
1481                         ksocknal_unlink_peer_locked(peer);
1482                 }
1483         }
1484
1485         spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
1486
1487         list_add_tail(&conn->ksnc_list,
1488                       &ksocknal_data.ksnd_deathrow_conns);
1489         wake_up(&ksocknal_data.ksnd_reaper_waitq);
1490
1491         spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
1492 }
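
/*
 * Illustrative sketch, not part of the original driver: the comment at the
 * top of ksocknal_close_conn_locked() states its locking contract - the
 * caller must hold ksnd_global_lock exclusively with bottom halves disabled.
 * A minimal caller therefore looks like the hypothetical helper below, which
 * is essentially what ksocknal_close_conn_and_siblings() does further down
 * in this file.
 */
static inline void
ksocknal_example_close_one_conn(ksock_conn_t *conn, int why)
{
        write_lock_bh(&ksocknal_data.ksnd_global_lock);
        ksocknal_close_conn_locked(conn, why);  /* queues conn for the reaper */
        write_unlock_bh(&ksocknal_data.ksnd_global_lock);
}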
1493
1494 void
1495 ksocknal_peer_failed (ksock_peer_t *peer)
1496 {
1497         int        notify = 0;
1498         cfs_time_t last_alive = 0;
1499
1500         /* There has been a connection failure or comms error, but I'll only
1501          * tell LNET I think the peer is dead if it's another kernel and
1502          * there are no connections or connection attempts in existence. */
1503
1504         read_lock(&ksocknal_data.ksnd_global_lock);
1505
1506         if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
1507              list_empty(&peer->ksnp_conns) &&
1508              peer->ksnp_accepting == 0 &&
1509              ksocknal_find_connecting_route_locked(peer) == NULL) {
1510                 notify = 1;
1511                 last_alive = peer->ksnp_last_alive;
1512         }
1513
1514         read_unlock(&ksocknal_data.ksnd_global_lock);
1515
1516         if (notify)
1517                 lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
1518                             last_alive);
1519 }
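
/*
 * Illustrative note, not part of the original driver: the pid test above.
 * LNET flags userspace processes by setting LNET_PID_USERFLAG in their pid,
 * so a clear flag means the peer is another kernel and is worth reporting to
 * LNET via lnet_notify().  The hypothetical helper below just names that
 * predicate.
 */
static inline int
ksocknal_example_peer_is_kernel(lnet_process_id_t id)
{
        return (id.pid & LNET_PID_USERFLAG) == 0;
}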
1520
1521 void
1522 ksocknal_finalize_zcreq(ksock_conn_t *conn)
1523 {
1524         ksock_peer_t     *peer = conn->ksnc_peer;
1525         ksock_tx_t       *tx;
1526         ksock_tx_t       *tmp;
1527         struct list_head  zlist = LIST_HEAD_INIT(zlist);
1528
1529         /* NB safe to finalize TXs because closing of socket will
1530          * abort all buffered data */
1531         LASSERT(conn->ksnc_sock == NULL);
1532
1533         spin_lock(&peer->ksnp_lock);
1534
1535         list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
1536                 if (tx->tx_conn != conn)
1537                         continue;
1538
1539                 LASSERT(tx->tx_msg.ksm_zc_cookies[0] != 0);
1540
1541                 tx->tx_msg.ksm_zc_cookies[0] = 0;
1542                 tx->tx_zc_aborted = 1;  /* mark it as not-acked */
1543                 list_del(&tx->tx_zc_list);
1544                 list_add(&tx->tx_zc_list, &zlist);
1545         }
1546
1547         spin_unlock(&peer->ksnp_lock);
1548
1549         while (!list_empty(&zlist)) {
1550                 tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
1551
1552                 list_del(&tx->tx_zc_list);
1553                 ksocknal_tx_decref(tx);
1554         }
1555 }
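
/*
 * Illustrative sketch, not part of the original driver:
 * ksocknal_finalize_zcreq() above follows a common pattern - move the
 * entries of interest onto a private list while holding the spinlock, then
 * finalize them after dropping it, so ksocknal_tx_decref() never runs under
 * ksnp_lock.  A minimal form of the same pattern, draining an entire list,
 * is sketched below under a hypothetical name.
 */
static inline void
ksocknal_example_drain_zc_list(spinlock_t *lock, struct list_head *src)
{
        struct list_head zlist = LIST_HEAD_INIT(zlist);
        ksock_tx_t *tx;

        spin_lock(lock);
        list_splice_init(src, &zlist);          /* grab everything at once */
        spin_unlock(lock);

        while (!list_empty(&zlist)) {           /* finalize with no lock held */
                tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
                list_del(&tx->tx_zc_list);
                ksocknal_tx_decref(tx);
        }
}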
1556
1557 void
1558 ksocknal_terminate_conn(ksock_conn_t *conn)
1559 {
1560         /* This gets called by the reaper (guaranteed thread context) to
1561          * disengage the socket from its callbacks and close it.
1562          * ksnc_refcount will eventually hit zero, and then the reaper will
1563          * destroy it. */
1564         ksock_peer_t     *peer = conn->ksnc_peer;
1565         ksock_sched_t    *sched = conn->ksnc_scheduler;
1566         int               failed = 0;
1567
1568         LASSERT(conn->ksnc_closing);
1569
1570         /* wake up the scheduler to "send" all remaining packets to /dev/null */
1571         spin_lock_bh(&sched->kss_lock);
1572
1573         /* a closing conn is always ready to tx */
1574         conn->ksnc_tx_ready = 1;
1575
1576         if (!conn->ksnc_tx_scheduled &&
1577             !list_empty(&conn->ksnc_tx_queue)) {
1578                 list_add_tail(&conn->ksnc_tx_list,
1579                                &sched->kss_tx_conns);
1580                 conn->ksnc_tx_scheduled = 1;
1581                 /* extra ref for scheduler */
1582                 ksocknal_conn_addref(conn);
1583
1584                 wake_up (&sched->kss_waitq);
1585         }
1586
1587         spin_unlock_bh(&sched->kss_lock);
1588
1589         /* serialise with callbacks */
1590         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1591
1592         ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
1593
1594         /* OK, so this conn may not be completely disengaged from its
1595          * scheduler yet, but it _has_ committed to terminate... */
1596         conn->ksnc_scheduler->kss_nconns--;
1597
1598         if (peer->ksnp_error != 0) {
1599                 /* peer's last conn closed in error */
1600                 LASSERT(list_empty(&peer->ksnp_conns));
1601                 failed = 1;
1602                 peer->ksnp_error = 0;     /* avoid multiple notifications */
1603         }
1604
1605         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1606
1607         if (failed)
1608                 ksocknal_peer_failed(peer);
1609
1610         /* The socket is closed on the final put; either here, or in
1611          * ksocknal_{send,recv}msg().  Since we set up the linger2 option
1612          * when the connection was established, this will close the socket
1613          * immediately, aborting anything buffered in it. Any hung
1614          * zero-copy transmits will therefore complete in finite time. */
1615         ksocknal_connsock_decref(conn);
1616 }
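
/*
 * Illustrative sketch, not part of the original driver: the final comment in
 * ksocknal_terminate_conn() relies on the linger options configured when the
 * socket was set up (that setup lives in the ksocknal_lib_* code, not in this
 * file).  Assuming a kernel of this vintage and the usual socket headers
 * pulled in via socklnd.h, the general shape of such a setup is sketched
 * below; the option values shown are illustrative, not the driver's actual
 * tunables.
 */
static inline int
ksocknal_example_disable_linger(struct socket *sock)
{
        struct linger linger;
        int option;
        int rc;

        linger.l_onoff = 0;     /* don't block in close() */
        linger.l_linger = 0;
        rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
                               (char *)&linger, sizeof(linger));
        if (rc != 0)
                return rc;

        option = -1;            /* don't keep FIN_WAIT2 sockets around */
        return kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2,
                                 (char *)&option, sizeof(option));
}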
1617
1618 void
1619 ksocknal_queue_zombie_conn (ksock_conn_t *conn)
1620 {
1621         /* Queue the conn for the reaper to destroy */
1622
1623         LASSERT(atomic_read(&conn->ksnc_conn_refcount) == 0);
1624         spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
1625
1626         list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
1627         wake_up(&ksocknal_data.ksnd_reaper_waitq);
1628
1629         spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
1630 }
1631
1632 void
1633 ksocknal_destroy_conn (ksock_conn_t *conn)
1634 {
1635         cfs_time_t      last_rcv;
1636
1637         /* Final coup-de-grace of the reaper */
1638         CDEBUG (D_NET, "connection %p\n", conn);
1639
1640         LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0);
1641         LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0);
1642         LASSERT (conn->ksnc_sock == NULL);
1643         LASSERT (conn->ksnc_route == NULL);
1644         LASSERT (!conn->ksnc_tx_scheduled);
1645         LASSERT (!conn->ksnc_rx_scheduled);
1646         LASSERT(list_empty(&conn->ksnc_tx_queue));
1647
1648         /* complete current receive if any */
1649         switch (conn->ksnc_rx_state) {
1650         case SOCKNAL_RX_LNET_PAYLOAD:
1651                 last_rcv = conn->ksnc_rx_deadline -
1652                            cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
1653                 CERROR("Completing partial receive from %s[%d], "
1654                        "ip %pI4h:%d, with error, wanted: %d, left: %d, "
1655                        "last alive is %ld secs ago\n",
1656                        libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
1657                        &conn->ksnc_ipaddr, conn->ksnc_port,
1658                        conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
1659                        cfs_duration_sec(cfs_time_sub(cfs_time_current(),
1660                                         last_rcv)));
1661                 lnet_finalize (conn->ksnc_peer->ksnp_ni,
1662                                conn->ksnc_cookie, -EIO);
1663                 break;
1664         case SOCKNAL_RX_LNET_HEADER:
1665                 if (conn->ksnc_rx_started)
1666                         CERROR("Incomplete receive of lnet header from %s, "
1667                                "ip %pI4h:%d, with error, protocol: %d.x.\n",
1668                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1669                                &conn->ksnc_ipaddr, conn->ksnc_port,
1670                                conn->ksnc_proto->pro_version);
1671                 break;
1672         case SOCKNAL_RX_KSM_HEADER:
1673                 if (conn->ksnc_rx_started)
1674                         CERROR("Incomplete receive of ksock message from %s, "
1675                                "ip %pI4h:%d, with error, protocol: %d.x.\n",
1676                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1677                                &conn->ksnc_ipaddr, conn->ksnc_port,
1678                                conn->ksnc_proto->pro_version);
1679                 break;
1680         case SOCKNAL_RX_SLOP:
1681                 if (conn->ksnc_rx_started)
1682                         CERROR("Incomplete receive of slops from %s, "
1683                                "ip %pI4h:%d, with error\n",
1684                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1685                                &conn->ksnc_ipaddr, conn->ksnc_port);
1686                 break;
1687         default:
1688                 LBUG ();
1689                 break;
1690         }
1691
1692         ksocknal_peer_decref(conn->ksnc_peer);
1693
1694         LIBCFS_FREE (conn, sizeof (*conn));
1695 }
1696
1697 int
1698 ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
1699 {
1700         ksock_conn_t       *conn;
1701         struct list_head         *ctmp;
1702         struct list_head         *cnxt;
1703         int                 count = 0;
1704
1705         list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
1706                 conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
1707
1708                 if (ipaddr == 0 ||
1709                     conn->ksnc_ipaddr == ipaddr) {
1710                         count++;
1711                         ksocknal_close_conn_locked (conn, why);
1712                 }
1713         }
1714
1715         return (count);
1716 }
1717
1718 int
1719 ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
1720 {
1721         ksock_peer_t     *peer = conn->ksnc_peer;
1722         __u32             ipaddr = conn->ksnc_ipaddr;
1723         int               count;
1724
1725         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1726
1727         count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
1728
1729         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1730
1731         return (count);
1732 }
1733
1734 int
1735 ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
1736 {
1737         ksock_peer_t       *peer;
1738         struct list_head         *ptmp;
1739         struct list_head         *pnxt;
1740         int                 lo;
1741         int                 hi;
1742         int                 i;
1743         int                 count = 0;
1744
1745         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1746
1747         if (id.nid != LNET_NID_ANY)
1748                 lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
1749         else {
1750                 lo = 0;
1751                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
1752         }
1753
1754         for (i = lo; i <= hi; i++) {
1755                 list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
1756
1757                         peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
1758
1759                         if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
1760                               (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
1761                                 continue;
1762
1763                         count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
1764                 }
1765         }
1766
1767         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1768
1769         /* wildcards always succeed */
1770         if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
1771                 return (0);
1772
1773         return (count == 0 ? -ENOENT : 0);
1774 }
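
/*
 * Illustrative usage, not part of the original driver: closing every
 * connection of every peer through the wildcard path described above.  With
 * LNET_NID_ANY/LNET_PID_ANY and ipaddr == 0 the call always returns 0, even
 * when nothing matched.
 */
static inline int
ksocknal_example_close_all_conns(void)
{
        lnet_process_id_t id = {
                .nid = LNET_NID_ANY,
                .pid = LNET_PID_ANY,
        };

        return ksocknal_close_matching_conns(id, 0);
}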
1775
1776 void
1777 ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive)
1778 {
1779         /* The router is telling me she's been notified of a change in
1780          * gateway state.... */
1781         lnet_process_id_t  id = {0};
1782
1783         id.nid = gw_nid;
1784         id.pid = LNET_PID_ANY;
1785
1786         CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
1787                 alive ? "up" : "down");
1788
1789         if (!alive) {
1790                 /* If the gateway crashed, close all open connections... */
1791                 ksocknal_close_matching_conns (id, 0);
1792                 return;
1793         }
1794
1795         /* ...otherwise do nothing.  We can only establish new connections
1796          * if we have autoroutes, and these connect on demand. */
1797 }
1798
1799 void
1800 ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
1801 {
1802         int                connect = 1;
1803         cfs_time_t         last_alive = 0;
1804         cfs_time_t         now = cfs_time_current();
1805         ksock_peer_t      *peer = NULL;
1806         rwlock_t                *glock = &ksocknal_data.ksnd_global_lock;
1807         lnet_process_id_t  id = {
1808                 .nid = nid,
1809                 .pid = LNET_PID_LUSTRE,
1810         };
1811
1812         read_lock(glock);
1813
1814         peer = ksocknal_find_peer_locked(ni, id);
1815         if (peer != NULL) {
1816                 struct list_head       *tmp;
1817                 ksock_conn_t     *conn;
1818                 int               bufnob;
1819
1820                 list_for_each(tmp, &peer->ksnp_conns) {
1821                         conn = list_entry(tmp, ksock_conn_t, ksnc_list);
1822                         bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
1823
1824                         if (bufnob < conn->ksnc_tx_bufnob) {
1825                                 /* something got ACKed */
1826                                 conn->ksnc_tx_deadline =
1827                                         cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
1828                                 peer->ksnp_last_alive = now;
1829                                 conn->ksnc_tx_bufnob = bufnob;
1830                         }
1831                 }
1832
1833                 last_alive = peer->ksnp_last_alive;
1834                 if (ksocknal_find_connectable_route_locked(peer) == NULL)
1835                         connect = 0;
1836         }
1837
1838         read_unlock(glock);
1839
1840         if (last_alive != 0)
1841                 *when = last_alive;
1842
1843         CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
1844                libcfs_nid2str(nid), peer,
1845                last_alive ? cfs_duration_sec(now - last_alive) : -1,
1846                connect);
1847
1848         if (!connect)
1849                 return;
1850
1851         ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
1852
1853         write_lock_bh(glock);
1854
1855         peer = ksocknal_find_peer_locked(ni, id);
1856         if (peer != NULL)
1857                 ksocknal_launch_all_connections_locked(peer);
1858
1859         write_unlock_bh(glock);
1860         return;
1861 }
1862
1863 static void
1864 ksocknal_push_peer (ksock_peer_t *peer)
1865 {
1866         int               index;
1867         int               i;
1868         struct list_head       *tmp;
1869         ksock_conn_t     *conn;
1870
1871         for (index = 0; ; index++) {
1872                 read_lock(&ksocknal_data.ksnd_global_lock);
1873
1874                 i = 0;
1875                 conn = NULL;
1876
1877                 list_for_each(tmp, &peer->ksnp_conns) {
1878                         if (i++ == index) {
1879                                 conn = list_entry(tmp, ksock_conn_t,
1880                                                        ksnc_list);
1881                                 ksocknal_conn_addref(conn);
1882                                 break;
1883                         }
1884                 }
1885
1886                 read_unlock(&ksocknal_data.ksnd_global_lock);
1887
1888                 if (conn == NULL)
1889                         break;
1890
1891                 ksocknal_lib_push_conn (conn);
1892                 ksocknal_conn_decref(conn);
1893         }
1894 }
1895
1896 static int
1897 ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
1898 {
1899         ksock_peer_t      *peer;
1900         struct list_head        *tmp;
1901         int                index;
1902         int                i;
1903         int                j;
1904         int                rc = -ENOENT;
1905
1906         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
1907                 for (j = 0; ; j++) {
1908                         read_lock(&ksocknal_data.ksnd_global_lock);
1909
1910                         index = 0;
1911                         peer = NULL;
1912
1913                         list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
1914                                 peer = list_entry(tmp, ksock_peer_t,
1915                                                       ksnp_list);
1916
1917                                 if (!((id.nid == LNET_NID_ANY ||
1918                                        id.nid == peer->ksnp_id.nid) &&
1919                                       (id.pid == LNET_PID_ANY ||
1920                                        id.pid == peer->ksnp_id.pid))) {
1921                                         peer = NULL;
1922                                         continue;
1923                                 }
1924
1925                                 if (index++ == j) {
1926                                         ksocknal_peer_addref(peer);
1927                                         break;
1928                                 }
1929                         }
1930
1931                         read_unlock(&ksocknal_data.ksnd_global_lock);
1932
1933                         if (peer == NULL)
1934                                 break;
1935                         rc = 0;
1936                         ksocknal_push_peer(peer);
1937                         ksocknal_peer_decref(peer);
1938                 }
1939
1940         }
1941
1942         return (rc);
1943 }
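
/*
 * Illustrative usage, not part of the original driver: pushing every
 * connection to a single NID.  This mirrors what the
 * IOC_LIBCFS_PUSH_CONNECTION case in ksocknal_ctl() further below does with
 * the NID taken from the ioctl data; the helper name is hypothetical.
 */
static inline int
ksocknal_example_push_nid(lnet_ni_t *ni, lnet_nid_t nid)
{
        lnet_process_id_t id = {
                .nid = nid,
                .pid = LNET_PID_ANY,    /* match any pid, as the ioctl does */
        };

        return ksocknal_push(ni, id);
}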
1944
1945 static int
1946 ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
1947 {
1948         ksock_net_t       *net = ni->ni_data;
1949         ksock_interface_t *iface;
1950         int                rc;
1951         int                i;
1952         int                j;
1953         struct list_head        *ptmp;
1954         ksock_peer_t      *peer;
1955         struct list_head        *rtmp;
1956         ksock_route_t     *route;
1957
1958         if (ipaddress == 0 ||
1959             netmask == 0)
1960                 return (-EINVAL);
1961
1962         write_lock_bh(&ksocknal_data.ksnd_global_lock);
1963
1964         iface = ksocknal_ip2iface(ni, ipaddress);
1965         if (iface != NULL) {
1966                 /* silently ignore dups */
1967                 rc = 0;
1968         } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
1969                 rc = -ENOSPC;
1970         } else {
1971                 iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
1972
1973                 iface->ksni_ipaddr = ipaddress;
1974                 iface->ksni_netmask = netmask;
1975                 iface->ksni_nroutes = 0;
1976                 iface->ksni_npeers = 0;
1977
1978                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
1979                         list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
1980                                 peer = list_entry(ptmp, ksock_peer_t,
1981                                                       ksnp_list);
1982
1983                                 for (j = 0; j < peer->ksnp_n_passive_ips; j++)
1984                                         if (peer->ksnp_passive_ips[j] == ipaddress)
1985                                                 iface->ksni_npeers++;
1986
1987                                 list_for_each(rtmp, &peer->ksnp_routes) {
1988                                         route = list_entry(rtmp,
1989                                                                ksock_route_t,
1990                                                                ksnr_list);
1991
1992                                         if (route->ksnr_myipaddr == ipaddress)
1993                                                 iface->ksni_nroutes++;
1994                                 }
1995                         }
1996                 }
1997
1998                 rc = 0;
1999                 /* NB only new connections will pay attention to the new interface! */
2000         }
2001
2002         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2003
2004         return (rc);
2005 }
2006
2007 static void
2008 ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
2009 {
2010         struct list_head         *tmp;
2011         struct list_head         *nxt;
2012         ksock_route_t      *route;
2013         ksock_conn_t       *conn;
2014         int                 i;
2015         int                 j;
2016
2017         for (i = 0; i < peer->ksnp_n_passive_ips; i++)
2018                 if (peer->ksnp_passive_ips[i] == ipaddr) {
2019                         for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
2020                                 peer->ksnp_passive_ips[j-1] =
2021                                         peer->ksnp_passive_ips[j];
2022                         peer->ksnp_n_passive_ips--;
2023                         break;
2024                 }
2025
2026         list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
2027                 route = list_entry(tmp, ksock_route_t, ksnr_list);
2028
2029                 if (route->ksnr_myipaddr != ipaddr)
2030                         continue;
2031
2032                 if (route->ksnr_share_count != 0) {
2033                         /* Manually created; keep, but unbind */
2034                         route->ksnr_myipaddr = 0;
2035                 } else {
2036                         ksocknal_del_route_locked(route);
2037                 }
2038         }
2039
2040         list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
2041                 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
2042
2043                 if (conn->ksnc_myipaddr == ipaddr)
2044                         ksocknal_close_conn_locked (conn, 0);
2045         }
2046 }
2047
2048 static int
2049 ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
2050 {
2051         ksock_net_t       *net = ni->ni_data;
2052         int                rc = -ENOENT;
2053         struct list_head        *tmp;
2054         struct list_head        *nxt;
2055         ksock_peer_t      *peer;
2056         __u32              this_ip;
2057         int                i;
2058         int                j;
2059
2060         write_lock_bh(&ksocknal_data.ksnd_global_lock);
2061
2062         for (i = 0; i < net->ksnn_ninterfaces; i++) {
2063                 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
2064
2065                 if (!(ipaddress == 0 ||
2066                       ipaddress == this_ip))
2067                         continue;
2068
2069                 rc = 0;
2070
2071                 for (j = i+1; j < net->ksnn_ninterfaces; j++)
2072                         net->ksnn_interfaces[j-1] =
2073                                 net->ksnn_interfaces[j];
2074
2075                 net->ksnn_ninterfaces--;
2076
2077                 for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
2078                         list_for_each_safe(tmp, nxt,
2079                                                &ksocknal_data.ksnd_peers[j]) {
2080                                 peer = list_entry(tmp, ksock_peer_t,
2081                                                       ksnp_list);
2082
2083                                 if (peer->ksnp_ni != ni)
2084                                         continue;
2085
2086                                 ksocknal_peer_del_interface_locked(peer, this_ip);
2087                         }
2088                 }
2089         }
2090
2091         write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2092
2093         return (rc);
2094 }
2095
2096 int
2097 ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
2098 {
2099         lnet_process_id_t id = {0};
2100         struct libcfs_ioctl_data *data = arg;
2101         int rc;
2102
2103         switch(cmd) {
2104         case IOC_LIBCFS_GET_INTERFACE: {
2105                 ksock_net_t       *net = ni->ni_data;
2106                 ksock_interface_t *iface;
2107
2108                 read_lock(&ksocknal_data.ksnd_global_lock);
2109
2110                 if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
2111                         rc = -ENOENT;
2112                 } else {
2113                         rc = 0;
2114                         iface = &net->ksnn_interfaces[data->ioc_count];
2115
2116                         data->ioc_u32[0] = iface->ksni_ipaddr;
2117                         data->ioc_u32[1] = iface->ksni_netmask;
2118                         data->ioc_u32[2] = iface->ksni_npeers;
2119                         data->ioc_u32[3] = iface->ksni_nroutes;
2120                 }
2121
2122                 read_unlock(&ksocknal_data.ksnd_global_lock);
2123                 return rc;
2124         }
2125
2126         case IOC_LIBCFS_ADD_INTERFACE:
2127                 return ksocknal_add_interface(ni,
2128                                               data->ioc_u32[0], /* IP address */
2129                                               data->ioc_u32[1]); /* net mask */
2130
2131         case IOC_LIBCFS_DEL_INTERFACE:
2132                 return ksocknal_del_interface(ni,
2133                                               data->ioc_u32[0]); /* IP address */
2134
2135         case IOC_LIBCFS_GET_PEER: {
2136                 __u32            myip = 0;
2137                 __u32            ip = 0;
2138                 int              port = 0;
2139                 int              conn_count = 0;
2140                 int              share_count = 0;
2141
2142                 rc = ksocknal_get_peer_info(ni, data->ioc_count,
2143                                             &id, &myip, &ip, &port,
2144                                             &conn_count,  &share_count);
2145                 if (rc != 0)
2146                         return rc;
2147
2148                 data->ioc_nid    = id.nid;
2149                 data->ioc_count  = share_count;
2150                 data->ioc_u32[0] = ip;
2151                 data->ioc_u32[1] = port;
2152                 data->ioc_u32[2] = myip;
2153                 data->ioc_u32[3] = conn_count;
2154                 data->ioc_u32[4] = id.pid;
2155                 return 0;
2156         }
2157
2158         case IOC_LIBCFS_ADD_PEER:
2159                 id.nid = data->ioc_nid;
2160                 id.pid = LNET_PID_LUSTRE;
2161                 return ksocknal_add_peer (ni, id,
2162                                           data->ioc_u32[0], /* IP */
2163                                           data->ioc_u32[1]); /* port */
2164
2165         case IOC_LIBCFS_DEL_PEER:
2166                 id.nid = data->ioc_nid;
2167                 id.pid = LNET_PID_ANY;
2168                 return ksocknal_del_peer (ni, id,
2169                                           data->ioc_u32[0]); /* IP */
2170
2171         case IOC_LIBCFS_GET_CONN: {
2172                 int           txmem;
2173                 int           rxmem;
2174                 int           nagle;
2175                 ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count);
2176
2177                 if (conn == NULL)
2178                         return -ENOENT;
2179
2180                 ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
2181
2182                 data->ioc_count  = txmem;
2183                 data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
2184                 data->ioc_flags  = nagle;
2185                 data->ioc_u32[0] = conn->ksnc_ipaddr;
2186                 data->ioc_u32[1] = conn->ksnc_port;
2187                 data->ioc_u32[2] = conn->ksnc_myipaddr;
2188                 data->ioc_u32[3] = conn->ksnc_type;
2189                 data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
2190                 data->ioc_u32[5] = rxmem;
2191                 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
2192                 ksocknal_conn_decref(conn);
2193                 return 0;
2194         }
2195
2196         case IOC_LIBCFS_CLOSE_CONNECTION:
2197                 id.nid = data->ioc_nid;
2198                 id.pid = LNET_PID_ANY;
2199                 return ksocknal_close_matching_conns (id,
2200                                                       data->ioc_u32[0]);
2201
2202         case IOC_LIBCFS_REGISTER_MYNID:
2203                 /* Ignore if this is a noop */
2204                 if (data->ioc_nid == ni->ni_nid)
2205                         return 0;
2206
2207                 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
2208                        libcfs_nid2str(data->ioc_nid),
2209                        libcfs_nid2str(ni->ni_nid));
2210                 return -EINVAL;
2211
2212         case IOC_LIBCFS_PUSH_CONNECTION:
2213                 id.nid = data->ioc_nid;
2214                 id.pid = LNET_PID_ANY;
2215                 return ksocknal_push(ni, id);
2216
2217         default:
2218                 return -EINVAL;
2219         }
2220         /* not reached */
2221 }
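
/*
 * Illustrative sketch, not part of the original driver: what the
 * IOC_LIBCFS_ADD_PEER branch above expects from its caller.  Only the fields
 * ksocknal_ctl() actually reads are filled in; how the request travels
 * through the libcfs ioctl machinery is outside this file, and the helper
 * name is hypothetical.
 */
static inline int
ksocknal_example_add_peer_ioctl(lnet_ni_t *ni, lnet_nid_t nid,
                                __u32 ipaddr, int port)
{
        struct libcfs_ioctl_data data;

        memset(&data, 0, sizeof(data));
        data.ioc_nid    = nid;          /* peer NID */
        data.ioc_u32[0] = ipaddr;       /* peer IP address */
        data.ioc_u32[1] = port;         /* peer TCP port */

        return ksocknal_ctl(ni, IOC_LIBCFS_ADD_PEER, &data);
}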
2222
2223 static void
2224 ksocknal_free_buffers (void)
2225 {
2226         LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
2227
2228         if (ksocknal_data.ksnd_sched_info != NULL) {
2229                 struct ksock_sched_info *info;
2230                 int                     i;
2231
2232                 cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
2233                         if (info->ksi_scheds != NULL) {
2234                                 LIBCFS_FREE(info->ksi_scheds,
2235                                             info->ksi_nthreads_max *
2236                                             sizeof(info->ksi_scheds[0]));
2237                         }
2238                 }
2239                 cfs_percpt_free(ksocknal_data.ksnd_sched_info);
2240         }
2241
2242         LIBCFS_FREE (ksocknal_data.ksnd_peers,
2243                      sizeof(struct list_head) *
2244                      ksocknal_data.ksnd_peer_hash_size);
2245
2246         spin_lock(&ksocknal_data.ksnd_tx_lock);
2247
2248         if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
2249                 struct list_head        zlist;
2250                 ksock_tx_t      *tx;
2251
2252                 list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
2253                 list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
2254                 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2255
2256                 while (!list_empty(&zlist)) {
2257                         tx = list_entry(zlist.next, ksock_tx_t, tx_list);
2258                         list_del(&tx->tx_list);
2259                         LIBCFS_FREE(tx, tx->tx_desc_size);
2260                 }
2261         } else {
2262                 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2263         }
2264 }
2265
2266 static void
2267 ksocknal_base_shutdown(void)
2268 {
2269         struct ksock_sched_info *info;
2270         ksock_sched_t           *sched;
2271         int                     i;
2272         int                     j;
2273
2274         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
2275                atomic_read (&libcfs_kmemory));
2276         LASSERT (ksocknal_data.ksnd_nnets == 0);
2277
2278         switch (ksocknal_data.ksnd_init) {
2279         default:
2280                 LASSERT (0);
2281
2282         case SOCKNAL_INIT_ALL:
2283         case SOCKNAL_INIT_DATA:
2284                 LASSERT (ksocknal_data.ksnd_peers != NULL);
2285                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2286                         LASSERT(list_empty(&ksocknal_data.ksnd_peers[i]));
2287                 }
2288
2289                 LASSERT(list_empty(&ksocknal_data.ksnd_nets));
2290                 LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
2291                 LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
2292                 LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
2293                 LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));
2294
2295                 if (ksocknal_data.ksnd_sched_info != NULL) {
2296                         cfs_percpt_for_each(info, i,
2297                                             ksocknal_data.ksnd_sched_info) {
2298                                 if (info->ksi_scheds == NULL)
2299                                         continue;
2300
2301                                 for (j = 0; j < info->ksi_nthreads_max; j++) {
2302
2303                                         sched = &info->ksi_scheds[j];
2304                                         LASSERT(list_empty(
2305                                                 &sched->kss_tx_conns));
2306                                         LASSERT(list_empty(
2307                                                 &sched->kss_rx_conns));
2308                                         LASSERT(list_empty(
2309                                                 &sched->kss_zombie_noop_txs));
2310                                         LASSERT(sched->kss_nconns == 0);
2311                                 }
2312                         }
2313                 }
2314
2315                 /* flag threads to terminate; wake and wait for them to die */
2316                 ksocknal_data.ksnd_shuttingdown = 1;
2317                 wake_up_all(&ksocknal_data.ksnd_connd_waitq);
2318                 wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
2319
2320                 if (ksocknal_data.ksnd_sched_info != NULL) {
2321                         cfs_percpt_for_each(info, i,
2322                                             ksocknal_data.ksnd_sched_info) {
2323                                 if (info->ksi_scheds == NULL)
2324                                         continue;
2325
2326                                 for (j = 0; j < info->ksi_nthreads_max; j++) {
2327                                         sched = &info->ksi_scheds[j];
2328                                         wake_up_all(&sched->kss_waitq);
2329                                 }
2330                         }
2331                 }
2332
2333                 i = 4;
2334                 read_lock(&ksocknal_data.ksnd_global_lock);
2335                 while (ksocknal_data.ksnd_nthreads != 0) {
2336                         i++;
2337                         CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
2338                                "waiting for %d threads to terminate\n",
2339                                 ksocknal_data.ksnd_nthreads);
2340                         read_unlock(&ksocknal_data.ksnd_global_lock);
2341                         cfs_pause(cfs_time_seconds(1));
2342                         read_lock(&ksocknal_data.ksnd_global_lock);
2343                 }
2344                 read_unlock(&ksocknal_data.ksnd_global_lock);
2345
2346                 ksocknal_free_buffers();
2347
2348                 ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
2349                 break;
2350         }
2351
2352         CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
2353                atomic_read (&libcfs_kmemory));
2354
2355         module_put(THIS_MODULE);
2356 }
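
/*
 * Illustrative note, not part of the original driver: the wait loop above
 * (and the one in ksocknal_shutdown() below) logs at D_WARNING only when the
 * iteration count is a power of two, using the test (i & (-i)) == i.
 * (i & -i) isolates the lowest set bit of i, so it equals i exactly when i
 * has a single bit set.  The predicate on its own:
 */
static inline int
ksocknal_example_is_power_of_2(int i)
{
        return i > 0 && (i & (-i)) == i;        /* 1, 2, 4, 8, ... */
}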
2357
2358 static __u64 ksocknal_new_incarnation(void)
2359 {
2360         struct timeval tv;
2361
2362         /* The incarnation number is the time at which this module was loaded
2363          * and it identifies this particular instance of the socknal.  Hopefully
2364          * we won't be able to reboot more frequently than 1MHz for the
2365          * foreseeable future :) */
2366
2367         do_gettimeofday(&tv);
2368
2369         return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
2370 }
2371
2372 static int
2373 ksocknal_base_startup(void)
2374 {
2375         struct ksock_sched_info *info;
2376         int                     rc;
2377         int                     i;
2378
2379         LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
2380         LASSERT (ksocknal_data.ksnd_nnets == 0);
2381
2382         memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
2383
2384         ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
2385         LIBCFS_ALLOC(ksocknal_data.ksnd_peers,
2386                      sizeof(struct list_head) *
2387                      ksocknal_data.ksnd_peer_hash_size);
2388         if (ksocknal_data.ksnd_peers == NULL)
2389                 return -ENOMEM;
2390
2391         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
2392                 INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
2393
2394         rwlock_init(&ksocknal_data.ksnd_global_lock);
2395         INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
2396
2397         spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
2398         INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
2399         INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
2400         INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
2401         init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
2402
2403         spin_lock_init(&ksocknal_data.ksnd_connd_lock);
2404         INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
2405         INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
2406         init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
2407
2408         spin_lock_init(&ksocknal_data.ksnd_tx_lock);
2409         INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
2410
2411         /* NB the memset above zeros the whole of ksocknal_data */
2412
2413         /* flag lists/ptrs/locks initialised */
2414         ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
2415         try_module_get(THIS_MODULE);
2416
2417         ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
2418                                                          sizeof(*info));
2419         if (ksocknal_data.ksnd_sched_info == NULL)
2420                 goto failed;
2421
2422         cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
2423                 ksock_sched_t   *sched;
2424                 int             nthrs;
2425
2426                 nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
2427                 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2428                         nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
2429                 } else {
2430                         /* cap at half of the CPUs; assume the other half
2431                          * should be reserved for upper-layer modules */
2432                         nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2433                 }
2434
2435                 info->ksi_nthreads_max = nthrs;
2436                 info->ksi_cpt = i;
2437
2438                 LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i,
2439                                  info->ksi_nthreads_max * sizeof(*sched));
2440                 if (info->ksi_scheds == NULL)
2441                         goto failed;
2442
2443                 for (; nthrs > 0; nthrs--) {
2444                         sched = &info->ksi_scheds[nthrs - 1];
2445
2446                         sched->kss_info = info;
2447                         spin_lock_init(&sched->kss_lock);
2448                         INIT_LIST_HEAD(&sched->kss_rx_conns);
2449                         INIT_LIST_HEAD(&sched->kss_tx_conns);
2450                         INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
2451                         init_waitqueue_head(&sched->kss_waitq);
2452                 }
2453         }
2454
2455         ksocknal_data.ksnd_connd_starting         = 0;
2456         ksocknal_data.ksnd_connd_failed_stamp     = 0;
2457         ksocknal_data.ksnd_connd_starting_stamp   = cfs_time_current_sec();
2458         /* must have at least 2 connds to remain responsive to accepts while
2459          * connecting */
2460         if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
2461                 *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
2462
2463         if (*ksocknal_tunables.ksnd_nconnds_max <
2464             *ksocknal_tunables.ksnd_nconnds) {
2465                 ksocknal_tunables.ksnd_nconnds_max =
2466                         ksocknal_tunables.ksnd_nconnds;
2467         }
2468
2469         for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
2470                 char name[16];
2471                 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2472                 ksocknal_data.ksnd_connd_starting++;
2473                 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2474
2475
2476                 snprintf(name, sizeof(name), "socknal_cd%02d", i);
2477                 rc = ksocknal_thread_start(ksocknal_connd,
2478                                            (void *)((ulong_ptr_t)i), name);
2479                 if (rc != 0) {
2480                         spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2481                         ksocknal_data.ksnd_connd_starting--;
2482                         spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2483                         CERROR("Can't spawn socknal connd: %d\n", rc);
2484                         goto failed;
2485                 }
2486         }
2487
2488         rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
2489         if (rc != 0) {
2490                 CERROR ("Can't spawn socknal reaper: %d\n", rc);
2491                 goto failed;
2492         }
2493
2494         /* flag everything initialised */
2495         ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
2496
2497         return 0;
2498
2499  failed:
2500         ksocknal_base_shutdown();
2501         return -ENETDOWN;
2502 }
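
/*
 * Illustrative sketch, not part of the original driver: the per-CPT
 * scheduler sizing rule used in ksocknal_base_startup() above, pulled out as
 * a pure function (with a hypothetical name) so the arithmetic is easy to
 * follow.  With the ksnd_nscheds tunable unset, a CPT with 8 online CPUs
 * would get min(max(SOCKNAL_NSCHEDS, 4), 8) scheduler threads, while a
 * 1-CPU CPT is clamped to a single thread.
 */
static inline int
ksocknal_example_sched_nthreads(int cpt_weight, int nscheds_tunable)
{
        int nthrs = cpt_weight;

        if (nscheds_tunable > 0)
                return min(nthrs, nscheds_tunable);

        /* cap at half of the CPUs in the CPT, but never below the
         * SOCKNAL_NSCHEDS floor and never above the CPT weight */
        return min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
}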
2503
2504 static void
2505 ksocknal_debug_peerhash (lnet_ni_t *ni)
2506 {
2507         ksock_peer_t    *peer = NULL;
2508         struct list_head        *tmp;
2509         int             i;
2510
2511         read_lock(&ksocknal_data.ksnd_global_lock);
2512
2513         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2514                 list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
2515                         peer = list_entry(tmp, ksock_peer_t, ksnp_list);
2516                         if (peer->ksnp_ni == ni)
2517                                 break;
2518                         peer = NULL;
2519                 }
2520                 if (peer != NULL)
2521                         break;
2522         }
2523         if (peer != NULL) {
2524                 ksock_route_t *route;
2525                 ksock_conn_t  *conn;
2526
2527                 CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
2528                        "closing %d, accepting %d, err %d, zcookie "LPU64", "
2529                        "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id),
2530                        atomic_read(&peer->ksnp_refcount),
2531                        peer->ksnp_sharecount, peer->ksnp_closing,
2532                        peer->ksnp_accepting, peer->ksnp_error,
2533                        peer->ksnp_zc_next_cookie,
2534                        !list_empty(&peer->ksnp_tx_queue),
2535                        !list_empty(&peer->ksnp_zc_req_list));
2536
2537                 list_for_each(tmp, &peer->ksnp_routes) {
2538                         route = list_entry(tmp, ksock_route_t, ksnr_list);
2539                         CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
2540                                "del %d\n", atomic_read(&route->ksnr_refcount),
2541                                route->ksnr_scheduled, route->ksnr_connecting,
2542                                route->ksnr_connected, route->ksnr_deleted);
2543                 }
2544
2545                 list_for_each(tmp, &peer->ksnp_conns) {
2546                         conn = list_entry(tmp, ksock_conn_t, ksnc_list);
2547                         CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
2548                                atomic_read(&conn->ksnc_conn_refcount),
2549                                atomic_read(&conn->ksnc_sock_refcount),
2550                                conn->ksnc_type, conn->ksnc_closing);
2551                 }
2552         }
2553
2554         read_unlock(&ksocknal_data.ksnd_global_lock);
2555         return;
2556 }
2557
2558 void
2559 ksocknal_shutdown (lnet_ni_t *ni)
2560 {
2561         ksock_net_t      *net = ni->ni_data;
2562         int               i;
2563         lnet_process_id_t anyid = {0};
2564
2565         anyid.nid = LNET_NID_ANY;
2566         anyid.pid = LNET_PID_ANY;
2567
2568         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
2569         LASSERT(ksocknal_data.ksnd_nnets > 0);
2570
2571         spin_lock_bh(&net->ksnn_lock);
2572         net->ksnn_shutdown = 1;                 /* prevent new peers */
2573         spin_unlock_bh(&net->ksnn_lock);
2574
2575         /* Delete all peers */
2576         ksocknal_del_peer(ni, anyid, 0);
2577
2578         /* Wait for all peer state to clean up */
2579         i = 2;
2580         spin_lock_bh(&net->ksnn_lock);
2581         while (net->ksnn_npeers != 0) {
2582                 spin_unlock_bh(&net->ksnn_lock);
2583
2584                 i++;
2585                 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
2586                        "waiting for %d peers to disconnect\n",
2587                        net->ksnn_npeers);
2588                 cfs_pause(cfs_time_seconds(1));
2589
2590                 ksocknal_debug_peerhash(ni);
2591
2592                 spin_lock_bh(&net->ksnn_lock);
2593         }
2594         spin_unlock_bh(&net->ksnn_lock);
2595
2596         for (i = 0; i < net->ksnn_ninterfaces; i++) {
2597                 LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
2598                 LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
2599         }
2600
2601         list_del(&net->ksnn_list);
2602         LIBCFS_FREE(net, sizeof(*net));
2603
2604         ksocknal_data.ksnd_nnets--;
2605         if (ksocknal_data.ksnd_nnets == 0)
2606                 ksocknal_base_shutdown();
2607 }
2608
2609 static int
2610 ksocknal_enumerate_interfaces(ksock_net_t *net)
2611 {
2612         char      **names;
2613         int         i;
2614         int         j;
2615         int         rc;
2616         int         n;
2617
2618         n = lnet_ipif_enumerate(&names);
2619         if (n <= 0) {
2620                 CERROR("Can't enumerate interfaces: %d\n", n);
2621                 return n;
2622         }
2623
2624         for (i = j = 0; i < n; i++) {
2625                 int        up;
2626                 __u32      ip;
2627                 __u32      mask;
2628
2629                 if (!strcmp(names[i], "lo")) /* skip the loopback IF */
2630                         continue;
2631
2632                 rc = lnet_ipif_query(names[i], &up, &ip, &mask);
2633                 if (rc != 0) {
2634                         CWARN("Can't get interface %s info: %d\n",
2635                               names[i], rc);
2636                         continue;
2637                 }
2638
2639                 if (!up) {
2640                         CWARN("Ignoring interface %s (down)\n",
2641                               names[i]);
2642                         continue;
2643                 }
2644
2645                 if (j == LNET_MAX_INTERFACES) {
2646                         CWARN("Ignoring interface %s (too many interfaces)\n",
2647                               names[i]);
2648                         continue;
2649                 }
2650
2651                 net->ksnn_interfaces[j].ksni_ipaddr = ip;
2652                 net->ksnn_interfaces[j].ksni_netmask = mask;
2653                 strlcpy(net->ksnn_interfaces[j].ksni_name,
2654                         names[i], sizeof(net->ksnn_interfaces[j].ksni_name));
2655                 j++;
2656         }
2657
2658         lnet_ipif_free_enumeration(names, n);
2659
2660         if (j == 0)
2661                 CERROR("Can't find any usable interfaces\n");
2662
2663         return j;
2664 }
2665
2666 static int
2667 ksocknal_search_new_ipif(ksock_net_t *net)
2668 {
2669         int     new_ipif = 0;
2670         int     i;
2671
2672         for (i = 0; i < net->ksnn_ninterfaces; i++) {
2673                 char            *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
2674                 char            *colon = strchr(ifnam, ':');
2675                 int             found  = 0;
2676                 ksock_net_t     *tmp;
2677                 int             j;
2678
2679                 if (colon != NULL) /* ignore alias device */
2680                         *colon = 0;
2681
2682                 list_for_each_entry(tmp, &ksocknal_data.ksnd_nets,
2683                                         ksnn_list) {
2684                         for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
2685                                 char *ifnam2 =
2686                                         &tmp->ksnn_interfaces[j].ksni_name[0];
2687                                 char *colon2 = strchr(ifnam2, ':');
2688
2689                                 if (colon2 != NULL)
2690                                         *colon2 = 0;
2691
2692                                 found = strcmp(ifnam, ifnam2) == 0;
2693                                 if (colon2 != NULL)
2694                                         *colon2 = ':';
2695                         }
2696                         if (found)
2697                                 break;
2698                 }
2699
2700                 new_ipif += !found;
2701                 if (colon != NULL)
2702                         *colon = ':';
2703         }
2704
2705         return new_ipif;
2706 }
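
/*
 * Illustrative sketch, not part of the original driver:
 * ksocknal_search_new_ipif() above treats an IP alias such as "eth0:1" as
 * its base device by temporarily truncating the name at the colon before
 * comparing.  The same comparison, written as a side-effect-free helper with
 * a hypothetical name:
 */
static inline int
ksocknal_example_same_base_ifname(const char *a, const char *b)
{
        const char *ca = strchr(a, ':');
        const char *cb = strchr(b, ':');
        size_t la = (ca != NULL) ? (size_t)(ca - a) : strlen(a);
        size_t lb = (cb != NULL) ? (size_t)(cb - b) : strlen(b);

        /* "eth0" matches "eth0" and "eth0:1", but not "eth1" */
        return la == lb && strncmp(a, b, la) == 0;
}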
2707
2708 static int
2709 ksocknal_start_schedulers(struct ksock_sched_info *info)
2710 {
2711         int     nthrs;
2712         int     rc = 0;
2713         int     i;
2714
2715         if (info->ksi_nthreads == 0) {
2716                 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2717                         nthrs = info->ksi_nthreads_max;
2718                 } else {
2719                         nthrs = cfs_cpt_weight(lnet_cpt_table(),
2720                                                info->ksi_cpt);
2721                         nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2722                         nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
2723                 }
2724                 nthrs = min(nthrs, info->ksi_nthreads_max);
2725         } else {
2726                 LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
2727                 /* start at most two more threads when a new interface appears */
2728                 nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
2729         }
2730
2731         for (i = 0; i < nthrs; i++) {
2732                 long            id;
2733                 char            name[20];
2734                 ksock_sched_t   *sched;
2735                 id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
2736                 sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
2737                 snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
2738                          info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
2739
2740                 rc = ksocknal_thread_start(ksocknal_scheduler,
2741                                            (void *)id, name);
2742                 if (rc == 0)
2743                         continue;
2744
2745                 CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
2746                        info->ksi_cpt, info->ksi_nthreads + i, rc);
2747                 break;
2748         }
2749
2750         info->ksi_nthreads += i;
2751         return rc;
2752 }
2753
2754 static int
2755 ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts)
2756 {
2757         int     newif = ksocknal_search_new_ipif(net);
2758         int     rc;
2759         int     i;
2760
2761         LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
2762
2763         for (i = 0; i < ncpts; i++) {
2764                 struct ksock_sched_info *info;
2765                 int cpt = (cpts == NULL) ? i : cpts[i];
2766
2767                 LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
2768                 info = ksocknal_data.ksnd_sched_info[cpt];
2769
2770                 if (!newif && info->ksi_nthreads > 0)
2771                         continue;
2772
2773                 rc = ksocknal_start_schedulers(info);
2774                 if (rc != 0)
2775                         return rc;
2776         }
2777         return 0;
2778 }
2779
2780 int
2781 ksocknal_startup (lnet_ni_t *ni)
2782 {
2783         ksock_net_t  *net;
2784         int           rc;
2785         int           i;
2786
2787         LASSERT (ni->ni_lnd == &the_ksocklnd);
2788
2789         if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
2790                 rc = ksocknal_base_startup();
2791                 if (rc != 0)
2792                         return rc;
2793         }
2794
2795         LIBCFS_ALLOC(net, sizeof(*net));
2796         if (net == NULL)
2797                 goto fail_0;
2798
2799         spin_lock_init(&net->ksnn_lock);
2800         net->ksnn_incarnation = ksocknal_new_incarnation();
2801         ni->ni_data = net;
2802         ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
2803         ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
2804         ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
2805         ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
2806
2807         if (ni->ni_interfaces[0] == NULL) {
2808                 rc = ksocknal_enumerate_interfaces(net);
2809                 if (rc <= 0)
2810                         goto fail_1;
2811
2812                 net->ksnn_ninterfaces = 1;
2813         } else {
2814                 for (i = 0; i < LNET_MAX_INTERFACES; i++) {
2815                         int    up;
2816
2817                         if (ni->ni_interfaces[i] == NULL)
2818                                 break;
2819
2820                         rc = lnet_ipif_query(ni->ni_interfaces[i], &up,
2821                                 &net->ksnn_interfaces[i].ksni_ipaddr,
2822                                 &net->ksnn_interfaces[i].ksni_netmask);
2823
2824                         if (rc != 0) {
2825                                 CERROR("Can't get interface %s info: %d\n",
2826                                        ni->ni_interfaces[i], rc);
2827                                 goto fail_1;
2828                         }
2829
2830                         if (!up) {
2831                                 CERROR("Interface %s is down\n",
2832                                        ni->ni_interfaces[i]);
2833                                 goto fail_1;
2834                         }
2835
2836                         strlcpy(net->ksnn_interfaces[i].ksni_name,
2837                                 ni->ni_interfaces[i],
2838                                 sizeof(net->ksnn_interfaces[i].ksni_name));
2839                 }
2840                 net->ksnn_ninterfaces = i;
2841         }
2842
2843         /* call it before adding the net to ksocknal_data.ksnd_nets */
2844         rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
2845         if (rc != 0)
2846                 goto fail_1;
2847
2848         ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
2849                                 net->ksnn_interfaces[0].ksni_ipaddr);
2850         list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
2851
2852         ksocknal_data.ksnd_nnets++;
2853
2854         return 0;
2855
2856  fail_1:
2857         LIBCFS_FREE(net, sizeof(*net));
2858  fail_0:
2859         if (ksocknal_data.ksnd_nnets == 0)
2860                 ksocknal_base_shutdown();
2861
2862         return -ENETDOWN;
2863 }
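
/*
 * Illustrative note, not part of the original driver: ksocknal_startup()
 * above derives the NI's final NID by keeping the network number of the
 * configured NID and substituting the address of the first interface.  The
 * macro combination used there, isolated under a hypothetical name:
 */
static inline lnet_nid_t
ksocknal_example_rebind_nid(lnet_nid_t configured_nid, __u32 ipaddr)
{
        /* network part preserved, address part becomes the interface IP */
        return LNET_MKNID(LNET_NIDNET(configured_nid), ipaddr);
}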
2864
2865
2866 static void __exit
2867 ksocknal_module_fini (void)
2868 {
2869         lnet_unregister_lnd(&the_ksocklnd);
2870         ksocknal_tunables_fini();
2871 }
2872
2873 static int __init
2874 ksocknal_module_init (void)
2875 {
2876         int    rc;
2877
2878         /* check that the ksnr_connected/connecting bitfields are large enough */
2879         CLASSERT (SOCKLND_CONN_NTYPES <= 4);
2880         CLASSERT (SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN);
2881
2882         /* initialize the_ksocklnd */
2883         the_ksocklnd.lnd_type     = SOCKLND;
2884         the_ksocklnd.lnd_startup  = ksocknal_startup;
2885         the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
2886         the_ksocklnd.lnd_ctl      = ksocknal_ctl;
2887         the_ksocklnd.lnd_send     = ksocknal_send;
2888         the_ksocklnd.lnd_recv     = ksocknal_recv;
2889         the_ksocklnd.lnd_notify   = ksocknal_notify;
2890         the_ksocklnd.lnd_query    = ksocknal_query;
2891         the_ksocklnd.lnd_accept   = ksocknal_accept;
2892
2893         rc = ksocknal_tunables_init();
2894         if (rc != 0)
2895                 return rc;
2896
2897         lnet_register_lnd(&the_ksocklnd);
2898
2899         return 0;
2900 }
2901
2902 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
2903 MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0");
2904 MODULE_LICENSE("GPL");
2905
2906 cfs_module(ksocknal, "3.0.0", ksocknal_module_init, ksocknal_module_fini);