Whamcloud - gitweb
47f28ce3b3a7de032c4fc6d615703466ef6139d8
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lnet/klnds/socklnd/socklnd.c
37  *
38  * Author: Zach Brown <zab@zabbo.net>
39  * Author: Peter J. Braam <braam@clusterfs.com>
40  * Author: Phil Schwan <phil@clusterfs.com>
41  * Author: Eric Barton <eric@bartonsoftware.com>
42  */
43
44 #include "socklnd.h"
45
/* LND descriptor object for this module.  It is not referenced in the
 * visible part of the file; presumably it is filled in and registered
 * elsewhere -- TODO confirm. */
lnd_t                   the_ksocklnd;
/* Module-wide socklnd state.  The functions below use its
 * ksnd_global_lock, ksnd_peers[] / ksnd_peer_hash_size,
 * ksnd_schedulers[] / ksnd_nschedulers and ksnd_irqinfo[] members. */
ksock_nal_data_t        ksocknal_data;
48
49 ksock_interface_t *
50 ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip)
51 {
52         ksock_net_t       *net = ni->ni_data;
53         int                i;
54         ksock_interface_t *iface;
55
56         for (i = 0; i < net->ksnn_ninterfaces; i++) {
57                 LASSERT(i < LNET_MAX_INTERFACES);
58                 iface = &net->ksnn_interfaces[i];
59
60                 if (iface->ksni_ipaddr == ip)
61                         return (iface);
62         }
63
64         return (NULL);
65 }
66
67 ksock_route_t *
68 ksocknal_create_route (__u32 ipaddr, int port)
69 {
70         ksock_route_t *route;
71
72         LIBCFS_ALLOC (route, sizeof (*route));
73         if (route == NULL)
74                 return (NULL);
75
76         cfs_atomic_set (&route->ksnr_refcount, 1);
77         route->ksnr_peer = NULL;
78         route->ksnr_retry_interval = 0;         /* OK to connect at any time */
79         route->ksnr_ipaddr = ipaddr;
80         route->ksnr_port = port;
81         route->ksnr_scheduled = 0;
82         route->ksnr_connecting = 0;
83         route->ksnr_connected = 0;
84         route->ksnr_deleted = 0;
85         route->ksnr_conn_count = 0;
86         route->ksnr_share_count = 0;
87
88         return (route);
89 }
90
91 void
92 ksocknal_destroy_route (ksock_route_t *route)
93 {
94         LASSERT (cfs_atomic_read(&route->ksnr_refcount) == 0);
95
96         if (route->ksnr_peer != NULL)
97                 ksocknal_peer_decref(route->ksnr_peer);
98
99         LIBCFS_FREE (route, sizeof (*route));
100 }
101
102 int
103 ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
104 {
105         ksock_net_t   *net = ni->ni_data;
106         ksock_peer_t  *peer;
107
108         LASSERT (id.nid != LNET_NID_ANY);
109         LASSERT (id.pid != LNET_PID_ANY);
110         LASSERT (!cfs_in_interrupt());
111
112         LIBCFS_ALLOC (peer, sizeof (*peer));
113         if (peer == NULL)
114                 return -ENOMEM;
115
116         memset (peer, 0, sizeof (*peer));       /* NULL pointers/clear flags etc */
117
118         peer->ksnp_ni = ni;
119         peer->ksnp_id = id;
120         cfs_atomic_set (&peer->ksnp_refcount, 1);   /* 1 ref for caller */
121         peer->ksnp_closing = 0;
122         peer->ksnp_accepting = 0;
123         peer->ksnp_proto = NULL;
124         peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
125
126         CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
127         CFS_INIT_LIST_HEAD (&peer->ksnp_routes);
128         CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue);
129         CFS_INIT_LIST_HEAD (&peer->ksnp_zc_req_list);
130         cfs_spin_lock_init(&peer->ksnp_lock);
131
132         cfs_spin_lock_bh (&net->ksnn_lock);
133
134         if (net->ksnn_shutdown) {
135                 cfs_spin_unlock_bh (&net->ksnn_lock);
136
137                 LIBCFS_FREE(peer, sizeof(*peer));
138                 CERROR("Can't create peer: network shutdown\n");
139                 return -ESHUTDOWN;
140         }
141
142         net->ksnn_npeers++;
143
144         cfs_spin_unlock_bh (&net->ksnn_lock);
145
146         *peerp = peer;
147         return 0;
148 }
149
150 void
151 ksocknal_destroy_peer (ksock_peer_t *peer)
152 {
153         ksock_net_t    *net = peer->ksnp_ni->ni_data;
154
155         CDEBUG (D_NET, "peer %s %p deleted\n",
156                 libcfs_id2str(peer->ksnp_id), peer);
157
158         LASSERT (cfs_atomic_read (&peer->ksnp_refcount) == 0);
159         LASSERT (peer->ksnp_accepting == 0);
160         LASSERT (list_empty (&peer->ksnp_conns));
161         LASSERT (list_empty (&peer->ksnp_routes));
162         LASSERT (list_empty (&peer->ksnp_tx_queue));
163         LASSERT (list_empty (&peer->ksnp_zc_req_list));
164
165         LIBCFS_FREE (peer, sizeof (*peer));
166
167         /* NB a peer's connections and routes keep a reference on their peer
168          * until they are destroyed, so we can be assured that _all_ state to
169          * do with this peer has been cleaned up when its refcount drops to
170          * zero. */
171         cfs_spin_lock_bh (&net->ksnn_lock);
172         net->ksnn_npeers--;
173         cfs_spin_unlock_bh (&net->ksnn_lock);
174 }
175
176 ksock_peer_t *
177 ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id)
178 {
179         struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
180         struct list_head *tmp;
181         ksock_peer_t     *peer;
182
183         list_for_each (tmp, peer_list) {
184
185                 peer = list_entry (tmp, ksock_peer_t, ksnp_list);
186
187                 LASSERT (!peer->ksnp_closing);
188
189                 if (peer->ksnp_ni != ni)
190                         continue;
191
192                 if (peer->ksnp_id.nid != id.nid ||
193                     peer->ksnp_id.pid != id.pid)
194                         continue;
195
196                 CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
197                        peer, libcfs_id2str(id),
198                        cfs_atomic_read(&peer->ksnp_refcount));
199                 return (peer);
200         }
201         return (NULL);
202 }
203
204 ksock_peer_t *
205 ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
206 {
207         ksock_peer_t     *peer;
208
209         cfs_read_lock (&ksocknal_data.ksnd_global_lock);
210         peer = ksocknal_find_peer_locked (ni, id);
211         if (peer != NULL)                       /* +1 ref for caller? */
212                 ksocknal_peer_addref(peer);
213         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
214
215         return (peer);
216 }
217
218 void
219 ksocknal_unlink_peer_locked (ksock_peer_t *peer)
220 {
221         int                i;
222         __u32              ip;
223         ksock_interface_t *iface;
224
225         for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
226                 LASSERT (i < LNET_MAX_INTERFACES);
227                 ip = peer->ksnp_passive_ips[i];
228
229                 iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
230                 /* All IPs in peer->ksnp_passive_ips[] come from the
231                  * interface list, therefore the call must succeed. */
232                 LASSERT (iface != NULL);
233
234                 CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
235                        peer, iface, iface->ksni_nroutes);
236                 iface->ksni_npeers--;
237         }
238
239         LASSERT (list_empty(&peer->ksnp_conns));
240         LASSERT (list_empty(&peer->ksnp_routes));
241         LASSERT (!peer->ksnp_closing);
242         peer->ksnp_closing = 1;
243         list_del (&peer->ksnp_list);
244         /* lose peerlist's ref */
245         ksocknal_peer_decref(peer);
246 }
247
248 int
249 ksocknal_get_peer_info (lnet_ni_t *ni, int index,
250                         lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port,
251                         int *conn_count, int *share_count)
252 {
253         ksock_peer_t      *peer;
254         struct list_head  *ptmp;
255         ksock_route_t     *route;
256         struct list_head  *rtmp;
257         int                i;
258         int                j;
259         int                rc = -ENOENT;
260
261         cfs_read_lock (&ksocknal_data.ksnd_global_lock);
262
263         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
264
265                 list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
266                         peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
267
268                         if (peer->ksnp_ni != ni)
269                                 continue;
270
271                         if (peer->ksnp_n_passive_ips == 0 &&
272                             list_empty(&peer->ksnp_routes)) {
273                                 if (index-- > 0)
274                                         continue;
275
276                                 *id = peer->ksnp_id;
277                                 *myip = 0;
278                                 *peer_ip = 0;
279                                 *port = 0;
280                                 *conn_count = 0;
281                                 *share_count = 0;
282                                 rc = 0;
283                                 goto out;
284                         }
285
286                         for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
287                                 if (index-- > 0)
288                                         continue;
289
290                                 *id = peer->ksnp_id;
291                                 *myip = peer->ksnp_passive_ips[j];
292                                 *peer_ip = 0;
293                                 *port = 0;
294                                 *conn_count = 0;
295                                 *share_count = 0;
296                                 rc = 0;
297                                 goto out;
298                         }
299
300                         list_for_each (rtmp, &peer->ksnp_routes) {
301                                 if (index-- > 0)
302                                         continue;
303
304                                 route = list_entry(rtmp, ksock_route_t,
305                                                    ksnr_list);
306
307                                 *id = peer->ksnp_id;
308                                 *myip = route->ksnr_myipaddr;
309                                 *peer_ip = route->ksnr_ipaddr;
310                                 *port = route->ksnr_port;
311                                 *conn_count = route->ksnr_conn_count;
312                                 *share_count = route->ksnr_share_count;
313                                 rc = 0;
314                                 goto out;
315                         }
316                 }
317         }
318  out:
319         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
320         return (rc);
321 }
322
323 void
324 ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
325 {
326         ksock_peer_t      *peer = route->ksnr_peer;
327         int                type = conn->ksnc_type;
328         ksock_interface_t *iface;
329
330         conn->ksnc_route = route;
331         ksocknal_route_addref(route);
332
333         if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
334                 if (route->ksnr_myipaddr == 0) {
335                         /* route wasn't bound locally yet (the initial route) */
336                         CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n",
337                                libcfs_id2str(peer->ksnp_id),
338                                HIPQUAD(route->ksnr_ipaddr),
339                                HIPQUAD(conn->ksnc_myipaddr));
340                 } else {
341                         CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from "
342                                "%u.%u.%u.%u to %u.%u.%u.%u\n",
343                                libcfs_id2str(peer->ksnp_id),
344                                HIPQUAD(route->ksnr_ipaddr),
345                                HIPQUAD(route->ksnr_myipaddr),
346                                HIPQUAD(conn->ksnc_myipaddr));
347
348                         iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
349                                                   route->ksnr_myipaddr);
350                         if (iface != NULL)
351                                 iface->ksni_nroutes--;
352                 }
353                 route->ksnr_myipaddr = conn->ksnc_myipaddr;
354                 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
355                                           route->ksnr_myipaddr);
356                 if (iface != NULL)
357                         iface->ksni_nroutes++;
358         }
359
360         route->ksnr_connected |= (1<<type);
361         route->ksnr_conn_count++;
362
363         /* Successful connection => further attempts can
364          * proceed immediately */
365         route->ksnr_retry_interval = 0;
366 }
367
368 void
369 ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
370 {
371         struct list_head  *tmp;
372         ksock_conn_t      *conn;
373         ksock_route_t     *route2;
374
375         LASSERT (!peer->ksnp_closing);
376         LASSERT (route->ksnr_peer == NULL);
377         LASSERT (!route->ksnr_scheduled);
378         LASSERT (!route->ksnr_connecting);
379         LASSERT (route->ksnr_connected == 0);
380
381         /* LASSERT(unique) */
382         list_for_each(tmp, &peer->ksnp_routes) {
383                 route2 = list_entry(tmp, ksock_route_t, ksnr_list);
384
385                 if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
386                         CERROR ("Duplicate route %s %u.%u.%u.%u\n",
387                                 libcfs_id2str(peer->ksnp_id),
388                                 HIPQUAD(route->ksnr_ipaddr));
389                         LBUG();
390                 }
391         }
392
393         route->ksnr_peer = peer;
394         ksocknal_peer_addref(peer);
395         /* peer's routelist takes over my ref on 'route' */
396         list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
397
398         list_for_each(tmp, &peer->ksnp_conns) {
399                 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
400
401                 if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
402                         continue;
403
404                 ksocknal_associate_route_conn_locked(route, conn);
405                 /* keep going (typed routes) */
406         }
407 }
408
409 void
410 ksocknal_del_route_locked (ksock_route_t *route)
411 {
412         ksock_peer_t      *peer = route->ksnr_peer;
413         ksock_interface_t *iface;
414         ksock_conn_t      *conn;
415         struct list_head  *ctmp;
416         struct list_head  *cnxt;
417
418         LASSERT (!route->ksnr_deleted);
419
420         /* Close associated conns */
421         list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
422                 conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
423
424                 if (conn->ksnc_route != route)
425                         continue;
426
427                 ksocknal_close_conn_locked (conn, 0);
428         }
429
430         if (route->ksnr_myipaddr != 0) {
431                 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
432                                           route->ksnr_myipaddr);
433                 if (iface != NULL)
434                         iface->ksni_nroutes--;
435         }
436
437         route->ksnr_deleted = 1;
438         list_del (&route->ksnr_list);
439         ksocknal_route_decref(route);             /* drop peer's ref */
440
441         if (list_empty (&peer->ksnp_routes) &&
442             list_empty (&peer->ksnp_conns)) {
443                 /* I've just removed the last route to a peer with no active
444                  * connections */
445                 ksocknal_unlink_peer_locked (peer);
446         }
447 }
448
449 int
450 ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
451 {
452         struct list_head  *tmp;
453         ksock_peer_t      *peer;
454         ksock_peer_t      *peer2;
455         ksock_route_t     *route;
456         ksock_route_t     *route2;
457         int                rc;
458
459         if (id.nid == LNET_NID_ANY ||
460             id.pid == LNET_PID_ANY)
461                 return (-EINVAL);
462
463         /* Have a brand new peer ready... */
464         rc = ksocknal_create_peer(&peer, ni, id);
465         if (rc != 0)
466                 return rc;
467
468         route = ksocknal_create_route (ipaddr, port);
469         if (route == NULL) {
470                 ksocknal_peer_decref(peer);
471                 return (-ENOMEM);
472         }
473
474         cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
475
476         /* always called with a ref on ni, so shutdown can't have started */
477         LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
478
479         peer2 = ksocknal_find_peer_locked (ni, id);
480         if (peer2 != NULL) {
481                 ksocknal_peer_decref(peer);
482                 peer = peer2;
483         } else {
484                 /* peer table takes my ref on peer */
485                 list_add_tail (&peer->ksnp_list,
486                                ksocknal_nid2peerlist (id.nid));
487         }
488
489         route2 = NULL;
490         list_for_each (tmp, &peer->ksnp_routes) {
491                 route2 = list_entry(tmp, ksock_route_t, ksnr_list);
492
493                 if (route2->ksnr_ipaddr == ipaddr)
494                         break;
495
496                 route2 = NULL;
497         }
498         if (route2 == NULL) {
499                 ksocknal_add_route_locked(peer, route);
500                 route->ksnr_share_count++;
501         } else {
502                 ksocknal_route_decref(route);
503                 route2->ksnr_share_count++;
504         }
505
506         cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
507
508         return (0);
509 }
510
511 void
512 ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
513 {
514         ksock_conn_t     *conn;
515         ksock_route_t    *route;
516         struct list_head *tmp;
517         struct list_head *nxt;
518         int               nshared;
519
520         LASSERT (!peer->ksnp_closing);
521
522         /* Extra ref prevents peer disappearing until I'm done with it */
523         ksocknal_peer_addref(peer);
524
525         list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
526                 route = list_entry(tmp, ksock_route_t, ksnr_list);
527
528                 /* no match */
529                 if (!(ip == 0 || route->ksnr_ipaddr == ip))
530                         continue;
531
532                 route->ksnr_share_count = 0;
533                 /* This deletes associated conns too */
534                 ksocknal_del_route_locked (route);
535         }
536
537         nshared = 0;
538         list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
539                 route = list_entry(tmp, ksock_route_t, ksnr_list);
540                 nshared += route->ksnr_share_count;
541         }
542
543         if (nshared == 0) {
544                 /* remove everything else if there are no explicit entries
545                  * left */
546
547                 list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
548                         route = list_entry(tmp, ksock_route_t, ksnr_list);
549
550                         /* we should only be removing auto-entries */
551                         LASSERT(route->ksnr_share_count == 0);
552                         ksocknal_del_route_locked (route);
553                 }
554
555                 list_for_each_safe (tmp, nxt, &peer->ksnp_conns) {
556                         conn = list_entry(tmp, ksock_conn_t, ksnc_list);
557
558                         ksocknal_close_conn_locked(conn, 0);
559                 }
560         }
561
562         ksocknal_peer_decref(peer);
563         /* NB peer unlinks itself when last conn/route is removed */
564 }
565
566 int
567 ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
568 {
569         CFS_LIST_HEAD     (zombies);
570         struct list_head  *ptmp;
571         struct list_head  *pnxt;
572         ksock_peer_t      *peer;
573         int                lo;
574         int                hi;
575         int                i;
576         int                rc = -ENOENT;
577
578         cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
579
580         if (id.nid != LNET_NID_ANY)
581                 lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
582         else {
583                 lo = 0;
584                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
585         }
586
587         for (i = lo; i <= hi; i++) {
588                 list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
589                         peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
590
591                         if (peer->ksnp_ni != ni)
592                                 continue;
593
594                         if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
595                               (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
596                                 continue;
597
598                         ksocknal_peer_addref(peer);     /* a ref for me... */
599
600                         ksocknal_del_peer_locked (peer, ip);
601
602                         if (peer->ksnp_closing && !list_empty(&peer->ksnp_tx_queue)) {
603                                 LASSERT (list_empty(&peer->ksnp_conns));
604                                 LASSERT (list_empty(&peer->ksnp_routes));
605
606                                 list_splice_init(&peer->ksnp_tx_queue, &zombies);
607                         }
608
609                         ksocknal_peer_decref(peer);     /* ...till here */
610
611                         rc = 0;                 /* matched! */
612                 }
613         }
614
615         cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
616
617         ksocknal_txlist_done(ni, &zombies, 1);
618
619         return (rc);
620 }
621
622 ksock_conn_t *
623 ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
624 {
625         ksock_peer_t      *peer;
626         struct list_head  *ptmp;
627         ksock_conn_t      *conn;
628         struct list_head  *ctmp;
629         int                i;
630
631         cfs_read_lock (&ksocknal_data.ksnd_global_lock);
632
633         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
634                 list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
635                         peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
636
637                         LASSERT (!peer->ksnp_closing);
638
639                         if (peer->ksnp_ni != ni)
640                                 continue;
641
642                         list_for_each (ctmp, &peer->ksnp_conns) {
643                                 if (index-- > 0)
644                                         continue;
645
646                                 conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
647                                 ksocknal_conn_addref(conn);
648                                 cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
649                                 return (conn);
650                         }
651                 }
652         }
653
654         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
655         return (NULL);
656 }
657
658 ksock_sched_t *
659 ksocknal_choose_scheduler_locked (unsigned int irq)
660 {
661         ksock_sched_t    *sched;
662         ksock_irqinfo_t  *info;
663         int               i;
664
665         LASSERT (irq < NR_IRQS);
666         info = &ksocknal_data.ksnd_irqinfo[irq];
667
668         if (irq != 0 &&                         /* hardware NIC */
669             info->ksni_valid) {                 /* already set up */
670                 return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]);
671         }
672
673         /* software NIC (irq == 0) || not associated with a scheduler yet.
674          * Choose the CPU with the fewest connections... */
675         sched = &ksocknal_data.ksnd_schedulers[0];
676         for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++)
677                 if (sched->kss_nconns >
678                     ksocknal_data.ksnd_schedulers[i].kss_nconns)
679                         sched = &ksocknal_data.ksnd_schedulers[i];
680
681         if (irq != 0) {                         /* Hardware NIC */
682                 info->ksni_valid = 1;
683                 info->ksni_sched = (unsigned int)(sched - ksocknal_data.ksnd_schedulers);
684
685                 /* no overflow... */
686                 LASSERT (info->ksni_sched == (unsigned int)(sched - ksocknal_data.ksnd_schedulers));
687         }
688
689         return (sched);
690 }
691
692 int
693 ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
694 {
695         ksock_net_t       *net = ni->ni_data;
696         int                i;
697         int                nip;
698
699         cfs_read_lock (&ksocknal_data.ksnd_global_lock);
700
701         nip = net->ksnn_ninterfaces;
702         LASSERT (nip <= LNET_MAX_INTERFACES);
703
704         /* Only offer interfaces for additional connections if I have 
705          * more than one. */
706         if (nip < 2) {
707                 cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
708                 return 0;
709         }
710
711         for (i = 0; i < nip; i++) {
712                 ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
713                 LASSERT (ipaddrs[i] != 0);
714         }
715
716         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
717         return (nip);
718 }
719
720 int
721 ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
722 {
723         int   best_netmatch = 0;
724         int   best_xor      = 0;
725         int   best          = -1;
726         int   this_xor;
727         int   this_netmatch;
728         int   i;
729
730         for (i = 0; i < nips; i++) {
731                 if (ips[i] == 0)
732                         continue;
733
734                 this_xor = (ips[i] ^ iface->ksni_ipaddr);
735                 this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
736
737                 if (!(best < 0 ||
738                       best_netmatch < this_netmatch ||
739                       (best_netmatch == this_netmatch &&
740                        best_xor > this_xor)))
741                         continue;
742
743                 best = i;
744                 best_netmatch = this_netmatch;
745                 best_xor = this_xor;
746         }
747
748         LASSERT (best >= 0);
749         return (best);
750 }
751
752 int
753 ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
754 {
755         cfs_rwlock_t       *global_lock = &ksocknal_data.ksnd_global_lock;
756         ksock_net_t        *net = peer->ksnp_ni->ni_data;
757         ksock_interface_t  *iface;
758         ksock_interface_t  *best_iface;
759         int                 n_ips;
760         int                 i;
761         int                 j;
762         int                 k;
763         __u32               ip;
764         __u32               xor;
765         int                 this_netmatch;
766         int                 best_netmatch;
767         int                 best_npeers;
768
769         /* CAVEAT EMPTOR: We do all our interface matching with an
770          * exclusive hold of global lock at IRQ priority.  We're only
771          * expecting to be dealing with small numbers of interfaces, so the
772          * O(n**3)-ness shouldn't matter */
773
774         /* Also note that I'm not going to return more than n_peerips
775          * interfaces, even if I have more myself */
776
777         cfs_write_lock_bh (global_lock);
778
779         LASSERT (n_peerips <= LNET_MAX_INTERFACES);
780         LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
781
782         /* Only match interfaces for additional connections 
783          * if I have > 1 interface */
784         n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
785                 MIN(n_peerips, net->ksnn_ninterfaces);
786
787         for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
788                 /*              ^ yes really... */
789
790                 /* If we have any new interfaces, first tick off all the
791                  * peer IPs that match old interfaces, then choose new
792                  * interfaces to match the remaining peer IPS.
793                  * We don't forget interfaces we've stopped using; we might
794                  * start using them again... */
795
796                 if (i < peer->ksnp_n_passive_ips) {
797                         /* Old interface. */
798                         ip = peer->ksnp_passive_ips[i];
799                         best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
800
801                         /* peer passive ips are kept up to date */
802                         LASSERT(best_iface != NULL);
803                 } else {
804                         /* choose a new interface */
805                         LASSERT (i == peer->ksnp_n_passive_ips);
806
807                         best_iface = NULL;
808                         best_netmatch = 0;
809                         best_npeers = 0;
810
811                         for (j = 0; j < net->ksnn_ninterfaces; j++) {
812                                 iface = &net->ksnn_interfaces[j];
813                                 ip = iface->ksni_ipaddr;
814
815                                 for (k = 0; k < peer->ksnp_n_passive_ips; k++)
816                                         if (peer->ksnp_passive_ips[k] == ip)
817                                                 break;
818
819                                 if (k < peer->ksnp_n_passive_ips) /* using it already */
820                                         continue;
821
822                                 k = ksocknal_match_peerip(iface, peerips, n_peerips);
823                                 xor = (ip ^ peerips[k]);
824                                 this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
825
826                                 if (!(best_iface == NULL ||
827                                       best_netmatch < this_netmatch ||
828                                       (best_netmatch == this_netmatch &&
829                                        best_npeers > iface->ksni_npeers)))
830                                         continue;
831
832                                 best_iface = iface;
833                                 best_netmatch = this_netmatch;
834                                 best_npeers = iface->ksni_npeers;
835                         }
836
837                         best_iface->ksni_npeers++;
838                         ip = best_iface->ksni_ipaddr;
839                         peer->ksnp_passive_ips[i] = ip;
840                         peer->ksnp_n_passive_ips = i+1;
841                 }
842
843                 LASSERT (best_iface != NULL);
844
845                 /* mark the best matching peer IP used */
846                 j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
847                 peerips[j] = 0;
848         }
849
850         /* Overwrite input peer IP addresses */
851         memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
852
853         cfs_write_unlock_bh (global_lock);
854
855         return (n_ips);
856 }
857
858 void
859 ksocknal_create_routes(ksock_peer_t *peer, int port,
860                        __u32 *peer_ipaddrs, int npeer_ipaddrs)
861 {
862         ksock_route_t      *newroute = NULL;
863         cfs_rwlock_t       *global_lock = &ksocknal_data.ksnd_global_lock;
864         lnet_ni_t          *ni = peer->ksnp_ni;
865         ksock_net_t        *net = ni->ni_data;
866         struct list_head   *rtmp;
867         ksock_route_t      *route;
868         ksock_interface_t  *iface;
869         ksock_interface_t  *best_iface;
870         int                 best_netmatch;
871         int                 this_netmatch;
872         int                 best_nroutes;
873         int                 i;
874         int                 j;
875
876         /* CAVEAT EMPTOR: We do all our interface matching with an
877          * exclusive hold of global lock at IRQ priority.  We're only
878          * expecting to be dealing with small numbers of interfaces, so the
879          * O(n**3)-ness here shouldn't matter */
880
881         cfs_write_lock_bh (global_lock);
882
883         if (net->ksnn_ninterfaces < 2) {
884                 /* Only create additional connections 
885                  * if I have > 1 interface */
886                 cfs_write_unlock_bh (global_lock);
887                 return;
888         }
889
890         LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES);
891
892         for (i = 0; i < npeer_ipaddrs; i++) {
893                 if (newroute != NULL) {
894                         newroute->ksnr_ipaddr = peer_ipaddrs[i];
895                 } else {
896                         cfs_write_unlock_bh (global_lock);
897
898                         newroute = ksocknal_create_route(peer_ipaddrs[i], port);
899                         if (newroute == NULL)
900                                 return;
901
902                         cfs_write_lock_bh (global_lock);
903                 }
904
905                 if (peer->ksnp_closing) {
906                         /* peer got closed under me */
907                         break;
908                 }
909
910                 /* Already got a route? */
911                 route = NULL;
912                 list_for_each(rtmp, &peer->ksnp_routes) {
913                         route = list_entry(rtmp, ksock_route_t, ksnr_list);
914
915                         if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
916                                 break;
917
918                         route = NULL;
919                 }
920                 if (route != NULL)
921                         continue;
922
923                 best_iface = NULL;
924                 best_nroutes = 0;
925                 best_netmatch = 0;
926
927                 LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
928
929                 /* Select interface to connect from */
930                 for (j = 0; j < net->ksnn_ninterfaces; j++) {
931                         iface = &net->ksnn_interfaces[j];
932
933                         /* Using this interface already? */
934                         list_for_each(rtmp, &peer->ksnp_routes) {
935                                 route = list_entry(rtmp, ksock_route_t, ksnr_list);
936
937                                 if (route->ksnr_myipaddr == iface->ksni_ipaddr)
938                                         break;
939
940                                 route = NULL;
941                         }
942                         if (route != NULL)
943                                 continue;
944
945                         this_netmatch = (((iface->ksni_ipaddr ^
946                                            newroute->ksnr_ipaddr) &
947                                            iface->ksni_netmask) == 0) ? 1 : 0;
948
949                         if (!(best_iface == NULL ||
950                               best_netmatch < this_netmatch ||
951                               (best_netmatch == this_netmatch &&
952                                best_nroutes > iface->ksni_nroutes)))
953                                 continue;
954
955                         best_iface = iface;
956                         best_netmatch = this_netmatch;
957                         best_nroutes = iface->ksni_nroutes;
958                 }
959
960                 if (best_iface == NULL)
961                         continue;
962
963                 newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
964                 best_iface->ksni_nroutes++;
965
966                 ksocknal_add_route_locked(peer, newroute);
967                 newroute = NULL;
968         }
969
970         cfs_write_unlock_bh (global_lock);
971         if (newroute != NULL)
972                 ksocknal_route_decref(newroute);
973 }
974
975 int
976 ksocknal_accept (lnet_ni_t *ni, cfs_socket_t *sock)
977 {
978         ksock_connreq_t    *cr;
979         int                 rc;
980         __u32               peer_ip;
981         int                 peer_port;
982
983         rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
984         LASSERT (rc == 0);                      /* we succeeded before */
985
986         LIBCFS_ALLOC(cr, sizeof(*cr));
987         if (cr == NULL) {
988                 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from "
989                                    "%u.%u.%u.%u: memory exhausted\n",
990                                    HIPQUAD(peer_ip));
991                 return -ENOMEM;
992         }
993
994         lnet_ni_addref(ni);
995         cr->ksncr_ni   = ni;
996         cr->ksncr_sock = sock;
997
998         cfs_spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
999
1000         list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
1001         cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq);
1002
1003         cfs_spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
1004         return 0;
1005 }
1006
1007 int
1008 ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr)
1009 {
1010         ksock_route_t   *route;
1011
1012         cfs_list_for_each_entry_typed (route, &peer->ksnp_routes,
1013                                        ksock_route_t, ksnr_list) {
1014
1015                 if (route->ksnr_ipaddr == ipaddr)
1016                         return route->ksnr_connecting;
1017         }
1018         return 0;
1019 }
1020
1021 int
1022 ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
1023                       cfs_socket_t *sock, int type)
1024 {
1025         cfs_rwlock_t      *global_lock = &ksocknal_data.ksnd_global_lock;
1026         CFS_LIST_HEAD     (zombies);
1027         lnet_process_id_t  peerid;
1028         struct list_head  *tmp;
1029         __u64              incarnation;
1030         ksock_conn_t      *conn;
1031         ksock_conn_t      *conn2;
1032         ksock_peer_t      *peer = NULL;
1033         ksock_peer_t      *peer2;
1034         ksock_sched_t     *sched;
1035         ksock_hello_msg_t *hello;
1036         unsigned int       irq;
1037         ksock_tx_t        *tx;
1038         ksock_tx_t        *txtmp;
1039         int                rc;
1040         int                active;
1041         char              *warn = NULL;
1042
1043         active = (route != NULL);
1044
1045         LASSERT (active == (type != SOCKLND_CONN_NONE));
1046
1047         irq = ksocknal_lib_sock_irq (sock);
1048
1049         LIBCFS_ALLOC(conn, sizeof(*conn));
1050         if (conn == NULL) {
1051                 rc = -ENOMEM;
1052                 goto failed_0;
1053         }
1054
1055         memset (conn, 0, sizeof (*conn));
1056
1057         conn->ksnc_peer = NULL;
1058         conn->ksnc_route = NULL;
1059         conn->ksnc_sock = sock;
1060         /* 2 ref, 1 for conn, another extra ref prevents socket
1061          * being closed before establishment of connection */
1062         cfs_atomic_set (&conn->ksnc_sock_refcount, 2);
1063         conn->ksnc_type = type;
1064         ksocknal_lib_save_callback(sock, conn);
1065         cfs_atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
1066
1067         conn->ksnc_rx_ready = 0;
1068         conn->ksnc_rx_scheduled = 0;
1069
1070         CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue);
1071         conn->ksnc_tx_ready = 0;
1072         conn->ksnc_tx_scheduled = 0;
1073         conn->ksnc_tx_carrier = NULL;
1074         cfs_atomic_set (&conn->ksnc_tx_nob, 0);
1075
1076         LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t,
1077                                      kshm_ips[LNET_MAX_INTERFACES]));
1078         if (hello == NULL) {
1079                 rc = -ENOMEM;
1080                 goto failed_1;
1081         }
1082
1083         /* stash conn's local and remote addrs */
1084         rc = ksocknal_lib_get_conn_addrs (conn);
1085         if (rc != 0)
1086                 goto failed_1;
1087
1088         /* Find out/confirm peer's NID and connection type and get the
1089          * vector of interfaces she's willing to let me connect to.
1090          * Passive connections use the listener timeout since the peer sends
1091          * eagerly */
1092
1093         if (active) {
1094                 peer = route->ksnr_peer;
1095                 LASSERT(ni == peer->ksnp_ni);
1096
1097                 /* Active connection sends HELLO eagerly */
1098                 hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
1099                 peerid = peer->ksnp_id;
1100
1101                 cfs_write_lock_bh(global_lock);
1102                 conn->ksnc_proto = peer->ksnp_proto;
1103                 cfs_write_unlock_bh(global_lock);
1104
1105                 if (conn->ksnc_proto == NULL) {
1106                          conn->ksnc_proto = &ksocknal_protocol_v3x;
1107 #if SOCKNAL_VERSION_DEBUG
1108                          if (*ksocknal_tunables.ksnd_protocol == 2)
1109                                  conn->ksnc_proto = &ksocknal_protocol_v2x;
1110                          else if (*ksocknal_tunables.ksnd_protocol == 1)
1111                                  conn->ksnc_proto = &ksocknal_protocol_v1x;
1112 #endif
1113                 }
1114
1115                 rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
1116                 if (rc != 0)
1117                         goto failed_1;
1118         } else {
1119                 peerid.nid = LNET_NID_ANY;
1120                 peerid.pid = LNET_PID_ANY;
1121
1122                 /* Passive, get protocol from peer */
1123                 conn->ksnc_proto = NULL;
1124         }
1125
1126         rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation);
1127         if (rc < 0)
1128                 goto failed_1;
1129
1130         LASSERT (rc == 0 || active);
1131         LASSERT (conn->ksnc_proto != NULL);
1132         LASSERT (peerid.nid != LNET_NID_ANY);
1133
1134         if (active) {
1135                 ksocknal_peer_addref(peer);
1136                 cfs_write_lock_bh (global_lock);
1137         } else {
1138                 rc = ksocknal_create_peer(&peer, ni, peerid);
1139                 if (rc != 0)
1140                         goto failed_1;
1141
1142                 cfs_write_lock_bh (global_lock);
1143
1144                 /* called with a ref on ni, so shutdown can't have started */
1145                 LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
1146
1147                 peer2 = ksocknal_find_peer_locked(ni, peerid);
1148                 if (peer2 == NULL) {
1149                         /* NB this puts an "empty" peer in the peer
1150                          * table (which takes my ref) */
1151                         list_add_tail(&peer->ksnp_list,
1152                                       ksocknal_nid2peerlist(peerid.nid));
1153                 } else {
1154                         ksocknal_peer_decref(peer);
1155                         peer = peer2;
1156                 }
1157
1158                 /* +1 ref for me */
1159                 ksocknal_peer_addref(peer);
1160                 peer->ksnp_accepting++;
1161
1162                 /* Am I already connecting to this guy?  Resolve in
1163                  * favour of higher NID... */
1164                 if (peerid.nid < ni->ni_nid &&
1165                     ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
1166                         rc = EALREADY;
1167                         warn = "connection race resolution";
1168                         goto failed_2;
1169                 }
1170         }
1171
1172         if (peer->ksnp_closing ||
1173             (active && route->ksnr_deleted)) {
1174                 /* peer/route got closed under me */
1175                 rc = -ESTALE;
1176                 warn = "peer/route removed";
1177                 goto failed_2;
1178         }
1179
1180         if (peer->ksnp_proto == NULL) {
1181                 /* Never connected before.
1182                  * NB recv_hello may have returned EPROTO to signal my peer
1183                  * wants a different protocol than the one I asked for.
1184                  */
1185                 LASSERT (list_empty(&peer->ksnp_conns));
1186
1187                 peer->ksnp_proto = conn->ksnc_proto;
1188                 peer->ksnp_incarnation = incarnation;
1189         }
1190
1191         if (peer->ksnp_proto != conn->ksnc_proto ||
1192             peer->ksnp_incarnation != incarnation) {
1193                 /* Peer rebooted or I've got the wrong protocol version */
1194                 ksocknal_close_peer_conns_locked(peer, 0, 0);
1195
1196                 peer->ksnp_proto = NULL;
1197                 rc = ESTALE;
1198                 warn = peer->ksnp_incarnation != incarnation ?
1199                        "peer rebooted" :
1200                        "wrong proto version";
1201                 goto failed_2;
1202         }
1203
1204         switch (rc) {
1205         default:
1206                 LBUG();
1207         case 0:
1208                 break;
1209         case EALREADY:
1210                 warn = "lost conn race";
1211                 goto failed_2;
1212         case EPROTO:
1213                 warn = "retry with different protocol version";
1214                 goto failed_2;
1215         }
1216
1217         /* Refuse to duplicate an existing connection, unless this is a
1218          * loopback connection */
1219         if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
1220                 list_for_each(tmp, &peer->ksnp_conns) {
1221                         conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
1222
1223                         if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
1224                             conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
1225                             conn2->ksnc_type != conn->ksnc_type)
1226                                 continue;
1227
1228                         /* Reply on a passive connection attempt so the peer
1229                          * realises we're connected. */
1230                         LASSERT (rc == 0);
1231                         if (!active)
1232                                 rc = EALREADY;
1233
1234                         warn = "duplicate";
1235                         goto failed_2;
1236                 }
1237         }
1238
1239         /* If the connection created by this route didn't bind to the IP
1240          * address the route connected to, the connection/route matching
1241          * code below probably isn't going to work. */
1242         if (active &&
1243             route->ksnr_ipaddr != conn->ksnc_ipaddr) {
1244                 CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n",
1245                        libcfs_id2str(peer->ksnp_id),
1246                        HIPQUAD(route->ksnr_ipaddr),
1247                        HIPQUAD(conn->ksnc_ipaddr));
1248         }
1249
1250         /* Search for a route corresponding to the new connection and
1251          * create an association.  This allows incoming connections created
1252          * by routes in my peer to match my own route entries so I don't
1253          * continually create duplicate routes. */
1254         list_for_each (tmp, &peer->ksnp_routes) {
1255                 route = list_entry(tmp, ksock_route_t, ksnr_list);
1256
1257                 if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
1258                         continue;
1259
1260                 ksocknal_associate_route_conn_locked(route, conn);
1261                 break;
1262         }
1263
1264         conn->ksnc_peer = peer;                 /* conn takes my ref on peer */
1265         peer->ksnp_last_alive = cfs_time_current();
1266         peer->ksnp_send_keepalive = 0;
1267         peer->ksnp_error = 0;
1268
1269         sched = ksocknal_choose_scheduler_locked (irq);
1270         sched->kss_nconns++;
1271         conn->ksnc_scheduler = sched;
1272
1273         conn->ksnc_tx_last_post = cfs_time_current();
1274         /* Set the deadline for the outgoing HELLO to drain */
1275         conn->ksnc_tx_bufnob = libcfs_sock_wmem_queued(sock);
1276         conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
1277         cfs_mb();   /* order with adding to peer's conn list */
1278
1279         list_add (&conn->ksnc_list, &peer->ksnp_conns);
1280         ksocknal_conn_addref(conn);
1281
1282         ksocknal_new_packet(conn, 0);
1283
1284         conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
1285
1286         /* Take packets blocking for this connection. */
1287         list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
1288                 if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) == SOCKNAL_MATCH_NO)
1289                                 continue;
1290
1291                 list_del (&tx->tx_list);
1292                 ksocknal_queue_tx_locked (tx, conn);
1293         }
1294
1295         cfs_write_unlock_bh (global_lock);
1296
1297         /* We've now got a new connection.  Any errors from here on are just
1298          * like "normal" comms errors and we close the connection normally.
1299          * NB (a) we still have to send the reply HELLO for passive
1300          *        connections, 
1301          *    (b) normal I/O on the conn is blocked until I setup and call the
1302          *        socket callbacks.
1303          */
1304
1305         ksocknal_lib_bind_irq (irq);
1306
1307         CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d"
1308                " incarnation:"LPD64" sched[%d]/%d\n",
1309                libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
1310                HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr),
1311                conn->ksnc_port, incarnation,
1312                (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
1313
1314         if (active) {
1315                 /* additional routes after interface exchange? */
1316                 ksocknal_create_routes(peer, conn->ksnc_port,
1317                                        hello->kshm_ips, hello->kshm_nips);
1318         } else {
1319                 hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
1320                                                        hello->kshm_nips);
1321                 rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
1322         }
1323
1324         LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
1325                                     kshm_ips[LNET_MAX_INTERFACES]));
1326
1327         /* setup the socket AFTER I've received hello (it disables
1328          * SO_LINGER).  I might call back to the acceptor who may want
1329          * to send a protocol version response and then close the
1330          * socket; this ensures the socket only tears down after the
1331          * response has been sent. */
1332         if (rc == 0)
1333                 rc = ksocknal_lib_setup_sock(sock);
1334
1335         cfs_write_lock_bh(global_lock);
1336
1337         /* NB my callbacks block while I hold ksnd_global_lock */
1338         ksocknal_lib_set_callback(sock, conn);
1339
1340         if (!active)
1341                 peer->ksnp_accepting--;
1342
1343         cfs_write_unlock_bh(global_lock);
1344
1345         if (rc != 0) {
1346                 cfs_write_lock_bh(global_lock);
1347                 ksocknal_close_conn_locked(conn, rc);
1348                 cfs_write_unlock_bh(global_lock);
1349         } else if (ksocknal_connsock_addref(conn) == 0) {
1350                 /* Allow I/O to proceed. */
1351                 ksocknal_read_callback(conn);
1352                 ksocknal_write_callback(conn);
1353                 ksocknal_connsock_decref(conn);
1354         }
1355
1356         ksocknal_connsock_decref(conn);
1357         ksocknal_conn_decref(conn);
1358         return rc;
1359
1360  failed_2:
1361         if (!peer->ksnp_closing &&
1362             list_empty (&peer->ksnp_conns) &&
1363             list_empty (&peer->ksnp_routes)) {
1364                 list_add(&zombies, &peer->ksnp_tx_queue);
1365                 list_del_init(&peer->ksnp_tx_queue);
1366                 ksocknal_unlink_peer_locked(peer);
1367         }
1368
1369         cfs_write_unlock_bh (global_lock);
1370
1371         if (warn != NULL) {
1372                 if (rc < 0)
1373                         CERROR("Not creating conn %s type %d: %s\n",
1374                                libcfs_id2str(peerid), conn->ksnc_type, warn);
1375                 else
1376                         CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
1377                               libcfs_id2str(peerid), conn->ksnc_type, warn);
1378         }
1379
1380         if (!active) {
1381                 if (rc > 0) {
1382                         /* Request retry by replying with CONN_NONE 
1383                          * ksnc_proto has been set already */
1384                         conn->ksnc_type = SOCKLND_CONN_NONE;
1385                         hello->kshm_nips = 0;
1386                         ksocknal_send_hello(ni, conn, peerid.nid, hello);
1387                 }
1388
1389                 cfs_write_lock_bh(global_lock);
1390                 peer->ksnp_accepting--;
1391                 cfs_write_unlock_bh(global_lock);
1392         }
1393
1394         ksocknal_txlist_done(ni, &zombies, 1);
1395         ksocknal_peer_decref(peer);
1396
1397  failed_1:
1398         if (hello != NULL)
1399                 LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
1400                                             kshm_ips[LNET_MAX_INTERFACES]));
1401
1402         LIBCFS_FREE (conn, sizeof(*conn));
1403
1404  failed_0:
1405         libcfs_sock_release(sock);
1406         return rc;
1407 }
1408
1409 void
1410 ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
1411 {
1412         /* This just does the immmediate housekeeping, and queues the
1413          * connection for the reaper to terminate.
1414          * Caller holds ksnd_global_lock exclusively in irq context */
1415         ksock_peer_t      *peer = conn->ksnc_peer;
1416         ksock_route_t     *route;
1417         ksock_conn_t      *conn2;
1418         struct list_head  *tmp;
1419
1420         LASSERT (peer->ksnp_error == 0);
1421         LASSERT (!conn->ksnc_closing);
1422         conn->ksnc_closing = 1;
1423
1424         /* ksnd_deathrow_conns takes over peer's ref */
1425         list_del (&conn->ksnc_list);
1426
1427         route = conn->ksnc_route;
1428         if (route != NULL) {
1429                 /* dissociate conn from route... */
1430                 LASSERT (!route->ksnr_deleted);
1431                 LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
1432
1433                 conn2 = NULL;
1434                 list_for_each(tmp, &peer->ksnp_conns) {
1435                         conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
1436
1437                         if (conn2->ksnc_route == route &&
1438                             conn2->ksnc_type == conn->ksnc_type)
1439                                 break;
1440
1441                         conn2 = NULL;
1442                 }
1443                 if (conn2 == NULL)
1444                         route->ksnr_connected &= ~(1 << conn->ksnc_type);
1445
1446                 conn->ksnc_route = NULL;
1447
1448 #if 0           /* irrelevent with only eager routes */
1449                 list_del (&route->ksnr_list);   /* make route least favourite */
1450                 list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
1451 #endif
1452                 ksocknal_route_decref(route);     /* drop conn's ref on route */
1453         }
1454
1455         if (list_empty (&peer->ksnp_conns)) {
1456                 /* No more connections to this peer */
1457
1458                 if (!list_empty(&peer->ksnp_tx_queue)) {
1459                         ksock_tx_t *tx;
1460
1461                         LASSERT (conn->ksnc_proto == &ksocknal_protocol_v3x);
1462
1463                         /* throw them to the last connection...,
1464                          * these TXs will be send to /dev/null by scheduler */
1465                         list_for_each_entry(tx, &peer->ksnp_tx_queue, tx_list)
1466                                 ksocknal_tx_prep(conn, tx);
1467
1468                         spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
1469                         list_splice_init(&peer->ksnp_tx_queue, &conn->ksnc_tx_queue);
1470                         spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
1471                 }
1472
1473                 peer->ksnp_proto = NULL;        /* renegotiate protocol version */
1474                 peer->ksnp_error = error;       /* stash last conn close reason */
1475
1476                 if (list_empty (&peer->ksnp_routes)) {
1477                         /* I've just closed last conn belonging to a
1478                          * peer with no routes to it */
1479                         ksocknal_unlink_peer_locked (peer);
1480                 }
1481         }
1482
1483         cfs_spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
1484
1485         list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
1486         cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
1487
1488         cfs_spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
1489 }
1490
1491 void
1492 ksocknal_peer_failed (ksock_peer_t *peer)
1493 {
1494         time_t    last_alive = 0;
1495         int       notify = 0;
1496
1497         /* There has been a connection failure or comms error; but I'll only
1498          * tell LNET I think the peer is dead if it's to another kernel and
1499          * there are no connections or connection attempts in existance. */
1500
1501         cfs_read_lock (&ksocknal_data.ksnd_global_lock);
1502
1503         if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
1504             list_empty(&peer->ksnp_conns) &&
1505             peer->ksnp_accepting == 0 &&
1506             ksocknal_find_connecting_route_locked(peer) == NULL) {
1507                 notify = 1;
1508                 last_alive = (time_t) (cfs_time_current_sec() -
1509                         cfs_duration_sec(cfs_time_current() -
1510                                          peer->ksnp_last_alive));
1511         }
1512
1513         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
1514
1515         if (notify)
1516                 lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0,
1517                              last_alive);
1518 }
1519
1520 void
1521 ksocknal_finalize_zcreq(ksock_conn_t *conn)
1522 {
1523         ksock_peer_t     *peer = conn->ksnc_peer;
1524         ksock_tx_t       *tx;
1525         ksock_tx_t       *tmp;
1526         CFS_LIST_HEAD    (zlist);
1527
1528         /* NB safe to finalize TXs because closing of socket will
1529          * abort all buffered data */
1530         LASSERT (conn->ksnc_sock == NULL);
1531
1532         cfs_spin_lock(&peer->ksnp_lock);
1533
1534         cfs_list_for_each_entry_safe_typed(tx, tmp, &peer->ksnp_zc_req_list,
1535                                            ksock_tx_t, tx_zc_list) {
1536                 if (tx->tx_conn != conn)
1537                         continue;
1538
1539                 LASSERT (tx->tx_msg.ksm_zc_cookies[0] != 0);
1540
1541                 tx->tx_msg.ksm_zc_cookies[0] = 0;
1542                 list_del(&tx->tx_zc_list);
1543                 list_add(&tx->tx_zc_list, &zlist);
1544         }
1545
1546         cfs_spin_unlock(&peer->ksnp_lock);
1547
1548         while (!list_empty(&zlist)) {
1549                 tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
1550
1551                 list_del(&tx->tx_zc_list);
1552                 ksocknal_tx_decref(tx);
1553         }
1554 }
1555
1556 void
1557 ksocknal_terminate_conn (ksock_conn_t *conn)
1558 {
1559         /* This gets called by the reaper (guaranteed thread context) to
1560          * disengage the socket from its callbacks and close it.
1561          * ksnc_refcount will eventually hit zero, and then the reaper will
1562          * destroy it. */
1563         ksock_peer_t     *peer = conn->ksnc_peer;
1564         ksock_sched_t    *sched = conn->ksnc_scheduler;
1565         int               failed = 0;
1566
1567         LASSERT(conn->ksnc_closing);
1568
1569         /* wake up the scheduler to "send" all remaining packets to /dev/null */
1570         cfs_spin_lock_bh (&sched->kss_lock);
1571
1572         /* a closing conn is always ready to tx */
1573         conn->ksnc_tx_ready = 1;
1574
1575         if (!conn->ksnc_tx_scheduled &&
1576             !list_empty(&conn->ksnc_tx_queue)){
1577                 list_add_tail (&conn->ksnc_tx_list,
1578                                &sched->kss_tx_conns);
1579                 conn->ksnc_tx_scheduled = 1;
1580                 /* extra ref for scheduler */
1581                 ksocknal_conn_addref(conn);
1582
1583                 cfs_waitq_signal (&sched->kss_waitq);
1584         }
1585
1586         cfs_spin_unlock_bh (&sched->kss_lock);
1587
1588         /* serialise with callbacks */
1589         cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
1590
1591         ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
1592
1593         /* OK, so this conn may not be completely disengaged from its
1594          * scheduler yet, but it _has_ committed to terminate... */
1595         conn->ksnc_scheduler->kss_nconns--;
1596
1597         if (peer->ksnp_error != 0) {
1598                 /* peer's last conn closed in error */
1599                 LASSERT (list_empty (&peer->ksnp_conns));
1600                 failed = 1;
1601                 peer->ksnp_error = 0;     /* avoid multiple notifications */
1602         }
1603
1604         cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
1605
1606         if (failed)
1607                 ksocknal_peer_failed(peer);
1608
1609         /* The socket is closed on the final put; either here, or in
1610          * ksocknal_{send,recv}msg().  Since we set up the linger2 option
1611          * when the connection was established, this will close the socket
1612          * immediately, aborting anything buffered in it. Any hung
1613          * zero-copy transmits will therefore complete in finite time. */
1614         ksocknal_connsock_decref(conn);
1615 }
1616
1617 void
1618 ksocknal_queue_zombie_conn (ksock_conn_t *conn)
1619 {
1620         /* Queue the conn for the reaper to destroy */
1621
1622         LASSERT (cfs_atomic_read(&conn->ksnc_conn_refcount) == 0);
1623         cfs_spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
1624
1625         list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
1626         cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq);
1627
1628         cfs_spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
1629 }
1630
1631 void
1632 ksocknal_destroy_conn (ksock_conn_t *conn)
1633 {
1634         /* Final coup-de-grace of the reaper */
1635         CDEBUG (D_NET, "connection %p\n", conn);
1636
1637         LASSERT (cfs_atomic_read (&conn->ksnc_conn_refcount) == 0);
1638         LASSERT (cfs_atomic_read (&conn->ksnc_sock_refcount) == 0);
1639         LASSERT (conn->ksnc_sock == NULL);
1640         LASSERT (conn->ksnc_route == NULL);
1641         LASSERT (!conn->ksnc_tx_scheduled);
1642         LASSERT (!conn->ksnc_rx_scheduled);
1643         LASSERT (list_empty(&conn->ksnc_tx_queue));
1644
1645         /* complete current receive if any */
1646         switch (conn->ksnc_rx_state) {
1647         case SOCKNAL_RX_LNET_PAYLOAD:
1648                 CERROR("Completing partial receive from %s"
1649                        ", ip %d.%d.%d.%d:%d, with error\n",
1650                        libcfs_id2str(conn->ksnc_peer->ksnp_id),
1651                        HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
1652                 lnet_finalize (conn->ksnc_peer->ksnp_ni,
1653                                conn->ksnc_cookie, -EIO);
1654                 break;
1655         case SOCKNAL_RX_LNET_HEADER:
1656                 if (conn->ksnc_rx_started)
1657                         CERROR("Incomplete receive of lnet header from %s"
1658                                ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
1659                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1660                                HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
1661                                conn->ksnc_proto->pro_version);
1662                 break;
1663         case SOCKNAL_RX_KSM_HEADER:
1664                 if (conn->ksnc_rx_started)
1665                         CERROR("Incomplete receive of ksock message from %s"
1666                                ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
1667                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1668                                HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
1669                                conn->ksnc_proto->pro_version);
1670                 break;
1671         case SOCKNAL_RX_SLOP:
1672                 if (conn->ksnc_rx_started)
1673                         CERROR("Incomplete receive of slops from %s"
1674                                ", ip %d.%d.%d.%d:%d, with error\n",
1675                                libcfs_id2str(conn->ksnc_peer->ksnp_id),
1676                                HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
1677                break;
1678         default:
1679                 LBUG ();
1680                 break;
1681         }
1682
1683         ksocknal_peer_decref(conn->ksnc_peer);
1684
1685         LIBCFS_FREE (conn, sizeof (*conn));
1686 }
1687
1688 int
1689 ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
1690 {
1691         ksock_conn_t       *conn;
1692         struct list_head   *ctmp;
1693         struct list_head   *cnxt;
1694         int                 count = 0;
1695
1696         list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
1697                 conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
1698
1699                 if (ipaddr == 0 ||
1700                     conn->ksnc_ipaddr == ipaddr) {
1701                         count++;
1702                         ksocknal_close_conn_locked (conn, why);
1703                 }
1704         }
1705
1706         return (count);
1707 }
1708
1709 int
1710 ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
1711 {
1712         ksock_peer_t     *peer = conn->ksnc_peer;
1713         __u32             ipaddr = conn->ksnc_ipaddr;
1714         int               count;
1715
1716         cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
1717
1718         count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
1719
1720         cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
1721
1722         return (count);
1723 }
1724
1725 int
1726 ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
1727 {
1728         ksock_peer_t       *peer;
1729         struct list_head   *ptmp;
1730         struct list_head   *pnxt;
1731         int                 lo;
1732         int                 hi;
1733         int                 i;
1734         int                 count = 0;
1735
1736         cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
1737
1738         if (id.nid != LNET_NID_ANY)
1739                 lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
1740         else {
1741                 lo = 0;
1742                 hi = ksocknal_data.ksnd_peer_hash_size - 1;
1743         }
1744
1745         for (i = lo; i <= hi; i++) {
1746                 list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
1747
1748                         peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
1749
1750                         if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
1751                               (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
1752                                 continue;
1753
1754                         count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
1755                 }
1756         }
1757
1758         cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
1759
1760         /* wildcards always succeed */
1761         if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
1762                 return (0);
1763
1764         return (count == 0 ? -ENOENT : 0);
1765 }
1766
1767 void
1768 ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive)
1769 {
1770         /* The router is telling me she's been notified of a change in
1771          * gateway state.... */
1772         lnet_process_id_t  id = {0};
1773
1774         id.nid = gw_nid;
1775         id.pid = LNET_PID_ANY;
1776
1777         CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
1778                 alive ? "up" : "down");
1779
1780         if (!alive) {
1781                 /* If the gateway crashed, close all open connections... */
1782                 ksocknal_close_matching_conns (id, 0);
1783                 return;
1784         }
1785
1786         /* ...otherwise do nothing.  We can only establish new connections
1787          * if we have autroutes, and these connect on demand. */
1788 }
1789
1790 void
1791 ksocknal_push_peer (ksock_peer_t *peer)
1792 {
1793         int               index;
1794         int               i;
1795         struct list_head *tmp;
1796         ksock_conn_t     *conn;
1797
1798         for (index = 0; ; index++) {
1799                 cfs_read_lock (&ksocknal_data.ksnd_global_lock);
1800
1801                 i = 0;
1802                 conn = NULL;
1803
1804                 list_for_each (tmp, &peer->ksnp_conns) {
1805                         if (i++ == index) {
1806                                 conn = list_entry (tmp, ksock_conn_t, ksnc_list);
1807                                 ksocknal_conn_addref(conn);
1808                                 break;
1809                         }
1810                 }
1811
1812                 cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
1813
1814                 if (conn == NULL)
1815                         break;
1816
1817                 ksocknal_lib_push_conn (conn);
1818                 ksocknal_conn_decref(conn);
1819         }
1820 }
1821
1822 int
1823 ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
1824 {
1825         ksock_peer_t      *peer;
1826         struct list_head  *tmp;
1827         int                index;
1828         int                i;
1829         int                j;
1830         int                rc = -ENOENT;
1831
1832         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
1833                 for (j = 0; ; j++) {
1834                         cfs_read_lock (&ksocknal_data.ksnd_global_lock);
1835
1836                         index = 0;
1837                         peer = NULL;
1838
1839                         list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
1840                                 peer = list_entry(tmp, ksock_peer_t,
1841                                                   ksnp_list);
1842
1843                                 if (!((id.nid == LNET_NID_ANY ||
1844                                        id.nid == peer->ksnp_id.nid) &&
1845                                       (id.pid == LNET_PID_ANY ||
1846                                        id.pid == peer->ksnp_id.pid))) {
1847                                         peer = NULL;
1848                                         continue;
1849                                 }
1850
1851                                 if (index++ == j) {
1852                                         ksocknal_peer_addref(peer);
1853                                         break;
1854                                 }
1855                         }
1856
1857                         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
1858
1859                         if (peer != NULL) {
1860                                 rc = 0;
1861                                 ksocknal_push_peer (peer);
1862                                 ksocknal_peer_decref(peer);
1863                         }
1864                 }
1865
1866         }
1867
1868         return (rc);
1869 }
1870
1871 int
1872 ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
1873 {
1874         ksock_net_t       *net = ni->ni_data;
1875         ksock_interface_t *iface;
1876         int                rc;
1877         int                i;
1878         int                j;
1879         struct list_head  *ptmp;
1880         ksock_peer_t      *peer;
1881         struct list_head  *rtmp;
1882         ksock_route_t     *route;
1883
1884         if (ipaddress == 0 ||
1885             netmask == 0)
1886                 return (-EINVAL);
1887
1888         cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
1889
1890         iface = ksocknal_ip2iface(ni, ipaddress);
1891         if (iface != NULL) {
1892                 /* silently ignore dups */
1893                 rc = 0;
1894         } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
1895                 rc = -ENOSPC;
1896         } else {
1897                 iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
1898
1899                 iface->ksni_ipaddr = ipaddress;
1900                 iface->ksni_netmask = netmask;
1901                 iface->ksni_nroutes = 0;
1902                 iface->ksni_npeers = 0;
1903
1904                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
1905                         list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
1906                                 peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
1907
1908                                 for (j = 0; j < peer->ksnp_n_passive_ips; j++)
1909                                         if (peer->ksnp_passive_ips[j] == ipaddress)
1910                                                 iface->ksni_npeers++;
1911
1912                                 list_for_each(rtmp, &peer->ksnp_routes) {
1913                                         route = list_entry(rtmp, ksock_route_t, ksnr_list);
1914
1915                                         if (route->ksnr_myipaddr == ipaddress)
1916                                                 iface->ksni_nroutes++;
1917                                 }
1918                         }
1919                 }
1920
1921                 rc = 0;
1922                 /* NB only new connections will pay attention to the new interface! */
1923         }
1924
1925         cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
1926
1927         return (rc);
1928 }
1929
1930 void
1931 ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
1932 {
1933         struct list_head   *tmp;
1934         struct list_head   *nxt;
1935         ksock_route_t      *route;
1936         ksock_conn_t       *conn;
1937         int                 i;
1938         int                 j;
1939
1940         for (i = 0; i < peer->ksnp_n_passive_ips; i++)
1941                 if (peer->ksnp_passive_ips[i] == ipaddr) {
1942                         for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
1943                                 peer->ksnp_passive_ips[j-1] =
1944                                         peer->ksnp_passive_ips[j];
1945                         peer->ksnp_n_passive_ips--;
1946                         break;
1947                 }
1948
1949         list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
1950                 route = list_entry (tmp, ksock_route_t, ksnr_list);
1951
1952                 if (route->ksnr_myipaddr != ipaddr)
1953                         continue;
1954
1955                 if (route->ksnr_share_count != 0) {
1956                         /* Manually created; keep, but unbind */
1957                         route->ksnr_myipaddr = 0;
1958                 } else {
1959                         ksocknal_del_route_locked(route);
1960                 }
1961         }
1962
1963         list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
1964                 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
1965
1966                 if (conn->ksnc_myipaddr == ipaddr)
1967                         ksocknal_close_conn_locked (conn, 0);
1968         }
1969 }
1970
1971 int
1972 ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
1973 {
1974         ksock_net_t       *net = ni->ni_data;
1975         int                rc = -ENOENT;
1976         struct list_head  *tmp;
1977         struct list_head  *nxt;
1978         ksock_peer_t      *peer;
1979         __u32              this_ip;
1980         int                i;
1981         int                j;
1982
1983         cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
1984
1985         for (i = 0; i < net->ksnn_ninterfaces; i++) {
1986                 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
1987
1988                 if (!(ipaddress == 0 ||
1989                       ipaddress == this_ip))
1990                         continue;
1991
1992                 rc = 0;
1993
1994                 for (j = i+1; j < net->ksnn_ninterfaces; j++)
1995                         net->ksnn_interfaces[j-1] =
1996                                 net->ksnn_interfaces[j];
1997
1998                 net->ksnn_ninterfaces--;
1999
2000                 for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
2001                         list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) {
2002                                 peer = list_entry(tmp, ksock_peer_t, ksnp_list);
2003
2004                                 if (peer->ksnp_ni != ni)
2005                                         continue;
2006
2007                                 ksocknal_peer_del_interface_locked(peer, this_ip);
2008                         }
2009                 }
2010         }
2011
2012         cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
2013
2014         return (rc);
2015 }
2016
2017 int
2018 ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
2019 {
2020         lnet_process_id_t id = {0}; 
2021         struct libcfs_ioctl_data *data = arg;
2022         int rc;
2023
2024         switch(cmd) {
2025         case IOC_LIBCFS_GET_INTERFACE: {
2026                 ksock_net_t       *net = ni->ni_data;
2027                 ksock_interface_t *iface;
2028
2029                 cfs_read_lock (&ksocknal_data.ksnd_global_lock);
2030
2031                 if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
2032                         rc = -ENOENT;
2033                 } else {
2034                         rc = 0;
2035                         iface = &net->ksnn_interfaces[data->ioc_count];
2036
2037                         data->ioc_u32[0] = iface->ksni_ipaddr;
2038                         data->ioc_u32[1] = iface->ksni_netmask;
2039                         data->ioc_u32[2] = iface->ksni_npeers;
2040                         data->ioc_u32[3] = iface->ksni_nroutes;
2041                 }
2042
2043                 cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
2044                 return rc;
2045         }
2046
2047         case IOC_LIBCFS_ADD_INTERFACE:
2048                 return ksocknal_add_interface(ni,
2049                                               data->ioc_u32[0], /* IP address */
2050                                               data->ioc_u32[1]); /* net mask */
2051
2052         case IOC_LIBCFS_DEL_INTERFACE:
2053                 return ksocknal_del_interface(ni,
2054                                               data->ioc_u32[0]); /* IP address */
2055
2056         case IOC_LIBCFS_GET_PEER: {
2057                 __u32            myip = 0;
2058                 __u32            ip = 0;
2059                 int              port = 0;
2060                 int              conn_count = 0;
2061                 int              share_count = 0;
2062
2063                 rc = ksocknal_get_peer_info(ni, data->ioc_count,
2064                                             &id, &myip, &ip, &port,
2065                                             &conn_count,  &share_count);
2066                 if (rc != 0)
2067                         return rc;
2068
2069                 data->ioc_nid    = id.nid;
2070                 data->ioc_count  = share_count;
2071                 data->ioc_u32[0] = ip;
2072                 data->ioc_u32[1] = port;
2073                 data->ioc_u32[2] = myip;
2074                 data->ioc_u32[3] = conn_count;
2075                 data->ioc_u32[4] = id.pid;
2076                 return 0;
2077         }
2078
2079         case IOC_LIBCFS_ADD_PEER:
2080                 id.nid = data->ioc_nid;
2081                 id.pid = LUSTRE_SRV_LNET_PID;
2082                 return ksocknal_add_peer (ni, id,
2083                                           data->ioc_u32[0], /* IP */
2084                                           data->ioc_u32[1]); /* port */
2085
2086         case IOC_LIBCFS_DEL_PEER:
2087                 id.nid = data->ioc_nid;
2088                 id.pid = LNET_PID_ANY;
2089                 return ksocknal_del_peer (ni, id,
2090                                           data->ioc_u32[0]); /* IP */
2091
2092         case IOC_LIBCFS_GET_CONN: {
2093                 int           txmem;
2094                 int           rxmem;
2095                 int           nagle;
2096                 ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count);
2097
2098                 if (conn == NULL)
2099                         return -ENOENT;
2100
2101                 ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
2102
2103                 data->ioc_count  = txmem;
2104                 data->ioc_nid    = conn->ksnc_peer->ksnp_id.nid;
2105                 data->ioc_flags  = nagle;
2106                 data->ioc_u32[0] = conn->ksnc_ipaddr;
2107                 data->ioc_u32[1] = conn->ksnc_port;
2108                 data->ioc_u32[2] = conn->ksnc_myipaddr;
2109                 data->ioc_u32[3] = conn->ksnc_type;
2110                 data->ioc_u32[4] = (__u32)(conn->ksnc_scheduler -
2111                                    ksocknal_data.ksnd_schedulers);
2112                 data->ioc_u32[5] = rxmem;
2113                 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
2114                 ksocknal_conn_decref(conn);
2115                 return 0;
2116         }
2117
2118         case IOC_LIBCFS_CLOSE_CONNECTION:
2119                 id.nid = data->ioc_nid;
2120                 id.pid = LNET_PID_ANY;
2121                 return ksocknal_close_matching_conns (id,
2122                                                       data->ioc_u32[0]);
2123
2124         case IOC_LIBCFS_REGISTER_MYNID:
2125                 /* Ignore if this is a noop */
2126                 if (data->ioc_nid == ni->ni_nid)
2127                         return 0;
2128
2129                 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
2130                        libcfs_nid2str(data->ioc_nid),
2131                        libcfs_nid2str(ni->ni_nid));
2132                 return -EINVAL;
2133
2134         case IOC_LIBCFS_PUSH_CONNECTION:
2135                 id.nid = data->ioc_nid;
2136                 id.pid = LNET_PID_ANY;
2137                 return ksocknal_push(ni, id);
2138
2139         default:
2140                 return -EINVAL;
2141         }
2142         /* not reached */
2143 }
2144
2145 void
2146 ksocknal_free_buffers (void)
2147 {
2148         LASSERT (cfs_atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
2149
2150         if (ksocknal_data.ksnd_schedulers != NULL)
2151                 LIBCFS_FREE (ksocknal_data.ksnd_schedulers,
2152                              sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
2153
2154         LIBCFS_FREE (ksocknal_data.ksnd_peers,
2155                      sizeof (struct list_head) *
2156                      ksocknal_data.ksnd_peer_hash_size);
2157
2158         cfs_spin_lock(&ksocknal_data.ksnd_tx_lock);
2159
2160         if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
2161                 struct list_head  zlist;
2162                 ksock_tx_t       *tx;
2163
2164                 list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
2165                 list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
2166                 cfs_spin_unlock(&ksocknal_data.ksnd_tx_lock);
2167
2168                 while(!list_empty(&zlist)) {
2169                         tx = list_entry(zlist.next, ksock_tx_t, tx_list);
2170                         list_del(&tx->tx_list);
2171                         LIBCFS_FREE(tx, tx->tx_desc_size);
2172                 }
2173         } else {
2174                 cfs_spin_unlock(&ksocknal_data.ksnd_tx_lock);
2175         }
2176 }
2177
2178 void
2179 ksocknal_base_shutdown (void)
2180 {
2181         ksock_sched_t *sched;
2182         int            i;
2183
2184         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
2185                cfs_atomic_read (&libcfs_kmemory));
2186         LASSERT (ksocknal_data.ksnd_nnets == 0);
2187
2188         switch (ksocknal_data.ksnd_init) {
2189         default:
2190                 LASSERT (0);
2191
2192         case SOCKNAL_INIT_ALL:
2193         case SOCKNAL_INIT_DATA:
2194                 LASSERT (ksocknal_data.ksnd_peers != NULL);
2195                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2196                         LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
2197                 }
2198                 LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
2199                 LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
2200                 LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs));
2201                 LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes));
2202
2203                 if (ksocknal_data.ksnd_schedulers != NULL)
2204                         for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
2205                                 ksock_sched_t *kss =
2206                                         &ksocknal_data.ksnd_schedulers[i];
2207
2208                                 LASSERT (list_empty (&kss->kss_tx_conns));
2209                                 LASSERT (list_empty (&kss->kss_rx_conns));
2210                                 LASSERT (list_empty (&kss->kss_zombie_noop_txs));
2211                                 LASSERT (kss->kss_nconns == 0);
2212                         }
2213
2214                 /* flag threads to terminate; wake and wait for them to die */
2215                 ksocknal_data.ksnd_shuttingdown = 1;
2216                 cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq);
2217                 cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq);
2218
2219                 if (ksocknal_data.ksnd_schedulers != NULL)
2220                         for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
2221                                 sched = &ksocknal_data.ksnd_schedulers[i];
2222                                 cfs_waitq_broadcast(&sched->kss_waitq);
2223                         }
2224
2225                 i = 4;
2226                 cfs_read_lock (&ksocknal_data.ksnd_global_lock);
2227                 while (ksocknal_data.ksnd_nthreads != 0) {
2228                         i++;
2229                         CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
2230                                "waiting for %d threads to terminate\n",
2231                                 ksocknal_data.ksnd_nthreads);
2232                         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
2233                         cfs_pause(cfs_time_seconds(1));
2234                         cfs_read_lock (&ksocknal_data.ksnd_global_lock);
2235                 }
2236                 cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
2237
2238                 ksocknal_free_buffers();
2239
2240                 ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
2241                 break;
2242         }
2243
2244         CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
2245                cfs_atomic_read (&libcfs_kmemory));
2246
2247         PORTAL_MODULE_UNUSE;
2248 }
2249
2250 int
2251 ksocknal_base_startup (void)
2252 {
2253         int               rc;
2254         int               i;
2255
2256         LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
2257         LASSERT (ksocknal_data.ksnd_nnets == 0);
2258
2259         memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
2260
2261         ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
2262         LIBCFS_ALLOC (ksocknal_data.ksnd_peers,
2263                       sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
2264         if (ksocknal_data.ksnd_peers == NULL)
2265                 return -ENOMEM;
2266
2267         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
2268                 CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
2269
2270         cfs_rwlock_init(&ksocknal_data.ksnd_global_lock);
2271
2272         cfs_spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
2273         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
2274         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
2275         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
2276         cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq);
2277
2278         cfs_spin_lock_init (&ksocknal_data.ksnd_connd_lock);
2279         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs);
2280         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes);
2281         cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq);
2282
2283         cfs_spin_lock_init (&ksocknal_data.ksnd_tx_lock);
2284         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs);
2285
2286         /* NB memset above zeros whole of ksocknal_data, including
2287          * ksocknal_data.ksnd_irqinfo[all].ksni_valid */
2288
2289         /* flag lists/ptrs/locks initialised */
2290         ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
2291         PORTAL_MODULE_USE;
2292
2293         ksocknal_data.ksnd_nschedulers = ksocknal_nsched();
2294         LIBCFS_ALLOC(ksocknal_data.ksnd_schedulers,
2295                      sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
2296         if (ksocknal_data.ksnd_schedulers == NULL)
2297                 goto failed;
2298
2299         for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
2300                 ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
2301
2302                 cfs_spin_lock_init (&kss->kss_lock);
2303                 CFS_INIT_LIST_HEAD (&kss->kss_rx_conns);
2304                 CFS_INIT_LIST_HEAD (&kss->kss_tx_conns);
2305                 CFS_INIT_LIST_HEAD (&kss->kss_zombie_noop_txs);
2306                 cfs_waitq_init (&kss->kss_waitq);
2307         }
2308
2309         for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
2310                 rc = ksocknal_thread_start (ksocknal_scheduler,
2311                                             &ksocknal_data.ksnd_schedulers[i]);
2312                 if (rc != 0) {
2313                         CERROR("Can't spawn socknal scheduler[%d]: %d\n",
2314                                i, rc);
2315                         goto failed;
2316                 }
2317         }
2318
2319         /* must have at least 2 connds to remain responsive to accepts while
2320          * connecting */
2321         if (*ksocknal_tunables.ksnd_nconnds < 2)
2322                 *ksocknal_tunables.ksnd_nconnds = 2;
2323
2324         for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
2325                 rc = ksocknal_thread_start (ksocknal_connd,
2326                                             (void *)((ulong_ptr_t)i));
2327                 if (rc != 0) {
2328                         CERROR("Can't spawn socknal connd: %d\n", rc);
2329                         goto failed;
2330                 }
2331         }
2332
2333         rc = ksocknal_thread_start (ksocknal_reaper, NULL);
2334         if (rc != 0) {
2335                 CERROR ("Can't spawn socknal reaper: %d\n", rc);
2336                 goto failed;
2337         }
2338
2339         /* flag everything initialised */
2340         ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
2341
2342         return 0;
2343
2344  failed:
2345         ksocknal_base_shutdown();
2346         return -ENETDOWN;
2347 }
2348
2349 void
2350 ksocknal_debug_peerhash (lnet_ni_t *ni)
2351 {
2352         ksock_peer_t     *peer = NULL;
2353         struct list_head *tmp;
2354         int               i;
2355
2356         cfs_read_lock (&ksocknal_data.ksnd_global_lock);
2357
2358         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2359                 list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
2360                         peer = list_entry (tmp, ksock_peer_t, ksnp_list);
2361
2362                         if (peer->ksnp_ni == ni) break;
2363
2364                         peer = NULL;
2365                 }
2366         }
2367
2368         if (peer != NULL) {
2369                 ksock_route_t *route;
2370                 ksock_conn_t  *conn;
2371
2372                 CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
2373                        "closing %d, accepting %d, err %d, zcookie "LPU64", "
2374                        "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id),
2375                        cfs_atomic_read(&peer->ksnp_refcount),
2376                        peer->ksnp_sharecount, peer->ksnp_closing,
2377                        peer->ksnp_accepting, peer->ksnp_error,
2378                        peer->ksnp_zc_next_cookie,
2379                        !list_empty(&peer->ksnp_tx_queue),
2380                        !list_empty(&peer->ksnp_zc_req_list));
2381
2382                 list_for_each (tmp, &peer->ksnp_routes) {
2383                         route = list_entry(tmp, ksock_route_t, ksnr_list);
2384                         CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
2385                                "del %d\n", cfs_atomic_read(&route->ksnr_refcount),
2386                                route->ksnr_scheduled, route->ksnr_connecting,
2387                                route->ksnr_connected, route->ksnr_deleted);
2388                 }
2389
2390                 list_for_each (tmp, &peer->ksnp_conns) {
2391                         conn = list_entry(tmp, ksock_conn_t, ksnc_list);
2392                         CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
2393                                cfs_atomic_read(&conn->ksnc_conn_refcount),
2394                                cfs_atomic_read(&conn->ksnc_sock_refcount),
2395                                conn->ksnc_type, conn->ksnc_closing);
2396                 }
2397         }
2398
2399         cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
2400         return;
2401 }
2402
2403 void
2404 ksocknal_shutdown (lnet_ni_t *ni)
2405 {
2406         ksock_net_t      *net = ni->ni_data;
2407         int               i;
2408         lnet_process_id_t anyid = {0};
2409
2410         anyid.nid =  LNET_NID_ANY;
2411         anyid.pid =  LNET_PID_ANY;
2412
2413         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
2414         LASSERT(ksocknal_data.ksnd_nnets > 0);
2415
2416         cfs_spin_lock_bh (&net->ksnn_lock);
2417         net->ksnn_shutdown = 1;                 /* prevent new peers */
2418         cfs_spin_unlock_bh (&net->ksnn_lock);
2419
2420         /* Delete all peers */
2421         ksocknal_del_peer(ni, anyid, 0);
2422
2423         /* Wait for all peer state to clean up */
2424         i = 2;
2425         cfs_spin_lock_bh (&net->ksnn_lock);
2426         while (net->ksnn_npeers != 0) {
2427                 cfs_spin_unlock_bh (&net->ksnn_lock);
2428
2429                 i++;
2430                 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
2431                        "waiting for %d peers to disconnect\n",
2432                        net->ksnn_npeers);
2433                 cfs_pause(cfs_time_seconds(1));
2434
2435                 ksocknal_debug_peerhash(ni);
2436
2437                 cfs_spin_lock_bh (&net->ksnn_lock);
2438         }
2439         cfs_spin_unlock_bh (&net->ksnn_lock);
2440
2441         for (i = 0; i < net->ksnn_ninterfaces; i++) {
2442                 LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
2443                 LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
2444         }
2445
2446         LIBCFS_FREE(net, sizeof(*net));
2447
2448         ksocknal_data.ksnd_nnets--;
2449         if (ksocknal_data.ksnd_nnets == 0)
2450                 ksocknal_base_shutdown();
2451 }
2452
2453 int
2454 ksocknal_enumerate_interfaces(ksock_net_t *net)
2455 {
2456         char      **names;
2457         int         i;
2458         int         j;
2459         int         rc;
2460         int         n;
2461
2462         n = libcfs_ipif_enumerate(&names);
2463         if (n <= 0) {
2464                 CERROR("Can't enumerate interfaces: %d\n", n);
2465                 return n;
2466         }
2467
2468         for (i = j = 0; i < n; i++) {
2469                 int        up;
2470                 __u32      ip;
2471                 __u32      mask;
2472
2473                 if (!strcmp(names[i], "lo")) /* skip the loopback IF */
2474                         continue;
2475
2476                 rc = libcfs_ipif_query(names[i], &up, &ip, &mask);
2477                 if (rc != 0) {
2478                         CWARN("Can't get interface %s info: %d\n",
2479                               names[i], rc);
2480                         continue;
2481                 }
2482
2483                 if (!up) {
2484                         CWARN("Ignoring interface %s (down)\n",
2485                               names[i]);
2486                         continue;
2487                 }
2488
2489                 if (j == LNET_MAX_INTERFACES) {
2490                         CWARN("Ignoring interface %s (too many interfaces)\n",
2491                               names[i]);
2492                         continue;
2493                 }
2494
2495                 net->ksnn_interfaces[j].ksni_ipaddr = ip;
2496                 net->ksnn_interfaces[j].ksni_netmask = mask;
2497                 j++;
2498         }
2499
2500         libcfs_ipif_free_enumeration(names, n);
2501
2502         if (j == 0)
2503                 CERROR("Can't find any usable interfaces\n");
2504
2505         return j;
2506 }
2507
2508 int
2509 ksocknal_startup (lnet_ni_t *ni)
2510 {
2511         ksock_net_t  *net;
2512         int           rc;
2513         int           i;
2514
2515         LASSERT (ni->ni_lnd == &the_ksocklnd);
2516
2517         if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
2518                 rc = ksocknal_base_startup();
2519                 if (rc != 0)
2520                         return rc;
2521         }
2522
2523         LIBCFS_ALLOC(net, sizeof(*net));
2524         if (net == NULL)
2525                 goto fail_0;
2526
2527         memset(net, 0, sizeof(*net));
2528         cfs_spin_lock_init(&net->ksnn_lock);
2529         net->ksnn_incarnation = ksocknal_lib_new_incarnation();
2530         ni->ni_data = net;
2531         ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
2532         ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits;
2533
2534         if (ni->ni_interfaces[0] == NULL) {
2535                 rc = ksocknal_enumerate_interfaces(net);
2536                 if (rc <= 0)
2537                         goto fail_1;
2538
2539                 net->ksnn_ninterfaces = 1;
2540         } else {
2541                 for (i = 0; i < LNET_MAX_INTERFACES; i++) {
2542                         int    up;
2543
2544                         if (ni->ni_interfaces[i] == NULL)
2545                                 break;
2546
2547                         rc = libcfs_ipif_query(
2548                                 ni->ni_interfaces[i], &up,
2549                                 &net->ksnn_interfaces[i].ksni_ipaddr,
2550                                 &net->ksnn_interfaces[i].ksni_netmask);
2551
2552                         if (rc != 0) {
2553                                 CERROR("Can't get interface %s info: %d\n",
2554                                        ni->ni_interfaces[i], rc);
2555                                 goto fail_1;
2556                         }
2557
2558                         if (!up) {
2559                                 CERROR("Interface %s is down\n",
2560                                        ni->ni_interfaces[i]);
2561                                 goto fail_1;
2562                         }
2563                 }
2564                 net->ksnn_ninterfaces = i;
2565         }
2566
2567         ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
2568                                 net->ksnn_interfaces[0].ksni_ipaddr);
2569
2570         ksocknal_data.ksnd_nnets++;
2571
2572         return 0;
2573
2574  fail_1:
2575         LIBCFS_FREE(net, sizeof(*net));
2576  fail_0:
2577         if (ksocknal_data.ksnd_nnets == 0)
2578                 ksocknal_base_shutdown();
2579
2580         return -ENETDOWN;
2581 }
2582
2583
2584 void __exit
2585 ksocknal_module_fini (void)
2586 {
2587         lnet_unregister_lnd(&the_ksocklnd);
2588         ksocknal_tunables_fini();
2589 }
2590
2591 int __init
2592 ksocknal_module_init (void)
2593 {
2594         int    rc;
2595
2596         /* check ksnr_connected/connecting field large enough */
2597         CLASSERT (SOCKLND_CONN_NTYPES <= 4);
2598         CLASSERT (SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN);
2599
2600         /* initialize the_ksocklnd */
2601         the_ksocklnd.lnd_type     = SOCKLND;
2602         the_ksocklnd.lnd_startup  = ksocknal_startup;
2603         the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
2604         the_ksocklnd.lnd_ctl      = ksocknal_ctl;
2605         the_ksocklnd.lnd_send     = ksocknal_send;
2606         the_ksocklnd.lnd_recv     = ksocknal_recv;
2607         the_ksocklnd.lnd_notify   = ksocknal_notify;
2608         the_ksocklnd.lnd_accept   = ksocknal_accept;
2609
2610         rc = ksocknal_tunables_init();
2611         if (rc != 0)
2612                 return rc;
2613
2614         lnet_register_lnd(&the_ksocklnd);
2615
2616         return 0;
2617 }
2618
/* Module metadata, followed by registration of the init/fini entry points
 * (ksocknal_module_init / ksocknal_module_fini) through cfs_module(). */
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0");
MODULE_LICENSE("GPL");

cfs_module(ksocknal, "3.0.0", ksocknal_module_init, ksocknal_module_fini);