2 * -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
3 * vim:expandtab:shiftwidth=8:tabstop=8:
7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 only,
11 * as published by the Free Software Foundation.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License version 2 for more details (a copy is included
17 * in the LICENSE file that accompanied this code).
19 * You should have received a copy of the GNU General Public License
20 * version 2 along with this program; If not, see [sun.com URL with a
23 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
24 * CA 95054 USA or visit www.sun.com if you need additional information or
30 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
31 * Use is subject to license terms.
34 * This file is part of Lustre, http://www.lustre.org/
35 * Lustre is a trademark of Sun Microsystems, Inc.
37 * lnet/klnds/socklnd/socklnd.c
39 * Author: Zach Brown <zab@zabbo.net>
40 * Author: Peter J. Braam <braam@clusterfs.com>
41 * Author: Phil Schwan <phil@clusterfs.com>
42 * Author: Eric Barton <eric@bartonsoftware.com>
/* LND callback table: registers the socket LND's entry points with LNet.
 * NOTE(review): this extract is missing lines (original numbering jumps);
 * the initializer's closing "};" is not visible here -- confirm against the
 * full file. */
47 lnd_t the_ksocklnd = {
49 .lnd_startup = ksocknal_startup,
50 .lnd_shutdown = ksocknal_shutdown,
51 .lnd_ctl = ksocknal_ctl,
52 .lnd_send = ksocknal_send,
53 .lnd_recv = ksocknal_recv,
54 .lnd_notify = ksocknal_notify,
55 .lnd_accept = ksocknal_accept,
/* Module-wide state shared by every socklnd network instance. */
58 ksock_nal_data_t ksocknal_data;
/* Find the local interface on network 'ni' whose address equals 'ip' by
 * scanning net->ksnn_interfaces[].
 * NOTE(review): return type, braces and the return statements are missing
 * from this extract -- presumably returns the matching ksock_interface_t*
 * (or NULL if none); confirm against the full file. */
61 ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip)
63 ksock_net_t *net = ni->ni_data;
65 ksock_interface_t *iface;
67 for (i = 0; i < net->ksnn_ninterfaces; i++) {
68 LASSERT(i < LNET_MAX_INTERFACES);
69 iface = &net->ksnn_interfaces[i];
71 if (iface->ksni_ipaddr == ip)
/* Allocate and initialise a route to ipaddr:port with one reference held
 * for the caller.  All state flags/counters start at zero; the peer pointer
 * is set later by ksocknal_add_route_locked().
 * NOTE(review): the allocation-failure branch and the return statement are
 * missing from this extract. */
79 ksocknal_create_route (__u32 ipaddr, int port)
83 LIBCFS_ALLOC (route, sizeof (*route));
87 atomic_set (&route->ksnr_refcount, 1);
88 route->ksnr_peer = NULL;
89 route->ksnr_retry_interval = 0; /* OK to connect at any time */
90 route->ksnr_ipaddr = ipaddr;
91 route->ksnr_port = port;
92 route->ksnr_scheduled = 0;
93 route->ksnr_connecting = 0;
94 route->ksnr_connected = 0;
95 route->ksnr_deleted = 0;
96 route->ksnr_conn_count = 0;
97 route->ksnr_share_count = 0;
/* Final teardown of a route whose refcount has reached zero: drop its
 * reference on the owning peer (if any) and free the structure. */
103 ksocknal_destroy_route (ksock_route_t *route)
105 LASSERT (atomic_read(&route->ksnr_refcount) == 0);
107 if (route->ksnr_peer != NULL)
108 ksocknal_peer_decref(route->ksnr_peer);
110 LIBCFS_FREE (route, sizeof (*route));
/* Allocate a new peer for 'id' on network 'ni', returned via *peerp with
 * one reference for the caller.  Fails (frees the peer) if the network is
 * already shutting down -- checked under ksnn_lock so creation cannot race
 * with shutdown.
 * NOTE(review): the allocation-failure path, the peer-count bump implied by
 * the ksnn_lock critical section, and the return statements are missing
 * from this extract. */
114 ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
116 ksock_net_t *net = ni->ni_data;
119 LASSERT (id.nid != LNET_NID_ANY);
120 LASSERT (id.pid != LNET_PID_ANY);
121 LASSERT (!in_interrupt());
123 LIBCFS_ALLOC (peer, sizeof (*peer));
127 memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */
131 atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */
132 peer->ksnp_closing = 0;
133 peer->ksnp_accepting = 0;
134 peer->ksnp_zc_next_cookie = 1;
135 peer->ksnp_proto = NULL;
136 CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
137 CFS_INIT_LIST_HEAD (&peer->ksnp_routes);
138 CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue);
139 CFS_INIT_LIST_HEAD (&peer->ksnp_zc_req_list);
140 spin_lock_init(&peer->ksnp_lock);
142 spin_lock_bh (&net->ksnn_lock);
144 if (net->ksnn_shutdown) {
145 spin_unlock_bh (&net->ksnn_lock);
147 LIBCFS_FREE(peer, sizeof(*peer));
148 CERROR("Can't create peer: network shutdown\n");
154 spin_unlock_bh (&net->ksnn_lock);
/* Final teardown of a peer whose refcount has reached zero.  All conns,
 * routes and queued tx must already be gone (each held a peer ref).
 * NOTE(review): the body of the ksnn_lock critical section (presumably a
 * peer-count decrement) is missing from this extract. */
161 ksocknal_destroy_peer (ksock_peer_t *peer)
163 ksock_net_t *net = peer->ksnp_ni->ni_data;
165 CDEBUG (D_NET, "peer %s %p deleted\n",
166 libcfs_id2str(peer->ksnp_id), peer);
168 LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
169 LASSERT (peer->ksnp_accepting == 0);
170 LASSERT (list_empty (&peer->ksnp_conns));
171 LASSERT (list_empty (&peer->ksnp_routes));
172 LASSERT (list_empty (&peer->ksnp_tx_queue));
173 LASSERT (list_empty (&peer->ksnp_zc_req_list));
175 LIBCFS_FREE (peer, sizeof (*peer));
177 /* NB a peer's connections and routes keep a reference on their peer
178 * until they are destroyed, so we can be assured that _all_ state to
179 * do with this peer has been cleaned up when its refcount drops to
181 spin_lock_bh (&net->ksnn_lock);
183 spin_unlock_bh (&net->ksnn_lock);
/* Look up the peer for (ni, id) in its hash bucket.  Caller must hold
 * ksnd_global_lock; no reference is taken.
 * NOTE(review): the "continue" statements after the ni/id mismatch tests
 * and the return statements are missing from this extract. */
187 ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id)
189 struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
190 struct list_head *tmp;
193 list_for_each (tmp, peer_list) {
195 peer = list_entry (tmp, ksock_peer_t, ksnp_list);
197 LASSERT (!peer->ksnp_closing);
199 if (peer->ksnp_ni != ni)
202 if (peer->ksnp_id.nid != id.nid ||
203 peer->ksnp_id.pid != id.pid)
206 CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
207 peer, libcfs_id2str(id),
208 atomic_read(&peer->ksnp_refcount));
/* Locking wrapper around ksocknal_find_peer_locked(): takes the global
 * read lock and, if a peer is found, returns it with +1 ref for the caller. */
215 ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
219 read_lock (&ksocknal_data.ksnd_global_lock);
220 peer = ksocknal_find_peer_locked (ni, id);
221 if (peer != NULL) /* +1 ref for caller? */
222 ksocknal_peer_addref(peer);
223 read_unlock (&ksocknal_data.ksnd_global_lock);
/* Remove a peer from the peer table: release its claims on the passive-IP
 * interfaces it was using, mark it closing, unhash it and drop the peer
 * table's reference.  Caller holds ksnd_global_lock exclusively; the peer
 * must have no remaining conns or routes. */
229 ksocknal_unlink_peer_locked (ksock_peer_t *peer)
233 ksock_interface_t *iface;
235 for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
236 LASSERT (i < LNET_MAX_INTERFACES);
237 ip = peer->ksnp_passive_ips[i];
239 iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
240 /* All IPs in peer->ksnp_passive_ips[] come from the
241 * interface list, therefore the call must succeed. */
242 LASSERT (iface != NULL);
244 CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
245 peer, iface, iface->ksni_nroutes);
246 iface->ksni_npeers--;
249 LASSERT (list_empty(&peer->ksnp_conns));
250 LASSERT (list_empty(&peer->ksnp_routes));
251 LASSERT (!peer->ksnp_closing);
252 peer->ksnp_closing = 1;
253 list_del (&peer->ksnp_list);
254 /* lose peerlist's ref */
255 ksocknal_peer_decref(peer);
/* Report info about the index'th "peer entry" on network 'ni' (an entry is
 * a routeless/IP-less peer, one passive IP, or one route).  Walks the whole
 * peer hash under the global read lock, decrementing 'index' until the
 * selected entry is reached, then fills in the out parameters.
 * NOTE(review): the index-countdown logic, several out-parameter
 * assignments and the success/ENOENT returns are missing from this
 * extract. */
259 ksocknal_get_peer_info (lnet_ni_t *ni, int index,
260 lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port,
261 int *conn_count, int *share_count)
264 struct list_head *ptmp;
265 ksock_route_t *route;
266 struct list_head *rtmp;
271 read_lock (&ksocknal_data.ksnd_global_lock);
273 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
275 list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
276 peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
278 if (peer->ksnp_ni != ni)
281 if (peer->ksnp_n_passive_ips == 0 &&
282 list_empty(&peer->ksnp_routes)) {
296 for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
301 *myip = peer->ksnp_passive_ips[j];
310 list_for_each (rtmp, &peer->ksnp_routes) {
314 route = list_entry(rtmp, ksock_route_t,
318 *myip = route->ksnr_myipaddr;
319 *peer_ip = route->ksnr_ipaddr;
320 *port = route->ksnr_port;
321 *conn_count = route->ksnr_conn_count;
322 *share_count = route->ksnr_share_count;
329 read_unlock (&ksocknal_data.ksnd_global_lock);
/* Bind a newly established conn to its route: the conn takes a ref on the
 * route; if the conn's local address differs from the route's recorded
 * binding, the route is (re)bound -- moving the ksni_nroutes accounting
 * from the old interface (if any) to the new one.  Finally record the
 * connection type and reset the retry backoff.
 * Caller holds ksnd_global_lock exclusively. */
334 ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
336 ksock_peer_t *peer = route->ksnr_peer;
337 int type = conn->ksnc_type;
338 ksock_interface_t *iface;
340 conn->ksnc_route = route;
341 ksocknal_route_addref(route);
343 if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
344 if (route->ksnr_myipaddr == 0) {
345 /* route wasn't bound locally yet (the initial route) */
346 CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n",
347 libcfs_id2str(peer->ksnp_id),
348 HIPQUAD(route->ksnr_ipaddr),
349 HIPQUAD(conn->ksnc_myipaddr));
/* NOTE(review): the "} else {" for the rebinding branch appears to be
 * missing from this extract. */
351 CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from "
352 "%u.%u.%u.%u to %u.%u.%u.%u\n",
353 libcfs_id2str(peer->ksnp_id),
354 HIPQUAD(route->ksnr_ipaddr),
355 HIPQUAD(route->ksnr_myipaddr),
356 HIPQUAD(conn->ksnc_myipaddr));
358 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
359 route->ksnr_myipaddr);
/* Presumably guarded by "if (iface != NULL)" in the full file. */
361 iface->ksni_nroutes--;
363 route->ksnr_myipaddr = conn->ksnc_myipaddr;
364 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
365 route->ksnr_myipaddr);
367 iface->ksni_nroutes++;
370 route->ksnr_connected |= (1<<type);
371 route->ksnr_conn_count++;
373 /* Successful connection => further attempts can
374 * proceed immediately */
375 route->ksnr_retry_interval = 0;
/* Attach a freshly created route to 'peer' (the peer's route list takes
 * over the caller's ref on the route and the route takes a ref on the
 * peer), then associate it with any existing conns to the same address.
 * Caller holds ksnd_global_lock exclusively; the route must be unused and
 * unique among the peer's routes. */
379 ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
381 struct list_head *tmp;
383 ksock_route_t *route2;
385 LASSERT (!peer->ksnp_closing);
386 LASSERT (route->ksnr_peer == NULL);
387 LASSERT (!route->ksnr_scheduled);
388 LASSERT (!route->ksnr_connecting);
389 LASSERT (route->ksnr_connected == 0);
391 /* LASSERT(unique) */
392 list_for_each(tmp, &peer->ksnp_routes) {
393 route2 = list_entry(tmp, ksock_route_t, ksnr_list);
395 if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
396 CERROR ("Duplicate route %s %u.%u.%u.%u\n",
397 libcfs_id2str(peer->ksnp_id),
398 HIPQUAD(route->ksnr_ipaddr));
403 route->ksnr_peer = peer;
404 ksocknal_peer_addref(peer);
405 /* peer's routelist takes over my ref on 'route' */
406 list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
408 list_for_each(tmp, &peer->ksnp_conns) {
409 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
411 if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
414 ksocknal_associate_route_conn_locked(route, conn);
415 /* keep going (typed routes) */
/* Delete a route: close any conns it established, release its interface
 * binding, unlink it from the peer (dropping the peer's route ref), and
 * unlink the peer itself if it is now conn-less and route-less.
 * Caller holds ksnd_global_lock exclusively. */
420 ksocknal_del_route_locked (ksock_route_t *route)
422 ksock_peer_t *peer = route->ksnr_peer;
423 ksock_interface_t *iface;
425 struct list_head *ctmp;
426 struct list_head *cnxt;
428 LASSERT (!route->ksnr_deleted);
430 /* Close associated conns */
431 list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
432 conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
434 if (conn->ksnc_route != route)
437 ksocknal_close_conn_locked (conn, 0);
440 if (route->ksnr_myipaddr != 0) {
441 iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
442 route->ksnr_myipaddr);
/* Presumably guarded by "if (iface != NULL)" in the full file. */
444 iface->ksni_nroutes--;
447 route->ksnr_deleted = 1;
448 list_del (&route->ksnr_list);
449 ksocknal_route_decref(route); /* drop peer's ref */
451 if (list_empty (&peer->ksnp_routes) &&
452 list_empty (&peer->ksnp_conns)) {
453 /* I've just removed the last route to a peer with no active
455 ksocknal_unlink_peer_locked (peer);
/* User-initiated add of a peer/route.  Pre-allocates both a peer and a
 * route outside the lock, then under the global write lock reuses an
 * existing peer (dropping the new one) and an existing route to the same
 * address (bumping its share count) where possible.
 * NOTE(review): the error returns for bad id / failed allocations and the
 * "peer = peer2" reuse assignment are missing from this extract. */
460 ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
462 struct list_head *tmp;
465 ksock_route_t *route;
466 ksock_route_t *route2;
469 if (id.nid == LNET_NID_ANY ||
470 id.pid == LNET_PID_ANY)
473 /* Have a brand new peer ready... */
474 rc = ksocknal_create_peer(&peer, ni, id);
478 route = ksocknal_create_route (ipaddr, port);
480 ksocknal_peer_decref(peer);
484 write_lock_bh (&ksocknal_data.ksnd_global_lock);
486 /* always called with a ref on ni, so shutdown can't have started */
487 LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
489 peer2 = ksocknal_find_peer_locked (ni, id);
491 ksocknal_peer_decref(peer);
494 /* peer table takes my ref on peer */
495 list_add_tail (&peer->ksnp_list,
496 ksocknal_nid2peerlist (id.nid));
500 list_for_each (tmp, &peer->ksnp_routes) {
501 route2 = list_entry(tmp, ksock_route_t, ksnr_list);
503 if (route2->ksnr_ipaddr == ipaddr)
508 if (route2 == NULL) {
509 ksocknal_add_route_locked(peer, route);
510 route->ksnr_share_count++;
/* NOTE(review): the "} else {" for the existing-route branch appears to be
 * missing from this extract. */
512 ksocknal_route_decref(route);
513 route2->ksnr_share_count++;
516 write_unlock_bh (&ksocknal_data.ksnd_global_lock);
/* Delete explicit (shared) routes to 'ip' (or all routes if ip == 0) from
 * 'peer'; if no shared routes remain, also remove auto-created routes and
 * close all conns.  An extra peer ref is held across the walk because the
 * peer unlinks itself once its last conn/route goes.
 * Caller holds ksnd_global_lock exclusively. */
522 ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
525 ksock_route_t *route;
526 struct list_head *tmp;
527 struct list_head *nxt;
530 LASSERT (!peer->ksnp_closing);
532 /* Extra ref prevents peer disappearing until I'm done with it */
533 ksocknal_peer_addref(peer);
535 list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
536 route = list_entry(tmp, ksock_route_t, ksnr_list);
539 if (!(ip == 0 || route->ksnr_ipaddr == ip))
542 route->ksnr_share_count = 0;
543 /* This deletes associated conns too */
544 ksocknal_del_route_locked (route);
/* Count remaining shared routes; only purge the rest when nshared == 0. */
548 list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
549 route = list_entry(tmp, ksock_route_t, ksnr_list);
550 nshared += route->ksnr_share_count;
554 /* remove everything else if there are no explicit entries
557 list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
558 route = list_entry(tmp, ksock_route_t, ksnr_list);
560 /* we should only be removing auto-entries */
561 LASSERT(route->ksnr_share_count == 0);
562 ksocknal_del_route_locked (route);
565 list_for_each_safe (tmp, nxt, &peer->ksnp_conns) {
566 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
568 ksocknal_close_conn_locked(conn, 0);
572 ksocknal_peer_decref(peer);
573 /* NB peer unlinks itself when last conn/route is removed */
/* User-initiated peer deletion.  Walks one hash bucket (specific nid) or
 * the whole table (LNET_NID_ANY), deleting matching peers' routes via
 * ksocknal_del_peer_locked(); blocked tx on closed peers are collected on
 * 'zombies' and completed (with error) after the lock is dropped.
 * NOTE(review): the "lo = 0" branch for the ANY case and the initial
 * rc = -ENOENT setup are missing from this extract. */
577 ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
579 CFS_LIST_HEAD (zombies);
580 struct list_head *ptmp;
581 struct list_head *pnxt;
588 write_lock_bh (&ksocknal_data.ksnd_global_lock);
590 if (id.nid != LNET_NID_ANY)
591 lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
594 hi = ksocknal_data.ksnd_peer_hash_size - 1;
597 for (i = lo; i <= hi; i++) {
598 list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
599 peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
601 if (peer->ksnp_ni != ni)
604 if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
605 (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
608 ksocknal_peer_addref(peer); /* a ref for me... */
610 ksocknal_del_peer_locked (peer, ip);
612 if (peer->ksnp_closing && !list_empty(&peer->ksnp_tx_queue)) {
613 LASSERT (list_empty(&peer->ksnp_conns));
614 LASSERT (list_empty(&peer->ksnp_routes));
616 list_splice_init(&peer->ksnp_tx_queue, &zombies);
619 ksocknal_peer_decref(peer); /* ...till here */
621 rc = 0; /* matched! */
625 write_unlock_bh (&ksocknal_data.ksnd_global_lock);
627 ksocknal_txlist_done(ni, &zombies, 1);
/* Return the index'th connection on network 'ni' (with +1 conn ref for the
 * caller), or NULL when 'index' exceeds the number of conns.  Walks the
 * whole peer hash under the global read lock.
 * NOTE(review): the index-countdown test inside the conn loop and the
 * return statements are missing from this extract. */
633 ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
636 struct list_head *ptmp;
638 struct list_head *ctmp;
641 read_lock (&ksocknal_data.ksnd_global_lock);
643 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
644 list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
645 peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
647 LASSERT (!peer->ksnp_closing);
649 if (peer->ksnp_ni != ni)
652 list_for_each (ctmp, &peer->ksnp_conns) {
656 conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
657 ksocknal_conn_addref(conn);
658 read_unlock (&ksocknal_data.ksnd_global_lock);
664 read_unlock (&ksocknal_data.ksnd_global_lock);
/* Pick the scheduler (CPU) for a new conn.  A hardware NIC (irq != 0) that
 * has already been assigned keeps its cached scheduler; otherwise the
 * least-loaded scheduler (fewest conns) is chosen and, for hardware NICs,
 * cached in ksnd_irqinfo[] for subsequent conns on the same irq.
 * Caller holds ksnd_global_lock exclusively.
 * NOTE(review): the final "return sched"-style statement is missing from
 * this extract. */
669 ksocknal_choose_scheduler_locked (unsigned int irq)
671 ksock_sched_t *sched;
672 ksock_irqinfo_t *info;
675 LASSERT (irq < NR_IRQS);
676 info = &ksocknal_data.ksnd_irqinfo[irq];
678 if (irq != 0 && /* hardware NIC */
679 info->ksni_valid) { /* already set up */
680 return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]);
683 /* software NIC (irq == 0) || not associated with a scheduler yet.
684 * Choose the CPU with the fewest connections... */
685 sched = &ksocknal_data.ksnd_schedulers[0];
686 for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++)
687 if (sched->kss_nconns >
688 ksocknal_data.ksnd_schedulers[i].kss_nconns)
689 sched = &ksocknal_data.ksnd_schedulers[i];
691 if (irq != 0) { /* Hardware NIC */
692 info->ksni_valid = 1;
693 info->ksni_sched = sched - ksocknal_data.ksnd_schedulers;
696 LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers);
/* Copy this network's local interface addresses into 'ipaddrs' and return
 * how many were written.  Only offers addresses when more than one
 * interface is configured (single-interface nodes don't advertise).
 * NOTE(review): the nip < 2 early-exit test and the return statements are
 * missing from this extract. */
703 ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
705 ksock_net_t *net = ni->ni_data;
709 read_lock (&ksocknal_data.ksnd_global_lock);
711 nip = net->ksnn_ninterfaces;
712 LASSERT (nip <= LNET_MAX_INTERFACES);
714 /* Only offer interfaces for additional connections if I have
717 read_unlock (&ksocknal_data.ksnd_global_lock);
721 for (i = 0; i < nip; i++) {
722 ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
723 LASSERT (ipaddrs[i] != 0);
726 read_unlock (&ksocknal_data.ksnd_global_lock);
/* Choose the index of the peer IP in ips[0..nips-1] that best matches
 * 'iface': prefer a same-subnet address (netmask match), tie-break on the
 * smallest XOR distance.
 * NOTE(review): the best-index bookkeeping, the skip of already-used
 * (zeroed) entries and the return statement are missing from this
 * extract. */
731 ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
733 int best_netmatch = 0;
740 for (i = 0; i < nips; i++) {
744 this_xor = (ips[i] ^ iface->ksni_ipaddr);
745 this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
748 best_netmatch < this_netmatch ||
749 (best_netmatch == this_netmatch &&
750 best_xor > this_xor)))
754 best_netmatch = this_netmatch;
/* Negotiate the set of local IPs to pair with the peer's advertised IPs.
 * Keeps previously chosen passive IPs, then greedily picks new interfaces
 * (subnet match first, then fewest peers) until min(n_peerips,
 * ksnn_ninterfaces) are assigned.  Overwrites 'peerips' with the chosen
 * matches and returns the count.  Runs under the global write lock; O(n^3)
 * is acceptable for the small interface counts expected here.
 * NOTE(review): loop-local declarations, the used-peer-IP zeroing after
 * matching, and the return statement are missing from this extract. */
763 ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
765 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
766 ksock_net_t *net = peer->ksnp_ni->ni_data;
767 ksock_interface_t *iface;
768 ksock_interface_t *best_iface;
779 /* CAVEAT EMPTOR: We do all our interface matching with an
780 * exclusive hold of global lock at IRQ priority. We're only
781 * expecting to be dealing with small numbers of interfaces, so the
782 * O(n**3)-ness shouldn't matter */
784 /* Also note that I'm not going to return more than n_peerips
785 * interfaces, even if I have more myself */
787 write_lock_bh (global_lock);
789 LASSERT (n_peerips <= LNET_MAX_INTERFACES);
790 LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
792 /* Only match interfaces for additional connections
793 * if I have > 1 interface */
794 n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
795 MIN(n_peerips, net->ksnn_ninterfaces);
797 for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
798 /* ^ yes really... */
800 /* If we have any new interfaces, first tick off all the
801 * peer IPs that match old interfaces, then choose new
802 * interfaces to match the remaining peer IPS.
803 * We don't forget interfaces we've stopped using; we might
804 * start using them again... */
806 if (i < peer->ksnp_n_passive_ips) {
808 ip = peer->ksnp_passive_ips[i];
809 best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
811 /* peer passive ips are kept up to date */
812 LASSERT(best_iface != NULL);
/* NOTE(review): "} else {" for the new-interface branch appears missing. */
814 /* choose a new interface */
815 LASSERT (i == peer->ksnp_n_passive_ips);
821 for (j = 0; j < net->ksnn_ninterfaces; j++) {
822 iface = &net->ksnn_interfaces[j];
823 ip = iface->ksni_ipaddr;
825 for (k = 0; k < peer->ksnp_n_passive_ips; k++)
826 if (peer->ksnp_passive_ips[k] == ip)
829 if (k < peer->ksnp_n_passive_ips) /* using it already */
832 k = ksocknal_match_peerip(iface, peerips, n_peerips);
833 xor = (ip ^ peerips[k]);
834 this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
836 if (!(best_iface == NULL ||
837 best_netmatch < this_netmatch ||
838 (best_netmatch == this_netmatch &&
839 best_npeers > iface->ksni_npeers)))
843 best_netmatch = this_netmatch;
844 best_npeers = iface->ksni_npeers;
847 best_iface->ksni_npeers++;
848 ip = best_iface->ksni_ipaddr;
849 peer->ksnp_passive_ips[i] = ip;
850 peer->ksnp_n_passive_ips = i+1;
853 LASSERT (best_iface != NULL);
855 /* mark the best matching peer IP used */
856 j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
860 /* Overwrite input peer IP addresses */
861 memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
863 write_unlock_bh (global_lock);
/* After an interface exchange, create routes to each peer IP we don't
 * already have a route for, binding each new route to the best local
 * interface (subnet match first, then fewest routes).  Allocation happens
 * outside the lock; the lock is retaken per iteration, so the peer-closing
 * check is repeated.  Only runs when this node has > 1 interface.
 * NOTE(review): several declarations, "continue"/"break" statements and
 * the duplicate-route handling are missing from this extract. */
869 ksocknal_create_routes(ksock_peer_t *peer, int port,
870 __u32 *peer_ipaddrs, int npeer_ipaddrs)
872 ksock_route_t *newroute = NULL;
873 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
874 lnet_ni_t *ni = peer->ksnp_ni;
875 ksock_net_t *net = ni->ni_data;
876 struct list_head *rtmp;
877 ksock_route_t *route;
878 ksock_interface_t *iface;
879 ksock_interface_t *best_iface;
886 /* CAVEAT EMPTOR: We do all our interface matching with an
887 * exclusive hold of global lock at IRQ priority. We're only
888 * expecting to be dealing with small numbers of interfaces, so the
889 * O(n**3)-ness here shouldn't matter */
891 write_lock_bh (global_lock);
893 if (net->ksnn_ninterfaces < 2) {
894 /* Only create additional connections
895 * if I have > 1 interface */
896 write_unlock_bh (global_lock);
900 LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES);
902 for (i = 0; i < npeer_ipaddrs; i++) {
903 if (newroute != NULL) {
904 newroute->ksnr_ipaddr = peer_ipaddrs[i];
/* NOTE(review): "} else {" allocating a fresh route appears missing. */
906 write_unlock_bh (global_lock);
908 newroute = ksocknal_create_route(peer_ipaddrs[i], port);
909 if (newroute == NULL)
912 write_lock_bh (global_lock);
915 if (peer->ksnp_closing) {
916 /* peer got closed under me */
920 /* Already got a route? */
922 list_for_each(rtmp, &peer->ksnp_routes) {
923 route = list_entry(rtmp, ksock_route_t, ksnr_list);
925 if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
937 LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
939 /* Select interface to connect from */
940 for (j = 0; j < net->ksnn_ninterfaces; j++) {
941 iface = &net->ksnn_interfaces[j];
943 /* Using this interface already? */
944 list_for_each(rtmp, &peer->ksnp_routes) {
945 route = list_entry(rtmp, ksock_route_t, ksnr_list);
947 if (route->ksnr_myipaddr == iface->ksni_ipaddr)
955 this_netmatch = (((iface->ksni_ipaddr ^
956 newroute->ksnr_ipaddr) &
957 iface->ksni_netmask) == 0) ? 1 : 0;
959 if (!(best_iface == NULL ||
960 best_netmatch < this_netmatch ||
961 (best_netmatch == this_netmatch &&
962 best_nroutes > iface->ksni_nroutes)))
966 best_netmatch = this_netmatch;
967 best_nroutes = iface->ksni_nroutes;
970 if (best_iface == NULL)
973 newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
974 best_iface->ksni_nroutes++;
976 ksocknal_add_route_locked(peer, newroute);
980 write_unlock_bh (global_lock);
981 if (newroute != NULL)
982 ksocknal_route_decref(newroute);
/* LNet accept callback: package the incoming socket into a connection
 * request and queue it for the connd thread(s), waking one up.  Drops the
 * request (with a console error) if the cr allocation fails.
 * NOTE(review): the cr == NULL test, the ni ref taken into cr, and the
 * return statements are missing from this extract. */
986 ksocknal_accept (lnet_ni_t *ni, cfs_socket_t *sock)
993 rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
994 LASSERT (rc == 0); /* we succeeded before */
996 LIBCFS_ALLOC(cr, sizeof(*cr));
998 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from "
999 "%u.%u.%u.%u: memory exhausted\n",
1006 cr->ksncr_sock = sock;
1008 spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
1010 list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
1011 cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq);
1013 spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
/* Is there an in-progress connection attempt on the route to 'ipaddr'?
 * Used to resolve simultaneous-connect races.
 * NOTE(review): the trailing "return 0" for no matching route is missing
 * from this extract. */
1018 ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr)
1020 ksock_route_t *route;
1022 list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) {
1024 if (route->ksnr_ipaddr == ipaddr)
1025 return route->ksnr_connecting;
/* Establish a new connection on 'sock'.  route != NULL means an active
 * (outgoing) connect; route == NULL means a passive (accepted) one.
 * Performs the HELLO handshake, finds or creates the peer, resolves
 * connection races and duplicates, attaches the conn to a scheduler and
 * flushes the peer's blocked tx queue onto it.  Error paths unwind peer /
 * conn / hello state and may return-with-retry via a CONN_NONE HELLO.
 * NOTE(review): this extract is missing many lines (declarations, error
 * labels/gotos, several closing braces); comments below describe only what
 * is visible. */
1031 ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
1032 cfs_socket_t *sock, int type)
1034 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
1035 CFS_LIST_HEAD (zombies);
1036 lnet_process_id_t peerid;
1037 struct list_head *tmp;
1040 ksock_conn_t *conn2;
1041 ksock_peer_t *peer = NULL;
1042 ksock_peer_t *peer2;
1043 ksock_sched_t *sched;
1044 ksock_hello_msg_t *hello;
1051 active = (route != NULL);
1053 LASSERT (active == (type != SOCKLND_CONN_NONE));
1055 irq = ksocknal_lib_sock_irq (sock);
/* Allocate and zero the conn; it holds 2 socket refs (see comment). */
1057 LIBCFS_ALLOC(conn, sizeof(*conn));
1063 memset (conn, 0, sizeof (*conn));
1064 conn->ksnc_peer = NULL;
1065 conn->ksnc_route = NULL;
1066 conn->ksnc_sock = sock;
1067 /* 2 ref, 1 for conn, another extra ref prevents socket
1068 * being closed before establishment of connection */
1069 atomic_set (&conn->ksnc_sock_refcount, 2);
1070 conn->ksnc_type = type;
1071 ksocknal_lib_save_callback(sock, conn);
1072 atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
1074 conn->ksnc_zc_capable = ksocknal_lib_zc_capable(sock);
1075 conn->ksnc_rx_ready = 0;
1076 conn->ksnc_rx_scheduled = 0;
1078 CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue);
1079 conn->ksnc_tx_ready = 0;
1080 conn->ksnc_tx_scheduled = 0;
1081 conn->ksnc_tx_mono = NULL;
1082 atomic_set (&conn->ksnc_tx_nob, 0);
1084 LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t,
1085 kshm_ips[LNET_MAX_INTERFACES]));
1086 if (hello == NULL) {
1091 /* stash conn's local and remote addrs */
1092 rc = ksocknal_lib_get_conn_addrs (conn);
1096 /* Find out/confirm peer's NID and connection type and get the
1097 * vector of interfaces she's willing to let me connect to.
1098 * Passive connections use the listener timeout since the peer sends
/* Active side: peer is known from the route; send HELLO eagerly using the
 * peer's previously negotiated protocol (default v2, v1 if tuned). */
1102 peer = route->ksnr_peer;
1103 LASSERT(ni == peer->ksnp_ni);
1105 /* Active connection sends HELLO eagerly */
1106 hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
1107 peerid = peer->ksnp_id;
1109 write_lock_bh(global_lock);
1110 conn->ksnc_proto = peer->ksnp_proto;
1111 write_unlock_bh(global_lock);
1113 if (conn->ksnc_proto == NULL) {
1114 conn->ksnc_proto = &ksocknal_protocol_v2x;
1115 #if SOCKNAL_VERSION_DEBUG
1116 if (*ksocknal_tunables.ksnd_protocol != 2)
1117 conn->ksnc_proto = &ksocknal_protocol_v1x;
1121 rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
/* Passive side: peer identity and protocol come from the received HELLO. */
1125 peerid.nid = LNET_NID_ANY;
1126 peerid.pid = LNET_PID_ANY;
1128 /* Passive, get protocol from peer */
1129 conn->ksnc_proto = NULL;
1132 rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation);
1136 LASSERT (rc == 0 || active);
1137 LASSERT (conn->ksnc_proto != NULL);
1138 LASSERT (peerid.nid != LNET_NID_ANY);
/* Active: take a ref on the known peer.  Passive: create a peer and
 * insert it unless someone else beat us to it. */
1141 ksocknal_peer_addref(peer);
1142 write_lock_bh (global_lock);
1144 rc = ksocknal_create_peer(&peer, ni, peerid);
1148 write_lock_bh (global_lock);
1150 /* called with a ref on ni, so shutdown can't have started */
1151 LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0);
1153 peer2 = ksocknal_find_peer_locked(ni, peerid);
1154 if (peer2 == NULL) {
1155 /* NB this puts an "empty" peer in the peer
1156 * table (which takes my ref) */
1157 list_add_tail(&peer->ksnp_list,
1158 ksocknal_nid2peerlist(peerid.nid));
1160 ksocknal_peer_decref(peer);
1165 ksocknal_peer_addref(peer);
1166 peer->ksnp_accepting++;
1168 /* Am I already connecting to this guy? Resolve in
1169 * favour of higher NID... */
1170 if (peerid.nid < ni->ni_nid &&
1171 ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
1173 warn = "connection race resolution";
1178 if (peer->ksnp_closing ||
1179 (active && route->ksnr_deleted)) {
1180 /* peer/route got closed under me */
1182 warn = "peer/route removed";
1186 if (peer->ksnp_proto == NULL) {
1187 /* Never connected before.
1188 * NB recv_hello may have returned EPROTO to signal my peer
1189 * wants a different protocol than the one I asked for.
1191 LASSERT (list_empty(&peer->ksnp_conns));
1193 peer->ksnp_proto = conn->ksnc_proto;
1194 peer->ksnp_incarnation = incarnation;
1197 if (peer->ksnp_proto != conn->ksnc_proto ||
1198 peer->ksnp_incarnation != incarnation) {
1199 /* Peer rebooted or I've got the wrong protocol version */
1200 ksocknal_close_peer_conns_locked(peer, 0, 0);
1202 peer->ksnp_proto = NULL;
1204 warn = peer->ksnp_incarnation != incarnation ?
1206 "wrong proto version";
1216 warn = "lost conn race";
1219 warn = "retry with different protocol version";
1223 /* Refuse to duplicate an existing connection, unless this is a
1224 * loopback connection */
1225 if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
1226 list_for_each(tmp, &peer->ksnp_conns) {
1227 conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
1229 if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
1230 conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
1231 conn2->ksnc_type != conn->ksnc_type)
1234 /* Reply on a passive connection attempt so the peer
1235 * realises we're connected. */
1245 /* If the connection created by this route didn't bind to the IP
1246 * address the route connected to, the connection/route matching
1247 * code below probably isn't going to work. */
1249 route->ksnr_ipaddr != conn->ksnc_ipaddr) {
1250 CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n",
1251 libcfs_id2str(peer->ksnp_id),
1252 HIPQUAD(route->ksnr_ipaddr),
1253 HIPQUAD(conn->ksnc_ipaddr));
1256 /* Search for a route corresponding to the new connection and
1257 * create an association. This allows incoming connections created
1258 * by routes in my peer to match my own route entries so I don't
1259 * continually create duplicate routes. */
1260 list_for_each (tmp, &peer->ksnp_routes) {
1261 route = list_entry(tmp, ksock_route_t, ksnr_list);
1263 if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
1266 ksocknal_associate_route_conn_locked(route, conn);
/* Success path: attach conn to peer + scheduler and drain blocked tx. */
1270 conn->ksnc_peer = peer; /* conn takes my ref on peer */
1271 peer->ksnp_last_alive = cfs_time_current();
1272 peer->ksnp_error = 0;
1274 sched = ksocknal_choose_scheduler_locked (irq);
1275 sched->kss_nconns++;
1276 conn->ksnc_scheduler = sched;
1278 /* Set the deadline for the outgoing HELLO to drain */
1279 conn->ksnc_tx_bufnob = SOCK_WMEM_QUEUED(sock);
1280 conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
1281 mb(); /* order with adding to peer's conn list */
1283 list_add (&conn->ksnc_list, &peer->ksnp_conns);
1284 ksocknal_conn_addref(conn);
1286 ksocknal_new_packet(conn, 0);
1288 /* Take all the packets blocking for a connection.
1289 * NB, it might be nicer to share these blocked packets among any
1290 * other connections that are becoming established. */
1291 while (!list_empty (&peer->ksnp_tx_queue)) {
1292 tx = list_entry (peer->ksnp_tx_queue.next,
1293 ksock_tx_t, tx_list);
1295 list_del (&tx->tx_list);
1296 ksocknal_queue_tx_locked (tx, conn);
1299 write_unlock_bh (global_lock);
1301 /* We've now got a new connection. Any errors from here on are just
1302 * like "normal" comms errors and we close the connection normally.
1303 * NB (a) we still have to send the reply HELLO for passive
1305 * (b) normal I/O on the conn is blocked until I setup and call the
1309 ksocknal_lib_bind_irq (irq);
1311 CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d"
1312 " incarnation:"LPD64" sched[%d]/%d\n",
1313 libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
1314 HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr),
1315 conn->ksnc_port, incarnation,
1316 (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
1319 /* additional routes after interface exchange? */
1320 ksocknal_create_routes(peer, conn->ksnc_port,
1321 hello->kshm_ips, hello->kshm_nips);
/* Passive side replies with its own HELLO after selecting IPs. */
1323 hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
1325 rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
1328 LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
1329 kshm_ips[LNET_MAX_INTERFACES]));
1331 /* setup the socket AFTER I've received hello (it disables
1332 * SO_LINGER). I might call back to the acceptor who may want
1333 * to send a protocol version response and then close the
1334 * socket; this ensures the socket only tears down after the
1335 * response has been sent. */
1337 rc = ksocknal_lib_setup_sock(sock);
1339 write_lock_bh(global_lock);
1341 /* NB my callbacks block while I hold ksnd_global_lock */
1342 ksocknal_lib_set_callback(sock, conn);
1345 peer->ksnp_accepting--;
1347 write_unlock_bh(global_lock);
/* On late failure: close the conn; otherwise enable I/O via the socket
 * ref and drop the setup-time extra refs. */
1350 write_lock_bh(global_lock);
1351 ksocknal_close_conn_locked(conn, rc);
1352 write_unlock_bh(global_lock);
1353 } else if (ksocknal_connsock_addref(conn) == 0) {
1354 /* Allow I/O to proceed. */
1355 ksocknal_read_callback(conn);
1356 ksocknal_write_callback(conn);
1357 ksocknal_connsock_decref(conn);
1360 ksocknal_connsock_decref(conn);
1361 ksocknal_conn_decref(conn);
/* Error unwind: if we created an unused peer, strip its tx queue onto
 * 'zombies' and unlink it; report, optionally request a protocol retry,
 * then free hello/conn and release the socket. */
1365 if (!peer->ksnp_closing &&
1366 list_empty (&peer->ksnp_conns) &&
1367 list_empty (&peer->ksnp_routes)) {
1368 list_add(&zombies, &peer->ksnp_tx_queue);
1369 list_del_init(&peer->ksnp_tx_queue);
1370 ksocknal_unlink_peer_locked(peer);
1373 write_unlock_bh (global_lock);
1377 CERROR("Not creating conn %s type %d: %s\n",
1378 libcfs_id2str(peerid), conn->ksnc_type, warn);
1380 CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
1381 libcfs_id2str(peerid), conn->ksnc_type, warn);
1386 /* Request retry by replying with CONN_NONE
1387 * ksnc_proto has been set already */
1388 conn->ksnc_type = SOCKLND_CONN_NONE;
1389 hello->kshm_nips = 0;
1390 ksocknal_send_hello(ni, conn, peerid.nid, hello);
1393 write_lock_bh(global_lock);
1394 peer->ksnp_accepting--;
1395 write_unlock_bh(global_lock);
1398 ksocknal_txlist_done(ni, &zombies, 1);
1399 ksocknal_peer_decref(peer);
1403 LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
1404 kshm_ips[LNET_MAX_INTERFACES]));
1406 LIBCFS_FREE (conn, sizeof(*conn));
1409 libcfs_sock_release(sock);
/* Begin closing a conn: mark it closing, detach it from its route
 * (clearing the route's connected bit if no sibling conn of the same type
 * remains), record the error on the peer if this was its last conn, and
 * hand the conn to the reaper.  Caller holds ksnd_global_lock exclusively.
 * NOTE(review): some interior lines (e.g. the sibling-conn scan's
 * route-bit-keeping path) are missing from this extract. */
1414 ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
1416 /* This just does the immmediate housekeeping, and queues the
1417 * connection for the reaper to terminate.
1418 * Caller holds ksnd_global_lock exclusively in irq context */
1419 ksock_peer_t *peer = conn->ksnc_peer;
1420 ksock_route_t *route;
1421 ksock_conn_t *conn2;
1422 struct list_head *tmp;
1424 LASSERT (peer->ksnp_error == 0);
1425 LASSERT (!conn->ksnc_closing);
1426 conn->ksnc_closing = 1;
1428 /* ksnd_deathrow_conns takes over peer's ref */
1429 list_del (&conn->ksnc_list);
1431 route = conn->ksnc_route;
1432 if (route != NULL) {
1433 /* dissociate conn from route... */
1434 LASSERT (!route->ksnr_deleted);
1435 LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
1438 list_for_each(tmp, &peer->ksnp_conns) {
1439 conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
1441 if (conn2->ksnc_route == route &&
1442 conn2->ksnc_type == conn->ksnc_type)
/* Clear the bit only when no other conn of this type uses the route. */
1448 route->ksnr_connected &= ~(1 << conn->ksnc_type);
1450 conn->ksnc_route = NULL;
1452 #if 0 /* irrelevent with only eager routes */
1453 list_del (&route->ksnr_list); /* make route least favourite */
1454 list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
1456 ksocknal_route_decref(route); /* drop conn's ref on route */
1459 if (list_empty (&peer->ksnp_conns)) {
1460 /* No more connections to this peer */
1462 peer->ksnp_proto = NULL; /* renegotiate protocol version */
1463 peer->ksnp_error = error; /* stash last conn close reason */
1465 if (list_empty (&peer->ksnp_routes)) {
1466 /* I've just closed last conn belonging to a
1467 * peer with no routes to it */
1468 ksocknal_unlink_peer_locked (peer);
1472 spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
1474 list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
1475 cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
1477 spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
/*
 * Tell LNET a peer appears dead after a connection failure/comms error.
 * Only notifies when the peer is a kernel peer (no LNET_PID_USERFLAG)
 * and there are no live conns, no accepts in flight, and no route
 * currently connecting.
 */
1481 ksocknal_peer_failed (ksock_peer_t *peer)
1483 time_t last_alive = 0;
1486 /* There has been a connection failure or comms error; but I'll only
1487 * tell LNET I think the peer is dead if it's to another kernel and
1488 * there are no connections or connection attempts in existance. */
1490 read_lock (&ksocknal_data.ksnd_global_lock);
1492 if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
1493 list_empty(&peer->ksnp_conns) &&
1494 peer->ksnp_accepting == 0 &&
1495 ksocknal_find_connecting_route_locked(peer) == NULL) {
/* convert the tick-based ksnp_last_alive stamp to wall-clock seconds */
1497 last_alive = cfs_time_current_sec() -
1498 cfs_duration_sec(cfs_time_current() -
1499 peer->ksnp_last_alive);
1502 read_unlock (&ksocknal_data.ksnd_global_lock);
/* third arg 0 == "dead"; NOTE(review): last_alive is presumably passed
 * as a later argument on the elided continuation line — confirm */
1505 lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0,
/*
 * Second phase of teardown, run by the reaper in thread context:
 * flush the tx queue through the scheduler, fail outstanding
 * zero-copy requests, unhook socket callbacks and drop the conn's
 * socket ref so the socket closes.
 */
1510 ksocknal_terminate_conn (ksock_conn_t *conn)
1512 /* This gets called by the reaper (guaranteed thread context) to
1513 * disengage the socket from its callbacks and close it.
1514 * ksnc_refcount will eventually hit zero, and then the reaper will
1516 ksock_peer_t *peer = conn->ksnc_peer;
1517 ksock_sched_t *sched = conn->ksnc_scheduler;
1519 struct list_head *tmp;
1520 struct list_head *nxt;
1524 LASSERT(conn->ksnc_closing);
1526 /* wake up the scheduler to "send" all remaining packets to /dev/null */
1527 spin_lock_bh (&sched->kss_lock);
1529 /* a closing conn is always ready to tx */
1530 conn->ksnc_tx_ready = 1;
1532 if (!conn->ksnc_tx_scheduled &&
1533 !list_empty(&conn->ksnc_tx_queue)){
1534 list_add_tail (&conn->ksnc_tx_list,
1535 &sched->kss_tx_conns);
1536 conn->ksnc_tx_scheduled = 1;
1537 /* extra ref for scheduler */
1538 ksocknal_conn_addref(conn);
1540 cfs_waitq_signal (&sched->kss_waitq);
1543 spin_unlock_bh (&sched->kss_lock);
/* move this conn's pending zero-copy requests onto a private list so
 * they can be failed outside ksnp_lock */
1545 spin_lock(&peer->ksnp_lock);
1547 list_for_each_safe(tmp, nxt, &peer->ksnp_zc_req_list) {
1548 tx = list_entry(tmp, ksock_tx_t, tx_zc_list);
1550 if (tx->tx_conn != conn)
1553 LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0);
1555 tx->tx_msg.ksm_zc_req_cookie = 0;
1556 list_del(&tx->tx_zc_list);
1557 list_add(&tx->tx_zc_list, &zlist);
1560 spin_unlock(&peer->ksnp_lock);
/* drop the zc-request refs now that the list is private */
1562 list_for_each_safe(tmp, nxt, &zlist) {
1563 tx = list_entry(tmp, ksock_tx_t, tx_zc_list);
1565 list_del(&tx->tx_zc_list);
1566 ksocknal_tx_decref(tx);
1569 /* serialise with callbacks */
1570 write_lock_bh (&ksocknal_data.ksnd_global_lock);
1572 ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
1574 /* OK, so this conn may not be completely disengaged from its
1575 * scheduler yet, but it _has_ committed to terminate... */
1576 conn->ksnc_scheduler->kss_nconns--;
1578 if (peer->ksnp_error != 0) {
1579 /* peer's last conn closed in error */
1580 LASSERT (list_empty (&peer->ksnp_conns));
1582 peer->ksnp_error = 0; /* avoid multiple notifications */
1585 write_unlock_bh (&ksocknal_data.ksnd_global_lock);
/* notify LNET outside the global lock */
1588 ksocknal_peer_failed(peer);
1590 /* The socket is closed on the final put; either here, or in
1591 * ksocknal_{send,recv}msg(). Since we set up the linger2 option
1592 * when the connection was established, this will close the socket
1593 * immediately, aborting anything buffered in it. Any hung
1594 * zero-copy transmits will therefore complete in finite time. */
1595 ksocknal_connsock_decref(conn);
/*
 * Called when the last conn ref is dropped: put the conn on the
 * reaper's zombie list and wake the reaper to destroy it.
 */
1599 ksocknal_queue_zombie_conn (ksock_conn_t *conn)
1601 /* Queue the conn for the reaper to destroy */
1603 LASSERT (atomic_read(&conn->ksnc_conn_refcount) == 0);
1604 spin_lock_bh (&ksocknal_data.ksnd_reaper_lock);
1606 list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
1607 cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq);
1609 spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock);
/*
 * Final destruction of a fully-disengaged conn (reaper context):
 * report any receive that was cut short, drop the peer ref and free
 * the conn structure.  All refcounts must already be zero.
 */
1613 ksocknal_destroy_conn (ksock_conn_t *conn)
1615 /* Final coup-de-grace of the reaper */
1616 CDEBUG (D_NET, "connection %p\n", conn);
1618 LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0);
1619 LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0);
1620 LASSERT (conn->ksnc_sock == NULL);
1621 LASSERT (conn->ksnc_route == NULL);
1622 LASSERT (!conn->ksnc_tx_scheduled);
1623 LASSERT (!conn->ksnc_rx_scheduled);
1624 LASSERT (list_empty(&conn->ksnc_tx_queue));
1626 /* complete current receive if any */
1627 switch (conn->ksnc_rx_state) {
1628 case SOCKNAL_RX_LNET_PAYLOAD:
/* payload receive was in flight: finalize the lnet message with -EIO
 * so the upper layer doesn't wait forever */
1629 CERROR("Completing partial receive from %s"
1630 ", ip %d.%d.%d.%d:%d, with error\n",
1631 libcfs_id2str(conn->ksnc_peer->ksnp_id),
1632 HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
1633 lnet_finalize (conn->ksnc_peer->ksnp_ni,
1634 conn->ksnc_cookie, -EIO);
1636 case SOCKNAL_RX_LNET_HEADER:
1637 if (conn->ksnc_rx_started)
1638 CERROR("Incomplete receive of lnet header from %s"
1639 ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
1640 libcfs_id2str(conn->ksnc_peer->ksnp_id),
1641 HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
1642 conn->ksnc_proto->pro_version);
1644 case SOCKNAL_RX_KSM_HEADER:
1645 if (conn->ksnc_rx_started)
1646 CERROR("Incomplete receive of ksock message from %s"
1647 ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n",
1648 libcfs_id2str(conn->ksnc_peer->ksnp_id),
1649 HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port,
1650 conn->ksnc_proto->pro_version);
1652 case SOCKNAL_RX_SLOP:
1653 if (conn->ksnc_rx_started)
1654 CERROR("Incomplete receive of slops from %s"
1655 ", ip %d.%d.%d.%d:%d, with error\n",
1656 libcfs_id2str(conn->ksnc_peer->ksnp_id),
1657 HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
/* drop the peer ref the conn held, then free the conn itself */
1664 ksocknal_peer_decref(conn->ksnc_peer);
1666 LIBCFS_FREE (conn, sizeof (*conn));
/*
 * Close every conn of 'peer' that matches 'ipaddr' (see the elided
 * match test; ipaddr == 0 presumably matches all — confirm against
 * the original), passing 'why' as the close reason.
 * Caller holds ksnd_global_lock for writing.
 */
1670 ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
1673 struct list_head *ctmp;
1674 struct list_head *cnxt;
/* _safe iteration: ksocknal_close_conn_locked() unlinks the conn */
1677 list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
1678 conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
1681 conn->ksnc_ipaddr == ipaddr) {
1683 ksocknal_close_conn_locked (conn, why);
/*
 * Close 'conn' and all other conns to the same peer on the same
 * remote IP, with 'why' as the close reason.  Takes the global lock
 * itself (cf. the *_locked variant above).
 */
1691 ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
1693 ksock_peer_t *peer = conn->ksnc_peer;
1694 __u32 ipaddr = conn->ksnc_ipaddr;
1697 write_lock_bh (&ksocknal_data.ksnd_global_lock);
1699 count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
1701 write_unlock_bh (&ksocknal_data.ksnd_global_lock);
/*
 * Close conns of every peer matching 'id' (nid/pid wildcards allowed)
 * and 'ipaddr'.  Returns 0 on success or for any wildcard request,
 * -ENOENT if a fully-specified request matched nothing.
 */
1707 ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
1710 struct list_head *ptmp;
1711 struct list_head *pnxt;
1717 write_lock_bh (&ksocknal_data.ksnd_global_lock);
/* restrict the hash scan to one chain when the nid is specific */
1719 if (id.nid != LNET_NID_ANY)
1720 lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
1723 hi = ksocknal_data.ksnd_peer_hash_size - 1;
1726 for (i = lo; i <= hi; i++) {
1727 list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
1729 peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
1731 if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
1732 (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
1735 count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
1739 write_unlock_bh (&ksocknal_data.ksnd_global_lock);
1741 /* wildcards always succeed */
1742 if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
1745 return (count == 0 ? -ENOENT : 0);
/*
 * lnd_notify handler: the router reports a gateway state change.
 * On "down" we close all conns to the gateway; on "up" we do nothing
 * since connections are established on demand.
 */
1749 ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive)
1751 /* The router is telling me she's been notified of a change in
1752 * gateway state.... */
1753 lnet_process_id_t id = {.nid = gw_nid, .pid = LNET_PID_ANY};
1755 CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
1756 alive ? "up" : "down");
1759 /* If the gateway crashed, close all open connections... */
1760 ksocknal_close_matching_conns (id, 0);
1764 /* ...otherwise do nothing. We can only establish new connections
1765 * if we have autroutes, and these connect on demand. */
/*
 * "Push" (via ksocknal_lib_push_conn) each conn of 'peer' in turn.
 * Iterates by index, re-taking the global read lock each pass and
 * holding a conn ref across the unlocked push.
 */
1769 ksocknal_push_peer (ksock_peer_t *peer)
1773 struct list_head *tmp;
1776 for (index = 0; ; index++) {
1777 read_lock (&ksocknal_data.ksnd_global_lock);
1782 list_for_each (tmp, &peer->ksnp_conns) {
1784 conn = list_entry (tmp, ksock_conn_t, ksnc_list);
1785 ksocknal_conn_addref(conn); /* ref pins conn once lock is dropped */
1790 read_unlock (&ksocknal_data.ksnd_global_lock);
1795 ksocknal_lib_push_conn (conn);
1796 ksocknal_conn_decref(conn);
/*
 * Push all conns of every peer matching 'id' (nid/pid wildcards
 * allowed).  Walks the whole peer hash, re-locking per iteration and
 * holding a peer ref across the unlocked ksocknal_push_peer() call.
 */
1801 ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
1804 struct list_head *tmp;
1810 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
1811 for (j = 0; ; j++) {
1812 read_lock (&ksocknal_data.ksnd_global_lock);
1817 list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
1818 peer = list_entry(tmp, ksock_peer_t,
1821 if (!((id.nid == LNET_NID_ANY ||
1822 id.nid == peer->ksnp_id.nid) &&
1823 (id.pid == LNET_PID_ANY ||
1824 id.pid == peer->ksnp_id.pid))) {
1830 ksocknal_peer_addref(peer); /* pin peer before dropping the lock */
1835 read_unlock (&ksocknal_data.ksnd_global_lock);
1839 ksocknal_push_peer (peer);
1840 ksocknal_peer_decref(peer);
/*
 * Register a new local interface (ipaddress/netmask) with this NI,
 * and count how many existing peers/routes already reference that
 * address.  Duplicate adds are silently ignored; the table is capped
 * at LNET_MAX_INTERFACES.
 */
1850 ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
1852 ksock_net_t *net = ni->ni_data;
1853 ksock_interface_t *iface;
1857 struct list_head *ptmp;
1859 struct list_head *rtmp;
1860 ksock_route_t *route;
1862 if (ipaddress == 0 ||
1866 write_lock_bh (&ksocknal_data.ksnd_global_lock);
1868 iface = ksocknal_ip2iface(ni, ipaddress);
1869 if (iface != NULL) {
1870 /* silently ignore dups */
1872 } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
1875 iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
1877 iface->ksni_ipaddr = ipaddress;
1878 iface->ksni_netmask = netmask;
1879 iface->ksni_nroutes = 0;
1880 iface->ksni_npeers = 0;
/* pre-populate usage counts from peers/routes that already use this IP */
1882 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
1883 list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
1884 peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
1886 for (j = 0; j < peer->ksnp_n_passive_ips; j++)
1887 if (peer->ksnp_passive_ips[j] == ipaddress)
1888 iface->ksni_npeers++;
1890 list_for_each(rtmp, &peer->ksnp_routes) {
1891 route = list_entry(rtmp, ksock_route_t, ksnr_list);
1893 if (route->ksnr_myipaddr == ipaddress)
1894 iface->ksni_nroutes++;
1900 /* NB only new connections will pay attention to the new interface! */
1903 write_unlock_bh (&ksocknal_data.ksnd_global_lock);
/*
 * Scrub a deleted local interface address from one peer: remove it
 * from the peer's passive-IP list, unbind or delete routes using it,
 * and close conns bound to it.  Caller holds ksnd_global_lock for
 * writing (required by the *_locked callees).
 */
1909 ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
1911 struct list_head *tmp;
1912 struct list_head *nxt;
1913 ksock_route_t *route;
/* compact the passive-IP array over the removed entry */
1918 for (i = 0; i < peer->ksnp_n_passive_ips; i++)
1919 if (peer->ksnp_passive_ips[i] == ipaddr) {
1920 for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
1921 peer->ksnp_passive_ips[j-1] =
1922 peer->ksnp_passive_ips[j];
1923 peer->ksnp_n_passive_ips--;
1927 list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
1928 route = list_entry (tmp, ksock_route_t, ksnr_list);
1930 if (route->ksnr_myipaddr != ipaddr)
1933 if (route->ksnr_share_count != 0) {
1934 /* Manually created; keep, but unbind */
1935 route->ksnr_myipaddr = 0;
1937 ksocknal_del_route_locked(route);
/* close any conn whose local endpoint was the deleted address */
1941 list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
1942 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
1944 if (conn->ksnc_myipaddr == ipaddr)
1945 ksocknal_close_conn_locked (conn, 0);
/*
 * Delete local interface(s) from this NI: ipaddress == 0 matches all
 * interfaces.  Each deleted address is also scrubbed from every peer
 * on this NI via ksocknal_peer_del_interface_locked().
 */
1950 ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
1952 ksock_net_t *net = ni->ni_data;
1954 struct list_head *tmp;
1955 struct list_head *nxt;
1961 write_lock_bh (&ksocknal_data.ksnd_global_lock);
1963 for (i = 0; i < net->ksnn_ninterfaces; i++) {
1964 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
1966 if (!(ipaddress == 0 ||
1967 ipaddress == this_ip))
/* compact the interface array over the deleted slot */
1972 for (j = i+1; j < net->ksnn_ninterfaces; j++)
1973 net->ksnn_interfaces[j-1] =
1974 net->ksnn_interfaces[j];
1976 net->ksnn_ninterfaces--;
1978 for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
1979 list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) {
1980 peer = list_entry(tmp, ksock_peer_t, ksnp_list);
1982 if (peer->ksnp_ni != ni) /* peers of other NIs are untouched */
1985 ksocknal_peer_del_interface_locked(peer, this_ip);
1990 write_unlock_bh (&ksocknal_data.ksnd_global_lock);
/*
 * lnd_ctl handler: dispatch IOC_LIBCFS_* ioctls for the socklnd —
 * interface add/del/query, peer add/del/query, connection query,
 * close and push.  'arg' is a struct libcfs_ioctl_data.
 */
1996 ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
1998 struct libcfs_ioctl_data *data = arg;
2002 case IOC_LIBCFS_GET_INTERFACE: {
2003 ksock_net_t *net = ni->ni_data;
2004 ksock_interface_t *iface;
2006 read_lock (&ksocknal_data.ksnd_global_lock);
/* ioc_count is the caller-supplied interface index; bounds-check it */
2008 if (data->ioc_count < 0 ||
2009 data->ioc_count >= net->ksnn_ninterfaces) {
2013 iface = &net->ksnn_interfaces[data->ioc_count];
2015 data->ioc_u32[0] = iface->ksni_ipaddr;
2016 data->ioc_u32[1] = iface->ksni_netmask;
2017 data->ioc_u32[2] = iface->ksni_npeers;
2018 data->ioc_u32[3] = iface->ksni_nroutes;
2021 read_unlock (&ksocknal_data.ksnd_global_lock);
2025 case IOC_LIBCFS_ADD_INTERFACE:
2026 return ksocknal_add_interface(ni,
2027 data->ioc_u32[0], /* IP address */
2028 data->ioc_u32[1]); /* net mask */
2030 case IOC_LIBCFS_DEL_INTERFACE:
2031 return ksocknal_del_interface(ni,
2032 data->ioc_u32[0]); /* IP address */
2034 case IOC_LIBCFS_GET_PEER: {
2035 lnet_process_id_t id = {0,};
2040 int share_count = 0;
2042 rc = ksocknal_get_peer_info(ni, data->ioc_count,
2043 &id, &myip, &ip, &port,
2044 &conn_count, &share_count);
/* marshal peer info back through the ioctl data block */
2048 data->ioc_nid = id.nid;
2049 data->ioc_count = share_count;
2050 data->ioc_u32[0] = ip;
2051 data->ioc_u32[1] = port;
2052 data->ioc_u32[2] = myip;
2053 data->ioc_u32[3] = conn_count;
2054 data->ioc_u32[4] = id.pid;
2058 case IOC_LIBCFS_ADD_PEER: {
2059 lnet_process_id_t id = {.nid = data->ioc_nid,
2060 .pid = LUSTRE_SRV_LNET_PID};
2061 return ksocknal_add_peer (ni, id,
2062 data->ioc_u32[0], /* IP */
2063 data->ioc_u32[1]); /* port */
2065 case IOC_LIBCFS_DEL_PEER: {
2066 lnet_process_id_t id = {.nid = data->ioc_nid,
2067 .pid = LNET_PID_ANY};
2068 return ksocknal_del_peer (ni, id,
2069 data->ioc_u32[0]); /* IP */
2071 case IOC_LIBCFS_GET_CONN: {
2075 ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count);
2080 ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
2082 data->ioc_count = txmem;
2083 data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
2084 data->ioc_flags = nagle;
2085 data->ioc_u32[0] = conn->ksnc_ipaddr;
2086 data->ioc_u32[1] = conn->ksnc_port;
2087 data->ioc_u32[2] = conn->ksnc_myipaddr;
2088 data->ioc_u32[3] = conn->ksnc_type;
/* scheduler index = pointer difference into the scheduler array */
2089 data->ioc_u32[4] = conn->ksnc_scheduler -
2090 ksocknal_data.ksnd_schedulers;
2091 data->ioc_u32[5] = rxmem;
2092 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
2093 ksocknal_conn_decref(conn); /* drop ref taken by get_conn_by_idx */
2097 case IOC_LIBCFS_CLOSE_CONNECTION: {
2098 lnet_process_id_t id = {.nid = data->ioc_nid,
2099 .pid = LNET_PID_ANY};
2101 return ksocknal_close_matching_conns (id,
2104 case IOC_LIBCFS_REGISTER_MYNID:
2105 /* Ignore if this is a noop */
2106 if (data->ioc_nid == ni->ni_nid)
2109 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
2110 libcfs_nid2str(data->ioc_nid),
2111 libcfs_nid2str(ni->ni_nid));
2114 case IOC_LIBCFS_PUSH_CONNECTION: {
2115 lnet_process_id_t id = {.nid = data->ioc_nid,
2116 .pid = LNET_PID_ANY};
2118 return ksocknal_push(ni, id);
/*
 * Free global allocations at shutdown: scheduler array, peer hash
 * table, and any cached idle noop txs.  All active txs must already
 * have drained (asserted below).
 */
2127 ksocknal_free_buffers (void)
2129 LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
2131 if (ksocknal_data.ksnd_schedulers != NULL)
2132 LIBCFS_FREE (ksocknal_data.ksnd_schedulers,
2133 sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
2135 LIBCFS_FREE (ksocknal_data.ksnd_peers,
2136 sizeof (struct list_head) *
2137 ksocknal_data.ksnd_peer_hash_size);
2139 spin_lock(&ksocknal_data.ksnd_tx_lock);
2141 if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
2142 struct list_head zlist;
/* detach the whole idle list under the lock, free outside it */
2145 list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
2146 list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
2147 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2149 while(!list_empty(&zlist)) {
2150 tx = list_entry(zlist.next, ksock_tx_t, tx_list);
2151 list_del(&tx->tx_list);
2152 LIBCFS_FREE(tx, tx->tx_desc_size);
2155 spin_unlock(&ksocknal_data.ksnd_tx_lock);
/*
 * Tear down module-global state once the last net is gone: verify all
 * queues are empty, flag and wake all threads to terminate, wait for
 * them to exit, then free the global buffers.
 */
2160 ksocknal_base_shutdown (void)
2162 ksock_sched_t *sched;
2165 CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
2166 atomic_read (&libcfs_kmemory));
2167 LASSERT (ksocknal_data.ksnd_nnets == 0);
2169 switch (ksocknal_data.ksnd_init) {
2173 case SOCKNAL_INIT_ALL:
2174 case SOCKNAL_INIT_DATA:
2175 LASSERT (ksocknal_data.ksnd_peers != NULL);
2176 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2177 LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
2179 LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
2180 LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
2181 LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs));
2182 LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes));
2184 if (ksocknal_data.ksnd_schedulers != NULL)
2185 for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
2186 ksock_sched_t *kss =
2187 &ksocknal_data.ksnd_schedulers[i];
2189 LASSERT (list_empty (&kss->kss_tx_conns));
2190 LASSERT (list_empty (&kss->kss_rx_conns));
2191 LASSERT (list_empty (&kss->kss_zombie_noop_txs));
2192 LASSERT (kss->kss_nconns == 0);
2195 /* flag threads to terminate; wake and wait for them to die */
2196 ksocknal_data.ksnd_shuttingdown = 1;
2197 cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq);
2198 cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq);
2200 if (ksocknal_data.ksnd_schedulers != NULL)
2201 for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
2202 sched = &ksocknal_data.ksnd_schedulers[i];
2203 cfs_waitq_broadcast(&sched->kss_waitq);
/* poll (1s pause per pass) until every thread has decremented
 * ksnd_nthreads; log at D_WARNING only on power-of-2 passes */
2207 read_lock (&ksocknal_data.ksnd_global_lock);
2208 while (ksocknal_data.ksnd_nthreads != 0) {
2210 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
2211 "waiting for %d threads to terminate\n",
2212 ksocknal_data.ksnd_nthreads);
2213 read_unlock (&ksocknal_data.ksnd_global_lock);
2214 cfs_pause(cfs_time_seconds(1));
2215 read_lock (&ksocknal_data.ksnd_global_lock);
2217 read_unlock (&ksocknal_data.ksnd_global_lock);
2219 ksocknal_free_buffers();
2221 ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
2225 CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
2226 atomic_read (&libcfs_kmemory));
2228 PORTAL_MODULE_UNUSE;
/*
 * Generate an incarnation number for this socknal instance: the
 * current time in microseconds.
 */
2233 ksocknal_new_incarnation (void)
2237 /* The incarnation number is the time this module loaded and it
2238 * identifies this particular instance of the socknal. Hopefully
2239 * we won't be able to reboot more frequently than 1MHz for the
2240 * forseeable future :) */
2242 do_gettimeofday(&tv);
2244 return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
/*
 * One-time module-global initialisation: allocate and init the peer
 * hash, locks, queues and scheduler array, then start scheduler,
 * connd and reaper threads.  On any failure it falls through to
 * ksocknal_base_shutdown() for cleanup.
 */
2248 ksocknal_base_startup (void)
2253 LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
2254 LASSERT (ksocknal_data.ksnd_nnets == 0);
2256 memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
2258 ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
2259 LIBCFS_ALLOC (ksocknal_data.ksnd_peers,
2260 sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
2261 if (ksocknal_data.ksnd_peers == NULL)
2264 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
2265 CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
2267 rwlock_init(&ksocknal_data.ksnd_global_lock);
/* reaper state: conn lists + its own lock and waitq */
2269 spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
2270 CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
2271 CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
2272 CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
2273 cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq);
/* connd state: connreq/route queues + lock and waitq */
2275 spin_lock_init (&ksocknal_data.ksnd_connd_lock);
2276 CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs);
2277 CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes);
2278 cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq);
2280 spin_lock_init (&ksocknal_data.ksnd_tx_lock);
2281 CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs);
2283 /* NB memset above zeros whole of ksocknal_data, including
2284 * ksocknal_data.ksnd_irqinfo[all].ksni_valid */
2286 /* flag lists/ptrs/locks initialised */
2287 ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
2290 ksocknal_data.ksnd_nschedulers = ksocknal_nsched();
2291 LIBCFS_ALLOC(ksocknal_data.ksnd_schedulers,
2292 sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
2293 if (ksocknal_data.ksnd_schedulers == NULL)
2296 for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
2297 ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
2299 spin_lock_init (&kss->kss_lock);
2300 CFS_INIT_LIST_HEAD (&kss->kss_rx_conns);
2301 CFS_INIT_LIST_HEAD (&kss->kss_tx_conns);
2302 CFS_INIT_LIST_HEAD (&kss->kss_zombie_noop_txs);
2303 cfs_waitq_init (&kss->kss_waitq);
2306 for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
2307 rc = ksocknal_thread_start (ksocknal_scheduler,
2308 &ksocknal_data.ksnd_schedulers[i]);
2310 CERROR("Can't spawn socknal scheduler[%d]: %d\n",
2316 /* must have at least 2 connds to remain responsive to accepts while
2318 if (*ksocknal_tunables.ksnd_nconnds < 2)
2319 *ksocknal_tunables.ksnd_nconnds = 2;
2321 for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
2322 rc = ksocknal_thread_start (ksocknal_connd, (void *)((long)i));
2324 CERROR("Can't spawn socknal connd: %d\n", rc);
2329 rc = ksocknal_thread_start (ksocknal_reaper, NULL);
2331 CERROR ("Can't spawn socknal reaper: %d\n", rc);
2335 /* flag everything initialised */
2336 ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
/* error path: unwind whatever was initialised above */
2341 ksocknal_base_shutdown();
/*
 * Shutdown diagnostic: find one peer still attached to 'ni' and dump
 * its refcounts, routes and conns to the console.  Read-only; used
 * while waiting for peers to disconnect in ksocknal_shutdown().
 */
2346 ksocknal_debug_peerhash (lnet_ni_t *ni)
2348 ksock_peer_t *peer = NULL;
2349 struct list_head *tmp;
2352 read_lock (&ksocknal_data.ksnd_global_lock);
2354 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2355 list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
2356 peer = list_entry (tmp, ksock_peer_t, ksnp_list);
2358 if (peer->ksnp_ni == ni) break; /* first peer of this NI will do */
2365 ksock_route_t *route;
2368 CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
2369 "closing %d, accepting %d, err %d, zcookie "LPU64", "
2370 "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id),
2371 atomic_read(&peer->ksnp_refcount),
2372 peer->ksnp_sharecount, peer->ksnp_closing,
2373 peer->ksnp_accepting, peer->ksnp_error,
2374 peer->ksnp_zc_next_cookie,
2375 !list_empty(&peer->ksnp_tx_queue),
2376 !list_empty(&peer->ksnp_zc_req_list));
2378 list_for_each (tmp, &peer->ksnp_routes) {
2379 route = list_entry(tmp, ksock_route_t, ksnr_list);
2380 CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
2381 "del %d\n", atomic_read(&route->ksnr_refcount),
2382 route->ksnr_scheduled, route->ksnr_connecting,
2383 route->ksnr_connected, route->ksnr_deleted);
2386 list_for_each (tmp, &peer->ksnp_conns) {
2387 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
2388 CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
2389 atomic_read(&conn->ksnc_conn_refcount),
2390 atomic_read(&conn->ksnc_sock_refcount),
2391 conn->ksnc_type, conn->ksnc_closing);
2395 read_unlock (&ksocknal_data.ksnd_global_lock);
/*
 * lnd_shutdown handler: stop new peers on this net, delete all
 * existing peers, wait for peer state to drain, free the net and —
 * when the last net goes — tear down module-global state.
 */
2400 ksocknal_shutdown (lnet_ni_t *ni)
2402 ksock_net_t *net = ni->ni_data;
2404 lnet_process_id_t anyid = {.nid = LNET_NID_ANY,
2405 .pid = LNET_PID_ANY};
2407 LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
2408 LASSERT(ksocknal_data.ksnd_nnets > 0);
2410 spin_lock_bh (&net->ksnn_lock);
2411 net->ksnn_shutdown = 1; /* prevent new peers */
2412 spin_unlock_bh (&net->ksnn_lock);
2414 /* Delete all peers */
2415 ksocknal_del_peer(ni, anyid, 0);
2417 /* Wait for all peer state to clean up */
/* poll ksnn_npeers, pausing 1s per pass and dumping a surviving
 * peer's state each time to aid debugging stuck shutdowns */
2419 spin_lock_bh (&net->ksnn_lock);
2420 while (net->ksnn_npeers != 0) {
2421 spin_unlock_bh (&net->ksnn_lock);
2424 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
2425 "waiting for %d peers to disconnect\n",
2427 cfs_pause(cfs_time_seconds(1));
2429 ksocknal_debug_peerhash(ni);
2431 spin_lock_bh (&net->ksnn_lock);
2433 spin_unlock_bh (&net->ksnn_lock);
/* all peer/route references to our interfaces must be gone by now */
2435 for (i = 0; i < net->ksnn_ninterfaces; i++) {
2436 LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
2437 LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
2440 LIBCFS_FREE(net, sizeof(*net));
2442 ksocknal_data.ksnd_nnets--;
2443 if (ksocknal_data.ksnd_nnets == 0)
2444 ksocknal_base_shutdown();
/*
 * Auto-discover usable local interfaces for 'net': enumerate system
 * interfaces, skip loopback, unqueryable and down ones, and record
 * each survivor's IP/netmask (capped at LNET_MAX_INTERFACES).
 * Errors if none are found.
 */
2448 ksocknal_enumerate_interfaces(ksock_net_t *net)
2456 n = libcfs_ipif_enumerate(&names);
2458 CERROR("Can't enumerate interfaces: %d\n", n);
/* i walks system interfaces, j counts accepted ones */
2462 for (i = j = 0; i < n; i++) {
2467 if (!strcmp(names[i], "lo")) /* skip the loopback IF */
2470 rc = libcfs_ipif_query(names[i], &up, &ip, &mask);
2472 CWARN("Can't get interface %s info: %d\n",
2478 CWARN("Ignoring interface %s (down)\n",
2483 if (j == LNET_MAX_INTERFACES) {
2484 CWARN("Ignoring interface %s (too many interfaces)\n",
2489 net->ksnn_interfaces[j].ksni_ipaddr = ip;
2490 net->ksnn_interfaces[j].ksni_netmask = mask;
2494 libcfs_ipif_free_enumeration(names, n);
2497 CERROR("Can't find any usable interfaces\n");
/*
 * lnd_startup handler: run base startup on the first net, allocate
 * and initialise the per-net state, bind interfaces (auto-enumerated
 * or as listed in ni_interfaces[]), and derive the NI's nid from the
 * first interface's address.
 */
2503 ksocknal_startup (lnet_ni_t *ni)
2509 LASSERT (ni->ni_lnd == &the_ksocklnd);
2511 if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
2512 rc = ksocknal_base_startup();
2517 LIBCFS_ALLOC(net, sizeof(*net));
2521 memset(net, 0, sizeof(*net));
2522 spin_lock_init(&net->ksnn_lock);
2523 net->ksnn_incarnation = ksocknal_new_incarnation();
2525 ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
2526 ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits;
2528 if (ni->ni_interfaces[0] == NULL) {
/* no interfaces configured: auto-discover them */
2529 rc = ksocknal_enumerate_interfaces(net);
2533 net->ksnn_ninterfaces = 1;
2535 for (i = 0; i < LNET_MAX_INTERFACES; i++) {
2538 if (ni->ni_interfaces[i] == NULL)
2541 rc = libcfs_ipif_query(
2542 ni->ni_interfaces[i], &up,
2543 &net->ksnn_interfaces[i].ksni_ipaddr,
2544 &net->ksnn_interfaces[i].ksni_netmask);
2547 CERROR("Can't get interface %s info: %d\n",
2548 ni->ni_interfaces[i], rc);
2553 CERROR("Interface %s is down\n",
2554 ni->ni_interfaces[i]);
2558 net->ksnn_ninterfaces = i;
/* NI address = network part of ni_nid + first interface's IP */
2561 ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
2562 net->ksnn_interfaces[0].ksni_ipaddr);
2564 ksocknal_data.ksnd_nnets++;
/* error path: free the net; base-shutdown if no nets remain */
2569 LIBCFS_FREE(net, sizeof(*net));
2571 if (ksocknal_data.ksnd_nnets == 0)
2572 ksocknal_base_shutdown();
/*
 * Module exit: unregister the LND from LNET, then tear down tunables.
 */
2579 ksocknal_module_fini (void)
2581 lnet_unregister_lnd(&the_ksocklnd);
2582 ksocknal_lib_tunables_fini();
/*
 * Module init: compile-time sanity check, set up tunables, then
 * register the LND with LNET.
 */
2586 ksocknal_module_init (void)
2590 /* check ksnr_connected/connecting field large enough */
2591 CLASSERT(SOCKLND_CONN_NTYPES <= 4);
2593 rc = ksocknal_lib_tunables_init();
2597 lnet_register_lnd(&the_ksocklnd);
/* kernel module metadata and entry/exit registration */
2602 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
2603 MODULE_DESCRIPTION("Kernel TCP Socket LND v2.0.0");
2604 MODULE_LICENSE("GPL");
2606 cfs_module(ksocknal, "2.0.0", ksocknal_module_init, ksocknal_module_fini);