4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lnet/klnds/socklnd/socklnd.c
34 * Author: Zach Brown <zab@zabbo.net>
35 * Author: Peter J. Braam <braam@clusterfs.com>
36 * Author: Phil Schwan <phil@clusterfs.com>
37 * Author: Eric Barton <eric@bartonsoftware.com>
40 #include <linux/inetdevice.h>
43 static const struct lnet_lnd the_ksocklnd;
44 struct ksock_nal_data ksocknal_data;
46 static struct ksock_interface *
47 ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip)
49 struct ksock_net *net = ni->ni_data;
51 struct ksock_interface *iface;
53 for (i = 0; i < net->ksnn_ninterfaces; i++) {
54 LASSERT(i < LNET_INTERFACES_NUM);
55 iface = &net->ksnn_interfaces[i];
57 if (iface->ksni_ipaddr == ip)
64 static struct ksock_interface *
65 ksocknal_index2iface(struct lnet_ni *ni, int index)
67 struct ksock_net *net = ni->ni_data;
69 struct ksock_interface *iface;
71 for (i = 0; i < net->ksnn_ninterfaces; i++) {
72 LASSERT(i < LNET_INTERFACES_NUM);
73 iface = &net->ksnn_interfaces[i];
75 if (iface->ksni_index == index)
82 static int ksocknal_ip2index(__u32 ipaddress, struct lnet_ni *ni)
84 struct net_device *dev;
86 DECLARE_CONST_IN_IFADDR(ifa);
89 for_each_netdev(ni->ni_net_ns, dev) {
90 int flags = dev_get_flags(dev);
91 struct in_device *in_dev;
93 if (flags & IFF_LOOPBACK) /* skip the loopback IF */
96 if (!(flags & IFF_UP))
99 in_dev = __in_dev_get_rcu(dev);
103 in_dev_for_each_ifa_rcu(ifa, in_dev) {
104 if (ntohl(ifa->ifa_local) == ipaddress)
116 static struct ksock_route *
117 ksocknal_create_route(__u32 ipaddr, int port)
119 struct ksock_route *route;
121 LIBCFS_ALLOC (route, sizeof (*route));
125 refcount_set(&route->ksnr_refcount, 1);
126 route->ksnr_peer = NULL;
127 route->ksnr_retry_interval = 0; /* OK to connect at any time */
128 route->ksnr_ipaddr = ipaddr;
129 route->ksnr_myiface = -1;
130 route->ksnr_port = port;
131 route->ksnr_scheduled = 0;
132 route->ksnr_connecting = 0;
133 route->ksnr_connected = 0;
134 route->ksnr_deleted = 0;
135 route->ksnr_conn_count = 0;
136 route->ksnr_share_count = 0;
142 ksocknal_destroy_route(struct ksock_route *route)
144 LASSERT(refcount_read(&route->ksnr_refcount) == 0);
146 if (route->ksnr_peer != NULL)
147 ksocknal_peer_decref(route->ksnr_peer);
149 LIBCFS_FREE (route, sizeof (*route));
152 static struct ksock_peer_ni *
153 ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id)
155 int cpt = lnet_cpt_of_nid(id.nid, ni);
156 struct ksock_net *net = ni->ni_data;
157 struct ksock_peer_ni *peer_ni;
159 LASSERT(id.nid != LNET_NID_ANY);
160 LASSERT(id.pid != LNET_PID_ANY);
161 LASSERT(!in_interrupt());
163 if (!atomic_inc_unless_negative(&net->ksnn_npeers)) {
164 CERROR("Can't create peer_ni: network shutdown\n");
165 return ERR_PTR(-ESHUTDOWN);
168 LIBCFS_CPT_ALLOC(peer_ni, lnet_cpt_table(), cpt, sizeof(*peer_ni));
170 atomic_dec(&net->ksnn_npeers);
171 return ERR_PTR(-ENOMEM);
174 peer_ni->ksnp_ni = ni;
175 peer_ni->ksnp_id = id;
176 refcount_set(&peer_ni->ksnp_refcount, 1); /* 1 ref for caller */
177 peer_ni->ksnp_closing = 0;
178 peer_ni->ksnp_accepting = 0;
179 peer_ni->ksnp_proto = NULL;
180 peer_ni->ksnp_last_alive = 0;
181 peer_ni->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
183 INIT_LIST_HEAD(&peer_ni->ksnp_conns);
184 INIT_LIST_HEAD(&peer_ni->ksnp_routes);
185 INIT_LIST_HEAD(&peer_ni->ksnp_tx_queue);
186 INIT_LIST_HEAD(&peer_ni->ksnp_zc_req_list);
187 spin_lock_init(&peer_ni->ksnp_lock);
193 ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni)
195 struct ksock_net *net = peer_ni->ksnp_ni->ni_data;
197 CDEBUG (D_NET, "peer_ni %s %p deleted\n",
198 libcfs_id2str(peer_ni->ksnp_id), peer_ni);
200 LASSERT(refcount_read(&peer_ni->ksnp_refcount) == 0);
201 LASSERT(peer_ni->ksnp_accepting == 0);
202 LASSERT(list_empty(&peer_ni->ksnp_conns));
203 LASSERT(list_empty(&peer_ni->ksnp_routes));
204 LASSERT(list_empty(&peer_ni->ksnp_tx_queue));
205 LASSERT(list_empty(&peer_ni->ksnp_zc_req_list));
207 LIBCFS_FREE(peer_ni, sizeof(*peer_ni));
209 /* NB a peer_ni's connections and routes keep a reference on their
210 * peer_ni until they are destroyed, so we can be assured that _all_
211 * state to do with this peer_ni has been cleaned up when its refcount
214 if (atomic_dec_and_test(&net->ksnn_npeers))
215 wake_up_var(&net->ksnn_npeers);
218 struct ksock_peer_ni *
219 ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id)
221 struct ksock_peer_ni *peer_ni;
223 hash_for_each_possible(ksocknal_data.ksnd_peers, peer_ni,
225 LASSERT(!peer_ni->ksnp_closing);
227 if (peer_ni->ksnp_ni != ni)
230 if (peer_ni->ksnp_id.nid != id.nid ||
231 peer_ni->ksnp_id.pid != id.pid)
234 CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d)\n",
235 peer_ni, libcfs_id2str(id),
236 refcount_read(&peer_ni->ksnp_refcount));
242 struct ksock_peer_ni *
243 ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id)
245 struct ksock_peer_ni *peer_ni;
247 read_lock(&ksocknal_data.ksnd_global_lock);
248 peer_ni = ksocknal_find_peer_locked(ni, id);
249 if (peer_ni != NULL) /* +1 ref for caller? */
250 ksocknal_peer_addref(peer_ni);
251 read_unlock(&ksocknal_data.ksnd_global_lock);
257 ksocknal_unlink_peer_locked(struct ksock_peer_ni *peer_ni)
261 struct ksock_interface *iface;
263 for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++) {
264 LASSERT(i < LNET_INTERFACES_NUM);
265 ip = peer_ni->ksnp_passive_ips[i];
267 iface = ksocknal_ip2iface(peer_ni->ksnp_ni, ip);
269 * All IPs in peer_ni->ksnp_passive_ips[] come from the
270 * interface list, therefore the call must succeed.
272 LASSERT(iface != NULL);
274 CDEBUG(D_NET, "peer_ni=%p iface=%p ksni_nroutes=%d\n",
275 peer_ni, iface, iface->ksni_nroutes);
276 iface->ksni_npeers--;
279 LASSERT(list_empty(&peer_ni->ksnp_conns));
280 LASSERT(list_empty(&peer_ni->ksnp_routes));
281 LASSERT(!peer_ni->ksnp_closing);
282 peer_ni->ksnp_closing = 1;
283 hlist_del(&peer_ni->ksnp_list);
284 /* lose peerlist's ref */
285 ksocknal_peer_decref(peer_ni);
289 ksocknal_get_peer_info(struct lnet_ni *ni, int index,
290 struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip,
291 int *port, int *conn_count, int *share_count)
293 struct ksock_peer_ni *peer_ni;
294 struct ksock_route *route;
295 struct list_head *rtmp;
300 read_lock(&ksocknal_data.ksnd_global_lock);
302 hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) {
304 if (peer_ni->ksnp_ni != ni)
307 if (peer_ni->ksnp_n_passive_ips == 0 &&
308 list_empty(&peer_ni->ksnp_routes)) {
312 *id = peer_ni->ksnp_id;
322 for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) {
326 *id = peer_ni->ksnp_id;
327 *myip = peer_ni->ksnp_passive_ips[j];
336 list_for_each(rtmp, &peer_ni->ksnp_routes) {
340 route = list_entry(rtmp, struct ksock_route,
343 *id = peer_ni->ksnp_id;
344 rc = choose_ipv4_src(myip, route->ksnr_myiface,
347 *peer_ip = route->ksnr_ipaddr;
348 *port = route->ksnr_port;
349 *conn_count = route->ksnr_conn_count;
350 *share_count = route->ksnr_share_count;
355 read_unlock(&ksocknal_data.ksnd_global_lock);
360 ksocknal_associate_route_conn_locked(struct ksock_route *route,
361 struct ksock_conn *conn)
363 struct ksock_peer_ni *peer_ni = route->ksnr_peer;
364 int type = conn->ksnc_type;
365 struct ksock_interface *iface;
366 int conn_iface = ksocknal_ip2index(conn->ksnc_myipaddr,
367 route->ksnr_peer->ksnp_ni);
369 conn->ksnc_route = route;
370 ksocknal_route_addref(route);
372 if (route->ksnr_myiface != conn_iface) {
373 if (route->ksnr_myiface < 0) {
374 /* route wasn't bound locally yet (the initial route) */
375 CDEBUG(D_NET, "Binding %s %pI4h to interface %d\n",
376 libcfs_id2str(peer_ni->ksnp_id),
381 "Rebinding %s %pI4h from interface %d to %d\n",
382 libcfs_id2str(peer_ni->ksnp_id),
387 iface = ksocknal_index2iface(route->ksnr_peer->ksnp_ni,
388 route->ksnr_myiface);
390 iface->ksni_nroutes--;
392 route->ksnr_myiface = conn_iface;
393 iface = ksocknal_index2iface(route->ksnr_peer->ksnp_ni,
394 route->ksnr_myiface);
396 iface->ksni_nroutes++;
399 route->ksnr_connected |= (1<<type);
400 route->ksnr_conn_count++;
402 /* Successful connection => further attempts can
403 * proceed immediately
405 route->ksnr_retry_interval = 0;
409 ksocknal_add_route_locked(struct ksock_peer_ni *peer_ni, struct ksock_route *route)
411 struct list_head *tmp;
412 struct ksock_conn *conn;
413 struct ksock_route *route2;
414 struct ksock_net *net = peer_ni->ksnp_ni->ni_data;
416 LASSERT(!peer_ni->ksnp_closing);
417 LASSERT(route->ksnr_peer == NULL);
418 LASSERT(!route->ksnr_scheduled);
419 LASSERT(!route->ksnr_connecting);
420 LASSERT(route->ksnr_connected == 0);
421 LASSERT(net->ksnn_ninterfaces > 0);
423 /* LASSERT(unique) */
424 list_for_each(tmp, &peer_ni->ksnp_routes) {
425 route2 = list_entry(tmp, struct ksock_route, ksnr_list);
427 if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
428 CERROR("Duplicate route %s %pI4h\n",
429 libcfs_id2str(peer_ni->ksnp_id),
430 &route->ksnr_ipaddr);
435 route->ksnr_peer = peer_ni;
436 ksocknal_peer_addref(peer_ni);
438 /* set the route's interface to the current net's interface */
439 route->ksnr_myiface = net->ksnn_interfaces[0].ksni_index;
440 net->ksnn_interfaces[0].ksni_nroutes++;
442 /* peer_ni's routelist takes over my ref on 'route' */
443 list_add_tail(&route->ksnr_list, &peer_ni->ksnp_routes);
445 list_for_each(tmp, &peer_ni->ksnp_conns) {
446 conn = list_entry(tmp, struct ksock_conn, ksnc_list);
448 if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
451 ksocknal_associate_route_conn_locked(route, conn);
452 /* keep going (typed routes) */
457 ksocknal_del_route_locked(struct ksock_route *route)
459 struct ksock_peer_ni *peer_ni = route->ksnr_peer;
460 struct ksock_interface *iface;
461 struct ksock_conn *conn;
462 struct ksock_conn *cnxt;
464 LASSERT(!route->ksnr_deleted);
466 /* Close associated conns */
467 list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns, ksnc_list) {
468 if (conn->ksnc_route != route)
471 ksocknal_close_conn_locked(conn, 0);
474 if (route->ksnr_myiface >= 0) {
475 iface = ksocknal_index2iface(route->ksnr_peer->ksnp_ni,
476 route->ksnr_myiface);
478 iface->ksni_nroutes--;
481 route->ksnr_deleted = 1;
482 list_del(&route->ksnr_list);
483 ksocknal_route_decref(route); /* drop peer_ni's ref */
485 if (list_empty(&peer_ni->ksnp_routes) &&
486 list_empty(&peer_ni->ksnp_conns)) {
487 /* I've just removed the last route to a peer_ni with no active
489 ksocknal_unlink_peer_locked(peer_ni);
494 ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
497 struct list_head *tmp;
498 struct ksock_peer_ni *peer_ni;
499 struct ksock_peer_ni *peer2;
500 struct ksock_route *route;
501 struct ksock_route *route2;
503 if (id.nid == LNET_NID_ANY ||
504 id.pid == LNET_PID_ANY)
507 /* Have a brand new peer_ni ready... */
508 peer_ni = ksocknal_create_peer(ni, id);
510 return PTR_ERR(peer_ni);
512 route = ksocknal_create_route (ipaddr, port);
514 ksocknal_peer_decref(peer_ni);
518 write_lock_bh(&ksocknal_data.ksnd_global_lock);
520 /* always called with a ref on ni, so shutdown can't have started */
521 LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers)
524 peer2 = ksocknal_find_peer_locked(ni, id);
526 ksocknal_peer_decref(peer_ni);
529 /* peer_ni table takes my ref on peer_ni */
530 hash_add(ksocknal_data.ksnd_peers, &peer_ni->ksnp_list, id.nid);
534 list_for_each(tmp, &peer_ni->ksnp_routes) {
535 route2 = list_entry(tmp, struct ksock_route, ksnr_list);
537 if (route2->ksnr_ipaddr == ipaddr)
542 if (route2 == NULL) {
543 ksocknal_add_route_locked(peer_ni, route);
544 route->ksnr_share_count++;
546 ksocknal_route_decref(route);
547 route2->ksnr_share_count++;
550 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
556 ksocknal_del_peer_locked(struct ksock_peer_ni *peer_ni, __u32 ip)
558 struct ksock_conn *conn;
559 struct ksock_conn *cnxt;
560 struct ksock_route *route;
561 struct ksock_route *rnxt;
564 LASSERT(!peer_ni->ksnp_closing);
566 /* Extra ref prevents peer_ni disappearing until I'm done with it */
567 ksocknal_peer_addref(peer_ni);
569 list_for_each_entry_safe(route, rnxt, &peer_ni->ksnp_routes,
572 if (!(ip == 0 || route->ksnr_ipaddr == ip))
575 route->ksnr_share_count = 0;
576 /* This deletes associated conns too */
577 ksocknal_del_route_locked(route);
581 list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list)
582 nshared += route->ksnr_share_count;
585 /* remove everything else if there are no explicit entries
588 list_for_each_entry_safe(route, rnxt, &peer_ni->ksnp_routes,
590 /* we should only be removing auto-entries */
591 LASSERT(route->ksnr_share_count == 0);
592 ksocknal_del_route_locked(route);
595 list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns,
597 ksocknal_close_conn_locked(conn, 0);
600 ksocknal_peer_decref(peer_ni);
601 /* NB peer_ni unlinks itself when last conn/route is removed */
605 ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip)
608 struct hlist_node *pnxt;
609 struct ksock_peer_ni *peer_ni;
615 write_lock_bh(&ksocknal_data.ksnd_global_lock);
617 if (id.nid != LNET_NID_ANY) {
618 lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers));
622 hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1;
625 for (i = lo; i <= hi; i++) {
626 hlist_for_each_entry_safe(peer_ni, pnxt,
627 &ksocknal_data.ksnd_peers[i],
629 if (peer_ni->ksnp_ni != ni)
632 if (!((id.nid == LNET_NID_ANY ||
633 peer_ni->ksnp_id.nid == id.nid) &&
634 (id.pid == LNET_PID_ANY ||
635 peer_ni->ksnp_id.pid == id.pid)))
638 ksocknal_peer_addref(peer_ni); /* a ref for me... */
640 ksocknal_del_peer_locked(peer_ni, ip);
642 if (peer_ni->ksnp_closing &&
643 !list_empty(&peer_ni->ksnp_tx_queue)) {
644 LASSERT(list_empty(&peer_ni->ksnp_conns));
645 LASSERT(list_empty(&peer_ni->ksnp_routes));
647 list_splice_init(&peer_ni->ksnp_tx_queue,
651 ksocknal_peer_decref(peer_ni); /* ...till here */
653 rc = 0; /* matched! */
657 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
659 ksocknal_txlist_done(ni, &zombies, -ENETDOWN);
664 static struct ksock_conn *
665 ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index)
667 struct ksock_peer_ni *peer_ni;
668 struct ksock_conn *conn;
669 struct list_head *ctmp;
672 read_lock(&ksocknal_data.ksnd_global_lock);
674 hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) {
675 LASSERT(!peer_ni->ksnp_closing);
677 if (peer_ni->ksnp_ni != ni)
680 list_for_each(ctmp, &peer_ni->ksnp_conns) {
684 conn = list_entry(ctmp, struct ksock_conn,
686 ksocknal_conn_addref(conn);
687 read_unlock(&ksocknal_data.ksnd_global_lock);
692 read_unlock(&ksocknal_data.ksnd_global_lock);
696 static struct ksock_sched *
697 ksocknal_choose_scheduler_locked(unsigned int cpt)
699 struct ksock_sched *sched = ksocknal_data.ksnd_schedulers[cpt];
702 if (sched->kss_nthreads == 0) {
703 cfs_percpt_for_each(sched, i, ksocknal_data.ksnd_schedulers) {
704 if (sched->kss_nthreads > 0) {
705 CDEBUG(D_NET, "scheduler[%d] has no threads. selected scheduler[%d]\n",
706 cpt, sched->kss_cpt);
717 ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs)
719 struct ksock_net *net = ni->ni_data;
723 read_lock(&ksocknal_data.ksnd_global_lock);
725 nip = net->ksnn_ninterfaces;
726 LASSERT(nip <= LNET_INTERFACES_NUM);
729 * Only offer interfaces for additional connections if I have
733 read_unlock(&ksocknal_data.ksnd_global_lock);
737 for (i = 0; i < nip; i++) {
738 ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
739 LASSERT(ipaddrs[i] != 0);
742 read_unlock(&ksocknal_data.ksnd_global_lock);
747 ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips)
749 int best_netmatch = 0;
756 for (i = 0; i < nips; i++) {
760 this_xor = (ips[i] ^ iface->ksni_ipaddr);
761 this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
764 best_netmatch < this_netmatch ||
765 (best_netmatch == this_netmatch &&
766 best_xor > this_xor)))
770 best_netmatch = this_netmatch;
779 ksocknal_select_ips(struct ksock_peer_ni *peer_ni, __u32 *peerips, int n_peerips)
781 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
782 struct ksock_net *net = peer_ni->ksnp_ni->ni_data;
783 struct ksock_interface *iface;
784 struct ksock_interface *best_iface;
795 /* CAVEAT EMPTOR: We do all our interface matching with an
796 * exclusive hold of global lock at IRQ priority. We're only
797 * expecting to be dealing with small numbers of interfaces, so the
798 * O(n**3)-ness shouldn't matter */
800 /* Also note that I'm not going to return more than n_peerips
801 * interfaces, even if I have more myself */
803 write_lock_bh(global_lock);
805 LASSERT(n_peerips <= LNET_INTERFACES_NUM);
806 LASSERT(net->ksnn_ninterfaces <= LNET_INTERFACES_NUM);
808 /* Only match interfaces for additional connections
809 * if I have > 1 interface
811 n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
812 min(n_peerips, net->ksnn_ninterfaces);
814 for (i = 0; peer_ni->ksnp_n_passive_ips < n_ips; i++) {
815 /* ^ yes really... */
817 /* If we have any new interfaces, first tick off all the
818 * peer_ni IPs that match old interfaces, then choose new
819 * interfaces to match the remaining peer_ni IPS.
820 * We don't forget interfaces we've stopped using; we might
821 * start using them again... */
823 if (i < peer_ni->ksnp_n_passive_ips) {
825 ip = peer_ni->ksnp_passive_ips[i];
826 best_iface = ksocknal_ip2iface(peer_ni->ksnp_ni, ip);
828 /* peer_ni passive ips are kept up to date */
829 LASSERT(best_iface != NULL);
831 /* choose a new interface */
832 LASSERT (i == peer_ni->ksnp_n_passive_ips);
838 for (j = 0; j < net->ksnn_ninterfaces; j++) {
839 iface = &net->ksnn_interfaces[j];
840 ip = iface->ksni_ipaddr;
842 for (k = 0; k < peer_ni->ksnp_n_passive_ips; k++)
843 if (peer_ni->ksnp_passive_ips[k] == ip)
846 if (k < peer_ni->ksnp_n_passive_ips) /* using it already */
849 k = ksocknal_match_peerip(iface, peerips, n_peerips);
850 xor = (ip ^ peerips[k]);
851 this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
853 if (!(best_iface == NULL ||
854 best_netmatch < this_netmatch ||
855 (best_netmatch == this_netmatch &&
856 best_npeers > iface->ksni_npeers)))
860 best_netmatch = this_netmatch;
861 best_npeers = iface->ksni_npeers;
864 LASSERT(best_iface != NULL);
866 best_iface->ksni_npeers++;
867 ip = best_iface->ksni_ipaddr;
868 peer_ni->ksnp_passive_ips[i] = ip;
869 peer_ni->ksnp_n_passive_ips = i+1;
872 /* mark the best matching peer_ni IP used */
873 j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
877 /* Overwrite input peer_ni IP addresses */
878 memcpy(peerips, peer_ni->ksnp_passive_ips, n_ips * sizeof(*peerips));
880 write_unlock_bh(global_lock);
886 ksocknal_create_routes(struct ksock_peer_ni *peer_ni, int port,
887 __u32 *peer_ipaddrs, int npeer_ipaddrs)
889 struct ksock_route *newroute = NULL;
890 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
891 struct lnet_ni *ni = peer_ni->ksnp_ni;
892 struct ksock_net *net = ni->ni_data;
893 struct list_head *rtmp;
894 struct ksock_route *route;
895 struct ksock_interface *iface;
896 struct ksock_interface *best_iface;
903 /* CAVEAT EMPTOR: We do all our interface matching with an
904 * exclusive hold of global lock at IRQ priority. We're only
905 * expecting to be dealing with small numbers of interfaces, so the
906 * O(n**3)-ness here shouldn't matter */
908 write_lock_bh(global_lock);
910 if (net->ksnn_ninterfaces < 2) {
911 /* Only create additional connections
912 * if I have > 1 interface */
913 write_unlock_bh(global_lock);
917 LASSERT(npeer_ipaddrs <= LNET_INTERFACES_NUM);
919 for (i = 0; i < npeer_ipaddrs; i++) {
920 if (newroute != NULL) {
921 newroute->ksnr_ipaddr = peer_ipaddrs[i];
923 write_unlock_bh(global_lock);
925 newroute = ksocknal_create_route(peer_ipaddrs[i], port);
926 if (newroute == NULL)
929 write_lock_bh(global_lock);
932 if (peer_ni->ksnp_closing) {
933 /* peer_ni got closed under me */
937 /* Already got a route? */
939 list_for_each(rtmp, &peer_ni->ksnp_routes) {
940 route = list_entry(rtmp, struct ksock_route, ksnr_list);
942 if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
954 LASSERT(net->ksnn_ninterfaces <= LNET_INTERFACES_NUM);
956 /* Select interface to connect from */
957 for (j = 0; j < net->ksnn_ninterfaces; j++) {
958 iface = &net->ksnn_interfaces[j];
960 /* Using this interface already? */
961 list_for_each(rtmp, &peer_ni->ksnp_routes) {
962 route = list_entry(rtmp, struct ksock_route,
965 if (route->ksnr_myiface == iface->ksni_index)
973 this_netmatch = (((iface->ksni_ipaddr ^
974 newroute->ksnr_ipaddr) &
975 iface->ksni_netmask) == 0) ? 1 : 0;
977 if (!(best_iface == NULL ||
978 best_netmatch < this_netmatch ||
979 (best_netmatch == this_netmatch &&
980 best_nroutes > iface->ksni_nroutes)))
984 best_netmatch = this_netmatch;
985 best_nroutes = iface->ksni_nroutes;
988 if (best_iface == NULL)
991 newroute->ksnr_myiface = best_iface->ksni_index;
992 best_iface->ksni_nroutes++;
994 ksocknal_add_route_locked(peer_ni, newroute);
998 write_unlock_bh(global_lock);
999 if (newroute != NULL)
1000 ksocknal_route_decref(newroute);
1004 ksocknal_accept(struct lnet_ni *ni, struct socket *sock)
1006 struct ksock_connreq *cr;
1011 rc = lnet_sock_getaddr(sock, true, &peer_ip, &peer_port);
1012 LASSERT(rc == 0); /* we succeeded before */
1014 LIBCFS_ALLOC(cr, sizeof(*cr));
1016 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from "
1017 "%pI4h: memory exhausted\n", &peer_ip);
1023 cr->ksncr_sock = sock;
1025 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
1027 list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
1028 wake_up(&ksocknal_data.ksnd_connd_waitq);
1030 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
1035 ksocknal_connecting(struct ksock_peer_ni *peer_ni, __u32 ipaddr)
1037 struct ksock_route *route;
1039 list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list) {
1040 if (route->ksnr_ipaddr == ipaddr)
1041 return route->ksnr_connecting;
1047 ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route,
1048 struct socket *sock, int type)
1050 rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
1052 struct lnet_process_id peerid;
1053 struct list_head *tmp;
1055 struct ksock_conn *conn;
1056 struct ksock_conn *conn2;
1057 struct ksock_peer_ni *peer_ni = NULL;
1058 struct ksock_peer_ni *peer2;
1059 struct ksock_sched *sched;
1060 struct ksock_hello_msg *hello;
1062 struct ksock_tx *tx;
1063 struct ksock_tx *txtmp;
1069 active = (route != NULL);
1071 LASSERT (active == (type != SOCKLND_CONN_NONE));
1073 LIBCFS_ALLOC(conn, sizeof(*conn));
1079 conn->ksnc_peer = NULL;
1080 conn->ksnc_route = NULL;
1081 conn->ksnc_sock = sock;
1082 /* 2 ref, 1 for conn, another extra ref prevents socket
1083 * being closed before establishment of connection */
1084 refcount_set(&conn->ksnc_sock_refcount, 2);
1085 conn->ksnc_type = type;
1086 ksocknal_lib_save_callback(sock, conn);
1087 refcount_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
1089 conn->ksnc_rx_ready = 0;
1090 conn->ksnc_rx_scheduled = 0;
1092 INIT_LIST_HEAD(&conn->ksnc_tx_queue);
1093 conn->ksnc_tx_ready = 0;
1094 conn->ksnc_tx_scheduled = 0;
1095 conn->ksnc_tx_carrier = NULL;
1096 atomic_set (&conn->ksnc_tx_nob, 0);
1098 LIBCFS_ALLOC(hello, offsetof(struct ksock_hello_msg,
1099 kshm_ips[LNET_INTERFACES_NUM]));
1100 if (hello == NULL) {
1105 /* stash conn's local and remote addrs */
1106 rc = ksocknal_lib_get_conn_addrs (conn);
1110 /* Find out/confirm peer_ni's NID and connection type and get the
1111 * vector of interfaces she's willing to let me connect to.
1112 * Passive connections use the listener timeout since the peer_ni sends
1116 peer_ni = route->ksnr_peer;
1117 LASSERT(ni == peer_ni->ksnp_ni);
1119 /* Active connection sends HELLO eagerly */
1120 hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
1121 peerid = peer_ni->ksnp_id;
1123 write_lock_bh(global_lock);
1124 conn->ksnc_proto = peer_ni->ksnp_proto;
1125 write_unlock_bh(global_lock);
1127 if (conn->ksnc_proto == NULL) {
1128 conn->ksnc_proto = &ksocknal_protocol_v3x;
1129 #if SOCKNAL_VERSION_DEBUG
1130 if (*ksocknal_tunables.ksnd_protocol == 2)
1131 conn->ksnc_proto = &ksocknal_protocol_v2x;
1132 else if (*ksocknal_tunables.ksnd_protocol == 1)
1133 conn->ksnc_proto = &ksocknal_protocol_v1x;
1137 rc = ksocknal_send_hello (ni, conn, peerid.nid, hello);
1141 peerid.nid = LNET_NID_ANY;
1142 peerid.pid = LNET_PID_ANY;
1144 /* Passive, get protocol from peer_ni */
1145 conn->ksnc_proto = NULL;
1148 rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation);
1152 LASSERT (rc == 0 || active);
1153 LASSERT (conn->ksnc_proto != NULL);
1154 LASSERT (peerid.nid != LNET_NID_ANY);
1156 cpt = lnet_cpt_of_nid(peerid.nid, ni);
1159 ksocknal_peer_addref(peer_ni);
1160 write_lock_bh(global_lock);
1162 peer_ni = ksocknal_create_peer(ni, peerid);
1163 if (IS_ERR(peer_ni)) {
1164 rc = PTR_ERR(peer_ni);
1168 write_lock_bh(global_lock);
1170 /* called with a ref on ni, so shutdown can't have started */
1171 LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) >= 0);
1173 peer2 = ksocknal_find_peer_locked(ni, peerid);
1174 if (peer2 == NULL) {
1175 /* NB this puts an "empty" peer_ni in the peer_ni
1176 * table (which takes my ref) */
1177 hash_add(ksocknal_data.ksnd_peers,
1178 &peer_ni->ksnp_list, peerid.nid);
1180 ksocknal_peer_decref(peer_ni);
1185 ksocknal_peer_addref(peer_ni);
1186 peer_ni->ksnp_accepting++;
1188 /* Am I already connecting to this guy? Resolve in
1189 * favour of higher NID... */
1190 if (peerid.nid < ni->ni_nid &&
1191 ksocknal_connecting(peer_ni, conn->ksnc_ipaddr)) {
1193 warn = "connection race resolution";
1198 if (peer_ni->ksnp_closing ||
1199 (active && route->ksnr_deleted)) {
1200 /* peer_ni/route got closed under me */
1202 warn = "peer_ni/route removed";
1206 if (peer_ni->ksnp_proto == NULL) {
1207 /* Never connected before.
1208 * NB recv_hello may have returned EPROTO to signal my peer_ni
1209 * wants a different protocol than the one I asked for.
1211 LASSERT(list_empty(&peer_ni->ksnp_conns));
1213 peer_ni->ksnp_proto = conn->ksnc_proto;
1214 peer_ni->ksnp_incarnation = incarnation;
1217 if (peer_ni->ksnp_proto != conn->ksnc_proto ||
1218 peer_ni->ksnp_incarnation != incarnation) {
1219 /* peer_ni rebooted or I've got the wrong protocol version */
1220 ksocknal_close_peer_conns_locked(peer_ni, 0, 0);
1222 peer_ni->ksnp_proto = NULL;
1224 warn = peer_ni->ksnp_incarnation != incarnation ?
1225 "peer_ni rebooted" :
1226 "wrong proto version";
1236 warn = "lost conn race";
1239 warn = "retry with different protocol version";
1243 /* Refuse to duplicate an existing connection, unless this is a
1244 * loopback connection */
1245 if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
1246 list_for_each(tmp, &peer_ni->ksnp_conns) {
1247 conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
1249 if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
1250 conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
1251 conn2->ksnc_type != conn->ksnc_type)
1254 /* Reply on a passive connection attempt so the peer_ni
1255 * realises we're connected. */
1265 /* If the connection created by this route didn't bind to the IP
1266 * address the route connected to, the connection/route matching
1267 * code below probably isn't going to work. */
1269 route->ksnr_ipaddr != conn->ksnc_ipaddr) {
1270 CERROR("Route %s %pI4h connected to %pI4h\n",
1271 libcfs_id2str(peer_ni->ksnp_id),
1272 &route->ksnr_ipaddr,
1273 &conn->ksnc_ipaddr);
1276 /* Search for a route corresponding to the new connection and
1277 * create an association. This allows incoming connections created
1278 * by routes in my peer_ni to match my own route entries so I don't
1279 * continually create duplicate routes. */
1280 list_for_each(tmp, &peer_ni->ksnp_routes) {
1281 route = list_entry(tmp, struct ksock_route, ksnr_list);
1283 if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
1286 ksocknal_associate_route_conn_locked(route, conn);
1290 conn->ksnc_peer = peer_ni; /* conn takes my ref on peer_ni */
1291 peer_ni->ksnp_last_alive = ktime_get_seconds();
1292 peer_ni->ksnp_send_keepalive = 0;
1293 peer_ni->ksnp_error = 0;
1295 sched = ksocknal_choose_scheduler_locked(cpt);
1297 CERROR("no schedulers available. node is unhealthy\n");
1301 * The cpt might have changed if we ended up selecting a non cpt
1302 * native scheduler. So use the scheduler's cpt instead.
1304 cpt = sched->kss_cpt;
1305 sched->kss_nconns++;
1306 conn->ksnc_scheduler = sched;
1308 conn->ksnc_tx_last_post = ktime_get_seconds();
1309 /* Set the deadline for the outgoing HELLO to drain */
1310 conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
1311 conn->ksnc_tx_deadline = ktime_get_seconds() +
1313 smp_mb(); /* order with adding to peer_ni's conn list */
1315 list_add(&conn->ksnc_list, &peer_ni->ksnp_conns);
1316 ksocknal_conn_addref(conn);
1318 ksocknal_new_packet(conn, 0);
1320 conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);
1322 /* Take packets blocking for this connection. */
1323 list_for_each_entry_safe(tx, txtmp, &peer_ni->ksnp_tx_queue, tx_list) {
1324 if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) ==
1328 list_del(&tx->tx_list);
1329 ksocknal_queue_tx_locked(tx, conn);
1332 write_unlock_bh(global_lock);
1334 /* We've now got a new connection. Any errors from here on are just
1335 * like "normal" comms errors and we close the connection normally.
1336 * NB (a) we still have to send the reply HELLO for passive
1338 * (b) normal I/O on the conn is blocked until I setup and call the
1342 CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d"
1343 " incarnation:%lld sched[%d]\n",
1344 libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
1345 &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
1346 conn->ksnc_port, incarnation, cpt);
1349 /* additional routes after interface exchange? */
1350 ksocknal_create_routes(peer_ni, conn->ksnc_port,
1351 hello->kshm_ips, hello->kshm_nips);
1353 hello->kshm_nips = ksocknal_select_ips(peer_ni, hello->kshm_ips,
1355 rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
1358 LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg,
1359 kshm_ips[LNET_INTERFACES_NUM]));
1361 /* setup the socket AFTER I've received hello (it disables
1362 * SO_LINGER). I might call back to the acceptor who may want
1363 * to send a protocol version response and then close the
1364 * socket; this ensures the socket only tears down after the
1365 * response has been sent. */
1367 rc = ksocknal_lib_setup_sock(sock);
1369 write_lock_bh(global_lock);
1371 /* NB my callbacks block while I hold ksnd_global_lock */
1372 ksocknal_lib_set_callback(sock, conn);
1375 peer_ni->ksnp_accepting--;
1377 write_unlock_bh(global_lock);
1380 write_lock_bh(global_lock);
1381 if (!conn->ksnc_closing) {
1382 /* could be closed by another thread */
1383 ksocknal_close_conn_locked(conn, rc);
1385 write_unlock_bh(global_lock);
1386 } else if (ksocknal_connsock_addref(conn) == 0) {
1387 /* Allow I/O to proceed. */
1388 ksocknal_read_callback(conn);
1389 ksocknal_write_callback(conn);
1390 ksocknal_connsock_decref(conn);
1393 ksocknal_connsock_decref(conn);
1394 ksocknal_conn_decref(conn);
1398 if (!peer_ni->ksnp_closing &&
1399 list_empty(&peer_ni->ksnp_conns) &&
1400 list_empty(&peer_ni->ksnp_routes)) {
1401 list_splice_init(&peer_ni->ksnp_tx_queue, &zombies);
1402 ksocknal_unlink_peer_locked(peer_ni);
1405 write_unlock_bh(global_lock);
1409 CERROR("Not creating conn %s type %d: %s\n",
1410 libcfs_id2str(peerid), conn->ksnc_type, warn);
1412 CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
1413 libcfs_id2str(peerid), conn->ksnc_type, warn);
1418 /* Request retry by replying with CONN_NONE
1419 * ksnc_proto has been set already */
1420 conn->ksnc_type = SOCKLND_CONN_NONE;
1421 hello->kshm_nips = 0;
1422 ksocknal_send_hello(ni, conn, peerid.nid, hello);
1425 write_lock_bh(global_lock);
1426 peer_ni->ksnp_accepting--;
1427 write_unlock_bh(global_lock);
1431 * If we get here without an error code, just use -EALREADY.
1432 * Depending on how we got here, the error may be positive
1433 * or negative. Normalize the value for ksocknal_txlist_done().
1435 rc2 = (rc == 0 ? -EALREADY : (rc > 0 ? -rc : rc));
1436 ksocknal_txlist_done(ni, &zombies, rc2);
1437 ksocknal_peer_decref(peer_ni);
1441 LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg,
1442 kshm_ips[LNET_INTERFACES_NUM]));
1444 LIBCFS_FREE(conn, sizeof(*conn));
1452 ksocknal_close_conn_locked(struct ksock_conn *conn, int error)
1454 /* This just does the immmediate housekeeping, and queues the
1455 * connection for the reaper to terminate.
1456 * Caller holds ksnd_global_lock exclusively in irq context */
1457 struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
1458 struct ksock_route *route;
1459 struct ksock_conn *conn2;
1460 struct list_head *tmp;
1462 LASSERT(peer_ni->ksnp_error == 0);
1463 LASSERT(!conn->ksnc_closing);
1464 conn->ksnc_closing = 1;
1466 /* ksnd_deathrow_conns takes over peer_ni's ref */
1467 list_del(&conn->ksnc_list);
1469 route = conn->ksnc_route;
1470 if (route != NULL) {
1471 /* dissociate conn from route... */
1472 LASSERT(!route->ksnr_deleted);
1473 LASSERT((route->ksnr_connected & BIT(conn->ksnc_type)) != 0);
1476 list_for_each(tmp, &peer_ni->ksnp_conns) {
1477 conn2 = list_entry(tmp, struct ksock_conn, ksnc_list);
1479 if (conn2->ksnc_route == route &&
1480 conn2->ksnc_type == conn->ksnc_type)
1486 route->ksnr_connected &= ~BIT(conn->ksnc_type);
1488 conn->ksnc_route = NULL;
1490 ksocknal_route_decref(route); /* drop conn's ref on route */
1493 if (list_empty(&peer_ni->ksnp_conns)) {
1494 /* No more connections to this peer_ni */
1496 if (!list_empty(&peer_ni->ksnp_tx_queue)) {
1497 struct ksock_tx *tx;
1499 LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);
1501 /* throw them to the last connection...,
1502 * these TXs will be send to /dev/null by scheduler */
1503 list_for_each_entry(tx, &peer_ni->ksnp_tx_queue,
1505 ksocknal_tx_prep(conn, tx);
1507 spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
1508 list_splice_init(&peer_ni->ksnp_tx_queue,
1509 &conn->ksnc_tx_queue);
1510 spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
1513 /* renegotiate protocol version */
1514 peer_ni->ksnp_proto = NULL;
1515 /* stash last conn close reason */
1516 peer_ni->ksnp_error = error;
1518 if (list_empty(&peer_ni->ksnp_routes)) {
1519 /* I've just closed last conn belonging to a
1520 * peer_ni with no routes to it */
1521 ksocknal_unlink_peer_locked(peer_ni);
1525 spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
1527 list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
1528 wake_up(&ksocknal_data.ksnd_reaper_waitq);
1530 spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
1534 ksocknal_peer_failed(struct ksock_peer_ni *peer_ni)
1537 time64_t last_alive = 0;
1539 /* There has been a connection failure or comms error; but I'll only
1540 * tell LNET I think the peer_ni is dead if it's to another kernel and
1541 * there are no connections or connection attempts in existence. */
1543 read_lock(&ksocknal_data.ksnd_global_lock);
1545 if ((peer_ni->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
1546 list_empty(&peer_ni->ksnp_conns) &&
1547 peer_ni->ksnp_accepting == 0 &&
1548 ksocknal_find_connecting_route_locked(peer_ni) == NULL) {
1550 last_alive = peer_ni->ksnp_last_alive;
1553 read_unlock(&ksocknal_data.ksnd_global_lock);
1556 lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid,
1557 false, false, last_alive);
1561 ksocknal_finalize_zcreq(struct ksock_conn *conn)
1563 struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
1564 struct ksock_tx *tx;
1565 struct ksock_tx *tmp;
1568 /* NB safe to finalize TXs because closing of socket will
1569 * abort all buffered data */
1570 LASSERT(conn->ksnc_sock == NULL);
1572 spin_lock(&peer_ni->ksnp_lock);
1574 list_for_each_entry_safe(tx, tmp, &peer_ni->ksnp_zc_req_list, tx_zc_list) {
1575 if (tx->tx_conn != conn)
1578 LASSERT(tx->tx_msg.ksm_zc_cookies[0] != 0);
1580 tx->tx_msg.ksm_zc_cookies[0] = 0;
1581 tx->tx_zc_aborted = 1; /* mark it as not-acked */
1582 list_move(&tx->tx_zc_list, &zlist);
1585 spin_unlock(&peer_ni->ksnp_lock);
1587 while (!list_empty(&zlist)) {
1588 tx = list_entry(zlist.next, struct ksock_tx, tx_zc_list);
1590 list_del(&tx->tx_zc_list);
1591 ksocknal_tx_decref(tx);
1596 ksocknal_terminate_conn(struct ksock_conn *conn)
1598 /* This gets called by the reaper (guaranteed thread context) to
1599 * disengage the socket from its callbacks and close it.
1600 * ksnc_refcount will eventually hit zero, and then the reaper will
1602 struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
1603 struct ksock_sched *sched = conn->ksnc_scheduler;
1606 LASSERT(conn->ksnc_closing);
1608 /* wake up the scheduler to "send" all remaining packets to /dev/null */
1609 spin_lock_bh(&sched->kss_lock);
1611 /* a closing conn is always ready to tx */
1612 conn->ksnc_tx_ready = 1;
1614 if (!conn->ksnc_tx_scheduled &&
1615 !list_empty(&conn->ksnc_tx_queue)) {
1616 list_add_tail(&conn->ksnc_tx_list,
1617 &sched->kss_tx_conns);
1618 conn->ksnc_tx_scheduled = 1;
1619 /* extra ref for scheduler */
1620 ksocknal_conn_addref(conn);
1622 wake_up (&sched->kss_waitq);
1625 spin_unlock_bh(&sched->kss_lock);
1627 /* serialise with callbacks */
1628 write_lock_bh(&ksocknal_data.ksnd_global_lock);
1630 ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
1632 /* OK, so this conn may not be completely disengaged from its
1633 * scheduler yet, but it _has_ committed to terminate... */
1634 conn->ksnc_scheduler->kss_nconns--;
1636 if (peer_ni->ksnp_error != 0) {
1637 /* peer_ni's last conn closed in error */
1638 LASSERT(list_empty(&peer_ni->ksnp_conns));
1640 peer_ni->ksnp_error = 0; /* avoid multiple notifications */
1643 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1646 ksocknal_peer_failed(peer_ni);
1648 /* The socket is closed on the final put; either here, or in
1649 * ksocknal_{send,recv}msg(). Since we set up the linger2 option
1650 * when the connection was established, this will close the socket
1651 * immediately, aborting anything buffered in it. Any hung
1652 * zero-copy transmits will therefore complete in finite time. */
1653 ksocknal_connsock_decref(conn);
1657 ksocknal_queue_zombie_conn(struct ksock_conn *conn)
1659 /* Queue the conn for the reaper to destroy */
1660 LASSERT(refcount_read(&conn->ksnc_conn_refcount) == 0);
1661 spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);
1663 list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
1664 wake_up(&ksocknal_data.ksnd_reaper_waitq);
1666 spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
1670 ksocknal_destroy_conn(struct ksock_conn *conn)
1674 /* Final coup-de-grace of the reaper */
1675 CDEBUG (D_NET, "connection %p\n", conn);
1677 LASSERT(refcount_read(&conn->ksnc_conn_refcount) == 0);
1678 LASSERT(refcount_read(&conn->ksnc_sock_refcount) == 0);
1679 LASSERT (conn->ksnc_sock == NULL);
1680 LASSERT (conn->ksnc_route == NULL);
1681 LASSERT (!conn->ksnc_tx_scheduled);
1682 LASSERT (!conn->ksnc_rx_scheduled);
1683 LASSERT(list_empty(&conn->ksnc_tx_queue));
1685 /* complete current receive if any */
1686 switch (conn->ksnc_rx_state) {
1687 case SOCKNAL_RX_LNET_PAYLOAD:
1688 last_rcv = conn->ksnc_rx_deadline -
1690 CERROR("Completing partial receive from %s[%d], "
1691 "ip %pI4h:%d, with error, wanted: %d, left: %d, "
1692 "last alive is %lld secs ago\n",
1693 libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
1694 &conn->ksnc_ipaddr, conn->ksnc_port,
1695 conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
1696 ktime_get_seconds() - last_rcv);
1697 if (conn->ksnc_lnet_msg)
1698 conn->ksnc_lnet_msg->msg_health_status =
1699 LNET_MSG_STATUS_REMOTE_ERROR;
1700 lnet_finalize(conn->ksnc_lnet_msg, -EIO);
1702 case SOCKNAL_RX_LNET_HEADER:
1703 if (conn->ksnc_rx_started)
1704 CERROR("Incomplete receive of lnet header from %s, "
1705 "ip %pI4h:%d, with error, protocol: %d.x.\n",
1706 libcfs_id2str(conn->ksnc_peer->ksnp_id),
1707 &conn->ksnc_ipaddr, conn->ksnc_port,
1708 conn->ksnc_proto->pro_version);
1710 case SOCKNAL_RX_KSM_HEADER:
1711 if (conn->ksnc_rx_started)
1712 CERROR("Incomplete receive of ksock message from %s, "
1713 "ip %pI4h:%d, with error, protocol: %d.x.\n",
1714 libcfs_id2str(conn->ksnc_peer->ksnp_id),
1715 &conn->ksnc_ipaddr, conn->ksnc_port,
1716 conn->ksnc_proto->pro_version);
1718 case SOCKNAL_RX_SLOP:
1719 if (conn->ksnc_rx_started)
1720 CERROR("Incomplete receive of slops from %s, "
1721 "ip %pI4h:%d, with error\n",
1722 libcfs_id2str(conn->ksnc_peer->ksnp_id),
1723 &conn->ksnc_ipaddr, conn->ksnc_port);
1730 ksocknal_peer_decref(conn->ksnc_peer);
1732 LIBCFS_FREE (conn, sizeof (*conn));
1736 ksocknal_close_peer_conns_locked(struct ksock_peer_ni *peer_ni, __u32 ipaddr, int why)
1738 struct ksock_conn *conn;
1739 struct ksock_conn *cnxt;
1742 list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns, ksnc_list) {
1744 conn->ksnc_ipaddr == ipaddr) {
1746 ksocknal_close_conn_locked (conn, why);
1754 ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why)
1756 struct ksock_peer_ni *peer_ni = conn->ksnc_peer;
1757 u32 ipaddr = conn->ksnc_ipaddr;
1760 write_lock_bh(&ksocknal_data.ksnd_global_lock);
1762 count = ksocknal_close_peer_conns_locked (peer_ni, ipaddr, why);
1764 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1770 ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr)
1772 struct ksock_peer_ni *peer_ni;
1773 struct hlist_node *pnxt;
1779 write_lock_bh(&ksocknal_data.ksnd_global_lock);
1781 if (id.nid != LNET_NID_ANY) {
1782 lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers));
1786 hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1;
1789 for (i = lo; i <= hi; i++) {
1790 hlist_for_each_entry_safe(peer_ni, pnxt,
1791 &ksocknal_data.ksnd_peers[i],
1794 if (!((id.nid == LNET_NID_ANY ||
1795 id.nid == peer_ni->ksnp_id.nid) &&
1796 (id.pid == LNET_PID_ANY ||
1797 id.pid == peer_ni->ksnp_id.pid)))
1800 count += ksocknal_close_peer_conns_locked(peer_ni,
1805 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1807 /* wildcards always succeed */
1808 if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
1811 return (count == 0 ? -ENOENT : 0);
1815 ksocknal_notify_gw_down(lnet_nid_t gw_nid)
1817 /* The router is telling me she's been notified of a change in
1820 struct lnet_process_id id = {
1822 .pid = LNET_PID_ANY,
1825 CDEBUG(D_NET, "gw %s down\n", libcfs_nid2str(gw_nid));
1827 /* If the gateway crashed, close all open connections... */
1828 ksocknal_close_matching_conns(id, 0);
1831 /* We can only establish new connections
1832 * if we have autroutes, and these connect on demand. */
1836 ksocknal_push_peer(struct ksock_peer_ni *peer_ni)
1840 struct list_head *tmp;
1841 struct ksock_conn *conn;
1843 for (index = 0; ; index++) {
1844 read_lock(&ksocknal_data.ksnd_global_lock);
1849 list_for_each(tmp, &peer_ni->ksnp_conns) {
1851 conn = list_entry(tmp, struct ksock_conn,
1853 ksocknal_conn_addref(conn);
1858 read_unlock(&ksocknal_data.ksnd_global_lock);
1863 ksocknal_lib_push_conn (conn);
1864 ksocknal_conn_decref(conn);
1869 ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id)
1876 if (id.nid != LNET_NID_ANY) {
1877 lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers));
1881 hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1;
1884 for (bkt = lo; bkt <= hi; bkt++) {
1885 int peer_off; /* searching offset in peer_ni hash table */
1887 for (peer_off = 0; ; peer_off++) {
1888 struct ksock_peer_ni *peer_ni;
1891 read_lock(&ksocknal_data.ksnd_global_lock);
1892 hlist_for_each_entry(peer_ni,
1893 &ksocknal_data.ksnd_peers[bkt],
1895 if (!((id.nid == LNET_NID_ANY ||
1896 id.nid == peer_ni->ksnp_id.nid) &&
1897 (id.pid == LNET_PID_ANY ||
1898 id.pid == peer_ni->ksnp_id.pid)))
1901 if (i++ == peer_off) {
1902 ksocknal_peer_addref(peer_ni);
1906 read_unlock(&ksocknal_data.ksnd_global_lock);
1908 if (i <= peer_off) /* no match */
1912 ksocknal_push_peer(peer_ni);
1913 ksocknal_peer_decref(peer_ni);
1920 ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask)
1922 struct ksock_net *net = ni->ni_data;
1923 struct ksock_interface *iface;
1927 struct ksock_peer_ni *peer_ni;
1928 struct list_head *rtmp;
1929 struct ksock_route *route;
1931 if (ipaddress == 0 ||
1935 write_lock_bh(&ksocknal_data.ksnd_global_lock);
1937 iface = ksocknal_ip2iface(ni, ipaddress);
1938 if (iface != NULL) {
1939 /* silently ignore dups */
1941 } else if (net->ksnn_ninterfaces == LNET_INTERFACES_NUM) {
1944 iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
1946 iface->ksni_index = ksocknal_ip2index(ipaddress, ni);
1947 iface->ksni_ipaddr = ipaddress;
1948 iface->ksni_netmask = netmask;
1949 iface->ksni_nroutes = 0;
1950 iface->ksni_npeers = 0;
1952 hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) {
1953 for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++)
1954 if (peer_ni->ksnp_passive_ips[j] == ipaddress)
1955 iface->ksni_npeers++;
1957 list_for_each(rtmp, &peer_ni->ksnp_routes) {
1958 route = list_entry(rtmp,
1962 if (route->ksnr_myiface ==
1964 iface->ksni_nroutes++;
1969 /* NB only new connections will pay attention to the new
1974 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
1980 ksocknal_peer_del_interface_locked(struct ksock_peer_ni *peer_ni,
1981 __u32 ipaddr, int index)
1983 struct ksock_route *route;
1984 struct ksock_route *rnxt;
1985 struct ksock_conn *conn;
1986 struct ksock_conn *cnxt;
1990 for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++)
1991 if (peer_ni->ksnp_passive_ips[i] == ipaddr) {
1992 for (j = i+1; j < peer_ni->ksnp_n_passive_ips; j++)
1993 peer_ni->ksnp_passive_ips[j-1] =
1994 peer_ni->ksnp_passive_ips[j];
1995 peer_ni->ksnp_n_passive_ips--;
1999 list_for_each_entry_safe(route, rnxt, &peer_ni->ksnp_routes,
2001 if (route->ksnr_myiface != index)
2004 if (route->ksnr_share_count != 0) {
2005 /* Manually created; keep, but unbind */
2006 route->ksnr_myiface = -1;
2008 ksocknal_del_route_locked(route);
2012 list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns, ksnc_list)
2013 if (conn->ksnc_myipaddr == ipaddr)
2014 ksocknal_close_conn_locked (conn, 0);
2018 ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress)
2020 struct ksock_net *net = ni->ni_data;
2022 struct hlist_node *nxt;
2023 struct ksock_peer_ni *peer_ni;
2029 index = ksocknal_ip2index(ipaddress, ni);
2031 write_lock_bh(&ksocknal_data.ksnd_global_lock);
2033 for (i = 0; i < net->ksnn_ninterfaces; i++) {
2034 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
2036 if (!(ipaddress == 0 ||
2037 ipaddress == this_ip))
2042 for (j = i+1; j < net->ksnn_ninterfaces; j++)
2043 net->ksnn_interfaces[j-1] =
2044 net->ksnn_interfaces[j];
2046 net->ksnn_ninterfaces--;
2048 hash_for_each_safe(ksocknal_data.ksnd_peers, j,
2049 nxt, peer_ni, ksnp_list) {
2050 if (peer_ni->ksnp_ni != ni)
2053 ksocknal_peer_del_interface_locked(peer_ni,
2058 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2064 ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg)
2066 struct lnet_process_id id = {0};
2067 struct libcfs_ioctl_data *data = arg;
2071 case IOC_LIBCFS_GET_INTERFACE: {
2072 struct ksock_net *net = ni->ni_data;
2073 struct ksock_interface *iface;
2075 read_lock(&ksocknal_data.ksnd_global_lock);
2077 if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
2081 iface = &net->ksnn_interfaces[data->ioc_count];
2083 data->ioc_u32[0] = iface->ksni_ipaddr;
2084 data->ioc_u32[1] = iface->ksni_netmask;
2085 data->ioc_u32[2] = iface->ksni_npeers;
2086 data->ioc_u32[3] = iface->ksni_nroutes;
2089 read_unlock(&ksocknal_data.ksnd_global_lock);
2093 case IOC_LIBCFS_ADD_INTERFACE:
2094 return ksocknal_add_interface(ni,
2095 data->ioc_u32[0], /* IP address */
2096 data->ioc_u32[1]); /* net mask */
2098 case IOC_LIBCFS_DEL_INTERFACE:
2099 return ksocknal_del_interface(ni,
2100 data->ioc_u32[0]); /* IP address */
2102 case IOC_LIBCFS_GET_PEER: {
2107 int share_count = 0;
2109 rc = ksocknal_get_peer_info(ni, data->ioc_count,
2110 &id, &myip, &ip, &port,
2111 &conn_count, &share_count);
2115 data->ioc_nid = id.nid;
2116 data->ioc_count = share_count;
2117 data->ioc_u32[0] = ip;
2118 data->ioc_u32[1] = port;
2119 data->ioc_u32[2] = myip;
2120 data->ioc_u32[3] = conn_count;
2121 data->ioc_u32[4] = id.pid;
2125 case IOC_LIBCFS_ADD_PEER:
2126 id.nid = data->ioc_nid;
2127 id.pid = LNET_PID_LUSTRE;
2128 return ksocknal_add_peer (ni, id,
2129 data->ioc_u32[0], /* IP */
2130 data->ioc_u32[1]); /* port */
2132 case IOC_LIBCFS_DEL_PEER:
2133 id.nid = data->ioc_nid;
2134 id.pid = LNET_PID_ANY;
2135 return ksocknal_del_peer (ni, id,
2136 data->ioc_u32[0]); /* IP */
2138 case IOC_LIBCFS_GET_CONN: {
2142 struct ksock_conn *conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
2147 ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
2149 data->ioc_count = txmem;
2150 data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
2151 data->ioc_flags = nagle;
2152 data->ioc_u32[0] = conn->ksnc_ipaddr;
2153 data->ioc_u32[1] = conn->ksnc_port;
2154 data->ioc_u32[2] = conn->ksnc_myipaddr;
2155 data->ioc_u32[3] = conn->ksnc_type;
2156 data->ioc_u32[4] = conn->ksnc_scheduler->kss_cpt;
2157 data->ioc_u32[5] = rxmem;
2158 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
2159 ksocknal_conn_decref(conn);
2163 case IOC_LIBCFS_CLOSE_CONNECTION:
2164 id.nid = data->ioc_nid;
2165 id.pid = LNET_PID_ANY;
2166 return ksocknal_close_matching_conns (id,
2169 case IOC_LIBCFS_REGISTER_MYNID:
2170 /* Ignore if this is a noop */
2171 if (data->ioc_nid == ni->ni_nid)
2174 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
2175 libcfs_nid2str(data->ioc_nid),
2176 libcfs_nid2str(ni->ni_nid));
2179 case IOC_LIBCFS_PUSH_CONNECTION:
2180 id.nid = data->ioc_nid;
2181 id.pid = LNET_PID_ANY;
2182 return ksocknal_push(ni, id);
2191 ksocknal_free_buffers (void)
2193 LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
2195 if (ksocknal_data.ksnd_schedulers != NULL)
2196 cfs_percpt_free(ksocknal_data.ksnd_schedulers);
2198 spin_lock(&ksocknal_data.ksnd_tx_lock);
2200 if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
2202 struct ksock_tx *tx;
2204 list_splice_init(&ksocknal_data.ksnd_idle_noop_txs, &zlist);
2205 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2207 while (!list_empty(&zlist)) {
2208 tx = list_entry(zlist.next, struct ksock_tx, tx_list);
2209 list_del(&tx->tx_list);
2210 LIBCFS_FREE(tx, tx->tx_desc_size);
2213 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2218 ksocknal_base_shutdown(void)
2220 struct ksock_sched *sched;
2221 struct ksock_peer_ni *peer_ni;
2224 CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
2225 atomic_read (&libcfs_kmemory));
2226 LASSERT (ksocknal_data.ksnd_nnets == 0);
2228 switch (ksocknal_data.ksnd_init) {
2233 case SOCKNAL_INIT_ALL:
2234 case SOCKNAL_INIT_DATA:
2235 hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list)
2238 LASSERT(list_empty(&ksocknal_data.ksnd_nets));
2239 LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
2240 LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
2241 LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
2242 LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));
2244 if (ksocknal_data.ksnd_schedulers != NULL) {
2245 cfs_percpt_for_each(sched, i,
2246 ksocknal_data.ksnd_schedulers) {
2248 LASSERT(list_empty(&sched->kss_tx_conns));
2249 LASSERT(list_empty(&sched->kss_rx_conns));
2250 LASSERT(list_empty(&sched->kss_zombie_noop_txs));
2251 LASSERT(sched->kss_nconns == 0);
2255 /* flag threads to terminate; wake and wait for them to die */
2256 ksocknal_data.ksnd_shuttingdown = 1;
2257 wake_up_all(&ksocknal_data.ksnd_connd_waitq);
2258 wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
2260 if (ksocknal_data.ksnd_schedulers != NULL) {
2261 cfs_percpt_for_each(sched, i,
2262 ksocknal_data.ksnd_schedulers)
2263 wake_up_all(&sched->kss_waitq);
2266 wait_var_event_warning(&ksocknal_data.ksnd_nthreads,
2267 ksocknal_data.ksnd_nthreads == 0,
2268 "waiting for %d threads to terminate\n",
2269 ksocknal_data.ksnd_nthreads);
2271 ksocknal_free_buffers();
2273 ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
2277 CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
2278 atomic_read (&libcfs_kmemory));
2280 module_put(THIS_MODULE);
2284 ksocknal_base_startup(void)
2286 struct ksock_sched *sched;
2290 LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
2291 LASSERT(ksocknal_data.ksnd_nnets == 0);
2293 memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
2295 hash_init(ksocknal_data.ksnd_peers);
2297 rwlock_init(&ksocknal_data.ksnd_global_lock);
2298 INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
2300 spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
2301 INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
2302 INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
2303 INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
2304 init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
2306 spin_lock_init(&ksocknal_data.ksnd_connd_lock);
2307 INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
2308 INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
2309 init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
2311 spin_lock_init(&ksocknal_data.ksnd_tx_lock);
2312 INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
2314 /* NB memset above zeros whole of ksocknal_data */
2316 /* flag lists/ptrs/locks initialised */
2317 ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
2318 if (!try_module_get(THIS_MODULE))
2321 /* Create a scheduler block per available CPT */
2322 ksocknal_data.ksnd_schedulers = cfs_percpt_alloc(lnet_cpt_table(),
2324 if (ksocknal_data.ksnd_schedulers == NULL)
2327 cfs_percpt_for_each(sched, i, ksocknal_data.ksnd_schedulers) {
2331 * make sure not to allocate more threads than there are
2332 * cores/CPUs in teh CPT
2334 nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
2335 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2336 nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
2339 * max to half of CPUs, assume another half should be
2340 * reserved for upper layer modules
2342 nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2345 sched->kss_nthreads_max = nthrs;
2348 spin_lock_init(&sched->kss_lock);
2349 INIT_LIST_HEAD(&sched->kss_rx_conns);
2350 INIT_LIST_HEAD(&sched->kss_tx_conns);
2351 INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
2352 init_waitqueue_head(&sched->kss_waitq);
2355 ksocknal_data.ksnd_connd_starting = 0;
2356 ksocknal_data.ksnd_connd_failed_stamp = 0;
2357 ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds();
2358 /* must have at least 2 connds to remain responsive to accepts while
2360 if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
2361 *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
2363 if (*ksocknal_tunables.ksnd_nconnds_max <
2364 *ksocknal_tunables.ksnd_nconnds) {
2365 ksocknal_tunables.ksnd_nconnds_max =
2366 ksocknal_tunables.ksnd_nconnds;
2369 for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
2371 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2372 ksocknal_data.ksnd_connd_starting++;
2373 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2376 snprintf(name, sizeof(name), "socknal_cd%02d", i);
2377 rc = ksocknal_thread_start(ksocknal_connd,
2378 (void *)((uintptr_t)i), name);
2380 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2381 ksocknal_data.ksnd_connd_starting--;
2382 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2383 CERROR("Can't spawn socknal connd: %d\n", rc);
2388 rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
2390 CERROR ("Can't spawn socknal reaper: %d\n", rc);
2394 /* flag everything initialised */
2395 ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
2400 ksocknal_base_shutdown();
2405 ksocknal_debug_peerhash(struct lnet_ni *ni)
2407 struct ksock_peer_ni *peer_ni;
2410 read_lock(&ksocknal_data.ksnd_global_lock);
2412 hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) {
2413 struct ksock_route *route;
2414 struct ksock_conn *conn;
2416 if (peer_ni->ksnp_ni != ni)
2419 CWARN("Active peer_ni on shutdown: %s, ref %d, "
2420 "closing %d, accepting %d, err %d, zcookie %llu, "
2421 "txq %d, zc_req %d\n", libcfs_id2str(peer_ni->ksnp_id),
2422 refcount_read(&peer_ni->ksnp_refcount),
2423 peer_ni->ksnp_closing,
2424 peer_ni->ksnp_accepting, peer_ni->ksnp_error,
2425 peer_ni->ksnp_zc_next_cookie,
2426 !list_empty(&peer_ni->ksnp_tx_queue),
2427 !list_empty(&peer_ni->ksnp_zc_req_list));
2429 list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list) {
2430 CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
2431 refcount_read(&route->ksnr_refcount),
2432 route->ksnr_scheduled, route->ksnr_connecting,
2433 route->ksnr_connected, route->ksnr_deleted);
2436 list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) {
2437 CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
2438 refcount_read(&conn->ksnc_conn_refcount),
2439 refcount_read(&conn->ksnc_sock_refcount),
2440 conn->ksnc_type, conn->ksnc_closing);
2445 read_unlock(&ksocknal_data.ksnd_global_lock);
2450 ksocknal_shutdown(struct lnet_ni *ni)
2452 struct ksock_net *net = ni->ni_data;
2453 struct lnet_process_id anyid = {
2454 .nid = LNET_NID_ANY,
2455 .pid = LNET_PID_ANY,
2459 LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
2460 LASSERT(ksocknal_data.ksnd_nnets > 0);
2462 /* prevent new peers */
2463 atomic_add(SOCKNAL_SHUTDOWN_BIAS, &net->ksnn_npeers);
2465 /* Delete all peers */
2466 ksocknal_del_peer(ni, anyid, 0);
2468 /* Wait for all peer_ni state to clean up */
2469 wait_var_event_warning(&net->ksnn_npeers,
2470 atomic_read(&net->ksnn_npeers) ==
2471 SOCKNAL_SHUTDOWN_BIAS,
2472 "waiting for %d peers to disconnect\n",
2473 ksocknal_debug_peerhash(ni) +
2474 atomic_read(&net->ksnn_npeers) -
2475 SOCKNAL_SHUTDOWN_BIAS);
2477 for (i = 0; i < net->ksnn_ninterfaces; i++) {
2478 LASSERT(net->ksnn_interfaces[i].ksni_npeers == 0);
2479 LASSERT(net->ksnn_interfaces[i].ksni_nroutes == 0);
2482 list_del(&net->ksnn_list);
2483 LIBCFS_FREE(net, sizeof(*net));
2485 ksocknal_data.ksnd_nnets--;
2486 if (ksocknal_data.ksnd_nnets == 0)
2487 ksocknal_base_shutdown();
2491 ksocknal_search_new_ipif(struct ksock_net *net)
2496 for (i = 0; i < net->ksnn_ninterfaces; i++) {
2497 char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
2498 char *colon = strchr(ifnam, ':');
2500 struct ksock_net *tmp;
2503 if (colon != NULL) /* ignore alias device */
2506 list_for_each_entry(tmp, &ksocknal_data.ksnd_nets,
2508 for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
2509 char *ifnam2 = &tmp->ksnn_interfaces[j].\
2511 char *colon2 = strchr(ifnam2, ':');
2516 found = strcmp(ifnam, ifnam2) == 0;
2533 ksocknal_start_schedulers(struct ksock_sched *sched)
2539 if (sched->kss_nthreads == 0) {
2540 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2541 nthrs = sched->kss_nthreads_max;
2543 nthrs = cfs_cpt_weight(lnet_cpt_table(),
2545 nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2546 nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
2548 nthrs = min(nthrs, sched->kss_nthreads_max);
2550 LASSERT(sched->kss_nthreads <= sched->kss_nthreads_max);
2551 /* increase two threads if there is new interface */
2552 nthrs = min(2, sched->kss_nthreads_max - sched->kss_nthreads);
2555 for (i = 0; i < nthrs; i++) {
2559 id = KSOCK_THREAD_ID(sched->kss_cpt, sched->kss_nthreads + i);
2560 snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
2561 sched->kss_cpt, (int)KSOCK_THREAD_SID(id));
2563 rc = ksocknal_thread_start(ksocknal_scheduler,
2568 CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
2569 sched->kss_cpt, (int) KSOCK_THREAD_SID(id), rc);
2573 sched->kss_nthreads += i;
2578 ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts)
2580 int newif = ksocknal_search_new_ipif(net);
2584 if (ncpts > 0 && ncpts > cfs_cpt_number(lnet_cpt_table()))
2587 for (i = 0; i < ncpts; i++) {
2588 struct ksock_sched *sched;
2589 int cpt = (cpts == NULL) ? i : cpts[i];
2591 LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
2592 sched = ksocknal_data.ksnd_schedulers[cpt];
2594 if (!newif && sched->kss_nthreads > 0)
2597 rc = ksocknal_start_schedulers(sched);
2605 ksocknal_startup(struct lnet_ni *ni)
2607 struct ksock_net *net;
2608 struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables;
2609 struct ksock_interface *ksi = NULL;
2610 struct lnet_inetdev *ifaces = NULL;
2614 LASSERT (ni->ni_net->net_lnd == &the_ksocklnd);
2616 if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
2617 rc = ksocknal_base_startup();
2622 LIBCFS_ALLOC(net, sizeof(*net));
2626 net->ksnn_incarnation = ktime_get_real_ns();
2628 net_tunables = &ni->ni_net->net_tunables;
2630 if (net_tunables->lct_peer_timeout == -1)
2631 net_tunables->lct_peer_timeout =
2632 *ksocknal_tunables.ksnd_peertimeout;
2634 if (net_tunables->lct_max_tx_credits == -1)
2635 net_tunables->lct_max_tx_credits =
2636 *ksocknal_tunables.ksnd_credits;
2638 if (net_tunables->lct_peer_tx_credits == -1)
2639 net_tunables->lct_peer_tx_credits =
2640 *ksocknal_tunables.ksnd_peertxcredits;
2642 if (net_tunables->lct_peer_tx_credits >
2643 net_tunables->lct_max_tx_credits)
2644 net_tunables->lct_peer_tx_credits =
2645 net_tunables->lct_max_tx_credits;
2647 if (net_tunables->lct_peer_rtr_credits == -1)
2648 net_tunables->lct_peer_rtr_credits =
2649 *ksocknal_tunables.ksnd_peerrtrcredits;
2651 rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns);
2655 if (!ni->ni_interfaces[0]) {
2656 ksi = &net->ksnn_interfaces[0];
2658 /* Use the first discovered interface */
2659 net->ksnn_ninterfaces = 1;
2660 ni->ni_dev_cpt = ifaces[0].li_cpt;
2661 ksi->ksni_ipaddr = ifaces[0].li_ipaddr;
2662 ksi->ksni_index = ksocknal_ip2index(ksi->ksni_ipaddr, ni);
2663 ksi->ksni_netmask = ifaces[0].li_netmask;
2664 strlcpy(ksi->ksni_name, ifaces[0].li_name,
2665 sizeof(ksi->ksni_name));
2667 /* Before Multi-Rail ksocklnd would manage
2668 * multiple interfaces with its own tcp bonding.
2669 * If we encounter an old configuration using
2670 * this tcp bonding approach then we need to
2671 * handle more than one ni_interfaces.
2673 * In Multi-Rail configuration only ONE ni_interface
2674 * should exist. Each IP alias should be mapped to
2675 * each 'struct net_ni'.
2677 for (i = 0; i < LNET_INTERFACES_NUM; i++) {
2680 if (!ni->ni_interfaces[i])
2683 for (j = 0; j < LNET_INTERFACES_NUM; j++) {
2684 if (i != j && ni->ni_interfaces[j] &&
2685 strcmp(ni->ni_interfaces[i],
2686 ni->ni_interfaces[j]) == 0) {
2688 CERROR("ksocklnd: found duplicate %s at %d and %d, rc = %d\n",
2689 ni->ni_interfaces[i], i, j, rc);
2694 for (j = 0; j < rc; j++) {
2695 if (strcmp(ifaces[j].li_name,
2696 ni->ni_interfaces[i]) != 0)
2699 ksi = &net->ksnn_interfaces[j];
2700 ni->ni_dev_cpt = ifaces[j].li_cpt;
2701 ksi->ksni_ipaddr = ifaces[j].li_ipaddr;
2703 ksocknal_ip2index(ksi->ksni_ipaddr, ni);
2704 ksi->ksni_netmask = ifaces[j].li_netmask;
2705 strlcpy(ksi->ksni_name, ifaces[j].li_name,
2706 sizeof(ksi->ksni_name));
2707 net->ksnn_ninterfaces++;
2711 /* ni_interfaces don't map to all network interfaces */
2712 if (!ksi || net->ksnn_ninterfaces != i) {
2713 CERROR("ksocklnd: requested %d but only %d interfaces found\n",
2714 i, net->ksnn_ninterfaces);
2719 /* call it before add it to ksocknal_data.ksnd_nets */
2720 rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
2725 ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ksi->ksni_ipaddr);
2726 list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
2728 ksocknal_data.ksnd_nnets++;
2733 LIBCFS_FREE(net, sizeof(*net));
2735 if (ksocknal_data.ksnd_nnets == 0)
2736 ksocknal_base_shutdown();
2742 static void __exit ksocklnd_exit(void)
2744 lnet_unregister_lnd(&the_ksocklnd);
2747 static const struct lnet_lnd the_ksocklnd = {
2748 .lnd_type = SOCKLND,
2749 .lnd_startup = ksocknal_startup,
2750 .lnd_shutdown = ksocknal_shutdown,
2751 .lnd_ctl = ksocknal_ctl,
2752 .lnd_send = ksocknal_send,
2753 .lnd_recv = ksocknal_recv,
2754 .lnd_notify_peer_down = ksocknal_notify_gw_down,
2755 .lnd_accept = ksocknal_accept,
2758 static int __init ksocklnd_init(void)
2762 /* check ksnr_connected/connecting field large enough */
2763 BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4);
2764 BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN);
2766 rc = ksocknal_tunables_init();
2770 lnet_register_lnd(&the_ksocklnd);
2775 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
2776 MODULE_DESCRIPTION("TCP Socket LNet Network Driver");
2777 MODULE_VERSION("2.8.0");
2778 MODULE_LICENSE("GPL");
2780 module_init(ksocklnd_init);
2781 module_exit(ksocklnd_exit);