X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fsocklnd%2Fsocklnd.c;h=c71b36acbb7f5390123be7fb67be7aa91d2ae0d8;hp=5d60a0a067656fea6b8ed91f2d16e976a8c4f997;hb=HEAD;hpb=1aae733c16161513b07d7f8cc046299e2de5aad3 diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 5d60a0a..ecea6a9 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lnet/klnds/socklnd/socklnd.c * @@ -37,112 +36,167 @@ * Author: Eric Barton */ -#include "socklnd.h" +#include #include +#include +#include +#include +#include "socklnd.h" -static struct lnet_lnd the_ksocklnd; +static const struct lnet_lnd the_ksocklnd; struct ksock_nal_data ksocknal_data; -static struct ksock_interface * -ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip) +static int ksocknal_ip2index(struct sockaddr *addr, struct lnet_ni *ni, + int *dev_status) { - struct ksock_net *net = ni->ni_data; - int i; - struct ksock_interface *iface; + struct net_device *dev; + int ret = -1; + DECLARE_CONST_IN_IFADDR(ifa); + + *dev_status = -1; + + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) + return ret; + + rcu_read_lock(); + for_each_netdev(ni->ni_net_ns, dev) { + int flags = dev_get_flags(dev); + struct in_device *in_dev; - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(i < LNET_INTERFACES_NUM); - iface = &net->ksnn_interfaces[i]; + if (flags & IFF_LOOPBACK) /* skip the loopback IF */ + continue; + + if (!(flags & IFF_UP)) + continue; + + switch (addr->sa_family) { + case AF_INET: + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + continue; + + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (ifa->ifa_local == + ((struct sockaddr_in *)addr)->sin_addr.s_addr) + ret = dev->ifindex; + } + endfor_ifa(in_dev); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { + struct inet6_dev *in6_dev; + const struct inet6_ifaddr *ifa6; + struct sockaddr_in6 *addr6 = (struct sockaddr_in6*)addr; + + in6_dev = __in6_dev_get(dev); + if (!in6_dev) + continue; - if (iface->ksni_ipaddr == ip) - return iface; + list_for_each_entry_rcu(ifa6, &in6_dev->addr_list, if_list) { + if (ipv6_addr_cmp(&ifa6->addr, + &addr6->sin6_addr) == 0) + ret = dev->ifindex; + } + break; + } +#endif /* IS_ENABLED(CONFIG_IPV6) */ + } + if (ret >= 0) + break; } - return NULL; + rcu_read_unlock(); + if (ret >= 0) + *dev_status = 1; + + if ((ret == -1) || + ((dev->reg_state == NETREG_UNREGISTERING) || + ((dev->operstate != IF_OPER_UP) && + (dev->operstate != IF_OPER_UNKNOWN))) || + (lnet_get_link_status(dev) == 0)) + *dev_status = 0; + + return ret; } -static struct ksock_route * -ksocknal_create_route(__u32 ipaddr, int port) +static struct ksock_conn_cb * +ksocknal_create_conn_cb(struct sockaddr *addr) { - struct ksock_route *route; - - LIBCFS_ALLOC (route, sizeof (*route)); - if (route == NULL) - return (NULL); - - atomic_set (&route->ksnr_refcount, 1); - route->ksnr_peer = NULL; - route->ksnr_retry_interval = 0; /* OK to connect at any time */ - route->ksnr_ipaddr = ipaddr; - route->ksnr_port = port; - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - route->ksnr_connected = 0; - route->ksnr_deleted = 0; - route->ksnr_conn_count = 0; - route->ksnr_share_count = 0; - - return (route); + struct ksock_conn_cb *conn_cb; + + LIBCFS_ALLOC(conn_cb, sizeof(*conn_cb)); + if (!conn_cb) + return NULL; + + refcount_set(&conn_cb->ksnr_refcount, 1); + conn_cb->ksnr_peer = NULL; + conn_cb->ksnr_retry_interval = 0; /* OK to connect at any time */ + rpc_copy_addr((struct sockaddr *)&conn_cb->ksnr_addr, addr); + rpc_set_port((struct sockaddr *)&conn_cb->ksnr_addr, + rpc_get_port(addr)); + conn_cb->ksnr_scheduled = 0; + conn_cb->ksnr_connecting = 0; + conn_cb->ksnr_connected = 0; + conn_cb->ksnr_deleted = 0; + conn_cb->ksnr_conn_count = 0; + conn_cb->ksnr_ctrl_conn_count = 0; + conn_cb->ksnr_blki_conn_count = 0; + conn_cb->ksnr_blko_conn_count = 0; + conn_cb->ksnr_max_conns = 0; + conn_cb->ksnr_busy_retry_count = 0; + + return conn_cb; } void -ksocknal_destroy_route(struct ksock_route *route) +ksocknal_destroy_conn_cb(struct ksock_conn_cb *conn_cb) { - LASSERT (atomic_read(&route->ksnr_refcount) == 0); + LASSERT(refcount_read(&conn_cb->ksnr_refcount) == 0); - if (route->ksnr_peer != NULL) - ksocknal_peer_decref(route->ksnr_peer); + if (conn_cb->ksnr_peer) + ksocknal_peer_decref(conn_cb->ksnr_peer); - LIBCFS_FREE (route, sizeof (*route)); + LIBCFS_FREE(conn_cb, sizeof(*conn_cb)); } -static int -ksocknal_create_peer(struct ksock_peer_ni **peerp, struct lnet_ni *ni, - struct lnet_process_id id) +static struct ksock_peer_ni * +ksocknal_create_peer(struct lnet_ni *ni, struct lnet_processid *id) { - int cpt = lnet_cpt_of_nid(id.nid, ni); + int cpt = lnet_nid2cpt(&id->nid, ni); struct ksock_net *net = ni->ni_data; struct ksock_peer_ni *peer_ni; - LASSERT(id.nid != LNET_NID_ANY); - LASSERT(id.pid != LNET_PID_ANY); + LASSERT(!LNET_NID_IS_ANY(&id->nid)); + LASSERT(id->pid != LNET_PID_ANY); LASSERT(!in_interrupt()); + if (!atomic_inc_unless_negative(&net->ksnn_npeers)) { + CERROR("Can't create peer_ni: network shutdown\n"); + return ERR_PTR(-ESHUTDOWN); + } + LIBCFS_CPT_ALLOC(peer_ni, lnet_cpt_table(), cpt, sizeof(*peer_ni)); - if (peer_ni == NULL) - return -ENOMEM; + if (!peer_ni) { + atomic_dec(&net->ksnn_npeers); + return ERR_PTR(-ENOMEM); + } peer_ni->ksnp_ni = ni; - peer_ni->ksnp_id = id; - atomic_set(&peer_ni->ksnp_refcount, 1); /* 1 ref for caller */ + peer_ni->ksnp_id = *id; + refcount_set(&peer_ni->ksnp_refcount, 1); /* 1 ref for caller */ peer_ni->ksnp_closing = 0; peer_ni->ksnp_accepting = 0; peer_ni->ksnp_proto = NULL; peer_ni->ksnp_last_alive = 0; peer_ni->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; + peer_ni->ksnp_conn_cb = NULL; INIT_LIST_HEAD(&peer_ni->ksnp_conns); - INIT_LIST_HEAD(&peer_ni->ksnp_routes); INIT_LIST_HEAD(&peer_ni->ksnp_tx_queue); INIT_LIST_HEAD(&peer_ni->ksnp_zc_req_list); spin_lock_init(&peer_ni->ksnp_lock); - spin_lock_bh(&net->ksnn_lock); - - if (net->ksnn_shutdown) { - spin_unlock_bh(&net->ksnn_lock); - - LIBCFS_FREE(peer_ni, sizeof(*peer_ni)); - CERROR("Can't create peer_ni: network shutdown\n"); - return -ESHUTDOWN; - } - - net->ksnn_npeers++; - - spin_unlock_bh(&net->ksnn_lock); - - *peerp = peer_ni; - return 0; + return peer_ni; } void @@ -151,55 +205,53 @@ ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni) struct ksock_net *net = peer_ni->ksnp_ni->ni_data; CDEBUG (D_NET, "peer_ni %s %p deleted\n", - libcfs_id2str(peer_ni->ksnp_id), peer_ni); + libcfs_idstr(&peer_ni->ksnp_id), peer_ni); - LASSERT(atomic_read(&peer_ni->ksnp_refcount) == 0); + LASSERT(refcount_read(&peer_ni->ksnp_refcount) == 0); LASSERT(peer_ni->ksnp_accepting == 0); LASSERT(list_empty(&peer_ni->ksnp_conns)); - LASSERT(list_empty(&peer_ni->ksnp_routes)); + LASSERT(peer_ni->ksnp_conn_cb == NULL); LASSERT(list_empty(&peer_ni->ksnp_tx_queue)); LASSERT(list_empty(&peer_ni->ksnp_zc_req_list)); LIBCFS_FREE(peer_ni, sizeof(*peer_ni)); - /* NB a peer_ni's connections and routes keep a reference on their peer_ni - * until they are destroyed, so we can be assured that _all_ state to - * do with this peer_ni has been cleaned up when its refcount drops to - * zero. */ - spin_lock_bh(&net->ksnn_lock); - net->ksnn_npeers--; - spin_unlock_bh(&net->ksnn_lock); + /* NB a peer_ni's connections and conn_cb keep a reference on their + * peer_ni until they are destroyed, so we can be assured that _all_ + * state to do with this peer_ni has been cleaned up when its refcount + * drops to zero. + */ + if (atomic_dec_and_test(&net->ksnn_npeers)) + wake_up_var(&net->ksnn_npeers); } struct ksock_peer_ni * -ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_processid *id) { - struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); - struct list_head *tmp; struct ksock_peer_ni *peer_ni; + unsigned long hash = nidhash(&id->nid); - list_for_each(tmp, peer_list) { - peer_ni = list_entry(tmp, struct ksock_peer_ni, ksnp_list); - + hash_for_each_possible(ksocknal_data.ksnd_peers, peer_ni, + ksnp_list, hash) { LASSERT(!peer_ni->ksnp_closing); if (peer_ni->ksnp_ni != ni) continue; - if (peer_ni->ksnp_id.nid != id.nid || - peer_ni->ksnp_id.pid != id.pid) + if (!nid_same(&peer_ni->ksnp_id.nid, &id->nid) || + peer_ni->ksnp_id.pid != id->pid) continue; CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d)\n", - peer_ni, libcfs_id2str(id), - atomic_read(&peer_ni->ksnp_refcount)); + peer_ni, libcfs_idstr(id), + refcount_read(&peer_ni->ksnp_refcount)); return peer_ni; } return NULL; } struct ksock_peer_ni * -ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) +ksocknal_find_peer(struct lnet_ni *ni, struct lnet_processid *id) { struct ksock_peer_ni *peer_ni; @@ -209,270 +261,351 @@ ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) ksocknal_peer_addref(peer_ni); read_unlock(&ksocknal_data.ksnd_global_lock); - return (peer_ni); + return peer_ni; } static void ksocknal_unlink_peer_locked(struct ksock_peer_ni *peer_ni) { - int i; - __u32 ip; - struct ksock_interface *iface; - - for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++) { - LASSERT(i < LNET_INTERFACES_NUM); - ip = peer_ni->ksnp_passive_ips[i]; - - iface = ksocknal_ip2iface(peer_ni->ksnp_ni, ip); - /* - * All IPs in peer_ni->ksnp_passive_ips[] come from the - * interface list, therefore the call must succeed. - */ - LASSERT(iface != NULL); - - CDEBUG(D_NET, "peer_ni=%p iface=%p ksni_nroutes=%d\n", - peer_ni, iface, iface->ksni_nroutes); - iface->ksni_npeers--; - } - LASSERT(list_empty(&peer_ni->ksnp_conns)); - LASSERT(list_empty(&peer_ni->ksnp_routes)); + LASSERT(peer_ni->ksnp_conn_cb == NULL); LASSERT(!peer_ni->ksnp_closing); peer_ni->ksnp_closing = 1; - list_del(&peer_ni->ksnp_list); + hlist_del(&peer_ni->ksnp_list); /* lose peerlist's ref */ ksocknal_peer_decref(peer_ni); } + +static void +ksocknal_dump_peer_debug_info(struct ksock_peer_ni *peer_ni) +{ + struct ksock_conn *conn; + struct list_head *ctmp; + struct list_head *txtmp; + int ccount = 0; + int txcount = 0; + + list_for_each(ctmp, &peer_ni->ksnp_conns) { + conn = list_entry(ctmp, struct ksock_conn, ksnc_list); + + if (!list_empty(&conn->ksnc_tx_queue)) + list_for_each(txtmp, &conn->ksnc_tx_queue) txcount++; + + CDEBUG(D_CONSOLE, "Conn %d [type, closing, crefcnt, srefcnt]: %d, %d, %d, %d\n", + ccount, + conn->ksnc_type, + conn->ksnc_closing, + refcount_read(&conn->ksnc_conn_refcount), + refcount_read(&conn->ksnc_sock_refcount)); + CDEBUG(D_CONSOLE, "Conn %d rx [scheduled, ready, state]: %d, %d, %d\n", + ccount, + conn->ksnc_rx_scheduled, + conn->ksnc_rx_ready, + conn->ksnc_rx_state); + CDEBUG(D_CONSOLE, "Conn %d tx [txqcnt, scheduled, last_post, ready, deadline]: %d, %d, %lld, %d, %lld\n", + ccount, + txcount, + conn->ksnc_tx_scheduled, + conn->ksnc_tx_last_post, + conn->ksnc_rx_ready, + conn->ksnc_rx_deadline); + + if (conn->ksnc_scheduler) + CDEBUG(D_CONSOLE, "Conn %d sched [nconns, cpt]: %d, %d\n", + ccount, + conn->ksnc_scheduler->kss_nconns, + conn->ksnc_scheduler->kss_cpt); + + txcount = 0; + ccount++; + } +} + static int ksocknal_get_peer_info(struct lnet_ni *ni, int index, - struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip, + struct lnet_processid *id, __u32 *myip, __u32 *peer_ip, int *port, int *conn_count, int *share_count) { struct ksock_peer_ni *peer_ni; - struct list_head *ptmp; - struct ksock_route *route; - struct list_head *rtmp; + struct ksock_conn_cb *conn_cb; int i; - int j; int rc = -ENOENT; + struct ksock_net *net; read_lock(&ksocknal_data.ksnd_global_lock); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(ptmp, struct ksock_peer_ni, ksnp_list); + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) { - if (peer_ni->ksnp_ni != ni) - continue; + if (peer_ni->ksnp_ni != ni) + continue; + if (index-- > 0) + continue; - if (peer_ni->ksnp_n_passive_ips == 0 && - list_empty(&peer_ni->ksnp_routes)) { - if (index-- > 0) - continue; + *id = peer_ni->ksnp_id; + conn_cb = peer_ni->ksnp_conn_cb; + if (conn_cb == NULL) { + *myip = 0; + *peer_ip = 0; + *port = 0; + *conn_count = 0; + *share_count = 0; + rc = 0; + } else { + ksocknal_dump_peer_debug_info(peer_ni); + + if (conn_cb->ksnr_addr.ss_family == AF_INET) { + struct sockaddr_in *sa = + (void *)&conn_cb->ksnr_addr; + net = ni->ni_data; + rc = choose_ipv4_src(myip, + net->ksnn_interface.ksni_index, + ntohl(sa->sin_addr.s_addr), + ni->ni_net_ns); + *peer_ip = ntohl(sa->sin_addr.s_addr); + *port = ntohs(sa->sin_port); + + } else { + *myip = 0xFFFFFFFF; + *peer_ip = 0xFFFFFFFF; + *port = 0; + rc = -ENOTSUPP; + } + *conn_count = conn_cb->ksnr_conn_count; + *share_count = 1; + } + break; + } + read_unlock(&ksocknal_data.ksnd_global_lock); + return rc; +} - *id = peer_ni->ksnp_id; - *myip = 0; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } +static unsigned int +ksocknal_get_conns_per_peer(struct ksock_peer_ni *peer_ni) +{ + struct lnet_ni *ni = peer_ni->ksnp_ni; + struct lnet_ioctl_config_socklnd_tunables *tunables; - for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) { - if (index-- > 0) - continue; + LASSERT(ni); - *id = peer_ni->ksnp_id; - *myip = peer_ni->ksnp_passive_ips[j]; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_sock; - list_for_each(rtmp, &peer_ni->ksnp_routes) { - if (index-- > 0) - continue; + return tunables->lnd_conns_per_peer; +} - route = list_entry(rtmp, struct ksock_route, - ksnr_list); - - *id = peer_ni->ksnp_id; - *myip = route->ksnr_myipaddr; - *peer_ip = route->ksnr_ipaddr; - *port = route->ksnr_port; - *conn_count = route->ksnr_conn_count; - *share_count = route->ksnr_share_count; - rc = 0; - goto out; - } - } +static void +ksocknal_incr_conn_count(struct ksock_conn_cb *conn_cb, + int type) +{ + conn_cb->ksnr_conn_count++; + + /* check if all connections of the given type got created */ + switch (type) { + case SOCKLND_CONN_CONTROL: + conn_cb->ksnr_ctrl_conn_count++; + /* there's a single control connection per peer, + * two in case of loopback + */ + conn_cb->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_BULK_IN: + conn_cb->ksnr_blki_conn_count++; + if (conn_cb->ksnr_blki_conn_count >= conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_BULK_OUT: + conn_cb->ksnr_blko_conn_count++; + if (conn_cb->ksnr_blko_conn_count >= conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected |= BIT(type); + break; + case SOCKLND_CONN_ANY: + if (conn_cb->ksnr_conn_count >= conn_cb->ksnr_max_conns) + conn_cb->ksnr_connected |= BIT(type); + break; + default: + LBUG(); + break; } -out: - read_unlock(&ksocknal_data.ksnd_global_lock); - return rc; + + CDEBUG(D_NET, "Add conn type %d, ksnr_connected %x ksnr_max_conns %d\n", + type, conn_cb->ksnr_connected, conn_cb->ksnr_max_conns); } + static void -ksocknal_associate_route_conn_locked(struct ksock_route *route, struct ksock_conn *conn) +ksocknal_decr_conn_count(struct ksock_conn_cb *conn_cb, + int type) +{ + conn_cb->ksnr_conn_count--; + + /* check if all connections of the given type got created */ + switch (type) { + case SOCKLND_CONN_CONTROL: + conn_cb->ksnr_ctrl_conn_count--; + /* there's a single control connection per peer, + * two in case of loopback + */ + if (conn_cb->ksnr_ctrl_conn_count == 0) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_BULK_IN: + conn_cb->ksnr_blki_conn_count--; + if (conn_cb->ksnr_blki_conn_count == 0) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_BULK_OUT: + conn_cb->ksnr_blko_conn_count--; + if (conn_cb->ksnr_blko_conn_count == 0) + conn_cb->ksnr_connected &= ~BIT(type); + break; + case SOCKLND_CONN_ANY: + if (conn_cb->ksnr_conn_count == 0) + conn_cb->ksnr_connected &= ~BIT(type); + break; + default: + LBUG(); + break; + } + + CDEBUG(D_NET, "Del conn type %d, ksnr_connected %x ksnr_max_conns %d\n", + type, conn_cb->ksnr_connected, conn_cb->ksnr_max_conns); +} + +static void +ksocknal_associate_cb_conn_locked(struct ksock_conn_cb *conn_cb, + struct ksock_conn *conn) { - struct ksock_peer_ni *peer_ni = route->ksnr_peer; int type = conn->ksnc_type; - struct ksock_interface *iface; - - conn->ksnc_route = route; - ksocknal_route_addref(route); - - if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { - if (route->ksnr_myipaddr == 0) { - /* route wasn't bound locally yet (the initial route) */ - CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n", - libcfs_id2str(peer_ni->ksnp_id), - &route->ksnr_ipaddr, - &conn->ksnc_myipaddr); - } else { - CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h " - "to %pI4h\n", libcfs_id2str(peer_ni->ksnp_id), - &route->ksnr_ipaddr, - &route->ksnr_myipaddr, - &conn->ksnc_myipaddr); - - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes--; - } - route->ksnr_myipaddr = conn->ksnc_myipaddr; - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes++; - } - route->ksnr_connected |= (1<ksnr_conn_count++; + conn->ksnc_conn_cb = conn_cb; + ksocknal_conn_cb_addref(conn_cb); + ksocknal_incr_conn_count(conn_cb, type); - /* Successful connection => further attempts can - * proceed immediately */ - route->ksnr_retry_interval = 0; + /* Successful connection => further attempts can + * proceed immediately + */ + conn_cb->ksnr_retry_interval = 0; } static void -ksocknal_add_route_locked(struct ksock_peer_ni *peer_ni, struct ksock_route *route) +ksocknal_add_conn_cb_locked(struct ksock_peer_ni *peer_ni, + struct ksock_conn_cb *conn_cb) { - struct list_head *tmp; struct ksock_conn *conn; - struct ksock_route *route2; + struct ksock_net *net = peer_ni->ksnp_ni->ni_data; LASSERT(!peer_ni->ksnp_closing); - LASSERT(route->ksnr_peer == NULL); - LASSERT(!route->ksnr_scheduled); - LASSERT(!route->ksnr_connecting); - LASSERT(route->ksnr_connected == 0); - - /* LASSERT(unique) */ - list_for_each(tmp, &peer_ni->ksnp_routes) { - route2 = list_entry(tmp, struct ksock_route, ksnr_list); - - if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { - CERROR("Duplicate route %s %pI4h\n", - libcfs_id2str(peer_ni->ksnp_id), - &route->ksnr_ipaddr); - LBUG(); - } - } + LASSERT(!conn_cb->ksnr_peer); + LASSERT(!conn_cb->ksnr_scheduled); + LASSERT(!conn_cb->ksnr_connecting); + LASSERT(conn_cb->ksnr_connected == 0); - route->ksnr_peer = peer_ni; + conn_cb->ksnr_peer = peer_ni; ksocknal_peer_addref(peer_ni); - /* peer_ni's routelist takes over my ref on 'route' */ - list_add_tail(&route->ksnr_list, &peer_ni->ksnp_routes); - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); + /* peer_ni's route list takes over my ref on 'route' */ + peer_ni->ksnp_conn_cb = conn_cb; + net->ksnn_interface.ksni_nroutes++; - if (conn->ksnc_ipaddr != route->ksnr_ipaddr) + list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) { + if (!rpc_cmp_addr((struct sockaddr *)&conn->ksnc_peeraddr, + (struct sockaddr *)&conn_cb->ksnr_addr)) continue; - - ksocknal_associate_route_conn_locked(route, conn); - /* keep going (typed routes) */ + CDEBUG(D_NET, "call ksocknal_associate_cb_conn_locked\n"); + ksocknal_associate_cb_conn_locked(conn_cb, conn); + /* keep going (typed conns) */ } } static void -ksocknal_del_route_locked(struct ksock_route *route) +ksocknal_del_conn_cb_locked(struct ksock_conn_cb *conn_cb) { - struct ksock_peer_ni *peer_ni = route->ksnr_peer; - struct ksock_interface *iface; + struct ksock_peer_ni *peer_ni = conn_cb->ksnr_peer; struct ksock_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; + struct ksock_conn *cnxt; + struct ksock_net *net; - LASSERT(!route->ksnr_deleted); + LASSERT(!conn_cb->ksnr_deleted); /* Close associated conns */ - list_for_each_safe(ctmp, cnxt, &peer_ni->ksnp_conns) { - conn = list_entry(ctmp, struct ksock_conn, ksnc_list); - - if (conn->ksnc_route != route) + list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns, ksnc_list) { + if (conn->ksnc_conn_cb != conn_cb) continue; ksocknal_close_conn_locked(conn, 0); } - if (route->ksnr_myipaddr != 0) { - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes--; - } + net = (struct ksock_net *)(peer_ni->ksnp_ni->ni_data); + net->ksnn_interface.ksni_nroutes--; + LASSERT(net->ksnn_interface.ksni_nroutes >= 0); - route->ksnr_deleted = 1; - list_del(&route->ksnr_list); - ksocknal_route_decref(route); /* drop peer_ni's ref */ + conn_cb->ksnr_deleted = 1; + ksocknal_conn_cb_decref(conn_cb); /* drop peer_ni's ref */ + peer_ni->ksnp_conn_cb = NULL; - if (list_empty(&peer_ni->ksnp_routes) && - list_empty(&peer_ni->ksnp_conns)) { + if (list_empty(&peer_ni->ksnp_conns)) { /* I've just removed the last route to a peer_ni with no active - * connections */ + * connections + */ ksocknal_unlink_peer_locked(peer_ni); } } +unsigned int +ksocknal_get_conn_count_by_type(struct ksock_conn_cb *conn_cb, + int type) +{ + unsigned int count = 0; + + switch (type) { + case SOCKLND_CONN_CONTROL: + count = conn_cb->ksnr_ctrl_conn_count; + break; + case SOCKLND_CONN_BULK_IN: + count = conn_cb->ksnr_blki_conn_count; + break; + case SOCKLND_CONN_BULK_OUT: + count = conn_cb->ksnr_blko_conn_count; + break; + case SOCKLND_CONN_ANY: + count = conn_cb->ksnr_conn_count; + break; + default: + LBUG(); + break; + } + + return count; +} + int -ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, - int port) +ksocknal_add_peer(struct lnet_ni *ni, struct lnet_processid *id, + struct sockaddr *addr) { - struct list_head *tmp; struct ksock_peer_ni *peer_ni; struct ksock_peer_ni *peer2; - struct ksock_route *route; - struct ksock_route *route2; - int rc; + struct ksock_conn_cb *conn_cb; - if (id.nid == LNET_NID_ANY || - id.pid == LNET_PID_ANY) - return (-EINVAL); + if (LNET_NID_IS_ANY(&id->nid) || + id->pid == LNET_PID_ANY) + return (-EINVAL); - /* Have a brand new peer_ni ready... */ - rc = ksocknal_create_peer(&peer_ni, ni, id); - if (rc != 0) - return rc; + /* Have a brand new peer_ni ready... */ + peer_ni = ksocknal_create_peer(ni, id); + if (IS_ERR(peer_ni)) + return PTR_ERR(peer_ni); - route = ksocknal_create_route (ipaddr, port); - if (route == NULL) { - ksocknal_peer_decref(peer_ni); - return (-ENOMEM); - } + conn_cb = ksocknal_create_conn_cb(addr); + if (!conn_cb) { + ksocknal_peer_decref(peer_ni); + return -ENOMEM; + } write_lock_bh(&ksocknal_data.ksnd_global_lock); - /* always called with a ref on ni, so shutdown can't have started */ - LASSERT(((struct ksock_net *) ni->ni_data)->ksnn_shutdown == 0); + /* always called with a ref on ni, so shutdown can't have started */ + LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) + >= 0); peer2 = ksocknal_find_peer_locked(ni, id); if (peer2 != NULL) { @@ -480,25 +613,20 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, peer_ni = peer2; } else { /* peer_ni table takes my ref on peer_ni */ - list_add_tail(&peer_ni->ksnp_list, - ksocknal_nid2peerlist(id.nid)); + hash_add(ksocknal_data.ksnd_peers, &peer_ni->ksnp_list, + nidhash(&id->nid)); } - route2 = NULL; - list_for_each(tmp, &peer_ni->ksnp_routes) { - route2 = list_entry(tmp, struct ksock_route, ksnr_list); - - if (route2->ksnr_ipaddr == ipaddr) - break; - - route2 = NULL; - } - if (route2 == NULL) { - ksocknal_add_route_locked(peer_ni, route); - route->ksnr_share_count++; + if (peer_ni->ksnp_conn_cb) { + ksocknal_conn_cb_decref(conn_cb); } else { - ksocknal_route_decref(route); - route2->ksnr_share_count++; + /* Remember conns_per_peer setting at the time + * of connection initiation. It will define the + * max number of conns per type for this conn_cb + * while it's in use. + */ + conn_cb->ksnr_max_conns = ksocknal_get_conns_per_peer(peer_ni); + ksocknal_add_conn_cb_locked(peer_ni, conn_cb); } write_unlock_bh(&ksocknal_data.ksnd_global_lock); @@ -507,66 +635,33 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, } static void -ksocknal_del_peer_locked(struct ksock_peer_ni *peer_ni, __u32 ip) +ksocknal_del_peer_locked(struct ksock_peer_ni *peer_ni) { struct ksock_conn *conn; - struct ksock_route *route; - struct list_head *tmp; - struct list_head *nxt; - int nshared; + struct ksock_conn *cnxt; + struct ksock_conn_cb *conn_cb; LASSERT(!peer_ni->ksnp_closing); /* Extra ref prevents peer_ni disappearing until I'm done with it */ ksocknal_peer_addref(peer_ni); + conn_cb = peer_ni->ksnp_conn_cb; + if (conn_cb) + ksocknal_del_conn_cb_locked(conn_cb); - list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - /* no match */ - if (!(ip == 0 || route->ksnr_ipaddr == ip)) - continue; - - route->ksnr_share_count = 0; - /* This deletes associated conns too */ - ksocknal_del_route_locked(route); - } - - nshared = 0; - list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - nshared += route->ksnr_share_count; - } - - if (nshared == 0) { - /* remove everything else if there are no explicit entries - * left */ - - list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - - /* we should only be removing auto-entries */ - LASSERT(route->ksnr_share_count == 0); - ksocknal_del_route_locked(route); - } - - list_for_each_safe(tmp, nxt, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - - ksocknal_close_conn_locked(conn, 0); - } - } + list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns, + ksnc_list) + ksocknal_close_conn_locked(conn, 0); ksocknal_peer_decref(peer_ni); - /* NB peer_ni unlinks itself when last conn/route is removed */ + /* NB peer_ni unlinks itself when last conn/conn_cb is removed */ } static int -ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) +ksocknal_del_peer(struct lnet_ni *ni, struct lnet_processid *id) { - struct list_head zombies = LIST_HEAD_INIT(zombies); - struct list_head *ptmp; - struct list_head *pnxt; + LIST_HEAD(zombies); + struct hlist_node *pnxt; struct ksock_peer_ni *peer_ni; int lo; int hi; @@ -575,37 +670,36 @@ ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) { - hi = (int)(ksocknal_nid2peerlist(id.nid) - - ksocknal_data.ksnd_peers); - lo = hi; + if (id && !LNET_NID_IS_ANY(&id->nid)) { + lo = hash_min(nidhash(&id->nid), + HASH_BITS(ksocknal_data.ksnd_peers)); + hi = lo; } else { lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; + hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1; } for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, - &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(ptmp, struct ksock_peer_ni, ksnp_list); - + hlist_for_each_entry_safe(peer_ni, pnxt, + &ksocknal_data.ksnd_peers[i], + ksnp_list) { if (peer_ni->ksnp_ni != ni) continue; - if (!((id.nid == LNET_NID_ANY || - peer_ni->ksnp_id.nid == id.nid) && - (id.pid == LNET_PID_ANY || - peer_ni->ksnp_id.pid == id.pid))) + if (!((!id || LNET_NID_IS_ANY(&id->nid) || + nid_same(&peer_ni->ksnp_id.nid, &id->nid)) && + (!id || id->pid == LNET_PID_ANY || + peer_ni->ksnp_id.pid == id->pid))) continue; ksocknal_peer_addref(peer_ni); /* a ref for me... */ - ksocknal_del_peer_locked(peer_ni, ip); + ksocknal_del_peer_locked(peer_ni); if (peer_ni->ksnp_closing && !list_empty(&peer_ni->ksnp_tx_queue)) { LASSERT(list_empty(&peer_ni->ksnp_conns)); - LASSERT(list_empty(&peer_ni->ksnp_routes)); + LASSERT(peer_ni->ksnp_conn_cb == NULL); list_splice_init(&peer_ni->ksnp_tx_queue, &zombies); @@ -628,33 +722,25 @@ static struct ksock_conn * ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index) { struct ksock_peer_ni *peer_ni; - struct list_head *ptmp; struct ksock_conn *conn; - struct list_head *ctmp; int i; read_lock(&ksocknal_data.ksnd_global_lock); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(ptmp, struct ksock_peer_ni, ksnp_list); + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) { + LASSERT(!peer_ni->ksnp_closing); - LASSERT(!peer_ni->ksnp_closing); + if (peer_ni->ksnp_ni != ni) + continue; - if (peer_ni->ksnp_ni != ni) + list_for_each_entry(conn, &peer_ni->ksnp_conns, + ksnc_list) { + if (index-- > 0) continue; - list_for_each(ctmp, &peer_ni->ksnp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, struct ksock_conn, - ksnc_list); - ksocknal_conn_addref(conn); - read_unlock(&ksocknal_data. \ - ksnd_global_lock); - return conn; - } + ksocknal_conn_addref(conn); + read_unlock(&ksocknal_data.ksnd_global_lock); + return conn; } } @@ -682,343 +768,161 @@ ksocknal_choose_scheduler_locked(unsigned int cpt) return sched; } -static int -ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs) +int +ksocknal_accept(struct lnet_ni *ni, struct socket *sock) { - struct ksock_net *net = ni->ni_data; - int i; - int nip; - - read_lock(&ksocknal_data.ksnd_global_lock); - - nip = net->ksnn_ninterfaces; - LASSERT(nip <= LNET_INTERFACES_NUM); + struct ksock_connreq *cr; + int rc; + struct sockaddr_storage peer; - /* - * Only offer interfaces for additional connections if I have - * more than one. - */ - if (nip < 2) { - read_unlock(&ksocknal_data.ksnd_global_lock); - return 0; + rc = lnet_sock_getaddr(sock, true, &peer); + if (rc != 0) { + CERROR("Can't determine new connection's address\n"); + return rc; } - for (i = 0; i < nip; i++) { - ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; - LASSERT(ipaddrs[i] != 0); + LIBCFS_ALLOC(cr, sizeof(*cr)); + if (cr == NULL) { + LCONSOLE_ERROR_MSG(0x12f, + "Dropping connection request from %pISc: memory exhausted\n", + &peer); + return -ENOMEM; } - read_unlock(&ksocknal_data.ksnd_global_lock); - return nip; -} - -static int -ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips) -{ - int best_netmatch = 0; - int best_xor = 0; - int best = -1; - int this_xor; - int this_netmatch; - int i; - - for (i = 0; i < nips; i++) { - if (ips[i] == 0) - continue; - - this_xor = (ips[i] ^ iface->ksni_ipaddr); - this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0; + lnet_ni_addref(ni); + cr->ksncr_ni = ni; + cr->ksncr_sock = sock; - if (!(best < 0 || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_xor > this_xor))) - continue; + spin_lock_bh(&ksocknal_data.ksnd_connd_lock); - best = i; - best_netmatch = this_netmatch; - best_xor = this_xor; - } + list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); + wake_up(&ksocknal_data.ksnd_connd_waitq); - LASSERT (best >= 0); - return (best); + spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); + return 0; } +static const struct ln_key_list ksocknal_tunables_keys = { + .lkl_maxattr = LNET_NET_SOCKLND_TUNABLES_ATTR_MAX, + .lkl_list = { + [LNET_NET_SOCKLND_TUNABLES_ATTR_CONNS_PER_PEER] = { + .lkp_value = "conns_per_peer", + .lkp_data_type = NLA_U16 + }, + [LNET_NET_SOCKLND_TUNABLES_ATTR_LND_TIMEOUT] = { + .lkp_value = "timeout", + .lkp_data_type = NLA_U32 + }, + [LNET_NET_SOCKLND_TUNABLES_ATTR_LND_TOS] = { + .lkp_value = "tos", + .lkp_data_type = NLA_S16, + }, + }, +}; + static int -ksocknal_select_ips(struct ksock_peer_ni *peer_ni, __u32 *peerips, int n_peerips) +ksocknal_nl_get(int cmd, struct sk_buff *msg, int type, void *data) { - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - struct ksock_net *net = peer_ni->ksnp_ni->ni_data; - struct ksock_interface *iface; - struct ksock_interface *best_iface; - int n_ips; - int i; - int j; - int k; - u32 ip; - u32 xor; - int this_netmatch; - int best_netmatch; - int best_npeers; - - /* CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness shouldn't matter */ - - /* Also note that I'm not going to return more than n_peerips - * interfaces, even if I have more myself */ - - write_lock_bh(global_lock); - - LASSERT(n_peerips <= LNET_INTERFACES_NUM); - LASSERT(net->ksnn_ninterfaces <= LNET_INTERFACES_NUM); - - /* Only match interfaces for additional connections - * if I have > 1 interface */ - n_ips = (net->ksnn_ninterfaces < 2) ? 0 : - MIN(n_peerips, net->ksnn_ninterfaces); - - for (i = 0; peer_ni->ksnp_n_passive_ips < n_ips; i++) { - /* ^ yes really... */ - - /* If we have any new interfaces, first tick off all the - * peer_ni IPs that match old interfaces, then choose new - * interfaces to match the remaining peer_ni IPS. - * We don't forget interfaces we've stopped using; we might - * start using them again... */ - - if (i < peer_ni->ksnp_n_passive_ips) { - /* Old interface. */ - ip = peer_ni->ksnp_passive_ips[i]; - best_iface = ksocknal_ip2iface(peer_ni->ksnp_ni, ip); - - /* peer_ni passive ips are kept up to date */ - LASSERT(best_iface != NULL); - } else { - /* choose a new interface */ - LASSERT (i == peer_ni->ksnp_n_passive_ips); - - best_iface = NULL; - best_netmatch = 0; - best_npeers = 0; - - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - ip = iface->ksni_ipaddr; - - for (k = 0; k < peer_ni->ksnp_n_passive_ips; k++) - if (peer_ni->ksnp_passive_ips[k] == ip) - break; - - if (k < peer_ni->ksnp_n_passive_ips) /* using it already */ - continue; - - k = ksocknal_match_peerip(iface, peerips, n_peerips); - xor = (ip ^ peerips[k]); - this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0; - - if (!(best_iface == NULL || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_npeers > iface->ksni_npeers))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_npeers = iface->ksni_npeers; - } + struct lnet_lnd_tunables *tun; + struct lnet_ni *ni = data; - LASSERT(best_iface != NULL); - - best_iface->ksni_npeers++; - ip = best_iface->ksni_ipaddr; - peer_ni->ksnp_passive_ips[i] = ip; - peer_ni->ksnp_n_passive_ips = i+1; - } - - /* mark the best matching peer_ni IP used */ - j = ksocknal_match_peerip(best_iface, peerips, n_peerips); - peerips[j] = 0; - } + if (!ni || !msg) + return -EINVAL; - /* Overwrite input peer_ni IP addresses */ - memcpy(peerips, peer_ni->ksnp_passive_ips, n_ips * sizeof(*peerips)); + if (cmd != LNET_CMD_NETS || type != LNET_NET_LOCAL_NI_ATTR_LND_TUNABLES) + return -EOPNOTSUPP; - write_unlock_bh(global_lock); + tun = &ni->ni_lnd_tunables; + nla_put_u16(msg, LNET_NET_SOCKLND_TUNABLES_ATTR_CONNS_PER_PEER, + tun->lnd_tun_u.lnd_sock.lnd_conns_per_peer); + nla_put_u32(msg, LNET_NET_SOCKLND_TUNABLES_ATTR_LND_TIMEOUT, + ksocknal_timeout()); + nla_put_s16(msg, LNET_NET_SOCKLND_TUNABLES_ATTR_LND_TOS, + tun->lnd_tun_u.lnd_sock.lnd_tos); - return (n_ips); + return 0; } -static void -ksocknal_create_routes(struct ksock_peer_ni *peer_ni, int port, - __u32 *peer_ipaddrs, int npeer_ipaddrs) +static inline void +ksocknal_nl_set_default(int cmd, int type, void *data) { - struct ksock_route *newroute = NULL; - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - struct lnet_ni *ni = peer_ni->ksnp_ni; - struct ksock_net *net = ni->ni_data; - struct list_head *rtmp; - struct ksock_route *route; - struct ksock_interface *iface; - struct ksock_interface *best_iface; - int best_netmatch; - int this_netmatch; - int best_nroutes; - int i; - int j; - - /* CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness here shouldn't matter */ - - write_lock_bh(global_lock); - - if (net->ksnn_ninterfaces < 2) { - /* Only create additional connections - * if I have > 1 interface */ - write_unlock_bh(global_lock); - return; - } - - LASSERT(npeer_ipaddrs <= LNET_INTERFACES_NUM); - - for (i = 0; i < npeer_ipaddrs; i++) { - if (newroute != NULL) { - newroute->ksnr_ipaddr = peer_ipaddrs[i]; - } else { - write_unlock_bh(global_lock); - - newroute = ksocknal_create_route(peer_ipaddrs[i], port); - if (newroute == NULL) - return; - - write_lock_bh(global_lock); - } - - if (peer_ni->ksnp_closing) { - /* peer_ni got closed under me */ - break; - } - - /* Already got a route? */ - route = NULL; - list_for_each(rtmp, &peer_ni->ksnp_routes) { - route = list_entry(rtmp, struct ksock_route, ksnr_list); - - if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) - break; - - route = NULL; - } - if (route != NULL) - continue; - - best_iface = NULL; - best_nroutes = 0; - best_netmatch = 0; - - LASSERT(net->ksnn_ninterfaces <= LNET_INTERFACES_NUM); - - /* Select interface to connect from */ - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - - /* Using this interface already? */ - list_for_each(rtmp, &peer_ni->ksnp_routes) { - route = list_entry(rtmp, struct ksock_route, - ksnr_list); - - if (route->ksnr_myipaddr == iface->ksni_ipaddr) - break; - - route = NULL; - } - if (route != NULL) - continue; - - this_netmatch = (((iface->ksni_ipaddr ^ - newroute->ksnr_ipaddr) & - iface->ksni_netmask) == 0) ? 1 : 0; - - if (!(best_iface == NULL || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_nroutes > iface->ksni_nroutes))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_nroutes = iface->ksni_nroutes; - } - - if (best_iface == NULL) - continue; - - newroute->ksnr_myipaddr = best_iface->ksni_ipaddr; - best_iface->ksni_nroutes++; - - ksocknal_add_route_locked(peer_ni, newroute); - newroute = NULL; - } - - write_unlock_bh(global_lock); - if (newroute != NULL) - ksocknal_route_decref(newroute); + struct lnet_lnd_tunables *tunables = data; + struct lnet_ioctl_config_socklnd_tunables *lt; + struct lnet_ioctl_config_socklnd_tunables *df; + + lt = &tunables->lnd_tun_u.lnd_sock; + df = &ksock_default_tunables; + switch (type) { + case LNET_NET_SOCKLND_TUNABLES_ATTR_CONNS_PER_PEER: + lt->lnd_conns_per_peer = df->lnd_conns_per_peer; + break; + case LNET_NET_SOCKLND_TUNABLES_ATTR_LND_TIMEOUT: + lt->lnd_timeout = df->lnd_timeout; + fallthrough; + default: + break; + } } -int -ksocknal_accept(struct lnet_ni *ni, struct socket *sock) +static int +ksocknal_nl_set(int cmd, struct nlattr *attr, int type, void *data) { - struct ksock_connreq *cr; - int rc; - u32 peer_ip; - int peer_port; + struct lnet_lnd_tunables *tunables = data; + int rc = 0; + s64 num; - rc = lnet_sock_getaddr(sock, true, &peer_ip, &peer_port); - LASSERT(rc == 0); /* we succeeded before */ + if (cmd != LNET_CMD_NETS) + return -EOPNOTSUPP; - LIBCFS_ALLOC(cr, sizeof(*cr)); - if (cr == NULL) { - LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from " - "%pI4h: memory exhausted\n", &peer_ip); - return -ENOMEM; + if (!attr) { + ksocknal_nl_set_default(cmd, type, data); + return 0; } - lnet_ni_addref(ni); - cr->ksncr_ni = ni; - cr->ksncr_sock = sock; - - spin_lock_bh(&ksocknal_data.ksnd_connd_lock); + if (nla_type(attr) != LN_SCALAR_ATTR_INT_VALUE) + return -EINVAL; - list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); - wake_up(&ksocknal_data.ksnd_connd_waitq); + switch (type) { + case LNET_NET_SOCKLND_TUNABLES_ATTR_CONNS_PER_PEER: + /* value values are 1 to 127. Zero mean calculate the value */ + num = nla_get_s64(attr); + if (num > -1 && num < 128) + tunables->lnd_tun_u.lnd_sock.lnd_conns_per_peer = num; + else + rc = -ERANGE; + break; + case LNET_NET_SOCKLND_TUNABLES_ATTR_LND_TIMEOUT: + num = nla_get_s64(attr); + tunables->lnd_tun_u.lnd_sock.lnd_timeout = num; + break; + case LNET_NET_SOCKLND_TUNABLES_ATTR_LND_TOS: + num = nla_get_s64(attr); + clamp_t(s64, num, -1, 0xff); + tunables->lnd_tun_u.lnd_sock.lnd_tos = num; + fallthrough; + default: + break; + } - spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - return 0; + return rc; } static int -ksocknal_connecting(struct ksock_peer_ni *peer_ni, __u32 ipaddr) +ksocknal_connecting(struct ksock_conn_cb *conn_cb, struct sockaddr *sa) { - struct ksock_route *route; - - list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list) { - if (route->ksnr_ipaddr == ipaddr) - return route->ksnr_connecting; - } + if (conn_cb && + rpc_cmp_addr((struct sockaddr *)&conn_cb->ksnr_addr, sa)) + return conn_cb->ksnr_connecting; return 0; } int -ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, +ksocknal_create_conn(struct lnet_ni *ni, struct ksock_conn_cb *conn_cb, struct socket *sock, int type) { rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - struct list_head zombies = LIST_HEAD_INIT(zombies); - struct lnet_process_id peerid; - struct list_head *tmp; + LIST_HEAD(zombies); + struct lnet_processid peerid; u64 incarnation; struct ksock_conn *conn; struct ksock_conn *conn2; @@ -1032,27 +936,28 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, int rc; int rc2; int active; + int num_dup = 0; char *warn = NULL; - active = (route != NULL); + active = (conn_cb != NULL); - LASSERT (active == (type != SOCKLND_CONN_NONE)); + LASSERT(active == (type != SOCKLND_CONN_NONE)); - LIBCFS_ALLOC(conn, sizeof(*conn)); - if (conn == NULL) { - rc = -ENOMEM; - goto failed_0; - } + LIBCFS_ALLOC(conn, sizeof(*conn)); + if (conn == NULL) { + rc = -ENOMEM; + goto failed_0; + } - conn->ksnc_peer = NULL; - conn->ksnc_route = NULL; - conn->ksnc_sock = sock; + conn->ksnc_peer = NULL; + conn->ksnc_conn_cb = NULL; + conn->ksnc_sock = sock; /* 2 ref, 1 for conn, another extra ref prevents socket * being closed before establishment of connection */ - atomic_set (&conn->ksnc_sock_refcount, 2); + refcount_set(&conn->ksnc_sock_refcount, 2); conn->ksnc_type = type; ksocknal_lib_save_callback(sock, conn); - atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ + refcount_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ conn->ksnc_rx_ready = 0; conn->ksnc_rx_scheduled = 0; @@ -1065,108 +970,121 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, LIBCFS_ALLOC(hello, offsetof(struct ksock_hello_msg, kshm_ips[LNET_INTERFACES_NUM])); - if (hello == NULL) { - rc = -ENOMEM; - goto failed_1; - } + if (hello == NULL) { + rc = -ENOMEM; + goto failed_1; + } - /* stash conn's local and remote addrs */ - rc = ksocknal_lib_get_conn_addrs (conn); - if (rc != 0) - goto failed_1; + /* stash conn's local and remote addrs */ + rc = ksocknal_lib_get_conn_addrs(conn); + if (rc != 0) + goto failed_1; - /* Find out/confirm peer_ni's NID and connection type and get the - * vector of interfaces she's willing to let me connect to. - * Passive connections use the listener timeout since the peer_ni sends - * eagerly */ + /* Find out/confirm peer_ni's NID and connection type and get the + * vector of interfaces she's willing to let me connect to. + * Passive connections use the listener timeout since the peer_ni sends + * eagerly + */ + if (active) { + struct sockaddr_in *psa = (void *)&conn->ksnc_peeraddr; - if (active) { - peer_ni = route->ksnr_peer; - LASSERT(ni == peer_ni->ksnp_ni); + peer_ni = conn_cb->ksnr_peer; + LASSERT(ni == peer_ni->ksnp_ni); - /* Active connection sends HELLO eagerly */ - hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips); - peerid = peer_ni->ksnp_id; + /* Active connection sends HELLO eagerly */ + hello->kshm_nips = 0; + peerid = peer_ni->ksnp_id; write_lock_bh(global_lock); - conn->ksnc_proto = peer_ni->ksnp_proto; + conn->ksnc_proto = peer_ni->ksnp_proto; write_unlock_bh(global_lock); - if (conn->ksnc_proto == NULL) { - conn->ksnc_proto = &ksocknal_protocol_v3x; + if (conn->ksnc_proto == NULL) { + if (psa->sin_family == AF_INET6) + conn->ksnc_proto = &ksocknal_protocol_v4x; + else if (psa->sin_family == AF_INET) + conn->ksnc_proto = &ksocknal_protocol_v3x; #if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol == 2) - conn->ksnc_proto = &ksocknal_protocol_v2x; - else if (*ksocknal_tunables.ksnd_protocol == 1) - conn->ksnc_proto = &ksocknal_protocol_v1x; + if (*ksocknal_tunables.ksnd_protocol == 2) + conn->ksnc_proto = &ksocknal_protocol_v2x; + else if (*ksocknal_tunables.ksnd_protocol == 1) + conn->ksnc_proto = &ksocknal_protocol_v1x; #endif - } + } + if (!conn->ksnc_proto) { + rc = -EPROTO; + goto failed_1; + } - rc = ksocknal_send_hello (ni, conn, peerid.nid, hello); - if (rc != 0) - goto failed_1; - } else { - peerid.nid = LNET_NID_ANY; - peerid.pid = LNET_PID_ANY; + rc = ksocknal_send_hello(ni, conn, &peerid.nid, hello); + if (rc != 0) + goto failed_1; + } else { + peerid.nid = LNET_ANY_NID; + peerid.pid = LNET_PID_ANY; - /* Passive, get protocol from peer_ni */ - conn->ksnc_proto = NULL; - } + /* Passive, get protocol from peer_ni */ + conn->ksnc_proto = NULL; + } - rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation); - if (rc < 0) - goto failed_1; + rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation); + if (rc < 0) + goto failed_1; - LASSERT (rc == 0 || active); - LASSERT (conn->ksnc_proto != NULL); - LASSERT (peerid.nid != LNET_NID_ANY); + LASSERT(rc == 0 || active); + LASSERT(conn->ksnc_proto != NULL); + LASSERT(!LNET_NID_IS_ANY(&peerid.nid)); - cpt = lnet_cpt_of_nid(peerid.nid, ni); + cpt = lnet_nid2cpt(&peerid.nid, ni); - if (active) { - ksocknal_peer_addref(peer_ni); + if (active) { + ksocknal_peer_addref(peer_ni); write_lock_bh(global_lock); - } else { - rc = ksocknal_create_peer(&peer_ni, ni, peerid); - if (rc != 0) - goto failed_1; + } else { + peer_ni = ksocknal_create_peer(ni, &peerid); + if (IS_ERR(peer_ni)) { + rc = PTR_ERR(peer_ni); + goto failed_1; + } write_lock_bh(global_lock); - /* called with a ref on ni, so shutdown can't have started */ - LASSERT(((struct ksock_net *) ni->ni_data)->ksnn_shutdown == 0); + /* called with a ref on ni, so shutdown can't have started */ + LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) >= 0); - peer2 = ksocknal_find_peer_locked(ni, peerid); + peer2 = ksocknal_find_peer_locked(ni, &peerid); if (peer2 == NULL) { /* NB this puts an "empty" peer_ni in the peer_ni * table (which takes my ref) */ - list_add_tail(&peer_ni->ksnp_list, - ksocknal_nid2peerlist(peerid.nid)); + hash_add(ksocknal_data.ksnd_peers, + &peer_ni->ksnp_list, nidhash(&peerid.nid)); } else { ksocknal_peer_decref(peer_ni); peer_ni = peer2; } - /* +1 ref for me */ - ksocknal_peer_addref(peer_ni); - peer_ni->ksnp_accepting++; - - /* Am I already connecting to this guy? Resolve in - * favour of higher NID... */ - if (peerid.nid < ni->ni_nid && - ksocknal_connecting(peer_ni, conn->ksnc_ipaddr)) { - rc = EALREADY; - warn = "connection race resolution"; - goto failed_2; - } - } + /* +1 ref for me */ + ksocknal_peer_addref(peer_ni); + peer_ni->ksnp_accepting++; + + /* Am I already connecting to this guy? Resolve in + * favour of higher NID... + */ + if (memcmp(&peerid.nid, &ni->ni_nid, sizeof(peerid.nid)) < 0 && + ksocknal_connecting(peer_ni->ksnp_conn_cb, + ((struct sockaddr *) &conn->ksnc_peeraddr))) { + rc = EALREADY; + warn = "connection race resolution"; + goto failed_2; + } + } - if (peer_ni->ksnp_closing || - (active && route->ksnr_deleted)) { - /* peer_ni/route got closed under me */ - rc = -ESTALE; - warn = "peer_ni/route removed"; - goto failed_2; + if (peer_ni->ksnp_closing || + (active && conn_cb->ksnr_deleted)) { + /* peer_ni/conn_cb got closed under me */ + rc = -ESTALE; + warn = "peer_ni/conn_cb removed"; + goto failed_2; } if (peer_ni->ksnp_proto == NULL) { @@ -1180,78 +1098,90 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, peer_ni->ksnp_incarnation = incarnation; } - if (peer_ni->ksnp_proto != conn->ksnc_proto || - peer_ni->ksnp_incarnation != incarnation) { - /* peer_ni rebooted or I've got the wrong protocol version */ - ksocknal_close_peer_conns_locked(peer_ni, 0, 0); + if (peer_ni->ksnp_proto != conn->ksnc_proto || + peer_ni->ksnp_incarnation != incarnation) { + /* peer_ni rebooted or I've got the wrong protocol version */ + ksocknal_close_peer_conns_locked(peer_ni, NULL, 0); - peer_ni->ksnp_proto = NULL; - rc = ESTALE; - warn = peer_ni->ksnp_incarnation != incarnation ? - "peer_ni rebooted" : - "wrong proto version"; - goto failed_2; - } + peer_ni->ksnp_proto = NULL; + rc = ESTALE; + warn = peer_ni->ksnp_incarnation != incarnation ? + "peer_ni rebooted" : + "wrong proto version"; + goto failed_2; + } - switch (rc) { - default: - LBUG(); - case 0: - break; - case EALREADY: - warn = "lost conn race"; - goto failed_2; - case EPROTO: - warn = "retry with different protocol version"; - goto failed_2; - } + switch (rc) { + default: + LBUG(); + case 0: + break; + case EALREADY: + warn = "lost conn race"; + goto failed_2; + case EPROTO: + warn = "retry with different protocol version"; + goto failed_2; + } /* Refuse to duplicate an existing connection, unless this is a * loopback connection */ - if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || - conn2->ksnc_myipaddr != conn->ksnc_myipaddr || - conn2->ksnc_type != conn->ksnc_type) - continue; - - /* Reply on a passive connection attempt so the peer_ni - * realises we're connected. */ - LASSERT (rc == 0); - if (!active) - rc = EALREADY; - - warn = "duplicate"; - goto failed_2; - } - } - - /* If the connection created by this route didn't bind to the IP - * address the route connected to, the connection/route matching - * code below probably isn't going to work. */ - if (active && - route->ksnr_ipaddr != conn->ksnc_ipaddr) { - CERROR("Route %s %pI4h connected to %pI4h\n", - libcfs_id2str(peer_ni->ksnp_id), - &route->ksnr_ipaddr, - &conn->ksnc_ipaddr); - } + if (!rpc_cmp_addr((struct sockaddr *)&conn->ksnc_peeraddr, + (struct sockaddr *)&conn->ksnc_myaddr)) { + list_for_each_entry(conn2, &peer_ni->ksnp_conns, ksnc_list) { + if (!rpc_cmp_addr( + (struct sockaddr *)&conn2->ksnc_peeraddr, + (struct sockaddr *)&conn->ksnc_peeraddr) || + !rpc_cmp_addr( + (struct sockaddr *)&conn2->ksnc_myaddr, + (struct sockaddr *)&conn->ksnc_myaddr) || + conn2->ksnc_type != conn->ksnc_type) + continue; - /* Search for a route corresponding to the new connection and - * create an association. This allows incoming connections created - * by routes in my peer_ni to match my own route entries so I don't - * continually create duplicate routes. */ - list_for_each(tmp, &peer_ni->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); + num_dup++; + /* If max conns per type is not registered in conn_cb + * as ksnr_max_conns, use ni's conns_per_peer + */ + if ((peer_ni->ksnp_conn_cb && + num_dup < peer_ni->ksnp_conn_cb->ksnr_max_conns) || + (!peer_ni->ksnp_conn_cb && + num_dup < ksocknal_get_conns_per_peer(peer_ni))) + continue; - if (route->ksnr_ipaddr != conn->ksnc_ipaddr) - continue; + /* Reply on a passive connection attempt so the peer_ni + * realises we're connected. + */ + LASSERT(rc == 0); + if (!active) + rc = EALREADY; - ksocknal_associate_route_conn_locked(route, conn); - break; + warn = "duplicate"; + goto failed_2; + } } + /* If the connection created by this route didn't bind to the IP + * address the route connected to, the connection/route matching + * code below probably isn't going to work. + */ + if (active && + !rpc_cmp_addr((struct sockaddr *)&conn_cb->ksnr_addr, + (struct sockaddr *)&conn->ksnc_peeraddr)) { + CERROR("Route %s %pISc connected to %pISc\n", + libcfs_idstr(&peer_ni->ksnp_id), + &conn_cb->ksnr_addr, + &conn->ksnc_peeraddr); + } + + /* Search for a conn_cb corresponding to the new connection and + * create an association. This allows incoming connections created + * by conn_cbs in my peer_ni to match my own conn_cb entries so I don't + * continually create duplicate conn_cbs. + */ + conn_cb = peer_ni->ksnp_conn_cb; + + if (conn_cb && rpc_cmp_addr((struct sockaddr *)&conn->ksnc_peeraddr, + (struct sockaddr *)&conn_cb->ksnr_addr)) + ksocknal_associate_cb_conn_locked(conn_cb, conn); conn->ksnc_peer = peer_ni; /* conn takes my ref on peer_ni */ peer_ni->ksnp_last_alive = ktime_get_seconds(); @@ -1268,14 +1198,14 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, * native scheduler. So use the scheduler's cpt instead. */ cpt = sched->kss_cpt; - sched->kss_nconns++; - conn->ksnc_scheduler = sched; + sched->kss_nconns++; + conn->ksnc_scheduler = sched; conn->ksnc_tx_last_post = ktime_get_seconds(); /* Set the deadline for the outgoing HELLO to drain */ conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued; conn->ksnc_tx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); + ksocknal_timeout(); smp_mb(); /* order with adding to peer_ni's conn list */ list_add(&conn->ksnc_list, &peer_ni->ksnp_conns); @@ -1283,7 +1213,7 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, ksocknal_new_packet(conn, 0); - conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn); + conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn); /* Take packets blocking for this connection. */ list_for_each_entry_safe(tx, txtmp, &peer_ni->ksnp_tx_queue, tx_list) { @@ -1296,103 +1226,99 @@ ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, } write_unlock_bh(global_lock); - - /* We've now got a new connection. Any errors from here on are just - * like "normal" comms errors and we close the connection normally. - * NB (a) we still have to send the reply HELLO for passive + /* We've now got a new connection. Any errors from here on are just + * like "normal" comms errors and we close the connection normally. + * NB (a) we still have to send the reply HELLO for passive * connections, - * (b) normal I/O on the conn is blocked until I setup and call the - * socket callbacks. - */ + * (b) normal I/O on the conn is blocked until I setup and call the + * socket callbacks. + */ - CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d" + CDEBUG(D_NET, "New conn %s p %d.x %pISc -> %pIScp" " incarnation:%lld sched[%d]\n", - libcfs_id2str(peerid), conn->ksnc_proto->pro_version, - &conn->ksnc_myipaddr, &conn->ksnc_ipaddr, - conn->ksnc_port, incarnation, cpt); - - if (active) { - /* additional routes after interface exchange? */ - ksocknal_create_routes(peer_ni, conn->ksnc_port, - hello->kshm_ips, hello->kshm_nips); - } else { - hello->kshm_nips = ksocknal_select_ips(peer_ni, hello->kshm_ips, - hello->kshm_nips); - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - } + libcfs_idstr(&peerid), conn->ksnc_proto->pro_version, + &conn->ksnc_myaddr, &conn->ksnc_peeraddr, + incarnation, cpt); + + if (!active) { + hello->kshm_nips = 0; + rc = ksocknal_send_hello(ni, conn, &peerid.nid, hello); + } LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg, kshm_ips[LNET_INTERFACES_NUM])); - /* setup the socket AFTER I've received hello (it disables - * SO_LINGER). I might call back to the acceptor who may want - * to send a protocol version response and then close the - * socket; this ensures the socket only tears down after the - * response has been sent. */ - if (rc == 0) - rc = ksocknal_lib_setup_sock(sock); + /* setup the socket AFTER I've received hello (it disables + * SO_LINGER). I might call back to the acceptor who may want + * to send a protocol version response and then close the + * socket; this ensures the socket only tears down after the + * response has been sent. + */ + if (rc == 0) + rc = ksocknal_lib_setup_sock(sock, ni); write_lock_bh(global_lock); - /* NB my callbacks block while I hold ksnd_global_lock */ - ksocknal_lib_set_callback(sock, conn); + /* NB my callbacks block while I hold ksnd_global_lock */ + ksocknal_lib_set_callback(sock, conn); - if (!active) - peer_ni->ksnp_accepting--; + if (!active) + peer_ni->ksnp_accepting--; write_unlock_bh(global_lock); - if (rc != 0) { + if (rc != 0) { write_lock_bh(global_lock); - if (!conn->ksnc_closing) { - /* could be closed by another thread */ - ksocknal_close_conn_locked(conn, rc); - } + if (!conn->ksnc_closing) { + /* could be closed by another thread */ + ksocknal_close_conn_locked(conn, rc); + } write_unlock_bh(global_lock); - } else if (ksocknal_connsock_addref(conn) == 0) { - /* Allow I/O to proceed. */ - ksocknal_read_callback(conn); - ksocknal_write_callback(conn); - ksocknal_connsock_decref(conn); - } + } else if (ksocknal_connsock_addref(conn) == 0) { + /* Allow I/O to proceed. */ + ksocknal_read_callback(conn); + ksocknal_write_callback(conn); + ksocknal_connsock_decref(conn); + } - ksocknal_connsock_decref(conn); - ksocknal_conn_decref(conn); - return rc; + ksocknal_connsock_decref(conn); + ksocknal_conn_decref(conn); + return rc; failed_2: + if (!peer_ni->ksnp_closing && list_empty(&peer_ni->ksnp_conns) && - list_empty(&peer_ni->ksnp_routes)) { - list_add(&zombies, &peer_ni->ksnp_tx_queue); - list_del_init(&peer_ni->ksnp_tx_queue); + peer_ni->ksnp_conn_cb == NULL) { + list_splice_init(&peer_ni->ksnp_tx_queue, &zombies); ksocknal_unlink_peer_locked(peer_ni); } write_unlock_bh(global_lock); - if (warn != NULL) { - if (rc < 0) - CERROR("Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - else - CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - } + if (warn != NULL) { + if (rc < 0) + CERROR("Not creating conn %s type %d: %s\n", + libcfs_idstr(&peerid), conn->ksnc_type, warn); + else + CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", + libcfs_idstr(&peerid), conn->ksnc_type, warn); + } - if (!active) { - if (rc > 0) { + if (!active) { + if (rc > 0) { /* Request retry by replying with CONN_NONE - * ksnc_proto has been set already */ - conn->ksnc_type = SOCKLND_CONN_NONE; - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, peerid.nid, hello); - } + * ksnc_proto has been set already + */ + conn->ksnc_type = SOCKLND_CONN_NONE; + hello->kshm_nips = 0; + ksocknal_send_hello(ni, conn, &peerid.nid, hello); + } write_lock_bh(global_lock); - peer_ni->ksnp_accepting--; + peer_ni->ksnp_accepting--; write_unlock_bh(global_lock); - } + } /* * If we get here without an error code, just use -EALREADY. @@ -1401,7 +1327,7 @@ failed_2: */ rc2 = (rc == 0 ? -EALREADY : (rc > 0 ? -rc : rc)); ksocknal_txlist_done(ni, &zombies, rc2); - ksocknal_peer_decref(peer_ni); + ksocknal_peer_decref(peer_ni); failed_1: if (hello != NULL) @@ -1412,6 +1338,7 @@ failed_1: failed_0: sock_release(sock); + return rc; } @@ -1422,9 +1349,10 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) * connection for the reaper to terminate. * Caller holds ksnd_global_lock exclusively in irq context */ struct ksock_peer_ni *peer_ni = conn->ksnc_peer; - struct ksock_route *route; + struct ksock_conn_cb *conn_cb; struct ksock_conn *conn2; - struct list_head *tmp; + int conn_count; + int duplicate_count = 0; LASSERT(peer_ni->ksnp_error == 0); LASSERT(!conn->ksnc_closing); @@ -1433,35 +1361,45 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) /* ksnd_deathrow_conns takes over peer_ni's ref */ list_del(&conn->ksnc_list); - route = conn->ksnc_route; - if (route != NULL) { - /* dissociate conn from route... */ - LASSERT(!route->ksnr_deleted); - LASSERT((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); - - conn2 = NULL; - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); - - if (conn2->ksnc_route == route && - conn2->ksnc_type == conn->ksnc_type) - break; + conn_cb = conn->ksnc_conn_cb; + if (conn_cb != NULL) { + /* dissociate conn from cb... */ + LASSERT(!conn_cb->ksnr_deleted); - conn2 = NULL; + conn_count = ksocknal_get_conn_count_by_type(conn_cb, + conn->ksnc_type); + /* connected bit is set only if all connections + * of the given type got created + */ + if (conn_count == conn_cb->ksnr_max_conns) + LASSERT((conn_cb->ksnr_connected & + BIT(conn->ksnc_type)) != 0); + + if (conn_count == 1) { + list_for_each_entry(conn2, &peer_ni->ksnp_conns, + ksnc_list) { + if (conn2->ksnc_conn_cb == conn_cb && + conn2->ksnc_type == conn->ksnc_type) + duplicate_count += 1; + } + if (duplicate_count > 0) + CERROR("Found %d duplicate conns type %d\n", + duplicate_count, + conn->ksnc_type); } - if (conn2 == NULL) - route->ksnr_connected &= ~(1 << conn->ksnc_type); + ksocknal_decr_conn_count(conn_cb, conn->ksnc_type); - conn->ksnc_route = NULL; + conn->ksnc_conn_cb = NULL; - ksocknal_route_decref(route); /* drop conn's ref on route */ + /* drop conn's ref on conn_cb */ + ksocknal_conn_cb_decref(conn_cb); } if (list_empty(&peer_ni->ksnp_conns)) { /* No more connections to this peer_ni */ if (!list_empty(&peer_ni->ksnp_tx_queue)) { - struct ksock_tx *tx; + struct ksock_tx *tx; LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x); @@ -1482,17 +1420,17 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) /* stash last conn close reason */ peer_ni->ksnp_error = error; - if (list_empty(&peer_ni->ksnp_routes)) { + if (peer_ni->ksnp_conn_cb == NULL) { /* I've just closed last conn belonging to a - * peer_ni with no routes to it */ + * peer_ni with no connections to it + */ ksocknal_unlink_peer_locked(peer_ni); } } spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - list_add_tail(&conn->ksnc_list, - &ksocknal_data.ksnd_deathrow_conns); + list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns); wake_up(&ksocknal_data.ksnd_reaper_waitq); spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); @@ -1501,7 +1439,7 @@ ksocknal_close_conn_locked(struct ksock_conn *conn, int error) void ksocknal_peer_failed(struct ksock_peer_ni *peer_ni) { - int notify = 0; + bool notify = false; time64_t last_alive = 0; /* There has been a connection failure or comms error; but I'll only @@ -1513,15 +1451,16 @@ ksocknal_peer_failed(struct ksock_peer_ni *peer_ni) if ((peer_ni->ksnp_id.pid & LNET_PID_USERFLAG) == 0 && list_empty(&peer_ni->ksnp_conns) && peer_ni->ksnp_accepting == 0 && - ksocknal_find_connecting_route_locked(peer_ni) == NULL) { - notify = 1; + !ksocknal_find_connecting_conn_cb_locked(peer_ni)) { + notify = true; last_alive = peer_ni->ksnp_last_alive; } read_unlock(&ksocknal_data.ksnd_global_lock); if (notify) - lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid, + lnet_notify(peer_ni->ksnp_ni, + &peer_ni->ksnp_id.nid, false, false, last_alive); } @@ -1531,7 +1470,7 @@ ksocknal_finalize_zcreq(struct ksock_conn *conn) struct ksock_peer_ni *peer_ni = conn->ksnc_peer; struct ksock_tx *tx; struct ksock_tx *tmp; - struct list_head zlist = LIST_HEAD_INIT(zlist); + LIST_HEAD(zlist); /* NB safe to finalize TXs because closing of socket will * abort all buffered data */ @@ -1539,7 +1478,8 @@ ksocknal_finalize_zcreq(struct ksock_conn *conn) spin_lock(&peer_ni->ksnp_lock); - list_for_each_entry_safe(tx, tmp, &peer_ni->ksnp_zc_req_list, tx_zc_list) { + list_for_each_entry_safe(tx, tmp, &peer_ni->ksnp_zc_req_list, + tx_zc_list) { if (tx->tx_conn != conn) continue; @@ -1547,15 +1487,13 @@ ksocknal_finalize_zcreq(struct ksock_conn *conn) tx->tx_msg.ksm_zc_cookies[0] = 0; tx->tx_zc_aborted = 1; /* mark it as not-acked */ - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); + list_move(&tx->tx_zc_list, &zlist); } spin_unlock(&peer_ni->ksnp_lock); - while (!list_empty(&zlist)) { - tx = list_entry(zlist.next, struct ksock_tx, tx_zc_list); - + while ((tx = list_first_entry_or_null(&zlist, struct ksock_tx, + tx_zc_list)) != NULL) { list_del(&tx->tx_zc_list); ksocknal_tx_decref(tx); } @@ -1564,31 +1502,32 @@ ksocknal_finalize_zcreq(struct ksock_conn *conn) void ksocknal_terminate_conn(struct ksock_conn *conn) { - /* This gets called by the reaper (guaranteed thread context) to - * disengage the socket from its callbacks and close it. - * ksnc_refcount will eventually hit zero, and then the reaper will - * destroy it. */ + /* This gets called by the reaper (guaranteed thread context) to + * disengage the socket from its callbacks and close it. + * ksnc_refcount will eventually hit zero, and then the reaper will + * destroy it. + */ struct ksock_peer_ni *peer_ni = conn->ksnc_peer; struct ksock_sched *sched = conn->ksnc_scheduler; - int failed = 0; + bool failed = false; - LASSERT(conn->ksnc_closing); + LASSERT(conn->ksnc_closing); - /* wake up the scheduler to "send" all remaining packets to /dev/null */ + /* wake up the scheduler to "send" all remaining packets to /dev/null */ spin_lock_bh(&sched->kss_lock); - /* a closing conn is always ready to tx */ - conn->ksnc_tx_ready = 1; + /* a closing conn is always ready to tx */ + conn->ksnc_tx_ready = 1; - if (!conn->ksnc_tx_scheduled && + if (!conn->ksnc_tx_scheduled && !list_empty(&conn->ksnc_tx_queue)) { list_add_tail(&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ + &sched->kss_tx_conns); + conn->ksnc_tx_scheduled = 1; + /* extra ref for scheduler */ ksocknal_conn_addref(conn); - wake_up (&sched->kss_waitq); + wake_up(&sched->kss_waitq); } spin_unlock_bh(&sched->kss_lock); @@ -1596,37 +1535,39 @@ ksocknal_terminate_conn(struct ksock_conn *conn) /* serialise with callbacks */ write_lock_bh(&ksocknal_data.ksnd_global_lock); - ksocknal_lib_reset_callback(conn->ksnc_sock, conn); + ksocknal_lib_reset_callback(conn->ksnc_sock, conn); - /* OK, so this conn may not be completely disengaged from its - * scheduler yet, but it _has_ committed to terminate... */ - conn->ksnc_scheduler->kss_nconns--; + /* OK, so this conn may not be completely disengaged from its + * scheduler yet, but it _has_ committed to terminate... + */ + conn->ksnc_scheduler->kss_nconns--; - if (peer_ni->ksnp_error != 0) { - /* peer_ni's last conn closed in error */ + if (peer_ni->ksnp_error != 0) { + /* peer_ni's last conn closed in error */ LASSERT(list_empty(&peer_ni->ksnp_conns)); - failed = 1; - peer_ni->ksnp_error = 0; /* avoid multiple notifications */ - } + failed = true; + peer_ni->ksnp_error = 0; /* avoid multiple notifications */ + } write_unlock_bh(&ksocknal_data.ksnd_global_lock); - if (failed) - ksocknal_peer_failed(peer_ni); + if (failed) + ksocknal_peer_failed(peer_ni); - /* The socket is closed on the final put; either here, or in - * ksocknal_{send,recv}msg(). Since we set up the linger2 option - * when the connection was established, this will close the socket - * immediately, aborting anything buffered in it. Any hung - * zero-copy transmits will therefore complete in finite time. */ - ksocknal_connsock_decref(conn); + /* The socket is closed on the final put; either here, or in + * ksocknal_{send,recv}msg(). Since we set up the linger2 option + * when the connection was established, this will close the socket + * immediately, aborting anything buffered in it. Any hung + * zero-copy transmits will therefore complete in finite time. + */ + ksocknal_connsock_decref(conn); } void ksocknal_queue_zombie_conn(struct ksock_conn *conn) { /* Queue the conn for the reaper to destroy */ - LASSERT(atomic_read(&conn->ksnc_conn_refcount) == 0); + LASSERT(refcount_read(&conn->ksnc_conn_refcount) == 0); spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); @@ -1641,556 +1582,356 @@ ksocknal_destroy_conn(struct ksock_conn *conn) time64_t last_rcv; /* Final coup-de-grace of the reaper */ - CDEBUG (D_NET, "connection %p\n", conn); - - LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0); - LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0); - LASSERT (conn->ksnc_sock == NULL); - LASSERT (conn->ksnc_route == NULL); - LASSERT (!conn->ksnc_tx_scheduled); - LASSERT (!conn->ksnc_rx_scheduled); + CDEBUG(D_NET, "connection %p\n", conn); + + LASSERT(refcount_read(&conn->ksnc_conn_refcount) == 0); + LASSERT(refcount_read(&conn->ksnc_sock_refcount) == 0); + LASSERT(conn->ksnc_sock == NULL); + LASSERT(conn->ksnc_conn_cb == NULL); + LASSERT(!conn->ksnc_tx_scheduled); + LASSERT(!conn->ksnc_rx_scheduled); LASSERT(list_empty(&conn->ksnc_tx_queue)); /* complete current receive if any */ switch (conn->ksnc_rx_state) { case SOCKNAL_RX_LNET_PAYLOAD: last_rcv = conn->ksnc_rx_deadline - - lnet_get_lnd_timeout(); - CERROR("Completing partial receive from %s[%d], " - "ip %pI4h:%d, with error, wanted: %d, left: %d, " - "last alive is %lld secs ago\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, - &conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, + ksocknal_timeout(); + CERROR("Completing partial receive from %s[%d], ip %pIScp, with error, wanted: %d, left: %d, last alive is %lld secs ago\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + conn->ksnc_type, + &conn->ksnc_peeraddr, + conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, ktime_get_seconds() - last_rcv); if (conn->ksnc_lnet_msg) conn->ksnc_lnet_msg->msg_health_status = LNET_MSG_STATUS_REMOTE_ERROR; lnet_finalize(conn->ksnc_lnet_msg, -EIO); break; - case SOCKNAL_RX_LNET_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of lnet header from %s, " - "ip %pI4h:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of ksock message from %s, " - "ip %pI4h:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_SLOP: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of slops from %s, " - "ip %pI4h:%d, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - &conn->ksnc_ipaddr, conn->ksnc_port); - break; - default: - LBUG (); - break; - } + case SOCKNAL_RX_LNET_HEADER: + if (conn->ksnc_rx_started) + CERROR("Incomplete receive of lnet header from %s, ip %pIScp, with error, protocol: %d.x.\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + &conn->ksnc_peeraddr, + conn->ksnc_proto->pro_version); + break; + case SOCKNAL_RX_KSM_HEADER: + if (conn->ksnc_rx_started) + CERROR("Incomplete receive of ksock message from %s, ip %pIScp, with error, protocol: %d.x.\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + &conn->ksnc_peeraddr, + conn->ksnc_proto->pro_version); + break; + case SOCKNAL_RX_SLOP: + if (conn->ksnc_rx_started) + CERROR("Incomplete receive of slops from %s, ip %pIScp, with error\n", + libcfs_idstr(&conn->ksnc_peer->ksnp_id), + &conn->ksnc_peeraddr); + break; + default: + LBUG(); + break; + } - ksocknal_peer_decref(conn->ksnc_peer); + ksocknal_peer_decref(conn->ksnc_peer); - LIBCFS_FREE (conn, sizeof (*conn)); + LIBCFS_FREE(conn, sizeof(*conn)); } int -ksocknal_close_peer_conns_locked(struct ksock_peer_ni *peer_ni, __u32 ipaddr, int why) +ksocknal_close_peer_conns_locked(struct ksock_peer_ni *peer_ni, + struct sockaddr *addr, int why) { struct ksock_conn *conn; - struct list_head *ctmp; - struct list_head *cnxt; + struct ksock_conn *cnxt; int count = 0; - list_for_each_safe(ctmp, cnxt, &peer_ni->ksnp_conns) { - conn = list_entry(ctmp, struct ksock_conn, ksnc_list); - - if (ipaddr == 0 || - conn->ksnc_ipaddr == ipaddr) { - count++; - ksocknal_close_conn_locked (conn, why); - } - } + list_for_each_entry_safe(conn, cnxt, &peer_ni->ksnp_conns, ksnc_list) { + if (!addr || + rpc_cmp_addr(addr, + (struct sockaddr *)&conn->ksnc_peeraddr)) { + count++; + ksocknal_close_conn_locked(conn, why); + } + } - return (count); + return count; } int ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why) { struct ksock_peer_ni *peer_ni = conn->ksnc_peer; - u32 ipaddr = conn->ksnc_ipaddr; int count; write_lock_bh(&ksocknal_data.ksnd_global_lock); - count = ksocknal_close_peer_conns_locked (peer_ni, ipaddr, why); + count = ksocknal_close_peer_conns_locked( + peer_ni, (struct sockaddr *)&conn->ksnc_peeraddr, why); write_unlock_bh(&ksocknal_data.ksnd_global_lock); - return (count); + return count; } int -ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) +ksocknal_close_matching_conns(struct lnet_processid *id, __u32 ipaddr) { struct ksock_peer_ni *peer_ni; - struct list_head *ptmp; - struct list_head *pnxt; + struct hlist_node *pnxt; int lo; int hi; int i; int count = 0; + struct sockaddr_in sa = {.sin_family = AF_INET}; write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) - lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { - - peer_ni = list_entry(ptmp, struct ksock_peer_ni, ksnp_list); + if (!LNET_NID_IS_ANY(&id->nid)) { + lo = hash_min(nidhash(&id->nid), + HASH_BITS(ksocknal_data.ksnd_peers)); + hi = lo; + } else { + lo = 0; + hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1; + } - if (!((id.nid == LNET_NID_ANY || id.nid == peer_ni->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || id.pid == peer_ni->ksnp_id.pid))) - continue; + sa.sin_addr.s_addr = htonl(ipaddr); + for (i = lo; i <= hi; i++) { + hlist_for_each_entry_safe(peer_ni, pnxt, + &ksocknal_data.ksnd_peers[i], + ksnp_list) { + + if (!((LNET_NID_IS_ANY(&id->nid) || + nid_same(&id->nid, &peer_ni->ksnp_id.nid)) && + (id->pid == LNET_PID_ANY || + id->pid == peer_ni->ksnp_id.pid))) + continue; - count += ksocknal_close_peer_conns_locked (peer_ni, ipaddr, 0); - } - } + count += ksocknal_close_peer_conns_locked( + peer_ni, + ipaddr ? (struct sockaddr *)&sa : NULL, 0); + } + } write_unlock_bh(&ksocknal_data.ksnd_global_lock); - /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) - return (0); + /* wildcards always succeed */ + if (LNET_NID_IS_ANY(&id->nid) || id->pid == LNET_PID_ANY || + ipaddr == 0) + return 0; - return (count == 0 ? -ENOENT : 0); + return (count == 0 ? -ENOENT : 0); } -void -ksocknal_notify_gw_down(lnet_nid_t gw_nid) +static void +ksocknal_notify_gw_down(struct lnet_nid *gw_nid) { /* The router is telling me she's been notified of a change in * gateway state.... */ - struct lnet_process_id id = { - .nid = gw_nid, + struct lnet_processid id = { .pid = LNET_PID_ANY, + .nid = *gw_nid, }; - CDEBUG(D_NET, "gw %s down\n", libcfs_nid2str(gw_nid)); + CDEBUG(D_NET, "gw %s down\n", libcfs_nidstr(gw_nid)); /* If the gateway crashed, close all open connections... */ - ksocknal_close_matching_conns(id, 0); - return; - - /* We can only establish new connections - * if we have autroutes, and these connect on demand. */ -} - -void -ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when) -{ - int connect = 1; - time64_t last_alive = 0; - time64_t now = ktime_get_seconds(); - struct ksock_peer_ni *peer_ni = NULL; - rwlock_t *glock = &ksocknal_data.ksnd_global_lock; - struct lnet_process_id id = { - .nid = nid, - .pid = LNET_PID_LUSTRE, - }; - - read_lock(glock); - - peer_ni = ksocknal_find_peer_locked(ni, id); - if (peer_ni != NULL) { - struct list_head *tmp; - struct ksock_conn *conn; - int bufnob; - - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - - if (bufnob < conn->ksnc_tx_bufnob) { - /* something got ACKed */ - conn->ksnc_tx_deadline = ktime_get_seconds() + - lnet_get_lnd_timeout(); - peer_ni->ksnp_last_alive = now; - conn->ksnc_tx_bufnob = bufnob; - } - } - - last_alive = peer_ni->ksnp_last_alive; - if (ksocknal_find_connectable_route_locked(peer_ni) == NULL) - connect = 0; - } - - read_unlock(glock); - - if (last_alive != 0) - *when = last_alive; - - CDEBUG(D_NET, "peer_ni %s %p, alive %lld secs ago, connect %d\n", - libcfs_nid2str(nid), peer_ni, - last_alive ? now - last_alive : -1, - connect); - - if (!connect) - return; - - ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port()); - - write_lock_bh(glock); - - peer_ni = ksocknal_find_peer_locked(ni, id); - if (peer_ni != NULL) - ksocknal_launch_all_connections_locked(peer_ni); - - write_unlock_bh(glock); - return; -} - -static void -ksocknal_push_peer(struct ksock_peer_ni *peer_ni) -{ - int index; - int i; - struct list_head *tmp; - struct ksock_conn *conn; - - for (index = 0; ; index++) { - read_lock(&ksocknal_data.ksnd_global_lock); - - i = 0; - conn = NULL; - - list_for_each(tmp, &peer_ni->ksnp_conns) { - if (i++ == index) { - conn = list_entry(tmp, struct ksock_conn, - ksnc_list); - ksocknal_conn_addref(conn); - break; - } - } - - read_unlock(&ksocknal_data.ksnd_global_lock); - - if (conn == NULL) - break; - - ksocknal_lib_push_conn (conn); - ksocknal_conn_decref(conn); - } -} - -static int -ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) -{ - struct list_head *start; - struct list_head *end; - struct list_head *tmp; - int rc = -ENOENT; - unsigned int hsize = ksocknal_data.ksnd_peer_hash_size; - - if (id.nid == LNET_NID_ANY) { - start = &ksocknal_data.ksnd_peers[0]; - end = &ksocknal_data.ksnd_peers[hsize - 1]; - } else { - start = end = ksocknal_nid2peerlist(id.nid); - } - - for (tmp = start; tmp <= end; tmp++) { - int peer_off; /* searching offset in peer_ni hash table */ - - for (peer_off = 0; ; peer_off++) { - struct ksock_peer_ni *peer_ni; - int i = 0; - - read_lock(&ksocknal_data.ksnd_global_lock); - list_for_each_entry(peer_ni, tmp, ksnp_list) { - if (!((id.nid == LNET_NID_ANY || - id.nid == peer_ni->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer_ni->ksnp_id.pid))) - continue; - - if (i++ == peer_off) { - ksocknal_peer_addref(peer_ni); - break; - } - } - read_unlock(&ksocknal_data.ksnd_global_lock); - - if (i == 0) /* no match */ - break; - - rc = 0; - ksocknal_push_peer(peer_ni); - ksocknal_peer_decref(peer_ni); - } - } - return rc; -} - -static int -ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask) -{ - struct ksock_net *net = ni->ni_data; - struct ksock_interface *iface; - int rc; - int i; - int j; - struct list_head *ptmp; - struct ksock_peer_ni *peer_ni; - struct list_head *rtmp; - struct ksock_route *route; - - if (ipaddress == 0 || - netmask == 0) - return -EINVAL; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - iface = ksocknal_ip2iface(ni, ipaddress); - if (iface != NULL) { - /* silently ignore dups */ - rc = 0; - } else if (net->ksnn_ninterfaces == LNET_INTERFACES_NUM) { - rc = -ENOSPC; - } else { - iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++]; - - iface->ksni_ipaddr = ipaddress; - iface->ksni_netmask = netmask; - iface->ksni_nroutes = 0; - iface->ksni_npeers = 0; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(ptmp, struct ksock_peer_ni, - ksnp_list); - - for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) - if (peer_ni->ksnp_passive_ips[j] == ipaddress) - iface->ksni_npeers++; - - list_for_each(rtmp, &peer_ni->ksnp_routes) { - route = list_entry(rtmp, - struct ksock_route, - ksnr_list); - - if (route->ksnr_myipaddr == ipaddress) - iface->ksni_nroutes++; - } - } - } - - rc = 0; - /* NB only new connections will pay attention to the new interface! */ - } - - write_unlock_bh(&ksocknal_data.ksnd_global_lock); + ksocknal_close_matching_conns(&id, 0); + return; - return rc; + /* We can only establish new connections + * if we have autroutes, and these connect on demand. + */ } static void -ksocknal_peer_del_interface_locked(struct ksock_peer_ni *peer_ni, __u32 ipaddr) +ksocknal_push_peer(struct ksock_peer_ni *peer_ni) { - struct list_head *tmp; - struct list_head *nxt; - struct ksock_route *route; - struct ksock_conn *conn; + int index; int i; - int j; - - for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++) - if (peer_ni->ksnp_passive_ips[i] == ipaddr) { - for (j = i+1; j < peer_ni->ksnp_n_passive_ips; j++) - peer_ni->ksnp_passive_ips[j-1] = - peer_ni->ksnp_passive_ips[j]; - peer_ni->ksnp_n_passive_ips--; - break; - } + struct ksock_conn *conn; - list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); + for (index = 0; ; index++) { + read_lock(&ksocknal_data.ksnd_global_lock); - if (route->ksnr_myipaddr != ipaddr) - continue; + i = 0; + conn = NULL; - if (route->ksnr_share_count != 0) { - /* Manually created; keep, but unbind */ - route->ksnr_myipaddr = 0; - } else { - ksocknal_del_route_locked(route); + list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) { + if (i++ == index) { + ksocknal_conn_addref(conn); + break; + } } - } - list_for_each_safe(tmp, nxt, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); + read_unlock(&ksocknal_data.ksnd_global_lock); + + if (i <= index) + break; - if (conn->ksnc_myipaddr == ipaddr) - ksocknal_close_conn_locked (conn, 0); + ksocknal_lib_push_conn (conn); + ksocknal_conn_decref(conn); } } static int -ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress) +ksocknal_push(struct lnet_ni *ni, struct lnet_processid *id) { - struct ksock_net *net = ni->ni_data; + int lo; + int hi; + int bkt; int rc = -ENOENT; - struct list_head *tmp; - struct list_head *nxt; - struct ksock_peer_ni *peer_ni; - u32 this_ip; - int i; - int j; - - write_lock_bh(&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - this_ip = net->ksnn_interfaces[i].ksni_ipaddr; - if (!(ipaddress == 0 || - ipaddress == this_ip)) - continue; - - rc = 0; - - for (j = i+1; j < net->ksnn_ninterfaces; j++) - net->ksnn_interfaces[j-1] = - net->ksnn_interfaces[j]; + if (!LNET_NID_IS_ANY(&id->nid)) { + lo = hash_min(nidhash(&id->nid), + HASH_BITS(ksocknal_data.ksnd_peers)); + hi = lo; + } else { + lo = 0; + hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1; + } - net->ksnn_ninterfaces--; + for (bkt = lo; bkt <= hi; bkt++) { + int peer_off; /* searching offset in peer_ni hash table */ - for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { - list_for_each_safe(tmp, nxt, - &ksocknal_data.ksnd_peers[j]) { - peer_ni = list_entry(tmp, struct ksock_peer_ni, - ksnp_list); + for (peer_off = 0; ; peer_off++) { + struct ksock_peer_ni *peer_ni; + int i = 0; - if (peer_ni->ksnp_ni != ni) - continue; + read_lock(&ksocknal_data.ksnd_global_lock); + hlist_for_each_entry(peer_ni, + &ksocknal_data.ksnd_peers[bkt], + ksnp_list) { + if (!((LNET_NID_IS_ANY(&id->nid) || + nid_same(&id->nid, + &peer_ni->ksnp_id.nid)) && + (id->pid == LNET_PID_ANY || + id->pid == peer_ni->ksnp_id.pid))) + continue; - ksocknal_peer_del_interface_locked(peer_ni, this_ip); - } - } - } + if (i++ == peer_off) { + ksocknal_peer_addref(peer_ni); + break; + } + } + read_unlock(&ksocknal_data.ksnd_global_lock); - write_unlock_bh(&ksocknal_data.ksnd_global_lock); + if (i <= peer_off) /* no match */ + break; - return (rc); + rc = 0; + ksocknal_push_peer(peer_ni); + ksocknal_peer_decref(peer_ni); + } + } + return rc; } int ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) { - struct lnet_process_id id = {0}; - struct libcfs_ioctl_data *data = arg; - int rc; + struct lnet_processid id = {}; + struct libcfs_ioctl_data *data = arg; + int rc; - switch(cmd) { - case IOC_LIBCFS_GET_INTERFACE: { + switch(cmd) { + case IOC_LIBCFS_GET_INTERFACE: { struct ksock_net *net = ni->ni_data; struct ksock_interface *iface; + struct sockaddr_in *sa; read_lock(&ksocknal_data.ksnd_global_lock); - if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) { - rc = -ENOENT; - } else { - rc = 0; - iface = &net->ksnn_interfaces[data->ioc_count]; - - data->ioc_u32[0] = iface->ksni_ipaddr; - data->ioc_u32[1] = iface->ksni_netmask; - data->ioc_u32[2] = iface->ksni_npeers; - data->ioc_u32[3] = iface->ksni_nroutes; - } + if (data->ioc_count >= 1) { + rc = -ENOENT; + } else { + rc = 0; + iface = &net->ksnn_interface; + + sa = (void *)&iface->ksni_addr; + if (sa->sin_family == AF_INET) { + data->ioc_u32[0] = ntohl(sa->sin_addr.s_addr); + data->ioc_u32[1] = iface->ksni_netmask; + } else { + data->ioc_u32[0] = 0xFFFFFFFF; + data->ioc_u32[1] = 0; + } + data->ioc_u32[2] = iface->ksni_npeers; + data->ioc_u32[3] = iface->ksni_nroutes; + } read_unlock(&ksocknal_data.ksnd_global_lock); - return rc; + return rc; } - case IOC_LIBCFS_ADD_INTERFACE: - return ksocknal_add_interface(ni, - data->ioc_u32[0], /* IP address */ - data->ioc_u32[1]); /* net mask */ - - case IOC_LIBCFS_DEL_INTERFACE: - return ksocknal_del_interface(ni, - data->ioc_u32[0]); /* IP address */ - - case IOC_LIBCFS_GET_PEER: { - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; - - rc = ksocknal_get_peer_info(ni, data->ioc_count, - &id, &myip, &ip, &port, - &conn_count, &share_count); - if (rc != 0) - return rc; - - data->ioc_nid = id.nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - data->ioc_u32[2] = myip; - data->ioc_u32[3] = conn_count; - data->ioc_u32[4] = id.pid; - return 0; - } + case IOC_LIBCFS_GET_PEER: { + __u32 myip = 0; + __u32 ip = 0; + int port = 0; + int conn_count = 0; + int share_count = 0; - case IOC_LIBCFS_ADD_PEER: - id.nid = data->ioc_nid; - id.pid = LNET_PID_LUSTRE; - return ksocknal_add_peer (ni, id, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ + rc = ksocknal_get_peer_info(ni, data->ioc_count, + &id, &myip, &ip, &port, + &conn_count, &share_count); + if (rc != 0) + return rc; - case IOC_LIBCFS_DEL_PEER: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_del_peer (ni, id, - data->ioc_u32[0]); /* IP */ + if (!nid_is_nid4(&id.nid)) + return -EINVAL; + data->ioc_nid = lnet_nid_to_nid4(&id.nid); + data->ioc_count = share_count; + data->ioc_u32[0] = ip; + data->ioc_u32[1] = port; + data->ioc_u32[2] = myip; + data->ioc_u32[3] = conn_count; + data->ioc_u32[4] = id.pid; + return 0; + } + + case IOC_LIBCFS_ADD_PEER: { + struct sockaddr_in sa = {.sin_family = AF_INET}; + + id.pid = LNET_PID_LUSTRE; + lnet_nid4_to_nid(data->ioc_nid, &id.nid); + sa.sin_addr.s_addr = htonl(data->ioc_u32[0]); + sa.sin_port = htons(data->ioc_u32[1]); + return ksocknal_add_peer(ni, &id, (struct sockaddr *)&sa); + } + case IOC_LIBCFS_DEL_PEER: + lnet_nid4_to_nid(data->ioc_nid, &id.nid); + id.pid = LNET_PID_ANY; + return ksocknal_del_peer(ni, &id); case IOC_LIBCFS_GET_CONN: { int txmem; int rxmem; int nagle; struct ksock_conn *conn = ksocknal_get_conn_by_idx(ni, data->ioc_count); + struct sockaddr_in *psa = (void *)&conn->ksnc_peeraddr; + struct sockaddr_in *mysa = (void *)&conn->ksnc_myaddr; if (conn == NULL) return -ENOENT; ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); - data->ioc_count = txmem; - data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; - data->ioc_flags = nagle; - data->ioc_u32[0] = conn->ksnc_ipaddr; - data->ioc_u32[1] = conn->ksnc_port; - data->ioc_u32[2] = conn->ksnc_myipaddr; - data->ioc_u32[3] = conn->ksnc_type; + data->ioc_count = txmem; + data->ioc_nid = lnet_nid_to_nid4(&conn->ksnc_peer->ksnp_id.nid); + data->ioc_flags = nagle; + if (psa->sin_family == AF_INET) + data->ioc_u32[0] = ntohl(psa->sin_addr.s_addr); + else + data->ioc_u32[0] = 0xFFFFFFFF; + data->ioc_u32[1] = rpc_get_port((struct sockaddr *) + &conn->ksnc_peeraddr); + if (mysa->sin_family == AF_INET) + data->ioc_u32[2] = ntohl(mysa->sin_addr.s_addr); + else + data->ioc_u32[2] = 0xFFFFFFFF; + data->ioc_u32[3] = conn->ksnc_type; data->ioc_u32[4] = conn->ksnc_scheduler->kss_cpt; data->ioc_u32[5] = rxmem; data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid; @@ -2198,31 +1939,32 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) return 0; } - case IOC_LIBCFS_CLOSE_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_close_matching_conns (id, - data->ioc_u32[0]); - - case IOC_LIBCFS_REGISTER_MYNID: - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) - return 0; - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - - case IOC_LIBCFS_PUSH_CONNECTION: - id.nid = data->ioc_nid; - id.pid = LNET_PID_ANY; - return ksocknal_push(ni, id); - - default: - return -EINVAL; - } - /* not reached */ + case IOC_LIBCFS_CLOSE_CONNECTION: + lnet_nid4_to_nid(data->ioc_nid, &id.nid); + id.pid = LNET_PID_ANY; + return ksocknal_close_matching_conns(&id, + data->ioc_u32[0]); + + case IOC_LIBCFS_REGISTER_MYNID: + /* Ignore if this is a noop */ + if (nid_is_nid4(&ni->ni_nid) && + data->ioc_nid == lnet_nid_to_nid4(&ni->ni_nid)) + return 0; + + CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", + libcfs_nid2str(data->ioc_nid), + libcfs_nidstr(&ni->ni_nid)); + return -EINVAL; + + case IOC_LIBCFS_PUSH_CONNECTION: + lnet_nid4_to_nid(data->ioc_nid, &id.nid); + id.pid = LNET_PID_ANY; + return ksocknal_push(ni, &id); + + default: + return -EINVAL; + } + /* not reached */ } static void @@ -2233,22 +1975,17 @@ ksocknal_free_buffers (void) if (ksocknal_data.ksnd_schedulers != NULL) cfs_percpt_free(ksocknal_data.ksnd_schedulers); - LIBCFS_FREE (ksocknal_data.ksnd_peers, - sizeof(struct list_head) * - ksocknal_data.ksnd_peer_hash_size); - spin_lock(&ksocknal_data.ksnd_tx_lock); if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - struct list_head zlist; + LIST_HEAD(zlist); struct ksock_tx *tx; - list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs); - list_del_init(&ksocknal_data.ksnd_idle_noop_txs); + list_splice_init(&ksocknal_data.ksnd_idle_noop_txs, &zlist); spin_unlock(&ksocknal_data.ksnd_tx_lock); - while (!list_empty(&zlist)) { - tx = list_entry(zlist.next, struct ksock_tx, tx_list); + while ((tx = list_first_entry_or_null(&zlist, struct ksock_tx, + tx_list)) != NULL) { list_del(&tx->tx_list); LIBCFS_FREE(tx, tx->tx_desc_size); } @@ -2257,26 +1994,232 @@ ksocknal_free_buffers (void) } } +static int +ksocknal_handle_link_state_change(struct net_device *dev, + unsigned char operstate) +{ + struct lnet_ni *ni = NULL; + struct ksock_net *net; + struct ksock_net *cnxt; + int ifindex; + unsigned char link_down; + struct in_device *in_dev; + bool found_ip = false; + struct ksock_interface *ksi = NULL; + struct sockaddr_in *sa; + __u32 ni_state_before; + bool update_ping_buf = false; + int state; + DECLARE_CONST_IN_IFADDR(ifa); + + link_down = !((operstate == IF_OPER_UP) || (operstate == IF_OPER_UNKNOWN)); + ifindex = dev->ifindex; + + if (!ksocknal_data.ksnd_nnets) + goto out; + + list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets, + ksnn_list) { + + ksi = &net->ksnn_interface; + sa = (void *)&ksi->ksni_addr; + found_ip = false; + + if (strcmp(ksi->ksni_name, dev->name)) + continue; + + if (ksi->ksni_index == -1) { + if (dev->reg_state != NETREG_REGISTERED) + continue; + /* A registration just happened: save the new index for + * the device */ + ksi->ksni_index = ifindex; + goto out; + } + + if (ksi->ksni_index != ifindex) + continue; + + if (dev->reg_state == NETREG_UNREGISTERING) { + /* Device is being unregistered, we need to clear the + * index, it can change when device will be back */ + ksi->ksni_index = -1; + goto out; + } + + ni = net->ksnn_ni; + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) { + CDEBUG(D_NET, "Interface %s has no IPv4 status.\n", + dev->name); + ni_state_before = lnet_set_link_fatal_state(ni, 1); + goto ni_done; + } + in_dev_for_each_ifa_rtnl(ifa, in_dev) { + if (sa->sin_addr.s_addr == ifa->ifa_local) + found_ip = true; + } + endfor_ifa(in_dev); + + if (!found_ip) { + CDEBUG(D_NET, "Interface %s has no matching ip\n", + dev->name); + ni_state_before = lnet_set_link_fatal_state(ni, 1); + goto ni_done; + } + + if (link_down) { + ni_state_before = lnet_set_link_fatal_state(ni, 1); + } else { + state = (lnet_get_link_status(dev) == 0); + ni_state_before = lnet_set_link_fatal_state(ni, + state); + } +ni_done: + if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && + (atomic_read(&ni->ni_fatal_error_on) != ni_state_before)) + update_ping_buf = true; + } + + if (update_ping_buf) + lnet_mark_ping_buffer_for_update(); +out: + return 0; +} + + +static int +ksocknal_handle_inetaddr_change(struct in_ifaddr *ifa, unsigned long event) +{ + struct lnet_ni *ni = NULL; + struct ksock_net *net; + struct ksock_net *cnxt; + struct net_device *event_netdev = ifa->ifa_dev->dev; + int ifindex; + struct ksock_interface *ksi = NULL; + struct sockaddr_in *sa; + __u32 ni_state_before; + bool update_ping_buf = false; + bool link_down; + + if (!ksocknal_data.ksnd_nnets) + goto out; + + ifindex = event_netdev->ifindex; + + list_for_each_entry_safe(net, cnxt, &ksocknal_data.ksnd_nets, + ksnn_list) { + + ksi = &net->ksnn_interface; + sa = (void *)&ksi->ksni_addr; + + if (ksi->ksni_index != ifindex || + strcmp(ksi->ksni_name, event_netdev->name)) + continue; + + if (sa->sin_addr.s_addr == ifa->ifa_local) { + ni = net->ksnn_ni; + link_down = (event == NETDEV_DOWN); + ni_state_before = lnet_set_link_fatal_state(ni, + link_down); + + if (!update_ping_buf && + (ni->ni_state == LNET_NI_STATE_ACTIVE) && + ((event == NETDEV_DOWN) != ni_state_before)) + update_ping_buf = true; + } + } + + if (update_ping_buf) + lnet_mark_ping_buffer_for_update(); +out: + return 0; +} + +/************************************ + * Net device notifier event handler + ************************************/ +static int ksocknal_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + unsigned char operstate; + + operstate = dev->operstate; + + CDEBUG(D_NET, "devevent: status=%ld, iface=%s ifindex %d state %u\n", + event, dev->name, dev->ifindex, operstate); + + switch (event) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + case NETDEV_REGISTER: + case NETDEV_UNREGISTER: + ksocknal_handle_link_state_change(dev, operstate); + break; + } + + return NOTIFY_OK; +} + +/************************************ + * Inetaddr notifier event handler + ************************************/ +static int ksocknal_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + + CDEBUG(D_NET, "addrevent: status %ld ip addr %pI4, netmask %pI4.\n", + event, &ifa->ifa_address, &ifa->ifa_mask); + + switch (event) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + ksocknal_handle_inetaddr_change(ifa, event); + break; + + } + return NOTIFY_OK; +} + +static struct notifier_block ksocknal_dev_notifier_block = { + .notifier_call = ksocknal_device_event, +}; + +static struct notifier_block ksocknal_inetaddr_notifier_block = { + .notifier_call = ksocknal_inetaddr_event, +}; + static void ksocknal_base_shutdown(void) { struct ksock_sched *sched; + struct ksock_peer_ni *peer_ni; int i; - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %lld\n", + libcfs_kmem_read()); LASSERT (ksocknal_data.ksnd_nnets == 0); - switch (ksocknal_data.ksnd_init) { - default: - LASSERT (0); + if (ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL) { + unregister_netdevice_notifier(&ksocknal_dev_notifier_block); + unregister_inetaddr_notifier(&ksocknal_inetaddr_notifier_block); + } + + switch (ksocknal_data.ksnd_init) { + default: + LASSERT(0); + fallthrough; - case SOCKNAL_INIT_ALL: - case SOCKNAL_INIT_DATA: - LASSERT (ksocknal_data.ksnd_peers != NULL); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - LASSERT(list_empty(&ksocknal_data.ksnd_peers[i])); - } + case SOCKNAL_INIT_ALL: + case SOCKNAL_INIT_DATA: + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) + LASSERT(0); LASSERT(list_empty(&ksocknal_data.ksnd_nets)); LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns)); @@ -2298,7 +2241,7 @@ ksocknal_base_shutdown(void) /* flag threads to terminate; wake and wait for them to die */ ksocknal_data.ksnd_shuttingdown = 1; wake_up_all(&ksocknal_data.ksnd_connd_waitq); - wake_up_all(&ksocknal_data.ksnd_reaper_waitq); + wake_up(&ksocknal_data.ksnd_reaper_waitq); if (ksocknal_data.ksnd_schedulers != NULL) { cfs_percpt_for_each(sched, i, @@ -2306,29 +2249,19 @@ ksocknal_base_shutdown(void) wake_up_all(&sched->kss_waitq); } - i = 4; - read_lock(&ksocknal_data.ksnd_global_lock); - while (ksocknal_data.ksnd_nthreads != 0) { - i++; - /* power of 2? */ - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "waiting for %d threads to terminate\n", - ksocknal_data.ksnd_nthreads); - read_unlock(&ksocknal_data.ksnd_global_lock); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); - read_lock(&ksocknal_data.ksnd_global_lock); - } - read_unlock(&ksocknal_data.ksnd_global_lock); + wait_var_event_warning(&ksocknal_data.ksnd_nthreads, + atomic_read(&ksocknal_data.ksnd_nthreads) == 0, + "waiting for %d threads to terminate\n", + atomic_read(&ksocknal_data.ksnd_nthreads)); - ksocknal_free_buffers(); + ksocknal_free_buffers(); - ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; - break; - } + ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; + break; + } - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); + CDEBUG(D_MALLOC, "after NAL cleanup: kmem %lld\n", + libcfs_kmem_read()); module_put(THIS_MODULE); } @@ -2340,20 +2273,12 @@ ksocknal_base_startup(void) int rc; int i; - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - LASSERT (ksocknal_data.ksnd_nnets == 0); + LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); + LASSERT(ksocknal_data.ksnd_nnets == 0); - memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ + memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */ - ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC(ksocknal_data.ksnd_peers, - sizeof(struct list_head) * - ksocknal_data.ksnd_peer_hash_size); - if (ksocknal_data.ksnd_peers == NULL) - return -ENOMEM; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); + hash_init(ksocknal_data.ksnd_peers); rwlock_init(&ksocknal_data.ksnd_global_lock); INIT_LIST_HEAD(&ksocknal_data.ksnd_nets); @@ -2376,7 +2301,8 @@ ksocknal_base_startup(void) /* flag lists/ptrs/locks initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - try_module_get(THIS_MODULE); + if (!try_module_get(THIS_MODULE)) + goto failed; /* Create a scheduler block per available CPT */ ksocknal_data.ksnd_schedulers = cfs_percpt_alloc(lnet_cpt_table(), @@ -2427,15 +2353,13 @@ ksocknal_base_startup(void) } for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) { - char name[16]; spin_lock_bh(&ksocknal_data.ksnd_connd_lock); ksocknal_data.ksnd_connd_starting++; spin_unlock_bh(&ksocknal_data.ksnd_connd_lock); - - snprintf(name, sizeof(name), "socknal_cd%02d", i); rc = ksocknal_thread_start(ksocknal_connd, - (void *)((uintptr_t)i), name); + (void *)((uintptr_t)i), + "socknal_cd%02d", i); if (rc != 0) { spin_lock_bh(&ksocknal_data.ksnd_connd_lock); ksocknal_data.ksnd_connd_starting--; @@ -2451,6 +2375,9 @@ ksocknal_base_startup(void) goto failed; } + register_netdevice_notifier(&ksocknal_dev_notifier_block); + register_inetaddr_notifier(&ksocknal_inetaddr_notifier_block); + /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; @@ -2461,207 +2388,113 @@ ksocknal_base_startup(void) return -ENETDOWN; } -static void +static int ksocknal_debug_peerhash(struct lnet_ni *ni) { - struct ksock_peer_ni *peer_ni = NULL; - struct list_head *tmp; + struct ksock_peer_ni *peer_ni; int i; read_lock(&ksocknal_data.ksnd_global_lock); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(tmp, struct ksock_peer_ni, ksnp_list); - - if (peer_ni->ksnp_ni == ni) break; + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) { + struct ksock_conn_cb *conn_cb; + struct ksock_conn *conn; - peer_ni = NULL; - } - } + if (peer_ni->ksnp_ni != ni) + continue; - if (peer_ni != NULL) { - struct ksock_route *route; - struct ksock_conn *conn; - - CWARN ("Active peer_ni on shutdown: %s, ref %d, scnt %d, " - "closing %d, accepting %d, err %d, zcookie %llu, " - "txq %d, zc_req %d\n", libcfs_id2str(peer_ni->ksnp_id), - atomic_read(&peer_ni->ksnp_refcount), - peer_ni->ksnp_sharecount, peer_ni->ksnp_closing, - peer_ni->ksnp_accepting, peer_ni->ksnp_error, - peer_ni->ksnp_zc_next_cookie, - !list_empty(&peer_ni->ksnp_tx_queue), - !list_empty(&peer_ni->ksnp_zc_req_list)); - - list_for_each(tmp, &peer_ni->ksnp_routes) { - route = list_entry(tmp, struct ksock_route, ksnr_list); - CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, " - "del %d\n", atomic_read(&route->ksnr_refcount), - route->ksnr_scheduled, route->ksnr_connecting, - route->ksnr_connected, route->ksnr_deleted); + CWARN("Active peer_ni on shutdown: %s, ref %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n", + libcfs_idstr(&peer_ni->ksnp_id), + refcount_read(&peer_ni->ksnp_refcount), + peer_ni->ksnp_closing, + peer_ni->ksnp_accepting, peer_ni->ksnp_error, + peer_ni->ksnp_zc_next_cookie, + !list_empty(&peer_ni->ksnp_tx_queue), + !list_empty(&peer_ni->ksnp_zc_req_list)); + + conn_cb = peer_ni->ksnp_conn_cb; + if (conn_cb) { + CWARN("ConnCB: ref %d, schd %d, conn %d, cnted %d, del %d\n", + refcount_read(&conn_cb->ksnr_refcount), + conn_cb->ksnr_scheduled, conn_cb->ksnr_connecting, + conn_cb->ksnr_connected, conn_cb->ksnr_deleted); } - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, struct ksock_conn, ksnc_list); - CWARN ("Conn: ref %d, sref %d, t %d, c %d\n", - atomic_read(&conn->ksnc_conn_refcount), - atomic_read(&conn->ksnc_sock_refcount), - conn->ksnc_type, conn->ksnc_closing); + list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) { + CWARN("Conn: ref %d, sref %d, t %d, c %d\n", + refcount_read(&conn->ksnc_conn_refcount), + refcount_read(&conn->ksnc_sock_refcount), + conn->ksnc_type, conn->ksnc_closing); } + break; } read_unlock(&ksocknal_data.ksnd_global_lock); - return; + return 0; } void ksocknal_shutdown(struct lnet_ni *ni) { struct ksock_net *net = ni->ni_data; - struct lnet_process_id anyid = { - .nid = LNET_NID_ANY, - .pid = LNET_PID_ANY, - }; - int i; - LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); - LASSERT(ksocknal_data.ksnd_nnets > 0); + LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); + LASSERT(ksocknal_data.ksnd_nnets > 0); - spin_lock_bh(&net->ksnn_lock); - net->ksnn_shutdown = 1; /* prevent new peers */ - spin_unlock_bh(&net->ksnn_lock); + /* prevent new peers */ + atomic_add(SOCKNAL_SHUTDOWN_BIAS, &net->ksnn_npeers); /* Delete all peers */ - ksocknal_del_peer(ni, anyid, 0); + ksocknal_del_peer(ni, NULL); /* Wait for all peer_ni state to clean up */ - i = 2; - spin_lock_bh(&net->ksnn_lock); - while (net->ksnn_npeers != 0) { - spin_unlock_bh(&net->ksnn_lock); - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "waiting for %d peers to disconnect\n", - net->ksnn_npeers); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + wait_var_event_warning(&net->ksnn_npeers, + atomic_read(&net->ksnn_npeers) == + SOCKNAL_SHUTDOWN_BIAS, + "waiting for %d peers to disconnect\n", + ksocknal_debug_peerhash(ni) + + atomic_read(&net->ksnn_npeers) - + SOCKNAL_SHUTDOWN_BIAS); - ksocknal_debug_peerhash(ni); - - spin_lock_bh(&net->ksnn_lock); - } - spin_unlock_bh(&net->ksnn_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0); - LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0); - } + LASSERT(net->ksnn_interface.ksni_npeers == 0); + LASSERT(net->ksnn_interface.ksni_nroutes == 0); list_del(&net->ksnn_list); LIBCFS_FREE(net, sizeof(*net)); - ksocknal_data.ksnd_nnets--; - if (ksocknal_data.ksnd_nnets == 0) - ksocknal_base_shutdown(); -} - -static int -ksocknal_enumerate_interfaces(struct ksock_net *net, char *iname) -{ - struct net_device *dev; - - rtnl_lock(); - for_each_netdev(&init_net, dev) { - /* The iname specified by an user land configuration can - * map to an ifa_label so always treat iname as an ifa_label. - * If iname is NULL then fall back to the net device name. - */ - const char *name = iname ? iname : dev->name; - struct in_device *in_dev; - - if (strcmp(dev->name, "lo") == 0) /* skip the loopback IF */ - continue; - - if (!(dev_get_flags(dev) & IFF_UP)) { - CWARN("Ignoring interface %s (down)\n", dev->name); - continue; - } - - in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) { - CWARN("Interface %s has no IPv4 status.\n", dev->name); - continue; - } - - for_ifa(in_dev) - if (strcmp(name, ifa->ifa_label) == 0) { - int idx = net->ksnn_ninterfaces; - struct ksock_interface *ksi; - - if (idx >= ARRAY_SIZE(net->ksnn_interfaces)) { - rtnl_unlock(); - return -E2BIG; - } - - ksi = &net->ksnn_interfaces[idx]; - ksi->ksni_ipaddr = ntohl(ifa->ifa_local); - ksi->ksni_netmask = ifa->ifa_mask; - strlcpy(ksi->ksni_name, - name, sizeof(ksi->ksni_name)); - net->ksnn_ninterfaces++; - break; - } - endfor_ifa(in_dev); - } - rtnl_unlock(); - - if (net->ksnn_ninterfaces == 0) - CERROR("Can't find any usable interfaces\n"); - - return net->ksnn_ninterfaces > 0 ? 0 : -ENOENT; + ksocknal_data.ksnd_nnets--; + if (ksocknal_data.ksnd_nnets == 0) + ksocknal_base_shutdown(); } static int ksocknal_search_new_ipif(struct ksock_net *net) { int new_ipif = 0; - int i; + char *ifnam = &net->ksnn_interface.ksni_name[0]; + char *colon = strchr(ifnam, ':'); + bool found = false; + struct ksock_net *tmp; - for (i = 0; i < net->ksnn_ninterfaces; i++) { - char *ifnam = &net->ksnn_interfaces[i].ksni_name[0]; - char *colon = strchr(ifnam, ':'); - int found = 0; - struct ksock_net *tmp; - int j; - - if (colon != NULL) /* ignore alias device */ - *colon = 0; - - list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, - ksnn_list) { - for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) { - char *ifnam2 = &tmp->ksnn_interfaces[j].\ - ksni_name[0]; - char *colon2 = strchr(ifnam2, ':'); - - if (colon2 != NULL) - *colon2 = 0; - - found = strcmp(ifnam, ifnam2) == 0; - if (colon2 != NULL) - *colon2 = ':'; - } - if (found) - break; - } + if (colon != NULL) + *colon = 0; - new_ipif += !found; - if (colon != NULL) - *colon = ':'; + list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) { + char *ifnam2 = &tmp->ksnn_interface.ksni_name[0]; + char *colon2 = strchr(ifnam2, ':'); + + if (colon2 != NULL) + *colon2 = 0; + + found = strcmp(ifnam, ifnam2) == 0; + if (colon2 != NULL) + *colon2 = ':'; } + new_ipif += !found; + if (colon != NULL) + *colon = ':'; + return new_ipif; } @@ -2690,14 +2523,12 @@ ksocknal_start_schedulers(struct ksock_sched *sched) for (i = 0; i < nthrs; i++) { long id; - char name[20]; id = KSOCK_THREAD_ID(sched->kss_cpt, sched->kss_nthreads + i); - snprintf(name, sizeof(name), "socknal_sd%02d_%02d", - sched->kss_cpt, (int)KSOCK_THREAD_SID(id)); - - rc = ksocknal_thread_start(ksocknal_scheduler, - (void *)id, name); + rc = ksocknal_thread_start(ksocknal_scheduler, (void *)id, + "socknal_sd%02d_%02d", + sched->kss_cpt, + (int)KSOCK_THREAD_SID(id)); if (rc == 0) continue; @@ -2741,152 +2572,133 @@ int ksocknal_startup(struct lnet_ni *ni) { struct ksock_net *net; - struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables; - int rc; - int i; - struct net_device *net_dev; - int node_id; - - LASSERT (ni->ni_net->net_lnd == &the_ksocklnd); - - if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { - rc = ksocknal_base_startup(); - if (rc != 0) - return rc; - } - - LIBCFS_ALLOC(net, sizeof(*net)); - if (net == NULL) - goto fail_0; + struct ksock_interface *ksi = NULL; + struct lnet_inetdev *ifaces = NULL; + int rc, if_idx; + int dev_status; + + LASSERT (ni->ni_net->net_lnd == &the_ksocklnd); + if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { + rc = ksocknal_base_startup(); + if (rc != 0) + return rc; + } + LIBCFS_ALLOC(net, sizeof(*net)); + if (net == NULL) + goto out_base; - spin_lock_init(&net->ksnn_lock); net->ksnn_incarnation = ktime_get_real_ns(); ni->ni_data = net; - net_tunables = &ni->ni_net->net_tunables; - - if (net_tunables->lct_peer_timeout == -1) - net_tunables->lct_peer_timeout = - *ksocknal_tunables.ksnd_peertimeout; - if (net_tunables->lct_max_tx_credits == -1) - net_tunables->lct_max_tx_credits = - *ksocknal_tunables.ksnd_credits; + ksocknal_tunables_setup(ni); - if (net_tunables->lct_peer_tx_credits == -1) - net_tunables->lct_peer_tx_credits = - *ksocknal_tunables.ksnd_peertxcredits; + rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns, + ni->ni_net->net_tunables.lct_version); + if (rc < 0) + goto out_net; - if (net_tunables->lct_peer_tx_credits > - net_tunables->lct_max_tx_credits) - net_tunables->lct_peer_tx_credits = - net_tunables->lct_max_tx_credits; + ksi = &net->ksnn_interface; - if (net_tunables->lct_peer_rtr_credits == -1) - net_tunables->lct_peer_rtr_credits = - *ksocknal_tunables.ksnd_peerrtrcredits; + /* Interface and/or IP address is specified otherwise default to + * the first Interface + */ + if_idx = lnet_inet_select(ni, ifaces, rc); + if (if_idx < 0) + goto out_net; - if (!ni->ni_interfaces[0]) { - rc = ksocknal_enumerate_interfaces(net, NULL); + if (!ni->ni_interface) { + rc = lnet_ni_add_interface(ni, ifaces[if_idx].li_name); if (rc < 0) - goto fail_1; - } else { - /* Before Multi-Rail ksocklnd would manage - * multiple interfaces with its own tcp bonding. - * If we encounter an old configuration using - * this tcp bonding approach then we need to - * handle more than one ni_interfaces. - * - * In Multi-Rail configuration only ONE ni_interface - * should exist. Each IP alias should be mapped to - * each 'struct net_ni'. - */ - for (i = 0; i < LNET_INTERFACES_NUM; i++) { - int j; - - if (!ni->ni_interfaces[i]) - break; - - for (j = 0; j < net->ksnn_ninterfaces; j++) { - struct ksock_interface *ksi; - - ksi = &net->ksnn_interfaces[j]; - - if (strcmp(ni->ni_interfaces[i], - ksi->ksni_name) == 0) { - CERROR("found duplicate %s\n", - ksi->ksni_name); - rc = -EEXIST; - goto fail_1; - } - } - - rc = ksocknal_enumerate_interfaces(net, ni->ni_interfaces[i]); - if (rc < 0) - goto fail_1; - } + CWARN("ksocklnd failed to allocate ni_interface\n"); } - net_dev = dev_get_by_name(&init_net, - net->ksnn_interfaces[0].ksni_name); - if (net_dev != NULL) { - node_id = dev_to_node(&net_dev->dev); - ni->ni_dev_cpt = cfs_cpt_of_node(lnet_cpt_table(), node_id); - dev_put(net_dev); + ni->ni_dev_cpt = ifaces[if_idx].li_cpt; + ksi->ksni_index = ifaces[if_idx].li_index; + if (ifaces[if_idx].li_size == sizeof(struct in6_addr)) { + struct sockaddr_in6 *sa; + sa = (void *)&ksi->ksni_addr; + memset(sa, 0, sizeof(*sa)); + sa->sin6_family = AF_INET6; + memcpy(&sa->sin6_addr, ifaces[if_idx].li_ipv6addr, + sizeof(struct in6_addr)); + ni->ni_nid.nid_size = sizeof(struct in6_addr) - 4; + memcpy(&ni->ni_nid.nid_addr, ifaces[if_idx].li_ipv6addr, + sizeof(struct in6_addr)); } else { - ni->ni_dev_cpt = CFS_CPT_ANY; + struct sockaddr_in *sa; + sa = (void *)&ksi->ksni_addr; + memset(sa, 0, sizeof(*sa)); + sa->sin_family = AF_INET; + sa->sin_addr.s_addr = ifaces[if_idx].li_ipaddr; + ksi->ksni_netmask = ifaces[if_idx].li_netmask; + ni->ni_nid.nid_size = 0; + ni->ni_nid.nid_addr[0] = sa->sin_addr.s_addr; } + strscpy(ksi->ksni_name, ifaces[if_idx].li_name, sizeof(ksi->ksni_name)); /* call it before add it to ksocknal_data.ksnd_nets */ rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts); if (rc != 0) - goto fail_1; + goto out_net; - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - net->ksnn_interfaces[0].ksni_ipaddr); - list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets); + if ((ksocknal_ip2index((struct sockaddr *)&ksi->ksni_addr, + ni, + &dev_status) < 0) || + (dev_status <= 0)) + lnet_set_link_fatal_state(ni, 1); - ksocknal_data.ksnd_nnets++; + list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets); + net->ksnn_ni = ni; + ksocknal_data.ksnd_nnets++; + kfree(ifaces); - return 0; + return 0; - fail_1: - LIBCFS_FREE(net, sizeof(*net)); - fail_0: - if (ksocknal_data.ksnd_nnets == 0) - ksocknal_base_shutdown(); +out_net: + LIBCFS_FREE(net, sizeof(*net)); +out_base: + if (ksocknal_data.ksnd_nnets == 0) + ksocknal_base_shutdown(); + kfree(ifaces); - return -ENETDOWN; + return -ENETDOWN; } - static void __exit ksocklnd_exit(void) { lnet_unregister_lnd(&the_ksocklnd); } +static const struct lnet_lnd the_ksocklnd = { + .lnd_type = SOCKLND, + .lnd_startup = ksocknal_startup, + .lnd_shutdown = ksocknal_shutdown, + .lnd_ctl = ksocknal_ctl, + .lnd_send = ksocknal_send, + .lnd_recv = ksocknal_recv, + .lnd_notify_peer_down = ksocknal_notify_gw_down, + .lnd_accept = ksocknal_accept, + .lnd_nl_get = ksocknal_nl_get, + .lnd_nl_set = ksocknal_nl_set, + .lnd_keys = &ksocknal_tunables_keys, +}; + static int __init ksocklnd_init(void) { int rc; /* check ksnr_connected/connecting field large enough */ - CLASSERT(SOCKLND_CONN_NTYPES <= 4); - CLASSERT(SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN); - - /* initialize the_ksocklnd */ - the_ksocklnd.lnd_type = SOCKLND; - the_ksocklnd.lnd_startup = ksocknal_startup; - the_ksocklnd.lnd_shutdown = ksocknal_shutdown; - the_ksocklnd.lnd_ctl = ksocknal_ctl; - the_ksocklnd.lnd_send = ksocknal_send; - the_ksocklnd.lnd_recv = ksocknal_recv; - the_ksocklnd.lnd_notify_peer_down = ksocknal_notify_gw_down; - the_ksocklnd.lnd_query = ksocknal_query; - the_ksocklnd.lnd_accept = ksocknal_accept; + BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4); + BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN); rc = ksocknal_tunables_init(); if (rc != 0) return rc; + rc = libcfs_setup(); + if (rc) + return rc; + lnet_register_lnd(&the_ksocklnd); return 0;