X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fsocklnd%2Fsocklnd.c;h=364fb54f6674a5651b24eb09e42dd41e07781ddd;hp=0db9d6d70deb2eff3d9c77c6076de0087c1613fa;hb=0d816af574b7063c0ce339b67d2066b229d20f59;hpb=05ad99f1f3a9f85756bf3355505722bb4f20f420 diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 0db9d6d..364fb54 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -23,7 +23,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2015, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -37,34 +37,34 @@ * Author: Eric Barton */ -#include #include "socklnd.h" +#include -static struct lnet_lnd the_ksocklnd; -ksock_nal_data_t ksocknal_data; +static const struct lnet_lnd the_ksocklnd; +struct ksock_nal_data ksocknal_data; -static ksock_interface_t * +static struct ksock_interface * ksocknal_ip2iface(struct lnet_ni *ni, __u32 ip) { - ksock_net_t *net = ni->ni_data; - int i; - ksock_interface_t *iface; + struct ksock_net *net = ni->ni_data; + int i; + struct ksock_interface *iface; - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(i < LNET_MAX_INTERFACES); - iface = &net->ksnn_interfaces[i]; + for (i = 0; i < net->ksnn_ninterfaces; i++) { + LASSERT(i < LNET_INTERFACES_NUM); + iface = &net->ksnn_interfaces[i]; - if (iface->ksni_ipaddr == ip) - return (iface); - } + if (iface->ksni_ipaddr == ip) + return iface; + } - return (NULL); + return NULL; } -static ksock_route_t * -ksocknal_create_route (__u32 ipaddr, int port) +static struct ksock_route * +ksocknal_create_route(__u32 ipaddr, int port) { - ksock_route_t *route; + struct ksock_route *route; LIBCFS_ALLOC (route, sizeof (*route)); if (route == NULL) @@ -86,7 +86,7 @@ ksocknal_create_route (__u32 ipaddr, int port) } void -ksocknal_destroy_route (ksock_route_t *route) +ksocknal_destroy_route(struct ksock_route *route) { LASSERT (atomic_read(&route->ksnr_refcount) == 0); @@ -96,21 +96,27 @@ ksocknal_destroy_route (ksock_route_t *route) LIBCFS_FREE (route, sizeof (*route)); } -static int -ksocknal_create_peer(ksock_peer_ni_t **peerp, struct lnet_ni *ni, - struct lnet_process_id id) +static struct ksock_peer_ni * +ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id) { - int cpt = lnet_cpt_of_nid(id.nid, ni); - ksock_net_t *net = ni->ni_data; - ksock_peer_ni_t *peer_ni; + int cpt = lnet_cpt_of_nid(id.nid, ni); + struct ksock_net *net = ni->ni_data; + struct ksock_peer_ni *peer_ni; LASSERT(id.nid != LNET_NID_ANY); LASSERT(id.pid != LNET_PID_ANY); LASSERT(!in_interrupt()); + if (!atomic_inc_unless_negative(&net->ksnn_npeers)) { + CERROR("Can't create peer_ni: network shutdown\n"); + return ERR_PTR(-ESHUTDOWN); + } + LIBCFS_CPT_ALLOC(peer_ni, lnet_cpt_table(), cpt, sizeof(*peer_ni)); - if (peer_ni == NULL) - return -ENOMEM; + if (!peer_ni) { + atomic_dec(&net->ksnn_npeers); + return ERR_PTR(-ENOMEM); + } peer_ni->ksnp_ni = ni; peer_ni->ksnp_id = id; @@ -127,28 +133,13 @@ ksocknal_create_peer(ksock_peer_ni_t **peerp, struct lnet_ni *ni, INIT_LIST_HEAD(&peer_ni->ksnp_zc_req_list); spin_lock_init(&peer_ni->ksnp_lock); - spin_lock_bh(&net->ksnn_lock); - - if (net->ksnn_shutdown) { - spin_unlock_bh(&net->ksnn_lock); - - LIBCFS_FREE(peer_ni, sizeof(*peer_ni)); - CERROR("Can't create peer_ni: network shutdown\n"); - return -ESHUTDOWN; - } - - net->ksnn_npeers++; - - spin_unlock_bh(&net->ksnn_lock); - - *peerp = peer_ni; - return 0; + return peer_ni; } void -ksocknal_destroy_peer (ksock_peer_ni_t *peer_ni) +ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni) { - ksock_net_t *net = peer_ni->ksnp_ni->ni_data; + struct ksock_net *net = peer_ni->ksnp_ni->ni_data; CDEBUG (D_NET, "peer_ni %s %p deleted\n", libcfs_id2str(peer_ni->ksnp_id), peer_ni); @@ -162,26 +153,21 @@ ksocknal_destroy_peer (ksock_peer_ni_t *peer_ni) LIBCFS_FREE(peer_ni, sizeof(*peer_ni)); - /* NB a peer_ni's connections and routes keep a reference on their peer_ni - * until they are destroyed, so we can be assured that _all_ state to - * do with this peer_ni has been cleaned up when its refcount drops to - * zero. */ - spin_lock_bh(&net->ksnn_lock); - net->ksnn_npeers--; - spin_unlock_bh(&net->ksnn_lock); + /* NB a peer_ni's connections and routes keep a reference on their + * peer_ni until they are destroyed, so we can be assured that _all_ + * state to do with this peer_ni has been cleaned up when its refcount + * drops to zero. + */ + atomic_dec(&net->ksnn_npeers); } -ksock_peer_ni_t * +struct ksock_peer_ni * ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) { - struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); - struct list_head *tmp; - ksock_peer_ni_t *peer_ni; - - list_for_each(tmp, peer_list) { - - peer_ni = list_entry(tmp, ksock_peer_ni_t, ksnp_list); + struct ksock_peer_ni *peer_ni; + hash_for_each_possible(ksocknal_data.ksnd_peers, peer_ni, + ksnp_list, id.nid) { LASSERT(!peer_ni->ksnp_closing); if (peer_ni->ksnp_ni != ni) @@ -199,10 +185,10 @@ ksocknal_find_peer_locked(struct lnet_ni *ni, struct lnet_process_id id) return NULL; } -ksock_peer_ni_t * +struct ksock_peer_ni * ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) { - ksock_peer_ni_t *peer_ni; + struct ksock_peer_ni *peer_ni; read_lock(&ksocknal_data.ksnd_global_lock); peer_ni = ksocknal_find_peer_locked(ni, id); @@ -214,31 +200,33 @@ ksocknal_find_peer(struct lnet_ni *ni, struct lnet_process_id id) } static void -ksocknal_unlink_peer_locked (ksock_peer_ni_t *peer_ni) +ksocknal_unlink_peer_locked(struct ksock_peer_ni *peer_ni) { - int i; - __u32 ip; - ksock_interface_t *iface; - - for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++) { - LASSERT (i < LNET_MAX_INTERFACES); - ip = peer_ni->ksnp_passive_ips[i]; - - iface = ksocknal_ip2iface(peer_ni->ksnp_ni, ip); - /* All IPs in peer_ni->ksnp_passive_ips[] come from the - * interface list, therefore the call must succeed. */ - LASSERT (iface != NULL); - - CDEBUG(D_NET, "peer_ni=%p iface=%p ksni_nroutes=%d\n", - peer_ni, iface, iface->ksni_nroutes); - iface->ksni_npeers--; - } + int i; + __u32 ip; + struct ksock_interface *iface; + + for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++) { + LASSERT(i < LNET_INTERFACES_NUM); + ip = peer_ni->ksnp_passive_ips[i]; + + iface = ksocknal_ip2iface(peer_ni->ksnp_ni, ip); + /* + * All IPs in peer_ni->ksnp_passive_ips[] come from the + * interface list, therefore the call must succeed. + */ + LASSERT(iface != NULL); + + CDEBUG(D_NET, "peer_ni=%p iface=%p ksni_nroutes=%d\n", + peer_ni, iface, iface->ksni_nroutes); + iface->ksni_npeers--; + } LASSERT(list_empty(&peer_ni->ksnp_conns)); LASSERT(list_empty(&peer_ni->ksnp_routes)); LASSERT(!peer_ni->ksnp_closing); peer_ni->ksnp_closing = 1; - list_del(&peer_ni->ksnp_list); + hlist_del(&peer_ni->ksnp_list); /* lose peerlist's ref */ ksocknal_peer_decref(peer_ni); } @@ -248,68 +236,64 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, struct lnet_process_id *id, __u32 *myip, __u32 *peer_ip, int *port, int *conn_count, int *share_count) { - ksock_peer_ni_t *peer_ni; - struct list_head *ptmp; - ksock_route_t *route; - struct list_head *rtmp; - int i; - int j; - int rc = -ENOENT; + struct ksock_peer_ni *peer_ni; + struct ksock_route *route; + struct list_head *rtmp; + int i; + int j; + int rc = -ENOENT; read_lock(&ksocknal_data.ksnd_global_lock); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(ptmp, ksock_peer_ni_t, ksnp_list); + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) { - if (peer_ni->ksnp_ni != ni) - continue; + if (peer_ni->ksnp_ni != ni) + continue; - if (peer_ni->ksnp_n_passive_ips == 0 && - list_empty(&peer_ni->ksnp_routes)) { - if (index-- > 0) - continue; + if (peer_ni->ksnp_n_passive_ips == 0 && + list_empty(&peer_ni->ksnp_routes)) { + if (index-- > 0) + continue; - *id = peer_ni->ksnp_id; - *myip = 0; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } + *id = peer_ni->ksnp_id; + *myip = 0; + *peer_ip = 0; + *port = 0; + *conn_count = 0; + *share_count = 0; + rc = 0; + goto out; + } - for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) { - if (index-- > 0) - continue; + for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) { + if (index-- > 0) + continue; - *id = peer_ni->ksnp_id; - *myip = peer_ni->ksnp_passive_ips[j]; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } + *id = peer_ni->ksnp_id; + *myip = peer_ni->ksnp_passive_ips[j]; + *peer_ip = 0; + *port = 0; + *conn_count = 0; + *share_count = 0; + rc = 0; + goto out; + } - list_for_each(rtmp, &peer_ni->ksnp_routes) { - if (index-- > 0) - continue; + list_for_each(rtmp, &peer_ni->ksnp_routes) { + if (index-- > 0) + continue; - route = list_entry(rtmp, ksock_route_t, - ksnr_list); + route = list_entry(rtmp, struct ksock_route, + ksnr_list); - *id = peer_ni->ksnp_id; - *myip = route->ksnr_myipaddr; - *peer_ip = route->ksnr_ipaddr; - *port = route->ksnr_port; - *conn_count = route->ksnr_conn_count; - *share_count = route->ksnr_share_count; - rc = 0; - goto out; - } + *id = peer_ni->ksnp_id; + *myip = route->ksnr_myipaddr; + *peer_ip = route->ksnr_ipaddr; + *port = route->ksnr_port; + *conn_count = route->ksnr_conn_count; + *share_count = route->ksnr_share_count; + rc = 0; + goto out; } } out: @@ -318,11 +302,11 @@ out: } static void -ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) +ksocknal_associate_route_conn_locked(struct ksock_route *route, struct ksock_conn *conn) { - ksock_peer_ni_t *peer_ni = route->ksnr_peer; - int type = conn->ksnc_type; - ksock_interface_t *iface; + struct ksock_peer_ni *peer_ni = route->ksnr_peer; + int type = conn->ksnc_type; + struct ksock_interface *iface; conn->ksnc_route = route; ksocknal_route_addref(route); @@ -362,11 +346,11 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) } static void -ksocknal_add_route_locked (ksock_peer_ni_t *peer_ni, ksock_route_t *route) +ksocknal_add_route_locked(struct ksock_peer_ni *peer_ni, struct ksock_route *route) { struct list_head *tmp; - ksock_conn_t *conn; - ksock_route_t *route2; + struct ksock_conn *conn; + struct ksock_route *route2; LASSERT(!peer_ni->ksnp_closing); LASSERT(route->ksnr_peer == NULL); @@ -376,7 +360,7 @@ ksocknal_add_route_locked (ksock_peer_ni_t *peer_ni, ksock_route_t *route) /* LASSERT(unique) */ list_for_each(tmp, &peer_ni->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); + route2 = list_entry(tmp, struct ksock_route, ksnr_list); if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { CERROR("Duplicate route %s %pI4h\n", @@ -392,7 +376,7 @@ ksocknal_add_route_locked (ksock_peer_ni_t *peer_ni, ksock_route_t *route) list_add_tail(&route->ksnr_list, &peer_ni->ksnp_routes); list_for_each(tmp, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); + conn = list_entry(tmp, struct ksock_conn, ksnc_list); if (conn->ksnc_ipaddr != route->ksnr_ipaddr) continue; @@ -403,19 +387,19 @@ ksocknal_add_route_locked (ksock_peer_ni_t *peer_ni, ksock_route_t *route) } static void -ksocknal_del_route_locked (ksock_route_t *route) +ksocknal_del_route_locked(struct ksock_route *route) { - ksock_peer_ni_t *peer_ni = route->ksnr_peer; - ksock_interface_t *iface; - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; + struct ksock_peer_ni *peer_ni = route->ksnr_peer; + struct ksock_interface *iface; + struct ksock_conn *conn; + struct list_head *ctmp; + struct list_head *cnxt; LASSERT(!route->ksnr_deleted); /* Close associated conns */ list_for_each_safe(ctmp, cnxt, &peer_ni->ksnp_conns) { - conn = list_entry(ctmp, ksock_conn_t, ksnc_list); + conn = list_entry(ctmp, struct ksock_conn, ksnc_list); if (conn->ksnc_route != route) continue; @@ -447,20 +431,19 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, int port) { struct list_head *tmp; - ksock_peer_ni_t *peer_ni; - ksock_peer_ni_t *peer2; - ksock_route_t *route; - ksock_route_t *route2; - int rc; + struct ksock_peer_ni *peer_ni; + struct ksock_peer_ni *peer2; + struct ksock_route *route; + struct ksock_route *route2; if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY) return (-EINVAL); - /* Have a brand new peer_ni ready... */ - rc = ksocknal_create_peer(&peer_ni, ni, id); - if (rc != 0) - return rc; + /* Have a brand new peer_ni ready... */ + peer_ni = ksocknal_create_peer(ni, id); + if (IS_ERR(peer_ni)) + return PTR_ERR(peer_ni); route = ksocknal_create_route (ipaddr, port); if (route == NULL) { @@ -471,7 +454,8 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, write_lock_bh(&ksocknal_data.ksnd_global_lock); /* always called with a ref on ni, so shutdown can't have started */ - LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); + LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) + >= 0); peer2 = ksocknal_find_peer_locked(ni, id); if (peer2 != NULL) { @@ -479,13 +463,12 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, peer_ni = peer2; } else { /* peer_ni table takes my ref on peer_ni */ - list_add_tail(&peer_ni->ksnp_list, - ksocknal_nid2peerlist(id.nid)); + hash_add(ksocknal_data.ksnd_peers, &peer_ni->ksnp_list, id.nid); } route2 = NULL; list_for_each(tmp, &peer_ni->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); + route2 = list_entry(tmp, struct ksock_route, ksnr_list); if (route2->ksnr_ipaddr == ipaddr) break; @@ -506,13 +489,13 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr, } static void -ksocknal_del_peer_locked (ksock_peer_ni_t *peer_ni, __u32 ip) +ksocknal_del_peer_locked(struct ksock_peer_ni *peer_ni, __u32 ip) { - ksock_conn_t *conn; - ksock_route_t *route; + struct ksock_conn *conn; + struct ksock_route *route; struct list_head *tmp; struct list_head *nxt; - int nshared; + int nshared; LASSERT(!peer_ni->ksnp_closing); @@ -520,7 +503,7 @@ ksocknal_del_peer_locked (ksock_peer_ni_t *peer_ni, __u32 ip) ksocknal_peer_addref(peer_ni); list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); + route = list_entry(tmp, struct ksock_route, ksnr_list); /* no match */ if (!(ip == 0 || route->ksnr_ipaddr == ip)) @@ -533,7 +516,7 @@ ksocknal_del_peer_locked (ksock_peer_ni_t *peer_ni, __u32 ip) nshared = 0; list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); + route = list_entry(tmp, struct ksock_route, ksnr_list); nshared += route->ksnr_share_count; } @@ -542,7 +525,7 @@ ksocknal_del_peer_locked (ksock_peer_ni_t *peer_ni, __u32 ip) * left */ list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); + route = list_entry(tmp, struct ksock_route, ksnr_list); /* we should only be removing auto-entries */ LASSERT(route->ksnr_share_count == 0); @@ -550,44 +533,41 @@ ksocknal_del_peer_locked (ksock_peer_ni_t *peer_ni, __u32 ip) } list_for_each_safe(tmp, nxt, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); + conn = list_entry(tmp, struct ksock_conn, ksnc_list); ksocknal_close_conn_locked(conn, 0); } } ksocknal_peer_decref(peer_ni); - /* NB peer_ni unlinks itself when last conn/route is removed */ + /* NB peer_ni unlinks itself when last conn/route is removed */ } static int ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) { - struct list_head zombies = LIST_HEAD_INIT(zombies); - struct list_head *ptmp; - struct list_head *pnxt; - ksock_peer_ni_t *peer_ni; - int lo; - int hi; - int i; - int rc = -ENOENT; + LIST_HEAD(zombies); + struct hlist_node *pnxt; + struct ksock_peer_ni *peer_ni; + int lo; + int hi; + int i; + int rc = -ENOENT; write_lock_bh(&ksocknal_data.ksnd_global_lock); if (id.nid != LNET_NID_ANY) { - hi = (int)(ksocknal_nid2peerlist(id.nid) - - ksocknal_data.ksnd_peers); - lo = hi; + lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers)); + hi = lo; } else { lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; + hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1; } for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, - &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(ptmp, ksock_peer_ni_t, ksnp_list); - + hlist_for_each_entry_safe(peer_ni, pnxt, + &ksocknal_data.ksnd_peers[i], + ksnp_list) { if (peer_ni->ksnp_ni != ni) continue; @@ -618,42 +598,36 @@ ksocknal_del_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ip) write_unlock_bh(&ksocknal_data.ksnd_global_lock); - ksocknal_txlist_done(ni, &zombies, 1); + ksocknal_txlist_done(ni, &zombies, -ENETDOWN); return rc; } -static ksock_conn_t * +static struct ksock_conn * ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index) { - ksock_peer_ni_t *peer_ni; - struct list_head *ptmp; - ksock_conn_t *conn; + struct ksock_peer_ni *peer_ni; + struct ksock_conn *conn; struct list_head *ctmp; - int i; + int i; read_lock(&ksocknal_data.ksnd_global_lock); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(ptmp, ksock_peer_ni_t, ksnp_list); + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) { + LASSERT(!peer_ni->ksnp_closing); - LASSERT(!peer_ni->ksnp_closing); + if (peer_ni->ksnp_ni != ni) + continue; - if (peer_ni->ksnp_ni != ni) + list_for_each(ctmp, &peer_ni->ksnp_conns) { + if (index-- > 0) continue; - list_for_each(ctmp, &peer_ni->ksnp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, ksock_conn_t, - ksnc_list); - ksocknal_conn_addref(conn); - read_unlock(&ksocknal_data. \ - ksnd_global_lock); - return conn; - } + conn = list_entry(ctmp, struct ksock_conn, + ksnc_list); + ksocknal_conn_addref(conn); + read_unlock(&ksocknal_data.ksnd_global_lock); + return conn; } } @@ -661,24 +635,21 @@ ksocknal_get_conn_by_idx(struct lnet_ni *ni, int index) return NULL; } -static ksock_sched_t * +static struct ksock_sched * ksocknal_choose_scheduler_locked(unsigned int cpt) { - struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt]; - ksock_sched_t *sched; - int i; - - LASSERT(info->ksi_nthreads > 0); + struct ksock_sched *sched = ksocknal_data.ksnd_schedulers[cpt]; + int i; - sched = &info->ksi_scheds[0]; - /* - * NB: it's safe so far, but info->ksi_nthreads could be changed - * at runtime when we have dynamic LNet configuration, then we - * need to take care of this. - */ - for (i = 1; i < info->ksi_nthreads; i++) { - if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns) - sched = &info->ksi_scheds[i]; + if (sched->kss_nthreads == 0) { + cfs_percpt_for_each(sched, i, ksocknal_data.ksnd_schedulers) { + if (sched->kss_nthreads > 0) { + CDEBUG(D_NET, "scheduler[%d] has no threads. selected scheduler[%d]\n", + cpt, sched->kss_cpt); + return sched; + } + } + return NULL; } return sched; @@ -687,40 +658,42 @@ ksocknal_choose_scheduler_locked(unsigned int cpt) static int ksocknal_local_ipvec(struct lnet_ni *ni, __u32 *ipaddrs) { - ksock_net_t *net = ni->ni_data; - int i; - int nip; + struct ksock_net *net = ni->ni_data; + int i; + int nip; read_lock(&ksocknal_data.ksnd_global_lock); - nip = net->ksnn_ninterfaces; - LASSERT (nip <= LNET_MAX_INTERFACES); + nip = net->ksnn_ninterfaces; + LASSERT(nip <= LNET_INTERFACES_NUM); - /* Only offer interfaces for additional connections if I have - * more than one. */ - if (nip < 2) { + /* + * Only offer interfaces for additional connections if I have + * more than one. + */ + if (nip < 2) { read_unlock(&ksocknal_data.ksnd_global_lock); - return 0; - } + return 0; + } - for (i = 0; i < nip; i++) { - ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; - LASSERT (ipaddrs[i] != 0); - } + for (i = 0; i < nip; i++) { + ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; + LASSERT(ipaddrs[i] != 0); + } read_unlock(&ksocknal_data.ksnd_global_lock); - return (nip); + return nip; } static int -ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips) +ksocknal_match_peerip(struct ksock_interface *iface, __u32 *ips, int nips) { - int best_netmatch = 0; - int best_xor = 0; - int best = -1; - int this_xor; - int this_netmatch; - int i; + int best_netmatch = 0; + int best_xor = 0; + int best = -1; + int this_xor; + int this_netmatch; + int i; for (i = 0; i < nips; i++) { if (ips[i] == 0) @@ -745,21 +718,21 @@ ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips) } static int -ksocknal_select_ips(ksock_peer_ni_t *peer_ni, __u32 *peerips, int n_peerips) +ksocknal_select_ips(struct ksock_peer_ni *peer_ni, __u32 *peerips, int n_peerips) { - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - ksock_net_t *net = peer_ni->ksnp_ni->ni_data; - ksock_interface_t *iface; - ksock_interface_t *best_iface; - int n_ips; - int i; - int j; - int k; - __u32 ip; - __u32 xor; - int this_netmatch; - int best_netmatch; - int best_npeers; + rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; + struct ksock_net *net = peer_ni->ksnp_ni->ni_data; + struct ksock_interface *iface; + struct ksock_interface *best_iface; + int n_ips; + int i; + int j; + int k; + u32 ip; + u32 xor; + int this_netmatch; + int best_netmatch; + int best_npeers; /* CAVEAT EMPTOR: We do all our interface matching with an * exclusive hold of global lock at IRQ priority. We're only @@ -771,8 +744,8 @@ ksocknal_select_ips(ksock_peer_ni_t *peer_ni, __u32 *peerips, int n_peerips) write_lock_bh(global_lock); - LASSERT (n_peerips <= LNET_MAX_INTERFACES); - LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); + LASSERT(n_peerips <= LNET_INTERFACES_NUM); + LASSERT(net->ksnn_ninterfaces <= LNET_INTERFACES_NUM); /* Only match interfaces for additional connections * if I have > 1 interface */ @@ -851,17 +824,17 @@ ksocknal_select_ips(ksock_peer_ni_t *peer_ni, __u32 *peerips, int n_peerips) } static void -ksocknal_create_routes(ksock_peer_ni_t *peer_ni, int port, +ksocknal_create_routes(struct ksock_peer_ni *peer_ni, int port, __u32 *peer_ipaddrs, int npeer_ipaddrs) { - ksock_route_t *newroute = NULL; + struct ksock_route *newroute = NULL; rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; struct lnet_ni *ni = peer_ni->ksnp_ni; - ksock_net_t *net = ni->ni_data; + struct ksock_net *net = ni->ni_data; struct list_head *rtmp; - ksock_route_t *route; - ksock_interface_t *iface; - ksock_interface_t *best_iface; + struct ksock_route *route; + struct ksock_interface *iface; + struct ksock_interface *best_iface; int best_netmatch; int this_netmatch; int best_nroutes; @@ -882,7 +855,7 @@ ksocknal_create_routes(ksock_peer_ni_t *peer_ni, int port, return; } - LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES); + LASSERT(npeer_ipaddrs <= LNET_INTERFACES_NUM); for (i = 0; i < npeer_ipaddrs; i++) { if (newroute != NULL) { @@ -905,7 +878,7 @@ ksocknal_create_routes(ksock_peer_ni_t *peer_ni, int port, /* Already got a route? */ route = NULL; list_for_each(rtmp, &peer_ni->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); + route = list_entry(rtmp, struct ksock_route, ksnr_list); if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) break; @@ -919,7 +892,7 @@ ksocknal_create_routes(ksock_peer_ni_t *peer_ni, int port, best_nroutes = 0; best_netmatch = 0; - LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); + LASSERT(net->ksnn_ninterfaces <= LNET_INTERFACES_NUM); /* Select interface to connect from */ for (j = 0; j < net->ksnn_ninterfaces; j++) { @@ -927,7 +900,7 @@ ksocknal_create_routes(ksock_peer_ni_t *peer_ni, int port, /* Using this interface already? */ list_for_each(rtmp, &peer_ni->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, + route = list_entry(rtmp, struct ksock_route, ksnr_list); if (route->ksnr_myipaddr == iface->ksni_ipaddr) @@ -971,10 +944,10 @@ ksocknal_create_routes(ksock_peer_ni_t *peer_ni, int port, int ksocknal_accept(struct lnet_ni *ni, struct socket *sock) { - ksock_connreq_t *cr; - int rc; - __u32 peer_ip; - int peer_port; + struct ksock_connreq *cr; + int rc; + u32 peer_ip; + int peer_port; rc = lnet_sock_getaddr(sock, true, &peer_ip, &peer_port); LASSERT(rc == 0); /* we succeeded before */ @@ -1000,9 +973,9 @@ ksocknal_accept(struct lnet_ni *ni, struct socket *sock) } static int -ksocknal_connecting (ksock_peer_ni_t *peer_ni, __u32 ipaddr) +ksocknal_connecting(struct ksock_peer_ni *peer_ni, __u32 ipaddr) { - ksock_route_t *route; + struct ksock_route *route; list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list) { if (route->ksnr_ipaddr == ipaddr) @@ -1012,26 +985,27 @@ ksocknal_connecting (ksock_peer_ni_t *peer_ni, __u32 ipaddr) } int -ksocknal_create_conn(struct lnet_ni *ni, ksock_route_t *route, +ksocknal_create_conn(struct lnet_ni *ni, struct ksock_route *route, struct socket *sock, int type) { - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - struct list_head zombies = LIST_HEAD_INIT(zombies); + rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; + LIST_HEAD(zombies); struct lnet_process_id peerid; - struct list_head *tmp; - __u64 incarnation; - ksock_conn_t *conn; - ksock_conn_t *conn2; - ksock_peer_ni_t *peer_ni = NULL; - ksock_peer_ni_t *peer2; - ksock_sched_t *sched; + struct list_head *tmp; + u64 incarnation; + struct ksock_conn *conn; + struct ksock_conn *conn2; + struct ksock_peer_ni *peer_ni = NULL; + struct ksock_peer_ni *peer2; + struct ksock_sched *sched; struct ksock_hello_msg *hello; - int cpt; - ksock_tx_t *tx; - ksock_tx_t *txtmp; - int rc; - int active; - char *warn = NULL; + int cpt; + struct ksock_tx *tx; + struct ksock_tx *txtmp; + int rc; + int rc2; + int active; + char *warn = NULL; active = (route != NULL); @@ -1063,7 +1037,7 @@ ksocknal_create_conn(struct lnet_ni *ni, ksock_route_t *route, atomic_set (&conn->ksnc_tx_nob, 0); LIBCFS_ALLOC(hello, offsetof(struct ksock_hello_msg, - kshm_ips[LNET_MAX_INTERFACES])); + kshm_ips[LNET_INTERFACES_NUM])); if (hello == NULL) { rc = -ENOMEM; goto failed_1; @@ -1122,25 +1096,27 @@ ksocknal_create_conn(struct lnet_ni *ni, ksock_route_t *route, cpt = lnet_cpt_of_nid(peerid.nid, ni); - if (active) { - ksocknal_peer_addref(peer_ni); + if (active) { + ksocknal_peer_addref(peer_ni); write_lock_bh(global_lock); - } else { - rc = ksocknal_create_peer(&peer_ni, ni, peerid); - if (rc != 0) - goto failed_1; + } else { + peer_ni = ksocknal_create_peer(ni, peerid); + if (IS_ERR(peer_ni)) { + rc = PTR_ERR(peer_ni); + goto failed_1; + } write_lock_bh(global_lock); - /* called with a ref on ni, so shutdown can't have started */ - LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); + /* called with a ref on ni, so shutdown can't have started */ + LASSERT(atomic_read(&((struct ksock_net *)ni->ni_data)->ksnn_npeers) >= 0); peer2 = ksocknal_find_peer_locked(ni, peerid); if (peer2 == NULL) { /* NB this puts an "empty" peer_ni in the peer_ni * table (which takes my ref) */ - list_add_tail(&peer_ni->ksnp_list, - ksocknal_nid2peerlist(peerid.nid)); + hash_add(ksocknal_data.ksnd_peers, + &peer_ni->ksnp_list, peerid.nid); } else { ksocknal_peer_decref(peer_ni); peer_ni = peer2; @@ -1209,7 +1185,7 @@ ksocknal_create_conn(struct lnet_ni *ni, ksock_route_t *route, * loopback connection */ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { list_for_each(tmp, &peer_ni->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); + conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || conn2->ksnc_myipaddr != conn->ksnc_myipaddr || @@ -1243,7 +1219,7 @@ ksocknal_create_conn(struct lnet_ni *ni, ksock_route_t *route, * by routes in my peer_ni to match my own route entries so I don't * continually create duplicate routes. */ list_for_each(tmp, &peer_ni->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); + route = list_entry(tmp, struct ksock_route, ksnr_list); if (route->ksnr_ipaddr != conn->ksnc_ipaddr) continue; @@ -1253,18 +1229,28 @@ ksocknal_create_conn(struct lnet_ni *ni, ksock_route_t *route, } conn->ksnc_peer = peer_ni; /* conn takes my ref on peer_ni */ - peer_ni->ksnp_last_alive = ktime_get_real_seconds(); + peer_ni->ksnp_last_alive = ktime_get_seconds(); peer_ni->ksnp_send_keepalive = 0; peer_ni->ksnp_error = 0; sched = ksocknal_choose_scheduler_locked(cpt); + if (!sched) { + CERROR("no schedulers available. node is unhealthy\n"); + goto failed_2; + } + /* + * The cpt might have changed if we ended up selecting a non cpt + * native scheduler. So use the scheduler's cpt instead. + */ + cpt = sched->kss_cpt; sched->kss_nconns++; conn->ksnc_scheduler = sched; - conn->ksnc_tx_last_post = ktime_get_real_seconds(); + conn->ksnc_tx_last_post = ktime_get_seconds(); /* Set the deadline for the outgoing HELLO to drain */ conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued; - conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); + conn->ksnc_tx_deadline = ktime_get_seconds() + + lnet_get_lnd_timeout(); smp_mb(); /* order with adding to peer_ni's conn list */ list_add(&conn->ksnc_list, &peer_ni->ksnp_conns); @@ -1295,11 +1281,10 @@ ksocknal_create_conn(struct lnet_ni *ni, ksock_route_t *route, */ CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d" - " incarnation:%lld sched[%d:%d]\n", + " incarnation:%lld sched[%d]\n", libcfs_id2str(peerid), conn->ksnc_proto->pro_version, &conn->ksnc_myipaddr, &conn->ksnc_ipaddr, - conn->ksnc_port, incarnation, cpt, - (int)(sched - &sched->kss_info->ksi_scheds[0])); + conn->ksnc_port, incarnation, cpt); if (active) { /* additional routes after interface exchange? */ @@ -1312,7 +1297,7 @@ ksocknal_create_conn(struct lnet_ni *ni, ksock_route_t *route, } LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg, - kshm_ips[LNET_MAX_INTERFACES])); + kshm_ips[LNET_INTERFACES_NUM])); /* setup the socket AFTER I've received hello (it disables * SO_LINGER). I might call back to the acceptor who may want @@ -1384,13 +1369,19 @@ failed_2: write_unlock_bh(global_lock); } - ksocknal_txlist_done(ni, &zombies, 1); + /* + * If we get here without an error code, just use -EALREADY. + * Depending on how we got here, the error may be positive + * or negative. Normalize the value for ksocknal_txlist_done(). + */ + rc2 = (rc == 0 ? -EALREADY : (rc > 0 ? -rc : rc)); + ksocknal_txlist_done(ni, &zombies, rc2); ksocknal_peer_decref(peer_ni); failed_1: if (hello != NULL) LIBCFS_FREE(hello, offsetof(struct ksock_hello_msg, - kshm_ips[LNET_MAX_INTERFACES])); + kshm_ips[LNET_INTERFACES_NUM])); LIBCFS_FREE(conn, sizeof(*conn)); @@ -1400,15 +1391,15 @@ failed_0: } void -ksocknal_close_conn_locked (ksock_conn_t *conn, int error) +ksocknal_close_conn_locked(struct ksock_conn *conn, int error) { /* This just does the immmediate housekeeping, and queues the * connection for the reaper to terminate. * Caller holds ksnd_global_lock exclusively in irq context */ - ksock_peer_ni_t *peer_ni = conn->ksnc_peer; - ksock_route_t *route; - ksock_conn_t *conn2; - struct list_head *tmp; + struct ksock_peer_ni *peer_ni = conn->ksnc_peer; + struct ksock_route *route; + struct ksock_conn *conn2; + struct list_head *tmp; LASSERT(peer_ni->ksnp_error == 0); LASSERT(!conn->ksnc_closing); @@ -1425,7 +1416,7 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) conn2 = NULL; list_for_each(tmp, &peer_ni->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); + conn2 = list_entry(tmp, struct ksock_conn, ksnc_list); if (conn2->ksnc_route == route && conn2->ksnc_type == conn->ksnc_type) @@ -1445,7 +1436,7 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) /* No more connections to this peer_ni */ if (!list_empty(&peer_ni->ksnp_tx_queue)) { - ksock_tx_t *tx; + struct ksock_tx *tx; LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x); @@ -1475,18 +1466,17 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); - list_add_tail(&conn->ksnc_list, - &ksocknal_data.ksnd_deathrow_conns); + list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns); wake_up(&ksocknal_data.ksnd_reaper_waitq); spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock); } void -ksocknal_peer_failed (ksock_peer_ni_t *peer_ni) +ksocknal_peer_failed(struct ksock_peer_ni *peer_ni) { - int notify = 0; - cfs_time_t last_alive = 0; + int notify = 0; + time64_t last_alive = 0; /* There has been a connection failure or comms error; but I'll only * tell LNET I think the peer_ni is dead if it's to another kernel and @@ -1505,17 +1495,17 @@ ksocknal_peer_failed (ksock_peer_ni_t *peer_ni) read_unlock(&ksocknal_data.ksnd_global_lock); if (notify) - lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid, 0, - last_alive); + lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid, + false, false, last_alive); } void -ksocknal_finalize_zcreq(ksock_conn_t *conn) +ksocknal_finalize_zcreq(struct ksock_conn *conn) { - ksock_peer_ni_t *peer_ni = conn->ksnc_peer; - ksock_tx_t *tx; - ksock_tx_t *tmp; - struct list_head zlist = LIST_HEAD_INIT(zlist); + struct ksock_peer_ni *peer_ni = conn->ksnc_peer; + struct ksock_tx *tx; + struct ksock_tx *tmp; + LIST_HEAD(zlist); /* NB safe to finalize TXs because closing of socket will * abort all buffered data */ @@ -1531,14 +1521,13 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn) tx->tx_msg.ksm_zc_cookies[0] = 0; tx->tx_zc_aborted = 1; /* mark it as not-acked */ - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); + list_move(&tx->tx_zc_list, &zlist); } spin_unlock(&peer_ni->ksnp_lock); while (!list_empty(&zlist)) { - tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list); + tx = list_entry(zlist.next, struct ksock_tx, tx_zc_list); list_del(&tx->tx_zc_list); ksocknal_tx_decref(tx); @@ -1546,15 +1535,15 @@ ksocknal_finalize_zcreq(ksock_conn_t *conn) } void -ksocknal_terminate_conn(ksock_conn_t *conn) +ksocknal_terminate_conn(struct ksock_conn *conn) { /* This gets called by the reaper (guaranteed thread context) to * disengage the socket from its callbacks and close it. * ksnc_refcount will eventually hit zero, and then the reaper will * destroy it. */ - ksock_peer_ni_t *peer_ni = conn->ksnc_peer; - ksock_sched_t *sched = conn->ksnc_scheduler; - int failed = 0; + struct ksock_peer_ni *peer_ni = conn->ksnc_peer; + struct ksock_sched *sched = conn->ksnc_scheduler; + int failed = 0; LASSERT(conn->ksnc_closing); @@ -1607,10 +1596,9 @@ ksocknal_terminate_conn(ksock_conn_t *conn) } void -ksocknal_queue_zombie_conn (ksock_conn_t *conn) +ksocknal_queue_zombie_conn(struct ksock_conn *conn) { /* Queue the conn for the reaper to destroy */ - LASSERT(atomic_read(&conn->ksnc_conn_refcount) == 0); spin_lock_bh(&ksocknal_data.ksnd_reaper_lock); @@ -1621,9 +1609,9 @@ ksocknal_queue_zombie_conn (ksock_conn_t *conn) } void -ksocknal_destroy_conn (ksock_conn_t *conn) +ksocknal_destroy_conn(struct ksock_conn *conn) { - cfs_time_t last_rcv; + time64_t last_rcv; /* Final coup-de-grace of the reaper */ CDEBUG (D_NET, "connection %p\n", conn); @@ -1640,18 +1628,19 @@ ksocknal_destroy_conn (ksock_conn_t *conn) switch (conn->ksnc_rx_state) { case SOCKNAL_RX_LNET_PAYLOAD: last_rcv = conn->ksnc_rx_deadline - - cfs_time_seconds(*ksocknal_tunables.ksnd_timeout); + lnet_get_lnd_timeout(); CERROR("Completing partial receive from %s[%d], " "ip %pI4h:%d, with error, wanted: %d, left: %d, " - "last alive is %ld secs ago\n", + "last alive is %lld secs ago\n", libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, &conn->ksnc_ipaddr, conn->ksnc_port, conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, - cfs_duration_sec(cfs_time_sub(ktime_get_real_seconds(), - last_rcv))); - lnet_finalize (conn->ksnc_peer->ksnp_ni, - conn->ksnc_cookie, -EIO); - break; + ktime_get_seconds() - last_rcv); + if (conn->ksnc_lnet_msg) + conn->ksnc_lnet_msg->msg_health_status = + LNET_MSG_STATUS_REMOTE_ERROR; + lnet_finalize(conn->ksnc_lnet_msg, -EIO); + break; case SOCKNAL_RX_LNET_HEADER: if (conn->ksnc_rx_started) CERROR("Incomplete receive of lnet header from %s, " @@ -1686,15 +1675,15 @@ ksocknal_destroy_conn (ksock_conn_t *conn) } int -ksocknal_close_peer_conns_locked (ksock_peer_ni_t *peer_ni, __u32 ipaddr, int why) +ksocknal_close_peer_conns_locked(struct ksock_peer_ni *peer_ni, __u32 ipaddr, int why) { - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; + struct ksock_conn *conn; + struct list_head *ctmp; + struct list_head *cnxt; + int count = 0; list_for_each_safe(ctmp, cnxt, &peer_ni->ksnp_conns) { - conn = list_entry(ctmp, ksock_conn_t, ksnc_list); + conn = list_entry(ctmp, struct ksock_conn, ksnc_list); if (ipaddr == 0 || conn->ksnc_ipaddr == ipaddr) { @@ -1707,11 +1696,11 @@ ksocknal_close_peer_conns_locked (ksock_peer_ni_t *peer_ni, __u32 ipaddr, int wh } int -ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) +ksocknal_close_conn_and_siblings(struct ksock_conn *conn, int why) { - ksock_peer_ni_t *peer_ni = conn->ksnc_peer; - __u32 ipaddr = conn->ksnc_ipaddr; - int count; + struct ksock_peer_ni *peer_ni = conn->ksnc_peer; + u32 ipaddr = conn->ksnc_ipaddr; + int count; write_lock_bh(&ksocknal_data.ksnd_global_lock); @@ -1725,47 +1714,50 @@ ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) int ksocknal_close_matching_conns(struct lnet_process_id id, __u32 ipaddr) { - ksock_peer_ni_t *peer_ni; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; + struct ksock_peer_ni *peer_ni; + struct hlist_node *pnxt; + int lo; + int hi; + int i; + int count = 0; write_lock_bh(&ksocknal_data.ksnd_global_lock); - if (id.nid != LNET_NID_ANY) - lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); - else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { + if (id.nid != LNET_NID_ANY) { + lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers)); + hi = lo; + } else { + lo = 0; + hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1; + } - peer_ni = list_entry(ptmp, ksock_peer_ni_t, ksnp_list); + for (i = lo; i <= hi; i++) { + hlist_for_each_entry_safe(peer_ni, pnxt, + &ksocknal_data.ksnd_peers[i], + ksnp_list) { - if (!((id.nid == LNET_NID_ANY || id.nid == peer_ni->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || id.pid == peer_ni->ksnp_id.pid))) - continue; + if (!((id.nid == LNET_NID_ANY || + id.nid == peer_ni->ksnp_id.nid) && + (id.pid == LNET_PID_ANY || + id.pid == peer_ni->ksnp_id.pid))) + continue; - count += ksocknal_close_peer_conns_locked (peer_ni, ipaddr, 0); - } - } + count += ksocknal_close_peer_conns_locked(peer_ni, + ipaddr, 0); + } + } write_unlock_bh(&ksocknal_data.ksnd_global_lock); - /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) - return (0); + /* wildcards always succeed */ + if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) + return 0; - return (count == 0 ? -ENOENT : 0); + return (count == 0 ? -ENOENT : 0); } void -ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive) +ksocknal_notify_gw_down(lnet_nid_t gw_nid) { /* The router is telling me she's been notified of a change in * gateway state.... @@ -1775,90 +1767,23 @@ ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive) .pid = LNET_PID_ANY, }; - CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), - alive ? "up" : "down"); - - if (!alive) { - /* If the gateway crashed, close all open connections... */ - ksocknal_close_matching_conns (id, 0); - return; - } - - /* ...otherwise do nothing. We can only establish new connections - * if we have autroutes, and these connect on demand. */ -} + CDEBUG(D_NET, "gw %s down\n", libcfs_nid2str(gw_nid)); -void -ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when) -{ - int connect = 1; - time64_t last_alive = 0; - time64_t now = ktime_get_real_seconds(); - ksock_peer_ni_t *peer_ni = NULL; - rwlock_t *glock = &ksocknal_data.ksnd_global_lock; - struct lnet_process_id id = { - .nid = nid, - .pid = LNET_PID_LUSTRE, - }; - - read_lock(glock); - - peer_ni = ksocknal_find_peer_locked(ni, id); - if (peer_ni != NULL) { - struct list_head *tmp; - ksock_conn_t *conn; - int bufnob; - - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; - - if (bufnob < conn->ksnc_tx_bufnob) { - /* something got ACKed */ - conn->ksnc_tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - peer_ni->ksnp_last_alive = now; - conn->ksnc_tx_bufnob = bufnob; - } - } - - last_alive = peer_ni->ksnp_last_alive; - if (ksocknal_find_connectable_route_locked(peer_ni) == NULL) - connect = 0; - } - - read_unlock(glock); - - if (last_alive != 0) - *when = last_alive; - - CDEBUG(D_NET, "peer_ni %s %p, alive %ld secs ago, connect %d\n", - libcfs_nid2str(nid), peer_ni, - last_alive ? cfs_duration_sec(now - last_alive) : -1, - connect); - - if (!connect) - return; - - ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port()); - - write_lock_bh(glock); - - peer_ni = ksocknal_find_peer_locked(ni, id); - if (peer_ni != NULL) - ksocknal_launch_all_connections_locked(peer_ni); + /* If the gateway crashed, close all open connections... */ + ksocknal_close_matching_conns(id, 0); + return; - write_unlock_bh(glock); - return; + /* We can only establish new connections + * if we have autroutes, and these connect on demand. */ } static void -ksocknal_push_peer (ksock_peer_ni_t *peer_ni) +ksocknal_push_peer(struct ksock_peer_ni *peer_ni) { - int index; - int i; - struct list_head *tmp; - ksock_conn_t *conn; + int index; + int i; + struct list_head *tmp; + struct ksock_conn *conn; for (index = 0; ; index++) { read_lock(&ksocknal_data.ksnd_global_lock); @@ -1868,8 +1793,8 @@ ksocknal_push_peer (ksock_peer_ni_t *peer_ni) list_for_each(tmp, &peer_ni->ksnp_conns) { if (i++ == index) { - conn = list_entry(tmp, ksock_conn_t, - ksnc_list); + conn = list_entry(tmp, struct ksock_conn, + ksnc_list); ksocknal_conn_addref(conn); break; } @@ -1888,28 +1813,30 @@ ksocknal_push_peer (ksock_peer_ni_t *peer_ni) static int ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) { - struct list_head *start; - struct list_head *end; - struct list_head *tmp; - int rc = -ENOENT; - unsigned int hsize = ksocknal_data.ksnd_peer_hash_size; + int lo; + int hi; + int bkt; + int rc = -ENOENT; - if (id.nid == LNET_NID_ANY) { - start = &ksocknal_data.ksnd_peers[0]; - end = &ksocknal_data.ksnd_peers[hsize - 1]; + if (id.nid != LNET_NID_ANY) { + lo = hash_min(id.nid, HASH_BITS(ksocknal_data.ksnd_peers)); + hi = lo; } else { - start = end = ksocknal_nid2peerlist(id.nid); + lo = 0; + hi = HASH_SIZE(ksocknal_data.ksnd_peers) - 1; } - for (tmp = start; tmp <= end; tmp++) { - int peer_off; /* searching offset in peer_ni hash table */ + for (bkt = lo; bkt <= hi; bkt++) { + int peer_off; /* searching offset in peer_ni hash table */ for (peer_off = 0; ; peer_off++) { - ksock_peer_ni_t *peer_ni; + struct ksock_peer_ni *peer_ni; int i = 0; read_lock(&ksocknal_data.ksnd_global_lock); - list_for_each_entry(peer_ni, tmp, ksnp_list) { + hlist_for_each_entry(peer_ni, + &ksocknal_data.ksnd_peers[bkt], + ksnp_list) { if (!((id.nid == LNET_NID_ANY || id.nid == peer_ni->ksnp_id.nid) && (id.pid == LNET_PID_ANY || @@ -1923,7 +1850,7 @@ ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) } read_unlock(&ksocknal_data.ksnd_global_lock); - if (i == 0) /* no match */ + if (i <= peer_off) /* no match */ break; rc = 0; @@ -1937,74 +1864,70 @@ ksocknal_push(struct lnet_ni *ni, struct lnet_process_id id) static int ksocknal_add_interface(struct lnet_ni *ni, __u32 ipaddress, __u32 netmask) { - ksock_net_t *net = ni->ni_data; - ksock_interface_t *iface; - int rc; - int i; - int j; - struct list_head *ptmp; - ksock_peer_ni_t *peer_ni; - struct list_head *rtmp; - ksock_route_t *route; - - if (ipaddress == 0 || - netmask == 0) - return (-EINVAL); + struct ksock_net *net = ni->ni_data; + struct ksock_interface *iface; + int rc; + int i; + int j; + struct ksock_peer_ni *peer_ni; + struct list_head *rtmp; + struct ksock_route *route; + + if (ipaddress == 0 || + netmask == 0) + return -EINVAL; write_lock_bh(&ksocknal_data.ksnd_global_lock); - iface = ksocknal_ip2iface(ni, ipaddress); - if (iface != NULL) { - /* silently ignore dups */ - rc = 0; - } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) { - rc = -ENOSPC; - } else { - iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++]; - - iface->ksni_ipaddr = ipaddress; - iface->ksni_netmask = netmask; - iface->ksni_nroutes = 0; - iface->ksni_npeers = 0; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(ptmp, ksock_peer_ni_t, - ksnp_list); - - for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) - if (peer_ni->ksnp_passive_ips[j] == ipaddress) - iface->ksni_npeers++; - - list_for_each(rtmp, &peer_ni->ksnp_routes) { - route = list_entry(rtmp, - ksock_route_t, - ksnr_list); - - if (route->ksnr_myipaddr == ipaddress) - iface->ksni_nroutes++; - } - } - } + iface = ksocknal_ip2iface(ni, ipaddress); + if (iface != NULL) { + /* silently ignore dups */ + rc = 0; + } else if (net->ksnn_ninterfaces == LNET_INTERFACES_NUM) { + rc = -ENOSPC; + } else { + iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++]; - rc = 0; - /* NB only new connections will pay attention to the new interface! */ - } + iface->ksni_ipaddr = ipaddress; + iface->ksni_netmask = netmask; + iface->ksni_nroutes = 0; + iface->ksni_npeers = 0; + + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) { + for (j = 0; j < peer_ni->ksnp_n_passive_ips; j++) + if (peer_ni->ksnp_passive_ips[j] == ipaddress) + iface->ksni_npeers++; + + list_for_each(rtmp, &peer_ni->ksnp_routes) { + route = list_entry(rtmp, + struct ksock_route, + ksnr_list); + + if (route->ksnr_myipaddr == ipaddress) + iface->ksni_nroutes++; + } + } + + rc = 0; + /* NB only new connections will pay attention to the new + * interface! + */ + } write_unlock_bh(&ksocknal_data.ksnd_global_lock); - return (rc); + return rc; } static void -ksocknal_peer_del_interface_locked(ksock_peer_ni_t *peer_ni, __u32 ipaddr) +ksocknal_peer_del_interface_locked(struct ksock_peer_ni *peer_ni, __u32 ipaddr) { - struct list_head *tmp; - struct list_head *nxt; - ksock_route_t *route; - ksock_conn_t *conn; - int i; - int j; + struct list_head *tmp; + struct list_head *nxt; + struct ksock_route *route; + struct ksock_conn *conn; + int i; + int j; for (i = 0; i < peer_ni->ksnp_n_passive_ips; i++) if (peer_ni->ksnp_passive_ips[i] == ipaddr) { @@ -2016,7 +1939,7 @@ ksocknal_peer_del_interface_locked(ksock_peer_ni_t *peer_ni, __u32 ipaddr) } list_for_each_safe(tmp, nxt, &peer_ni->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); + route = list_entry(tmp, struct ksock_route, ksnr_list); if (route->ksnr_myipaddr != ipaddr) continue; @@ -2030,7 +1953,7 @@ ksocknal_peer_del_interface_locked(ksock_peer_ni_t *peer_ni, __u32 ipaddr) } list_for_each_safe(tmp, nxt, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); + conn = list_entry(tmp, struct ksock_conn, ksnc_list); if (conn->ksnc_myipaddr == ipaddr) ksocknal_close_conn_locked (conn, 0); @@ -2040,49 +1963,43 @@ ksocknal_peer_del_interface_locked(ksock_peer_ni_t *peer_ni, __u32 ipaddr) static int ksocknal_del_interface(struct lnet_ni *ni, __u32 ipaddress) { - ksock_net_t *net = ni->ni_data; - int rc = -ENOENT; - struct list_head *tmp; - struct list_head *nxt; - ksock_peer_ni_t *peer_ni; - __u32 this_ip; - int i; - int j; + struct ksock_net *net = ni->ni_data; + int rc = -ENOENT; + struct hlist_node *nxt; + struct ksock_peer_ni *peer_ni; + u32 this_ip; + int i; + int j; write_lock_bh(&ksocknal_data.ksnd_global_lock); - for (i = 0; i < net->ksnn_ninterfaces; i++) { - this_ip = net->ksnn_interfaces[i].ksni_ipaddr; - - if (!(ipaddress == 0 || - ipaddress == this_ip)) - continue; + for (i = 0; i < net->ksnn_ninterfaces; i++) { + this_ip = net->ksnn_interfaces[i].ksni_ipaddr; - rc = 0; + if (!(ipaddress == 0 || + ipaddress == this_ip)) + continue; - for (j = i+1; j < net->ksnn_ninterfaces; j++) - net->ksnn_interfaces[j-1] = - net->ksnn_interfaces[j]; + rc = 0; - net->ksnn_ninterfaces--; + for (j = i+1; j < net->ksnn_ninterfaces; j++) + net->ksnn_interfaces[j-1] = + net->ksnn_interfaces[j]; - for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { - list_for_each_safe(tmp, nxt, - &ksocknal_data.ksnd_peers[j]) { - peer_ni = list_entry(tmp, ksock_peer_ni_t, - ksnp_list); + net->ksnn_ninterfaces--; - if (peer_ni->ksnp_ni != ni) - continue; + hash_for_each_safe(ksocknal_data.ksnd_peers, j, + nxt, peer_ni, ksnp_list) { + if (peer_ni->ksnp_ni != ni) + continue; - ksocknal_peer_del_interface_locked(peer_ni, this_ip); - } - } - } + ksocknal_peer_del_interface_locked(peer_ni, this_ip); + } + } write_unlock_bh(&ksocknal_data.ksnd_global_lock); - return (rc); + return rc; } int @@ -2094,8 +2011,8 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) switch(cmd) { case IOC_LIBCFS_GET_INTERFACE: { - ksock_net_t *net = ni->ni_data; - ksock_interface_t *iface; + struct ksock_net *net = ni->ni_data; + struct ksock_interface *iface; read_lock(&ksocknal_data.ksnd_global_lock); @@ -2164,7 +2081,7 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) int txmem; int rxmem; int nagle; - ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count); + struct ksock_conn *conn = ksocknal_get_conn_by_idx(ni, data->ioc_count); if (conn == NULL) return -ENOENT; @@ -2178,7 +2095,7 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) data->ioc_u32[1] = conn->ksnc_port; data->ioc_u32[2] = conn->ksnc_myipaddr; data->ioc_u32[3] = conn->ksnc_type; - data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt; + data->ioc_u32[4] = conn->ksnc_scheduler->kss_cpt; data->ioc_u32[5] = rxmem; data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid; ksocknal_conn_decref(conn); @@ -2217,36 +2134,21 @@ ksocknal_free_buffers (void) { LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0); - if (ksocknal_data.ksnd_sched_info != NULL) { - struct ksock_sched_info *info; - int i; - - cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { - if (info->ksi_scheds != NULL) { - LIBCFS_FREE(info->ksi_scheds, - info->ksi_nthreads_max * - sizeof(info->ksi_scheds[0])); - } - } - cfs_percpt_free(ksocknal_data.ksnd_sched_info); - } - - LIBCFS_FREE (ksocknal_data.ksnd_peers, - sizeof(struct list_head) * - ksocknal_data.ksnd_peer_hash_size); + if (ksocknal_data.ksnd_schedulers != NULL) + cfs_percpt_free(ksocknal_data.ksnd_schedulers); spin_lock(&ksocknal_data.ksnd_tx_lock); if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - struct list_head zlist; - ksock_tx_t *tx; + struct list_head zlist; + struct ksock_tx *tx; list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs); list_del_init(&ksocknal_data.ksnd_idle_noop_txs); spin_unlock(&ksocknal_data.ksnd_tx_lock); while (!list_empty(&zlist)) { - tx = list_entry(zlist.next, ksock_tx_t, tx_list); + tx = list_entry(zlist.next, struct ksock_tx, tx_list); list_del(&tx->tx_list); LIBCFS_FREE(tx, tx->tx_desc_size); } @@ -2258,25 +2160,23 @@ ksocknal_free_buffers (void) static void ksocknal_base_shutdown(void) { - struct ksock_sched_info *info; - ksock_sched_t *sched; - int i; - int j; + struct ksock_sched *sched; + struct ksock_peer_ni *peer_ni; + int i; CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&libcfs_kmemory)); LASSERT (ksocknal_data.ksnd_nnets == 0); - switch (ksocknal_data.ksnd_init) { - default: - LASSERT (0); + switch (ksocknal_data.ksnd_init) { + default: + LASSERT(0); + /* fallthrough */ - case SOCKNAL_INIT_ALL: - case SOCKNAL_INIT_DATA: - LASSERT (ksocknal_data.ksnd_peers != NULL); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - LASSERT(list_empty(&ksocknal_data.ksnd_peers[i])); - } + case SOCKNAL_INIT_ALL: + case SOCKNAL_INIT_DATA: + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) + LASSERT(0); LASSERT(list_empty(&ksocknal_data.ksnd_nets)); LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns)); @@ -2284,23 +2184,14 @@ ksocknal_base_shutdown(void) LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs)); LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes)); - if (ksocknal_data.ksnd_sched_info != NULL) { - cfs_percpt_for_each(info, i, - ksocknal_data.ksnd_sched_info) { - if (info->ksi_scheds == NULL) - continue; + if (ksocknal_data.ksnd_schedulers != NULL) { + cfs_percpt_for_each(sched, i, + ksocknal_data.ksnd_schedulers) { - for (j = 0; j < info->ksi_nthreads_max; j++) { - - sched = &info->ksi_scheds[j]; - LASSERT(list_empty(&sched->\ - kss_tx_conns)); - LASSERT(list_empty(&sched->\ - kss_rx_conns)); - LASSERT(list_empty(&sched-> \ - kss_zombie_noop_txs)); - LASSERT(sched->kss_nconns == 0); - } + LASSERT(list_empty(&sched->kss_tx_conns)); + LASSERT(list_empty(&sched->kss_rx_conns)); + LASSERT(list_empty(&sched->kss_zombie_noop_txs)); + LASSERT(sched->kss_nconns == 0); } } @@ -2309,17 +2200,10 @@ ksocknal_base_shutdown(void) wake_up_all(&ksocknal_data.ksnd_connd_waitq); wake_up_all(&ksocknal_data.ksnd_reaper_waitq); - if (ksocknal_data.ksnd_sched_info != NULL) { - cfs_percpt_for_each(info, i, - ksocknal_data.ksnd_sched_info) { - if (info->ksi_scheds == NULL) - continue; - - for (j = 0; j < info->ksi_nthreads_max; j++) { - sched = &info->ksi_scheds[j]; + if (ksocknal_data.ksnd_schedulers != NULL) { + cfs_percpt_for_each(sched, i, + ksocknal_data.ksnd_schedulers) wake_up_all(&sched->kss_waitq); - } - } } i = 4; @@ -2349,41 +2233,19 @@ ksocknal_base_shutdown(void) module_put(THIS_MODULE); } -static __u64 ksocknal_new_incarnation(void) -{ - struct timeval tv; - - /* The incarnation number is the time this module loaded and it - * identifies this particular instance of the socknal. Hopefully - * we won't be able to reboot more frequently than 1MHz for the - * forseeable future :) */ - - do_gettimeofday(&tv); - - return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; -} - static int ksocknal_base_startup(void) { - struct ksock_sched_info *info; - int rc; - int i; - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - LASSERT (ksocknal_data.ksnd_nnets == 0); + struct ksock_sched *sched; + int rc; + int i; - memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ + LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); + LASSERT(ksocknal_data.ksnd_nnets == 0); - ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC(ksocknal_data.ksnd_peers, - sizeof(struct list_head) * - ksocknal_data.ksnd_peer_hash_size); - if (ksocknal_data.ksnd_peers == NULL) - return -ENOMEM; + memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */ - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); + hash_init(ksocknal_data.ksnd_peers); rwlock_init(&ksocknal_data.ksnd_global_lock); INIT_LIST_HEAD(&ksocknal_data.ksnd_nets); @@ -2406,44 +2268,41 @@ ksocknal_base_startup(void) /* flag lists/ptrs/locks initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - try_module_get(THIS_MODULE); + if (!try_module_get(THIS_MODULE)) + goto failed; - ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(), - sizeof(*info)); - if (ksocknal_data.ksnd_sched_info == NULL) + /* Create a scheduler block per available CPT */ + ksocknal_data.ksnd_schedulers = cfs_percpt_alloc(lnet_cpt_table(), + sizeof(*sched)); + if (ksocknal_data.ksnd_schedulers == NULL) goto failed; - cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) { - ksock_sched_t *sched; - int nthrs; + cfs_percpt_for_each(sched, i, ksocknal_data.ksnd_schedulers) { + int nthrs; + /* + * make sure not to allocate more threads than there are + * cores/CPUs in teh CPT + */ nthrs = cfs_cpt_weight(lnet_cpt_table(), i); if (*ksocknal_tunables.ksnd_nscheds > 0) { nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds); } else { - /* max to half of CPUs, assume another half should be - * reserved for upper layer modules */ + /* + * max to half of CPUs, assume another half should be + * reserved for upper layer modules + */ nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); } - info->ksi_nthreads_max = nthrs; - info->ksi_cpt = i; - - LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i, - info->ksi_nthreads_max * sizeof(*sched)); - if (info->ksi_scheds == NULL) - goto failed; - - for (; nthrs > 0; nthrs--) { - sched = &info->ksi_scheds[nthrs - 1]; + sched->kss_nthreads_max = nthrs; + sched->kss_cpt = i; - sched->kss_info = info; - spin_lock_init(&sched->kss_lock); - INIT_LIST_HEAD(&sched->kss_rx_conns); - INIT_LIST_HEAD(&sched->kss_tx_conns); - INIT_LIST_HEAD(&sched->kss_zombie_noop_txs); - init_waitqueue_head(&sched->kss_waitq); - } + spin_lock_init(&sched->kss_lock); + INIT_LIST_HEAD(&sched->kss_rx_conns); + INIT_LIST_HEAD(&sched->kss_tx_conns); + INIT_LIST_HEAD(&sched->kss_zombie_noop_txs); + init_waitqueue_head(&sched->kss_waitq); } ksocknal_data.ksnd_connd_starting = 0; @@ -2498,100 +2357,83 @@ ksocknal_base_startup(void) static void ksocknal_debug_peerhash(struct lnet_ni *ni) { - ksock_peer_ni_t *peer_ni = NULL; - struct list_head *tmp; - int i; + struct ksock_peer_ni *peer_ni; + int i; read_lock(&ksocknal_data.ksnd_global_lock); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) { - peer_ni = list_entry(tmp, ksock_peer_ni_t, ksnp_list); - - if (peer_ni->ksnp_ni == ni) break; + hash_for_each(ksocknal_data.ksnd_peers, i, peer_ni, ksnp_list) { + struct ksock_route *route; + struct ksock_conn *conn; - peer_ni = NULL; - } - } + if (peer_ni->ksnp_ni != ni) + continue; - if (peer_ni != NULL) { - ksock_route_t *route; - ksock_conn_t *conn; - - CWARN ("Active peer_ni on shutdown: %s, ref %d, scnt %d, " - "closing %d, accepting %d, err %d, zcookie %llu, " - "txq %d, zc_req %d\n", libcfs_id2str(peer_ni->ksnp_id), - atomic_read(&peer_ni->ksnp_refcount), - peer_ni->ksnp_sharecount, peer_ni->ksnp_closing, - peer_ni->ksnp_accepting, peer_ni->ksnp_error, - peer_ni->ksnp_zc_next_cookie, - !list_empty(&peer_ni->ksnp_tx_queue), - !list_empty(&peer_ni->ksnp_zc_req_list)); - - list_for_each(tmp, &peer_ni->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, " - "del %d\n", atomic_read(&route->ksnr_refcount), - route->ksnr_scheduled, route->ksnr_connecting, - route->ksnr_connected, route->ksnr_deleted); + CWARN("Active peer_ni on shutdown: %s, ref %d, " + "closing %d, accepting %d, err %d, zcookie %llu, " + "txq %d, zc_req %d\n", libcfs_id2str(peer_ni->ksnp_id), + atomic_read(&peer_ni->ksnp_refcount), + peer_ni->ksnp_closing, + peer_ni->ksnp_accepting, peer_ni->ksnp_error, + peer_ni->ksnp_zc_next_cookie, + !list_empty(&peer_ni->ksnp_tx_queue), + !list_empty(&peer_ni->ksnp_zc_req_list)); + + list_for_each_entry(route, &peer_ni->ksnp_routes, ksnr_list) { + CWARN("Route: ref %d, schd %d, conn %d, cnted %d, " + "del %d\n", atomic_read(&route->ksnr_refcount), + route->ksnr_scheduled, route->ksnr_connecting, + route->ksnr_connected, route->ksnr_deleted); } - list_for_each(tmp, &peer_ni->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - CWARN ("Conn: ref %d, sref %d, t %d, c %d\n", - atomic_read(&conn->ksnc_conn_refcount), - atomic_read(&conn->ksnc_sock_refcount), - conn->ksnc_type, conn->ksnc_closing); + list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) { + CWARN("Conn: ref %d, sref %d, t %d, c %d\n", + atomic_read(&conn->ksnc_conn_refcount), + atomic_read(&conn->ksnc_sock_refcount), + conn->ksnc_type, conn->ksnc_closing); } + break; } read_unlock(&ksocknal_data.ksnd_global_lock); - return; } void ksocknal_shutdown(struct lnet_ni *ni) { - ksock_net_t *net = ni->ni_data; + struct ksock_net *net = ni->ni_data; struct lnet_process_id anyid = { .nid = LNET_NID_ANY, .pid = LNET_PID_ANY, }; int i; - LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); - LASSERT(ksocknal_data.ksnd_nnets > 0); + LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); + LASSERT(ksocknal_data.ksnd_nnets > 0); - spin_lock_bh(&net->ksnn_lock); - net->ksnn_shutdown = 1; /* prevent new peers */ - spin_unlock_bh(&net->ksnn_lock); + /* prevent new peers */ + atomic_add(SOCKNAL_SHUTDOWN_BIAS, &net->ksnn_npeers); /* Delete all peers */ ksocknal_del_peer(ni, anyid, 0); /* Wait for all peer_ni state to clean up */ i = 2; - spin_lock_bh(&net->ksnn_lock); - while (net->ksnn_npeers != 0) { - spin_unlock_bh(&net->ksnn_lock); - + while (atomic_read(&net->ksnn_npeers) > SOCKNAL_SHUTDOWN_BIAS) { i++; CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ "waiting for %d peers to disconnect\n", - net->ksnn_npeers); + atomic_read(&net->ksnn_npeers) - SOCKNAL_SHUTDOWN_BIAS); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(cfs_time_seconds(1)); ksocknal_debug_peerhash(ni); - - spin_lock_bh(&net->ksnn_lock); } - spin_unlock_bh(&net->ksnn_lock); - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0); - LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0); - } + for (i = 0; i < net->ksnn_ninterfaces; i++) { + LASSERT(net->ksnn_interfaces[i].ksni_npeers == 0); + LASSERT(net->ksnn_interfaces[i].ksni_nroutes == 0); + } list_del(&net->ksnn_list); LIBCFS_FREE(net, sizeof(*net)); @@ -2602,74 +2444,17 @@ ksocknal_shutdown(struct lnet_ni *ni) } static int -ksocknal_enumerate_interfaces(ksock_net_t *net) +ksocknal_search_new_ipif(struct ksock_net *net) { - char **names; - int i; - int j; - int rc; - int n; - - n = lnet_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Can't enumerate interfaces: %d\n", n); - return n; - } - - for (i = j = 0; i < n; i++) { - int up; - __u32 ip; - __u32 mask; - - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = lnet_ipif_query(names[i], &up, &ip, &mask); - if (rc != 0) { - CWARN("Can't get interface %s info: %d\n", - names[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s (down)\n", - names[i]); - continue; - } - - if (j == LNET_MAX_INTERFACES) { - CWARN("Ignoring interface %s (too many interfaces)\n", - names[i]); - continue; - } - - net->ksnn_interfaces[j].ksni_ipaddr = ip; - net->ksnn_interfaces[j].ksni_netmask = mask; - strlcpy(net->ksnn_interfaces[j].ksni_name, - names[i], sizeof(net->ksnn_interfaces[j].ksni_name)); - j++; - } - - lnet_ipif_free_enumeration(names, n); - - if (j == 0) - CERROR("Can't find any usable interfaces\n"); - - return j; -} - -static int -ksocknal_search_new_ipif(ksock_net_t *net) -{ - int new_ipif = 0; - int i; + int new_ipif = 0; + int i; for (i = 0; i < net->ksnn_ninterfaces; i++) { - char *ifnam = &net->ksnn_interfaces[i].ksni_name[0]; - char *colon = strchr(ifnam, ':'); - int found = 0; - ksock_net_t *tmp; - int j; + char *ifnam = &net->ksnn_interfaces[i].ksni_name[0]; + char *colon = strchr(ifnam, ':'); + int found = 0; + struct ksock_net *tmp; + int j; if (colon != NULL) /* ignore alias device */ *colon = 0; @@ -2701,36 +2486,35 @@ ksocknal_search_new_ipif(ksock_net_t *net) } static int -ksocknal_start_schedulers(struct ksock_sched_info *info) +ksocknal_start_schedulers(struct ksock_sched *sched) { int nthrs; int rc = 0; int i; - if (info->ksi_nthreads == 0) { + if (sched->kss_nthreads == 0) { if (*ksocknal_tunables.ksnd_nscheds > 0) { - nthrs = info->ksi_nthreads_max; + nthrs = sched->kss_nthreads_max; } else { nthrs = cfs_cpt_weight(lnet_cpt_table(), - info->ksi_cpt); + sched->kss_cpt); nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs); nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs); } - nthrs = min(nthrs, info->ksi_nthreads_max); + nthrs = min(nthrs, sched->kss_nthreads_max); } else { - LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max); + LASSERT(sched->kss_nthreads <= sched->kss_nthreads_max); /* increase two threads if there is new interface */ - nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads); + nthrs = min(2, sched->kss_nthreads_max - sched->kss_nthreads); } for (i = 0; i < nthrs; i++) { - long id; - char name[20]; - ksock_sched_t *sched; - id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i); - sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)]; + long id; + char name[20]; + + id = KSOCK_THREAD_ID(sched->kss_cpt, sched->kss_nthreads + i); snprintf(name, sizeof(name), "socknal_sd%02d_%02d", - info->ksi_cpt, (int)(sched - &info->ksi_scheds[0])); + sched->kss_cpt, (int)KSOCK_THREAD_SID(id)); rc = ksocknal_thread_start(ksocknal_scheduler, (void *)id, name); @@ -2738,35 +2522,35 @@ ksocknal_start_schedulers(struct ksock_sched_info *info) continue; CERROR("Can't spawn thread %d for scheduler[%d]: %d\n", - info->ksi_cpt, info->ksi_nthreads + i, rc); + sched->kss_cpt, (int) KSOCK_THREAD_SID(id), rc); break; } - info->ksi_nthreads += i; + sched->kss_nthreads += i; return rc; } static int -ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts) +ksocknal_net_start_threads(struct ksock_net *net, __u32 *cpts, int ncpts) { - int newif = ksocknal_search_new_ipif(net); - int rc; - int i; + int newif = ksocknal_search_new_ipif(net); + int rc; + int i; if (ncpts > 0 && ncpts > cfs_cpt_number(lnet_cpt_table())) return -EINVAL; for (i = 0; i < ncpts; i++) { - struct ksock_sched_info *info; + struct ksock_sched *sched; int cpt = (cpts == NULL) ? i : cpts[i]; LASSERT(cpt < cfs_cpt_number(lnet_cpt_table())); - info = ksocknal_data.ksnd_sched_info[cpt]; + sched = ksocknal_data.ksnd_schedulers[cpt]; - if (!newif && info->ksi_nthreads > 0) + if (!newif && sched->kss_nthreads > 0) continue; - rc = ksocknal_start_schedulers(info); + rc = ksocknal_start_schedulers(sched); if (rc != 0) return rc; } @@ -2776,11 +2560,12 @@ ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts) int ksocknal_startup(struct lnet_ni *ni) { - ksock_net_t *net; - int rc; - int i; - struct net_device *net_dev; - int node_id; + struct ksock_net *net; + struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables; + struct ksock_interface *ksi = NULL; + struct lnet_inetdev *ifaces = NULL; + int i = 0; + int rc; LASSERT (ni->ni_net->net_lnd == &the_ksocklnd); @@ -2790,71 +2575,98 @@ ksocknal_startup(struct lnet_ni *ni) return rc; } - LIBCFS_ALLOC(net, sizeof(*net)); - if (net == NULL) - goto fail_0; + LIBCFS_ALLOC(net, sizeof(*net)); + if (net == NULL) + goto fail_0; - spin_lock_init(&net->ksnn_lock); - net->ksnn_incarnation = ksocknal_new_incarnation(); - ni->ni_data = net; - if (!ni->ni_net->net_tunables_set) { - ni->ni_net->net_tunables.lct_peer_timeout = + net->ksnn_incarnation = ktime_get_real_ns(); + ni->ni_data = net; + net_tunables = &ni->ni_net->net_tunables; + + if (net_tunables->lct_peer_timeout == -1) + net_tunables->lct_peer_timeout = *ksocknal_tunables.ksnd_peertimeout; - ni->ni_net->net_tunables.lct_max_tx_credits = + + if (net_tunables->lct_max_tx_credits == -1) + net_tunables->lct_max_tx_credits = *ksocknal_tunables.ksnd_credits; - ni->ni_net->net_tunables.lct_peer_tx_credits = + + if (net_tunables->lct_peer_tx_credits == -1) + net_tunables->lct_peer_tx_credits = *ksocknal_tunables.ksnd_peertxcredits; - ni->ni_net->net_tunables.lct_peer_rtr_credits = - *ksocknal_tunables.ksnd_peerrtrcredits; - ni->ni_net->net_tunables_set = true; - } + if (net_tunables->lct_peer_tx_credits > + net_tunables->lct_max_tx_credits) + net_tunables->lct_peer_tx_credits = + net_tunables->lct_max_tx_credits; - if (ni->ni_interfaces[0] == NULL) { - rc = ksocknal_enumerate_interfaces(net); - if (rc <= 0) - goto fail_1; + if (net_tunables->lct_peer_rtr_credits == -1) + net_tunables->lct_peer_rtr_credits = + *ksocknal_tunables.ksnd_peerrtrcredits; - net->ksnn_ninterfaces = 1; - } else { - for (i = 0; i < LNET_MAX_INTERFACES; i++) { - int up; + rc = lnet_inet_enumerate(&ifaces, ni->ni_net_ns); + if (rc < 0) + goto fail_1; - if (ni->ni_interfaces[i] == NULL) - break; + if (!ni->ni_interfaces[0]) { + ksi = &net->ksnn_interfaces[0]; - rc = lnet_ipif_query(ni->ni_interfaces[i], &up, - &net->ksnn_interfaces[i].ksni_ipaddr, - &net->ksnn_interfaces[i].ksni_netmask); + /* Use the first discovered interface */ + net->ksnn_ninterfaces = 1; + ni->ni_dev_cpt = ifaces[0].li_cpt; + ksi->ksni_ipaddr = ifaces[0].li_ipaddr; + ksi->ksni_netmask = ifaces[0].li_netmask; + strlcpy(ksi->ksni_name, ifaces[0].li_name, + sizeof(ksi->ksni_name)); + } else { + /* Before Multi-Rail ksocklnd would manage + * multiple interfaces with its own tcp bonding. + * If we encounter an old configuration using + * this tcp bonding approach then we need to + * handle more than one ni_interfaces. + * + * In Multi-Rail configuration only ONE ni_interface + * should exist. Each IP alias should be mapped to + * each 'struct net_ni'. + */ + for (i = 0; i < LNET_INTERFACES_NUM; i++) { + int j; - if (rc != 0) { - CERROR("Can't get interface %s info: %d\n", - ni->ni_interfaces[i], rc); - goto fail_1; - } + if (!ni->ni_interfaces[i]) + break; - if (!up) { - CERROR("Interface %s is down\n", - ni->ni_interfaces[i]); - goto fail_1; - } + for (j = 0; j < LNET_INTERFACES_NUM; j++) { + if (i != j && ni->ni_interfaces[j] && + strcmp(ni->ni_interfaces[i], + ni->ni_interfaces[j]) == 0) { + rc = -EEXIST; + CERROR("ksocklnd: found duplicate %s at %d and %d, rc = %d\n", + ni->ni_interfaces[i], i, j, rc); + goto fail_1; + } + } - strlcpy(net->ksnn_interfaces[i].ksni_name, - ni->ni_interfaces[i], - sizeof(net->ksnn_interfaces[i].ksni_name)); + for (j = 0; j < rc; j++) { + if (strcmp(ifaces[j].li_name, + ni->ni_interfaces[i]) != 0) + continue; + ksi = &net->ksnn_interfaces[j]; + ni->ni_dev_cpt = ifaces[j].li_cpt; + ksi->ksni_ipaddr = ifaces[j].li_ipaddr; + ksi->ksni_netmask = ifaces[j].li_netmask; + strlcpy(ksi->ksni_name, ifaces[j].li_name, + sizeof(ksi->ksni_name)); + net->ksnn_ninterfaces++; + break; + } + } + /* ni_interfaces don't map to all network interfaces */ + if (!ksi || net->ksnn_ninterfaces != i) { + CERROR("ksocklnd: requested %d but only %d interfaces found\n", + i, net->ksnn_ninterfaces); + goto fail_1; } - net->ksnn_ninterfaces = i; - } - - net_dev = dev_get_by_name(&init_net, - net->ksnn_interfaces[0].ksni_name); - if (net_dev != NULL) { - node_id = dev_to_node(&net_dev->dev); - ni->ni_dev_cpt = cfs_cpt_of_node(lnet_cpt_table(), node_id); - dev_put(net_dev); - } else { - ni->ni_dev_cpt = CFS_CPT_ANY; } /* call it before add it to ksocknal_data.ksnd_nets */ @@ -2862,8 +2674,8 @@ ksocknal_startup(struct lnet_ni *ni) if (rc != 0) goto fail_1; - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - net->ksnn_interfaces[0].ksni_ipaddr); + LASSERT(ksi); + ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ksi->ksni_ipaddr); list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets); ksocknal_data.ksnd_nnets++; @@ -2885,24 +2697,24 @@ static void __exit ksocklnd_exit(void) lnet_unregister_lnd(&the_ksocklnd); } +static const struct lnet_lnd the_ksocklnd = { + .lnd_type = SOCKLND, + .lnd_startup = ksocknal_startup, + .lnd_shutdown = ksocknal_shutdown, + .lnd_ctl = ksocknal_ctl, + .lnd_send = ksocknal_send, + .lnd_recv = ksocknal_recv, + .lnd_notify_peer_down = ksocknal_notify_gw_down, + .lnd_accept = ksocknal_accept, +}; + static int __init ksocklnd_init(void) { int rc; /* check ksnr_connected/connecting field large enough */ - CLASSERT(SOCKLND_CONN_NTYPES <= 4); - CLASSERT(SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN); - - /* initialize the_ksocklnd */ - the_ksocklnd.lnd_type = SOCKLND; - the_ksocklnd.lnd_startup = ksocknal_startup; - the_ksocklnd.lnd_shutdown = ksocknal_shutdown; - the_ksocklnd.lnd_ctl = ksocknal_ctl; - the_ksocklnd.lnd_send = ksocknal_send; - the_ksocklnd.lnd_recv = ksocknal_recv; - the_ksocklnd.lnd_notify = ksocknal_notify; - the_ksocklnd.lnd_query = ksocknal_query; - the_ksocklnd.lnd_accept = ksocknal_accept; + BUILD_BUG_ON(SOCKLND_CONN_NTYPES > 4); + BUILD_BUG_ON(SOCKLND_CONN_ACK != SOCKLND_CONN_BULK_IN); rc = ksocknal_tunables_init(); if (rc != 0)