X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fsocklnd%2Fsocklnd.c;h=c26de82da84d0d986dabe15ff38cf9151d5cf7f7;hp=aece49f72bd15ce6d6fd83671224bf724ec661d5;hb=b43a6b1800265608cfa18159d4d0d006a1c23015;hpb=2517c4f647f10a713a428e8c77765c4b1bb7f013 diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index aece49f..c26de82 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1,41 +1,49 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, Whamcloud, Inc. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/klnds/socklnd/socklnd.c + * + * Author: Zach Brown + * Author: Peter J. 
Braam + * Author: Phil Schwan + * Author: Eric Barton */ #include "socklnd.h" -lnd_t the_ksocklnd = { - .lnd_type = SOCKLND, - .lnd_startup = ksocknal_startup, - .lnd_shutdown = ksocknal_shutdown, - .lnd_ctl = ksocknal_ctl, - .lnd_send = ksocknal_send, - .lnd_recv = ksocknal_recv, - .lnd_notify = ksocknal_notify, - .lnd_accept = ksocknal_accept, -}; - +lnd_t the_ksocklnd; ksock_nal_data_t ksocknal_data; ksock_interface_t * @@ -65,7 +73,7 @@ ksocknal_create_route (__u32 ipaddr, int port) if (route == NULL) return (NULL); - atomic_set (&route->ksnr_refcount, 1); + cfs_atomic_set (&route->ksnr_refcount, 1); route->ksnr_peer = NULL; route->ksnr_retry_interval = 0; /* OK to connect at any time */ route->ksnr_ipaddr = ipaddr; @@ -83,7 +91,7 @@ ksocknal_create_route (__u32 ipaddr, int port) void ksocknal_destroy_route (ksock_route_t *route) { - LASSERT (atomic_read(&route->ksnr_refcount) == 0); + LASSERT (cfs_atomic_read(&route->ksnr_refcount) == 0); if (route->ksnr_peer != NULL) ksocknal_peer_decref(route->ksnr_peer); @@ -99,7 +107,7 @@ ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) LASSERT (id.nid != LNET_NID_ANY); LASSERT (id.pid != LNET_PID_ANY); - LASSERT (!in_interrupt()); + LASSERT (!cfs_in_interrupt()); LIBCFS_ALLOC (peer, sizeof (*peer)); if (peer == NULL) @@ -109,21 +117,23 @@ ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) peer->ksnp_ni = ni; peer->ksnp_id = id; - atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */ + cfs_atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */ peer->ksnp_closing = 0; peer->ksnp_accepting = 0; - peer->ksnp_zc_next_cookie = 1; peer->ksnp_proto = NULL; + peer->ksnp_last_alive = 0; + peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1; + CFS_INIT_LIST_HEAD (&peer->ksnp_conns); CFS_INIT_LIST_HEAD (&peer->ksnp_routes); CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue); CFS_INIT_LIST_HEAD (&peer->ksnp_zc_req_list); - spin_lock_init(&peer->ksnp_lock); + cfs_spin_lock_init(&peer->ksnp_lock); - spin_lock_bh (&net->ksnn_lock); + cfs_spin_lock_bh (&net->ksnn_lock); if (net->ksnn_shutdown) { - spin_unlock_bh (&net->ksnn_lock); + cfs_spin_unlock_bh (&net->ksnn_lock); LIBCFS_FREE(peer, sizeof(*peer)); CERROR("Can't create peer: network shutdown\n"); @@ -132,7 +142,7 @@ ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) net->ksnn_npeers++; - spin_unlock_bh (&net->ksnn_lock); + cfs_spin_unlock_bh (&net->ksnn_lock); *peerp = peer; return 0; @@ -146,12 +156,12 @@ ksocknal_destroy_peer (ksock_peer_t *peer) CDEBUG (D_NET, "peer %s %p deleted\n", libcfs_id2str(peer->ksnp_id), peer); - LASSERT (atomic_read (&peer->ksnp_refcount) == 0); + LASSERT (cfs_atomic_read (&peer->ksnp_refcount) == 0); LASSERT (peer->ksnp_accepting == 0); - LASSERT (list_empty (&peer->ksnp_conns)); - LASSERT (list_empty (&peer->ksnp_routes)); - LASSERT (list_empty (&peer->ksnp_tx_queue)); - LASSERT (list_empty (&peer->ksnp_zc_req_list)); + LASSERT (cfs_list_empty (&peer->ksnp_conns)); + LASSERT (cfs_list_empty (&peer->ksnp_routes)); + LASSERT (cfs_list_empty (&peer->ksnp_tx_queue)); + LASSERT (cfs_list_empty (&peer->ksnp_zc_req_list)); LIBCFS_FREE (peer, sizeof (*peer)); @@ -159,21 +169,21 @@ ksocknal_destroy_peer (ksock_peer_t *peer) * until they are destroyed, so we can be assured that _all_ state to * do with this peer has been cleaned up when its refcount drops to * zero. 
*/ - spin_lock_bh (&net->ksnn_lock); + cfs_spin_lock_bh (&net->ksnn_lock); net->ksnn_npeers--; - spin_unlock_bh (&net->ksnn_lock); + cfs_spin_unlock_bh (&net->ksnn_lock); } ksock_peer_t * ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id) { - struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); - struct list_head *tmp; + cfs_list_t *peer_list = ksocknal_nid2peerlist(id.nid); + cfs_list_t *tmp; ksock_peer_t *peer; - list_for_each (tmp, peer_list) { + cfs_list_for_each (tmp, peer_list) { - peer = list_entry (tmp, ksock_peer_t, ksnp_list); + peer = cfs_list_entry (tmp, ksock_peer_t, ksnp_list); LASSERT (!peer->ksnp_closing); @@ -186,7 +196,7 @@ ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id) CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", peer, libcfs_id2str(id), - atomic_read(&peer->ksnp_refcount)); + cfs_atomic_read(&peer->ksnp_refcount)); return (peer); } return (NULL); @@ -197,11 +207,11 @@ ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id) { ksock_peer_t *peer; - read_lock (&ksocknal_data.ksnd_global_lock); + cfs_read_lock (&ksocknal_data.ksnd_global_lock); peer = ksocknal_find_peer_locked (ni, id); if (peer != NULL) /* +1 ref for caller? */ ksocknal_peer_addref(peer); - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); return (peer); } @@ -211,48 +221,56 @@ ksocknal_unlink_peer_locked (ksock_peer_t *peer) { int i; __u32 ip; + ksock_interface_t *iface; for (i = 0; i < peer->ksnp_n_passive_ips; i++) { LASSERT (i < LNET_MAX_INTERFACES); ip = peer->ksnp_passive_ips[i]; - ksocknal_ip2iface(peer->ksnp_ni, ip)->ksni_npeers--; + iface = ksocknal_ip2iface(peer->ksnp_ni, ip); + /* All IPs in peer->ksnp_passive_ips[] come from the + * interface list, therefore the call must succeed. 
*/ + LASSERT (iface != NULL); + + CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n", + peer, iface, iface->ksni_nroutes); + iface->ksni_npeers--; } - LASSERT (list_empty(&peer->ksnp_conns)); - LASSERT (list_empty(&peer->ksnp_routes)); + LASSERT (cfs_list_empty(&peer->ksnp_conns)); + LASSERT (cfs_list_empty(&peer->ksnp_routes)); LASSERT (!peer->ksnp_closing); peer->ksnp_closing = 1; - list_del (&peer->ksnp_list); + cfs_list_del (&peer->ksnp_list); /* lose peerlist's ref */ ksocknal_peer_decref(peer); } int ksocknal_get_peer_info (lnet_ni_t *ni, int index, - lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port, - int *conn_count, int *share_count) + lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, + int *port, int *conn_count, int *share_count) { ksock_peer_t *peer; - struct list_head *ptmp; + cfs_list_t *ptmp; ksock_route_t *route; - struct list_head *rtmp; + cfs_list_t *rtmp; int i; int j; int rc = -ENOENT; - read_lock (&ksocknal_data.ksnd_global_lock); + cfs_read_lock (&ksocknal_data.ksnd_global_lock); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); + cfs_list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { + peer = cfs_list_entry (ptmp, ksock_peer_t, ksnp_list); if (peer->ksnp_ni != ni) continue; if (peer->ksnp_n_passive_ips == 0 && - list_empty(&peer->ksnp_routes)) { + cfs_list_empty(&peer->ksnp_routes)) { if (index-- > 0) continue; @@ -280,12 +298,12 @@ ksocknal_get_peer_info (lnet_ni_t *ni, int index, goto out; } - list_for_each (rtmp, &peer->ksnp_routes) { + cfs_list_for_each (rtmp, &peer->ksnp_routes) { if (index-- > 0) continue; - route = list_entry(rtmp, ksock_route_t, - ksnr_list); + route = cfs_list_entry(rtmp, ksock_route_t, + ksnr_list); *id = peer->ksnp_id; *myip = route->ksnr_myipaddr; @@ -299,7 +317,7 @@ ksocknal_get_peer_info (lnet_ni_t *ni, int index, } } out: - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); return (rc); } @@ -351,7 +369,7 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) void ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) { - struct list_head *tmp; + cfs_list_t *tmp; ksock_conn_t *conn; ksock_route_t *route2; @@ -362,8 +380,8 @@ ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) LASSERT (route->ksnr_connected == 0); /* LASSERT(unique) */ - list_for_each(tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); + cfs_list_for_each(tmp, &peer->ksnp_routes) { + route2 = cfs_list_entry(tmp, ksock_route_t, ksnr_list); if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { CERROR ("Duplicate route %s %u.%u.%u.%u\n", @@ -376,10 +394,10 @@ ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) route->ksnr_peer = peer; ksocknal_peer_addref(peer); /* peer's routelist takes over my ref on 'route' */ - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); + cfs_list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - list_for_each(tmp, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); + cfs_list_for_each(tmp, &peer->ksnp_conns) { + conn = cfs_list_entry(tmp, ksock_conn_t, ksnc_list); if (conn->ksnc_ipaddr != route->ksnr_ipaddr) continue; @@ -395,14 +413,14 @@ ksocknal_del_route_locked (ksock_route_t *route) ksock_peer_t *peer = route->ksnr_peer; ksock_interface_t *iface; ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; + cfs_list_t *ctmp; + 
cfs_list_t *cnxt; LASSERT (!route->ksnr_deleted); /* Close associated conns */ - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, ksock_conn_t, ksnc_list); + cfs_list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { + conn = cfs_list_entry(ctmp, ksock_conn_t, ksnc_list); if (conn->ksnc_route != route) continue; @@ -418,11 +436,11 @@ ksocknal_del_route_locked (ksock_route_t *route) } route->ksnr_deleted = 1; - list_del (&route->ksnr_list); + cfs_list_del (&route->ksnr_list); ksocknal_route_decref(route); /* drop peer's ref */ - if (list_empty (&peer->ksnp_routes) && - list_empty (&peer->ksnp_conns)) { + if (cfs_list_empty (&peer->ksnp_routes) && + cfs_list_empty (&peer->ksnp_conns)) { /* I've just removed the last route to a peer with no active * connections */ ksocknal_unlink_peer_locked (peer); @@ -432,7 +450,7 @@ ksocknal_del_route_locked (ksock_route_t *route) int ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) { - struct list_head *tmp; + cfs_list_t *tmp; ksock_peer_t *peer; ksock_peer_t *peer2; ksock_route_t *route; @@ -454,7 +472,7 @@ ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) return (-ENOMEM); } - write_lock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock); /* always called with a ref on ni, so shutdown can't have started */ LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); @@ -465,13 +483,13 @@ ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) peer = peer2; } else { /* peer table takes my ref on peer */ - list_add_tail (&peer->ksnp_list, - ksocknal_nid2peerlist (id.nid)); + cfs_list_add_tail (&peer->ksnp_list, + ksocknal_nid2peerlist (id.nid)); } route2 = NULL; - list_for_each (tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); + cfs_list_for_each (tmp, &peer->ksnp_routes) { + route2 = cfs_list_entry(tmp, ksock_route_t, ksnr_list); if (route2->ksnr_ipaddr == ipaddr) break; @@ -486,7 +504,7 @@ ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) route2->ksnr_share_count++; } - write_unlock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock); return (0); } @@ -496,8 +514,8 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip) { ksock_conn_t *conn; ksock_route_t *route; - struct list_head *tmp; - struct list_head *nxt; + cfs_list_t *tmp; + cfs_list_t *nxt; int nshared; LASSERT (!peer->ksnp_closing); @@ -505,8 +523,8 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip) /* Extra ref prevents peer disappearing until I'm done with it */ ksocknal_peer_addref(peer); - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); + cfs_list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { + route = cfs_list_entry(tmp, ksock_route_t, ksnr_list); /* no match */ if (!(ip == 0 || route->ksnr_ipaddr == ip)) @@ -518,8 +536,8 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip) } nshared = 0; - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); + cfs_list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { + route = cfs_list_entry(tmp, ksock_route_t, ksnr_list); nshared += route->ksnr_share_count; } @@ -527,16 +545,16 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip) /* remove everything else if there are no explicit entries * left */ - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = 
list_entry(tmp, ksock_route_t, ksnr_list); + cfs_list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { + route = cfs_list_entry(tmp, ksock_route_t, ksnr_list); /* we should only be removing auto-entries */ LASSERT(route->ksnr_share_count == 0); ksocknal_del_route_locked (route); } - list_for_each_safe (tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); + cfs_list_for_each_safe (tmp, nxt, &peer->ksnp_conns) { + conn = cfs_list_entry(tmp, ksock_conn_t, ksnc_list); ksocknal_close_conn_locked(conn, 0); } @@ -550,26 +568,27 @@ int ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip) { CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; + cfs_list_t *ptmp; + cfs_list_t *pnxt; ksock_peer_t *peer; int lo; int hi; int i; int rc = -ENOENT; - write_lock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock); if (id.nid != LNET_NID_ANY) - lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers; + lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); else { lo = 0; hi = ksocknal_data.ksnd_peer_hash_size - 1; } for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); + cfs_list_for_each_safe (ptmp, pnxt, + &ksocknal_data.ksnd_peers[i]) { + peer = cfs_list_entry (ptmp, ksock_peer_t, ksnp_list); if (peer->ksnp_ni != ni) continue; @@ -582,11 +601,13 @@ ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip) ksocknal_del_peer_locked (peer, ip); - if (peer->ksnp_closing && !list_empty(&peer->ksnp_tx_queue)) { - LASSERT (list_empty(&peer->ksnp_conns)); - LASSERT (list_empty(&peer->ksnp_routes)); + if (peer->ksnp_closing && + !cfs_list_empty(&peer->ksnp_tx_queue)) { + LASSERT (cfs_list_empty(&peer->ksnp_conns)); + LASSERT (cfs_list_empty(&peer->ksnp_routes)); - list_splice_init(&peer->ksnp_tx_queue, &zombies); + cfs_list_splice_init(&peer->ksnp_tx_queue, + &zombies); } ksocknal_peer_decref(peer); /* ...till here */ @@ -595,7 +616,7 @@ ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip) } } - write_unlock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock); ksocknal_txlist_done(ni, &zombies, 1); @@ -606,70 +627,61 @@ ksock_conn_t * ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index) { ksock_peer_t *peer; - struct list_head *ptmp; + cfs_list_t *ptmp; ksock_conn_t *conn; - struct list_head *ctmp; + cfs_list_t *ctmp; int i; - read_lock (&ksocknal_data.ksnd_global_lock); + cfs_read_lock (&ksocknal_data.ksnd_global_lock); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); + cfs_list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { + peer = cfs_list_entry (ptmp, ksock_peer_t, ksnp_list); LASSERT (!peer->ksnp_closing); if (peer->ksnp_ni != ni) continue; - list_for_each (ctmp, &peer->ksnp_conns) { + cfs_list_for_each (ctmp, &peer->ksnp_conns) { if (index-- > 0) continue; - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); + conn = cfs_list_entry (ctmp, ksock_conn_t, + ksnc_list); ksocknal_conn_addref(conn); - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data. 
\ + ksnd_global_lock); return (conn); } } } - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); return (NULL); } ksock_sched_t * -ksocknal_choose_scheduler_locked (unsigned int irq) +ksocknal_choose_scheduler_locked(unsigned int cpt) { - ksock_sched_t *sched; - ksock_irqinfo_t *info; - int i; - - LASSERT (irq < NR_IRQS); - info = &ksocknal_data.ksnd_irqinfo[irq]; - - if (irq != 0 && /* hardware NIC */ - info->ksni_valid) { /* already set up */ - return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]); - } - - /* software NIC (irq == 0) || not associated with a scheduler yet. - * Choose the CPU with the fewest connections... */ - sched = &ksocknal_data.ksnd_schedulers[0]; - for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++) - if (sched->kss_nconns > - ksocknal_data.ksnd_schedulers[i].kss_nconns) - sched = &ksocknal_data.ksnd_schedulers[i]; - - if (irq != 0) { /* Hardware NIC */ - info->ksni_valid = 1; - info->ksni_sched = sched - ksocknal_data.ksnd_schedulers; - - /* no overflow... */ - LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers); - } - - return (sched); + struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt]; + ksock_sched_t *sched; + int i; + + LASSERT(info->ksi_nthreads > 0); + + sched = &info->ksi_scheds[0]; + /* + * NB: it's safe so far, but info->ksi_nthreads could be changed + * at runtime when we have dynamic LNet configuration, then we + * need to take care of this. + */ + for (i = 1; i < info->ksi_nthreads; i++) { + if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns) + sched = &info->ksi_scheds[i]; + } + + return sched; } int @@ -679,7 +691,7 @@ ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs) int i; int nip; - read_lock (&ksocknal_data.ksnd_global_lock); + cfs_read_lock (&ksocknal_data.ksnd_global_lock); nip = net->ksnn_ninterfaces; LASSERT (nip <= LNET_MAX_INTERFACES); @@ -687,7 +699,7 @@ ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs) /* Only offer interfaces for additional connections if I have * more than one. 
*/ if (nip < 2) { - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); return 0; } @@ -696,7 +708,7 @@ ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs) LASSERT (ipaddrs[i] != 0); } - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); return (nip); } @@ -735,7 +747,7 @@ ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips) int ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) { - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; + cfs_rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; ksock_net_t *net = peer->ksnp_ni->ni_data; ksock_interface_t *iface; ksock_interface_t *best_iface; @@ -757,7 +769,7 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) /* Also note that I'm not going to return more than n_peerips * interfaces, even if I have more myself */ - write_lock_bh (global_lock); + cfs_write_lock_bh (global_lock); LASSERT (n_peerips <= LNET_MAX_INTERFACES); LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); @@ -833,7 +845,7 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) /* Overwrite input peer IP addresses */ memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips)); - write_unlock_bh (global_lock); + cfs_write_unlock_bh (global_lock); return (n_ips); } @@ -842,31 +854,31 @@ void ksocknal_create_routes(ksock_peer_t *peer, int port, __u32 *peer_ipaddrs, int npeer_ipaddrs) { - ksock_route_t *newroute = NULL; - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - lnet_ni_t *ni = peer->ksnp_ni; - ksock_net_t *net = ni->ni_data; - struct list_head *rtmp; - ksock_route_t *route; - ksock_interface_t *iface; - ksock_interface_t *best_iface; - int best_netmatch; - int this_netmatch; - int best_nroutes; - int i; - int j; + ksock_route_t *newroute = NULL; + cfs_rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; + lnet_ni_t *ni = peer->ksnp_ni; + ksock_net_t *net = ni->ni_data; + cfs_list_t *rtmp; + ksock_route_t *route; + ksock_interface_t *iface; + ksock_interface_t *best_iface; + int best_netmatch; + int this_netmatch; + int best_nroutes; + int i; + int j; /* CAVEAT EMPTOR: We do all our interface matching with an * exclusive hold of global lock at IRQ priority. We're only * expecting to be dealing with small numbers of interfaces, so the * O(n**3)-ness here shouldn't matter */ - write_lock_bh (global_lock); + cfs_write_lock_bh (global_lock); if (net->ksnn_ninterfaces < 2) { /* Only create additional connections * if I have > 1 interface */ - write_unlock_bh (global_lock); + cfs_write_unlock_bh (global_lock); return; } @@ -876,13 +888,13 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, if (newroute != NULL) { newroute->ksnr_ipaddr = peer_ipaddrs[i]; } else { - write_unlock_bh (global_lock); + cfs_write_unlock_bh (global_lock); newroute = ksocknal_create_route(peer_ipaddrs[i], port); if (newroute == NULL) return; - write_lock_bh (global_lock); + cfs_write_lock_bh (global_lock); } if (peer->ksnp_closing) { @@ -892,8 +904,8 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, /* Already got a route? 
*/ route = NULL; - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); + cfs_list_for_each(rtmp, &peer->ksnp_routes) { + route = cfs_list_entry(rtmp, ksock_route_t, ksnr_list); if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) break; @@ -914,8 +926,9 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, iface = &net->ksnn_interfaces[j]; /* Using this interface already? */ - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); + cfs_list_for_each(rtmp, &peer->ksnp_routes) { + route = cfs_list_entry(rtmp, ksock_route_t, + ksnr_list); if (route->ksnr_myipaddr == iface->ksni_ipaddr) break; @@ -950,7 +963,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, newroute = NULL; } - write_unlock_bh (global_lock); + cfs_write_unlock_bh (global_lock); if (newroute != NULL) ksocknal_route_decref(newroute); } @@ -978,12 +991,12 @@ ksocknal_accept (lnet_ni_t *ni, cfs_socket_t *sock) cr->ksncr_ni = ni; cr->ksncr_sock = sock; - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); + cfs_spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); + cfs_list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq); - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); + cfs_spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); return 0; } @@ -992,7 +1005,8 @@ ksocknal_connecting (ksock_peer_t *peer, __u32 ipaddr) { ksock_route_t *route; - list_for_each_entry (route, &peer->ksnp_routes, ksnr_list) { + cfs_list_for_each_entry_typed (route, &peer->ksnp_routes, + ksock_route_t, ksnr_list) { if (route->ksnr_ipaddr == ipaddr) return route->ksnr_connecting; @@ -1004,10 +1018,10 @@ int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, cfs_socket_t *sock, int type) { - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; + cfs_rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; CFS_LIST_HEAD (zombies); lnet_process_id_t peerid; - struct list_head *tmp; + cfs_list_t *tmp; __u64 incarnation; ksock_conn_t *conn; ksock_conn_t *conn2; @@ -1015,8 +1029,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksock_peer_t *peer2; ksock_sched_t *sched; ksock_hello_msg_t *hello; - unsigned int irq; + int cpt; ksock_tx_t *tx; + ksock_tx_t *txtmp; int rc; int active; char *warn = NULL; @@ -1025,8 +1040,6 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, LASSERT (active == (type != SOCKLND_CONN_NONE)); - irq = ksocknal_lib_sock_irq (sock); - LIBCFS_ALLOC(conn, sizeof(*conn)); if (conn == NULL) { rc = -ENOMEM; @@ -1034,23 +1047,25 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, } memset (conn, 0, sizeof (*conn)); + conn->ksnc_peer = NULL; conn->ksnc_route = NULL; conn->ksnc_sock = sock; - atomic_set (&conn->ksnc_sock_refcount, 1); /* 1 ref for conn */ + /* 2 ref, 1 for conn, another extra ref prevents socket + * being closed before establishment of connection */ + cfs_atomic_set (&conn->ksnc_sock_refcount, 2); conn->ksnc_type = type; ksocknal_lib_save_callback(sock, conn); - atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ + cfs_atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ - conn->ksnc_zc_capable = ksocknal_lib_zc_capable(sock); conn->ksnc_rx_ready = 0; conn->ksnc_rx_scheduled = 0; CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue); conn->ksnc_tx_ready = 0; conn->ksnc_tx_scheduled = 0; - conn->ksnc_tx_mono = NULL; - atomic_set (&conn->ksnc_tx_nob, 0); + 
conn->ksnc_tx_carrier = NULL; + cfs_atomic_set (&conn->ksnc_tx_nob, 0); LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t, kshm_ips[LNET_MAX_INTERFACES])); @@ -1077,15 +1092,17 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips); peerid = peer->ksnp_id; - write_lock_bh(global_lock); + cfs_write_lock_bh(global_lock); conn->ksnc_proto = peer->ksnp_proto; - write_unlock_bh(global_lock); + cfs_write_unlock_bh(global_lock); if (conn->ksnc_proto == NULL) { - conn->ksnc_proto = &ksocknal_protocol_v2x; + conn->ksnc_proto = &ksocknal_protocol_v3x; #if SOCKNAL_VERSION_DEBUG - if (*ksocknal_tunables.ksnd_protocol != 2) - conn->ksnc_proto = &ksocknal_protocol_v1x; + if (*ksocknal_tunables.ksnd_protocol == 2) + conn->ksnc_proto = &ksocknal_protocol_v2x; + else if (*ksocknal_tunables.ksnd_protocol == 1) + conn->ksnc_proto = &ksocknal_protocol_v1x; #endif } @@ -1108,15 +1125,17 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, LASSERT (conn->ksnc_proto != NULL); LASSERT (peerid.nid != LNET_NID_ANY); + cpt = lnet_cpt_of_nid(peerid.nid); + if (active) { ksocknal_peer_addref(peer); - write_lock_bh (global_lock); + cfs_write_lock_bh (global_lock); } else { rc = ksocknal_create_peer(&peer, ni, peerid); if (rc != 0) goto failed_1; - write_lock_bh (global_lock); + cfs_write_lock_bh (global_lock); /* called with a ref on ni, so shutdown can't have started */ LASSERT (((ksock_net_t *) ni->ni_data)->ksnn_shutdown == 0); @@ -1125,8 +1144,8 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, if (peer2 == NULL) { /* NB this puts an "empty" peer in the peer * table (which takes my ref) */ - list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(peerid.nid)); + cfs_list_add_tail(&peer->ksnp_list, + ksocknal_nid2peerlist(peerid.nid)); } else { ksocknal_peer_decref(peer); peer = peer2; @@ -1154,12 +1173,12 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, goto failed_2; } - if (peer->ksnp_proto == NULL) { + if (peer->ksnp_proto == NULL) { /* Never connected before. * NB recv_hello may have returned EPROTO to signal my peer * wants a different protocol than the one I asked for. */ - LASSERT (list_empty(&peer->ksnp_conns)); + LASSERT (cfs_list_empty(&peer->ksnp_conns)); peer->ksnp_proto = conn->ksnc_proto; peer->ksnp_incarnation = incarnation; @@ -1194,8 +1213,8 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, /* Refuse to duplicate an existing connection, unless this is a * loopback connection */ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); + cfs_list_for_each(tmp, &peer->ksnp_conns) { + conn2 = cfs_list_entry(tmp, ksock_conn_t, ksnc_list); if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || conn2->ksnc_myipaddr != conn->ksnc_myipaddr || @@ -1228,8 +1247,8 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, * create an association. This allows incoming connections created * by routes in my peer to match my own route entries so I don't * continually create duplicate routes. 
*/ - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); + cfs_list_for_each (tmp, &peer->ksnp_routes) { + route = cfs_list_entry(tmp, ksock_route_t, ksnr_list); if (route->ksnr_ipaddr != conn->ksnc_ipaddr) continue; @@ -1240,34 +1259,36 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, conn->ksnc_peer = peer; /* conn takes my ref on peer */ peer->ksnp_last_alive = cfs_time_current(); + peer->ksnp_send_keepalive = 0; peer->ksnp_error = 0; - sched = ksocknal_choose_scheduler_locked (irq); + sched = ksocknal_choose_scheduler_locked(cpt); sched->kss_nconns++; conn->ksnc_scheduler = sched; + conn->ksnc_tx_last_post = cfs_time_current(); /* Set the deadline for the outgoing HELLO to drain */ - conn->ksnc_tx_bufnob = SOCK_WMEM_QUEUED(sock); + conn->ksnc_tx_bufnob = libcfs_sock_wmem_queued(sock); conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with adding to peer's conn list */ + cfs_mb(); /* order with adding to peer's conn list */ - list_add (&conn->ksnc_list, &peer->ksnp_conns); + cfs_list_add (&conn->ksnc_list, &peer->ksnp_conns); ksocknal_conn_addref(conn); ksocknal_new_packet(conn, 0); - /* Take all the packets blocking for a connection. - * NB, it might be nicer to share these blocked packets among any - * other connections that are becoming established. */ - while (!list_empty (&peer->ksnp_tx_queue)) { - tx = list_entry (peer->ksnp_tx_queue.next, - ksock_tx_t, tx_list); + conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn); - list_del (&tx->tx_list); + /* Take packets blocking for this connection. */ + cfs_list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) { + if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) == SOCKNAL_MATCH_NO) + continue; + + cfs_list_del (&tx->tx_list); ksocknal_queue_tx_locked (tx, conn); } - write_unlock_bh (global_lock); + cfs_write_unlock_bh (global_lock); /* We've now got a new connection. Any errors from here on are just * like "normal" comms errors and we close the connection normally. @@ -1277,14 +1298,12 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, * socket callbacks. */ - ksocknal_lib_bind_irq (irq); - - CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d" - " incarnation:"LPD64" sched[%d]/%d\n", - libcfs_id2str(peerid), conn->ksnc_proto->pro_version, - HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port, incarnation, - (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); + CDEBUG(D_NET, "New conn %s p %d.x %u.%u.%u.%u -> %u.%u.%u.%u/%d" + " incarnation:"LPD64" sched[%d:%d]\n", + libcfs_id2str(peerid), conn->ksnc_proto->pro_version, + HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr), + conn->ksnc_port, incarnation, cpt, + (int)(sched - &sched->kss_info->ksi_scheds[0])); if (active) { /* additional routes after interface exchange? 
*/ @@ -1307,7 +1326,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, if (rc == 0) rc = ksocknal_lib_setup_sock(sock); - write_lock_bh(global_lock); + cfs_write_lock_bh(global_lock); /* NB my callbacks block while I hold ksnd_global_lock */ ksocknal_lib_set_callback(sock, conn); @@ -1315,12 +1334,15 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, if (!active) peer->ksnp_accepting--; - write_unlock_bh(global_lock); + cfs_write_unlock_bh(global_lock); if (rc != 0) { - write_lock_bh(global_lock); - ksocknal_close_conn_locked(conn, rc); - write_unlock_bh(global_lock); + cfs_write_lock_bh(global_lock); + if (!conn->ksnc_closing) { + /* could be closed by another thread */ + ksocknal_close_conn_locked(conn, rc); + } + cfs_write_unlock_bh(global_lock); } else if (ksocknal_connsock_addref(conn) == 0) { /* Allow I/O to proceed. */ ksocknal_read_callback(conn); @@ -1328,19 +1350,20 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksocknal_connsock_decref(conn); } + ksocknal_connsock_decref(conn); ksocknal_conn_decref(conn); return rc; failed_2: if (!peer->ksnp_closing && - list_empty (&peer->ksnp_conns) && - list_empty (&peer->ksnp_routes)) { - list_add(&zombies, &peer->ksnp_tx_queue); - list_del_init(&peer->ksnp_tx_queue); + cfs_list_empty (&peer->ksnp_conns) && + cfs_list_empty (&peer->ksnp_routes)) { + cfs_list_add(&zombies, &peer->ksnp_tx_queue); + cfs_list_del_init(&peer->ksnp_tx_queue); ksocknal_unlink_peer_locked(peer); } - write_unlock_bh (global_lock); + cfs_write_unlock_bh (global_lock); if (warn != NULL) { if (rc < 0) @@ -1360,9 +1383,9 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, ksocknal_send_hello(ni, conn, peerid.nid, hello); } - write_lock_bh(global_lock); + cfs_write_lock_bh(global_lock); peer->ksnp_accepting--; - write_unlock_bh(global_lock); + cfs_write_unlock_bh(global_lock); } ksocknal_txlist_done(ni, &zombies, 1); @@ -1389,14 +1412,14 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) ksock_peer_t *peer = conn->ksnc_peer; ksock_route_t *route; ksock_conn_t *conn2; - struct list_head *tmp; + cfs_list_t *tmp; LASSERT (peer->ksnp_error == 0); LASSERT (!conn->ksnc_closing); conn->ksnc_closing = 1; /* ksnd_deathrow_conns takes over peer's ref */ - list_del (&conn->ksnc_list); + cfs_list_del (&conn->ksnc_list); route = conn->ksnc_route; if (route != NULL) { @@ -1405,8 +1428,8 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); conn2 = NULL; - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); + cfs_list_for_each(tmp, &peer->ksnp_conns) { + conn2 = cfs_list_entry(tmp, ksock_conn_t, ksnc_list); if (conn2->ksnc_route == route && conn2->ksnc_type == conn->ksnc_type) @@ -1420,56 +1443,73 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) conn->ksnc_route = NULL; #if 0 /* irrelevent with only eager routes */ - list_del (&route->ksnr_list); /* make route least favourite */ - list_add_tail (&route->ksnr_list, &peer->ksnp_routes); + /* make route least favourite */ + cfs_list_del (&route->ksnr_list); + cfs_list_add_tail (&route->ksnr_list, &peer->ksnp_routes); #endif ksocknal_route_decref(route); /* drop conn's ref on route */ } - if (list_empty (&peer->ksnp_conns)) { + if (cfs_list_empty (&peer->ksnp_conns)) { /* No more connections to this peer */ + if (!cfs_list_empty(&peer->ksnp_tx_queue)) { + ksock_tx_t *tx; + + LASSERT (conn->ksnc_proto == &ksocknal_protocol_v3x); + + /* throw them to the last 
connection..., + * these TXs will be send to /dev/null by scheduler */ + cfs_list_for_each_entry(tx, &peer->ksnp_tx_queue, + tx_list) + ksocknal_tx_prep(conn, tx); + + cfs_spin_lock_bh(&conn->ksnc_scheduler->kss_lock); + cfs_list_splice_init(&peer->ksnp_tx_queue, + &conn->ksnc_tx_queue); + cfs_spin_unlock_bh(&conn->ksnc_scheduler->kss_lock); + } + peer->ksnp_proto = NULL; /* renegotiate protocol version */ peer->ksnp_error = error; /* stash last conn close reason */ - if (list_empty (&peer->ksnp_routes)) { + if (cfs_list_empty (&peer->ksnp_routes)) { /* I've just closed last conn belonging to a * peer with no routes to it */ ksocknal_unlink_peer_locked (peer); } } - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); + cfs_spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns); + cfs_list_add_tail (&conn->ksnc_list, + &ksocknal_data.ksnd_deathrow_conns); cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); + cfs_spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); } void ksocknal_peer_failed (ksock_peer_t *peer) { - time_t last_alive = 0; - int notify = 0; + int notify = 0; + cfs_time_t last_alive = 0; /* There has been a connection failure or comms error; but I'll only * tell LNET I think the peer is dead if it's to another kernel and * there are no connections or connection attempts in existance. */ - read_lock (&ksocknal_data.ksnd_global_lock); + cfs_read_lock (&ksocknal_data.ksnd_global_lock); if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 && - list_empty(&peer->ksnp_conns) && + cfs_list_empty(&peer->ksnp_conns) && peer->ksnp_accepting == 0 && ksocknal_find_connecting_route_locked(peer) == NULL) { notify = 1; - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ksnp_last_alive); + last_alive = peer->ksnp_last_alive; } - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); if (notify) lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, @@ -1477,6 +1517,43 @@ ksocknal_peer_failed (ksock_peer_t *peer) } void +ksocknal_finalize_zcreq(ksock_conn_t *conn) +{ + ksock_peer_t *peer = conn->ksnc_peer; + ksock_tx_t *tx; + ksock_tx_t *tmp; + CFS_LIST_HEAD (zlist); + + /* NB safe to finalize TXs because closing of socket will + * abort all buffered data */ + LASSERT (conn->ksnc_sock == NULL); + + cfs_spin_lock(&peer->ksnp_lock); + + cfs_list_for_each_entry_safe_typed(tx, tmp, &peer->ksnp_zc_req_list, + ksock_tx_t, tx_zc_list) { + if (tx->tx_conn != conn) + continue; + + LASSERT (tx->tx_msg.ksm_zc_cookies[0] != 0); + + tx->tx_msg.ksm_zc_cookies[0] = 0; + tx->tx_zc_aborted = 1; /* mark it as not-acked */ + cfs_list_del(&tx->tx_zc_list); + cfs_list_add(&tx->tx_zc_list, &zlist); + } + + cfs_spin_unlock(&peer->ksnp_lock); + + while (!cfs_list_empty(&zlist)) { + tx = cfs_list_entry(zlist.next, ksock_tx_t, tx_zc_list); + + cfs_list_del(&tx->tx_zc_list); + ksocknal_tx_decref(tx); + } +} + +void ksocknal_terminate_conn (ksock_conn_t *conn) { /* This gets called by the reaper (guaranteed thread context) to @@ -1486,22 +1563,19 @@ ksocknal_terminate_conn (ksock_conn_t *conn) ksock_peer_t *peer = conn->ksnc_peer; ksock_sched_t *sched = conn->ksnc_scheduler; int failed = 0; - struct list_head *tmp; - struct list_head *nxt; - ksock_tx_t *tx; - LIST_HEAD (zlist); LASSERT(conn->ksnc_closing); /* wake up the scheduler to "send" all remaining packets to /dev/null */ - spin_lock_bh (&sched->kss_lock); + cfs_spin_lock_bh 
(&sched->kss_lock); + + /* a closing conn is always ready to tx */ + conn->ksnc_tx_ready = 1; if (!conn->ksnc_tx_scheduled && - !list_empty(&conn->ksnc_tx_queue)){ - list_add_tail (&conn->ksnc_tx_list, + !cfs_list_empty(&conn->ksnc_tx_queue)){ + cfs_list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); - /* a closing conn is always ready to tx */ - conn->ksnc_tx_ready = 1; conn->ksnc_tx_scheduled = 1; /* extra ref for scheduler */ ksocknal_conn_addref(conn); @@ -1509,34 +1583,10 @@ ksocknal_terminate_conn (ksock_conn_t *conn) cfs_waitq_signal (&sched->kss_waitq); } - spin_unlock_bh (&sched->kss_lock); - - spin_lock(&peer->ksnp_lock); - - list_for_each_safe(tmp, nxt, &peer->ksnp_zc_req_list) { - tx = list_entry(tmp, ksock_tx_t, tx_zc_list); - - if (tx->tx_conn != conn) - continue; - - LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0); - - tx->tx_msg.ksm_zc_req_cookie = 0; - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); - } - - spin_unlock(&peer->ksnp_lock); - - list_for_each_safe(tmp, nxt, &zlist) { - tx = list_entry(tmp, ksock_tx_t, tx_zc_list); - - list_del(&tx->tx_zc_list); - ksocknal_tx_decref(tx); - } + cfs_spin_unlock_bh (&sched->kss_lock); /* serialise with callbacks */ - write_lock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock); ksocknal_lib_reset_callback(conn->ksnc_sock, conn); @@ -1546,12 +1596,12 @@ ksocknal_terminate_conn (ksock_conn_t *conn) if (peer->ksnp_error != 0) { /* peer's last conn closed in error */ - LASSERT (list_empty (&peer->ksnp_conns)); + LASSERT (cfs_list_empty (&peer->ksnp_conns)); failed = 1; peer->ksnp_error = 0; /* avoid multiple notifications */ } - write_unlock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock); if (failed) ksocknal_peer_failed(peer); @@ -1569,36 +1619,44 @@ ksocknal_queue_zombie_conn (ksock_conn_t *conn) { /* Queue the conn for the reaper to destroy */ - LASSERT (atomic_read(&conn->ksnc_conn_refcount) == 0); - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); + LASSERT (cfs_atomic_read(&conn->ksnc_conn_refcount) == 0); + cfs_spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); + cfs_list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq); - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); + cfs_spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); } void ksocknal_destroy_conn (ksock_conn_t *conn) { + cfs_time_t last_rcv; + /* Final coup-de-grace of the reaper */ CDEBUG (D_NET, "connection %p\n", conn); - LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0); - LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0); + LASSERT (cfs_atomic_read (&conn->ksnc_conn_refcount) == 0); + LASSERT (cfs_atomic_read (&conn->ksnc_sock_refcount) == 0); LASSERT (conn->ksnc_sock == NULL); LASSERT (conn->ksnc_route == NULL); LASSERT (!conn->ksnc_tx_scheduled); LASSERT (!conn->ksnc_rx_scheduled); - LASSERT (list_empty(&conn->ksnc_tx_queue)); + LASSERT (cfs_list_empty(&conn->ksnc_tx_queue)); /* complete current receive if any */ switch (conn->ksnc_rx_state) { case SOCKNAL_RX_LNET_PAYLOAD: - CERROR("Completing partial receive from %s" - ", ip %d.%d.%d.%d:%d, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); + last_rcv = conn->ksnc_rx_deadline - + cfs_time_seconds(*ksocknal_tunables.ksnd_timeout); + CERROR("Completing partial receive from %s[%d]" + ", ip %d.%d.%d.%d:%d, with error, 
wanted: %d, left: %d, " + "last alive is %ld secs ago\n", + libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, + HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, + conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, + cfs_duration_sec(cfs_time_sub(cfs_time_current(), + last_rcv))); lnet_finalize (conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, -EIO); break; @@ -1639,12 +1697,12 @@ int ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) { ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; + cfs_list_t *ctmp; + cfs_list_t *cnxt; int count = 0; - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); + cfs_list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { + conn = cfs_list_entry (ctmp, ksock_conn_t, ksnc_list); if (ipaddr == 0 || conn->ksnc_ipaddr == ipaddr) { @@ -1663,11 +1721,11 @@ ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) __u32 ipaddr = conn->ksnc_ipaddr; int count; - write_lock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock); count = ksocknal_close_peer_conns_locked (peer, ipaddr, why); - write_unlock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock); return (count); } @@ -1676,26 +1734,27 @@ int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr) { ksock_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; + cfs_list_t *ptmp; + cfs_list_t *pnxt; int lo; int hi; int i; int count = 0; - write_lock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock); if (id.nid != LNET_NID_ANY) - lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers; + lo = hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers); else { lo = 0; hi = ksocknal_data.ksnd_peer_hash_size - 1; } for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { + cfs_list_for_each_safe (ptmp, pnxt, + &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); + peer = cfs_list_entry (ptmp, ksock_peer_t, ksnp_list); if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) && (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid))) @@ -1705,7 +1764,7 @@ ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr) } } - write_unlock_bh (&ksocknal_data.ksnd_global_lock); + cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock); /* wildcards always succeed */ if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) @@ -1719,7 +1778,10 @@ ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive) { /* The router is telling me she's been notified of a change in * gateway state.... */ - lnet_process_id_t id = {.nid = gw_nid, .pid = LNET_PID_ANY}; + lnet_process_id_t id = {0}; + + id.nid = gw_nid; + id.pid = LNET_PID_ANY; CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), alive ? 
"up" : "down"); @@ -1735,28 +1797,90 @@ ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive) } void +ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when) +{ + int connect = 1; + cfs_time_t last_alive = 0; + cfs_time_t now = cfs_time_current(); + ksock_peer_t *peer = NULL; + cfs_rwlock_t *glock = &ksocknal_data.ksnd_global_lock; + lnet_process_id_t id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID}; + + cfs_read_lock(glock); + + peer = ksocknal_find_peer_locked(ni, id); + if (peer != NULL) { + cfs_list_t *tmp; + ksock_conn_t *conn; + int bufnob; + + cfs_list_for_each (tmp, &peer->ksnp_conns) { + conn = cfs_list_entry(tmp, ksock_conn_t, ksnc_list); + bufnob = libcfs_sock_wmem_queued(conn->ksnc_sock); + + if (bufnob < conn->ksnc_tx_bufnob) { + /* something got ACKed */ + conn->ksnc_tx_deadline = + cfs_time_shift(*ksocknal_tunables.ksnd_timeout); + peer->ksnp_last_alive = now; + conn->ksnc_tx_bufnob = bufnob; + } + } + + last_alive = peer->ksnp_last_alive; + if (ksocknal_find_connectable_route_locked(peer) == NULL) + connect = 0; + } + + cfs_read_unlock(glock); + + if (last_alive != 0) + *when = last_alive; + + CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n", + libcfs_nid2str(nid), peer, + last_alive ? cfs_duration_sec(now - last_alive) : -1, + connect); + + if (!connect) + return; + + ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port()); + + cfs_write_lock_bh(glock); + + peer = ksocknal_find_peer_locked(ni, id); + if (peer != NULL) + ksocknal_launch_all_connections_locked(peer); + + cfs_write_unlock_bh(glock); + return; +} + +void ksocknal_push_peer (ksock_peer_t *peer) { int index; int i; - struct list_head *tmp; + cfs_list_t *tmp; ksock_conn_t *conn; for (index = 0; ; index++) { - read_lock (&ksocknal_data.ksnd_global_lock); + cfs_read_lock (&ksocknal_data.ksnd_global_lock); i = 0; conn = NULL; - list_for_each (tmp, &peer->ksnp_conns) { + cfs_list_for_each (tmp, &peer->ksnp_conns) { if (i++ == index) { - conn = list_entry (tmp, ksock_conn_t, ksnc_list); + conn = cfs_list_entry (tmp, ksock_conn_t, + ksnc_list); ksocknal_conn_addref(conn); break; } } - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); if (conn == NULL) break; @@ -1770,7 +1894,7 @@ int ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id) { ksock_peer_t *peer; - struct list_head *tmp; + cfs_list_t *tmp; int index; int i; int j; @@ -1778,14 +1902,14 @@ ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id) for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { for (j = 0; ; j++) { - read_lock (&ksocknal_data.ksnd_global_lock); + cfs_read_lock (&ksocknal_data.ksnd_global_lock); index = 0; peer = NULL; - list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(tmp, ksock_peer_t, - ksnp_list); + cfs_list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { + peer = cfs_list_entry(tmp, ksock_peer_t, + ksnp_list); if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) && @@ -1801,7 +1925,7 @@ ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id) } } - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); if (peer != NULL) { rc = 0; @@ -1823,16 +1947,16 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask) int rc; int i; int j; - struct list_head *ptmp; + cfs_list_t *ptmp; ksock_peer_t *peer; - struct list_head *rtmp; + cfs_list_t *rtmp; ksock_route_t *route; if (ipaddress == 0 || netmask == 0) return (-EINVAL); - write_lock_bh (&ksocknal_data.ksnd_global_lock); + 
+        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
 
         iface = ksocknal_ip2iface(ni, ipaddress);
         if (iface != NULL) {
@@ -1849,15 +1973,18 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
                 iface->ksni_npeers = 0;
 
                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-                        list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
-                                peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
+                        cfs_list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
+                                peer = cfs_list_entry(ptmp, ksock_peer_t,
+                                                      ksnp_list);
 
                                 for (j = 0; j < peer->ksnp_n_passive_ips; j++)
                                         if (peer->ksnp_passive_ips[j] == ipaddress)
                                                 iface->ksni_npeers++;
 
-                                list_for_each(rtmp, &peer->ksnp_routes) {
-                                        route = list_entry(rtmp, ksock_route_t, ksnr_list);
+                                cfs_list_for_each(rtmp, &peer->ksnp_routes) {
+                                        route = cfs_list_entry(rtmp,
+                                                               ksock_route_t,
+                                                               ksnr_list);
 
                                         if (route->ksnr_myipaddr == ipaddress)
                                                 iface->ksni_nroutes++;
@@ -1869,7 +1996,7 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
                 /* NB only new connections will pay attention to the new interface! */
         }
 
-        write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
 
         return (rc);
 }
@@ -1877,8 +2004,8 @@ ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
 void
 ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
 {
-        struct list_head   *tmp;
-        struct list_head   *nxt;
+        cfs_list_t         *tmp;
+        cfs_list_t         *nxt;
         ksock_route_t      *route;
         ksock_conn_t       *conn;
         int                 i;
@@ -1893,8 +2020,8 @@ ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
                         break;
                 }
 
-        list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
-                route = list_entry (tmp, ksock_route_t, ksnr_list);
+        cfs_list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
+                route = cfs_list_entry (tmp, ksock_route_t, ksnr_list);
 
                 if (route->ksnr_myipaddr != ipaddr)
                         continue;
@@ -1907,8 +2034,8 @@ ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
                 }
         }
 
-        list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
-                conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+        cfs_list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
+                conn = cfs_list_entry(tmp, ksock_conn_t, ksnc_list);
 
                 if (conn->ksnc_myipaddr == ipaddr)
                         ksocknal_close_conn_locked (conn, 0);
@@ -1920,14 +2047,14 @@ ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
 {
         ksock_net_t       *net = ni->ni_data;
         int                rc = -ENOENT;
-        struct list_head  *tmp;
-        struct list_head  *nxt;
+        cfs_list_t        *tmp;
+        cfs_list_t        *nxt;
         ksock_peer_t      *peer;
         __u32              this_ip;
         int                i;
         int                j;
 
-        write_lock_bh (&ksocknal_data.ksnd_global_lock);
+        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
 
         for (i = 0; i < net->ksnn_ninterfaces; i++) {
                 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
@@ -1945,8 +2072,10 @@ ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
                 net->ksnn_ninterfaces--;
 
                 for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
-                        list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) {
-                                peer = list_entry(tmp, ksock_peer_t, ksnp_list);
+                        cfs_list_for_each_safe(tmp, nxt,
+                                               &ksocknal_data.ksnd_peers[j]) {
+                                peer = cfs_list_entry(tmp, ksock_peer_t,
+                                                      ksnp_list);
 
                                 if (peer->ksnp_ni != ni)
                                         continue;
@@ -1956,7 +2085,7 @@ ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
                 }
         }
 
-        write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
 
         return (rc);
 }
@@ -1964,6 +2093,7 @@ ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
 int
 ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
 {
+        lnet_process_id_t id = {0};
         struct libcfs_ioctl_data *data = arg;
         int rc;
@@ -1972,10 +2102,9 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                 ksock_net_t       *net = ni->ni_data;
                 ksock_interface_t *iface;
 
-                read_lock (&ksocknal_data.ksnd_global_lock);
+                cfs_read_lock (&ksocknal_data.ksnd_global_lock);
 
-                if (data->ioc_count < 0 ||
-                    data->ioc_count >= net->ksnn_ninterfaces) {
+                if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
                         rc = -ENOENT;
                 } else {
                         rc = 0;
@@ -1987,7 +2116,7 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                         data->ioc_u32[3] = iface->ksni_nroutes;
                 }
 
-                read_unlock (&ksocknal_data.ksnd_global_lock);
+                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
                 return rc;
         }
 
@@ -2001,7 +2130,6 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                                        data->ioc_u32[0]); /* IP address */
 
         case IOC_LIBCFS_GET_PEER: {
-                lnet_process_id_t id = {0,};
                 __u32             myip = 0;
                 __u32             ip = 0;
                 int               port = 0;
@@ -2024,19 +2152,19 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                 return 0;
         }
 
-        case IOC_LIBCFS_ADD_PEER: {
-                lnet_process_id_t id = {.nid = data->ioc_nid,
-                                        .pid = LUSTRE_SRV_LNET_PID};
+        case IOC_LIBCFS_ADD_PEER:
+                id.nid = data->ioc_nid;
+                id.pid = LUSTRE_SRV_LNET_PID;
                 return ksocknal_add_peer (ni, id,
                                           data->ioc_u32[0], /* IP */
                                           data->ioc_u32[1]); /* port */
-        }
-        case IOC_LIBCFS_DEL_PEER: {
-                lnet_process_id_t id = {.nid = data->ioc_nid,
-                                        .pid = LNET_PID_ANY};
+
+        case IOC_LIBCFS_DEL_PEER:
+                id.nid = data->ioc_nid;
+                id.pid = LNET_PID_ANY;
                 return ksocknal_del_peer (ni, id,
                                           data->ioc_u32[0]); /* IP */
-        }
+
         case IOC_LIBCFS_GET_CONN: {
                 int           txmem;
                 int           rxmem;
@@ -2055,21 +2183,19 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                 data->ioc_u32[1] = conn->ksnc_port;
                 data->ioc_u32[2] = conn->ksnc_myipaddr;
                 data->ioc_u32[3] = conn->ksnc_type;
-                data->ioc_u32[4] = conn->ksnc_scheduler -
-                                   ksocknal_data.ksnd_schedulers;
+                data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
                 data->ioc_u32[5] = rxmem;
                 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
                 ksocknal_conn_decref(conn);
                 return 0;
         }
 
-        case IOC_LIBCFS_CLOSE_CONNECTION: {
-                lnet_process_id_t id = {.nid = data->ioc_nid,
-                                        .pid = LNET_PID_ANY};
-
+        case IOC_LIBCFS_CLOSE_CONNECTION:
+                id.nid = data->ioc_nid;
+                id.pid = LNET_PID_ANY;
                 return ksocknal_close_matching_conns (id,
                                                       data->ioc_u32[0]);
-        }
+
         case IOC_LIBCFS_REGISTER_MYNID:
                 /* Ignore if this is a noop */
                 if (data->ioc_nid == ni->ni_nid)
@@ -2080,12 +2206,11 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
                        libcfs_nid2str(ni->ni_nid));
                 return -EINVAL;
 
-        case IOC_LIBCFS_PUSH_CONNECTION: {
-                lnet_process_id_t id = {.nid = data->ioc_nid,
-                                        .pid = LNET_PID_ANY};
-
+        case IOC_LIBCFS_PUSH_CONNECTION:
+                id.nid = data->ioc_nid;
+                id.pid = LNET_PID_ANY;
                 return ksocknal_push(ni, id);
-        }
+
         default:
                 return -EINVAL;
         }
@@ -2095,44 +2220,56 @@ ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
 void
 ksocknal_free_buffers (void)
 {
-        LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
-
-        if (ksocknal_data.ksnd_schedulers != NULL)
-                LIBCFS_FREE (ksocknal_data.ksnd_schedulers,
-                             sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
+        LASSERT (cfs_atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
+
+        if (ksocknal_data.ksnd_sched_info != NULL) {
+                struct ksock_sched_info *info;
+                int                      i;
+
+                cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
+                        if (info->ksi_scheds != NULL) {
+                                LIBCFS_FREE(info->ksi_scheds,
+                                            info->ksi_nthreads_max *
+                                            sizeof(info->ksi_scheds[0]));
+                        }
+                }
+                cfs_percpt_free(ksocknal_data.ksnd_sched_info);
+        }
 
         LIBCFS_FREE (ksocknal_data.ksnd_peers,
-                     sizeof (struct list_head) *
+                     sizeof (cfs_list_t) *
                      ksocknal_data.ksnd_peer_hash_size);
 
-        spin_lock(&ksocknal_data.ksnd_tx_lock);
+        cfs_spin_lock(&ksocknal_data.ksnd_tx_lock);
 
-        if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
-                struct list_head  zlist;
+        if (!cfs_list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
+                cfs_list_t        zlist;
                 ksock_tx_t       *tx;
 
-                list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
-                list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
-                spin_unlock(&ksocknal_data.ksnd_tx_lock);
+                cfs_list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
+                cfs_list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
+                cfs_spin_unlock(&ksocknal_data.ksnd_tx_lock);
 
-                while(!list_empty(&zlist)) {
-                        tx = list_entry(zlist.next, ksock_tx_t, tx_list);
-                        list_del(&tx->tx_list);
+                while(!cfs_list_empty(&zlist)) {
+                        tx = cfs_list_entry(zlist.next, ksock_tx_t, tx_list);
+                        cfs_list_del(&tx->tx_list);
                         LIBCFS_FREE(tx, tx->tx_desc_size);
                 }
         } else {
-                spin_unlock(&ksocknal_data.ksnd_tx_lock);
+                cfs_spin_unlock(&ksocknal_data.ksnd_tx_lock);
         }
 }
 
 void
-ksocknal_base_shutdown (void)
+ksocknal_base_shutdown(void)
 {
-        ksock_sched_t *sched;
-        int            i;
+        struct ksock_sched_info *info;
+        ksock_sched_t           *sched;
+        int                      i;
+        int                      j;
 
         CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               atomic_read (&libcfs_kmemory));
+               cfs_atomic_read (&libcfs_kmemory));
 
         LASSERT (ksocknal_data.ksnd_nnets == 0);
 
         switch (ksocknal_data.ksnd_init) {
@@ -2143,47 +2280,65 @@ ksocknal_base_shutdown (void)
         case SOCKNAL_INIT_DATA:
                 LASSERT (ksocknal_data.ksnd_peers != NULL);
                 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-                        LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
+                        LASSERT (cfs_list_empty (&ksocknal_data.ksnd_peers[i]));
                 }
-                LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
-                LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
-                LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs));
-                LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes));
-
-                if (ksocknal_data.ksnd_schedulers != NULL)
-                        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                                ksock_sched_t *kss =
-                                        &ksocknal_data.ksnd_schedulers[i];
-
-                                LASSERT (list_empty (&kss->kss_tx_conns));
-                                LASSERT (list_empty (&kss->kss_rx_conns));
-                                LASSERT (list_empty (&kss->kss_zombie_noop_txs));
-                                LASSERT (kss->kss_nconns == 0);
-                        }
-                /* flag threads to terminate; wake and wait for them to die */
-                ksocknal_data.ksnd_shuttingdown = 1;
-                cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq);
-                cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq);
-
-                if (ksocknal_data.ksnd_schedulers != NULL)
-                        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                                sched = &ksocknal_data.ksnd_schedulers[i];
-                                cfs_waitq_broadcast(&sched->kss_waitq);
-                        }
+                LASSERT(cfs_list_empty(&ksocknal_data.ksnd_nets));
+                LASSERT (cfs_list_empty (&ksocknal_data.ksnd_enomem_conns));
+                LASSERT (cfs_list_empty (&ksocknal_data.ksnd_zombie_conns));
+                LASSERT (cfs_list_empty (&ksocknal_data.ksnd_connd_connreqs));
+                LASSERT (cfs_list_empty (&ksocknal_data.ksnd_connd_routes));
+
+                if (ksocknal_data.ksnd_sched_info != NULL) {
+                        cfs_percpt_for_each(info, i,
+                                            ksocknal_data.ksnd_sched_info) {
+                                if (info->ksi_scheds == NULL)
+                                        continue;
+
+                                for (j = 0; j < info->ksi_nthreads_max; j++) {
+
+                                        sched = &info->ksi_scheds[j];
+                                        LASSERT(cfs_list_empty(&sched->\
+                                                kss_tx_conns));
+                                        LASSERT(cfs_list_empty(&sched->\
+                                                kss_rx_conns));
+                                        LASSERT(cfs_list_empty(&sched-> \
+                                                kss_zombie_noop_txs));
+                                        LASSERT(sched->kss_nconns == 0);
+                                }
+                        }
+                }
+
+                /* flag threads to terminate; wake and wait for them to die */
+                ksocknal_data.ksnd_shuttingdown = 1;
+                cfs_waitq_broadcast(&ksocknal_data.ksnd_connd_waitq);
+                cfs_waitq_broadcast(&ksocknal_data.ksnd_reaper_waitq);
+
+                if (ksocknal_data.ksnd_sched_info != NULL) {
+                        cfs_percpt_for_each(info, i,
+                                            ksocknal_data.ksnd_sched_info) {
+                                if (info->ksi_scheds == NULL)
+                                        continue;
+
+                                for (j = 0; j < info->ksi_nthreads_max; j++) {
+                                        sched = &info->ksi_scheds[j];
+                                        cfs_waitq_broadcast(&sched->kss_waitq);
+                                }
+                        }
+                }
 
                 i = 4;
-                read_lock (&ksocknal_data.ksnd_global_lock);
+                cfs_read_lock (&ksocknal_data.ksnd_global_lock);
                 while (ksocknal_data.ksnd_nthreads != 0) {
                         i++;
                         CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
                                "waiting for %d threads to terminate\n",
                                ksocknal_data.ksnd_nthreads);
-                        read_unlock (&ksocknal_data.ksnd_global_lock);
+                        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
                         cfs_pause(cfs_time_seconds(1));
-                        read_lock (&ksocknal_data.ksnd_global_lock);
+                        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
                 }
-                read_unlock (&ksocknal_data.ksnd_global_lock);
+                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
 
                 ksocknal_free_buffers();
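The wait loop above rate-limits its "waiting for %d threads to terminate" message with ((i & (-i)) == i), which is true only when i is a power of two; since the counter starts at 4, the warning is raised on passes 4, 8, 16, 32, ... instead of once per second. A stand-alone C sketch of that test follows; it is illustrative only and not part of the patch.

/* --- illustrative example, not part of the patch --- */
#include <stdio.h>

/* (i & -i) isolates the lowest set bit of i, so it equals i exactly when
 * i has a single bit set, i.e. when i is a power of two.  The shutdown
 * loop starts at i = 4, so the i == 0 corner case never arises. */
static int power_of_two(unsigned int i)
{
        return (i & -i) == i;
}

int main(void)
{
        unsigned int i;

        for (i = 4; i <= 64; i++)
                if (power_of_two(i))
                        printf("pass %2u: warning would be logged\n", i);
        return 0;
}
/* --- end of example --- */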
@@ -2192,12 +2347,11 @@ ksocknal_base_shutdown (void)
         }
 
         CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               atomic_read (&libcfs_kmemory));
+               cfs_atomic_read (&libcfs_kmemory));
 
         PORTAL_MODULE_UNUSE;
 }
 
-
 __u64
 ksocknal_new_incarnation (void)
 {
@@ -2208,16 +2362,17 @@ ksocknal_new_incarnation (void)
          * we won't be able to reboot more frequently than 1MHz for the
          * forseeable future :) */
-        do_gettimeofday(&tv);
+        cfs_gettimeofday(&tv);
 
         return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
 }
 
 int
-ksocknal_base_startup (void)
+ksocknal_base_startup(void)
 {
-        int rc;
-        int i;
+        struct ksock_sched_info *info;
+        int                      rc;
+        int                      i;
 
         LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
         LASSERT (ksocknal_data.ksnd_nnets == 0);
@@ -2226,70 +2381,100 @@ ksocknal_base_startup (void)
         ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
         LIBCFS_ALLOC (ksocknal_data.ksnd_peers,
-                      sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
+                      sizeof (cfs_list_t) *
+                      ksocknal_data.ksnd_peer_hash_size);
         if (ksocknal_data.ksnd_peers == NULL)
                 return -ENOMEM;
 
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
                 CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
 
-        rwlock_init(&ksocknal_data.ksnd_global_lock);
+        cfs_rwlock_init(&ksocknal_data.ksnd_global_lock);
+        CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
 
-        spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
+        cfs_spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
         cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq);
 
-        spin_lock_init (&ksocknal_data.ksnd_connd_lock);
+        cfs_spin_lock_init (&ksocknal_data.ksnd_connd_lock);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes);
         cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq);
 
-        spin_lock_init (&ksocknal_data.ksnd_tx_lock);
+        cfs_spin_lock_init (&ksocknal_data.ksnd_tx_lock);
         CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs);
 
-        /* NB memset above zeros whole of ksocknal_data, including
-         * ksocknal_data.ksnd_irqinfo[all].ksni_valid */
-
-        /* flag lists/ptrs/locks initialised */
-        ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
-        PORTAL_MODULE_USE;
-
-        ksocknal_data.ksnd_nschedulers = ksocknal_nsched();
-        LIBCFS_ALLOC(ksocknal_data.ksnd_schedulers,
-                     sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
-        if (ksocknal_data.ksnd_schedulers == NULL)
-                goto failed;
-
-        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
-
-                spin_lock_init (&kss->kss_lock);
-                CFS_INIT_LIST_HEAD (&kss->kss_rx_conns);
-                CFS_INIT_LIST_HEAD (&kss->kss_tx_conns);
-                CFS_INIT_LIST_HEAD (&kss->kss_zombie_noop_txs);
-                cfs_waitq_init (&kss->kss_waitq);
-        }
-
-        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                rc = ksocknal_thread_start (ksocknal_scheduler,
-                                            &ksocknal_data.ksnd_schedulers[i]);
-                if (rc != 0) {
-                        CERROR("Can't spawn socknal scheduler[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
+        /* NB memset above zeros whole of ksocknal_data */
+
+        /* flag lists/ptrs/locks initialised */
+        ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
+        PORTAL_MODULE_USE;
+
+        ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
+                                                         sizeof(*info));
+        if (ksocknal_data.ksnd_sched_info == NULL)
+                goto failed;
+
+        cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
+                ksock_sched_t *sched;
+                int nthrs;
+
+                nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
+                if (*ksocknal_tunables.ksnd_nscheds > 0) {
+                        nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
+                } else {
+                        /* max to half of CPUs, assume another half should be
+                         * reserved for upper layer modules */
+                        nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
+                }
+
+                info->ksi_nthreads_max = nthrs;
+                info->ksi_cpt = i;
+
+                LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i,
+                                 info->ksi_nthreads_max * sizeof(*sched));
+                if (info->ksi_scheds == NULL)
+                        goto failed;
+
+                for (; nthrs > 0; nthrs--) {
+                        sched = &info->ksi_scheds[nthrs - 1];
+
+                        sched->kss_info = info;
+                        cfs_spin_lock_init(&sched->kss_lock);
+                        CFS_INIT_LIST_HEAD(&sched->kss_rx_conns);
+                        CFS_INIT_LIST_HEAD(&sched->kss_tx_conns);
+                        CFS_INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
+                        cfs_waitq_init(&sched->kss_waitq);
+                }
+        }
+
+        ksocknal_data.ksnd_connd_starting       = 0;
+        ksocknal_data.ksnd_connd_failed_stamp   = 0;
+        ksocknal_data.ksnd_connd_starting_stamp = cfs_time_current_sec();
         /* must have at least 2 connds to remain responsive to accepts while
          * connecting */
-        if (*ksocknal_tunables.ksnd_nconnds < 2)
-                *ksocknal_tunables.ksnd_nconnds = 2;
+        if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
+                *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
+
+        if (*ksocknal_tunables.ksnd_nconnds_max <
+            *ksocknal_tunables.ksnd_nconnds) {
+                ksocknal_tunables.ksnd_nconnds_max =
+                        ksocknal_tunables.ksnd_nconnds;
+        }
 
         for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
-                rc = ksocknal_thread_start (ksocknal_connd, (void *)((long)i));
+                cfs_spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+                ksocknal_data.ksnd_connd_starting++;
+                cfs_spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
+
+                rc = ksocknal_thread_start (ksocknal_connd,
+                                            (void *)((ulong_ptr_t)i));
                 if (rc != 0) {
+                        cfs_spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
+                        ksocknal_data.ksnd_connd_starting--;
+                        cfs_spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
                         CERROR("Can't spawn socknal connd: %d\n", rc);
                         goto failed;
                 }
         }
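ksocknal_base_startup() above now sizes its scheduler pool per CPT: when the ksnd_nscheds tunable is set it caps the thread count, otherwise roughly half of the CPT's CPUs are left for upper layers while never dropping below SOCKNAL_NSCHEDS threads or exceeding the CPT's CPU count. The stand-alone sketch below reproduces only that arithmetic; it is illustrative, and the SOCKNAL_NSCHEDS value of 3 is a placeholder for the constant defined in socklnd.h.

/* --- illustrative example, not part of the patch --- */
#include <stdio.h>

#define SOCKNAL_NSCHEDS 3  /* placeholder; the real constant is in socklnd.h */

static int min_i(int a, int b) { return a < b ? a : b; }
static int max_i(int a, int b) { return a > b ? a : b; }

/* cpt_weight: CPUs in this CPT; nscheds: ksnd_nscheds tunable, 0 = unset */
static int sched_threads_max(int cpt_weight, int nscheds)
{
        int nthrs = cpt_weight;

        if (nscheds > 0)
                nthrs = min_i(nthrs, nscheds);
        else
                /* at most half of the CPUs, but at least SOCKNAL_NSCHEDS
                 * and never more than the CPT actually has */
                nthrs = min_i(max_i(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);

        return nthrs;
}

int main(void)
{
        int weights[] = { 1, 2, 4, 8, 16 };
        unsigned int i;

        for (i = 0; i < sizeof(weights) / sizeof(weights[0]); i++)
                printf("CPT with %2d CPUs -> %d scheduler threads\n",
                       weights[i], sched_threads_max(weights[i], 0));
        return 0;
}
/* --- end of example --- */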
@@ -2315,14 +2500,14 @@
 void
 ksocknal_debug_peerhash (lnet_ni_t *ni)
 {
         ksock_peer_t      *peer = NULL;
-        struct list_head  *tmp;
+        cfs_list_t        *tmp;
         int                i;
 
-        read_lock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
 
         for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-                list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
-                        peer = list_entry (tmp, ksock_peer_t, ksnp_list);
+                cfs_list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
+                        peer = cfs_list_entry (tmp, ksock_peer_t, ksnp_list);
 
                         if (peer->ksnp_ni == ni)
                                 break;
@@ -2337,31 +2522,31 @@ ksocknal_debug_peerhash (lnet_ni_t *ni)
                 CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
                        "closing %d, accepting %d, err %d, zcookie "LPU64", "
                        "txq %d, zc_req %d\n", libcfs_id2str(peer->ksnp_id),
-                       atomic_read(&peer->ksnp_refcount),
+                       cfs_atomic_read(&peer->ksnp_refcount),
                        peer->ksnp_sharecount, peer->ksnp_closing,
                        peer->ksnp_accepting, peer->ksnp_error,
                        peer->ksnp_zc_next_cookie,
-                       !list_empty(&peer->ksnp_tx_queue),
-                       !list_empty(&peer->ksnp_zc_req_list));
+                       !cfs_list_empty(&peer->ksnp_tx_queue),
+                       !cfs_list_empty(&peer->ksnp_zc_req_list));
 
-                list_for_each (tmp, &peer->ksnp_routes) {
-                        route = list_entry(tmp, ksock_route_t, ksnr_list);
+                cfs_list_for_each (tmp, &peer->ksnp_routes) {
+                        route = cfs_list_entry(tmp, ksock_route_t, ksnr_list);
                         CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
-                               "del %d\n", atomic_read(&route->ksnr_refcount),
+                               "del %d\n", cfs_atomic_read(&route->ksnr_refcount),
                                route->ksnr_scheduled, route->ksnr_connecting,
                                route->ksnr_connected, route->ksnr_deleted);
                 }
 
-                list_for_each (tmp, &peer->ksnp_conns) {
-                        conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+                cfs_list_for_each (tmp, &peer->ksnp_conns) {
+                        conn = cfs_list_entry(tmp, ksock_conn_t, ksnc_list);
                         CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
-                               atomic_read(&conn->ksnc_conn_refcount),
-                               atomic_read(&conn->ksnc_sock_refcount),
+                               cfs_atomic_read(&conn->ksnc_conn_refcount),
+                               cfs_atomic_read(&conn->ksnc_sock_refcount),
                                conn->ksnc_type, conn->ksnc_closing);
                 }
         }
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
         return;
 }
 
@@ -2370,24 +2555,26 @@ ksocknal_shutdown (lnet_ni_t *ni)
 {
         ksock_net_t      *net = ni->ni_data;
         int               i;
-        lnet_process_id_t anyid = {.nid = LNET_NID_ANY,
-                                   .pid = LNET_PID_ANY};
+        lnet_process_id_t anyid = {0};
+
+        anyid.nid = LNET_NID_ANY;
+        anyid.pid = LNET_PID_ANY;
 
         LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
         LASSERT(ksocknal_data.ksnd_nnets > 0);
 
-        spin_lock_bh (&net->ksnn_lock);
+        cfs_spin_lock_bh (&net->ksnn_lock);
         net->ksnn_shutdown = 1;                 /* prevent new peers */
-        spin_unlock_bh (&net->ksnn_lock);
+        cfs_spin_unlock_bh (&net->ksnn_lock);
 
         /* Delete all peers */
         ksocknal_del_peer(ni, anyid, 0);
 
         /* Wait for all peer state to clean up */
         i = 2;
-        spin_lock_bh (&net->ksnn_lock);
+        cfs_spin_lock_bh (&net->ksnn_lock);
         while (net->ksnn_npeers != 0) {
-                spin_unlock_bh (&net->ksnn_lock);
+                cfs_spin_unlock_bh (&net->ksnn_lock);
 
                 i++;
                 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
@@ -2397,16 +2584,17 @@ ksocknal_shutdown (lnet_ni_t *ni)
                 ksocknal_debug_peerhash(ni);
 
-                spin_lock_bh (&net->ksnn_lock);
+                cfs_spin_lock_bh (&net->ksnn_lock);
         }
-        spin_unlock_bh (&net->ksnn_lock);
+        cfs_spin_unlock_bh (&net->ksnn_lock);
 
         for (i = 0; i < net->ksnn_ninterfaces; i++) {
                 LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
                 LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
         }
 
-        LIBCFS_FREE(net, sizeof(*net));
+        cfs_list_del(&net->ksnn_list);
+        LIBCFS_FREE(net, sizeof(*net));
 
         ksocknal_data.ksnd_nnets--;
         if (ksocknal_data.ksnd_nnets == 0)
@@ -2457,6 +2645,8 @@ ksocknal_enumerate_interfaces(ksock_net_t *net)
                 net->ksnn_interfaces[j].ksni_ipaddr = ip;
                 net->ksnn_interfaces[j].ksni_netmask = mask;
+                strncpy(&net->ksnn_interfaces[j].ksni_name[0],
+                        names[i], IFNAMSIZ);
                 j++;
         }
 
@@ -2469,6 +2659,114 @@
 }
 
 int
+ksocknal_search_new_ipif(ksock_net_t *net)
+{
+        int     new_ipif = 0;
+        int     i;
+
+        for (i = 0; i < net->ksnn_ninterfaces; i++) {
+                char            *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
+                char            *colon = strchr(ifnam, ':');
+                int              found = 0;
+                ksock_net_t     *tmp;
+                int              j;
+
+                if (colon != NULL) /* ignore alias device */
+                        *colon = 0;
+
+                cfs_list_for_each_entry(tmp, &ksocknal_data.ksnd_nets,
+                                        ksnn_list) {
+                        for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
+                                char *ifnam2 = &tmp->ksnn_interfaces[j].\
+                                               ksni_name[0];
+                                char *colon2 = strchr(ifnam2, ':');
+
+                                if (colon2 != NULL)
+                                        *colon2 = 0;
+
+                                found = strcmp(ifnam, ifnam2) == 0;
+                                if (colon2 != NULL)
+                                        *colon2 = ':';
+                        }
+                        if (found)
+                                break;
+                }
+
+                new_ipif += !found;
+                if (colon != NULL)
+                        *colon = ':';
+        }
+
+        return new_ipif;
+}
+
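ksocknal_search_new_ipif() above counts interfaces whose base device is not already configured in another ksock_net_t, and it treats aliases such as "eth0:1" as the same device as "eth0": the ':' is temporarily overwritten with a NUL for the comparison and restored afterwards. A stand-alone C sketch of that comparison (illustrative only, not part of the patch):

/* --- illustrative example, not part of the patch --- */
#include <stdio.h>
#include <string.h>

/* Compare two interface names with any ":alias" suffix ignored, using the
 * same trick as the patch: write a NUL over the ':', compare, restore. */
static int same_base_ifname(char *a, char *b)
{
        char *ca = strchr(a, ':');
        char *cb = strchr(b, ':');
        int   same;

        if (ca != NULL)
                *ca = 0;
        if (cb != NULL)
                *cb = 0;

        same = strcmp(a, b) == 0;

        if (ca != NULL)
                *ca = ':';
        if (cb != NULL)
                *cb = ':';
        return same;
}

int main(void)
{
        char a[] = "eth0:1";
        char b[] = "eth0";
        char c[] = "eth1";

        printf("%s vs %s -> %d\n", a, b, same_base_ifname(a, b)); /* 1 */
        printf("%s vs %s -> %d\n", a, c, same_base_ifname(a, c)); /* 0 */
        return 0;
}
/* --- end of example --- */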
+int
+ksocknal_start_schedulers(struct ksock_sched_info *info)
+{
+        int     nthrs;
+        int     rc = 0;
+        int     i;
+
+        if (info->ksi_nthreads == 0) {
+                if (*ksocknal_tunables.ksnd_nscheds > 0) {
+                        nthrs = info->ksi_nthreads_max;
+                } else {
+                        nthrs = cfs_cpt_weight(lnet_cpt_table(),
+                                               info->ksi_cpt);
+                        nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
+                        nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
+                }
+                nthrs = min(nthrs, info->ksi_nthreads_max);
+        } else {
+                LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
+                /* increase two threads if there is new interface */
+                nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
+        }
+
+        for (i = 0; i < nthrs; i++) {
+                long    id;
+
+                id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
+                rc = ksocknal_thread_start(ksocknal_scheduler, (void *)id);
+                if (rc == 0)
+                        continue;
+
+                CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
+                       info->ksi_cpt, info->ksi_nthreads + i, rc);
+                break;
+        }
+
+        info->ksi_nthreads += i;
+        return rc;
+}
+
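ksocknal_start_schedulers() above hands each new scheduler thread a single long built by KSOCK_THREAD_ID(cpt, nthread). That macro lives in socklnd.h and is not shown in this diff; the encode/decode pair below is only an assumed stand-in (the 16-bit split is a guess) showing how one value can carry both the CPT and the per-CPT thread index back to the thread.

/* --- illustrative example, not part of the patch; the encoding is assumed --- */
#include <stdio.h>

#define DEMO_CPT_SHIFT 16        /* hypothetical split point */

static long demo_thread_id(long cpt, long sid)
{
        /* pack the CPT in the high bits, the thread index in the low bits */
        return (cpt << DEMO_CPT_SHIFT) | sid;
}

static long demo_thread_cpt(long id)
{
        return id >> DEMO_CPT_SHIFT;
}

static long demo_thread_sid(long id)
{
        return id & ((1L << DEMO_CPT_SHIFT) - 1);
}

int main(void)
{
        long id = demo_thread_id(3, 5);        /* CPT 3, sixth thread */

        printf("id=%ld -> cpt=%ld sid=%ld\n",
               id, demo_thread_cpt(id), demo_thread_sid(id));
        return 0;
}
/* --- end of example --- */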
+int
+ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts)
+{
+        int     newif = ksocknal_search_new_ipif(net);
+        int     rc;
+        int     i;
+
+        LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
+
+        for (i = 0; i < ncpts; i++) {
+                struct ksock_sched_info *info;
+                int cpt = (cpts == NULL) ? i : cpts[i];
+
+                LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
+                info = ksocknal_data.ksnd_sched_info[cpt];
+
+                if (!newif && info->ksi_nthreads > 0)
+                        continue;
+
+                rc = ksocknal_start_schedulers(info);
+                if (rc != 0)
+                        return rc;
+        }
+        return 0;
+}
+
+int
 ksocknal_startup (lnet_ni_t *ni)
 {
         ksock_net_t  *net;
@@ -2487,12 +2785,13 @@ ksocknal_startup (lnet_ni_t *ni)
         if (net == NULL)
                 goto fail_0;
 
-        memset(net, 0, sizeof(*net));
-        spin_lock_init(&net->ksnn_lock);
+        cfs_spin_lock_init(&net->ksnn_lock);
         net->ksnn_incarnation = ksocknal_new_incarnation();
         ni->ni_data = net;
-        ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
-        ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits;
+        ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
+        ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
+        ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits;
+        ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
 
         if (ni->ni_interfaces[0] == NULL) {
                 rc = ksocknal_enumerate_interfaces(net);
@@ -2523,12 +2822,21 @@ ksocknal_startup (lnet_ni_t *ni)
                                        ni->ni_interfaces[i]);
                                 goto fail_1;
                         }
-                }
-                net->ksnn_ninterfaces = i;
-        }
-        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
-                                net->ksnn_interfaces[0].ksni_ipaddr);
+                        strncpy(&net->ksnn_interfaces[i].ksni_name[0],
+                                ni->ni_interfaces[i], IFNAMSIZ);
+                }
+                net->ksnn_ninterfaces = i;
+        }
+
+        /* call it before add it to ksocknal_data.ksnd_nets */
+        rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
+        if (rc != 0)
+                goto fail_1;
+
+        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
+                                net->ksnn_interfaces[0].ksni_ipaddr);
+        cfs_list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
 
         ksocknal_data.ksnd_nnets++;
@@ -2548,7 +2856,7 @@
 void __exit
 ksocknal_module_fini (void)
 {
         lnet_unregister_lnd(&the_ksocklnd);
-        ksocknal_lib_tunables_fini();
+        ksocknal_tunables_fini();
 }
 
 int __init
@@ -2557,9 +2865,21 @@ ksocknal_module_init (void)
         int    rc;
 
         /* check ksnr_connected/connecting field large enough */
-        CLASSERT(SOCKLND_CONN_NTYPES <= 4);
-
-        rc = ksocknal_lib_tunables_init();
+        CLASSERT (SOCKLND_CONN_NTYPES <= 4);
+        CLASSERT (SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN);
+
+        /* initialize the_ksocklnd */
+        the_ksocklnd.lnd_type     = SOCKLND;
+        the_ksocklnd.lnd_startup  = ksocknal_startup;
+        the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
+        the_ksocklnd.lnd_ctl      = ksocknal_ctl;
+        the_ksocklnd.lnd_send     = ksocknal_send;
+        the_ksocklnd.lnd_recv     = ksocknal_recv;
+        the_ksocklnd.lnd_notify   = ksocknal_notify;
+        the_ksocklnd.lnd_query    = ksocknal_query;
+        the_ksocklnd.lnd_accept   = ksocknal_accept;
+
+        rc = ksocknal_tunables_init();
         if (rc != 0)
                 return rc;
 
@@ -2568,8 +2888,8 @@ ksocknal_module_init (void)
         return 0;
 }
 
-MODULE_AUTHOR("Cluster File Systems, Inc. ");
-MODULE_DESCRIPTION("Kernel TCP Socket LND v2.0.0");
+MODULE_AUTHOR("Sun Microsystems, Inc. ");
+MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0");
 MODULE_LICENSE("GPL");
 
-cfs_module(ksocknal, "2.0.0", ksocknal_module_init, ksocknal_module_fini);
+cfs_module(ksocknal, "3.0.0", ksocknal_module_init, ksocknal_module_fini);