From: John L. Hammond Date: Mon, 29 Sep 2014 18:33:24 +0000 (-0500) Subject: LU-2675 lnet: remove ulnds X-Git-Tag: 2.6.91~29 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=86b8c3319b01e5ab03b1285012db1c7b18ddd908;hp=7c814c2971be916520bd552f44173b41140ebac6 LU-2675 lnet: remove ulnds Remove the unused userspace LND code (all of lnet/ulnds/) and supporting autocrud. Signed-off-by: John L. Hammond Change-Id: I104d8b22afdde5027a2a0ef1a9ecc0423b67fae5 Reviewed-on: http://review.whamcloud.com/12117 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Amir Shehata Reviewed-by: James Simmons Reviewed-by: Isaac Huang Reviewed-by: Oleg Drokin --- diff --git a/lnet/autoMakefile.am b/lnet/autoMakefile.am index 77c2566..8beb61b 100644 --- a/lnet/autoMakefile.am +++ b/lnet/autoMakefile.am @@ -34,8 +34,8 @@ # Lustre is a trademark of Sun Microsystems, Inc. # -SUBDIRS = lnet klnds ulnds selftest doc utils include \ +SUBDIRS = lnet klnds selftest doc utils include \ autoconf -DIST_SUBDIRS = lnet klnds ulnds selftest doc utils include \ +DIST_SUBDIRS = lnet klnds selftest doc utils include \ autoconf diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index 314e935..29f9bdd 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -114,31 +114,6 @@ AS_IF([test "x$enable_backoff" = xyes], [ ]) # LN_CONFIG_BACKOFF # -# LN_CONFIG_USOCKLND -# -# configure support for userspace TCP/IP LND -# -AC_DEFUN([LN_CONFIG_USOCKLND], [ -AC_MSG_CHECKING([whether to enable usocklnd]) -AC_ARG_ENABLE([usocklnd], - AC_HELP_STRING([--disable-usocklnd], - [disable usocklnd]), - [], [enable_usocklnd="yes"]) -AC_MSG_RESULT([$enable_usocklnd]) -USOCKLND="" -AS_IF([test "x$enable_usocklnd" = xyes], [ - AC_MSG_CHECKING([whether to build usocklnd]) - AS_IF([test "$ENABLE_LIBPTHREAD" = yes], [ - AC_MSG_RESULT([yes]) - USOCKLND="usocklnd" - ], [ - AC_MSG_RESULT([no (libpthread not present or disabled)]) - ]) -]) -AC_SUBST(USOCKLND) -]) # LN_CONFIG_USOCKLND - -# # LN_CONFIG_DLC # # Configure dlc if enabled @@ -696,7 +671,6 @@ AS_IF([test "x$enable_libwrap" = xyes], [ AC_SUBST(LIBWRAP) LN_CONFIG_MAX_PAYLOAD -LN_CONFIG_USOCKLND LN_CONFIG_DLC ]) # LN_CONFIGURE @@ -712,7 +686,6 @@ AM_CONDITIONAL(BUILD_O2IBLND, test x$O2IBLND = "xo2iblnd") AM_CONDITIONAL(BUILD_RALND, test x$RALND = "xralnd") AM_CONDITIONAL(BUILD_GNILND, test x$GNILND = "xgnilnd") AM_CONDITIONAL(BUILD_GNILND_RCA, test x$GNILNDRCA = "xgnilndrca") -AM_CONDITIONAL(BUILD_USOCKLND, test x$USOCKLND = "xusocklnd") AM_CONDITIONAL(BUILD_DLC, test x$USE_DLC = "xyes") ]) # LN_CONDITIONALS @@ -749,9 +722,6 @@ lnet/lnet/Makefile lnet/lnet/autoMakefile lnet/selftest/Makefile lnet/selftest/autoMakefile -lnet/ulnds/Makefile -lnet/ulnds/autoMakefile -lnet/ulnds/socklnd/Makefile lnet/utils/Makefile lnet/utils/lnetconfig/Makefile ]) diff --git a/lnet/ulnds/Makefile.in b/lnet/ulnds/Makefile.in deleted file mode 100644 index dd3c8ce..0000000 --- a/lnet/ulnds/Makefile.in +++ /dev/null @@ -1,4 +0,0 @@ -@BUILD_USOCKLND_TRUE@subdir-m += socklnd - -@INCLUDE_RULES@ - diff --git a/lnet/ulnds/autoMakefile.am b/lnet/ulnds/autoMakefile.am deleted file mode 100644 index 5917a31..0000000 --- a/lnet/ulnds/autoMakefile.am +++ /dev/null @@ -1,37 +0,0 @@ -# -# GPL HEADER START -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 only, -# as published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License version 2 for more details (a copy is included -# in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU General Public License -# version 2 along with this program; If not, see -# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf -# -# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, -# CA 95054 USA or visit www.sun.com if you need additional information or -# have any questions. -# -# GPL HEADER END -# - -# -# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. -# Use is subject to license terms. -# - -# -# This file is part of Lustre, http://www.lustre.org/ -# Lustre is a trademark of Sun Microsystems, Inc. -# - -SUBDIRS = socklnd diff --git a/lnet/ulnds/socklnd/.gitignore b/lnet/ulnds/socklnd/.gitignore deleted file mode 100644 index 10a7e8d..0000000 --- a/lnet/ulnds/socklnd/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/Makefile.in diff --git a/lnet/ulnds/socklnd/Makefile.am b/lnet/ulnds/socklnd/Makefile.am deleted file mode 100644 index d94443c..0000000 --- a/lnet/ulnds/socklnd/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -if LIBLUSTRE -if BUILD_USOCKLND -noinst_LIBRARIES = libsocklnd.a -endif -endif - -noinst_HEADERS = usocklnd.h -libsocklnd_a_SOURCES = usocklnd.h usocklnd.c usocklnd_cb.c poll.c \ - handlers.c conn.c -libsocklnd_a_CPPFLAGS = $(LLCPPFLAGS) -libsocklnd_a_CFLAGS = $(LLCFLAGS) diff --git a/lnet/ulnds/socklnd/conn.c b/lnet/ulnds/socklnd/conn.c deleted file mode 100644 index 424bf31..0000000 --- a/lnet/ulnds/socklnd/conn.c +++ /dev/null @@ -1,1118 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/ulnds/socklnd/conn.c - * - * Author: Maxim Patlasov - */ - -#include "usocklnd.h" - -/* Return 1 if the conn is timed out, 0 else */ -int -usocklnd_conn_timed_out(usock_conn_t *conn, cfs_time_t current_time) -{ - if (conn->uc_tx_flag && /* sending is in progress */ - cfs_time_aftereq(current_time, conn->uc_tx_deadline)) - return 1; - - if (conn->uc_rx_flag && /* receiving is in progress */ - cfs_time_aftereq(current_time, conn->uc_rx_deadline)) - return 1; - - return 0; -} - -void -usocklnd_conn_kill(usock_conn_t *conn) -{ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state != UC_DEAD) - usocklnd_conn_kill_locked(conn); - pthread_mutex_unlock(&conn->uc_lock); -} - -/* Mark the conn as DEAD and schedule its deletion */ -void -usocklnd_conn_kill_locked(usock_conn_t *conn) -{ - conn->uc_rx_flag = conn->uc_tx_flag = 0; - conn->uc_state = UC_DEAD; - usocklnd_add_killrequest(conn); -} - -usock_conn_t * -usocklnd_conn_allocate() -{ - usock_conn_t *conn; - usock_pollrequest_t *pr; - - LIBCFS_ALLOC (pr, sizeof(*pr)); - if (pr == NULL) - return NULL; - - LIBCFS_ALLOC (conn, sizeof(*conn)); - if (conn == NULL) { - LIBCFS_FREE (pr, sizeof(*pr)); - return NULL; - } - memset(conn, 0, sizeof(*conn)); - conn->uc_preq = pr; - - LIBCFS_ALLOC (conn->uc_rx_hello, - offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - if (conn->uc_rx_hello == NULL) { - LIBCFS_FREE (pr, sizeof(*pr)); - LIBCFS_FREE (conn, sizeof(*conn)); - return NULL; - } - - return conn; -} - -void -usocklnd_conn_free(usock_conn_t *conn) -{ - usock_pollrequest_t *pr = conn->uc_preq; - - if (pr != NULL) - LIBCFS_FREE (pr, sizeof(*pr)); - - if (conn->uc_rx_hello != NULL) - LIBCFS_FREE (conn->uc_rx_hello, - offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - - LIBCFS_FREE (conn, sizeof(*conn)); -} - -void -usocklnd_tear_peer_conn(usock_conn_t *conn) -{ - usock_peer_t *peer = conn->uc_peer; - int idx = usocklnd_type2idx(conn->uc_type); - lnet_ni_t *ni; - lnet_process_id_t id; - int decref_flag = 0; - int killall_flag = 0; - void *rx_lnetmsg = NULL; - struct list_head zombie_txs = LIST_HEAD_INIT(zombie_txs); - - if (peer == NULL) /* nothing to tear */ - return; - - pthread_mutex_lock(&peer->up_lock); - pthread_mutex_lock(&conn->uc_lock); - - ni = peer->up_ni; - id = peer->up_peerid; - - if (peer->up_conns[idx] == conn) { - if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) { - /* change state not to finalize twice */ - conn->uc_rx_state = UC_RX_KSM_HEADER; - /* stash lnetmsg while holding locks */ - rx_lnetmsg = conn->uc_rx_lnetmsg; - } - - /* we cannot finilize txs right now (bug #18844) */ - list_splice_init(&conn->uc_tx_list, &zombie_txs); - - peer->up_conns[idx] = NULL; - conn->uc_peer = NULL; - decref_flag = 1; - - if(conn->uc_errored && !peer->up_errored) - peer->up_errored = killall_flag = 1; - - /* prevent queueing new txs to this conn */ - conn->uc_errored = 1; - } - - pthread_mutex_unlock(&conn->uc_lock); - - if (killall_flag) - usocklnd_del_conns_locked(peer); - - pthread_mutex_unlock(&peer->up_lock); - - if (!decref_flag) - return; - - if (rx_lnetmsg != NULL) - lnet_finalize(ni, rx_lnetmsg, -EIO); - - usocklnd_destroy_txlist(ni, &zombie_txs); - - usocklnd_conn_decref(conn); - usocklnd_peer_decref(peer); - - usocklnd_check_peer_stale(ni, id); -} - -/* Remove peer from hash list if all up_conns[i] is NULL && - * hash table is the only consumer of the peer */ -void -usocklnd_check_peer_stale(lnet_ni_t *ni, lnet_process_id_t id) -{ - usock_peer_t *peer; - - pthread_rwlock_wrlock(&usock_data.ud_peers_lock); - peer = usocklnd_find_peer_locked(ni, id); - - if (peer == NULL) { - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - return; - } - - if (mt_atomic_read(&peer->up_refcount) == 2) { - int i; - for (i = 0; i < N_CONN_TYPES; i++) - LASSERT (peer->up_conns[i] == NULL); - - list_del(&peer->up_list); - - if (peer->up_errored && - (peer->up_peerid.pid & LNET_PID_USERFLAG) == 0) - lnet_notify (peer->up_ni, peer->up_peerid.nid, 0, - cfs_time_seconds(peer->up_last_alive)); - - usocklnd_peer_decref(peer); - } - - usocklnd_peer_decref(peer); - pthread_rwlock_unlock(&usock_data.ud_peers_lock); -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_create_passive_conn(lnet_ni_t *ni, - cfs_socket_t *sock, usock_conn_t **connp) -{ - int rc; - __u32 peer_ip; - int peer_port; - usock_conn_t *conn; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - if (rc) - return rc; - - LASSERT (peer_port >= 0); /* uc_peer_port is u16 */ - - rc = usocklnd_set_sock_options(sock); - if (rc) - return rc; - - conn = usocklnd_conn_allocate(); - if (conn == NULL) - return -ENOMEM; - - usocklnd_rx_hellomagic_state_transition(conn); - - conn->uc_sock = sock; - conn->uc_peer_ip = peer_ip; - conn->uc_peer_port = peer_port; - conn->uc_state = UC_RECEIVING_HELLO; - conn->uc_pt_idx = usocklnd_ip2pt_idx(peer_ip); - conn->uc_ni = ni; - INIT_LIST_HEAD(&conn->uc_tx_list); - INIT_LIST_HEAD(&conn->uc_zcack_list); - pthread_mutex_init(&conn->uc_lock, NULL); - mt_atomic_set(&conn->uc_refcount, 1); /* 1 ref for me */ - - *connp = conn; - return 0; -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_create_active_conn(usock_peer_t *peer, int type, - usock_conn_t **connp) -{ - int rc; - cfs_socket_t *sock; - usock_conn_t *conn; - __u32 dst_ip = LNET_NIDADDR(peer->up_peerid.nid); - __u16 dst_port = lnet_acceptor_port(); - - conn = usocklnd_conn_allocate(); - if (conn == NULL) - return -ENOMEM; - - conn->uc_tx_hello = usocklnd_create_cr_hello_tx(peer->up_ni, type, - peer->up_peerid.nid); - if (conn->uc_tx_hello == NULL) { - usocklnd_conn_free(conn); - return -ENOMEM; - } - - if (the_lnet.ln_pid & LNET_PID_USERFLAG) - rc = usocklnd_connect_cli_mode(&sock, dst_ip, dst_port); - else - rc = usocklnd_connect_srv_mode(&sock, dst_ip, dst_port); - - if (rc) { - usocklnd_destroy_tx(NULL, conn->uc_tx_hello); - usocklnd_conn_free(conn); - return rc; - } - - conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - - conn->uc_sock = sock; - conn->uc_peer_ip = dst_ip; - conn->uc_peer_port = dst_port; - conn->uc_type = type; - conn->uc_activeflag = 1; - conn->uc_state = UC_CONNECTING; - conn->uc_pt_idx = usocklnd_ip2pt_idx(dst_ip); - conn->uc_ni = NULL; - conn->uc_peerid = peer->up_peerid; - conn->uc_peer = peer; - - usocklnd_peer_addref(peer); - INIT_LIST_HEAD(&conn->uc_tx_list); - INIT_LIST_HEAD(&conn->uc_zcack_list); - pthread_mutex_init(&conn->uc_lock, NULL); - mt_atomic_set(&conn->uc_refcount, 1); /* 1 ref for me */ - - *connp = conn; - return 0; -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_connect_srv_mode(cfs_socket_t **sockp, __u32 dst_ip, __u16 dst_port) -{ - __u16 port; - cfs_socket_t *sock; - int rc; - int fatal; - - for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT; - port >= LNET_ACCEPTOR_MIN_RESERVED_PORT; - port--) { - /* Iterate through reserved ports. */ - rc = libcfs_sock_create(&sock, &fatal, 0, port); - if (rc) { - if (fatal) - return rc; - continue; - } - - rc = usocklnd_set_sock_options(sock); - if (rc) { - libcfs_sock_release(sock); - return rc; - } - - rc = libcfs_sock_connect(sock, dst_ip, dst_port); - if (rc == 0) { - *sockp = sock; - return 0; - } - - if (rc != -EADDRINUSE && rc != -EADDRNOTAVAIL) { - libcfs_sock_release(sock); - return rc; - } - - libcfs_sock_release(sock); - } - - CERROR("Can't bind to any reserved port\n"); - return rc; -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_connect_cli_mode(cfs_socket_t **sockp, __u32 dst_ip, __u16 dst_port) -{ - cfs_socket_t *sock; - int rc; - int fatal; - - rc = libcfs_sock_create(&sock, &fatal, 0, 0); - if (rc) - return rc; - - rc = usocklnd_set_sock_options(sock); - if (rc) { - libcfs_sock_release(sock); - return rc; - } - - rc = libcfs_sock_connect(sock, dst_ip, dst_port); - if (rc) { - libcfs_sock_release(sock); - return rc; - } - - *sockp = sock; - return 0; -} - -int -usocklnd_set_sock_options(cfs_socket_t *sock) -{ - int rc; - - rc = libcfs_sock_set_nagle(sock, usock_tuns.ut_socknagle); - if (rc) - return rc; - - if (usock_tuns.ut_sockbufsiz) { - rc = libcfs_sock_set_bufsiz(sock, usock_tuns.ut_sockbufsiz); - if (rc) - return rc; - } - - return libcfs_fcntl_nonblock(sock); -} - -usock_tx_t * -usocklnd_create_noop_tx(__u64 cookie) -{ - usock_tx_t *tx; - - LIBCFS_ALLOC (tx, sizeof(usock_tx_t)); - if (tx == NULL) - return NULL; - - tx->tx_size = sizeof(usock_tx_t); - tx->tx_lnetmsg = NULL; - - socklnd_init_msg(&tx->tx_msg, KSOCK_MSG_NOOP); - tx->tx_msg.ksm_zc_cookies[1] = cookie; - - tx->tx_iova[0].iov_base = (void *)&tx->tx_msg; - tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob = - offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_iov = tx->tx_iova; - tx->tx_niov = 1; - - return tx; -} - -usock_tx_t * -usocklnd_create_tx(lnet_msg_t *lntmsg) -{ - usock_tx_t *tx; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - int size = offsetof(usock_tx_t, - tx_iova[1 + payload_niov]); - - LIBCFS_ALLOC (tx, size); - if (tx == NULL) - return NULL; - - tx->tx_size = size; - tx->tx_lnetmsg = lntmsg; - - tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + payload_nob; - - socklnd_init_msg(&tx->tx_msg, KSOCK_MSG_LNET); - tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = lntmsg->msg_hdr; - tx->tx_iova[0].iov_base = (void *)&tx->tx_msg; - tx->tx_iova[0].iov_len = sizeof(ksock_msg_t); - tx->tx_iov = tx->tx_iova; - - tx->tx_niov = 1 + - lnet_extract_iov(payload_niov, &tx->tx_iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); - - return tx; -} - -void -usocklnd_init_hello_msg(ksock_hello_msg_t *hello, - lnet_ni_t *ni, int type, lnet_nid_t peer_nid) -{ - usock_net_t *net = (usock_net_t *)ni->ni_data; - - hello->kshm_magic = LNET_PROTO_MAGIC; - hello->kshm_version = KSOCK_PROTO_V2; - hello->kshm_nips = 0; - hello->kshm_ctype = type; - - hello->kshm_dst_incarnation = 0; /* not used */ - hello->kshm_src_incarnation = net->un_incarnation; - - hello->kshm_src_pid = the_lnet.ln_pid; - hello->kshm_src_nid = ni->ni_nid; - hello->kshm_dst_nid = peer_nid; - hello->kshm_dst_pid = 0; /* not used */ -} - -usock_tx_t * -usocklnd_create_hello_tx(lnet_ni_t *ni, - int type, lnet_nid_t peer_nid) -{ - usock_tx_t *tx; - int size; - ksock_hello_msg_t *hello; - - size = sizeof(usock_tx_t) + offsetof(ksock_hello_msg_t, kshm_ips); - LIBCFS_ALLOC (tx, size); - if (tx == NULL) - return NULL; - - tx->tx_size = size; - tx->tx_lnetmsg = NULL; - - hello = (ksock_hello_msg_t *)&tx->tx_iova[1]; - usocklnd_init_hello_msg(hello, ni, type, peer_nid); - - tx->tx_iova[0].iov_base = (void *)hello; - tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob = - offsetof(ksock_hello_msg_t, kshm_ips); - tx->tx_iov = tx->tx_iova; - tx->tx_niov = 1; - - return tx; -} - -usock_tx_t * -usocklnd_create_cr_hello_tx(lnet_ni_t *ni, - int type, lnet_nid_t peer_nid) -{ - usock_tx_t *tx; - int size; - lnet_acceptor_connreq_t *cr; - ksock_hello_msg_t *hello; - - size = sizeof(usock_tx_t) + - sizeof(lnet_acceptor_connreq_t) + - offsetof(ksock_hello_msg_t, kshm_ips); - LIBCFS_ALLOC (tx, size); - if (tx == NULL) - return NULL; - - tx->tx_size = size; - tx->tx_lnetmsg = NULL; - - cr = (lnet_acceptor_connreq_t *)&tx->tx_iova[1]; - memset(cr, 0, sizeof(*cr)); - cr->acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr->acr_version = LNET_PROTO_ACCEPTOR_VERSION; - cr->acr_nid = peer_nid; - - hello = (ksock_hello_msg_t *)((char *)cr + sizeof(*cr)); - usocklnd_init_hello_msg(hello, ni, type, peer_nid); - - tx->tx_iova[0].iov_base = (void *)cr; - tx->tx_iova[0].iov_len = tx->tx_resid = tx->tx_nob = - sizeof(lnet_acceptor_connreq_t) + - offsetof(ksock_hello_msg_t, kshm_ips); - tx->tx_iov = tx->tx_iova; - tx->tx_niov = 1; - - return tx; -} - -void -usocklnd_destroy_tx(lnet_ni_t *ni, usock_tx_t *tx) -{ - lnet_msg_t *lnetmsg = tx->tx_lnetmsg; - int rc = (tx->tx_resid == 0) ? 0 : -EIO; - - LASSERT (ni != NULL || lnetmsg == NULL); - - LIBCFS_FREE (tx, tx->tx_size); - - if (lnetmsg != NULL) /* NOOP and hello go without lnetmsg */ - lnet_finalize(ni, lnetmsg, rc); -} - -void -usocklnd_destroy_txlist(lnet_ni_t *ni, struct list_head *txlist) -{ - usock_tx_t *tx; - - while (!list_empty(txlist)) { - tx = list_entry(txlist->next, usock_tx_t, tx_list); - list_del(&tx->tx_list); - - usocklnd_destroy_tx(ni, tx); - } -} - -void -usocklnd_destroy_zcack_list(struct list_head *zcack_list) -{ - usock_zc_ack_t *zcack; - - while (!list_empty(zcack_list)) { - zcack = list_entry(zcack_list->next, usock_zc_ack_t, - zc_list); - list_del(&zcack->zc_list); - - LIBCFS_FREE (zcack, sizeof(*zcack)); - } -} - -void -usocklnd_destroy_peer(usock_peer_t *peer) -{ - usock_net_t *net = peer->up_ni->ni_data; - int i; - - for (i = 0; i < N_CONN_TYPES; i++) - LASSERT (peer->up_conns[i] == NULL); - - LIBCFS_FREE (peer, sizeof (*peer)); - - pthread_mutex_lock(&net->un_lock); - if(--net->un_peercount == 0) - pthread_cond_signal(&net->un_cond); - pthread_mutex_unlock(&net->un_lock); -} - -void -usocklnd_destroy_conn(usock_conn_t *conn) -{ - LASSERT (conn->uc_peer == NULL || conn->uc_ni == NULL); - - if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) { - LASSERT (conn->uc_peer != NULL); - lnet_finalize(conn->uc_peer->up_ni, conn->uc_rx_lnetmsg, -EIO); - } - - if (!list_empty(&conn->uc_tx_list)) { - LASSERT (conn->uc_peer != NULL); - usocklnd_destroy_txlist(conn->uc_peer->up_ni, &conn->uc_tx_list); - } - - usocklnd_destroy_zcack_list(&conn->uc_zcack_list); - - if (conn->uc_peer != NULL) - usocklnd_peer_decref(conn->uc_peer); - - if (conn->uc_ni != NULL) - lnet_ni_decref(conn->uc_ni); - - if (conn->uc_tx_hello) - usocklnd_destroy_tx(NULL, conn->uc_tx_hello); - - usocklnd_conn_free(conn); -} - -int -usocklnd_get_conn_type(lnet_msg_t *lntmsg) -{ - int nob; - - if (the_lnet.ln_pid & LNET_PID_USERFLAG) - return SOCKLND_CONN_ANY; - - nob = sizeof(ksock_msg_t) + lntmsg->msg_len; - - if (nob >= usock_tuns.ut_min_bulk) - return SOCKLND_CONN_BULK_OUT; - else - return SOCKLND_CONN_CONTROL; -} - -int usocklnd_type2idx(int type) -{ - switch (type) { - case SOCKLND_CONN_ANY: - case SOCKLND_CONN_CONTROL: - return 0; - case SOCKLND_CONN_BULK_IN: - return 1; - case SOCKLND_CONN_BULK_OUT: - return 2; - default: - LBUG(); - } -} - -usock_peer_t * -usocklnd_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id) -{ - struct list_head *peer_list = usocklnd_nid2peerlist(id.nid); - struct list_head *tmp; - usock_peer_t *peer; - - list_for_each(tmp, peer_list) { - - peer = list_entry(tmp, usock_peer_t, up_list); - - if (peer->up_ni != ni) - continue; - - if (peer->up_peerid.nid != id.nid || - peer->up_peerid.pid != id.pid) - continue; - - usocklnd_peer_addref(peer); - return peer; - } - return (NULL); -} - -int -usocklnd_create_peer(lnet_ni_t *ni, lnet_process_id_t id, - usock_peer_t **peerp) -{ - usock_net_t *net = ni->ni_data; - usock_peer_t *peer; - int i; - - LIBCFS_ALLOC (peer, sizeof (*peer)); - if (peer == NULL) - return -ENOMEM; - - for (i = 0; i < N_CONN_TYPES; i++) - peer->up_conns[i] = NULL; - - peer->up_peerid = id; - peer->up_ni = ni; - peer->up_incrn_is_set = 0; - peer->up_errored = 0; - peer->up_last_alive = 0; - mt_atomic_set(&peer->up_refcount, 1); /* 1 ref for caller */ - pthread_mutex_init(&peer->up_lock, NULL); - - pthread_mutex_lock(&net->un_lock); - net->un_peercount++; - pthread_mutex_unlock(&net->un_lock); - - *peerp = peer; - return 0; -} - -/* Safely create new peer if needed. Save result in *peerp. - * Returns 0 on success, <0 else */ -int -usocklnd_find_or_create_peer(lnet_ni_t *ni, lnet_process_id_t id, - usock_peer_t **peerp) -{ - int rc; - usock_peer_t *peer; - usock_peer_t *peer2; - usock_net_t *net = ni->ni_data; - - pthread_rwlock_rdlock(&usock_data.ud_peers_lock); - peer = usocklnd_find_peer_locked(ni, id); - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - - if (peer != NULL) - goto find_or_create_peer_done; - - rc = usocklnd_create_peer(ni, id, &peer); - if (rc) - return rc; - - pthread_rwlock_wrlock(&usock_data.ud_peers_lock); - peer2 = usocklnd_find_peer_locked(ni, id); - if (peer2 == NULL) { - if (net->un_shutdown) { - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - usocklnd_peer_decref(peer); /* should destroy peer */ - CERROR("Can't create peer: network shutdown\n"); - return -ESHUTDOWN; - } - - /* peer table will take 1 of my refs on peer */ - usocklnd_peer_addref(peer); - list_add_tail(&peer->up_list, - usocklnd_nid2peerlist(id.nid)); - } else { - usocklnd_peer_decref(peer); /* should destroy peer */ - peer = peer2; - } - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - - find_or_create_peer_done: - *peerp = peer; - return 0; -} - -/* NB: both peer and conn locks are held */ -static int -usocklnd_enqueue_zcack(usock_conn_t *conn, usock_zc_ack_t *zc_ack) -{ - if (conn->uc_state == UC_READY && - list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list) && - !conn->uc_sending) { - int rc = usocklnd_add_pollrequest(conn, POLL_TX_SET_REQUEST, - POLLOUT); - if (rc != 0) - return rc; - } - - list_add_tail(&zc_ack->zc_list, &conn->uc_zcack_list); - return 0; -} - -/* NB: both peer and conn locks are held - * NB: if sending isn't in progress. the caller *MUST* send tx - * immediately after we'll return */ -static void -usocklnd_enqueue_tx(usock_conn_t *conn, usock_tx_t *tx, - int *send_immediately) -{ - if (conn->uc_state == UC_READY && - list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list) && - !conn->uc_sending) { - conn->uc_sending = 1; - *send_immediately = 1; - return; - } - - *send_immediately = 0; - list_add_tail(&tx->tx_list, &conn->uc_tx_list); -} - -/* Safely create new conn if needed. Save result in *connp. - * Returns 0 on success, <0 else */ -int -usocklnd_find_or_create_conn(usock_peer_t *peer, int type, - usock_conn_t **connp, - usock_tx_t *tx, usock_zc_ack_t *zc_ack, - int *send_immediately) -{ - usock_conn_t *conn; - int idx; - int rc; - lnet_pid_t userflag = peer->up_peerid.pid & LNET_PID_USERFLAG; - - if (userflag) - type = SOCKLND_CONN_ANY; - - idx = usocklnd_type2idx(type); - - pthread_mutex_lock(&peer->up_lock); - if (peer->up_conns[idx] != NULL) { - conn = peer->up_conns[idx]; - LASSERT(conn->uc_type == type); - } else { - if (userflag) { - CERROR("Refusing to create a connection to " - "userspace process %s\n", - libcfs_id2str(peer->up_peerid)); - rc = -EHOSTUNREACH; - goto find_or_create_conn_failed; - } - - rc = usocklnd_create_active_conn(peer, type, &conn); - if (rc) { - peer->up_errored = 1; - usocklnd_del_conns_locked(peer); - goto find_or_create_conn_failed; - } - - /* peer takes 1 of conn refcount */ - usocklnd_link_conn_to_peer(conn, peer, idx); - - rc = usocklnd_add_pollrequest(conn, POLL_ADD_REQUEST, POLLOUT); - if (rc) { - peer->up_conns[idx] = NULL; - usocklnd_conn_decref(conn); /* should destroy conn */ - goto find_or_create_conn_failed; - } - usocklnd_wakeup_pollthread(conn->uc_pt_idx); - } - - pthread_mutex_lock(&conn->uc_lock); - LASSERT(conn->uc_peer == peer); - - LASSERT(tx == NULL || zc_ack == NULL); - if (tx != NULL) { - /* usocklnd_tear_peer_conn() could signal us stop queueing */ - if (conn->uc_errored) { - rc = -EIO; - pthread_mutex_unlock(&conn->uc_lock); - goto find_or_create_conn_failed; - } - - usocklnd_enqueue_tx(conn, tx, send_immediately); - } else { - rc = usocklnd_enqueue_zcack(conn, zc_ack); - if (rc != 0) { - usocklnd_conn_kill_locked(conn); - pthread_mutex_unlock(&conn->uc_lock); - goto find_or_create_conn_failed; - } - } - pthread_mutex_unlock(&conn->uc_lock); - - usocklnd_conn_addref(conn); - pthread_mutex_unlock(&peer->up_lock); - - *connp = conn; - return 0; - - find_or_create_conn_failed: - pthread_mutex_unlock(&peer->up_lock); - return rc; -} - -void -usocklnd_link_conn_to_peer(usock_conn_t *conn, usock_peer_t *peer, int idx) -{ - peer->up_conns[idx] = conn; - peer->up_errored = 0; /* this new fresh conn will try - * revitalize even stale errored peer */ -} - -int -usocklnd_invert_type(int type) -{ - switch (type) - { - case SOCKLND_CONN_ANY: - case SOCKLND_CONN_CONTROL: - return (type); - case SOCKLND_CONN_BULK_IN: - return SOCKLND_CONN_BULK_OUT; - case SOCKLND_CONN_BULK_OUT: - return SOCKLND_CONN_BULK_IN; - default: - return SOCKLND_CONN_NONE; - } -} - -void -usocklnd_conn_new_state(usock_conn_t *conn, int new_state) -{ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state != UC_DEAD) - conn->uc_state = new_state; - pthread_mutex_unlock(&conn->uc_lock); -} - -/* NB: peer is locked by caller */ -void -usocklnd_cleanup_stale_conns(usock_peer_t *peer, __u64 incrn, - usock_conn_t *skip_conn) -{ - int i; - - if (!peer->up_incrn_is_set) { - peer->up_incarnation = incrn; - peer->up_incrn_is_set = 1; - return; - } - - if (peer->up_incarnation == incrn) - return; - - peer->up_incarnation = incrn; - - for (i = 0; i < N_CONN_TYPES; i++) { - usock_conn_t *conn = peer->up_conns[i]; - - if (conn == NULL || conn == skip_conn) - continue; - - pthread_mutex_lock(&conn->uc_lock); - LASSERT (conn->uc_peer == peer); - conn->uc_peer = NULL; - peer->up_conns[i] = NULL; - if (conn->uc_state != UC_DEAD) - usocklnd_conn_kill_locked(conn); - pthread_mutex_unlock(&conn->uc_lock); - - usocklnd_conn_decref(conn); - usocklnd_peer_decref(peer); - } -} - -/* RX state transition to UC_RX_HELLO_MAGIC: update RX part to receive - * MAGIC part of hello and set uc_rx_state - */ -void -usocklnd_rx_hellomagic_state_transition(usock_conn_t *conn) -{ - LASSERT(conn->uc_rx_hello != NULL); - - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_magic; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - sizeof(conn->uc_rx_hello->kshm_magic); - - conn->uc_rx_state = UC_RX_HELLO_MAGIC; - - conn->uc_rx_flag = 1; /* waiting for incoming hello */ - conn->uc_rx_deadline = cfs_time_shift(usock_tuns.ut_timeout); -} - -/* RX state transition to UC_RX_HELLO_VERSION: update RX part to receive - * VERSION part of hello and set uc_rx_state - */ -void -usocklnd_rx_helloversion_state_transition(usock_conn_t *conn) -{ - LASSERT(conn->uc_rx_hello != NULL); - - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_version; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - sizeof(conn->uc_rx_hello->kshm_version); - - conn->uc_rx_state = UC_RX_HELLO_VERSION; -} - -/* RX state transition to UC_RX_HELLO_BODY: update RX part to receive - * the rest of hello and set uc_rx_state - */ -void -usocklnd_rx_hellobody_state_transition(usock_conn_t *conn) -{ - LASSERT(conn->uc_rx_hello != NULL); - - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_src_nid; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - offsetof(ksock_hello_msg_t, kshm_ips) - - offsetof(ksock_hello_msg_t, kshm_src_nid); - - conn->uc_rx_state = UC_RX_HELLO_BODY; -} - -/* RX state transition to UC_RX_HELLO_IPS: update RX part to receive - * array of IPs and set uc_rx_state - */ -void -usocklnd_rx_helloIPs_state_transition(usock_conn_t *conn) -{ - LASSERT(conn->uc_rx_hello != NULL); - - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_hello->kshm_ips; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - conn->uc_rx_hello->kshm_nips * - sizeof(conn->uc_rx_hello->kshm_ips[0]); - - conn->uc_rx_state = UC_RX_HELLO_IPS; -} - -/* RX state transition to UC_RX_LNET_HEADER: update RX part to receive - * LNET header and set uc_rx_state - */ -void -usocklnd_rx_lnethdr_state_transition(usock_conn_t *conn) -{ - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_msg.ksm_u.lnetmsg; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - sizeof(ksock_lnet_msg_t); - - conn->uc_rx_state = UC_RX_LNET_HEADER; - conn->uc_rx_flag = 1; -} - -/* RX state transition to UC_RX_KSM_HEADER: update RX part to receive - * KSM header and set uc_rx_state - */ -void -usocklnd_rx_ksmhdr_state_transition(usock_conn_t *conn) -{ - conn->uc_rx_niov = 1; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_iov[0].iov_base = &conn->uc_rx_msg; - conn->uc_rx_iov[0].iov_len = - conn->uc_rx_nob_wanted = - conn->uc_rx_nob_left = - offsetof(ksock_msg_t, ksm_u); - - conn->uc_rx_state = UC_RX_KSM_HEADER; - conn->uc_rx_flag = 0; -} - -/* RX state transition to UC_RX_SKIPPING: update RX part for - * skipping and set uc_rx_state - */ -void -usocklnd_rx_skipping_state_transition(usock_conn_t *conn) -{ - static char skip_buffer[4096]; - - int nob; - unsigned int niov = 0; - int skipped = 0; - int nob_to_skip = conn->uc_rx_nob_left; - - LASSERT(nob_to_skip != 0); - - conn->uc_rx_iov = conn->uc_rx_iova; - - /* Set up to skip as much as possible now. If there's more left - * (ran out of iov entries) we'll get called again */ - - do { - nob = MIN (nob_to_skip, sizeof(skip_buffer)); - - conn->uc_rx_iov[niov].iov_base = skip_buffer; - conn->uc_rx_iov[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -=nob; - - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->uc_rx_iova) / sizeof (struct iovec)); - - conn->uc_rx_niov = niov; - conn->uc_rx_nob_wanted = skipped; - - conn->uc_rx_state = UC_RX_SKIPPING; -} diff --git a/lnet/ulnds/socklnd/handlers.c b/lnet/ulnds/socklnd/handlers.c deleted file mode 100644 index da01983..0000000 --- a/lnet/ulnds/socklnd/handlers.c +++ /dev/null @@ -1,1034 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/ulnds/socklnd/handlers.c - * - * Author: Maxim Patlasov - */ - -#include "usocklnd.h" -#include -#include - -int -usocklnd_notifier_handler(int fd) -{ - int notification; - return syscall(SYS_read, fd, ¬ification, sizeof(notification)); -} - -void -usocklnd_exception_handler(usock_conn_t *conn) -{ - pthread_mutex_lock(&conn->uc_lock); - - if (conn->uc_state == UC_CONNECTING || - conn->uc_state == UC_SENDING_HELLO) - usocklnd_conn_kill_locked(conn); - - pthread_mutex_unlock(&conn->uc_lock); -} - -int -usocklnd_read_handler(usock_conn_t *conn) -{ - int rc; - int continue_reading; - int state; - - read_again: - rc = 0; - pthread_mutex_lock(&conn->uc_lock); - state = conn->uc_state; - - /* process special case: LNET calls lnd_recv() asyncronously */ - if (state == UC_READY && conn->uc_rx_state == UC_RX_PARSE) { - /* still don't have usocklnd_recv() called */ - rc = usocklnd_add_pollrequest(conn, POLL_RX_SET_REQUEST, 0); - if (rc == 0) - conn->uc_rx_state = UC_RX_PARSE_WAIT; - else - usocklnd_conn_kill_locked(conn); - - pthread_mutex_unlock(&conn->uc_lock); - return rc; - } - - pthread_mutex_unlock(&conn->uc_lock); - /* From here and below the conn cannot be changed - * asyncronously, except: - * 1) usocklnd_send() can work with uc_tx_list and uc_zcack_list, - * 2) usocklnd_shutdown() can change uc_state to UC_DEAD */ - - switch (state) { - - case UC_RECEIVING_HELLO: - case UC_READY: - if (conn->uc_rx_nob_wanted != 0) { - /* read from conn fd as much wanted data as possible */ - rc = usocklnd_read_data(conn); - if (rc == 0) /* partial read */ - break; - if (rc < 0) {/* error happened or EOF */ - usocklnd_conn_kill(conn); - break; - } - } - - /* process incoming data */ - if (state == UC_READY ) - rc = usocklnd_read_msg(conn, &continue_reading); - else /* state == UC_RECEIVING_HELLO */ - rc = usocklnd_read_hello(conn, &continue_reading); - - if (rc < 0) { - usocklnd_conn_kill(conn); - break; - } - - if (continue_reading) - goto read_again; - - break; - - case UC_DEAD: - break; - - default: - LBUG(); - } - - return rc; -} - -/* Switch on rx_state. - * Return 0 on success, 1 if whole packet is read, else return <0 - * Always set cont_flag: 1 if we're ready to continue reading, else 0 - * NB: If whole packet is read, cont_flag will be set to zero to take - * care of fairess - */ -int -usocklnd_read_msg(usock_conn_t *conn, int *cont_flag) -{ - int rc = 0; - __u64 cookie; - - *cont_flag = 0; - - /* smth. new emerged in RX part - let's process it */ - switch (conn->uc_rx_state) { - case UC_RX_KSM_HEADER: - if (conn->uc_flip) { - __swab32s(&conn->uc_rx_msg.ksm_type); - __swab32s(&conn->uc_rx_msg.ksm_csum); - __swab64s(&conn->uc_rx_msg.ksm_zc_cookies[0]); - __swab64s(&conn->uc_rx_msg.ksm_zc_cookies[1]); - } - - /* we never send packets for wich zc-acking is required */ - if (conn->uc_rx_msg.ksm_type != KSOCK_MSG_LNET || - conn->uc_rx_msg.ksm_zc_cookies[1] != 0) { - conn->uc_errored = 1; - return -EPROTO; - } - - /* zc_req will be processed later, when - lnet payload will be received */ - - usocklnd_rx_lnethdr_state_transition(conn); - *cont_flag = 1; - break; - - case UC_RX_LNET_HEADER: - if (the_lnet.ln_pid & LNET_PID_USERFLAG) { - /* replace dest_nid,pid (ksocknal sets its own) */ - conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr.dest_nid = - cpu_to_le64(conn->uc_peer->up_ni->ni_nid); - conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr.dest_pid = - cpu_to_le32(the_lnet.ln_pid); - - } else if (conn->uc_peer->up_peerid.pid & LNET_PID_USERFLAG) { - /* Userspace peer */ - lnet_process_id_t *id = &conn->uc_peer->up_peerid; - lnet_hdr_t *lhdr = &conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr; - - /* Substitute process ID assigned at connection time */ - lhdr->src_pid = cpu_to_le32(id->pid); - lhdr->src_nid = cpu_to_le64(id->nid); - } - - conn->uc_rx_state = UC_RX_PARSE; - usocklnd_conn_addref(conn); /* ++ref while parsing */ - - rc = lnet_parse(conn->uc_peer->up_ni, - &conn->uc_rx_msg.ksm_u.lnetmsg.ksnm_hdr, - conn->uc_peerid.nid, conn, 0); - - if (rc < 0) { - /* I just received garbage: give up on this conn */ - conn->uc_errored = 1; - usocklnd_conn_decref(conn); - return -EPROTO; - } - - /* Race with usocklnd_recv() is possible */ - pthread_mutex_lock(&conn->uc_lock); - LASSERT (conn->uc_rx_state == UC_RX_PARSE || - conn->uc_rx_state == UC_RX_LNET_PAYLOAD); - - /* check whether usocklnd_recv() got called */ - if (conn->uc_rx_state == UC_RX_LNET_PAYLOAD) - *cont_flag = 1; - pthread_mutex_unlock(&conn->uc_lock); - break; - - case UC_RX_PARSE: - LBUG(); /* it's error to be here, because this special - * case is handled by caller */ - break; - - case UC_RX_PARSE_WAIT: - LBUG(); /* it's error to be here, because the conn - * shouldn't wait for POLLIN event in this - * state */ - break; - - case UC_RX_LNET_PAYLOAD: - /* payload all received */ - - lnet_finalize(conn->uc_peer->up_ni, conn->uc_rx_lnetmsg, 0); - - cookie = conn->uc_rx_msg.ksm_zc_cookies[0]; - if (cookie != 0) - rc = usocklnd_handle_zc_req(conn->uc_peer, cookie); - - if (rc != 0) { - /* change state not to finalize twice */ - conn->uc_rx_state = UC_RX_KSM_HEADER; - return -EPROTO; - } - - /* Fall through */ - - case UC_RX_SKIPPING: - if (conn->uc_rx_nob_left != 0) { - usocklnd_rx_skipping_state_transition(conn); - *cont_flag = 1; - } else { - usocklnd_rx_ksmhdr_state_transition(conn); - rc = 1; /* whole packet is read */ - } - - break; - - default: - LBUG(); /* unknown state */ - } - - return rc; -} - -/* Handle incoming ZC request from sender. - * NB: it's called only from read_handler, so we're sure that - * the conn cannot become zombie in the middle of processing */ -int -usocklnd_handle_zc_req(usock_peer_t *peer, __u64 cookie) -{ - usock_conn_t *conn; - usock_zc_ack_t *zc_ack; - int type; - int rc; - int dummy; - - LIBCFS_ALLOC (zc_ack, sizeof(*zc_ack)); - if (zc_ack == NULL) - return -ENOMEM; - zc_ack->zc_cookie = cookie; - - /* Let's assume that CONTROL is the best type for zcack, - * but userspace clients don't use typed connections */ - if (the_lnet.ln_pid & LNET_PID_USERFLAG) - type = SOCKLND_CONN_ANY; - else - type = SOCKLND_CONN_CONTROL; - - rc = usocklnd_find_or_create_conn(peer, type, &conn, NULL, zc_ack, - &dummy); - if (rc != 0) { - LIBCFS_FREE (zc_ack, sizeof(*zc_ack)); - return rc; - } - usocklnd_conn_decref(conn); - - return 0; -} - -/* Switch on rx_state. - * Return 0 on success, else return <0 - * Always set cont_flag: 1 if we're ready to continue reading, else 0 - */ -int -usocklnd_read_hello(usock_conn_t *conn, int *cont_flag) -{ - int rc = 0; - ksock_hello_msg_t *hello = conn->uc_rx_hello; - - *cont_flag = 0; - - /* smth. new emerged in hello - let's process it */ - switch (conn->uc_rx_state) { - case UC_RX_HELLO_MAGIC: - if (hello->kshm_magic == LNET_PROTO_MAGIC) - conn->uc_flip = 0; - else if (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC)) - conn->uc_flip = 1; - else - return -EPROTO; - - usocklnd_rx_helloversion_state_transition(conn); - *cont_flag = 1; - break; - - case UC_RX_HELLO_VERSION: - if ((!conn->uc_flip && - (hello->kshm_version != KSOCK_PROTO_V2)) || - (conn->uc_flip && - (hello->kshm_version != __swab32(KSOCK_PROTO_V2)))) - return -EPROTO; - - usocklnd_rx_hellobody_state_transition(conn); - *cont_flag = 1; - break; - - case UC_RX_HELLO_BODY: - if (conn->uc_flip) { - ksock_hello_msg_t *hello = conn->uc_rx_hello; - __swab32s(&hello->kshm_src_pid); - __swab64s(&hello->kshm_src_nid); - __swab32s(&hello->kshm_dst_pid); - __swab64s(&hello->kshm_dst_nid); - __swab64s(&hello->kshm_src_incarnation); - __swab64s(&hello->kshm_dst_incarnation); - __swab32s(&hello->kshm_ctype); - __swab32s(&hello->kshm_nips); - } - - if (conn->uc_rx_hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %u.%u.%u.%u port %d\n", - conn->uc_rx_hello->kshm_nips, - HIPQUAD(conn->uc_peer_ip), conn->uc_peer_port); - return -EPROTO; - } - - if (conn->uc_rx_hello->kshm_nips) { - usocklnd_rx_helloIPs_state_transition(conn); - *cont_flag = 1; - break; - } - /* fall through */ - - case UC_RX_HELLO_IPS: - if (conn->uc_activeflag == 1) /* active conn */ - rc = usocklnd_activeconn_hellorecv(conn); - else /* passive conn */ - rc = usocklnd_passiveconn_hellorecv(conn); - - break; - - default: - LBUG(); /* unknown state */ - } - - return rc; -} - -/* All actions that we need after receiving hello on active conn: - * 1) Schedule removing if we're zombie - * 2) Restart active conn if we lost the race - * 3) Else: update RX part to receive KSM header - */ -int -usocklnd_activeconn_hellorecv(usock_conn_t *conn) -{ - int rc = 0; - ksock_hello_msg_t *hello = conn->uc_rx_hello; - usock_peer_t *peer = conn->uc_peer; - - /* Active conn with peer==NULL is zombie. - * Don't try to link it to peer because the conn - * has already had a chance to proceed at the beginning */ - if (peer == NULL) { - LASSERT(list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list)); - - usocklnd_conn_kill(conn); - return 0; - } - - peer->up_last_alive = cfs_time_current(); - - /* peer says that we lost the race */ - if (hello->kshm_ctype == SOCKLND_CONN_NONE) { - /* Start new active conn, relink txs and zc_acks from - * the conn to new conn, schedule removing the conn. - * Actually, we're expecting that a passive conn will - * make us zombie soon and take care of our txs and - * zc_acks */ - - struct list_head tx_list, zcack_list; - usock_conn_t *conn2; - int idx = usocklnd_type2idx(conn->uc_type); - - INIT_LIST_HEAD(&tx_list); - INIT_LIST_HEAD(&zcack_list); - - /* Block usocklnd_send() to check peer->up_conns[idx] - * and to enqueue more txs */ - pthread_mutex_lock(&peer->up_lock); - pthread_mutex_lock(&conn->uc_lock); - - /* usocklnd_shutdown() could kill us */ - if (conn->uc_state == UC_DEAD) { - pthread_mutex_unlock(&conn->uc_lock); - pthread_mutex_unlock(&peer->up_lock); - return 0; - } - - LASSERT (peer == conn->uc_peer); - LASSERT (peer->up_conns[idx] == conn); - - rc = usocklnd_create_active_conn(peer, conn->uc_type, &conn2); - if (rc) { - conn->uc_errored = 1; - pthread_mutex_unlock(&conn->uc_lock); - pthread_mutex_unlock(&peer->up_lock); - return rc; - } - - usocklnd_link_conn_to_peer(conn2, peer, idx); - conn2->uc_peer = peer; - - /* unlink txs and zcack from the conn */ - list_add(&tx_list, &conn->uc_tx_list); - list_del_init(&conn->uc_tx_list); - list_add(&zcack_list, &conn->uc_zcack_list); - list_del_init(&conn->uc_zcack_list); - - /* link they to the conn2 */ - list_add(&conn2->uc_tx_list, &tx_list); - list_del_init(&tx_list); - list_add(&conn2->uc_zcack_list, &zcack_list); - list_del_init(&zcack_list); - - /* make conn zombie */ - conn->uc_peer = NULL; - usocklnd_peer_decref(peer); - - /* schedule conn2 for processing */ - rc = usocklnd_add_pollrequest(conn2, POLL_ADD_REQUEST, POLLOUT); - if (rc) { - peer->up_conns[idx] = NULL; - usocklnd_conn_decref(conn2); /* should destroy conn */ - } else { - usocklnd_conn_kill_locked(conn); - } - - pthread_mutex_unlock(&conn->uc_lock); - pthread_mutex_unlock(&peer->up_lock); - usocklnd_conn_decref(conn); - - } else { /* hello->kshm_ctype != SOCKLND_CONN_NONE */ - if (conn->uc_type != usocklnd_invert_type(hello->kshm_ctype)) - return -EPROTO; - - pthread_mutex_lock(&peer->up_lock); - usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation, - conn); - pthread_mutex_unlock(&peer->up_lock); - - /* safely transit to UC_READY state */ - /* rc == 0 */ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state != UC_DEAD) { - usocklnd_rx_ksmhdr_state_transition(conn); - - /* POLLIN is already set because we just - * received hello, but maybe we've smth. to - * send? */ - LASSERT (conn->uc_sending == 0); - if (!list_empty(&conn->uc_tx_list) || - !list_empty(&conn->uc_zcack_list)) { - - conn->uc_tx_deadline = - cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - rc = usocklnd_add_pollrequest(conn, - POLL_SET_REQUEST, - POLLIN | POLLOUT); - } - - if (rc == 0) - conn->uc_state = UC_READY; - } - pthread_mutex_unlock(&conn->uc_lock); - } - - return rc; -} - -/* All actions that we need after receiving hello on passive conn: - * 1) Stash peer's nid, pid, incarnation and conn type - * 2) Cope with easy case: conn[idx] is empty - just save conn there - * 3) Resolve race: - * a) if our nid is higher - reply with CONN_NONE and make us zombie - * b) if peer's nid is higher - postpone race resolution till - * READY state - * 4) Anyhow, send reply hello -*/ -int -usocklnd_passiveconn_hellorecv(usock_conn_t *conn) -{ - ksock_hello_msg_t *hello = conn->uc_rx_hello; - int type; - int idx; - int rc; - usock_peer_t *peer; - lnet_ni_t *ni = conn->uc_ni; - __u32 peer_ip = conn->uc_peer_ip; - __u16 peer_port = conn->uc_peer_port; - - /* don't know parent peer yet and not zombie */ - LASSERT (conn->uc_peer == NULL && - ni != NULL); - - /* don't know peer's nid and incarnation yet */ - if (peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - /* do not trust liblustre clients */ - conn->uc_peerid.pid = peer_port | LNET_PID_USERFLAG; - conn->uc_peerid.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - peer_ip); - if (hello->kshm_ctype != SOCKLND_CONN_ANY) { - lnet_ni_decref(ni); - conn->uc_ni = NULL; - CERROR("Refusing to accept connection of type=%d from " - "userspace process %u.%u.%u.%u:%d\n", hello->kshm_ctype, - HIPQUAD(peer_ip), peer_port); - return -EINVAL; - } - } else { - conn->uc_peerid.pid = hello->kshm_src_pid; - conn->uc_peerid.nid = hello->kshm_src_nid; - } - conn->uc_type = type = usocklnd_invert_type(hello->kshm_ctype); - - rc = usocklnd_find_or_create_peer(ni, conn->uc_peerid, &peer); - if (rc) { - lnet_ni_decref(ni); - conn->uc_ni = NULL; - return rc; - } - - peer->up_last_alive = cfs_time_current(); - - idx = usocklnd_type2idx(conn->uc_type); - - /* safely check whether we're first */ - pthread_mutex_lock(&peer->up_lock); - - usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation, NULL); - - if (peer->up_conns[idx] == NULL) { - peer->up_last_alive = cfs_time_current(); - conn->uc_peer = peer; - conn->uc_ni = NULL; - usocklnd_link_conn_to_peer(conn, peer, idx); - usocklnd_conn_addref(conn); - } else { - usocklnd_peer_decref(peer); - - /* Resolve race in favour of higher NID */ - if (conn->uc_peerid.nid < conn->uc_ni->ni_nid) { - /* make us zombie */ - conn->uc_ni = NULL; - type = SOCKLND_CONN_NONE; - } - - /* if conn->uc_peerid.nid > conn->uc_ni->ni_nid, - * postpone race resolution till READY state - * (hopefully that conn[idx] will die because of - * incoming hello of CONN_NONE type) */ - } - pthread_mutex_unlock(&peer->up_lock); - - /* allocate and initialize fake tx with hello */ - conn->uc_tx_hello = usocklnd_create_hello_tx(ni, type, - conn->uc_peerid.nid); - if (conn->uc_ni == NULL) - lnet_ni_decref(ni); - - if (conn->uc_tx_hello == NULL) - return -ENOMEM; - - /* rc == 0 */ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state == UC_DEAD) - goto passive_hellorecv_done; - - conn->uc_state = UC_SENDING_HELLO; - conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLOUT); - - passive_hellorecv_done: - pthread_mutex_unlock(&conn->uc_lock); - return rc; -} - -int -usocklnd_write_handler(usock_conn_t *conn) -{ - usock_tx_t *tx; - int ret; - int rc = 0; - int state; - usock_peer_t *peer; - lnet_ni_t *ni; - - pthread_mutex_lock(&conn->uc_lock); /* like membar */ - state = conn->uc_state; - pthread_mutex_unlock(&conn->uc_lock); - - switch (state) { - case UC_CONNECTING: - /* hello_tx has already been initialized - * in usocklnd_create_active_conn() */ - usocklnd_conn_new_state(conn, UC_SENDING_HELLO); - /* fall through */ - - case UC_SENDING_HELLO: - rc = usocklnd_send_tx(conn, conn->uc_tx_hello); - if (rc <= 0) /* error or partial send or connection closed */ - break; - - /* tx with hello was sent successfully */ - usocklnd_destroy_tx(NULL, conn->uc_tx_hello); - conn->uc_tx_hello = NULL; - - if (conn->uc_activeflag == 1) /* active conn */ - rc = usocklnd_activeconn_hellosent(conn); - else /* passive conn */ - rc = usocklnd_passiveconn_hellosent(conn); - - break; - - case UC_READY: - pthread_mutex_lock(&conn->uc_lock); - - peer = conn->uc_peer; - LASSERT (peer != NULL); - ni = peer->up_ni; - - if (list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list)) { - LASSERT(usock_tuns.ut_fair_limit > 1); - pthread_mutex_unlock(&conn->uc_lock); - return 0; - } - - tx = usocklnd_try_piggyback(&conn->uc_tx_list, - &conn->uc_zcack_list); - if (tx != NULL) - conn->uc_sending = 1; - else - rc = -ENOMEM; - - pthread_mutex_unlock(&conn->uc_lock); - - if (rc) - break; - - rc = usocklnd_send_tx(conn, tx); - if (rc == 0) { /* partial send or connection closed */ - pthread_mutex_lock(&conn->uc_lock); - list_add(&tx->tx_list, &conn->uc_tx_list); - conn->uc_sending = 0; - pthread_mutex_unlock(&conn->uc_lock); - break; - } - if (rc < 0) { /* real error */ - usocklnd_destroy_tx(ni, tx); - break; - } - - /* rc == 1: tx was sent completely */ - usocklnd_destroy_tx(ni, tx); - - pthread_mutex_lock(&conn->uc_lock); - conn->uc_sending = 0; - if (conn->uc_state != UC_DEAD && - list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list)) { - conn->uc_tx_flag = 0; - ret = usocklnd_add_pollrequest(conn, - POLL_TX_SET_REQUEST, 0); - if (ret) - rc = ret; - } - pthread_mutex_unlock(&conn->uc_lock); - - break; - - case UC_DEAD: - break; - - default: - LBUG(); - } - - if (rc < 0) - usocklnd_conn_kill(conn); - - return rc; -} - -/* Return the first tx from tx_list with piggybacked zc_ack - * from zcack_list when possible. If tx_list is empty, return - * brand new noop tx for zc_ack from zcack_list. Return NULL - * if an error happened */ -usock_tx_t * -usocklnd_try_piggyback(struct list_head *tx_list_p, - struct list_head *zcack_list_p) -{ - usock_tx_t *tx; - usock_zc_ack_t *zc_ack; - - /* assign tx and zc_ack */ - if (list_empty(tx_list_p)) - tx = NULL; - else { - tx = list_entry(tx_list_p->next, usock_tx_t, tx_list); - list_del(&tx->tx_list); - - /* already piggybacked or partially send */ - if (tx->tx_msg.ksm_zc_cookies[1] != 0 || - tx->tx_resid != tx->tx_nob) - return tx; - } - - if (list_empty(zcack_list_p)) { - /* nothing to piggyback */ - return tx; - } else { - zc_ack = list_entry(zcack_list_p->next, - usock_zc_ack_t, zc_list); - list_del(&zc_ack->zc_list); - } - - if (tx != NULL) - /* piggyback the zc-ack cookie */ - tx->tx_msg.ksm_zc_cookies[1] = zc_ack->zc_cookie; - else - /* cannot piggyback, need noop */ - tx = usocklnd_create_noop_tx(zc_ack->zc_cookie); - - LIBCFS_FREE (zc_ack, sizeof(*zc_ack)); - return tx; -} - -/* All actions that we need after sending hello on active conn: - * 1) update RX iov to receive hello - * 2) state transition to UC_RECEIVING_HELLO - * 3) notify poll_thread that we're waiting for incoming hello */ -int -usocklnd_activeconn_hellosent(usock_conn_t *conn) -{ - int rc = 0; - - pthread_mutex_lock(&conn->uc_lock); - - if (conn->uc_state != UC_DEAD) { - usocklnd_rx_hellomagic_state_transition(conn); - conn->uc_state = UC_RECEIVING_HELLO; - conn->uc_tx_flag = 0; - rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLIN); - } - - pthread_mutex_unlock(&conn->uc_lock); - - return rc; -} - -/* All actions that we need after sending hello on passive conn: - * 1) Cope with 1st easy case: conn is already linked to a peer - * 2) Cope with 2nd easy case: remove zombie conn - * 3) Resolve race: - * a) find the peer - * b) link the conn to the peer if conn[idx] is empty - * c) if the conn[idx] isn't empty and is in READY state, - * remove the conn as duplicated - * d) if the conn[idx] isn't empty and isn't in READY state, - * override conn[idx] with the conn - */ -int -usocklnd_passiveconn_hellosent(usock_conn_t *conn) -{ - usock_conn_t *conn2; - usock_peer_t *peer; - struct list_head tx_list; - struct list_head zcack_list; - int idx; - int rc = 0; - - /* almost nothing to do if conn is already linked to peer hash table */ - if (conn->uc_peer != NULL) - goto passive_hellosent_done; - - /* conn->uc_peer == NULL, so the conn isn't accessible via - * peer hash list, so nobody can touch the conn but us */ - - if (conn->uc_ni == NULL) /* remove zombie conn */ - goto passive_hellosent_connkill; - - /* all code below is race resolution, because normally - * passive conn is linked to peer just after receiving hello */ - INIT_LIST_HEAD(&tx_list); - INIT_LIST_HEAD(&zcack_list); - - /* conn is passive and isn't linked to any peer, - so its tx and zc_ack lists have to be empty */ - LASSERT(list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list) && - conn->uc_sending == 0); - - rc = usocklnd_find_or_create_peer(conn->uc_ni, conn->uc_peerid, &peer); - if (rc) - return rc; - - idx = usocklnd_type2idx(conn->uc_type); - - /* try to link conn to peer */ - pthread_mutex_lock(&peer->up_lock); - if (peer->up_conns[idx] == NULL) { - usocklnd_link_conn_to_peer(conn, peer, idx); - usocklnd_conn_addref(conn); - conn->uc_peer = peer; - usocklnd_peer_addref(peer); - } else { - conn2 = peer->up_conns[idx]; - pthread_mutex_lock(&conn2->uc_lock); - - if (conn2->uc_state == UC_READY) { - /* conn2 is in READY state, so conn is "duplicated" */ - pthread_mutex_unlock(&conn2->uc_lock); - pthread_mutex_unlock(&peer->up_lock); - usocklnd_peer_decref(peer); - goto passive_hellosent_connkill; - } - - /* uc_state != UC_READY => switch conn and conn2 */ - /* Relink txs and zc_acks from conn2 to conn. - * We're sure that nobody but us can access to conn, - * nevertheless we use mutex (if we're wrong yet, - * deadlock is easy to see that corrupted list */ - list_add(&tx_list, &conn2->uc_tx_list); - list_del_init(&conn2->uc_tx_list); - list_add(&zcack_list, &conn2->uc_zcack_list); - list_del_init(&conn2->uc_zcack_list); - - pthread_mutex_lock(&conn->uc_lock); - list_add_tail(&conn->uc_tx_list, &tx_list); - list_del_init(&tx_list); - list_add_tail(&conn->uc_zcack_list, &zcack_list); - list_del_init(&zcack_list); - conn->uc_peer = peer; - pthread_mutex_unlock(&conn->uc_lock); - - conn2->uc_peer = NULL; /* make conn2 zombie */ - pthread_mutex_unlock(&conn2->uc_lock); - usocklnd_conn_decref(conn2); - - usocklnd_link_conn_to_peer(conn, peer, idx); - usocklnd_conn_addref(conn); - conn->uc_peer = peer; - } - - lnet_ni_decref(conn->uc_ni); - conn->uc_ni = NULL; - pthread_mutex_unlock(&peer->up_lock); - usocklnd_peer_decref(peer); - - passive_hellosent_done: - /* safely transit to UC_READY state */ - /* rc == 0 */ - pthread_mutex_lock(&conn->uc_lock); - if (conn->uc_state != UC_DEAD) { - usocklnd_rx_ksmhdr_state_transition(conn); - - /* we're ready to recive incoming packets and maybe - already have smth. to transmit */ - LASSERT (conn->uc_sending == 0); - if (list_empty(&conn->uc_tx_list) && - list_empty(&conn->uc_zcack_list)) { - conn->uc_tx_flag = 0; - rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, - POLLIN); - } else { - conn->uc_tx_deadline = - cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, - POLLIN | POLLOUT); - } - - if (rc == 0) - conn->uc_state = UC_READY; - } - pthread_mutex_unlock(&conn->uc_lock); - return rc; - - passive_hellosent_connkill: - usocklnd_conn_kill(conn); - return 0; -} - -/* Send as much tx data as possible. - * Returns 0 or 1 on succsess, <0 if fatal error. - * 0 means partial send or non-fatal error, 1 - complete. - * Rely on libcfs_sock_writev() for differentiating fatal and - * non-fatal errors. An error should be considered as non-fatal if: - * 1) it still makes sense to continue reading && - * 2) anyway, poll() will set up POLLHUP|POLLERR flags */ -int -usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx) -{ - struct iovec *iov; - int nob; - cfs_time_t t; - - LASSERT (tx->tx_resid != 0); - - do { - usock_peer_t *peer = conn->uc_peer; - - LASSERT (tx->tx_niov > 0); - - nob = libcfs_sock_writev(conn->uc_sock, - tx->tx_iov, tx->tx_niov); - if (nob < 0) - conn->uc_errored = 1; - if (nob <= 0) /* write queue is flow-controlled or error */ - return nob; - - LASSERT (nob <= tx->tx_resid); - tx->tx_resid -= nob; - t = cfs_time_current(); - conn->uc_tx_deadline = cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout)); - - if(peer != NULL) - peer->up_last_alive = t; - - /* "consume" iov */ - iov = tx->tx_iov; - do { - LASSERT (tx->tx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob); - iov->iov_len -= nob; - break; - } - - nob -= iov->iov_len; - tx->tx_iov = ++iov; - tx->tx_niov--; - } while (nob != 0); - - } while (tx->tx_resid != 0); - - return 1; /* send complete */ -} - -/* Read from wire as much data as possible. - * Returns 0 or 1 on succsess, <0 if error or EOF. - * 0 means partial read, 1 - complete */ -int -usocklnd_read_data(usock_conn_t *conn) -{ - struct iovec *iov; - int nob; - cfs_time_t t; - - LASSERT (conn->uc_rx_nob_wanted != 0); - - do { - usock_peer_t *peer = conn->uc_peer; - - LASSERT (conn->uc_rx_niov > 0); - - nob = libcfs_sock_readv(conn->uc_sock, - conn->uc_rx_iov, conn->uc_rx_niov); - if (nob <= 0) {/* read nothing or error */ - if (nob < 0) - conn->uc_errored = 1; - return nob; - } - - LASSERT (nob <= conn->uc_rx_nob_wanted); - conn->uc_rx_nob_wanted -= nob; - conn->uc_rx_nob_left -= nob; - t = cfs_time_current(); - conn->uc_rx_deadline = cfs_time_add(t, cfs_time_seconds(usock_tuns.ut_timeout)); - - if(peer != NULL) - peer->up_last_alive = t; - - /* "consume" iov */ - iov = conn->uc_rx_iov; - do { - LASSERT (conn->uc_rx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob); - iov->iov_len -= nob; - break; - } - - nob -= iov->iov_len; - conn->uc_rx_iov = ++iov; - conn->uc_rx_niov--; - } while (nob != 0); - - } while (conn->uc_rx_nob_wanted != 0); - - return 1; /* read complete */ -} diff --git a/lnet/ulnds/socklnd/poll.c b/lnet/ulnds/socklnd/poll.c deleted file mode 100644 index 254abbe..0000000 --- a/lnet/ulnds/socklnd/poll.c +++ /dev/null @@ -1,514 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/ulnds/socklnd/poll.c - * - * Author: Maxim Patlasov - */ - -#include "usocklnd.h" -#include -#include - -void -usocklnd_process_stale_list(usock_pollthread_t *pt_data) -{ - while (!list_empty(&pt_data->upt_stale_list)) { - usock_conn_t *conn; - conn = list_entry(pt_data->upt_stale_list.next, - usock_conn_t, uc_stale_list); - - list_del(&conn->uc_stale_list); - - usocklnd_tear_peer_conn(conn); - usocklnd_conn_decref(conn); /* -1 for idx2conn[idx] or pr */ - } -} - -int -usocklnd_poll_thread(void *arg) -{ - int rc = 0; - usock_pollthread_t *pt_data = (usock_pollthread_t *)arg; - cfs_time_t current_time; - cfs_time_t planned_time; - int idx; - int idx_start; - int idx_finish; - int chunk; - int saved_nfds; - int extra; - int times; - - /* mask signals to avoid SIGPIPE, etc */ - sigset_t sigs; - sigfillset (&sigs); - pthread_sigmask (SIG_SETMASK, &sigs, 0); - - LASSERT(pt_data != NULL); - - planned_time = cfs_time_shift(usock_tuns.ut_poll_timeout); - chunk = usocklnd_calculate_chunk_size(pt_data->upt_nfds); - saved_nfds = pt_data->upt_nfds; - idx_start = 1; - - /* Main loop */ - while (usock_data.ud_shutdown == 0) { - rc = 0; - - /* Process all enqueued poll requests */ - pthread_mutex_lock(&pt_data->upt_pollrequests_lock); - while (!list_empty(&pt_data->upt_pollrequests)) { - usock_pollrequest_t *pr; - pr = list_entry(pt_data->upt_pollrequests.next, - usock_pollrequest_t, upr_list); - - list_del(&pr->upr_list); - rc = usocklnd_process_pollrequest(pr, pt_data); - if (rc) - break; - } - pthread_mutex_unlock(&pt_data->upt_pollrequests_lock); - - if (rc) - break; - - /* Delete conns orphaned due to POLL_DEL_REQUESTs */ - usocklnd_process_stale_list(pt_data); - - /* Actual polling for events */ - rc = poll(pt_data->upt_pollfd, - pt_data->upt_nfds, - usock_tuns.ut_poll_timeout * 1000); - - if (rc < 0 && errno != EINTR) { - CERROR("Cannot poll(2): errno=%d\n", errno); - break; - } - - if (rc > 0) - usocklnd_execute_handlers(pt_data); - - current_time = cfs_time_current(); - - if (pt_data->upt_nfds < 2 || - cfs_time_before(current_time, planned_time)) - continue; - - /* catch up growing pollfd[] */ - if (pt_data->upt_nfds > saved_nfds) { - extra = pt_data->upt_nfds - saved_nfds; - saved_nfds = pt_data->upt_nfds; - } else { - extra = 0; - } - - times = cfs_duration_sec(cfs_time_sub(current_time, planned_time)) + 1; - idx_finish = MIN(idx_start + chunk*times + extra, pt_data->upt_nfds); - - for (idx = idx_start; idx < idx_finish; idx++) { - usock_conn_t *conn = pt_data->upt_idx2conn[idx]; - pthread_mutex_lock(&conn->uc_lock); - if (usocklnd_conn_timed_out(conn, current_time) && - conn->uc_state != UC_DEAD) { - conn->uc_errored = 1; - usocklnd_conn_kill_locked(conn); - } - pthread_mutex_unlock(&conn->uc_lock); - } - - if (idx_finish == pt_data->upt_nfds) { - chunk = usocklnd_calculate_chunk_size(pt_data->upt_nfds); - saved_nfds = pt_data->upt_nfds; - idx_start = 1; - } - else { - idx_start = idx_finish; - } - - planned_time = cfs_time_add(current_time, - cfs_time_seconds(usock_tuns.ut_poll_timeout)); - } - - /* All conns should be deleted by POLL_DEL_REQUESTs while shutdown */ - LASSERT (rc != 0 || pt_data->upt_nfds == 1); - - if (rc) { - pthread_mutex_lock(&pt_data->upt_pollrequests_lock); - - /* Block new poll requests to be enqueued */ - pt_data->upt_errno = rc; - - while (!list_empty(&pt_data->upt_pollrequests)) { - usock_pollrequest_t *pr; - pr = list_entry(pt_data->upt_pollrequests.next, - usock_pollrequest_t, upr_list); - - list_del(&pr->upr_list); - - if (pr->upr_type == POLL_ADD_REQUEST) { - libcfs_sock_release(pr->upr_conn->uc_sock); - list_add_tail(&pr->upr_conn->uc_stale_list, - &pt_data->upt_stale_list); - } else { - usocklnd_conn_decref(pr->upr_conn); - } - - LIBCFS_FREE (pr, sizeof(*pr)); - } - pthread_mutex_unlock(&pt_data->upt_pollrequests_lock); - - usocklnd_process_stale_list(pt_data); - - for (idx = 1; idx < pt_data->upt_nfds; idx++) { - usock_conn_t *conn = pt_data->upt_idx2conn[idx]; - LASSERT(conn != NULL); - libcfs_sock_release(conn->uc_sock); - usocklnd_tear_peer_conn(conn); - usocklnd_conn_decref(conn); - } - } - - /* unblock usocklnd_shutdown() */ - complete(&pt_data->upt_completion); - - return 0; -} - -/* Returns 0 on success, <0 else */ -int -usocklnd_add_pollrequest(usock_conn_t *conn, int type, short value) -{ - int pt_idx = conn->uc_pt_idx; - usock_pollthread_t *pt = &usock_data.ud_pollthreads[pt_idx]; - usock_pollrequest_t *pr; - - LIBCFS_ALLOC(pr, sizeof(*pr)); - if (pr == NULL) { - CERROR ("Cannot allocate poll request\n"); - return -ENOMEM; - } - - pr->upr_conn = conn; - pr->upr_type = type; - pr->upr_value = value; - - usocklnd_conn_addref(conn); /* +1 for poll request */ - - pthread_mutex_lock(&pt->upt_pollrequests_lock); - - if (pt->upt_errno) { /* very rare case: errored poll thread */ - int rc = pt->upt_errno; - pthread_mutex_unlock(&pt->upt_pollrequests_lock); - usocklnd_conn_decref(conn); - LIBCFS_FREE(pr, sizeof(*pr)); - return rc; - } - - list_add_tail(&pr->upr_list, &pt->upt_pollrequests); - pthread_mutex_unlock(&pt->upt_pollrequests_lock); - return 0; -} - -void -usocklnd_add_killrequest(usock_conn_t *conn) -{ - int pt_idx = conn->uc_pt_idx; - usock_pollthread_t *pt = &usock_data.ud_pollthreads[pt_idx]; - usock_pollrequest_t *pr = conn->uc_preq; - - /* Use preallocated poll request because there is no good - * workaround for ENOMEM error while killing connection */ - if (pr) { - pr->upr_conn = conn; - pr->upr_type = POLL_DEL_REQUEST; - pr->upr_value = 0; - - usocklnd_conn_addref(conn); /* +1 for poll request */ - - pthread_mutex_lock(&pt->upt_pollrequests_lock); - - if (pt->upt_errno) { /* very rare case: errored poll thread */ - pthread_mutex_unlock(&pt->upt_pollrequests_lock); - usocklnd_conn_decref(conn); - return; /* conn will be killed in poll thread anyway */ - } - - list_add_tail(&pr->upr_list, &pt->upt_pollrequests); - pthread_mutex_unlock(&pt->upt_pollrequests_lock); - - conn->uc_preq = NULL; - } -} - -/* Process poll request. Update poll data. - * Returns 0 on success, <0 else */ -int -usocklnd_process_pollrequest(usock_pollrequest_t *pr, - usock_pollthread_t *pt_data) -{ - int type = pr->upr_type; - short value = pr->upr_value; - usock_conn_t *conn = pr->upr_conn; - int idx = 0; - struct pollfd *pollfd = pt_data->upt_pollfd; - int *fd2idx = pt_data->upt_fd2idx; - usock_conn_t **idx2conn = pt_data->upt_idx2conn; - int *skip = pt_data->upt_skip; - - LASSERT(conn != NULL); - LASSERT(conn->uc_sock != NULL); - LASSERT(type == POLL_ADD_REQUEST || - LIBCFS_SOCK2FD(conn->uc_sock) < pt_data->upt_nfd2idx); - - if (type != POLL_ADD_REQUEST) { - idx = fd2idx[LIBCFS_SOCK2FD(conn->uc_sock)]; - if (idx > 0 && idx < pt_data->upt_nfds) { /* hot path */ - LASSERT(pollfd[idx].fd == - LIBCFS_SOCK2FD(conn->uc_sock)); - } else { /* unlikely */ - CWARN("Very unlikely event happend: trying to" - " handle poll request of type %d but idx=%d" - " is out of range [1 ... %d]. Is shutdown" - " in progress (%d)?\n", - type, idx, pt_data->upt_nfds - 1, - usock_data.ud_shutdown); - - LIBCFS_FREE (pr, sizeof(*pr)); - usocklnd_conn_decref(conn); - return 0; - } - } - - LIBCFS_FREE (pr, sizeof(*pr)); - - switch (type) { - case POLL_ADD_REQUEST: - if (pt_data->upt_nfds >= pt_data->upt_npollfd) { - /* resize pollfd[], idx2conn[] and skip[] */ - struct pollfd *new_pollfd; - int new_npollfd = pt_data->upt_npollfd * 2; - usock_conn_t **new_idx2conn; - int *new_skip; - - new_pollfd = LIBCFS_REALLOC(pollfd, new_npollfd * - sizeof(struct pollfd)); - if (new_pollfd == NULL) - goto process_pollrequest_enomem; - pt_data->upt_pollfd = pollfd = new_pollfd; - - new_idx2conn = LIBCFS_REALLOC(idx2conn, new_npollfd * - sizeof(usock_conn_t *)); - if (new_idx2conn == NULL) - goto process_pollrequest_enomem; - pt_data->upt_idx2conn = idx2conn = new_idx2conn; - - new_skip = LIBCFS_REALLOC(skip, new_npollfd * - sizeof(int)); - if (new_skip == NULL) - goto process_pollrequest_enomem; - pt_data->upt_skip = new_skip; - - pt_data->upt_npollfd = new_npollfd; - } - - if (LIBCFS_SOCK2FD(conn->uc_sock) >= pt_data->upt_nfd2idx) { - /* resize fd2idx[] */ - int *new_fd2idx; - int new_nfd2idx = pt_data->upt_nfd2idx * 2; - - while (new_nfd2idx <= LIBCFS_SOCK2FD(conn->uc_sock)) - new_nfd2idx *= 2; - - new_fd2idx = LIBCFS_REALLOC(fd2idx, new_nfd2idx * - sizeof(int)); - if (new_fd2idx == NULL) - goto process_pollrequest_enomem; - - pt_data->upt_fd2idx = fd2idx = new_fd2idx; - memset(fd2idx + pt_data->upt_nfd2idx, 0, - (new_nfd2idx - pt_data->upt_nfd2idx) - * sizeof(int)); - pt_data->upt_nfd2idx = new_nfd2idx; - } - - LASSERT(fd2idx[LIBCFS_SOCK2FD(conn->uc_sock)] == 0); - - idx = pt_data->upt_nfds++; - idx2conn[idx] = conn; - fd2idx[LIBCFS_SOCK2FD(conn->uc_sock)] = idx; - - pollfd[idx].fd = LIBCFS_SOCK2FD(conn->uc_sock); - pollfd[idx].events = value; - pollfd[idx].revents = 0; - break; - case POLL_DEL_REQUEST: - fd2idx[LIBCFS_SOCK2FD(conn->uc_sock)] = 0; /* invalidate this - * entry */ - --pt_data->upt_nfds; - if (idx != pt_data->upt_nfds) { - /* shift last entry into released position */ - memcpy(&pollfd[idx], &pollfd[pt_data->upt_nfds], - sizeof(struct pollfd)); - idx2conn[idx] = idx2conn[pt_data->upt_nfds]; - fd2idx[pollfd[idx].fd] = idx; - } - - libcfs_sock_release(conn->uc_sock); - list_add_tail(&conn->uc_stale_list, - &pt_data->upt_stale_list); - break; - case POLL_RX_SET_REQUEST: - pollfd[idx].events = (pollfd[idx].events & ~POLLIN) | value; - break; - case POLL_TX_SET_REQUEST: - pollfd[idx].events = (pollfd[idx].events & ~POLLOUT) | value; - break; - case POLL_SET_REQUEST: - pollfd[idx].events = value; - break; - default: - LBUG(); /* unknown type */ - } - - /* In the case of POLL_ADD_REQUEST, idx2conn[idx] takes the - * reference that poll request possesses */ - if (type != POLL_ADD_REQUEST) - usocklnd_conn_decref(conn); - - return 0; - - process_pollrequest_enomem: - usocklnd_conn_decref(conn); - return -ENOMEM; -} - -/* Loop on poll data executing handlers repeatedly until - * fair_limit is reached or all entries are exhausted */ -void -usocklnd_execute_handlers(usock_pollthread_t *pt_data) -{ - struct pollfd *pollfd = pt_data->upt_pollfd; - int nfds = pt_data->upt_nfds; - usock_conn_t **idx2conn = pt_data->upt_idx2conn; - int *skip = pt_data->upt_skip; - int j; - - if (pollfd[0].revents & POLLIN) - while (usocklnd_notifier_handler(pollfd[0].fd) > 0) - ; - - skip[0] = 1; /* always skip notifier fd */ - - for (j = 0; j < usock_tuns.ut_fair_limit; j++) { - int prev = 0; - int i = skip[0]; - - if (i >= nfds) /* nothing ready */ - break; - - do { - usock_conn_t *conn = idx2conn[i]; - int next; - - if (j == 0) /* first pass... */ - next = skip[i] = i+1; /* set skip chain */ - else /* later passes... */ - next = skip[i]; /* skip unready pollfds */ - - /* kill connection if it's closed by peer and - * there is no data pending for reading */ - if ((pollfd[i].revents & POLLERR) != 0 || - (pollfd[i].revents & POLLHUP) != 0) { - if ((pollfd[i].events & POLLIN) != 0 && - (pollfd[i].revents & POLLIN) == 0) - usocklnd_conn_kill(conn); - else - usocklnd_exception_handler(conn); - } - - if ((pollfd[i].revents & POLLIN) != 0 && - usocklnd_read_handler(conn) <= 0) - pollfd[i].revents &= ~POLLIN; - - if ((pollfd[i].revents & POLLOUT) != 0 && - usocklnd_write_handler(conn) <= 0) - pollfd[i].revents &= ~POLLOUT; - - if ((pollfd[i].revents & (POLLIN | POLLOUT)) == 0) - skip[prev] = next; /* skip this entry next pass */ - else - prev = i; - - i = next; - } while (i < nfds); - } -} - -int -usocklnd_calculate_chunk_size(int num) -{ - const int n = 4; - const int p = usock_tuns.ut_poll_timeout; - int chunk = num; - - /* chunk should be big enough to detect a timeout on any - * connection within (n+1)/n times the timeout interval - * if we checks every 'p' seconds 'chunk' conns */ - - if (usock_tuns.ut_timeout > n * p) - chunk = (chunk * n * p) / usock_tuns.ut_timeout; - - if (chunk == 0) - chunk = 1; - - return chunk; -} - -void -usocklnd_wakeup_pollthread(int i) -{ - usock_pollthread_t *pt = &usock_data.ud_pollthreads[i]; - int notification = 0; - int rc; - - rc = syscall(SYS_write, LIBCFS_SOCK2FD(pt->upt_notifier[0]), - ¬ification, sizeof(notification)); - - if (rc != sizeof(notification)) - CERROR("Very unlikely event happend: " - "cannot write to notifier fd (rc=%d; errno=%d)\n", - rc, errno); -} diff --git a/lnet/ulnds/socklnd/usocklnd.c b/lnet/ulnds/socklnd/usocklnd.c deleted file mode 100644 index 717c3ef..0000000 --- a/lnet/ulnds/socklnd/usocklnd.c +++ /dev/null @@ -1,572 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/ulnds/socklnd/usocklnd.c - * - * Author: Maxim Patlasov - */ - -#include "usocklnd.h" -#include - -lnd_t the_tcplnd = { - .lnd_type = SOCKLND, - .lnd_startup = usocklnd_startup, - .lnd_shutdown = usocklnd_shutdown, - .lnd_send = usocklnd_send, - .lnd_recv = usocklnd_recv, - .lnd_accept = usocklnd_accept, -}; - -usock_data_t usock_data; -usock_tunables_t usock_tuns = { - .ut_timeout = 50, - .ut_poll_timeout = 1, - .ut_fair_limit = 1, - .ut_npollthreads = 0, - .ut_min_bulk = 1<<10, - .ut_txcredits = 256, - .ut_peertxcredits = 8, - .ut_socknagle = 0, - .ut_sockbufsiz = 0, -}; - -#define MAX_REASONABLE_TIMEOUT 36000 /* 10 hours */ -#define MAX_REASONABLE_NPT 1000 - -int -usocklnd_validate_tunables() -{ - if (usock_tuns.ut_timeout <= 0 || - usock_tuns.ut_timeout > MAX_REASONABLE_TIMEOUT) { - CERROR("USOCK_TIMEOUT: %d is out of reasonable limits\n", - usock_tuns.ut_timeout); - return -1; - } - - if (usock_tuns.ut_poll_timeout <= 0 || - usock_tuns.ut_poll_timeout > MAX_REASONABLE_TIMEOUT) { - CERROR("USOCK_POLL_TIMEOUT: %d is out of reasonable limits\n", - usock_tuns.ut_poll_timeout); - return -1; - } - - if (usock_tuns.ut_fair_limit <= 0) { - CERROR("Invalid USOCK_FAIR_LIMIT: %d (should be >0)\n", - usock_tuns.ut_fair_limit); - return -1; - } - - if (usock_tuns.ut_npollthreads < 0 || - usock_tuns.ut_npollthreads > MAX_REASONABLE_NPT) { - CERROR("USOCK_NPOLLTHREADS: %d is out of reasonable limits\n", - usock_tuns.ut_npollthreads); - return -1; - } - - if (usock_tuns.ut_txcredits <= 0) { - CERROR("USOCK_TXCREDITS: %d should be positive\n", - usock_tuns.ut_txcredits); - return -1; - } - - if (usock_tuns.ut_peertxcredits <= 0) { - CERROR("USOCK_PEERTXCREDITS: %d should be positive\n", - usock_tuns.ut_peertxcredits); - return -1; - } - - if (usock_tuns.ut_peertxcredits > usock_tuns.ut_txcredits) { - CERROR("USOCK_PEERTXCREDITS: %d should not be greater" - " than USOCK_TXCREDITS: %d\n", - usock_tuns.ut_peertxcredits, usock_tuns.ut_txcredits); - return -1; - } - - if (usock_tuns.ut_socknagle != 0 && - usock_tuns.ut_socknagle != 1) { - CERROR("USOCK_SOCKNAGLE: %d should be 0 or 1\n", - usock_tuns.ut_socknagle); - return -1; - } - - if (usock_tuns.ut_sockbufsiz < 0) { - CERROR("USOCK_SOCKBUFSIZ: %d should be 0 or positive\n", - usock_tuns.ut_sockbufsiz); - return -1; - } - - return 0; -} - -void -usocklnd_release_poll_states(int n) -{ - int i; - - for (i = 0; i < n; i++) { - usock_pollthread_t *pt = &usock_data.ud_pollthreads[i]; - - libcfs_sock_release(pt->upt_notifier[0]); - libcfs_sock_release(pt->upt_notifier[1]); - - pthread_mutex_destroy(&pt->upt_pollrequests_lock); - fini_completion(&pt->upt_completion); - - LIBCFS_FREE (pt->upt_pollfd, - sizeof(struct pollfd) * pt->upt_npollfd); - LIBCFS_FREE (pt->upt_idx2conn, - sizeof(usock_conn_t *) * pt->upt_npollfd); - LIBCFS_FREE (pt->upt_fd2idx, - sizeof(int) * pt->upt_nfd2idx); - } -} - -int -usocklnd_update_tunables() -{ - int rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_timeout, - "USOCK_TIMEOUT"); - if (rc) - return rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_poll_timeout, - "USOCK_POLL_TIMEOUT"); - if (rc) - return rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_npollthreads, - "USOCK_NPOLLTHREADS"); - if (rc) - return rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_fair_limit, - "USOCK_FAIR_LIMIT"); - if (rc) - return rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_min_bulk, - "USOCK_MIN_BULK"); - if (rc) - return rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_txcredits, - "USOCK_TXCREDITS"); - if (rc) - return rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_peertxcredits, - "USOCK_PEERTXCREDITS"); - if (rc) - return rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_socknagle, - "USOCK_SOCKNAGLE"); - if (rc) - return rc; - - rc = lnet_parse_int_tunable(&usock_tuns.ut_sockbufsiz, - "USOCK_SOCKBUFSIZ"); - if (rc) - return rc; - - if (usocklnd_validate_tunables()) - return -EINVAL; - - if (usock_tuns.ut_npollthreads == 0) { - usock_tuns.ut_npollthreads = cfs_online_cpus(); - - if (usock_tuns.ut_npollthreads <= 0) { - CERROR("Cannot find out the number of online CPUs\n"); - return -EINVAL; - } - } - - return 0; -} - - -int -usocklnd_base_startup() -{ - usock_pollthread_t *pt; - int i; - int rc; - - rc = usocklnd_update_tunables(); - if (rc) - return rc; - - usock_data.ud_npollthreads = usock_tuns.ut_npollthreads; - - LIBCFS_ALLOC (usock_data.ud_pollthreads, - usock_data.ud_npollthreads * - sizeof(usock_pollthread_t)); - if (usock_data.ud_pollthreads == NULL) - return -ENOMEM; - - /* Initialize poll thread state structures */ - for (i = 0; i < usock_data.ud_npollthreads; i++) { - - pt = &usock_data.ud_pollthreads[i]; - - rc = -ENOMEM; - - LIBCFS_ALLOC (pt->upt_pollfd, - sizeof(struct pollfd) * UPT_START_SIZ); - if (pt->upt_pollfd == NULL) - goto base_startup_failed_0; - - LIBCFS_ALLOC (pt->upt_idx2conn, - sizeof(usock_conn_t *) * UPT_START_SIZ); - if (pt->upt_idx2conn == NULL) - goto base_startup_failed_1; - - LIBCFS_ALLOC (pt->upt_fd2idx, - sizeof(int) * UPT_START_SIZ); - if (pt->upt_fd2idx == NULL) - goto base_startup_failed_2; - - memset(pt->upt_fd2idx, 0, - sizeof(int) * UPT_START_SIZ); - - LIBCFS_ALLOC (pt->upt_skip, - sizeof(int) * UPT_START_SIZ); - if (pt->upt_skip == NULL) - goto base_startup_failed_3; - - pt->upt_npollfd = pt->upt_nfd2idx = UPT_START_SIZ; - - rc = libcfs_socketpair(pt->upt_notifier); - if (rc != 0) - goto base_startup_failed_4; - - pt->upt_pollfd[0].fd = LIBCFS_SOCK2FD(pt->upt_notifier[1]); - pt->upt_pollfd[0].events = POLLIN; - pt->upt_pollfd[0].revents = 0; - - pt->upt_nfds = 1; - pt->upt_idx2conn[0] = NULL; - - pt->upt_errno = 0; - INIT_LIST_HEAD(&pt->upt_pollrequests); - INIT_LIST_HEAD(&pt->upt_stale_list); - pthread_mutex_init(&pt->upt_pollrequests_lock, NULL); - init_completion(&pt->upt_completion); - } - - /* Initialize peer hash list */ - for (i = 0; i < UD_PEER_HASH_SIZE; i++) - INIT_LIST_HEAD(&usock_data.ud_peers[i]); - - pthread_rwlock_init(&usock_data.ud_peers_lock, NULL); - - /* Spawn poll threads */ - for (i = 0; i < usock_data.ud_npollthreads; i++) { - struct task_struct *task; - - task = kthread_run(usocklnd_poll_thread, - &usock_data.ud_pollthreads[i], ""); - if (IS_ERR(task)) { - usocklnd_base_shutdown(i); - return PTR_ERR(task); - } - } - - usock_data.ud_state = UD_STATE_INITIALIZED; - - return 0; - - base_startup_failed_4: - LIBCFS_FREE (pt->upt_skip, sizeof(int) * UPT_START_SIZ); - base_startup_failed_3: - LIBCFS_FREE (pt->upt_fd2idx, sizeof(int) * UPT_START_SIZ); - base_startup_failed_2: - LIBCFS_FREE (pt->upt_idx2conn, sizeof(usock_conn_t *) * UPT_START_SIZ); - base_startup_failed_1: - LIBCFS_FREE (pt->upt_pollfd, sizeof(struct pollfd) * UPT_START_SIZ); - base_startup_failed_0: - LASSERT(rc != 0); - usocklnd_release_poll_states(i); - LIBCFS_FREE (usock_data.ud_pollthreads, - usock_data.ud_npollthreads * - sizeof(usock_pollthread_t)); - return rc; -} - -void -usocklnd_base_shutdown(int n) -{ - int i; - - usock_data.ud_shutdown = 1; - for (i = 0; i < n; i++) { - usock_pollthread_t *pt = &usock_data.ud_pollthreads[i]; - usocklnd_wakeup_pollthread(i); - wait_for_completion(&pt->upt_completion); - } - - pthread_rwlock_destroy(&usock_data.ud_peers_lock); - - usocklnd_release_poll_states(usock_data.ud_npollthreads); - - LIBCFS_FREE (usock_data.ud_pollthreads, - usock_data.ud_npollthreads * - sizeof(usock_pollthread_t)); - - usock_data.ud_state = UD_STATE_INIT_NOTHING; -} - -__u64 -usocklnd_new_incarnation() -{ - struct timeval tv; - int rc = gettimeofday(&tv, NULL); - LASSERT (rc == 0); - return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; -} - -static int -usocklnd_assign_ni_nid(lnet_ni_t *ni) -{ - int rc; - int up; - __u32 ipaddr; - - /* Find correct IP-address and update ni_nid with it. - * Two cases are supported: - * 1) no explicit interfaces are defined. NID will be assigned to - * first non-lo interface that is up; - * 2) exactly one explicit interface is defined. For example, - * LNET_NETWORKS='tcp(eth0)' */ - - if (ni->ni_interfaces[0] == NULL) { - char **names; - int i, n; - - n = libcfs_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Can't enumerate interfaces: %d\n", n); - return -1; - } - - for (i = 0; i < n; i++) { - - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = libcfs_ipif_query(names[i], &up, &ipaddr); - if (rc != 0) { - CWARN("Can't get interface %s info: %d\n", - names[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s (down)\n", - names[i]); - continue; - } - - break; /* one address is quite enough */ - } - - libcfs_ipif_free_enumeration(names, n); - - if (i >= n) { - CERROR("Can't find any usable interfaces\n"); - return -1; - } - - CDEBUG(D_NET, "No explicit interfaces defined. " - "%u.%u.%u.%u used\n", HIPQUAD(ipaddr)); - } else { - if (ni->ni_interfaces[1] != NULL) { - CERROR("only one explicit interface is allowed\n"); - return -1; - } - - rc = libcfs_ipif_query(ni->ni_interfaces[0], &up, &ipaddr); - if (rc != 0) { - CERROR("Can't get interface %s info: %d\n", - ni->ni_interfaces[0], rc); - return -1; - } - - if (!up) { - CERROR("Explicit interface defined: %s but is down\n", - ni->ni_interfaces[0]); - return -1; - } - - CDEBUG(D_NET, "Explicit interface defined: %s. " - "%u.%u.%u.%u used\n", - ni->ni_interfaces[0], HIPQUAD(ipaddr)); - - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ipaddr); - - return 0; -} - -int -usocklnd_startup(lnet_ni_t *ni) -{ - int rc; - usock_net_t *net; - - if (usock_data.ud_state == UD_STATE_INIT_NOTHING) { - rc = usocklnd_base_startup(); - if (rc != 0) - return rc; - } - - LIBCFS_ALLOC(net, sizeof(*net)); - if (net == NULL) - goto startup_failed_0; - - memset(net, 0, sizeof(*net)); - net->un_incarnation = usocklnd_new_incarnation(); - pthread_mutex_init(&net->un_lock, NULL); - pthread_cond_init(&net->un_cond, NULL); - - ni->ni_data = net; - - rc = usocklnd_assign_ni_nid(ni); - if (rc != 0) - goto startup_failed_1; - - LASSERT (ni->ni_lnd == &the_tcplnd); - - ni->ni_maxtxcredits = usock_tuns.ut_txcredits; - ni->ni_peertxcredits = usock_tuns.ut_peertxcredits; - - usock_data.ud_nets_count++; - return 0; - - startup_failed_1: - pthread_mutex_destroy(&net->un_lock); - pthread_cond_destroy(&net->un_cond); - LIBCFS_FREE(net, sizeof(*net)); - startup_failed_0: - if (usock_data.ud_nets_count == 0) - usocklnd_base_shutdown(usock_data.ud_npollthreads); - - return -ENETDOWN; -} - -void -usocklnd_shutdown(lnet_ni_t *ni) -{ - usock_net_t *net = ni->ni_data; - - net->un_shutdown = 1; - - usocklnd_del_all_peers(ni); - - /* Wait for all peer state to clean up */ - pthread_mutex_lock(&net->un_lock); - while (net->un_peercount != 0) - pthread_cond_wait(&net->un_cond, &net->un_lock); - pthread_mutex_unlock(&net->un_lock); - - /* Release usock_net_t structure */ - pthread_mutex_destroy(&net->un_lock); - pthread_cond_destroy(&net->un_cond); - LIBCFS_FREE(net, sizeof(*net)); - - usock_data.ud_nets_count--; - if (usock_data.ud_nets_count == 0) - usocklnd_base_shutdown(usock_data.ud_npollthreads); -} - -void -usocklnd_del_all_peers(lnet_ni_t *ni) -{ - struct list_head *ptmp; - struct list_head *pnxt; - usock_peer_t *peer; - int i; - - pthread_rwlock_wrlock(&usock_data.ud_peers_lock); - - for (i = 0; i < UD_PEER_HASH_SIZE; i++) { - list_for_each_safe(ptmp, pnxt, &usock_data.ud_peers[i]) { - peer = list_entry(ptmp, usock_peer_t, up_list); - - if (peer->up_ni != ni) - continue; - - usocklnd_del_peer_and_conns(peer); - } - } - - pthread_rwlock_unlock(&usock_data.ud_peers_lock); - - /* wakeup all threads */ - for (i = 0; i < usock_data.ud_npollthreads; i++) - usocklnd_wakeup_pollthread(i); -} - -void -usocklnd_del_peer_and_conns(usock_peer_t *peer) -{ - /* peer cannot disappear because it's still in hash list */ - - pthread_mutex_lock(&peer->up_lock); - /* content of conn[] array cannot change now */ - usocklnd_del_conns_locked(peer); - pthread_mutex_unlock(&peer->up_lock); - - /* peer hash list is still protected by the caller */ - list_del(&peer->up_list); - - usocklnd_peer_decref(peer); /* peer isn't in hash list anymore */ -} - -void -usocklnd_del_conns_locked(usock_peer_t *peer) -{ - int i; - - for (i=0; i < N_CONN_TYPES; i++) { - usock_conn_t *conn = peer->up_conns[i]; - if (conn != NULL) - usocklnd_conn_kill(conn); - } -} diff --git a/lnet/ulnds/socklnd/usocklnd.h b/lnet/ulnds/socklnd/usocklnd.h deleted file mode 100644 index 865d549..0000000 --- a/lnet/ulnds/socklnd/usocklnd.h +++ /dev/null @@ -1,367 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/ulnds/socklnd/usocklnd.h - * - * Author: Maxim Patlasov - */ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#include -#include -#include -#include - -typedef struct { - struct list_head tx_list; /* neccessary to form tx list */ - lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */ - ksock_msg_t tx_msg; /* buffer for wire header of ksock msg */ - int tx_resid; /* # of residual bytes */ - int tx_nob; /* # of packet bytes */ - int tx_size; /* size of this descriptor */ - struct iovec *tx_iov; /* points to tx_iova[i] */ - int tx_niov; /* # of packet iovec frags */ - struct iovec tx_iova[1]; /* iov for header */ -} usock_tx_t; - -struct usock_peer_s; - -typedef struct { - cfs_socket_t *uc_sock; /* socket */ - int uc_type; /* conn type */ - int uc_activeflag; /* active side of connection? */ - int uc_flip; /* is peer other endian? */ - int uc_state; /* connection state */ - struct usock_peer_s *uc_peer; /* owning peer */ - lnet_process_id_t uc_peerid; /* id of remote peer */ - int uc_pt_idx; /* index in ud_pollthreads[] of - * owning poll thread */ - lnet_ni_t *uc_ni; /* parent NI while accepting */ - struct usock_preq_s *uc_preq; /* preallocated request */ - __u32 uc_peer_ip; /* IP address of the peer */ - __u16 uc_peer_port; /* port of the peer */ - struct list_head uc_stale_list; /* orphaned connections */ - - /* Receive state */ - int uc_rx_state; /* message or hello state */ - ksock_hello_msg_t *uc_rx_hello; /* hello buffer */ - struct iovec *uc_rx_iov; /* points to uc_rx_iova[i] */ - struct iovec uc_rx_iova[LNET_MAX_IOV]; /* message frags */ - int uc_rx_niov; /* # frags */ - int uc_rx_nob_left; /* # bytes to next hdr/body */ - int uc_rx_nob_wanted; /* # of bytes actually wanted */ - void *uc_rx_lnetmsg; /* LNET message being received */ - cfs_time_t uc_rx_deadline; /* when to time out */ - int uc_rx_flag; /* deadline valid? */ - ksock_msg_t uc_rx_msg; /* message buffer */ - - /* Send state */ - struct list_head uc_tx_list; /* pending txs */ - struct list_head uc_zcack_list; /* pending zc_acks */ - cfs_time_t uc_tx_deadline; /* when to time out */ - int uc_tx_flag; /* deadline valid? */ - int uc_sending; /* send op is in progress */ - usock_tx_t *uc_tx_hello; /* fake tx with hello */ - - mt_atomic_t uc_refcount; /* # of users */ - pthread_mutex_t uc_lock; /* serialize */ - int uc_errored; /* a flag for lnet_notify() */ -} usock_conn_t; - -/* Allowable conn states are: */ -#define UC_CONNECTING 1 -#define UC_SENDING_HELLO 2 -#define UC_RECEIVING_HELLO 3 -#define UC_READY 4 -#define UC_DEAD 5 - -/* Allowable RX states are: */ -#define UC_RX_HELLO_MAGIC 1 -#define UC_RX_HELLO_VERSION 2 -#define UC_RX_HELLO_BODY 3 -#define UC_RX_HELLO_IPS 4 -#define UC_RX_KSM_HEADER 5 -#define UC_RX_LNET_HEADER 6 -#define UC_RX_PARSE 7 -#define UC_RX_PARSE_WAIT 8 -#define UC_RX_LNET_PAYLOAD 9 -#define UC_RX_SKIPPING 10 - -#define N_CONN_TYPES 3 /* CONTROL, BULK_IN and BULK_OUT */ - -typedef struct usock_peer_s { - /* neccessary to form peer list */ - struct list_head up_list; - lnet_process_id_t up_peerid; /* id of remote peer */ - usock_conn_t *up_conns[N_CONN_TYPES]; /* conns that connect us - * us with the peer */ - lnet_ni_t *up_ni; /* pointer to parent NI */ - __u64 up_incarnation; /* peer's incarnation */ - int up_incrn_is_set;/* 0 if peer's incarnation - * hasn't been set so far */ - mt_atomic_t up_refcount; /* # of users */ - pthread_mutex_t up_lock; /* serialize */ - int up_errored; /* a flag for lnet_notify() */ - cfs_time_t up_last_alive; /* when the peer was last alive */ -} usock_peer_t; - -typedef struct { - cfs_socket_t *upt_notifier[2]; /* notifier sockets: 1st for - * writing, 2nd for reading */ - struct pollfd *upt_pollfd; /* poll fds */ - int upt_nfds; /* active poll fds */ - int upt_npollfd; /* allocated poll fds */ - usock_conn_t **upt_idx2conn; /* conns corresponding to - * upt_pollfd[idx] */ - int *upt_skip; /* skip chain */ - int *upt_fd2idx; /* index into upt_pollfd[] - * by fd */ - int upt_nfd2idx; /* # of allocated elements - * of upt_fd2idx[] */ - struct list_head upt_stale_list; /* list of orphaned conns */ - struct list_head upt_pollrequests; /* list of poll requests */ - pthread_mutex_t upt_pollrequests_lock; /* serialize */ - int upt_errno; /* non-zero if errored */ - struct completion upt_completion; /* wait/signal facility for - * syncronizing shutdown */ -} usock_pollthread_t; - -/* Number of elements in upt_pollfd[], upt_idx2conn[] and upt_fd2idx[] - * at initialization time. Will be resized on demand */ -#define UPT_START_SIZ 32 - -/* # peer lists */ -#define UD_PEER_HASH_SIZE 101 - -typedef struct { - int ud_state; /* initialization state */ - int ud_npollthreads; /* # of poll threads */ - usock_pollthread_t *ud_pollthreads; /* their state */ - int ud_shutdown; /* shutdown flag */ - int ud_nets_count; /* # of instances */ - struct list_head ud_peers[UD_PEER_HASH_SIZE]; /* peer hash table */ - pthread_rwlock_t ud_peers_lock; /* serialize */ -} usock_data_t; - -extern usock_data_t usock_data; - -/* ud_state allowed values */ -#define UD_STATE_INIT_NOTHING 0 -#define UD_STATE_INITIALIZED 1 - -typedef struct { - int un_peercount; /* # of peers */ - int un_shutdown; /* shutdown flag */ - __u64 un_incarnation; /* my epoch */ - pthread_cond_t un_cond; /* condvar to wait for notifications */ - pthread_mutex_t un_lock; /* a lock to protect un_cond */ -} usock_net_t; - -typedef struct { - int ut_poll_timeout; /* the third arg for poll(2) (seconds) */ - int ut_timeout; /* "stuck" socket timeout (seconds) */ - int ut_npollthreads; /* number of poll thread to spawn */ - int ut_fair_limit; /* how many packets can we receive or transmit - * without calling poll(2) */ - int ut_min_bulk; /* smallest "large" message */ - int ut_txcredits; /* # concurrent sends */ - int ut_peertxcredits; /* # concurrent sends to 1 peer */ - int ut_socknagle; /* Is Nagle alg on ? */ - int ut_sockbufsiz; /* size of socket buffers */ -} usock_tunables_t; - -extern usock_tunables_t usock_tuns; - -typedef struct usock_preq_s { - int upr_type; /* type of requested action */ - short upr_value; /* bitmask of POLLIN and POLLOUT bits */ - usock_conn_t * upr_conn; /* a conn for the sake of which - * action will be performed */ - struct list_head upr_list; /* neccessary to form list */ -} usock_pollrequest_t; - -/* Allowable poll request types are: */ -#define POLL_ADD_REQUEST 1 -#define POLL_DEL_REQUEST 2 -#define POLL_RX_SET_REQUEST 3 -#define POLL_TX_SET_REQUEST 4 -#define POLL_SET_REQUEST 5 - -typedef struct { - struct list_head zc_list; /* neccessary to form zc_ack list */ - __u64 zc_cookie; /* zero-copy cookie */ -} usock_zc_ack_t; - -static inline void -usocklnd_conn_addref(usock_conn_t *conn) -{ - LASSERT(mt_atomic_read(&conn->uc_refcount) > 0); - mt_atomic_inc(&conn->uc_refcount); -} - -void usocklnd_destroy_conn(usock_conn_t *conn); - -static inline void -usocklnd_conn_decref(usock_conn_t *conn) -{ - LASSERT(mt_atomic_read(&conn->uc_refcount) > 0); - if (mt_atomic_dec_and_test(&conn->uc_refcount)) - usocklnd_destroy_conn(conn); -} - -static inline void -usocklnd_peer_addref(usock_peer_t *peer) -{ - LASSERT(mt_atomic_read(&peer->up_refcount) > 0); - mt_atomic_inc(&peer->up_refcount); -} - -void usocklnd_destroy_peer(usock_peer_t *peer); - -static inline void -usocklnd_peer_decref(usock_peer_t *peer) -{ - LASSERT(mt_atomic_read(&peer->up_refcount) > 0); - if (mt_atomic_dec_and_test(&peer->up_refcount)) - usocklnd_destroy_peer(peer); -} - -static inline int -usocklnd_ip2pt_idx(__u32 ip) { - return ip % usock_data.ud_npollthreads; -} - -static inline struct list_head * -usocklnd_nid2peerlist(lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % UD_PEER_HASH_SIZE; - - return &usock_data.ud_peers[hash]; -} - -int usocklnd_startup(lnet_ni_t *ni); -void usocklnd_shutdown(lnet_ni_t *ni); -int usocklnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int usocklnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int usocklnd_accept(lnet_ni_t *ni, cfs_socket_t *sock); - -int usocklnd_poll_thread(void *arg); -int usocklnd_add_pollrequest(usock_conn_t *conn, int type, short value); -void usocklnd_add_killrequest(usock_conn_t *conn); -int usocklnd_process_pollrequest(usock_pollrequest_t *pr, - usock_pollthread_t *pt_data); -void usocklnd_execute_handlers(usock_pollthread_t *pt_data); -int usocklnd_calculate_chunk_size(int num); -void usocklnd_wakeup_pollthread(int i); - -int usocklnd_notifier_handler(int fd); -void usocklnd_exception_handler(usock_conn_t *conn); -int usocklnd_read_handler(usock_conn_t *conn); -int usocklnd_read_msg(usock_conn_t *conn, int *cont_flag); -int usocklnd_handle_zc_req(usock_peer_t *peer, __u64 cookie); -int usocklnd_read_hello(usock_conn_t *conn, int *cont_flag); -int usocklnd_activeconn_hellorecv(usock_conn_t *conn); -int usocklnd_passiveconn_hellorecv(usock_conn_t *conn); -int usocklnd_write_handler(usock_conn_t *conn); -usock_tx_t *usocklnd_try_piggyback(struct list_head *tx_list_p, - struct list_head *zcack_list_p); -int usocklnd_activeconn_hellosent(usock_conn_t *conn); -int usocklnd_passiveconn_hellosent(usock_conn_t *conn); -int usocklnd_send_tx(usock_conn_t *conn, usock_tx_t *tx); -int usocklnd_read_data(usock_conn_t *conn); - -void usocklnd_release_poll_states(int n); -int usocklnd_base_startup(); -void usocklnd_base_shutdown(int n); -__u64 usocklnd_new_incarnation(); -void usocklnd_del_all_peers(lnet_ni_t *ni); -void usocklnd_del_peer_and_conns(usock_peer_t *peer); -void usocklnd_del_conns_locked(usock_peer_t *peer); - -int usocklnd_conn_timed_out(usock_conn_t *conn, cfs_time_t current_time); -void usocklnd_conn_kill(usock_conn_t *conn); -void usocklnd_conn_kill_locked(usock_conn_t *conn); -usock_conn_t *usocklnd_conn_allocate(); -void usocklnd_conn_free(usock_conn_t *conn); -void usocklnd_tear_peer_conn(usock_conn_t *conn); -void usocklnd_check_peer_stale(lnet_ni_t *ni, lnet_process_id_t id); -int usocklnd_create_passive_conn(lnet_ni_t *ni, - cfs_socket_t *sock, usock_conn_t **connp); -int usocklnd_create_active_conn(usock_peer_t *peer, int type, - usock_conn_t **connp); -int usocklnd_connect_srv_mode(cfs_socket_t **sockp, - __u32 dst_ip, __u16 dst_port); -int usocklnd_connect_cli_mode(cfs_socket_t **sockp, - __u32 dst_ip, __u16 dst_port); -int usocklnd_set_sock_options(cfs_socket_t *sock); -usock_tx_t *usocklnd_create_noop_tx(__u64 cookie); -usock_tx_t *usocklnd_create_tx(lnet_msg_t *lntmsg); -void usocklnd_init_hello_msg(ksock_hello_msg_t *hello, - lnet_ni_t *ni, int type, lnet_nid_t peer_nid); -usock_tx_t *usocklnd_create_hello_tx(lnet_ni_t *ni, - int type, lnet_nid_t peer_nid); -usock_tx_t *usocklnd_create_cr_hello_tx(lnet_ni_t *ni, - int type, lnet_nid_t peer_nid); -void usocklnd_destroy_tx(lnet_ni_t *ni, usock_tx_t *tx); -void usocklnd_destroy_txlist(lnet_ni_t *ni, struct list_head *txlist); -void usocklnd_destroy_zcack_list(struct list_head *zcack_list); -void usocklnd_destroy_peer (usock_peer_t *peer); -int usocklnd_get_conn_type(lnet_msg_t *lntmsg); -int usocklnd_type2idx(int type); -usock_peer_t *usocklnd_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id); -int usocklnd_create_peer(lnet_ni_t *ni, lnet_process_id_t id, - usock_peer_t **peerp); -int usocklnd_find_or_create_peer(lnet_ni_t *ni, lnet_process_id_t id, - usock_peer_t **peerp); -int usocklnd_find_or_create_conn(usock_peer_t *peer, int type, - usock_conn_t **connp, - usock_tx_t *tx, usock_zc_ack_t *zc_ack, - int *send_immediately_flag); -void usocklnd_link_conn_to_peer(usock_conn_t *conn, usock_peer_t *peer, int idx); -int usocklnd_invert_type(int type); -void usocklnd_conn_new_state(usock_conn_t *conn, int new_state); -void usocklnd_cleanup_stale_conns(usock_peer_t *peer, __u64 incrn, - usock_conn_t *skip_conn); - -void usocklnd_rx_hellomagic_state_transition(usock_conn_t *conn); -void usocklnd_rx_helloversion_state_transition(usock_conn_t *conn); -void usocklnd_rx_hellobody_state_transition(usock_conn_t *conn); -void usocklnd_rx_helloIPs_state_transition(usock_conn_t *conn); -void usocklnd_rx_lnethdr_state_transition(usock_conn_t *conn); -void usocklnd_rx_ksmhdr_state_transition(usock_conn_t *conn); -void usocklnd_rx_skipping_state_transition(usock_conn_t *conn); diff --git a/lnet/ulnds/socklnd/usocklnd_cb.c b/lnet/ulnds/socklnd/usocklnd_cb.c deleted file mode 100644 index 1bebbe4..0000000 --- a/lnet/ulnds/socklnd/usocklnd_cb.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/ulnds/socklnd/usocklnd_cb.c - * - * Author: Maxim Patlasov - */ - -#include "usocklnd.h" - -static int -usocklnd_send_tx_immediately(usock_conn_t *conn, usock_tx_t *tx) -{ - int rc; - int rc2; - int partial_send = 0; - usock_peer_t *peer = conn->uc_peer; - - LASSERT (peer != NULL); - - /* usocklnd_enqueue_tx() turned it on for us */ - LASSERT(conn->uc_sending); - - //counter_imm_start++; - rc = usocklnd_send_tx(conn, tx); - if (rc == 0) { /* partial send or connection closed */ - pthread_mutex_lock(&conn->uc_lock); - list_add(&tx->tx_list, &conn->uc_tx_list); - conn->uc_sending = 0; - pthread_mutex_unlock(&conn->uc_lock); - partial_send = 1; - } else { - usocklnd_destroy_tx(peer->up_ni, tx); - /* NB: lnetmsg was finalized, so we *must* return 0 */ - - if (rc < 0) { /* real error */ - usocklnd_conn_kill(conn); - return 0; - } - - /* rc == 1: tx was sent completely */ - rc = 0; /* let's say to caller 'Ok' */ - //counter_imm_complete++; - } - - pthread_mutex_lock(&conn->uc_lock); - conn->uc_sending = 0; - - /* schedule write handler */ - if (partial_send || - (conn->uc_state == UC_READY && - (!list_empty(&conn->uc_tx_list) || - !list_empty(&conn->uc_zcack_list)))) { - conn->uc_tx_deadline = - cfs_time_shift(usock_tuns.ut_timeout); - conn->uc_tx_flag = 1; - rc2 = usocklnd_add_pollrequest(conn, POLL_TX_SET_REQUEST, POLLOUT); - if (rc2 != 0) - usocklnd_conn_kill_locked(conn); - else - usocklnd_wakeup_pollthread(conn->uc_pt_idx); - } - - pthread_mutex_unlock(&conn->uc_lock); - - return rc; -} - -int -usocklnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - usock_tx_t *tx; - lnet_process_id_t target = lntmsg->msg_target; - usock_peer_t *peer; - int type; - int rc; - usock_conn_t *conn; - int send_immediately; - - tx = usocklnd_create_tx(lntmsg); - if (tx == NULL) - return -ENOMEM; - - rc = usocklnd_find_or_create_peer(ni, target, &peer); - if (rc) { - LIBCFS_FREE (tx, tx->tx_size); - return rc; - } - /* peer cannot disappear now because its refcount was incremented */ - - type = usocklnd_get_conn_type(lntmsg); - rc = usocklnd_find_or_create_conn(peer, type, &conn, tx, NULL, - &send_immediately); - if (rc != 0) { - usocklnd_peer_decref(peer); - usocklnd_check_peer_stale(ni, target); - LIBCFS_FREE (tx, tx->tx_size); - return rc; - } - /* conn cannot disappear now because its refcount was incremented */ - - if (send_immediately) - rc = usocklnd_send_tx_immediately(conn, tx); - - usocklnd_conn_decref(conn); - usocklnd_peer_decref(peer); - return rc; -} - -int -usocklnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - int rc = 0; - usock_conn_t *conn = (usock_conn_t *)private; - - /* I don't think that we'll win much concurrency moving lock() - * call below lnet_extract_iov() */ - pthread_mutex_lock(&conn->uc_lock); - - conn->uc_rx_lnetmsg = msg; - conn->uc_rx_nob_wanted = mlen; - conn->uc_rx_nob_left = rlen; - conn->uc_rx_iov = conn->uc_rx_iova; - conn->uc_rx_niov = - lnet_extract_iov(LNET_MAX_IOV, conn->uc_rx_iov, - niov, iov, offset, mlen); - - /* the gap between lnet_parse() and usocklnd_recv() happened? */ - if (conn->uc_rx_state == UC_RX_PARSE_WAIT) { - conn->uc_rx_flag = 1; /* waiting for incoming lnet payload */ - conn->uc_rx_deadline = - cfs_time_shift(usock_tuns.ut_timeout); - rc = usocklnd_add_pollrequest(conn, POLL_RX_SET_REQUEST, POLLIN); - if (rc != 0) { - usocklnd_conn_kill_locked(conn); - goto recv_out; - } - usocklnd_wakeup_pollthread(conn->uc_pt_idx); - } - - conn->uc_rx_state = UC_RX_LNET_PAYLOAD; - recv_out: - pthread_mutex_unlock(&conn->uc_lock); - usocklnd_conn_decref(conn); - return rc; -} - -int -usocklnd_accept(lnet_ni_t *ni, cfs_socket_t *sock) -{ - int rc; - usock_conn_t *conn; - - rc = usocklnd_create_passive_conn(ni, sock, &conn); - if (rc) - return rc; - LASSERT(conn != NULL); - - /* disable shutdown event temporarily */ - lnet_ni_addref(ni); - - rc = usocklnd_add_pollrequest(conn, POLL_ADD_REQUEST, POLLIN); - if (rc == 0) - usocklnd_wakeup_pollthread(conn->uc_pt_idx); - - /* NB: conn reference counter was incremented while adding - * poll request if rc == 0 */ - - usocklnd_conn_decref(conn); /* should destroy conn if rc != 0 */ - return rc; -} diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am index d43150d..5c01c8c 100644 --- a/lnet/utils/Makefile.am +++ b/lnet/utils/Makefile.am @@ -74,10 +74,3 @@ lnetctl_LDADD = -L. -lm -lyaml -llnetconfig \ $(LIBCFSUTIL) $(LIBCFS) $(LIBREADLINE) $(LIBEFENCE) lnetctl_DEPENDENCIES = $(top_builddir)/lnet/utils/lnetconfig/liblnetconfig.so endif # UTILS - -LND_LIBS = -if BUILD_USOCKLND -LND_LIBS += $(top_builddir)/lnet/ulnds/socklnd/libsocklnd.a -endif - -EXTRA_DIST = genlib.sh diff --git a/lnet/utils/genlib.sh b/lnet/utils/genlib.sh deleted file mode 100755 index 02b22c6..0000000 --- a/lnet/utils/genlib.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -#set -xv -set -e - -CWD=`pwd` - -LIBS=$1 -LND_LIBS=$2 -PTHREAD_LIBS=$3 - -# do cleanup at first -rm -f liblst.so - -ALL_OBJS= - -build_obj_list() { - _objs=`$AR -t $1/$2 | grep -v SYMDEF | grep -v SORTED` - for _lib in $_objs; do - ALL_OBJS=$ALL_OBJS"$1/$_lib "; - done; -} - -# lnet components libs -build_obj_list ../../libcfs/libcfs libcfs.a -build_obj_list ../../libcfs/libcfs libcfsutil.a -if $(echo "$LND_LIBS" | grep "socklnd" >/dev/null) ; then - build_obj_list ../../lnet/ulnds/socklnd libsocklnd.a -fi -build_obj_list ../../lnet/lnet liblnet.a -build_obj_list ../../lnet/selftest libselftest.a - -# create static lib lustre -rm -f $CWD/liblst.a -$AR -cru $CWD/liblst.a $ALL_OBJS -$RANLIB $CWD/liblst.a