int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+/* libcfs tcpip */
+int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
+int libcfs_ipif_enumerate(char ***names);
+void libcfs_ipif_free_enumeration(char **names, int n);
+int libcfs_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog);
+int libcfs_sock_accept(struct socket **newsockp, struct socket *sock,
+ int bufsize);
+void libcfs_sock_abort_accept(struct socket *sock);
+int libcfs_sock_connect(struct socket **sockp, int *fatal, int bufsize,
+ __u32 local_ip, int local_port,
+ __u32 peer_ip, int peer_port);
+int libcfs_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize);
+int libcfs_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize);
+int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout);
+int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout);
+void libcfs_sock_release(struct socket *sock);
+
+void libcfs_pause(cfs_duration_t ticks);
+
/* libcfs watchdogs */
struct lc_watchdog;
EXTRA_DIST := kp30.h libcfs.h linux-fs.h linux-lock.h linux-mem.h \
- linux-prim.h linux-time.h lltrace.h portals_compat25.h \
- portals_lib.h portals_utils.h
+ linux-prim.h linux-time.h linux-tcpip.h lltrace.h \
+ portals_compat25.h portals_lib.h portals_utils.h
#include <libcfs/linux/linux-prim.h>
#include <libcfs/linux/linux-lock.h>
#include <libcfs/linux/linux-fs.h>
+#include <libcfs/linux/linux-tcpip.h>
#ifdef HAVE_ASM_TYPES_H
#include <asm/types.h>
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_TCP_H__
+#define __LIBCFS_LINUX_CFS_TCP_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <net/sock.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72))
+# define sk_allocation allocation
+# define sk_data_ready data_ready
+# define sk_write_space write_space
+# define sk_user_data user_data
+# define sk_prot prot
+# define sk_sndbuf sndbuf
+# define sk_socket socket
+# define sk_sleep sleep
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+# define sk_wmem_queued wmem_queued
+# define sk_err err
+#endif
+
+#define SOCK_SNDBUF(so) ((so)->sk->sk_sndbuf)
+#define SOCK_WMEM_QUEUED(so) ((so)->sk->sk_wmem_queued)
+#define SOCK_ERROR(so) ((so)->sk->sk_err)
+#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags)
+
+#endif
+
+#endif
#ifdef __KERNEL__
extern void ptl_register_nal(ptl_nal_t *nal);
extern void ptl_unregister_nal(ptl_nal_t *nal);
+extern ptl_err_t ptl_set_ip_niaddr (ptl_ni_t *ni);
#endif
extern ptl_err_t ptl_parse_routes (char *route_str);
#ifdef __KERNEL__
extern void ptl_register_nal(ptl_nal_t *nal);
extern void ptl_unregister_nal(ptl_nal_t *nal);
+extern ptl_err_t ptl_set_ip_niaddr (ptl_ni_t *ni);
#endif
extern ptl_err_t ptl_parse_routes (char *route_str);
MODULES := kranal
-kranal-objs := ranal.o ranal_cb.o
+kranal-objs := ranal.o ranal_cb.o ranal_modparams.o
EXTRA_POST_CFLAGS := @RACPPFLAGS@
*/
#include "ranal.h"
-static int kranal_devids[] = {RAPK_MAIN_DEVICE_ID,
- RAPK_EXPANSION_DEVICE_ID};
+static int kranal_devids[RANAL_MAXDEVS] = {RAPK_MAIN_DEVICE_ID,
+ RAPK_EXPANSION_DEVICE_ID};
ptl_nal_t kranal_nal = {
.nal_type = RANAL,
};
kra_data_t kranal_data;
-kra_tunables_t kranal_tunables;
-
-#define RANAL_SYSCTL_TIMEOUT 1
-#define RANAL_SYSCTL_LISTENER_TIMEOUT 2
-#define RANAL_SYSCTL_BACKLOG 3
-#define RANAL_SYSCTL_PORT 4
-#define RANAL_SYSCTL_MAX_IMMEDIATE 5
-
-#define RANAL_SYSCTL 202
-
-static ctl_table kranal_ctl_table[] = {
- {RANAL_SYSCTL_TIMEOUT, "timeout",
- &kranal_tunables.kra_timeout, sizeof(int),
- 0644, NULL, &proc_dointvec},
- {RANAL_SYSCTL_LISTENER_TIMEOUT, "listener_timeout",
- &kranal_tunables.kra_listener_timeout, sizeof(int),
- 0644, NULL, &proc_dointvec},
- {RANAL_SYSCTL_BACKLOG, "backlog",
- &kranal_tunables.kra_backlog, sizeof(int),
- 0644, NULL, kranal_listener_procint},
- {RANAL_SYSCTL_PORT, "port",
- &kranal_tunables.kra_port, sizeof(int),
- 0644, NULL, kranal_listener_procint},
- {RANAL_SYSCTL_MAX_IMMEDIATE, "max_immediate",
- &kranal_tunables.kra_max_immediate, sizeof(int),
- 0644, NULL, &proc_dointvec},
- { 0 }
-};
-
-static ctl_table kranal_top_ctl_table[] = {
- {RANAL_SYSCTL, "ranal", NULL, 0, 0555, kranal_ctl_table},
- { 0 }
-};
-
-int
-kranal_sock_write (struct socket *sock, void *buffer, int nob)
-{
- int rc;
- mm_segment_t oldmm = get_fs();
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = MSG_DONTWAIT
- };
-
- /* We've set up the socket's send buffer to be large enough for
- * everything we send, so a single non-blocking send should
- * complete without error. */
-
- set_fs(KERNEL_DS);
- rc = sock_sendmsg(sock, &msg, iov.iov_len);
- set_fs(oldmm);
-
- if (rc == nob)
- return 0;
-
- if (rc >= 0)
- return -EAGAIN;
-
- return rc;
-}
-
-int
-kranal_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
-{
- int rc;
- mm_segment_t oldmm = get_fs();
- long ticks = timeout * HZ;
- unsigned long then;
- struct timeval tv;
-
- LASSERT (nob > 0);
- LASSERT (ticks > 0);
-
- for (;;) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
-
- /* Set receive timeout to remaining time */
- tv = (struct timeval) {
- .tv_sec = ticks / HZ,
- .tv_usec = ((ticks % HZ) * 1000000) / HZ
- };
- set_fs(KERNEL_DS);
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
- (char *)&tv, sizeof(tv));
- set_fs(oldmm);
- if (rc != 0) {
- CERROR("Can't set socket recv timeout %d: %d\n",
- timeout, rc);
- return rc;
- }
-
- set_fs(KERNEL_DS);
- then = jiffies;
- rc = sock_recvmsg(sock, &msg, iov.iov_len, 0);
- ticks -= jiffies - then;
- set_fs(oldmm);
-
- if (rc < 0)
- return rc;
-
- if (rc == 0)
- return -ECONNABORTED;
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
-
- if (nob == 0)
- return 0;
-
- if (ticks <= 0)
- return -ETIMEDOUT;
- }
-}
-
-int
-kranal_create_sock(struct socket **sockp)
-{
- struct socket *sock;
- int rc;
- int option;
- mm_segment_t oldmm = get_fs();
-
- rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
- if (rc != 0) {
- CERROR("Can't create socket: %d\n", rc);
- return rc;
- }
-
- /* Ensure sending connection info doesn't block */
- option = 2 * sizeof(kra_connreq_t);
- set_fs(KERNEL_DS);
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof(option));
- set_fs(oldmm);
- if (rc != 0) {
- CERROR("Can't set send buffer %d: %d\n", option, rc);
- goto failed;
- }
-
- option = 1;
- set_fs(KERNEL_DS);
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof(option));
- set_fs(oldmm);
- if (rc != 0) {
- CERROR("Can't set SO_REUSEADDR: %d\n", rc);
- goto failed;
- }
-
- *sockp = sock;
- return 0;
-
- failed:
- sock_release(sock);
- return rc;
-}
-
-void
-kranal_pause(int ticks)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ticks);
-}
void
kranal_pack_connreq(kra_connreq_t *connreq, kra_conn_t *conn, ptl_nid_t dstnid)
{
int rc;
- rc = kranal_sock_read(sock, connreq, sizeof(*connreq), timeout);
+ rc = libcfs_sock_read(sock, connreq, sizeof(*connreq), timeout);
if (rc != 0) {
CERROR("Read failed: %d\n", rc);
return rc;
kranal_set_conn_uniqueness(conn);
conn->rac_device = dev;
- conn->rac_timeout = MAX(kranal_tunables.kra_timeout, RANAL_MIN_TIMEOUT);
+ conn->rac_timeout = MAX(*kranal_tunables.kra_timeout, RANAL_MIN_TIMEOUT);
kranal_update_reaper_timeout(conn->rac_timeout);
rrc = RapkCreateRi(dev->rad_handle, conn->rac_cqid,
}
rc = kranal_recv_connreq(sock, &rx_connreq,
- kranal_tunables.kra_listener_timeout);
+ *kranal_tunables.kra_listener_timeout);
if (rc != 0) {
CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n",
HIPQUAD(peer_ip), peer_port, rc);
kranal_pack_connreq(&tx_connreq, conn, rx_connreq.racr_srcnid);
- rc = kranal_sock_write(sock, &tx_connreq, sizeof(tx_connreq));
+ rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq), 0);
if (rc != 0) {
CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n",
HIPQUAD(peer_ip), peer_port, rc);
int
ranal_connect_sock(kra_peer_t *peer, struct socket **sockp)
{
- struct sockaddr_in locaddr;
- struct sockaddr_in srvaddr;
- struct socket *sock;
unsigned int port;
+ int fatal;
int rc;
for (port = 1023; port >= 512; port--) {
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(port);
- locaddr.sin_addr.s_addr = htonl(INADDR_ANY);
-
- memset (&srvaddr, 0, sizeof (srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons (peer->rap_port);
- srvaddr.sin_addr.s_addr = htonl (peer->rap_ip);
-
- rc = kranal_create_sock(&sock);
- if (rc != 0)
- return rc;
-
- rc = sock->ops->bind(sock,
- (struct sockaddr *)&locaddr, sizeof(locaddr));
- if (rc != 0) {
- sock_release(sock);
-
- if (rc == -EADDRINUSE) {
- CDEBUG(D_NET, "Port %d already in use\n", port);
- continue;
- }
-
- CERROR("Can't bind to reserved port %d: %d\n", port, rc);
- return rc;
- }
-
- rc = sock->ops->connect(sock,
- (struct sockaddr *)&srvaddr, sizeof(srvaddr),
- 0);
- if (rc == 0) {
- *sockp = sock;
+ rc = libcfs_sock_connect(sockp, &fatal,
+ 2 * sizeof(kra_conn_t),
+ 0, port,
+ peer->rap_ip, peer->rap_port);
+ if (rc == 0)
return 0;
+
+ if (!fatal) {
+ CDEBUG(D_NET, "Port %d already in use\n", port);
+ continue;
}
- sock_release(sock);
-
- if (rc != -EADDRNOTAVAIL) {
- CERROR("Can't connect port %d to %u.%u.%u.%u/%d: %d\n",
- port, HIPQUAD(peer->rap_ip), peer->rap_port, rc);
- return rc;
- }
-
- CDEBUG(D_NET, "Port %d not available for %u.%u.%u.%u/%d\n",
- port, HIPQUAD(peer->rap_ip), peer->rap_port);
+ CERROR("Can't connect port %d to %u.%u.%u.%u/%d: %d\n",
+ port, HIPQUAD(peer->rap_ip), peer->rap_port, rc);
+ return rc;
}
/* all ports busy */
+ CERROR("Can't connect to %u.%u.%u.%u/%d: all ports busy\n",
+ HIPQUAD(peer->rap_ip), peer->rap_port);
return -EHOSTUNREACH;
}
* immediately after accepting a connection, so we connect and then
* send immediately. */
- rc = kranal_sock_write(sock, &connreq, sizeof(connreq));
+ rc = libcfs_sock_write(sock, &connreq, sizeof(connreq), 0);
if (rc != 0) {
CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n",
HIPQUAD(peer->rap_ip), peer->rap_port, rc);
goto failed_1;
}
- rc = kranal_recv_connreq(sock, &connreq, kranal_tunables.kra_timeout);
+ rc = kranal_recv_connreq(sock, &connreq, *kranal_tunables.kra_timeout);
if (rc != 0) {
CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n",
HIPQUAD(peer->rap_ip), peer->rap_port, rc);
goto failed_1;
}
- sock_release(sock);
+ libcfs_sock_release(sock);
rc = -EPROTO;
if (connreq.racr_srcnid != peer->rap_nid) {
return 0;
failed_1:
- sock_release(sock);
+ libcfs_sock_release(sock);
failed_0:
kranal_conn_decref(conn);
return rc;
return rc;
/* assume this is a new peer */
- peer = kranal_create_peer(peer_nid);
- if (peer == NULL) {
- CERROR("Can't allocate peer for "LPX64"\n", peer_nid);
+ rc = kranal_create_peer(&peer, peer_nid);
+ if (rc != 0) {
+ CERROR("Can't create conn for %s\n",
+ libcfs_nid2str(peer_nid));
kranal_conn_decref(conn);
return -ENOMEM;
}
LASSERT (list_empty(&peer->rap_tx_queue));
/* reset reconnection timeouts */
- peer->rap_reconnect_interval = RANAL_MIN_RECONNECT_INTERVAL;
+ peer->rap_reconnect_interval =
+ *kranal_tunables.kra_min_reconnect_interval;
peer->rap_reconnect_time = CURRENT_SECONDS;
write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
LASSERT (peer->rap_reconnect_interval != 0);
peer->rap_reconnect_time = CURRENT_SECONDS + peer->rap_reconnect_interval;
- peer->rap_reconnect_interval = MAX(RANAL_MAX_RECONNECT_INTERVAL,
- 1 * peer->rap_reconnect_interval);
+ peer->rap_reconnect_interval =
+ MAX(*kranal_tunables.kra_max_reconnect_interval,
+ 2 * peer->rap_reconnect_interval);
/* Grab all blocked packets while we have the global lock */
list_add(&zombies, &peer->rap_tx_queue);
void
kranal_free_acceptsock (kra_acceptsock_t *ras)
{
- sock_release(ras->ras_sock);
+ libcfs_sock_release(ras->ras_sock);
PORTAL_FREE(ras, sizeof(*ras));
}
int
kranal_listener (void *arg)
{
- struct sockaddr_in addr;
wait_queue_t wait;
struct socket *sock;
kra_acceptsock_t *ras;
int rc;
unsigned long flags;
- /* Parent thread holds kra_nid_mutex, and is, or is about to
- * block on kra_listener_signal */
+ /* Parent thread is blocked or about to block on kra_listener_signal */
- port = kranal_tunables.kra_port;
+ port = *kranal_tunables.kra_port;
snprintf(name, sizeof(name), "kranal_lstn%03d", port);
kportal_daemonize(name);
kportal_blockallsigs();
init_waitqueue_entry(&wait, current);
- rc = kranal_create_sock(&sock);
+ rc = libcfs_sock_listen(&sock, 0, port,
+ *kranal_tunables.kra_backlog);
if (rc != 0)
- goto out_0;
-
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_port = htons(port);
- addr.sin_addr.s_addr = INADDR_ANY;
-
- rc = sock->ops->bind(sock, (struct sockaddr *)&addr, sizeof(addr));
- if (rc != 0) {
- CERROR("Can't bind to port %d\n", port);
- goto out_1;
- }
-
- rc = sock->ops->listen(sock, kranal_tunables.kra_backlog);
- if (rc != 0) {
- CERROR("Can't set listen backlog %d: %d\n",
- kranal_tunables.kra_backlog, rc);
- goto out_1;
- }
+ goto out;
LASSERT (kranal_data.kra_listener_sock == NULL);
kranal_data.kra_listener_sock = sock;
if (ras == NULL) {
PORTAL_ALLOC(ras, sizeof(*ras));
if (ras == NULL) {
- CERROR("Out of Memory: pausing...\n");
- kranal_pause(HZ);
+ CERROR("ENOMEM: listener pausing\n");
+ libcfs_pause(cfs_time_seconds(1));
continue;
}
- ras->ras_sock = NULL;
}
- if (ras->ras_sock == NULL) {
- ras->ras_sock = sock_alloc();
- if (ras->ras_sock == NULL) {
- CERROR("Can't allocate socket: pausing...\n");
- kranal_pause(HZ);
- continue;
+ rc = libcfs_sock_accept(&ras->ras_sock, sock,
+ 2 * sizeof(kra_conn_t));
+ if (rc != 0) {
+ if (rc != -EAGAIN) {
+ CWARN("Accept error: %d, listener pausing\n", rc);
+ libcfs_pause(cfs_time_seconds(1));
}
- /* XXX this should add a ref to sock->ops->owner, if
- * TCP could be a module */
- ras->ras_sock->type = sock->type;
- ras->ras_sock->ops = sock->ops;
+ continue;
}
- set_current_state(TASK_INTERRUPTIBLE);
-
- rc = sock->ops->accept(sock, ras->ras_sock, O_NONBLOCK);
+ spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
- /* Sleep for socket activity? */
- if (rc == -EAGAIN &&
- kranal_data.kra_listener_shutdown == 0)
- schedule();
+ list_add_tail(&ras->ras_list, &kranal_data.kra_connd_acceptq);
- set_current_state(TASK_RUNNING);
-
- if (rc == 0) {
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-
- list_add_tail(&ras->ras_list,
- &kranal_data.kra_connd_acceptq);
-
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
- wake_up(&kranal_data.kra_connd_waitq);
-
- ras = NULL;
- continue;
- }
+ spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
+ wake_up(&kranal_data.kra_connd_waitq);
- if (rc != -EAGAIN) {
- CERROR("Accept failed: %d, pausing...\n", rc);
- kranal_pause(HZ);
- }
+ ras = NULL;
}
- if (ras != NULL) {
- if (ras->ras_sock != NULL)
- sock_release(ras->ras_sock);
+ if (ras != NULL)
PORTAL_FREE(ras, sizeof(*ras));
- }
rc = 0;
- remove_wait_queue(sock->sk->sk_sleep, &wait);
- out_1:
- sock_release(sock);
+ libcfs_sock_release(sock);
kranal_data.kra_listener_sock = NULL;
- out_0:
+ out:
/* set completion status and unblock thread waiting for me
* (parent on startup failure, executioner on normal shutdown) */
kranal_data.kra_listener_shutdown = rc;
CDEBUG(D_NET, "Starting listener\n");
- /* Called holding kra_nid_mutex: listener stopped */
LASSERT (kranal_data.kra_listener_sock == NULL);
kranal_data.kra_listener_shutdown = 0;
}
void
-kranal_stop_listener(int clear_acceptq)
+kranal_stop_listener(void)
{
- struct list_head zombie_accepts;
- unsigned long flags;
- kra_acceptsock_t *ras;
-
CDEBUG(D_NET, "Stopping listener\n");
- /* Called holding kra_nid_mutex: listener running */
LASSERT (kranal_data.kra_listener_sock != NULL);
kranal_data.kra_listener_shutdown = 1;
- wake_up_all(kranal_data.kra_listener_sock->sk->sk_sleep);
+ libcfs_sock_abort_accept(kranal_data.kra_listener_sock);
/* Block until listener has torn down. */
down(&kranal_data.kra_listener_signal);
LASSERT (kranal_data.kra_listener_sock == NULL);
CDEBUG(D_NET, "Listener stopped\n");
-
- if (!clear_acceptq)
- return;
-
- /* Close any unhandled accepts */
- spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-
- list_add(&zombie_accepts, &kranal_data.kra_connd_acceptq);
- list_del_init(&kranal_data.kra_connd_acceptq);
-
- spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
- while (!list_empty(&zombie_accepts)) {
- ras = list_entry(zombie_accepts.next,
- kra_acceptsock_t, ras_list);
- list_del(&ras->ras_list);
- kranal_free_acceptsock(ras);
- }
}
int
-kranal_listener_procint(ctl_table *table, int write, struct file *filp,
- void *buffer, size_t *lenp)
+kranal_create_peer (kra_peer_t **peerp, ptl_nid_t nid)
{
- int *tunable = (int *)table->data;
- int old_val;
- int rc;
-
- /* No race with nal initialisation since the nal is setup all the time
- * it's loaded. When that changes, change this! */
- LASSERT (kranal_data.kra_init == RANAL_INIT_ALL);
-
- down(&kranal_data.kra_nid_mutex);
-
- LASSERT (tunable == &kranal_tunables.kra_port ||
- tunable == &kranal_tunables.kra_backlog);
- old_val = *tunable;
-
- rc = proc_dointvec(table, write, filp, buffer, lenp);
-
- if (write &&
- (*tunable != old_val ||
- kranal_data.kra_listener_sock == NULL)) {
-
- if (kranal_data.kra_listener_sock != NULL)
- kranal_stop_listener(0);
-
- rc = kranal_start_listener();
-
- if (rc != 0) {
- CWARN("Unable to start listener with new tunable:"
- " reverting to old value\n");
- *tunable = old_val;
- kranal_start_listener();
- }
- }
-
- up(&kranal_data.kra_nid_mutex);
-
- LASSERT (kranal_data.kra_init == RANAL_INIT_ALL);
- return rc;
-}
-
-int
-kranal_set_mynid(ptl_nid_t nid)
-{
- unsigned long flags;
- ptl_ni_t *ni = kranal_data.kra_ni;
- int rc = 0;
-
- CDEBUG(D_NET, "setting mynid to "LPX64" (old nid="LPX64")\n",
- nid, ni->ni_nid);
-
- down(&kranal_data.kra_nid_mutex);
-
- if (nid == ni->ni_nid) {
- /* no change of NID */
- up(&kranal_data.kra_nid_mutex);
- return 0;
- }
-
- if (kranal_data.kra_listener_sock != NULL)
- kranal_stop_listener(1);
-
- write_lock_irqsave(&kranal_data.kra_global_lock, flags);
- kranal_data.kra_peerstamp++;
- ni->ni_nid = nid;
- write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
- /* Delete all existing peers and their connections after new
- * NID/connstamp set to ensure no old connections in our brave
- * new world. */
- kranal_del_peer(PTL_NID_ANY);
-
- if (nid != PTL_NID_ANY)
- rc = kranal_start_listener();
-
- up(&kranal_data.kra_nid_mutex);
- return rc;
-}
-
-kra_peer_t *
-kranal_create_peer (ptl_nid_t nid)
-{
- kra_peer_t *peer;
+ kra_peer_t *peer;
+ unsigned long flags;
LASSERT (nid != PTL_NID_ANY);
PORTAL_ALLOC(peer, sizeof(*peer));
if (peer == NULL)
- return NULL;
+ return -ENOMEM;
memset(peer, 0, sizeof(*peer)); /* zero flags etc */
INIT_LIST_HEAD(&peer->rap_tx_queue);
peer->rap_reconnect_time = CURRENT_SECONDS;
- peer->rap_reconnect_interval = RANAL_MIN_RECONNECT_INTERVAL;
+ peer->rap_reconnect_interval =
+ *kranal_tunables.kra_min_reconnect_interval;
- atomic_inc(&kranal_data.kra_npeers);
- return peer;
+ write_lock_irqsave(&kranal_data.kra_global_lock, flags);
+
+ if (kranal_data.kra_listener_shutdown) {
+ /* shutdown has started already */
+ write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
+
+ PORTAL_FREE(peer, sizeof(*peer));
+ CERROR("Can't create peer: network shutdown\n");
+ return -ESHUTDOWN;
+ }
+
+ kranal_data.kra_npeers++;
+
+ write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
+
+ *peerp = peer;
+ return 0;
}
void
kranal_destroy_peer (kra_peer_t *peer)
{
+ unsigned long flags;
+
CDEBUG(D_NET, "peer "LPX64" %p deleted\n", peer->rap_nid, peer);
LASSERT (atomic_read(&peer->rap_refcount) == 0);
* they are destroyed, so we can be assured that _all_ state to do
* with this peer has been cleaned up when its refcount drops to
* zero. */
- atomic_dec(&kranal_data.kra_npeers);
+ write_lock_irqsave(&kranal_data.kra_global_lock, flags);
+ kranal_data.kra_npeers--;
+ write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
}
kra_peer_t *
unsigned long flags;
kra_peer_t *peer;
kra_peer_t *peer2;
+ int rc;
if (nid == PTL_NID_ANY)
return -EINVAL;
- peer = kranal_create_peer(nid);
- if (peer == NULL)
- return -ENOMEM;
+ rc = kranal_create_peer(&peer, nid);
+ if (rc != 0)
+ return rc;
write_lock_irqsave(&kranal_data.kra_global_lock, flags);
break;
}
case IOC_PORTAL_REGISTER_MYNID: {
- if (data->ioc_nid == PTL_NID_ANY)
+ /* Ignore if this is a noop */
+ if (data->ioc_nid == ni->ni_nid) {
+ rc = 0;
+ } else {
+ CERROR("obsolete IOC_PORTAL_REGISTER_MYNID: %s(%s)\n",
+ libcfs_nid2str(data->ioc_nid),
+ libcfs_nid2str(ni->ni_nid));
rc = -EINVAL;
- else
- rc = kranal_set_mynid(data->ioc_nid);
+ }
break;
}
}
int
kranal_device_init(int id, kra_device_t *dev)
{
- const int total_ntx = RANAL_NTX + RANAL_NTX_NBLK;
+ int total_ntx = *kranal_tunables.kra_ntx +
+ *kranal_tunables.kra_ntx_nblk;
RAP_RETURN rrc;
dev->rad_id = id;
rrc = RapkCreateCQ(dev->rad_handle, total_ntx, RAP_CQTYPE_SEND,
&dev->rad_rdma_cqh);
if (rrc != RAP_SUCCESS) {
- CERROR("Can't create rdma cq size %d"
- " for device %d: %d\n", total_ntx, id, rrc);
+ CERROR("Can't create rdma cq size %d for device %d: %d\n",
+ total_ntx, id, rrc);
goto failed_1;
}
- rrc = RapkCreateCQ(dev->rad_handle, RANAL_FMA_CQ_SIZE, RAP_CQTYPE_RECV,
- &dev->rad_fma_cqh);
+ rrc = RapkCreateCQ(dev->rad_handle,
+ *kranal_tunables.kra_fma_cq_size,
+ RAP_CQTYPE_RECV, &dev->rad_fma_cqh);
if (rrc != RAP_SUCCESS) {
- CERROR("Can't create fma cq size %d"
- " for device %d: %d\n", RANAL_FMA_CQ_SIZE, id, rrc);
+ CERROR("Can't create fma cq size %d for device %d: %d\n",
+ *kranal_tunables.kra_fma_cq_size, id, rrc);
goto failed_2;
}
void
kranal_device_fini(kra_device_t *dev)
{
+ LASSERT (list_empty(&dev->rad_ready_conns));
+ LASSERT (list_empty(&dev->rad_new_conns));
+ LASSERT (dev->rad_nphysmap == 0);
+ LASSERT (dev->rad_nppphysmap == 0);
+ LASSERT (dev->rad_nvirtmap == 0);
+ LASSERT (dev->rad_nobvirtmap == 0);
+
LASSERT(dev->rad_scheduler == NULL);
RapkDestroyCQ(dev->rad_handle, dev->rad_fma_cqh);
RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh);
LBUG();
case RANAL_INIT_ALL:
- /* resetting my NID to unadvertises me, removes my
- * listener and nukes all current peers */
- kranal_set_mynid(PTL_NID_ANY);
- /* no new peers or conns */
+ /* Stop listener and prevent new peers from being created */
+ kranal_stop_listener();
- /* Wait for all peer/conn state to clean up */
+ /* Remove all existing peers from the peer table */
+ kranal_del_peer(PTL_NID_ANY);
+
+ /* Wait for pending conn reqs to be handled */
i = 2;
- while (atomic_read(&kranal_data.kra_nconns) != 0 ||
- atomic_read(&kranal_data.kra_npeers) != 0) {
+ spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
+ while (!list_empty(&kranal_data.kra_connd_acceptq)) {
+ spin_unlock_irqrestore(&kranal_data.kra_connd_lock,
+ flags);
i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers and %d conns to close down\n",
- atomic_read(&kranal_data.kra_npeers),
- atomic_read(&kranal_data.kra_nconns));
- kranal_pause(HZ);
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
+ "waiting for conn reqs to clean up\n");
+ libcfs_pause(cfs_time_seconds(1));
+
+ spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
}
+ spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
+
+ /* Wait for all peers to be freed */
+ i = 2;
+ write_lock_irqsave(&kranal_data.kra_global_lock, flags);
+ while (kranal_data.kra_npeers != 0) {
+ write_unlock_irqrestore(&kranal_data.kra_global_lock,
+ flags);
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
+ "waiting for %d peers to close down\n",
+ kranal_data.kra_npeers);
+ libcfs_pause(cfs_time_seconds(1));
+
+ write_lock_irqsave(&kranal_data.kra_global_lock,
+ flags);
+ }
+ write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
/* fall through */
case RANAL_INIT_DATA:
break;
}
- /* Conn/Peer state all cleaned up BEFORE setting shutdown, so threads
- * don't have to worry about shutdown races */
- LASSERT (atomic_read(&kranal_data.kra_nconns) == 0);
- LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
+ /* Peer state all cleaned up BEFORE setting shutdown, so threads don't
+ * have to worry about shutdown races. NB connections may be created
+ * while there are still active connds, but these will be temporary
+ * since peer creation always fails after the listener has started to
+ * shut down. */
+ LASSERT (kranal_data.kra_npeers == 0);
- /* flag threads to terminate; wake and wait for them to die */
+ /* Flag threads to terminate */
kranal_data.kra_shutdown = 1;
for (i = 0; i < kranal_data.kra_ndevs; i++) {
kra_device_t *dev = &kranal_data.kra_devices[i];
- LASSERT (list_empty(&dev->rad_ready_conns));
- LASSERT (list_empty(&dev->rad_new_conns));
-
spin_lock_irqsave(&dev->rad_lock, flags);
wake_up(&dev->rad_waitq);
spin_unlock_irqrestore(&dev->rad_lock, flags);
wake_up_all(&kranal_data.kra_connd_waitq);
spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
+ /* Wait for threads to exit */
i = 2;
while (atomic_read(&kranal_data.kra_nthreads) != 0) {
i++;
CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
"Waiting for %d threads to terminate\n",
atomic_read(&kranal_data.kra_nthreads));
- kranal_pause(HZ);
+ libcfs_pause(cfs_time_seconds(1));
}
- LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
+ LASSERT (kranal_data.kra_npeers == 0);
if (kranal_data.kra_peers != NULL) {
for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
LASSERT (list_empty(&kranal_data.kra_peers[i]));
CERROR ("Only 1 instance supported\n");
return PTL_FAIL;
}
+
+ if (ptl_set_ip_niaddr(ni) != PTL_OK) {
+ CERROR ("Can't determine my NID\n");
+ return PTL_FAIL;
+ }
memset(&kranal_data, 0, sizeof(kranal_data)); /* zero pointers, flags etc */
kranal_data.kra_connstamp =
kranal_data.kra_peerstamp = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
- init_MUTEX(&kranal_data.kra_nid_mutex);
init_MUTEX_LOCKED(&kranal_data.kra_listener_signal);
rwlock_init(&kranal_data.kra_global_lock);
for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
INIT_LIST_HEAD(&kranal_data.kra_conns[i]);
- rc = kranal_alloc_txdescs(&kranal_data.kra_idle_txs, RANAL_NTX);
+ rc = kranal_alloc_txdescs(&kranal_data.kra_idle_txs,
+ *kranal_tunables.kra_ntx);
if (rc != 0)
goto failed;
- rc = kranal_alloc_txdescs(&kranal_data.kra_idle_nblk_txs,RANAL_NTX_NBLK);
+ rc = kranal_alloc_txdescs(&kranal_data.kra_idle_nblk_txs,
+ *kranal_tunables.kra_ntx_nblk);
if (rc != 0)
goto failed;
goto failed;
}
- for (i = 0; i < RANAL_N_CONND; i++) {
+ for (i = 0; i < *kranal_tunables.kra_n_connd; i++) {
rc = kranal_thread_start(kranal_connd, (void *)(unsigned long)i);
if (rc != 0) {
CERROR("Can't spawn ranal connd[%d]: %d\n",
LASSERT (kranal_data.kra_ndevs == 0);
- if (ni->ni_interfaces[0] == NULL) {
- /* Use all available RapidArray devices */
- for (i = 0; i < sizeof(kranal_devids)/sizeof(kranal_devids[0]); i++) {
- LASSERT (i < RANAL_MAXDEVS);
-
- dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
+ /* Use all available RapidArray devices */
+ for (i = 0; i < RANAL_MAXDEVS; i++) {
+ dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
- rc = kranal_device_init(kranal_devids[i], dev);
- if (rc == 0)
- kranal_data.kra_ndevs++;
- }
-
- if (kranal_data.kra_ndevs == 0) {
- CERROR("Can't initialise any RapidArray devices\n");
- goto failed;
- }
- } else {
- /* Use specified RapidArray devices */
- for (i = 0; i < PTL_MAX_INTERFACES; i++) {
- int devid;
- int len;
-
- if (kranal_data.kra_ndevs == RANAL_MAXDEVS) {
- CERROR("Too many interfaces\n");
- goto failed;
- }
-
- dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
-
- if (sscanf(ni->ni_interfaces[i], "%d%n", &devid, &len) < 1 ||
- len != strlen(ni->ni_interfaces[i])) {
- CERROR("Can't parse interface '%s'\n",
- ni->ni_interfaces[i]);
- goto failed;
- }
-
- rc = kranal_device_init(devid, dev);
- if (rc != 0) {
- CERROR("Can't open interface '%s': %d\n",
- ni->ni_interfaces[i], rc);
- goto failed;
- }
-
+ rc = kranal_device_init(kranal_devids[i], dev);
+ if (rc == 0)
kranal_data.kra_ndevs++;
- }
+ }
+
+ if (kranal_data.kra_ndevs == 0) {
+ CERROR("Can't initialise any RapidArray devices\n");
+ goto failed;
}
for (i = 0; i < kranal_data.kra_ndevs; i++) {
}
}
+ rc = kranal_start_listener();
+ if (rc != 0)
+ goto failed;
+
/* flag everything initialised */
kranal_data.kra_init = RANAL_INIT_ALL;
/*****************************************************/
-
- CDEBUG(D_MALLOC, "initial kmem %d\n", atomic_read(&portal_kmemory));
+
+ CDEBUG(D_MALLOC, "initial kmem %d\n", pkmem);
printk(KERN_INFO "Lustre: RapidArray NAL loaded "
"(initial mem %d)\n", pkmem);
void __exit
kranal_module_fini (void)
{
- if (kranal_tunables.kra_sysctl != NULL)
- unregister_sysctl_table(kranal_tunables.kra_sysctl);
-
ptl_unregister_nal(&kranal_nal);
+ kranal_tunables_fini();
}
int __init
{
int rc;
- /* the following must be sizeof(int) for
- * proc_dointvec/kranal_listener_procint() */
- CLASSERT (sizeof(kranal_tunables.kra_timeout) == sizeof(int));
- CLASSERT (sizeof(kranal_tunables.kra_listener_timeout) == sizeof(int));
- CLASSERT (sizeof(kranal_tunables.kra_backlog) == sizeof(int));
- CLASSERT (sizeof(kranal_tunables.kra_port) == sizeof(int));
- CLASSERT (sizeof(kranal_tunables.kra_max_immediate) == sizeof(int));
-
- /* Initialise dynamic tunables to defaults once only */
- kranal_tunables.kra_timeout = RANAL_TIMEOUT;
- kranal_tunables.kra_listener_timeout = RANAL_LISTENER_TIMEOUT;
- kranal_tunables.kra_backlog = RANAL_BACKLOG;
- kranal_tunables.kra_port = RANAL_PORT;
- kranal_tunables.kra_max_immediate = RANAL_MAX_IMMEDIATE;
+ rc = kranal_tunables_init();
+ if (rc != 0)
+ return rc;
ptl_register_nal(&kranal_nal);
- kranal_tunables.kra_sysctl =
- register_sysctl_table(kranal_top_ctl_table, 0);
- if (kranal_tunables.kra_sysctl == NULL) {
- CERROR("Can't register sysctl table\n");
- ptl_unregister_nal(&kranal_nal);
- return -ENOMEM;
- }
-
return 0;
}
#include <rapl.h>
-#define RANAL_MAXDEVS 2 /* max # devices RapidArray supports */
+/* default vals for modparams/tunables */
+#define RANAL_N_CONND 4 /* # connection daemons */
-#define RANAL_N_CONND 4 /* # connection daemons */
-
-#define RANAL_MIN_RECONNECT_INTERVAL 1 /* first failed connection retry (seconds)... */
+#define RANAL_MIN_RECONNECT_INTERVAL 1 /* first failed connection retry... */
#define RANAL_MAX_RECONNECT_INTERVAL 60 /* ...exponentially increasing to this */
-#define RANAL_FMA_MAX_PREFIX 232 /* max size of FMA "Prefix" */
-#define RANAL_FMA_MAX_DATA ((7<<10)-256) /* Max FMA MSG is 7K including prefix */
-
-#define RANAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define RANAL_CONN_HASH_SIZE 101 /* # conn lists */
+#define RANAL_NTX 64 /* # tx descs */
+#define RANAL_NTX_NBLK 256 /* # reserved tx descs */
-#define RANAL_NTX 64 /* # tx descs */
-#define RANAL_NTX_NBLK 256 /* # reserved tx descs */
-
-#define RANAL_FMA_CQ_SIZE 8192 /* # entries in receive CQ
+#define RANAL_FMA_CQ_SIZE 8192 /* # entries in receive CQ
* (overflow is a performance hit) */
+#define RANAL_TIMEOUT 30 /* comms timeout (seconds) */
+#define RANAL_LISTENER_TIMEOUT 5 /* listener timeout (seconds) */
+#define RANAL_BACKLOG 127 /* listener's backlog */
+#define RANAL_PORT 987 /* listener's port */
+#define RANAL_MAX_IMMEDIATE (2<<10) /* immediate payload breakpoint */
+
+/* tunables determined at compile time */
+#define RANAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define RANAL_RESCHED 100 /* # scheduler loops before reschedule */
+#define RANAL_PEER_HASH_SIZE 101 /* # peer lists */
+#define RANAL_CONN_HASH_SIZE 101 /* # conn lists */
-#define RANAL_MIN_TIMEOUT 5 /* minimum timeout interval (seconds) */
+#define RANAL_MIN_TIMEOUT 5 /* minimum timeout interval (seconds) */
#define RANAL_TIMEOUT2KEEPALIVE(t) (((t)+1)/2) /* timeout -> keepalive interval */
-/* default vals for runtime tunables */
-#define RANAL_TIMEOUT 30 /* comms timeout (seconds) */
-#define RANAL_LISTENER_TIMEOUT 5 /* listener timeout (seconds) */
-#define RANAL_BACKLOG 127 /* listener's backlog */
-#define RANAL_PORT 988 /* listener's port */
-#define RANAL_MAX_IMMEDIATE (2<<10) /* immediate payload breakpoint */
+/* fixed constants */
+#define RANAL_MAXDEVS 2 /* max # devices RapidArray supports */
+#define RANAL_FMA_MAX_PREFIX 232 /* max bytes in FMA "Prefix" we can use */
+#define RANAL_FMA_MAX_DATA ((7<<10)-256) /* Max FMA MSG is 7K including prefix */
+
typedef struct
{
- int kra_timeout; /* comms timeout (seconds) */
- int kra_listener_timeout; /* max time the listener can block */
- int kra_backlog; /* listener's backlog */
- int kra_port; /* listener's TCP/IP port */
- int kra_max_immediate; /* immediate payload breakpoint */
-
+ int *kra_n_connd; /* # connection daemons */
+ int *kra_min_reconnect_interval; /* first failed connection retry... */
+ int *kra_max_reconnect_interval; /* ...exponentially increasing to this */
+ int *kra_ntx; /* # tx descs */
+ int *kra_ntx_nblk; /* # reserved tx descs */
+ int *kra_fma_cq_size; /* # entries in receive CQ */
+ int *kra_timeout; /* comms timeout (seconds) */
+ int *kra_listener_timeout; /* max time the listener can block */
+ int *kra_backlog; /* listener's backlog */
+ int *kra_port; /* listener's TCP/IP port */
+ int *kra_max_immediate; /* immediate payload breakpoint */
+
+#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
struct ctl_table_header *kra_sysctl; /* sysctl interface */
+#endif
} kra_tunables_t;
typedef struct
wait_queue_head_t rad_waitq; /* scheduler waits here */
spinlock_t rad_lock; /* serialise */
void *rad_scheduler; /* scheduling thread */
+ unsigned int rad_nphysmap; /* # phys mappings */
+ unsigned int rad_nppphysmap; /* # phys pages mapped */
+ unsigned int rad_nvirtmap; /* # virt mappings */
+ unsigned long rad_nobvirtmap; /* # virt bytes mapped */
} kra_device_t;
typedef struct
atomic_t kra_nthreads; /* # live threads */
ptl_ni_t *kra_ni; /* _the_ nal instance */
- struct semaphore kra_nid_mutex; /* serialise NID/listener ops */
struct semaphore kra_listener_signal; /* block for listener startup/shutdown */
struct socket *kra_listener_sock; /* listener's socket */
int kra_listener_shutdown; /* ask listener to close */
struct list_head *kra_peers; /* hash table of all my known peers */
int kra_peer_hash_size; /* size of kra_peers */
- atomic_t kra_npeers; /* # peers extant */
+ int kra_npeers; /* # peers extant */
struct list_head *kra_conns; /* conns hashed by cqid */
int kra_conn_hash_size; /* size of kra_conns */
extern void kranal_tx_done (kra_tx_t *tx, int completion);
extern void kranal_unlink_peer_locked (kra_peer_t *peer);
extern void kranal_schedule_conn (kra_conn_t *conn);
-extern kra_peer_t *kranal_create_peer (ptl_nid_t nid);
+extern int kranal_create_peer (kra_peer_t **peerp, ptl_nid_t nid);
+extern int kranal_add_persistent_peer (ptl_nid_t nid, __u32 ip, int port);
extern kra_peer_t *kranal_find_peer_locked (ptl_nid_t nid);
extern void kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx);
extern int kranal_del_peer (ptl_nid_t nid);
extern void kranal_terminate_conn_locked (kra_conn_t *conn);
extern void kranal_connect (kra_peer_t *peer);
extern int kranal_conn_handshake (struct socket *sock, kra_peer_t *peer);
-extern void kranal_pause(int ticks);
+extern int kranal_tunables_init(void);
+extern void kranal_tunables_fini(void);
return kranal_setup_virt_buffer(tx, niov, iov, offset, nob);
}
-void
+int
kranal_map_buffer (kra_tx_t *tx)
{
kra_conn_t *conn = tx->tx_conn;
case RANAL_BUF_IMMEDIATE:
case RANAL_BUF_PHYS_MAPPED:
case RANAL_BUF_VIRT_MAPPED:
- break;
+ return 0;
case RANAL_BUF_PHYS_UNMAPPED:
rrc = RapkRegisterPhys(dev->rad_handle,
tx->tx_phys, tx->tx_phys_npages,
&tx->tx_map_key);
- LASSERT (rrc == RAP_SUCCESS);
+ if (rrc != RAP_SUCCESS) {
+ CERROR ("Can't map %d pages: dev %d "
+ "phys %u pp %u, virt %u nob %lu\n",
+ tx->tx_phys_npages, dev->rad_id,
+ dev->rad_nphysmap, dev->rad_nppphysmap,
+ dev->rad_nvirtmap, dev->rad_nobvirtmap);
+ return -ENOMEM; /* assume insufficient resources */
+ }
+
+ dev->rad_nphysmap++;
+ dev->rad_nppphysmap += tx->tx_phys_npages;
+
tx->tx_buftype = RANAL_BUF_PHYS_MAPPED;
- break;
+ return 0;
case RANAL_BUF_VIRT_UNMAPPED:
rrc = RapkRegisterMemory(dev->rad_handle,
tx->tx_buffer, tx->tx_nob,
&tx->tx_map_key);
- LASSERT (rrc == RAP_SUCCESS);
+ if (rrc != RAP_SUCCESS) {
+ CERROR ("Can't map %d bytes: dev %d "
+ "phys %u pp %u, virt %u nob %lu\n",
+ tx->tx_nob, dev->rad_id,
+ dev->rad_nphysmap, dev->rad_nppphysmap,
+ dev->rad_nvirtmap, dev->rad_nobvirtmap);
+ return -ENOMEM; /* assume insufficient resources */
+ }
+
+ dev->rad_nvirtmap++;
+ dev->rad_nobvirtmap += tx->tx_nob;
+
tx->tx_buftype = RANAL_BUF_VIRT_MAPPED;
- break;
+ return 0;
}
}
rrc = RapkDeregisterMemory(dev->rad_handle, NULL,
&tx->tx_map_key);
LASSERT (rrc == RAP_SUCCESS);
+
+ dev->rad_nphysmap--;
+ dev->rad_nppphysmap -= tx->tx_phys_npages;
+
tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED;
break;
rrc = RapkDeregisterMemory(dev->rad_handle, tx->tx_buffer,
&tx->tx_map_key);
LASSERT (rrc == RAP_SUCCESS);
+
+ dev->rad_nvirtmap--;
+ dev->rad_nobvirtmap -= tx->tx_nob;
+
tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED;
break;
}
kra_peer_t *peer;
kra_conn_t *conn;
unsigned long now;
+ int rc;
+ int retry;
rwlock_t *g_lock = &kranal_data.kra_global_lock;
/* If I get here, I've committed to send, so I complete the tx with
LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */
- read_lock(g_lock);
+ for (retry = 0; ; retry = 1) {
- peer = kranal_find_peer_locked(nid);
- if (peer == NULL) {
- read_unlock(g_lock);
- kranal_tx_done(tx, -EHOSTUNREACH);
- return;
- }
+ read_lock(g_lock);
- conn = kranal_find_conn_locked(peer);
- if (conn != NULL) {
- kranal_post_fma(conn, tx);
+ peer = kranal_find_peer_locked(nid);
+ if (peer != NULL) {
+ conn = kranal_find_conn_locked(peer);
+ if (conn != NULL) {
+ kranal_post_fma(conn, tx);
+ read_unlock(g_lock);
+ return;
+ }
+ }
+
+ /* Making connections; I'll need a write lock... */
read_unlock(g_lock);
- return;
- }
+ write_lock_irqsave(g_lock, flags);
- /* Making one or more connections; I'll need a write lock... */
- read_unlock(g_lock);
- write_lock_irqsave(g_lock, flags);
-
- peer = kranal_find_peer_locked(nid);
- if (peer == NULL) {
+ peer = kranal_find_peer_locked(nid);
+ if (peer != NULL)
+ break;
+
write_unlock_irqrestore(g_lock, flags);
- kranal_tx_done(tx, -EHOSTUNREACH);
- return;
- }
+
+ if (retry) {
+ CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
+ kranal_tx_done(tx, -EHOSTUNREACH);
+ return;
+ }
+ rc = kranal_add_persistent_peer(nid, PTL_NIDADDR(nid),
+ *kranal_tunables.kra_port);
+ if (rc != 0) {
+ CERROR("Can't add peer %s: %d\n",
+ libcfs_nid2str(nid), rc);
+ kranal_tx_done(tx, rc);
+ return;
+ }
+ }
+
conn = kranal_find_conn_locked(peer);
if (conn != NULL) {
/* Connection exists; queue message on it */
write_unlock_irqrestore(g_lock, flags);
return;
}
-
+
LASSERT (peer->rap_persistence > 0);
if (!peer->rap_connecting) {
tx->tx_conn = conn;
tx->tx_ptlmsg[0] = ptlmsg;
- kranal_map_buffer(tx);
+ rc = kranal_map_buffer(tx);
+ if (rc != 0) {
+ kranal_tx_done(tx, rc);
+ return PTL_FAIL;
+ }
+
kranal_rdma(tx, RANAL_MSG_GET_DONE,
&conn->rac_rxmsg->ram_u.get.ragm_desc, nob,
conn->rac_rxmsg->ram_u.get.ragm_cookie);
if ((ptlmsg->msg_md->md_options & PTL_MD_KIOV) == 0 &&
ptlmsg->msg_md->md_length <= RANAL_FMA_MAX_DATA &&
- ptlmsg->msg_md->md_length <= kranal_tunables.kra_max_immediate)
+ ptlmsg->msg_md->md_length <= *kranal_tunables.kra_max_immediate)
break;
tx = kranal_new_tx_msg(!in_interrupt(), RANAL_MSG_GET_REQ);
case PTL_MSG_PUT:
if (kiov == NULL && /* not paged */
nob <= RANAL_FMA_MAX_DATA && /* small enough */
- nob <= kranal_tunables.kra_max_immediate)
+ nob <= *kranal_tunables.kra_max_immediate)
break; /* send IMMEDIATE */
tx = kranal_new_tx_msg(!in_interrupt(), RANAL_MSG_PUT_REQ);
}
tx->tx_conn = conn;
- kranal_map_buffer(tx);
+ rc = kranal_map_buffer(tx);
+ if (rc != 0) {
+ kranal_tx_done(tx, rc);
+ return PTL_FAIL;
+ }
tx->tx_msg.ram_u.putack.rapam_src_cookie =
conn->rac_rxmsg->ram_u.putreq.raprm_cookie;
int expect_reply;
/* NB 1. kranal_sendmsg() may fail if I'm out of credits right now.
- * However I will be rescheduled some by an FMA completion event
+ * However I will be rescheduled by an FMA completion event
* when I eventually get some.
* NB 2. Sampling rac_state here races with setting it elsewhere.
* But it doesn't matter if I try to send a "real" message just
case RANAL_MSG_IMMEDIATE:
rc = kranal_sendmsg(conn, &tx->tx_msg,
tx->tx_buffer, tx->tx_nob);
- expect_reply = 0;
break;
case RANAL_MSG_PUT_NAK:
case RANAL_MSG_GET_NAK:
case RANAL_MSG_GET_DONE:
rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
- expect_reply = 0;
break;
case RANAL_MSG_PUT_REQ:
+ rc = kranal_map_buffer(tx);
+ LASSERT (rc != -EAGAIN);
+ if (rc != 0)
+ break;
+
tx->tx_msg.ram_u.putreq.raprm_cookie = tx->tx_cookie;
rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
- kranal_map_buffer(tx);
expect_reply = 1;
break;
break;
case RANAL_MSG_GET_REQ:
- kranal_map_buffer(tx);
+ rc = kranal_map_buffer(tx);
+ LASSERT (rc != -EAGAIN);
+ if (rc != 0)
+ break;
+
tx->tx_msg.ram_u.get.ragm_cookie = tx->tx_cookie;
tx->tx_msg.ram_u.get.ragm_desc.rard_key = tx->tx_map_key;
tx->tx_msg.ram_u.get.ragm_desc.rard_addr.AddressBits =
return;
}
- LASSERT (rc == 0);
-
- if (!expect_reply) {
- kranal_tx_done(tx, 0);
+ if (!expect_reply || rc != 0) {
+ kranal_tx_done(tx, rc);
} else {
/* LASSERT(current) above ensures this doesn't race with reply
* processing */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "ranal.h"
+
+/* NAL tunables: each is a module parameter; the rest of the driver
+ * reads them through the pointers published in kranal_tunables below.
+ * Mode 0444 parameters are fixed at module load; 0644 parameters may
+ * also be changed at runtime (and via /proc when sysctl support is
+ * compiled in — see kranal_tunables_init()). */
+
+static int n_connd = RANAL_N_CONND;
+CFS_MODULE_PARM(n_connd, "i", int, 0444,
+                "# of connection daemons");
+
+static int min_reconnect_interval = RANAL_MIN_RECONNECT_INTERVAL;
+CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
+                "minimum connection retry interval (seconds)");
+
+static int max_reconnect_interval = RANAL_MAX_RECONNECT_INTERVAL;
+CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
+                "maximum connection retry interval (seconds)");
+
+static int ntx = RANAL_NTX;
+CFS_MODULE_PARM(ntx, "i", int, 0444,
+                "# of 'normal' transmit descriptors");
+
+static int ntx_nblk = RANAL_NTX_NBLK;
+CFS_MODULE_PARM(ntx_nblk, "i", int, 0444,
+                "# of 'reserved' transmit descriptors");
+
+static int fma_cq_size = RANAL_FMA_CQ_SIZE;
+CFS_MODULE_PARM(fma_cq_size, "i", int, 0444,
+                "size of the completion queue");
+
+static int timeout = RANAL_TIMEOUT;
+CFS_MODULE_PARM(timeout, "i", int, 0644,
+                "communications timeout (seconds)");
+
+static int listener_timeout = RANAL_LISTENER_TIMEOUT;
+CFS_MODULE_PARM(listener_timeout, "i", int, 0644,
+                "passive connection timeout");
+
+static int backlog = RANAL_BACKLOG;
+CFS_MODULE_PARM(backlog, "i", int, 0444,
+                "passive connection (listen) backlog");
+
+static int port = RANAL_PORT;
+CFS_MODULE_PARM(port, "i", int, 0444,
+                "connection request TCP/IP port");
+
+static int max_immediate = RANAL_MAX_IMMEDIATE;
+CFS_MODULE_PARM(max_immediate, "i", int, 0644,
+                "immediate/RDMA breakpoint");
+
+/* Indirection table: callers dereference a tunable through these
+ * pointers, so module-parameter and sysctl updates are picked up
+ * immediately without the driver knowing where the value lives. */
+kra_tunables_t kranal_tunables = {
+        .kra_n_connd                = &n_connd,
+        .kra_min_reconnect_interval = &min_reconnect_interval,
+        .kra_max_reconnect_interval = &max_reconnect_interval,
+        .kra_ntx                    = &ntx,
+        .kra_ntx_nblk               = &ntx_nblk,
+        .kra_fma_cq_size            = &fma_cq_size,
+        .kra_timeout                = &timeout,
+        .kra_listener_timeout       = &listener_timeout,
+        .kra_backlog                = &backlog,
+        .kra_port                   = &port,
+        .kra_max_immediate          = &max_immediate,
+};
+
+#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
+/* Mirror the module parameters under /proc/sys so they can be
+ * inspected and (for the 0644 entries) changed at runtime. */
+static ctl_table kranal_ctl_table[] = {
+        {1, "n_connd", &n_connd,
+         sizeof(int), 0444, NULL, &proc_dointvec},
+        {2, "min_reconnect_interval", &min_reconnect_interval,
+         sizeof(int), 0644, NULL, &proc_dointvec},
+        {3, "max_reconnect_interval", &max_reconnect_interval,
+         sizeof(int), 0644, NULL, &proc_dointvec},
+        {4, "ntx", &ntx,
+         sizeof(int), 0444, NULL, &proc_dointvec},
+        {5, "ntx_nblk", &ntx_nblk,
+         sizeof(int), 0444, NULL, &proc_dointvec},
+        {6, "fma_cq_size", &fma_cq_size,
+         sizeof(int), 0444, NULL, &proc_dointvec},
+        {7, "timeout", &timeout,
+         sizeof(int), 0644, NULL, &proc_dointvec},
+        {8, "listener_timeout", &listener_timeout,
+         sizeof(int), 0644, NULL, &proc_dointvec},
+        {9, "backlog", &backlog,
+         sizeof(int), 0444, NULL, &proc_dointvec},
+        {10, "port", &port,
+         sizeof(int), 0444, NULL, &proc_dointvec},
+        {11, "max_immediate", &max_immediate,
+         sizeof(int), 0644, NULL, &proc_dointvec},
+        {0}
+};
+
+static ctl_table kranal_top_ctl_table[] = {
+        {202, "ranal", NULL, 0, 0555, kranal_ctl_table},
+        {0}
+};
+
+/* Register the /proc tunables.  NB prototype-form (void) parameter
+ * lists to match the declarations in ranal.h. */
+int
+kranal_tunables_init (void)
+{
+        kranal_tunables.kra_sysctl =
+                register_sysctl_table(kranal_top_ctl_table, 0);
+
+        /* Non-fatal: module parameters still work, we just lose the
+         * /proc interface */
+        if (kranal_tunables.kra_sysctl == NULL)
+                CWARN("Can't setup /proc tunables\n");
+
+        return 0;
+}
+
+void
+kranal_tunables_fini (void)
+{
+        if (kranal_tunables.kra_sysctl != NULL)
+                unregister_sysctl_table(kranal_tunables.kra_sysctl);
+}
+
+#else
+
+/* No sysctl support: tunables are module parameters only */
+int
+kranal_tunables_init (void)
+{
+        return 0;
+}
+
+void
+kranal_tunables_fini (void)
+{
+}
+
+#endif
+
PORTAL_FREE (route, sizeof (*route));
}
-ksock_peer_t *
-ksocknal_create_peer (ptl_ni_t *ni, ptl_nid_t nid)
+int
+ksocknal_create_peer (ksock_peer_t **peerp, ptl_ni_t *ni, ptl_nid_t nid)
{
- ksock_net_t *net = ni->ni_data;
- ksock_peer_t *peer;
+ ksock_net_t *net = ni->ni_data;
+ ksock_peer_t *peer;
+ unsigned long flags;
LASSERT (nid != PTL_NID_ANY);
+ LASSERT (!in_interrupt());
PORTAL_ALLOC (peer, sizeof (*peer));
if (peer == NULL)
- return (NULL);
+ return -ENOMEM;
memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */
CFS_INIT_LIST_HEAD (&peer->ksnp_routes);
CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue);
- atomic_inc (&net->ksnn_npeers);
- return (peer);
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+
+ if (net->ksnn_shutdown) {
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
+ PORTAL_FREE(peer, sizeof(*peer));
+ CERROR("Can't create peer: network shutdown\n");
+ return -ESHUTDOWN;
+ }
+
+ net->ksnn_npeers++;
+
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
+ *peerp = peer;
+ return 0;
}
void
ksocknal_destroy_peer (ksock_peer_t *peer)
{
- ksock_net_t *net = peer->ksnp_ni->ni_data;
+ ksock_net_t *net = peer->ksnp_ni->ni_data;
+ unsigned long flags;
CDEBUG (D_NET, "peer %s %p deleted\n",
libcfs_nid2str(peer->ksnp_nid), peer);
* until they are destroyed, so we can be assured that _all_ state to
* do with this peer has been cleaned up when its refcount drops to
* zero. */
- atomic_dec (&net->ksnn_npeers);
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+ net->ksnn_npeers--;
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
}
ksock_peer_t *
}
int
-ksocknal_get_peer_info (int index, ptl_nid_t *nid,
- __u32 *myip, __u32 *peer_ip, int *port,
+ksocknal_get_peer_info (ptl_ni_t *ni, int index,
+ ptl_nid_t *nid, __u32 *myip, __u32 *peer_ip, int *port,
int *conn_count, int *share_count)
{
ksock_peer_t *peer;
list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+ if (peer->ksnp_ni != ni)
+ continue;
+
if (peer->ksnp_n_passive_ips == 0 &&
list_empty(&peer->ksnp_routes)) {
if (index-- > 0)
ksock_peer_t *peer2;
ksock_route_t *route;
ksock_route_t *route2;
+ int rc;
if (nid == PTL_NID_ANY)
return (-EINVAL);
/* Have a brand new peer ready... */
- peer = ksocknal_create_peer(ni, nid);
- if (peer == NULL)
- return (-ENOMEM);
+ rc = ksocknal_create_peer(&peer, ni, nid);
+ if (rc != 0)
+ return rc;
route = ksocknal_create_route (ipaddr, port);
if (route == NULL) {
}
ksock_conn_t *
-ksocknal_get_conn_by_idx (int index)
+ksocknal_get_conn_by_idx (ptl_ni_t *ni, int index)
{
ksock_peer_t *peer;
struct list_head *ptmp;
LASSERT (!peer->ksnp_closing);
+ if (peer->ksnp_ni != ni)
+ continue;
+
list_for_each (ctmp, &peer->ksnp_conns) {
if (index-- > 0)
continue;
ksocknal_route_decref(newroute);
}
-void
-ksocknal_pause(int ticks)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ticks);
-}
-
int
ksocknal_listener (void *arg)
{
char name[16];
- ksock_connreq_t *cr;
+ ksock_connreq_t *cr = NULL;
int rc;
unsigned long flags;
kportal_daemonize(name);
kportal_blockallsigs();
- rc = ksocknal_lib_listen(&ksocknal_data.ksnd_listener_sock,
- *ksocknal_tunables.ksnd_port,
- *ksocknal_tunables.ksnd_backlog);
+ rc = libcfs_sock_listen(&ksocknal_data.ksnd_listener_sock,
+ 0, *ksocknal_tunables.ksnd_port,
+ *ksocknal_tunables.ksnd_backlog);
/* set init status and unblock parent */
ksocknal_data.ksnd_listener_shutdown = rc;
return rc;
while (ksocknal_data.ksnd_listener_shutdown == 0) {
+
+ if (cr == NULL) {
+ PORTAL_ALLOC(cr, sizeof(*cr));
+ if (cr == NULL) {
+ CWARN("ENOMEM: listener pausing\n");
+ libcfs_pause(cfs_time_seconds(1));
+ continue;
+ }
+ }
- rc = ksocknal_lib_accept(ksocknal_data.ksnd_listener_sock, &cr);
+ rc = libcfs_sock_accept(&cr->ksncr_sock,
+ ksocknal_data.ksnd_listener_sock,
+ 0);
if (rc != 0) {
if (rc != -EAGAIN) {
- CWARN("Accept error: %d\n", rc);
- ksocknal_pause(cfs_time_seconds(1));
+ CWARN("Accept error %d: listener pausing\n",
+ rc);
+ libcfs_pause(cfs_time_seconds(1));
}
continue;
}
-
+
spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags);
list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
wake_up(&ksocknal_data.ksnd_connd_waitq);
spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock, flags);
+ cr = NULL;
}
- ksocknal_lib_release_sock(ksocknal_data.ksnd_listener_sock);
+ libcfs_sock_release(ksocknal_data.ksnd_listener_sock);
+ if (cr != NULL)
+ PORTAL_FREE(cr, sizeof(*cr));
/* unblock executioner */
mutex_up(&ksocknal_data.ksnd_listener_signal);
/* make the listener exit its loop... */
ksocknal_data.ksnd_listener_shutdown = 1;
- ksocknal_lib_abort_accept(ksocknal_data.ksnd_listener_sock);
+ libcfs_sock_abort_accept(ksocknal_data.ksnd_listener_sock);
/* block until listener exits */
mutex_down(&ksocknal_data.ksnd_listener_signal);
rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
__u32 ipaddrs[PTL_MAX_INTERFACES];
int nipaddrs;
+ ptl_ni_t *ni;
+ ksock_net_t *net;
ptl_nid_t nid;
struct list_head *tmp;
__u64 incarnation;
goto failed_1;
if (route != NULL) {
- ptl_ni_t *ni = route->ksnr_peer->ksnp_ni;
- ksock_net_t *net = ni->ni_data;
+ ni = route->ksnr_peer->ksnp_ni;
+ net = (ksock_net_t *)ni->ni_data;
/* Active connection sends HELLO eagerly */
nipaddrs = ksocknal_local_ipvec(ni, ipaddrs);
}
/* Find out/confirm peer's NID and connection type and get the
- * vector of interfaces she's willing to let me connect to */
+ * vector of interfaces she's willing to let me connect to.
+ * Passive connections use the listener timeout since the peer sends
+ * eagerly */
nid = (route == NULL) ? PTL_NID_ANY : route->ksnr_peer->ksnp_nid;
- rc = ksocknal_recv_hello (conn, &nid, &incarnation, ipaddrs);
+ rc = ksocknal_recv_hello (conn, &nid, &incarnation, ipaddrs,
+ (route == NULL) ?
+ *ksocknal_tunables.ksnd_listen_timeout :
+ *ksocknal_tunables.ksnd_timeout);
if (rc < 0)
goto failed_1;
nipaddrs = rc;
peer = route->ksnr_peer;
ksocknal_peer_addref(peer);
} else {
- ptl_ni_t *ni = ptl_net2ni(PTL_NIDNET(nid));
+ ni = ptl_net2ni(PTL_NIDNET(nid));
if (ni == NULL) {
CERROR("Refusing connection attempt "
rc = -ECONNREFUSED;
goto failed_1;
}
-
- peer = ksocknal_create_peer(ni, nid);
+
+ net = (ksock_net_t *)ni->ni_data;
+ rc = ksocknal_create_peer(&peer, ni, nid);
/* lose extra ref from ptl_net2ni NB we wait for all the peers
* to be deleted before ni teardown can complete; i.e. ni can't
* there's no to account the peer's refs on ni. */
ptl_ni_decref(ni);
- if (peer == NULL) {
- rc = -ENOMEM;
+ if (rc != 0)
goto failed_1;
- }
write_lock_irqsave(global_lock, flags);
PORTAL_FREE (conn, sizeof(*conn));
failed_0:
- ksocknal_lib_release_sock(sock);
+ libcfs_sock_release(sock);
LASSERT (rc != 0);
return (rc);
int conn_count = 0;
int share_count = 0;
- rc = ksocknal_get_peer_info(data->ioc_count, &nid,
- &myip, &ip, &port,
+ rc = ksocknal_get_peer_info(ni, data->ioc_count,
+ &nid, &myip, &ip, &port,
&conn_count, &share_count);
+ if (rc != 0)
+ return rc;
+
data->ioc_nid = nid;
data->ioc_count = share_count;
data->ioc_u32[0] = ip;
int txmem;
int rxmem;
int nagle;
- ksock_conn_t *conn = ksocknal_get_conn_by_idx (data->ioc_count);
+ ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count);
if (conn == NULL)
return -ENOENT;
data->ioc_u32[0]);
case IOC_PORTAL_REGISTER_MYNID:
+ /* Ignore if this is a noop */
if (data->ioc_nid == ni->ni_nid)
return 0;
- LASSERT (PTL_NIDNET(data->ioc_nid) == PTL_NIDNET(ni->ni_nid));
-
- CERROR("obsolete IOC_PORTAL_REGISTER_MYNID for %s(%s)\n",
+ CERROR("obsolete IOC_PORTAL_REGISTER_MYNID: %s(%s)\n",
libcfs_nid2str(data->ioc_nid),
libcfs_nid2str(ni->ni_nid));
- return 0;
+ return -EINVAL;
case IOC_PORTAL_PUSH_CONNECTION:
return ksocknal_push (ni, data->ioc_nid);
{
ksock_sched_t *sched;
int i;
+ unsigned long flags;
CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
atomic_read (&portal_kmemory));
ksocknal_stop_listener();
/* Wait for queued connreqs to clean up */
i = 2;
+ spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags);
while (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock,
+ flags);
i++;
CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
"waiting for connreqs to clean up\n");
- ksocknal_pause(cfs_time_seconds(1));
+ libcfs_pause(cfs_time_seconds(1));
+
+ spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags);
}
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock,
+ flags);
/* fall through */
"waiting for %d threads to terminate\n",
ksocknal_data.ksnd_nthreads);
read_unlock(&ksocknal_data.ksnd_global_lock);
- ksocknal_pause(cfs_time_seconds(1));
+ libcfs_pause(cfs_time_seconds(1));
read_lock(&ksocknal_data.ksnd_global_lock);
}
read_unlock(&ksocknal_data.ksnd_global_lock);
{
ksock_net_t *net = ni->ni_data;
int i;
+ unsigned long flags;
LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
LASSERT(ksocknal_data.ksnd_nnets > 0);
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+ net->ksnn_shutdown = 1; /* prevent new peers */
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
/* Delete all peers */
ksocknal_del_peer(ni, PTL_NID_ANY, 0);
/* Wait for all peer state to clean up */
i = 2;
- while (atomic_read (&net->ksnn_npeers) != 0) {
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+ while (net->ksnn_npeers != 0) {
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
i++;
CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
"waiting for %d peers to disconnect\n",
- atomic_read (&net->ksnn_npeers));
- ksocknal_pause(cfs_time_seconds(1));
+ net->ksnn_npeers);
+ libcfs_pause(cfs_time_seconds(1));
+
+ spin_lock_irqsave(&net->ksnn_lock, flags);
}
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
for (i = 0; i < net->ksnn_ninterfaces; i++) {
LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
ksocknal_base_shutdown();
}
+/* Discover the node's IP interfaces and record the usable ones
+ * (ipaddr + netmask) in 'net'.  The loopback interface and any
+ * interface that is down or cannot be queried is skipped, and at most
+ * PTL_MAX_INTERFACES entries are kept.
+ * Returns the number of interfaces recorded, or the (<= 0) result of
+ * libcfs_ipif_enumerate() if enumeration itself failed.  NB a return
+ * of 0 means "nothing usable" and callers must treat it as failure. */
+int
+ksocknal_enumerate_interfaces(ksock_net_t *net)
+{
+        char      **names;
+        int         i;
+        int         j;
+        int         rc;
+        int         n;
+
+        n = libcfs_ipif_enumerate(&names);
+        if (n <= 0) {
+                CERROR("Can't enumerate interfaces: %d\n", n);
+                return n;
+        }
+
+        for (i = j = 0; i < n; i++) {
+                int   up;
+                __u32 ip;
+                __u32 mask;
+
+                if (!strcmp(names[i], "lo")) /* skip the loopback IF */
+                        continue;
+
+                rc = libcfs_ipif_query(names[i], &up, &ip, &mask);
+                if (rc != 0) {
+                        /* best-effort: a broken interface doesn't stop
+                         * the scan */
+                        CWARN("Can't get interface %s info: %d\n",
+                              names[i], rc);
+                        continue;
+                }
+
+                if (!up) {
+                        CWARN("Ignoring interface %s (down)\n",
+                              names[i]);
+                        continue;
+                }
+
+                if (j == PTL_MAX_INTERFACES) {
+                        CWARN("Ignoring interface %s (too many interfaces)\n",
+                              names[i]);
+                        continue;
+                }
+
+                net->ksnn_interfaces[j].ksni_ipaddr = ip;
+                net->ksnn_interfaces[j].ksni_netmask = mask;
+                j++;
+        }
+
+        libcfs_ipif_free_enumeration(names, n);
+
+        if (j == 0)
+                CERROR("Can't find any usable interfaces\n");
+
+        return j;
+}
+
ptl_err_t
ksocknal_startup (ptl_ni_t *ni)
{
goto fail_0;
memset(net, 0, sizeof(*net));
-
+ spin_lock_init(&net->ksnn_lock);
net->ksnn_incarnation = ksocknal_new_incarnation();
ni->ni_data = net;
if (ni->ni_interfaces[0] == NULL) {
- rc = ksocknal_lib_enumerate_ifs(net->ksnn_interfaces,
- PTL_MAX_INTERFACES);
- if (rc < 0)
+ rc = ksocknal_enumerate_interfaces(net);
+ if (rc <= 0)
goto fail_1;
- if (rc == 0) {
- CERROR("Can't find any interfaces\n");
- goto fail_1;
- }
-
net->ksnn_ninterfaces = rc;
} else {
for (i = 0; i < PTL_MAX_INTERFACES; i++) {
+ int up;
+
if (ni->ni_interfaces[i] == NULL)
break;
- rc = ksocknal_lib_init_if(&net->ksnn_interfaces[i],
- ni->ni_interfaces[i]);
- if (rc != 0)
+ rc = libcfs_ipif_query(
+ ni->ni_interfaces[i], &up,
+ &net->ksnn_interfaces[i].ksni_ipaddr,
+ &net->ksnn_interfaces[i].ksni_netmask);
+
+ if (rc != 0) {
+ CERROR("Can't get interface %s info: %d\n",
+ ni->ni_interfaces[i], rc);
goto fail_1;
+ }
+
+ if (!up) {
+ CERROR("Interface %s is down\n",
+ ni->ni_interfaces[i]);
+ goto fail_1;
+ }
}
net->ksnn_ninterfaces = i;
}
# define SOCKNAL_RISK_KMAP_DEADLOCK 1
#endif
+/* minimum socket buffer required for connection handshake */
+#define SOCKNAL_MIN_BUFFER (2*(sizeof(ptl_hdr_t) + \
+ PTL_MAX_INTERFACES * sizeof(__u32)))
+
typedef struct /* pool of forwarding buffers */
{
spinlock_t fmp_lock; /* serialise */
typedef struct
{
__u64 ksnn_incarnation; /* my epoch */
- atomic_t ksnn_npeers; /* # peers */
+ spinlock_t ksnn_lock; /* serialise */
+ int ksnn_npeers; /* # peers */
+ int ksnn_shutdown; /* shutting down? */
int ksnn_ninterfaces; /* IP interfaces */
ksock_interface_t ksnn_interfaces[PTL_MAX_INTERFACES];
} ksock_net_t;
return (rc);
}
-extern void ksocknal_lib_release_sock(struct socket *sock);
-
static inline void
ksocknal_connsock_decref (ksock_conn_t *conn)
{
LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0);
if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
LASSERT (conn->ksnc_closing);
- ksocknal_lib_release_sock(conn->ksnc_sock);
+ libcfs_sock_release(conn->ksnc_sock);
conn->ksnc_sock = NULL;
}
}
extern int ksocknal_send_hello (ksock_conn_t *conn, ptl_nid_t nid,
__u64 incarnation, __u32 *ipaddrs, int nipaddrs);
extern int ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid,
- __u64 *incarnation, __u32 *ipaddrs);
+ __u64 *incarnation, __u32 *ipaddrs,
+ int timeout);
extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn);
extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn);
extern void ksocknal_lib_eager_ack (ksock_conn_t *conn);
extern int ksocknal_lib_recv_iov (ksock_conn_t *conn);
extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn);
-extern int ksocknal_lib_sock_write (struct socket *sock,
- void *buffer, int nob);
-extern int ksocknal_lib_sock_read (struct socket *sock,
- void *buffer, int nob);
extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
int *rxmem, int *nagle);
-extern int ksocknal_lib_connect_sock(struct socket **sockp, int *fatal,
- ksock_route_t *route, int local_port);
-extern int ksocknal_lib_listen(struct socket **sockp, int port, int backlog);
-extern int ksocknal_lib_accept(struct socket *sock, ksock_connreq_t **crp);
-extern void ksocknal_lib_abort_accept(struct socket *sock);
extern int ksocknal_lib_tunables_init(void);
extern void ksocknal_lib_tunables_fini(void);
-
-extern int ksocknal_lib_init_if (ksock_interface_t *iface, char *name);
-extern int ksocknal_lib_enumerate_ifs (ksock_interface_t *ifs, int nifs);
-
ksock_conn_t *conn;
ksock_route_t *route;
rwlock_t *g_lock;
- int create_peer = 0;
+ int retry;
int rc;
/* Ensure the frags we've been given EXACTLY match the number of
g_lock = &ksocknal_data.ksnd_global_lock;
- again:
- if (create_peer) {
- rc = ksocknal_add_peer(ni, nid, PTL_NIDADDR(nid),
- *ksocknal_tunables.ksnd_port);
- if (rc != 0) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_nid2str(nid), rc);
- return rc;
- }
- }
-
+        for (retry = 0;; retry = 1) {
#if !SOCKNAL_ROUND_ROBIN
- read_lock (g_lock);
- peer = ksocknal_find_peer_locked(ni, nid);
- if (peer != NULL) {
- if (ksocknal_find_connectable_route_locked(peer) == NULL) {
- conn = ksocknal_find_conn_locked (tx, peer);
- if (conn != NULL) {
- /* I've got no routes that need to be
- * connecting and I do have an actual
- * connection... */
- ksocknal_queue_tx_locked (tx, conn);
- read_unlock (g_lock);
- return (0);
+ read_lock (g_lock);
+ peer = ksocknal_find_peer_locked(ni, nid);
+ if (peer != NULL) {
+ if (ksocknal_find_connectable_route_locked(peer) == NULL) {
+ conn = ksocknal_find_conn_locked (tx, peer);
+ if (conn != NULL) {
+ /* I've got no routes that need to be
+ * connecting and I do have an actual
+ * connection... */
+ ksocknal_queue_tx_locked (tx, conn);
+ read_unlock (g_lock);
+ return (0);
+ }
}
}
- }
- /* I'll need a write lock... */
- read_unlock (g_lock);
+ /* I'll need a write lock... */
+ read_unlock (g_lock);
#endif
- write_lock_irqsave(g_lock, flags);
+ write_lock_irqsave(g_lock, flags);
- peer = ksocknal_find_peer_locked(ni, nid);
- if (peer == NULL) {
+ peer = ksocknal_find_peer_locked(ni, nid);
+ if (peer != NULL)
+ break;
+
write_unlock_irqrestore(g_lock, flags);
- if (create_peer) {
+ if (retry) {
CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
- return -ENOENT;
+ return -EHOSTUNREACH;
}
- create_peer = 1;
- goto again;
+ rc = ksocknal_add_peer(ni, nid, PTL_NIDADDR(nid),
+ *ksocknal_tunables.ksnd_port);
+ if (rc != 0) {
+ CERROR("Can't add peer %s: %d\n",
+ libcfs_nid2str(nid), rc);
+ return rc;
+ }
}
for (;;) {
ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]);
}
- /* Receiver should be eager */
- rc = ksocknal_lib_sock_write (sock, &hdr, sizeof(hdr));
+ /* socket buffer should have been set large enough not to block
+ * (timeout == 0) */
+ rc = libcfs_sock_write(sock, &hdr, sizeof(hdr), 0);
if (rc != 0) {
CERROR ("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
if (nipaddrs == 0)
return (0);
- rc = ksocknal_lib_sock_write (sock, ipaddrs, nipaddrs * sizeof(*ipaddrs));
+ rc = libcfs_sock_write(sock, ipaddrs, nipaddrs * sizeof(*ipaddrs), 0);
if (rc != 0)
CERROR ("Error %d sending HELLO payload (%d)"
" to %u.%u.%u.%u/%d\n", rc, nipaddrs,
int
ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid,
- __u64 *incarnation, __u32 *ipaddrs)
+ __u64 *incarnation, __u32 *ipaddrs,
+ int timeout)
{
struct socket *sock = conn->ksnc_sock;
int rc;
hmv = (ptl_magicversion_t *)&hdr.dest_nid;
LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
- rc = ksocknal_lib_sock_read (sock, hmv, sizeof (*hmv));
+ rc = libcfs_sock_read(sock, hmv, sizeof (*hmv), timeout);
if (rc != 0) {
CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
* header, followed by payload full of interface IP addresses.
* Read the rest of it in now... */
- rc = ksocknal_lib_sock_read (sock, hmv + 1, sizeof (hdr) - sizeof (*hmv));
+ rc = libcfs_sock_read(sock, hmv + 1, sizeof (hdr) - sizeof (*hmv),
+ timeout);
if (rc != 0) {
CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
rc, HIPQUAD(conn->ksnc_ipaddr));
if (nips == 0)
return (0);
- rc = ksocknal_lib_sock_read (sock, ipaddrs, nips * sizeof(*ipaddrs));
+ rc = libcfs_sock_read(sock, ipaddrs, nips * sizeof(*ipaddrs), timeout);
if (rc != 0) {
CERROR ("Error %d reading IPs from %s ip %u.%u.%u.%u\n",
rc, libcfs_nid2str(*nid), HIPQUAD(conn->ksnc_ipaddr));
int port;
int fatal;
- /* Iterate through reserved ports. When typed connections are
- * used, we will need to bind to multiple ports, but we only know
- * this at connect time. But, by that time we've already called
- * bind() so we need a new socket. */
-
for (port = 1023; port > 512; --port) {
+ /* Iterate through reserved ports. */
- rc = ksocknal_lib_connect_sock(&sock, &fatal, route, port);
-
+ rc = libcfs_sock_connect(&sock, &fatal, 0,
+ route->ksnr_myipaddr, port,
+ route->ksnr_ipaddr, route->ksnr_port);
if (rc == 0) {
rc = ksocknal_create_conn(route, sock, type);
return rc;
}
int
-ksocknal_lib_sock_write (struct socket *sock, void *buffer, int nob)
-{
- int rc;
- mm_segment_t oldmm = get_fs();
-
- while (nob > 0) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
-
- set_fs (KERNEL_DS);
- rc = sock_sendmsg (sock, &msg, iov.iov_len);
- set_fs (oldmm);
-
- if (rc < 0)
- return (rc);
-
- if (rc == 0) {
- CERROR ("Unexpected zero rc\n");
- return (-ECONNABORTED);
- }
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
- }
-
- return (0);
-}
-
-int
-ksocknal_lib_sock_read (struct socket *sock, void *buffer, int nob)
-{
- int rc;
- mm_segment_t oldmm = get_fs();
-
- while (nob > 0) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct msghdr msg = {
- .msg_name = NULL,
- .msg_namelen = 0,
- .msg_iov = &iov,
- .msg_iovlen = 1,
- .msg_control = NULL,
- .msg_controllen = 0,
- .msg_flags = 0
- };
-
- set_fs (KERNEL_DS);
- rc = sock_recvmsg (sock, &msg, iov.iov_len, 0);
- set_fs (oldmm);
-
- if (rc < 0)
- return (rc);
-
- if (rc == 0)
- return (-ECONNABORTED);
-
- buffer = ((char *)buffer) + rc;
- nob -= rc;
- }
-
- return (0);
-}
-
-int
ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
{
mm_segment_t oldmm = get_fs ();
return (-ESHUTDOWN);
}
- set_fs (KERNEL_DS);
-
- len = sizeof(*txmem);
- rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF,
- (char *)txmem, &len);
- if (rc == 0) {
- len = sizeof(*rxmem);
- rc = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF,
- (char *)rxmem, &len);
- }
+ rc = libcfs_sock_getbuf(sock, txmem, rxmem);
if (rc == 0) {
len = sizeof(*nagle);
+ set_fs(KERNEL_DS);
rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
(char *)nagle, &len);
+ set_fs(oldmm);
}
- set_fs (oldmm);
ksocknal_connsock_decref(conn);
if (rc == 0)
}
int
+ksocknal_lib_buffersize (int current_sz, int tunable_sz)
+{
+ /* Choose the socket buffer size to request, given the socket's current
+ * size and the configured tunable. Returns the size to set, or 0 to
+ * mean "leave the socket buffer as it is". */
+
+ /* ensure >= SOCKNAL_MIN_BUFFER */
+ if (current_sz < SOCKNAL_MIN_BUFFER)
+ return MAX(SOCKNAL_MIN_BUFFER, tunable_sz);
+
+ /* current size already acceptable: use the tunable only when it
+ * exceeds the minimum */
+ if (tunable_sz > SOCKNAL_MIN_BUFFER)
+ return tunable_sz;
+
+ /* leave alone */
+ return 0;
+}
+
+int
ksocknal_lib_setup_sock (struct socket *sock)
{
mm_segment_t oldmm = get_fs ();
int keep_intvl;
int keep_count;
int do_keepalive;
+ int sndbuf;
+ int rcvbuf;
struct linger linger;
sock->sk->sk_allocation = GFP_NOFS;
}
}
- if (ksocknal_tunables.ksnd_buffer_size > 0) {
- option = *ksocknal_tunables.ksnd_buffer_size;
+ rc = libcfs_sock_getbuf(sock, &sndbuf, &rcvbuf);
+ if (rc != 0) {
+ CERROR("Can't get buffer sizes: %d\n", rc);
+ return rc;
+ }
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
- option, rc);
- return (rc);
- }
+ sndbuf = ksocknal_lib_buffersize(sndbuf,
+ *ksocknal_tunables.ksnd_buffer_size);
+ rcvbuf = ksocknal_lib_buffersize(rcvbuf,
+ *ksocknal_tunables.ksnd_buffer_size);
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
- option, rc);
- return (rc);
- }
- }
+ rc = libcfs_sock_setbuf(sock, sndbuf, rcvbuf);
+ if (rc != 0) {
+ CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
+ sndbuf, rcvbuf, rc);
+ return (rc);
+ }
/* snapshot tunables */
keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
return (0);
}
-int
-ksocknal_lib_set_sock_timeout (struct socket *sock, int timeout)
-{
- struct timeval tv;
- int rc;
- mm_segment_t oldmm = get_fs();
-
- /* Set the socket timeouts, so our connection handshake completes in
- * finite time */
- tv.tv_sec = timeout;
- tv.tv_usec = 0;
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDTIMEO,
- (char *)&tv, sizeof (tv));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set send timeout %d: %d\n", timeout, rc);
- return rc;
- }
-
- set_fs (KERNEL_DS);
- rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVTIMEO,
- (char *)&tv, sizeof (tv));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR ("Can't set receive timeout %d: %d\n", timeout, rc);
- return rc;
- }
-
- return 0;
-}
-
-void
-ksocknal_lib_release_sock(struct socket *sock)
-{
- sock_release(sock);
-}
-
-int
-ksocknal_lib_create_sock(struct socket **sockp, int *fatal,
- int timeout, __u32 local_ip, int local_port)
-{
- struct sockaddr_in locaddr;
- struct socket *sock;
- int rc;
- int option;
- mm_segment_t oldmm = get_fs();
-
- *fatal = 1; /* assume errors are fatal */
-
- memset(&locaddr, 0, sizeof(locaddr));
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons(local_port);
- locaddr.sin_addr.s_addr = (local_ip == 0) ?
- INADDR_ANY : htonl(local_ip);
-
- rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
- *sockp = sock;
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return (rc);
- }
-
- rc = ksocknal_lib_set_sock_timeout(sock, timeout);
- if (rc != 0)
- goto failed;
-
- set_fs (KERNEL_DS);
- option = 1;
- rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
- (char *)&option, sizeof (option));
- set_fs (oldmm);
- if (rc != 0) {
- CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
- goto failed;
- }
-
- rc = sock->ops->bind(sock,
- (struct sockaddr *)&locaddr, sizeof(locaddr));
- if (rc == -EADDRINUSE) {
- CDEBUG(D_WARNING, "Port %d already in use\n", local_port);
- *fatal = 0;
- goto failed;
- }
- if (rc != 0) {
- CERROR("Error trying to bind to reserved port %d: %d\n",
- local_port, rc);
- goto failed;
- }
-
- return 0;
-
- failed:
- sock_release(sock);
- return rc;
-}
-
-int
-ksocknal_lib_listen(struct socket **sockp, int port, int backlog)
-{
- int fatal;
- int rc;
-
- rc = ksocknal_lib_create_sock(sockp, &fatal, 1, 0, port);
- if (rc != 0)
- return rc;
-
- rc = (*sockp)->ops->listen(*sockp, backlog);
- if (rc == 0)
- return 0;
-
- CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
- sock_release(*sockp);
- return rc;
-}
-
-int
-ksocknal_lib_accept(struct socket *sock, ksock_connreq_t **crp)
-{
- wait_queue_t wait;
- struct socket *newsock;
- int rc;
-
- init_waitqueue_entry(&wait, current);
-
- newsock = sock_alloc();
- if (newsock == NULL) {
- CERROR("Can't allocate socket\n");
- return -ENOMEM;
- }
-
- /* XXX this should add a ref to sock->ops->owner, if
- * TCP could be a module */
- newsock->type = sock->type;
- newsock->ops = sock->ops;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(sock->sk->sk_sleep, &wait);
-
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
- if (rc == -EAGAIN) {
- /* Nothing ready, so wait for activity */
- schedule();
- rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
- }
-
- remove_wait_queue(sock->sk->sk_sleep, &wait);
- set_current_state(TASK_RUNNING);
-
- if (rc != 0)
- goto failed;
-
- rc = ksocknal_lib_set_sock_timeout(newsock,
- *ksocknal_tunables.ksnd_listen_timeout);
- if (rc != 0)
- goto failed;
-
- rc = -ENOMEM;
- PORTAL_ALLOC(*crp, sizeof(**crp));
- if (*crp == NULL)
- goto failed;
-
- (*crp)->ksncr_sock = newsock;
- return 0;
-
- failed:
- sock_release(newsock);
- return rc;
-}
-
-void
-ksocknal_lib_abort_accept(struct socket *sock)
-{
- wake_up_all(sock->sk->sk_sleep);
-}
-
-int
-ksocknal_lib_connect_sock(struct socket **sockp, int *fatal,
- ksock_route_t *route, int local_port)
-{
- struct sockaddr_in srvaddr;
- int rc;
-
- memset (&srvaddr, 0, sizeof (srvaddr));
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons (route->ksnr_port);
- srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr);
-
- rc = ksocknal_lib_create_sock(sockp, fatal,
- *ksocknal_tunables.ksnd_timeout,
- route->ksnr_myipaddr, local_port);
- if (rc != 0)
- return rc;
-
- rc = (*sockp)->ops->connect(*sockp,
- (struct sockaddr *)&srvaddr, sizeof(srvaddr),
- 0);
- if (rc == 0)
- return 0;
-
- /* EADDRNOTAVAIL probably means we're already connected to the same
- * peer/port on the same local port on a differently typed
- * connection. Let our caller retry with a different local
- * port... */
- *fatal = !(rc == -EADDRNOTAVAIL);
-
- CDEBUG(*fatal ? D_ERROR : D_NET,
- "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
- HIPQUAD(route->ksnr_myipaddr), local_port,
- HIPQUAD(route->ksnr_ipaddr), route->ksnr_port);
-
- sock_release(*sockp);
- return rc;
-}
-
-int
-ksocknal_lib_getifaddr(struct socket *sock, char *name, __u32 *ip, __u32 *mask)
-{
- mm_segment_t oldmm = get_fs();
- struct ifreq ifr;
- int rc;
- __u32 val;
-
- LASSERT (strnlen(name, IFNAMSIZ) < IFNAMSIZ);
-
- strcpy(ifr.ifr_name, name);
- ifr.ifr_addr.sa_family = AF_INET;
- set_fs(KERNEL_DS);
- rc = sock->ops->ioctl(sock, SIOCGIFADDR, (unsigned long)&ifr);
- set_fs(oldmm);
-
- if (rc != 0)
- return rc;
-
- val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
- *ip = ntohl(val);
-
- strcpy(ifr.ifr_name, name);
- ifr.ifr_addr.sa_family = AF_INET;
- set_fs(KERNEL_DS);
- rc = sock->ops->ioctl(sock, SIOCGIFNETMASK, (unsigned long)&ifr);
- set_fs(oldmm);
-
- if (rc != 0)
- return rc;
-
- val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
- *mask = ntohl(val);
- return 0;
-}
-
-int
-ksocknal_lib_init_if (ksock_interface_t *iface, char *name)
-{
- struct socket *sock;
- int rc;
- int nob;
-
- rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return rc;
- }
-
- nob = strnlen(name, IFNAMSIZ);
- if (nob == IFNAMSIZ) {
- CERROR("Interface name %s too long\n", name);
- rc -EINVAL;
- } else {
- CLASSERT (sizeof(iface->ksni_name) >= IFNAMSIZ);
- strcpy(iface->ksni_name, name);
-
- rc = ksocknal_lib_getifaddr(sock, name,
- &iface->ksni_ipaddr,
- &iface->ksni_netmask);
- if (rc != 0)
- CERROR("Can't get IP address for interface %s\n", name);
- }
-
- sock_release(sock);
- return rc;
-}
-
-int
-ksocknal_lib_enumerate_ifs (ksock_interface_t *ifs, int nifs)
-{
- int nalloc = PTL_MAX_INTERFACES;
- char name[IFNAMSIZ];
- int nfound;
- int nused;
- struct socket *sock;
- struct ifconf ifc;
- struct ifreq *ifr;
- mm_segment_t oldmm = get_fs();
- __u32 ipaddr;
- __u32 netmask;
- int rc;
- int i;
-
- rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
- if (rc != 0) {
- CERROR ("Can't create socket: %d\n", rc);
- return rc;
- }
-
- for (;;) {
- PORTAL_ALLOC(ifr, nalloc * sizeof(*ifr));
- if (ifr == NULL) {
- CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
- rc = -ENOMEM;
- goto out0;
- }
-
- ifc.ifc_buf = (char *)ifr;
- ifc.ifc_len = nalloc * sizeof(*ifr);
-
- set_fs(KERNEL_DS);
- rc = sock->ops->ioctl(sock, SIOCGIFCONF, (unsigned long)&ifc);
- set_fs(oldmm);
-
- if (rc < 0) {
- CERROR ("Error %d enumerating interfaces\n", rc);
- goto out1;
- }
-
- LASSERT (rc == 0);
-
- nfound = ifc.ifc_len/sizeof(*ifr);
- LASSERT (nfound <= nalloc);
-
- if (nfound <= nalloc)
- break;
-
- /* Assume there are more interfaces */
- if (nalloc >= 16 * PTL_MAX_INTERFACES) {
- CWARN("Too many interfaces: "
- "only trying the first %d\n", nfound);
- break;
- }
-
- nalloc *= 2;
- }
-
- for (i = nused = 0; i < nfound; i++) {
- strncpy(name, ifr[i].ifr_name, IFNAMSIZ); /* ensure terminated name */
- name[IFNAMSIZ-1] = 0;
-
- if (!strncmp(name, "lo", 2)) {
- CDEBUG(D_WARNING, "ignoring %s\n", name);
- continue;
- }
-
- strcpy(ifr[i].ifr_name, name);
- set_fs(KERNEL_DS);
- rc = sock->ops->ioctl(sock, SIOCGIFFLAGS,
- (unsigned long)&ifr[i]);
- set_fs(oldmm);
-
- if (rc != 0) {
- CDEBUG(D_WARNING, "Can't get flags for %s\n", name);
- continue;
- }
-
- if ((ifr[i].ifr_flags & IFF_UP) == 0) {
- CDEBUG(D_WARNING, "Interface %s down\n", name);
- continue;
- }
-
- rc = ksocknal_lib_getifaddr(sock, name, &ipaddr, &netmask);
- if (rc != 0) {
- CDEBUG(D_WARNING,
- "Can't get IP address or netmask for %s\n",
- name);
- continue;
- }
-
- if (nused >= nifs) {
- CWARN("Too many available interfaces: "
- "only using the first %d\n", nused);
- break;
- }
-
- memset(&ifs[nused], 0, sizeof(ifs[nused]));
-
- CLASSERT(sizeof(ifs[nused].ksni_name) >= IFNAMSIZ);
- strcpy(ifs[nused].ksni_name, name);
- ifs[nused].ksni_ipaddr = ipaddr;
- ifs[nused].ksni_netmask = netmask;
- nused++;
-
- CDEBUG(D_WARNING, "Added interface %s: %u.%u.%u.%u\n",
- name, HIPQUAD(ipaddr));
- }
-
- rc = nused;
- out1:
- PORTAL_FREE(ifr, nalloc * sizeof(*ifr));
- out0:
- sock_release(sock);
- return rc;
-}
-
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
struct tcp_opt *sock2tcp_opt(struct sock *sk)
{
#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10)
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72))
-# define sk_allocation allocation
-# define sk_data_ready data_ready
-# define sk_write_space write_space
-# define sk_user_data user_data
-# define sk_prot prot
-# define sk_sndbuf sndbuf
-# define sk_socket socket
-# define sk_sleep sleep
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
-# define sk_wmem_queued wmem_queued
-# define sk_err err
-#endif
-
#define SOCKNAL_ARCH_EAGER_ACK 0
-#define SOCK_WMEM_QUEUED(so) ((so)->sk->sk_wmem_queued)
-#define SOCK_ERROR(so) ((so)->sk->sk_err)
-#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags)
#ifndef CONFIG_SMP
static inline
libcfs-linux-objs := linux-tracefile.o linux-debug.o
libcfs-linux-objs += linux-prim.o linux-mem.o
-libcfs-linux-objs += linux-fs.o linux-sync.o
+libcfs-linux-objs += linux-fs.o linux-sync.o linux-tcpip.o
libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o
libcfs-linux-objs += linux-utils.o linux-module.o
EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \
linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \
- linux-module.c linux-sync.c linux-curproc.c
+ linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+
+#include <linux/if.h>
+#include <linux/in.h>
+
+int
+libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
+{
+ /* Query IP interface 'name': set *up to its up/down state; when it is
+ * up, also set *ip and *mask to its address and netmask in host byte
+ * order (when down they are zeroed). Returns 0 on success, -ve error
+ * otherwise. */
+ mm_segment_t oldmm = get_fs();
+ struct ifreq ifr;
+ struct socket *sock;
+ int nob;
+ int rc;
+ __u32 val;
+
+ /* throwaway socket just to issue the SIOCGIF* ioctls on */
+ rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return rc;
+ }
+
+ nob = strnlen(name, IFNAMSIZ);
+ if (nob == IFNAMSIZ) {
+ CERROR("Interface name %s too long\n", name);
+ /* fix: was 'rc -EINVAL;' (a no-op expression) which left
+ * rc == 0 and reported success for an over-long name */
+ rc = -EINVAL;
+ goto out;
+ }
+
+ CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
+
+ strcpy(ifr.ifr_name, name);
+ set_fs(KERNEL_DS);
+ rc = sock->ops->ioctl(sock, SIOCGIFFLAGS, (unsigned long)&ifr);
+ set_fs(oldmm);
+
+ if (rc != 0) {
+ CERROR("Can't get flags for interface %s\n", name);
+ goto out;
+ }
+
+ if ((ifr.ifr_flags & IFF_UP) == 0) {
+ CDEBUG(D_NET, "Interface %s down\n", name);
+ *up = 0;
+ *ip = *mask = 0;
+ goto out;
+ }
+
+ /* fix: *up was never set on the interface-up path, leaving the
+ * caller to read an uninitialised value */
+ *up = 1;
+
+ strcpy(ifr.ifr_name, name);
+ ifr.ifr_addr.sa_family = AF_INET;
+ set_fs(KERNEL_DS);
+ rc = sock->ops->ioctl(sock, SIOCGIFADDR, (unsigned long)&ifr);
+ set_fs(oldmm);
+
+ if (rc != 0) {
+ CERROR("Can't get IP address for interface %s\n", name);
+ goto out;
+ }
+
+ val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+ *ip = ntohl(val);
+
+ strcpy(ifr.ifr_name, name);
+ ifr.ifr_addr.sa_family = AF_INET;
+ set_fs(KERNEL_DS);
+ rc = sock->ops->ioctl(sock, SIOCGIFNETMASK, (unsigned long)&ifr);
+ set_fs(oldmm);
+
+ if (rc != 0) {
+ CERROR("Can't get netmask for interface %s\n", name);
+ goto out;
+ }
+
+ val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
+ *mask = ntohl(val);
+
+ out:
+ sock_release(sock);
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_ipif_query);
+
+int
+libcfs_ipif_enumerate (char ***namesp)
+{
+ /* Allocate and fill in 'names', returning # interfaces/error */
+ /* On success (> 0) the caller owns *namesp and must release it with
+ * libcfs_ipif_free_enumeration(). On 0 or error, *namesp is not set.
+ * The scan is capped at PAGE_SIZE worth of ifreqs. */
+ char **names;
+ int toobig;
+ int nalloc;
+ int nfound;
+ struct socket *sock;
+ struct ifreq *ifr;
+ struct ifconf ifc;
+ mm_segment_t oldmm = get_fs();
+ int rc;
+ int nob;
+ int i;
+
+ /* throwaway socket for the SIOCGIFCONF ioctl */
+ rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return rc;
+ }
+
+ nalloc = 16; /* first guess at max interfaces */
+ toobig = 0;
+ for (;;) {
+ if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
+ toobig = 1;
+ nalloc = PAGE_SIZE/sizeof(*ifr);
+ CWARN("Too many interfaces: only enumerating first %d\n",
+ nalloc);
+ }
+
+ PORTAL_ALLOC(ifr, nalloc * sizeof(*ifr));
+ if (ifr == NULL) {
+ CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
+ rc = -ENOMEM;
+ goto out0;
+ }
+
+ ifc.ifc_buf = (char *)ifr;
+ ifc.ifc_len = nalloc * sizeof(*ifr);
+
+ set_fs(KERNEL_DS);
+ rc = sock->ops->ioctl(sock, SIOCGIFCONF, (unsigned long)&ifc);
+ set_fs(oldmm);
+
+ if (rc < 0) {
+ CERROR ("Error %d enumerating interfaces\n", rc);
+ goto out1;
+ }
+
+ LASSERT (rc == 0);
+
+ nfound = ifc.ifc_len/sizeof(*ifr);
+ LASSERT (nfound <= nalloc);
+
+ /* a full buffer may mean more interfaces exist: retry with a
+ * bigger one unless we've already hit the PAGE_SIZE cap */
+ if (nfound < nalloc || toobig)
+ break;
+
+ PORTAL_FREE(ifr, nalloc * sizeof(*ifr));
+ nalloc *= 2;
+ }
+
+ if (nfound == 0)
+ goto out1; /* rc == 0: no interfaces, *namesp untouched */
+
+ PORTAL_ALLOC(names, nfound * sizeof(*names));
+ if (names == NULL) {
+ rc = -ENOMEM;
+ goto out1;
+ }
+ /* NULL out all names[i] */
+ memset (names, 0, nfound * sizeof(*names));
+
+ for (i = 0; i < nfound; i++) {
+
+ nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
+ if (nob == IFNAMSIZ) {
+ /* no space for terminating NULL */
+ CERROR("interface name %.*s too long (%d max)\n",
+ nob, ifr[i].ifr_name, IFNAMSIZ);
+ rc = -ENAMETOOLONG;
+ goto out2;
+ }
+
+ PORTAL_ALLOC(names[i], IFNAMSIZ);
+ if (names[i] == NULL) {
+ rc = -ENOMEM;
+ goto out2;
+ }
+
+ memcpy(names[i], ifr[i].ifr_name, nob);
+ names[i][nob] = 0;
+ }
+
+ *namesp = names;
+ rc = nfound;
+
+ out2:
+ /* on error, free whatever names were allocated so far (the array
+ * was zeroed, so free_enumeration stops at the first NULL) */
+ if (rc < 0)
+ libcfs_ipif_free_enumeration(names, nfound);
+ out1:
+ PORTAL_FREE(ifr, nalloc * sizeof(*ifr));
+ out0:
+ sock_release(sock);
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_ipif_enumerate);
+
+void
+libcfs_ipif_free_enumeration (char **names, int n)
+{
+ /* Release an interface-name array returned by
+ * libcfs_ipif_enumerate(). 'n' is the count it returned (> 0);
+ * trailing NULL slots (partially-built arrays) are tolerated. */
+ int i;
+
+ LASSERT (n > 0);
+
+ for (i = 0; i < n && names[i] != NULL; i++)
+ PORTAL_FREE(names[i], IFNAMSIZ);
+
+ PORTAL_FREE(names, n * sizeof(*names));
+}
+
+EXPORT_SYMBOL(libcfs_ipif_free_enumeration);
+
+int
+libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
+{
+ /* Write exactly 'nob' bytes to 'sock', blocking for at most 'timeout'
+ * seconds in total. Returns 0 on success, -EAGAIN if the time budget
+ * is exhausted (or a zero-timeout send couldn't complete at once),
+ * -ECONNABORTED on an unexpected zero-length send, or a -ve socket
+ * error. */
+ int rc;
+ mm_segment_t oldmm = get_fs();
+ long ticks = timeout * HZ;
+ unsigned long then;
+ struct timeval tv;
+
+ LASSERT (nob > 0);
+ /* Caller may pass a zero timeout if she thinks the socket buffer is
+ * empty enough to take the whole message immediately */
+
+ for (;;) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0
+ };
+
+ if (timeout != 0) {
+ /* Set send timeout to remaining time */
+ tv = (struct timeval) {
+ .tv_sec = ticks / HZ,
+ .tv_usec = ((ticks % HZ) * 1000000) / HZ
+ };
+ set_fs(KERNEL_DS);
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
+ (char *)&tv, sizeof(tv));
+ set_fs(oldmm);
+ if (rc != 0) {
+ CERROR("Can't set socket send timeout "
+ "%ld.%06d: %d\n",
+ (long)tv.tv_sec, (int)tv.tv_usec, rc);
+ return rc;
+ }
+ }
+
+ set_fs (KERNEL_DS);
+ then = jiffies;
+ rc = sock_sendmsg (sock, &msg, iov.iov_len);
+ /* fix: charge elapsed time against the budget; previously
+ * 'ticks' was never decremented (and 'then' unused), so the
+ * full timeout was re-armed on every partial write and the
+ * aggregate timeout was never honoured */
+ ticks -= jiffies - then;
+ set_fs (oldmm);
+
+ if (rc < 0)
+ return rc;
+
+ if (rc == 0) {
+ CERROR ("Unexpected zero rc\n");
+ return (-ECONNABORTED);
+ }
+
+ buffer = ((char *)buffer) + rc;
+ nob -= rc;
+
+ if (nob == 0)
+ return 0;
+
+ /* partial write: give up if non-blocking or out of time */
+ if (timeout == 0 || ticks <= 0)
+ return -EAGAIN;
+ }
+}
+
+EXPORT_SYMBOL(libcfs_sock_write);
+
+int
+libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
+{
+ /* Read exactly 'nob' bytes from 'sock', blocking for at most 'timeout'
+ * seconds in total. Returns 0 on success, -ETIMEDOUT when the budget
+ * is exhausted, -ECONNABORTED on EOF, or a -ve socket error. */
+ int rc;
+ mm_segment_t oldmm = get_fs();
+ long ticks = timeout * HZ;
+ unsigned long then;
+ struct timeval tv;
+
+ LASSERT (nob > 0);
+ LASSERT (ticks > 0); /* unlike write, a zero timeout is not allowed */
+
+ for (;;) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+
+ /* Set receive timeout to remaining time */
+ tv = (struct timeval) {
+ .tv_sec = ticks / HZ,
+ .tv_usec = ((ticks % HZ) * 1000000) / HZ
+ };
+ set_fs(KERNEL_DS);
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
+ (char *)&tv, sizeof(tv));
+ set_fs(oldmm);
+ if (rc != 0) {
+ CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
+ (long)tv.tv_sec, (int)tv.tv_usec, rc);
+ return rc;
+ }
+
+ set_fs(KERNEL_DS);
+ then = jiffies;
+ rc = sock_recvmsg(sock, &msg, iov.iov_len, 0);
+ ticks -= jiffies - then; /* charge elapsed time against budget */
+ set_fs(oldmm);
+
+ if (rc < 0)
+ return rc;
+
+ if (rc == 0)
+ return -ECONNABORTED; /* EOF: peer closed the connection */
+
+ buffer = ((char *)buffer) + rc;
+ nob -= rc;
+
+ if (nob == 0)
+ return 0;
+
+ if (ticks <= 0)
+ return -ETIMEDOUT;
+ }
+}
+
+EXPORT_SYMBOL(libcfs_sock_read);
+
+static int
+libcfs_sock_create (struct socket **sockp, int *fatal,
+ __u32 local_ip, int local_port)
+{
+ /* Create a TCP socket with SO_REUSEADDR, optionally bound to the given
+ * local IP/port. On failure the socket is released and *sockp must
+ * not be used; *fatal is cleared only for a port-in-use bind failure
+ * so callers can retry with another reserved port. */
+ struct sockaddr_in locaddr;
+ struct socket *sock;
+ int rc;
+ int option;
+ mm_segment_t oldmm = get_fs();
+
+ /* All errors are fatal except bind failure if the port is in use */
+ *fatal = 1;
+
+ rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
+ *sockp = sock;
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return (rc);
+ }
+
+ set_fs (KERNEL_DS);
+ option = 1;
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
+ goto failed;
+ }
+
+ /* can't specify a local IP without a local port */
+ LASSERT (local_ip == 0 || local_port != 0);
+
+ if (local_ip != 0 || local_port != 0) {
+ memset(&locaddr, 0, sizeof(locaddr));
+ locaddr.sin_family = AF_INET;
+ locaddr.sin_port = htons(local_port);
+ locaddr.sin_addr.s_addr = (local_ip == 0) ?
+ INADDR_ANY : htonl(local_ip);
+
+ rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr,
+ sizeof(locaddr));
+ if (rc == -EADDRINUSE) {
+ /* expected during reserved-port scans: non-fatal */
+ CDEBUG(D_NET, "Port %d already in use\n", local_port);
+ *fatal = 0;
+ goto failed;
+ }
+ if (rc != 0) {
+ CERROR("Error trying to bind to port %d: %d\n",
+ local_port, rc);
+ goto failed;
+ }
+ }
+
+ return 0;
+
+ failed:
+ sock_release(sock);
+ return rc;
+}
+
+int
+libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize)
+{
+ /* Set the socket's send and/or receive buffer sizes; a size of 0
+ * leaves that buffer unchanged. Returns 0 on success or the first
+ * setsockopt error. */
+ mm_segment_t oldmm = get_fs();
+ int option;
+ int rc;
+
+ if (txbufsize != 0) {
+ option = txbufsize;
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set send buffer %d: %d\n",
+ option, rc);
+ return (rc);
+ }
+ }
+
+ if (rxbufsize != 0) {
+ option = rxbufsize;
+ set_fs (KERNEL_DS);
+ rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
+ (char *)&option, sizeof (option));
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't set receive buffer %d: %d\n",
+ option, rc);
+ return (rc);
+ }
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL(libcfs_sock_setbuf);
+
+int
+libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize)
+{
+ /* Read back the socket's send and/or receive buffer sizes; a NULL
+ * pointer skips that query. Returns 0 on success or the first
+ * getsockopt error. */
+ mm_segment_t oldmm = get_fs();
+ int option;
+ int optlen;
+ int rc;
+
+ if (txbufsize != NULL) {
+ optlen = sizeof(option);
+ set_fs (KERNEL_DS);
+ rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF,
+ (char *)&option, &optlen);
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't get send buffer size: %d\n", rc);
+ return (rc);
+ }
+ *txbufsize = option;
+ }
+
+ if (rxbufsize != NULL) {
+ optlen = sizeof(option);
+ set_fs (KERNEL_DS);
+ rc = sock_getsockopt (sock, SOL_SOCKET, SO_RCVBUF,
+ (char *)&option, &optlen);
+ set_fs (oldmm);
+ if (rc != 0) {
+ CERROR ("Can't get receive buffer size: %d\n", rc);
+ return (rc);
+ }
+ *rxbufsize = option;
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL(libcfs_sock_getbuf);
+
+int
+libcfs_sock_listen (struct socket **sockp,
+ __u32 local_ip, int local_port, int backlog)
+{
+ /* Create a listening socket bound to local_ip/local_port with the
+ * given backlog. On failure the socket has been released and *sockp
+ * must not be used. */
+ int fatal;
+ int rc;
+
+ rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port);
+ if (rc != 0) {
+ /* a non-fatal create failure means the port was in use */
+ if (!fatal)
+ CERROR("Can't create socket: port %d already in use\n",
+ local_port);
+ return rc;
+ }
+
+ rc = (*sockp)->ops->listen(*sockp, backlog);
+ if (rc == 0)
+ return 0;
+
+ CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
+ sock_release(*sockp);
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_listen);
+
+int
+libcfs_sock_accept (struct socket **newsockp, struct socket *sock, int bufsize)
+{
+ /* Accept one connection on listening socket 'sock' into *newsockp,
+ * blocking until one arrives. If bufsize != 0 the new socket's tx/rx
+ * buffers are set to it. The wait is interruptible via
+ * libcfs_sock_abort_accept(), which wakes sk_sleep; the second
+ * non-blocking accept() then returns -EAGAIN to the caller. */
+ wait_queue_t wait;
+ struct socket *newsock;
+ int rc;
+
+ init_waitqueue_entry(&wait, current);
+
+ newsock = sock_alloc();
+ if (newsock == NULL) {
+ CERROR("Can't allocate socket\n");
+ return -ENOMEM;
+ }
+
+ /* XXX this should add a ref to sock->ops->owner, if
+ * TCP could be a module */
+ newsock->type = sock->type;
+ newsock->ops = sock->ops;
+
+ /* register on the wait queue BEFORE probing, so a connection
+ * arriving between the probe and schedule() still wakes us */
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(sock->sk->sk_sleep, &wait);
+
+ rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+ if (rc == -EAGAIN) {
+ /* Nothing ready, so wait for activity */
+ schedule();
+ rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+ }
+
+ remove_wait_queue(sock->sk->sk_sleep, &wait);
+ set_current_state(TASK_RUNNING);
+
+ if (rc != 0)
+ goto failed;
+
+ if (bufsize != 0) {
+ rc = libcfs_sock_setbuf(newsock, bufsize, bufsize);
+ if (rc != 0)
+ goto failed;
+ }
+
+ *newsockp = newsock;
+ return 0;
+
+ failed:
+ sock_release(newsock);
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_accept);
+
+void
+libcfs_sock_abort_accept (struct socket *sock)
+{
+ /* Wake anyone blocked in libcfs_sock_accept() on this listening
+ * socket so they can notice shutdown and bail out. */
+ wake_up_all(sock->sk->sk_sleep);
+}
+
+EXPORT_SYMBOL(libcfs_sock_abort_accept);
+
+int
+libcfs_sock_connect (struct socket **sockp, int *fatal, int bufsize,
+ __u32 local_ip, int local_port,
+ __u32 peer_ip, int peer_port)
+{
+ /* Create a socket (optionally bound to local_ip/local_port, with
+ * bufsize-byte tx/rx buffers if bufsize != 0) and connect it to
+ * peer_ip/peer_port. On failure the socket has been released; *fatal
+ * is cleared for the retryable cases (port in use, EADDRNOTAVAIL). */
+ struct sockaddr_in srvaddr;
+ int rc;
+
+ rc = libcfs_sock_create(sockp, fatal, local_ip, local_port);
+ if (rc != 0)
+ return rc;
+
+ if (bufsize != 0) {
+ rc = libcfs_sock_setbuf(*sockp, bufsize, bufsize);
+ if (rc != 0)
+ goto failed;
+ }
+
+ memset (&srvaddr, 0, sizeof (srvaddr));
+ srvaddr.sin_family = AF_INET;
+ srvaddr.sin_port = htons(peer_port);
+ srvaddr.sin_addr.s_addr = htonl(peer_ip);
+
+ rc = (*sockp)->ops->connect(*sockp,
+ (struct sockaddr *)&srvaddr, sizeof(srvaddr),
+ 0);
+ if (rc == 0)
+ return 0;
+
+ /* EADDRNOTAVAIL probably means we're already connected to the same
+ * peer/port on the same local port on a differently typed
+ * connection. Let our caller retry with a different local
+ * port... */
+ *fatal = !(rc == -EADDRNOTAVAIL);
+
+ CDEBUG(*fatal ? D_ERROR : D_NET,
+ "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+ HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+
+ failed:
+ sock_release(*sockp);
+ return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_connect);
+
+void
+libcfs_sock_release (struct socket *sock)
+{
+ /* Release a socket obtained from the libcfs_sock_* API; thin wrapper
+ * so callers need not touch the kernel socket API directly. */
+ sock_release(sock);
+}
+
+EXPORT_SYMBOL(libcfs_sock_release);
+
+void
+libcfs_pause (cfs_duration_t ticks)
+{
+ /* Sleep uninterruptibly for 'ticks' jiffies. */
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(ticks);
+}
+
+EXPORT_SYMBOL(libcfs_pause);
char *str = libcfs_next_nidstring();
if (nf == NULL)
- snprintf(str, PTL_NALFMT_SIZE, "t<%u>%u", nal, num);
+ snprintf(str, PTL_NALFMT_SIZE, "<%u:%u>", nal, num);
else if (num == 0)
snprintf(str, PTL_NALFMT_SIZE, "%s", nf->nf_name);
else
str = libcfs_next_nidstring();
if (nf == NULL)
- snprintf(str, PTL_NALFMT_SIZE, "%x@t<%u>%u", addr, nal, nnum);
+ snprintf(str, PTL_NALFMT_SIZE, "%x@<%u:%u>", addr, nal, nnum);
else {
nf->nf_addr2str(addr, str);
nob = strlen(str);
if (!strncmp(str, nf->nf_name, strlen(nf->nf_name)))
break;
}
- if (i == libcfs_nnalstrfns) {
- CWARN("No base: %s\n", str);
+
+ if (i == libcfs_nnalstrfns)
return NULL;
- }
nob = strlen(nf->nf_name);
LASSERT (ptl_tbnob == 0);
return rc;
}
+
+#ifdef __KERNEL__
+ptl_err_t
+ptl_set_ip_niaddr (ptl_ni_t *ni)
+{
+ /* Set ni->ni_nid's address part from an IP interface: the one named
+ * in ni->ni_interfaces[0] if given, else the first enumerated
+ * interface that is up. Returns PTL_OK or PTL_FAIL. */
+ __u32 net = PTL_NIDNET(ni->ni_nid);
+ char **names;
+ int n;
+ __u32 ip;
+ __u32 netmask;
+ int up;
+ int i;
+ int rc;
+
+ /* Convenience for NALs that use the IP address of a local interface as
+ * the local address part of their NID */
+
+ if (ni->ni_interfaces[0] != NULL) {
+
+ CLASSERT (PTL_MAX_INTERFACES > 1);
+
+ if (ni->ni_interfaces[1] != NULL) {
+ CERROR("Net %s doesn't support multiple interfaces\n",
+ libcfs_net2str(net));
+ return PTL_FAIL;
+ }
+
+ rc = libcfs_ipif_query(ni->ni_interfaces[0],
+ &up, &ip, &netmask);
+ if (rc != 0) {
+ /* fix: error message typo 'qeury' */
+ CERROR("Net %s can't query interface %s: %d\n",
+ libcfs_net2str(net), ni->ni_interfaces[0], rc);
+ return PTL_FAIL;
+ }
+
+ /* fix: a down interface reports up == 0 with ip == 0;
+ * don't build a NID from address 0 */
+ if (!up) {
+ CERROR("Net %s interface %s is down\n",
+ libcfs_net2str(net), ni->ni_interfaces[0]);
+ return PTL_FAIL;
+ }
+
+ ni->ni_nid = PTL_MKNID(net, ip);
+ return PTL_OK;
+ }
+
+ n = libcfs_ipif_enumerate(&names);
+ if (n <= 0) {
+ CERROR("Net %s can't enumerate interfaces: %d\n",
+ libcfs_net2str(net), n);
+ /* fix: was 'return 0;', inconsistent with the PTL_FAIL
+ * error convention used on every other failure path */
+ return PTL_FAIL;
+ }
+
+ for (i = 0; i < n; i++) {
+ rc = libcfs_ipif_query(names[i], &up, &ip, &netmask);
+
+ if (rc != 0) {
+ CWARN("Net %s can't query interface %s: %d\n",
+ libcfs_net2str(net), names[i], rc);
+ continue;
+ }
+
+ if (!up) {
+ CWARN("Net %s ignoring interface %s (down)\n",
+ libcfs_net2str(net), names[i]);
+ continue;
+ }
+
+ /* use the first interface that's up */
+ libcfs_ipif_free_enumeration(names, n);
+ ni->ni_nid = PTL_MKNID(net, ip);
+ return PTL_OK;
+ }
+
+ CERROR("Net %s can't find any interfaces\n", libcfs_net2str(net));
+ libcfs_ipif_free_enumeration(names, n);
+ return PTL_FAIL;
+}
+
+EXPORT_SYMBOL(ptl_set_ip_niaddr);
+
+#endif
if (net == PTL_NIDNET(data.ioc_nid)) {
g_net_set = 1;
+ g_net = net;
return 0;
}
continue;
return -1;
}
- fprintf(stderr,"%s not a local network (%s on its own to list)\n",
- argv[1]);
+ fprintf(stderr,"%s not a local network (%s on its own to list them all)\n",
+ argv[1], argv[0]);
return -1;
#endif
}
break;
if (g_net_is_compatible(NULL, SOCKNAL, 0))
- printf (LPX64"[%d]%s@%s:%d #%d\n",
- data.ioc_nid,
+ printf ("%-20s [%d]%s->%s:%d #%d\n",
+ libcfs_nid2str(data.ioc_nid),
data.ioc_count, /* persistence */
ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* my ip */
ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */
data.ioc_u32[1], /* peer port */
data.ioc_u32[3]); /* conn_count */
else if (g_net_is_compatible(NULL, RANAL, OPENIBNAL, VIBNAL, 0))
- printf (LPX64"[%d]@%s:%d\n",
- data.ioc_nid, data.ioc_count,
+ printf ("%-20s [%d]@%s:%d\n",
+ libcfs_nid2str(data.ioc_nid),
+ data.ioc_count,
ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */
data.ioc_u32[1]); /* peer port */
else
- printf (LPX64"[%d]\n",
- data.ioc_nid, data.ioc_count);
+ printf ("%-20s [%d]\n",
+ libcfs_nid2str(data.ioc_nid), data.ioc_count);
}
if (index == 0) {
PORTAL_IOC_INIT(data);
data.ioc_net = g_net;
data.ioc_count = index;
-
+
rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_CONN, &data);
if (rc != 0)
break;
if (g_net_is_compatible (NULL, SOCKNAL, 0))
- printf ("[%d]%s:"LPX64"@%s:%d:%s %d/%d %s\n",
- data.ioc_u32[4], /* scheduler */
- ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* local IP addr */
- data.ioc_nid,
- ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* remote IP addr */
- data.ioc_u32[1], /* remote port */
+ printf ("%-20s %s[%d]%s->%s:%d %d/%d %s\n",
+ libcfs_nid2str(data.ioc_nid),
(data.ioc_u32[3] == SOCKNAL_CONN_ANY) ? "A" :
(data.ioc_u32[3] == SOCKNAL_CONN_CONTROL) ? "C" :
(data.ioc_u32[3] == SOCKNAL_CONN_BULK_IN) ? "I" :
(data.ioc_u32[3] == SOCKNAL_CONN_BULK_OUT) ? "O" : "?",
+ data.ioc_u32[4], /* scheduler */
+ ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* local IP addr */
+ ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* remote IP addr */
+ data.ioc_u32[1], /* remote port */
data.ioc_count, /* tx buffer size */
data.ioc_u32[5], /* rx buffer size */
data.ioc_flags ? "nagle" : "nonagle");
else if (g_net_is_compatible (NULL, RANAL, 0))
- printf ("[%d]"LPX64"@%s:%d\n",
- data.ioc_u32[0], /* device id */
- data.ioc_nid);
+ printf ("%-20s [%d]\n",
+ libcfs_nid2str(data.ioc_nid),
+ data.ioc_u32[0] /* device id */);
else
- printf (LPX64"\n",
- data.ioc_nid);
+ printf ("%s\n", libcfs_nid2str(data.ioc_nid));
}
if (index == 0) {