if (ctx->mxc_page != NULL) {
__free_page(ctx->mxc_page);
- spin_lock(&kmxlnd_data.kmx_global_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
kmxlnd_data.kmx_mem_used -= MXLND_EAGER_SIZE;
- spin_unlock(&kmxlnd_data.kmx_global_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
}
if (ctx->mxc_seg_list != NULL) {
ret = -ENOMEM;
goto failed;
}
- spin_lock(&kmxlnd_data.kmx_global_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
kmxlnd_data.kmx_mem_used += MXLND_EAGER_SIZE;
- spin_unlock(&kmxlnd_data.kmx_global_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
ctx->mxc_msg = (struct kmx_msg *)((char *)page_address(ctx->mxc_page));
ctx->mxc_seg.segment_ptr = MX_PA_TO_U64(lnet_page2phys(ctx->mxc_page));
ctx->mxc_state = MXLND_CTX_IDLE;
}
}
-int
-mxlnd_host_alloc(struct kmx_host **hostp)
-{
- struct kmx_host *host = NULL;
-
- MXLND_ALLOC(host, sizeof (*host));
- if (host == NULL) {
- CDEBUG(D_NETERROR, "Cannot allocate host\n");
- return -1;
- }
- memset(host, 0, sizeof(*host));
- spin_lock_init(&host->mxh_lock);
-
- *hostp = host;
-
- return 0;
-}
-
-void
-mxlnd_host_free(struct kmx_host *host)
-{
- if (host == NULL) return;
-
- if (host->mxh_hostname != NULL)
- MXLND_FREE(host->mxh_hostname, strlen(host->mxh_hostname) + 1);
-
- MXLND_FREE(host, sizeof(*host));
- return;
-}
-
/**
- * mxlnd_free_hosts - free kmx_hosts
- *
- * Called from mxlnd_shutdown()
- */
-void
-mxlnd_free_hosts(void)
-{
- struct kmx_host *host = NULL;
- struct kmx_host *next = NULL;
-
- list_for_each_entry_safe(host, next, &kmxlnd_data.kmx_hosts, mxh_list) {
- list_del_init(&host->mxh_list);
- mxlnd_host_free(host);
- }
- return;
-}
-
-#define xstr(s) #s
-#define str(s) xstr(s)
-#define MXLND_MAX_BOARD 4 /* we expect hosts to have fewer NICs than this */
-#define MXLND_MAX_EP_ID 16 /* we expect hosts to have less than this endpoints */
-
-/* this parses a line that consists of:
- *
- * IP HOSTNAME BOARD ENDPOINT ID
- * 169.192.0.113 mds01 0 3
- *
- * By default MX uses the alias (short hostname). If you override
- * it using mx_hostname to use the FQDN or some other name, the hostname
- * here must match exactly.
- */
-
-/* MX_MAX_HOSTNAME_LEN = 80. See myriexpress.h */
-int
-mxlnd_parse_line(char *line)
-{
- int i = 0;
- int ret = 0;
- int len = 0;
- u32 ip[4] = { 0, 0, 0, 0 };
- char hostname[MX_MAX_HOSTNAME_LEN];
- u32 board = -1;
- u32 ep_id = -1;
- struct kmx_host *host = NULL;
-
- if (line == NULL) return -1;
-
- len = strlen(line);
-
- if (len == 0) return -1;
-
- /* convert tabs to spaces */
- for (i = 0; i < len; i++) {
- if (line[i] == '\t') line[i] = ' ';
- }
-
- memset(&hostname, 0 , sizeof(hostname));
- ret = sscanf(line, "%d.%d.%d.%d %" str(MX_MAX_HOSTNAME_LEN) "s %d %d",
- &ip[0], &ip[1], &ip[2], &ip[3], hostname, &board, &ep_id);
-
- if (ret != 7) {
- return -1;
- }
-
- /* check for valid values */
- /* we assume a valid IP address (all <= 255), number of NICs,
- * and number of endpoint IDs */
- if (ip[0] > 255 || ip [1] > 255 || ip[2] > 255 || ip[3] > 255 ||
- board > MXLND_MAX_BOARD || ep_id > MXLND_MAX_EP_ID) {
- CDEBUG(D_NETERROR, "Illegal value in \"%s\". Ignoring "
- "this host.\n", line);
- return -1;
- }
-
- ret = mxlnd_host_alloc(&host);
- if (ret != 0) return -1;
-
- host->mxh_addr = ((ip[0]<<24)|(ip[1]<<16)|(ip[2]<<8)|ip[3]);
- len = strlen(hostname);
- MXLND_ALLOC(host->mxh_hostname, len + 1);
- if (host->mxh_hostname == NULL) {
- mxlnd_host_free(host);
- return -ENOMEM;
- }
- memset(host->mxh_hostname, 0, len + 1);
- strncpy(host->mxh_hostname, hostname, len);
- host->mxh_board = board;
- host->mxh_ep_id = ep_id;
-
- spin_lock(&kmxlnd_data.kmx_hosts_lock);
- list_add_tail(&host->mxh_list, &kmxlnd_data.kmx_hosts);
- spin_unlock(&kmxlnd_data.kmx_hosts_lock);
-
- return 0;
-}
-
-void
-mxlnd_print_hosts(void)
-{
-#if MXLND_DEBUG
- struct kmx_host *host = NULL;
-
- list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) {
- int ip[4];
- u32 addr = host->mxh_addr;
-
- ip[0] = (addr >> 24) & 0xff;
- ip[1] = (addr >> 16) & 0xff;
- ip[2] = (addr >> 8) & 0xff;
- ip[3] = addr & 0xff;
- CDEBUG(D_NET, "\tip= %d.%d.%d.%d\n\thost= %s\n\tboard= %d\n\tep_id= %d\n\n",
- ip[0], ip[1], ip[2], ip[3],
- host->mxh_hostname, host->mxh_board, host->mxh_ep_id);
- }
-#endif
- return;
-}
-
-#define MXLND_BUFSIZE (PAGE_SIZE - 1)
-
-int
-mxlnd_parse_hosts(char *filename)
-{
- int ret = 0;
- s32 size = 0;
- s32 bufsize = MXLND_BUFSIZE;
- s32 allocd = 0;
- loff_t offset = 0;
- struct file *filp = NULL;
- struct inode *inode = NULL;
- char *buf = NULL;
- s32 buf_off = 0;
- char *sep = NULL;
- char *line = NULL;
-
- if (filename == NULL) return -1;
-
- filp = filp_open(filename, O_RDONLY, 0);
- if (IS_ERR(filp)) {
- CERROR("filp_open() failed for %s\n", filename);
- return -1;
- }
-
- inode = filp->f_dentry->d_inode;
- if (!S_ISREG(inode->i_mode)) {
- CERROR("%s is not a regular file\n", filename);
- return -1;
- }
-
- size = (s32) inode->i_size;
- if (size < MXLND_BUFSIZE) bufsize = size;
- allocd = bufsize;
- MXLND_ALLOC(buf, allocd + 1);
- if (buf == NULL) {
- CERROR("Cannot allocate buf\n");
- filp_close(filp, current->files);
- return -1;
- }
-
- while (offset < size) {
- memset(buf, 0, bufsize + 1);
- ret = kernel_read(filp, (unsigned long) offset, buf, (unsigned long) bufsize);
- if (ret < 0) {
- CDEBUG(D_NETERROR, "kernel_read() returned %d - closing %s\n", ret, filename);
- filp_close(filp, current->files);
- MXLND_FREE(buf, allocd + 1);
- return -1;
- }
-
- if (ret < bufsize) bufsize = ret;
- buf_off = 0;
- while (buf_off < bufsize) {
- sep = strchr(buf + buf_off, '\n');
- if (sep != NULL) {
- /* we have a line */
- line = buf + buf_off;
- *sep = '\0';
- ret = mxlnd_parse_line(line);
- if (ret != 0 && strlen(line) != 0) {
- CDEBUG(D_NETERROR, "Failed to parse \"%s\". Ignoring this host.\n", line);
- }
- buf_off += strlen(line) + 1;
- } else {
- /* last line or we need to read more */
- line = buf + buf_off;
- ret = mxlnd_parse_line(line);
- if (ret != 0) {
- bufsize -= strlen(line) + 1;
- }
- buf_off += strlen(line) + 1;
- }
- }
- offset += bufsize;
- bufsize = MXLND_BUFSIZE;
- }
-
- MXLND_FREE(buf, allocd + 1);
- filp_close(filp, current->files);
- mxlnd_print_hosts();
-
- return 0;
-}
-
-/**
- * mxlnd_init_mx - open the endpoint, set out ID, register the EAGER callback
+ * mxlnd_init_mx - open the endpoint, set our ID, register the EAGER callback
* @ni - the network interface
*
* Returns 0 on success, -1 on failure
mxlnd_init_mx(lnet_ni_t *ni)
{
int ret = 0;
- int found = 0;
+ int hash = 0;
mx_return_t mxret;
- mx_endpoint_addr_t addr;
+ mx_endpoint_addr_t epa;
u32 board = *kmxlnd_tunables.kmx_board;
u32 ep_id = *kmxlnd_tunables.kmx_ep_id;
u64 nic_id = 0LL;
- struct kmx_host *host = NULL;
+ char *ifname = NULL;
+ __u32 ip;
+ __u32 netmask;
+ int up = 0;
+ struct kmx_peer *peer = NULL;
mxret = mx_init();
if (mxret != MX_SUCCESS) {
return -1;
}
- ret = mxlnd_parse_hosts(*kmxlnd_tunables.kmx_hosts);
- if (ret != 0) {
- if (*kmxlnd_tunables.kmx_hosts != NULL) {
- CERROR("mxlnd_parse_hosts(%s) failed\n", *kmxlnd_tunables.kmx_hosts);
+ if (ni->ni_interfaces[0] != NULL) {
+ /* Use the IPoMX interface specified in 'networks=' */
+
+ CLASSERT (LNET_MAX_INTERFACES > 1);
+ if (ni->ni_interfaces[1] != NULL) {
+ CERROR("Multiple interfaces not supported\n");
+ goto failed_with_init;
}
- mx_finalize();
- return -1;
+
+ ifname = ni->ni_interfaces[0];
+ } else {
+ ifname = *kmxlnd_tunables.kmx_default_ipif;
}
- list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) {
- if (strcmp(host->mxh_hostname, system_utsname.nodename) == 0) {
- /* override the defaults and module parameters with
- * the info from the hosts file */
- board = host->mxh_board;
- ep_id = host->mxh_ep_id;
- kmxlnd_data.kmx_localhost = host;
- CDEBUG(D_NET, "my hostname is %s board %d ep_id %d\n", kmxlnd_data.kmx_localhost->mxh_hostname, kmxlnd_data.kmx_localhost->mxh_board, kmxlnd_data.kmx_localhost->mxh_ep_id);
- found = 1;
- break;
- }
+ ret = libcfs_ipif_query(ifname, &up, &ip, &netmask);
+ if (ret != 0) {
+ CERROR("Can't query IPoMX interface %s: %d\n",
+ ifname, ret);
+ goto failed_with_init;
}
- if (found == 0) {
- CERROR("no host entry found for localhost\n");
- mx_finalize();
- return -1;
+ if (!up) {
+ CERROR("Can't query IPoMX interface %s: it's down\n",
+ ifname);
+ goto failed_with_init;
}
- mxret = mx_open_endpoint(board, ep_id, MXLND_MSG_MAGIC,
+ mxret = mx_open_endpoint(board, ep_id, MXLND_MSG_MAGIC,
NULL, 0, &kmxlnd_data.kmx_endpt);
if (mxret != MX_SUCCESS) {
CERROR("mx_open_endpoint() failed with %d\n", mxret);
- mx_finalize();
- return -1;
+ goto failed_with_init;
}
- mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &addr);
- mx_decompose_endpoint_addr(addr, &nic_id, &ep_id);
-
- LASSERT(host != NULL);
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), host->mxh_addr);
+ mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &epa);
+ mx_decompose_endpoint_addr(epa, &nic_id, &ep_id);
+ ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
CDEBUG(D_NET, "My NID is 0x%llx\n", ni->ni_nid);
+ ret = mxlnd_peer_alloc(&peer, ni->ni_nid, board, ep_id, nic_id);
+ if (ret != 0) {
+ goto failed_with_endpoint;
+ }
+ peer->mxp_conn->mxk_epa = epa;
+
+ peer->mxp_incarnation = kmxlnd_data.kmx_incarnation;
+ peer->mxp_incompatible = 0;
+ spin_lock(&peer->mxp_conn->mxk_lock);
+ peer->mxp_conn->mxk_credits = *kmxlnd_tunables.kmx_credits;
+ peer->mxp_conn->mxk_outstanding = 0;
+ peer->mxp_conn->mxk_incarnation = kmxlnd_data.kmx_incarnation;
+ peer->mxp_conn->mxk_timeout = 0;
+ peer->mxp_conn->mxk_status = MXLND_CONN_READY;
+ spin_unlock(&peer->mxp_conn->mxk_lock);
+ mx_set_endpoint_addr_context(peer->mxp_conn->mxk_epa, (void *) peer);
+
+ hash = mxlnd_nid_to_hash(ni->ni_nid);
+ list_add_tail(&peer->mxp_peers, &kmxlnd_data.kmx_peers[hash]);
+ atomic_inc(&kmxlnd_data.kmx_npeers);
+
+ mxlnd_conn_decref(peer->mxp_conn); /* drop 2nd ref taken in peer_alloc */
+
+ kmxlnd_data.kmx_localhost = peer;
+
/* this will catch all unexpected receives. */
mxret = mx_register_unexp_handler(kmxlnd_data.kmx_endpt,
(mx_unexp_handler_t) mxlnd_unexpected_recv,
if (mxret != MX_SUCCESS) {
CERROR("mx_register_unexp_callback() failed with %s\n",
mx_strerror(mxret));
- mx_close_endpoint(kmxlnd_data.kmx_endpt);
- mx_finalize();
- return -1;
+ goto failed_with_peer;
}
mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL, MXLND_COMM_TIMEOUT/HZ*1000);
if (mxret != MX_SUCCESS) {
CERROR("mx_set_request_timeout() failed with %s\n",
mx_strerror(mxret));
- mx_close_endpoint(kmxlnd_data.kmx_endpt);
- mx_finalize();
- return -1;
+ goto failed_with_peer;
}
return 0;
+
+failed_with_peer:
+ mxlnd_conn_decref(peer->mxp_conn);
+ mxlnd_conn_decref(peer->mxp_conn);
+ mxlnd_peer_decref(peer);
+failed_with_endpoint:
+ mx_close_endpoint(kmxlnd_data.kmx_endpt);
+failed_with_init:
+ mx_finalize();
+ return -1;
}
up(&kmxlnd_data.kmx_tx_queue_sem);
mxlnd_sleep(2 * HZ);
+ read_lock(&kmxlnd_data.kmx_global_lock);
+ mxlnd_close_matching_conns(LNET_NID_ANY);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
+
/* fall through */
case MXLND_INIT_THREADS:
mx_close_endpoint(kmxlnd_data.kmx_endpt);
mx_finalize();
- CDEBUG(D_NET, "mxlnd_free_hosts();\n");
- mxlnd_free_hosts();
-
/* fall through */
case MXLND_INIT_RXS:
kmxlnd_data.kmx_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
CDEBUG(D_NET, "my incarnation is %lld\n", kmxlnd_data.kmx_incarnation);
- spin_lock_init (&kmxlnd_data.kmx_global_lock);
+ rwlock_init (&kmxlnd_data.kmx_global_lock);
+ spin_lock_init (&kmxlnd_data.kmx_mem_lock);
INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_req);
spin_lock_init (&kmxlnd_data.kmx_conn_lock);
sema_init(&kmxlnd_data.kmx_conn_sem, 0);
- INIT_LIST_HEAD (&kmxlnd_data.kmx_hosts);
- spin_lock_init (&kmxlnd_data.kmx_hosts_lock);
-
for (i = 0; i < MXLND_HASH_SIZE; i++) {
INIT_LIST_HEAD (&kmxlnd_data.kmx_peers[i]);
}
- rwlock_init (&kmxlnd_data.kmx_peers_lock);
+ //rwlock_init (&kmxlnd_data.kmx_peers_lock);
INIT_LIST_HEAD (&kmxlnd_data.kmx_txs);
INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_idle);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Myricom, Inc. - help@myri.com");
MODULE_DESCRIPTION("Kernel MyrinetExpress LND");
-MODULE_VERSION("0.5.0");
+MODULE_VERSION("0.6.0");
#include <linux/sysctl.h>
#include <linux/random.h>
#include <linux/utsname.h>
+#include <linux/jiffies.h> /* msecs_to_jiffies */
#include <net/sock.h>
#include <linux/in.h>
-#include <linux/netdevice.h> /* these are needed for ARP */
-#include <linux/if_arp.h>
-#include <net/arp.h>
-#include <linux/inetdevice.h>
+#include <asm/byteorder.h> /* __LITTLE_ENDIAN */
+#include <net/arp.h> /* arp table */
+#include <linux/netdevice.h>  /* dev_get_by_name */
+#include <linux/inetdevice.h> /* neigh_lookup, etc. */
+#include <linux/net.h> /* sock_create_kern, kernel_connect, sock_release */
#define DEBUG_SUBSYSTEM S_LND
#error LNET_MAX_IOV is greater then MX_MAX_SEGMENTS
#endif
+#define MXLND_MSG_MAGIC 0x4d583130 /* unique magic 'MX10' */
+#define MXLND_MSG_VERSION 0x02
+
/* Using MX's 64 match bits
* We are using the match bits to specify message type and the cookie. The
* highest four bits (60-63) are reserved for message type. Below we specify
* should allow unique cookies for 4 KB messages at 10 Gbps line rate without
* rollover for about 8 years. That should be enough. */
-/* constants */
-#define MXLND_MASK_ICON_REQ (0xBLL << 60) /* it is a mx_iconnect() completion */
-#define MXLND_MASK_CONN_REQ (0xCLL << 60) /* CONN_REQ msg */
-#define MXLND_MASK_ICON_ACK (0x9LL << 60) /* it is a mx_iconnect() completion */
-#define MXLND_MASK_CONN_ACK (0xALL << 60) /* CONN_ACK msg*/
-#define MXLND_MASK_EAGER (0xELL << 60) /* EAGER msg */
-#define MXLND_MASK_NOOP (0x1LL << 60) /* NOOP msg */
-#define MXLND_MASK_PUT_REQ (0x2LL << 60) /* PUT_REQ msg */
-#define MXLND_MASK_PUT_ACK (0x3LL << 60) /* PUT_ACK msg */
-#define MXLND_MASK_PUT_DATA (0x4LL << 60) /* PUT_DATA msg */
-#define MXLND_MASK_GET_REQ (0x5LL << 60) /* GET_REQ msg */
-#define MXLND_MASK_GET_DATA (0x6LL << 60) /* GET_DATA msg */
-//#define MXLND_MASK_NAK (0x7LL << 60) /* NAK msg */
-
-#define MXLND_MAX_COOKIE ((1LL << 52) - 1) /* when to roll-over the cookie value */
+#define MXLND_MSG_OFFSET 60 /* msg type offset */
+#define MXLND_MSG_BITS (64 - MXLND_MSG_OFFSET)
+#define MXLND_MSG_MASK (((1ULL<<MXLND_MSG_BITS) - 1) << MXLND_MSG_OFFSET)
+#define MXLND_MSG_TYPE(x) (((x) & MXLND_MSG_MASK) >> MXLND_MSG_OFFSET)
+
+#define MXLND_ERROR_OFFSET 52 /* error value offset */
+#define MXLND_ERROR_BITS (MXLND_MSG_OFFSET - MXLND_ERROR_OFFSET)
+#define MXLND_ERROR_MASK (((1ULL<<MXLND_ERROR_BITS) - 1) << MXLND_ERROR_OFFSET)
+#define MXLND_ERROR_VAL(x) (((x) & MXLND_ERROR_MASK) >> MXLND_ERROR_OFFSET)
+
+/* message types */
+#define MXLND_MSG_ICON_REQ 0xb /* mx_iconnect() before CONN_REQ */
+#define MXLND_MSG_CONN_REQ 0xc /* connection request */
+#define MXLND_MSG_ICON_ACK 0x9 /* mx_iconnect() before CONN_ACK */
+#define MXLND_MSG_CONN_ACK 0xa /* connection request response */
+#define MXLND_MSG_BYE 0xd /* disconnect msg */
+#define MXLND_MSG_EAGER 0xe /* eager message */
+#define MXLND_MSG_NOOP 0x1 /* no msg, return credits */
+#define MXLND_MSG_PUT_REQ 0x2 /* put request src->sink */
+#define MXLND_MSG_PUT_ACK 0x3 /* put ack src<-sink */
+#define MXLND_MSG_PUT_DATA 0x4 /* put payload src->sink */
+#define MXLND_MSG_GET_REQ 0x5 /* get request sink->src */
+#define MXLND_MSG_GET_DATA 0x6 /* get payload sink<-src */
+
+/* when to roll-over the cookie value */
+#define MXLND_MAX_COOKIE ((1ULL << MXLND_ERROR_OFFSET) - 1)
+
#define MXLND_NCOMPLETIONS (MXLND_N_SCHED + 2) /* max threads for completion array */
/* defaults for configurable parameters */
/* debugging features */
#define MXLND_CKSUM 0 /* checksum kmx_msg_t */
-#define MXLND_DEBUG 0 /* turn on printk()s */
-
-extern inline void mxlnd_noop(char *s, ...);
-#if MXLND_DEBUG
- #define MXLND_PRINT printk
-#else
- #define MXLND_PRINT mxlnd_noop
-#endif
+#define MXLND_DEBUG 0 /* additional CDEBUG messages */
/* provide wrappers around LIBCFS_ALLOC/FREE to keep MXLND specific
* memory usage stats that include pages */
#define MXLND_ALLOC(x, size) \
do { \
- spin_lock(&kmxlnd_data.kmx_global_lock); \
+ spin_lock(&kmxlnd_data.kmx_mem_lock); \
kmxlnd_data.kmx_mem_used += size; \
- spin_unlock(&kmxlnd_data.kmx_global_lock); \
+ spin_unlock(&kmxlnd_data.kmx_mem_lock); \
LIBCFS_ALLOC(x, size); \
- if (x == NULL) { \
- spin_lock(&kmxlnd_data.kmx_global_lock); \
+ if (unlikely(x == NULL)) { \
+ spin_lock(&kmxlnd_data.kmx_mem_lock); \
kmxlnd_data.kmx_mem_used -= size; \
- spin_unlock(&kmxlnd_data.kmx_global_lock); \
+ spin_unlock(&kmxlnd_data.kmx_mem_lock); \
} \
} while (0)
#define MXLND_FREE(x, size) \
do { \
- spin_lock(&kmxlnd_data.kmx_global_lock); \
+ spin_lock(&kmxlnd_data.kmx_mem_lock); \
kmxlnd_data.kmx_mem_used -= size; \
- spin_unlock(&kmxlnd_data.kmx_global_lock); \
+ spin_unlock(&kmxlnd_data.kmx_mem_lock); \
LIBCFS_FREE(x, size); \
} while (0)
int *kmx_credits; /* concurrent sends to 1 peer */
int *kmx_board; /* MX board (NIC) number */
int *kmx_ep_id; /* MX endpoint number */
+ char **kmx_default_ipif; /* IPoMX interface name */
int *kmx_polling; /* if 0, block. if > 0, poll this many
iterations before blocking */
- char **kmx_hosts; /* Location of hosts file, if used */
} kmx_tunables_t;
-/* structure to hold IP-to-hostname resolution data */
-struct kmx_host {
- struct kmx_peer *mxh_peer; /* pointer to matching peer */
- u32 mxh_addr; /* IP address as int */
- char *mxh_hostname; /* peer's hostname */
- u32 mxh_board; /* peer's board rank */
- u32 mxh_ep_id; /* peer's MX endpoint ID */
- struct list_head mxh_list; /* position on kmx_hosts */
- spinlock_t mxh_lock; /* lock */
-};
-
/* global interface state */
typedef struct kmx_data
{
lnet_ni_t *kmx_ni; /* the LND instance */
u64 kmx_incarnation; /* my incarnation value - unused */
long kmx_mem_used; /* memory used */
- struct kmx_host *kmx_localhost; /* pointer to my kmx_host info */
+ struct kmx_peer *kmx_localhost; /* pointer to my kmx_peer info */
mx_endpoint_t kmx_endpt; /* the MX endpoint */
- spinlock_t kmx_global_lock; /* global lock */
+ rwlock_t kmx_global_lock; /* global lock */
+ spinlock_t kmx_mem_lock; /* memory accounting lock */
struct list_head kmx_conn_req; /* list of connection requests */
spinlock_t kmx_conn_lock; /* connection list lock */
struct semaphore kmx_conn_sem; /* semaphore for connection request list */
- struct list_head kmx_hosts; /* host lookup info */
- spinlock_t kmx_hosts_lock; /* hosts list lock */
-
struct list_head kmx_peers[MXLND_HASH_SIZE];
/* list of all known peers */
- rwlock_t kmx_peers_lock; /* peer list rw lock */
+ //rwlock_t kmx_peers_lock; /* peer list rw lock */
atomic_t kmx_npeers; /* number of peers */
struct list_head kmx_txs; /* all tx descriptors */
#define MXLND_INIT_THREADS 5 /* waitd, timeoutd, tx_queued threads */
#define MXLND_INIT_ALL 6 /* startup completed */
-#include "mxlnd_wire.h"
+/************************************************************************
+ * MXLND Wire message format.
+ * These are sent in sender's byte order (i.e. receiver flips).
+ */
+
+typedef struct kmx_connreq_msg
+{
+ u32 mxcrm_queue_depth; /* per peer max messages in flight */
+ u32 mxcrm_eager_size; /* size of preposted eager messages */
+} WIRE_ATTR kmx_connreq_msg_t;
+
+typedef struct kmx_eager_msg
+{
+ lnet_hdr_t mxem_hdr; /* lnet header */
+ char mxem_payload[0]; /* piggy-backed payload */
+} WIRE_ATTR kmx_eager_msg_t;
+
+typedef struct kmx_putreq_msg
+{
+ lnet_hdr_t mxprm_hdr; /* lnet header */
+ u64 mxprm_cookie; /* opaque completion cookie */
+} WIRE_ATTR kmx_putreq_msg_t;
+
+typedef struct kmx_putack_msg
+{
+ u64 mxpam_src_cookie; /* reflected completion cookie */
+ u64 mxpam_dst_cookie; /* opaque completion cookie */
+} WIRE_ATTR kmx_putack_msg_t;
+
+typedef struct kmx_getreq_msg
+{
+ lnet_hdr_t mxgrm_hdr; /* lnet header */
+ u64 mxgrm_cookie; /* opaque completion cookie */
+} WIRE_ATTR kmx_getreq_msg_t;
+
+typedef struct kmx_msg
+{
+ /* First two fields fixed for all time */
+ u32 mxm_magic; /* MXLND message */
+ u16 mxm_version; /* version number */
+
+ u8 mxm_type; /* message type */
+ u8 mxm_credits; /* returned credits */
+ u32 mxm_nob; /* # of bytes in whole message */
+ u32 mxm_cksum; /* checksum (0 == no checksum) */
+ u64 mxm_srcnid; /* sender's NID */
+ u64 mxm_srcstamp; /* sender's incarnation */
+ u64 mxm_dstnid; /* destination's NID */
+ u64 mxm_dststamp; /* destination's incarnation */
+ u64 mxm_seq; /* sequence number */
+
+ union {
+ kmx_connreq_msg_t conn_req;
+ kmx_eager_msg_t eager;
+ kmx_putreq_msg_t put_req;
+ kmx_putack_msg_t put_ack;
+ kmx_getreq_msg_t get_req;
+ } WIRE_ATTR mxm_u;
+} WIRE_ATTR kmx_msg_t;
+
+/***********************************************************************/
enum kmx_req_type {
MXLND_REQ_TX = 0,
{
lnet_nid_t mxp_nid; /* peer's LNET NID */
u64 mxp_incarnation; /* peer's incarnation value */
+ u32 mxp_sid; /* MX session ID */
atomic_t mxp_refcount; /* reference counts */
- struct kmx_host *mxp_host; /* peer lookup info */
+ u32 mxp_ip; /* IP address as int */
+ u32 mxp_board; /* peer's board rank */
+ u32 mxp_ep_id; /* peer's MX endpoint ID */
u64 mxp_nic_id; /* remote's MX nic_id for mx_connect() */
struct list_head mxp_peers; /* for placing on kmx_peers */
- spinlock_t mxp_lock; /* lock */
struct list_head mxp_conns; /* list of connections */
struct kmx_conn *mxp_conn; /* current connection */
extern int mxlnd_ctx_alloc(struct kmx_ctx **ctxp, enum kmx_req_type type);
extern void mxlnd_ctx_free(struct kmx_ctx *ctx);
extern void mxlnd_ctx_init(struct kmx_ctx *ctx);
-extern lnet_nid_t mxlnd_nic_id2nid(lnet_ni_t *ni, u64 nic_id);
-extern u64 mxlnd_nid2nic_id(lnet_nid_t nid);
+extern int mxlnd_peer_alloc(struct kmx_peer **peerp, lnet_nid_t nid,
+ u32 board, u32 ep_id, u64 nic_id);
/* in mxlnd_cb.c */
void mxlnd_eager_recv(void *context, uint64_t match_value, uint32_t length);
void *data_if_available);
extern void mxlnd_peer_free(struct kmx_peer *peer);
extern void mxlnd_conn_free(struct kmx_conn *conn);
+extern void mxlnd_conn_disconnect(struct kmx_conn *conn, int mx_dis, int send_bye);
+extern int mxlnd_close_matching_conns(lnet_nid_t nid);
extern void mxlnd_sleep(unsigned long timeout);
extern int mxlnd_tx_queued(void *arg);
extern void mxlnd_handle_rx_completion(struct kmx_ctx *rx);
extern int mxlnd_timeoutd(void *arg);
extern int mxlnd_connd(void *arg);
+/**
+ * mxlnd_nid_to_hash - hash the nid
+ * @nid - LNET ID
+ *
+ * Takes the u64 nid and XORs the lowest N bits by the next lowest N bits.
+ */
+static inline int
+mxlnd_nid_to_hash(lnet_nid_t nid)
+{
+ return (nid & MXLND_HASH_MASK) ^
+ ((nid & (MXLND_HASH_MASK << MXLND_HASH_BITS)) >> MXLND_HASH_BITS);
+}
+
+
#define mxlnd_peer_addref(peer) \
do { \
LASSERT(peer != NULL); \
#include "mxlnd.h"
+mx_endpoint_addr_t MX_EPA_NULL; /* used to detect an unset (all-zero) endpoint address */
+
+inline int
+mxlnd_endpoint_addr_null(mx_endpoint_addr_t epa)
+{
+ /* if memcmp() == 0, it is NULL */
+ return !(memcmp(&epa, &MX_EPA_NULL, sizeof(epa)));
+}
+
inline void mxlnd_noop(char *s, ...)
{
return;
return "MXLND_MSG_CONN_REQ";
case MXLND_MSG_CONN_ACK:
return "MXLND_MSG_CONN_ACK";
+ case MXLND_MSG_BYE:
+ return "MXLND_MSG_BYE";
case MXLND_MSG_NOOP:
return "MXLND_MSG_NOOP";
case MXLND_MSG_PUT_REQ:
{
u64 type = (u64) ctx->mxc_msg_type;
u64 err = (u64) error;
- u64 match = 0LL;
+ u64 match = 0ULL;
LASSERT(ctx->mxc_msg_type != 0);
- LASSERT(ctx->mxc_cookie >> 52 == 0);
- match = (type << 60) | (err << 52) | ctx->mxc_cookie;
+ LASSERT(ctx->mxc_cookie >> MXLND_ERROR_OFFSET == 0);
+ match = (type << MXLND_MSG_OFFSET) | (err << MXLND_ERROR_OFFSET) | ctx->mxc_cookie;
return match;
}
static inline void
mxlnd_parse_match(u64 match, u8 *msg_type, u8 *error, u64 *cookie)
{
- *msg_type = (u8) (match >> 60);
- *error = (u8) ((match >> 52) & 0xFF);
- *cookie = match & 0xFFFFFFFFFFFFFLL;
- LASSERT(match == (MXLND_MASK_ICON_REQ & 0xF000000000000000LL) ||
- match == (MXLND_MASK_ICON_ACK & 0xF000000000000000LL) ||
- *msg_type == MXLND_MSG_EAGER ||
+ *msg_type = (u8) MXLND_MSG_TYPE(match);
+ *error = (u8) MXLND_ERROR_VAL(match);
+ *cookie = match & MXLND_MAX_COOKIE;
+ LASSERT(*msg_type == MXLND_MSG_EAGER ||
+ *msg_type == MXLND_MSG_ICON_REQ ||
*msg_type == MXLND_MSG_CONN_REQ ||
+ *msg_type == MXLND_MSG_ICON_ACK ||
*msg_type == MXLND_MSG_CONN_ACK ||
+ *msg_type == MXLND_MSG_BYE ||
*msg_type == MXLND_MSG_NOOP ||
*msg_type == MXLND_MSG_PUT_REQ ||
*msg_type == MXLND_MSG_PUT_ACK ||
return tx;
}
+void
+mxlnd_conn_disconnect(struct kmx_conn *conn, int mx_dis, int send_bye);
+
int
mxlnd_put_idle_tx(struct kmx_ctx *tx)
{
return -EINVAL;
}
if (!(tx->mxc_status.code == MX_STATUS_SUCCESS ||
- tx->mxc_status.code == MX_STATUS_TRUNCATED))
+ tx->mxc_status.code == MX_STATUS_TRUNCATED)) {
+ struct kmx_conn *conn = tx->mxc_conn;
+
result = -EIO;
+ mxlnd_conn_disconnect(conn, 0, 1);
+ }
lntmsg[0] = tx->mxc_lntmsg[0];
lntmsg[1] = tx->mxc_lntmsg[1];
list_empty (&conn->mxk_tx_free_queue) &&
list_empty (&conn->mxk_pending));
if (!list_empty(&conn->mxk_list)) {
- spin_lock(&peer->mxp_lock);
list_del_init(&conn->mxk_list);
if (peer->mxp_conn == conn) {
peer->mxp_conn = NULL;
- if (!(conn->mxk_epa.stuff[0] == 0 && conn->mxk_epa.stuff[1] == 0)) {
+ if (!mxlnd_endpoint_addr_null(conn->mxk_epa))
mx_set_endpoint_addr_context(conn->mxk_epa,
(void *) NULL);
- }
}
- spin_unlock(&peer->mxp_lock);
}
mxlnd_peer_decref(conn->mxk_peer); /* drop conn's ref to peer */
MXLND_FREE (conn, sizeof (*conn));
/**
* mxlnd_conn_disconnect - shutdown a connection
* @conn - a kmx_conn pointer
+ * @mx_dis - call mx_disconnect()
+ * @send_bye - send peer a BYE msg
*
* This function sets the status to DISCONNECT, completes queued
* txs with failure, calls mx_disconnect, which will complete
* pending txs and matched rxs with failure.
*/
void
-mxlnd_conn_disconnect(struct kmx_conn *conn, int mx_dis, int notify)
+mxlnd_conn_disconnect(struct kmx_conn *conn, int mx_dis, int send_bye)
{
+ mx_endpoint_addr_t epa = conn->mxk_epa;
struct list_head *tmp = NULL;
+ int valid = !mxlnd_endpoint_addr_null(epa);
spin_lock(&conn->mxk_lock);
if (conn->mxk_status == MXLND_CONN_DISCONNECT) {
/* cancel pending rxs */
mxlnd_conn_cancel_pending_rxs(conn);
+ if (send_bye && valid) {
+ u64 match = ((u64) MXLND_MSG_BYE) << MXLND_MSG_OFFSET;
+ /* send a BYE to the peer */
+ CDEBUG(D_NET, "%s: sending a BYE msg to %s\n", __func__,
+ libcfs_nid2str(conn->mxk_peer->mxp_nid));
+ mx_kisend(kmxlnd_data.kmx_endpt, NULL, 0, MX_PIN_PHYSICAL,
+ epa, match, NULL, NULL);
+ /* wait to allow the peer to ack our message */
+ mxlnd_sleep(msecs_to_jiffies(20));
+ }
+
if (kmxlnd_data.kmx_shutdown != 1) {
+ time_t last_alive = 0;
+ unsigned long last_msg = 0;
- if (mx_dis) mx_disconnect(kmxlnd_data.kmx_endpt, conn->mxk_epa);
+ /* notify LNET that we are giving up on this peer */
+ if (time_after(conn->mxk_last_rx, conn->mxk_last_tx))
+ last_msg = conn->mxk_last_rx;
+ else
+ last_msg = conn->mxk_last_tx;
- if (notify) {
- time_t last_alive = 0;
- unsigned long last_msg = 0;
+ last_alive = cfs_time_current_sec() -
+ cfs_duration_sec(cfs_time_current() - last_msg);
+ lnet_notify(kmxlnd_data.kmx_ni, conn->mxk_peer->mxp_nid, 0, last_alive);
- /* notify LNET that we are giving up on this peer */
- if (time_after(conn->mxk_last_rx, conn->mxk_last_tx)) {
- last_msg = conn->mxk_last_rx;
- } else {
- last_msg = conn->mxk_last_tx;
- }
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() - last_msg);
- lnet_notify(kmxlnd_data.kmx_ni, conn->mxk_peer->mxp_nid, 0, last_alive);
- }
+ if (mx_dis && valid)
+ mx_disconnect(kmxlnd_data.kmx_endpt, epa);
}
mxlnd_conn_decref(conn); /* drop the owning peer's reference */
mxlnd_conn_alloc(struct kmx_conn **connp, struct kmx_peer *peer)
{
int ret = 0;
- spin_lock(&peer->mxp_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
ret = mxlnd_conn_alloc_locked(connp, peer);
- spin_unlock(&peer->mxp_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
return ret;
}
void
mxlnd_peer_free(struct kmx_peer *peer)
{
- CDEBUG(D_NET, "freeing peer 0x%p\n", peer);
+ CDEBUG(D_NET, "freeing peer 0x%p %s\n", peer,
+ peer == kmxlnd_data.kmx_localhost ? "(*** localhost ***)" : "");
LASSERT (atomic_read(&peer->mxp_refcount) == 0);
- if (peer->mxp_host != NULL) {
- spin_lock(&peer->mxp_host->mxh_lock);
- peer->mxp_host->mxh_peer = NULL;
- spin_unlock(&peer->mxp_host->mxh_lock);
- }
+ if (peer == kmxlnd_data.kmx_localhost)
+ LASSERT(kmxlnd_data.kmx_shutdown);
+
if (!list_empty(&peer->mxp_peers)) {
/* assume we are locked */
list_del_init(&peer->mxp_peers);
return;
}
-void
-mxlnd_peer_hostname_to_nic_id(struct kmx_peer *peer)
+#define MXLND_LOOKUP_COUNT 10
+
+/* We only want the MAC address of the peer's Myricom NIC. We
+ * require that each node has the IPoMX interface (myriN) up.
+ * We will not pass any traffic over IPoMX, but it allows us
+ * to get the MAC address. */
+static int
+mxlnd_ip2nic_id(u32 ip, u64 *nic_id)
{
- u64 nic_id = 0LL;
- char name[MX_MAX_HOSTNAME_LEN + 1];
- mx_return_t mxret = MX_SUCCESS;
+ int ret = 0;
+ int try = 1;
+ int fatal = 0;
+ u64 tmp_id = 0ULL;
+ unsigned char *haddr = NULL;
+ struct net_device *dev = NULL;
+ struct neighbour *n = NULL;
+ cfs_socket_t *sock = NULL;
+ __be32 dst_ip = htonl(ip);
- memset(name, 0, sizeof(name));
- snprintf(name, sizeof(name), "%s:%d", peer->mxp_host->mxh_hostname, peer->mxp_host->mxh_board);
- mxret = mx_hostname_to_nic_id(name, &nic_id);
- if (mxret == MX_SUCCESS) {
- peer->mxp_nic_id = nic_id;
- } else {
- CDEBUG(D_NETERROR, "mx_hostname_to_nic_id() failed for %s "
- "with %s\n", name, mx_strerror(mxret));
- mxret = mx_hostname_to_nic_id(peer->mxp_host->mxh_hostname, &nic_id);
- if (mxret == MX_SUCCESS) {
- peer->mxp_nic_id = nic_id;
- } else {
- CDEBUG(D_NETERROR, "mx_hostname_to_nic_id() failed for %s "
- "with %s\n", peer->mxp_host->mxh_hostname,
- mx_strerror(mxret));
- }
+ dev = dev_get_by_name(*kmxlnd_tunables.kmx_default_ipif);
+ if (dev == NULL) {
+ return -ENODEV;
}
- return;
+
+ haddr = (unsigned char *) &tmp_id + 2; /* MAC is only 6 bytes */
+
+ do {
+ n = neigh_lookup(&arp_tbl, &dst_ip, dev);
+ if (n) {
+ n->used = jiffies;
+ if (n->nud_state & NUD_VALID) {
+ memcpy(haddr, n->ha, dev->addr_len);
+ neigh_release(n);
+ ret = 0;
+ break;
+ }
+ }
+ /* not found, try to connect (force an arp) */
+ libcfs_sock_connect(&sock, &fatal, 0, 0, ip, 987);
+ if (!fatal)
+ libcfs_sock_release(sock);
+ schedule_timeout_interruptible(HZ/10 * try); /* add a little backoff */
+ } while (try++ < MXLND_LOOKUP_COUNT);
+
+ dev_put(dev);
+
+ if (tmp_id == 0ULL)
+ ret = -EHOSTUNREACH;
+#ifdef __LITTLE_ENDIAN
+ *nic_id = ___arch__swab64(tmp_id);
+#else
+ *nic_id = tmp_id;
+#endif
+ return ret;
}
/**
* Returns 0 on success and -ENOMEM on failure
*/
int
-mxlnd_peer_alloc(struct kmx_peer **peerp, lnet_nid_t nid)
+mxlnd_peer_alloc(struct kmx_peer **peerp, lnet_nid_t nid, u32 board, u32 ep_id, u64 nic_id)
{
int i = 0;
int ret = 0;
- u32 addr = LNET_NIDADDR(nid);
+ u32 ip = LNET_NIDADDR(nid);
struct kmx_peer *peer = NULL;
- struct kmx_host *host = NULL;
LASSERT (nid != LNET_NID_ANY && nid != 0LL);
memset(peer, 0, sizeof(*peer));
- list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) {
- if (addr == host->mxh_addr) {
- peer->mxp_host = host;
- spin_lock(&host->mxh_lock);
- host->mxh_peer = peer;
- spin_unlock(&host->mxh_lock);
- break;
- }
- }
- if (peer->mxp_host == NULL) {
- CDEBUG(D_NETERROR, "unknown host for NID 0x%llx\n", nid);
- MXLND_FREE(peer, sizeof(*peer));
- return -ENXIO;
- }
-
peer->mxp_nid = nid;
/* peer->mxp_incarnation */
atomic_set(&peer->mxp_refcount, 1); /* ref for kmx_peers list */
- mxlnd_peer_hostname_to_nic_id(peer);
+
+ peer->mxp_ip = ip;
+ peer->mxp_ep_id = *kmxlnd_tunables.kmx_ep_id;
+ peer->mxp_board = board;
+ peer->mxp_nic_id = nic_id;
+
+ if (nic_id == 0ULL) {
+ ret = mxlnd_ip2nic_id(ip, &nic_id);
+ if (ret != 0)
+ CERROR("%s: mxlnd_ip2nic_id() returned %d\n", __func__, ret);
+ mx_nic_id_to_board_number(nic_id, &peer->mxp_board);
+ }
+
+ peer->mxp_nic_id = nic_id; /* may be 0ULL if ip2nic_id() failed */
INIT_LIST_HEAD(&peer->mxp_peers);
- spin_lock_init(&peer->mxp_lock);
INIT_LIST_HEAD(&peer->mxp_conns);
ret = mxlnd_conn_alloc(&peer->mxp_conn, peer); /* adds 2nd conn ref here... */
if (ret != 0) {
return 0;
}
-/**
- * mxlnd_nid_to_hash - hash the nid
- * @nid - msg pointer
- *
- * Takes the u64 nid and XORs the lowest N bits by the next lowest N bits.
- */
-static inline int
-mxlnd_nid_to_hash(lnet_nid_t nid)
-{
- return (nid & MXLND_HASH_MASK) ^
- ((nid & (MXLND_HASH_MASK << MXLND_HASH_BITS)) >> MXLND_HASH_BITS);
-}
-
static inline struct kmx_peer *
mxlnd_find_peer_by_nid_locked(lnet_nid_t nid)
{
{
struct kmx_peer *peer = NULL;
- read_lock(&kmxlnd_data.kmx_peers_lock);
+ read_lock(&kmxlnd_data.kmx_global_lock);
peer = mxlnd_find_peer_by_nid_locked(nid);
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
return peer;
}
{
int ret = 0;
mx_return_t mxret = MX_SUCCESS;
- uint64_t mask = 0xF00FFFFFFFFFFFFFLL;
+ uint64_t mask = ~(MXLND_ERROR_MASK);
rx->mxc_msg_type = msg_type;
rx->mxc_lntmsg[0] = lntmsg; /* may be NULL if EAGER */
cookie, mask, (void *) rx, &rx->mxc_mxreq);
if (mxret != MX_SUCCESS) {
mxlnd_deq_pending_ctx(rx);
- CDEBUG(D_NETERROR, "mx_kirecv() failed with %s (%d)\n",
+ CDEBUG(D_NETERROR, "mx_kirecv() failed with %s (%d)\n",
mx_strerror(mxret), (int) mxret);
return -1;
}
* unexpected receives
* @context - NULL, ignore
* @source - the peer's mx_endpoint_addr_t
- * @match_value - the msg's bit, should be MXLND_MASK_EAGER
+ * @match_value - the msg's bit, should be MXLND_MSG_EAGER
* @length - length of incoming message
* @data_if_available - ignore
*
mx_ksegment_t seg;
u8 msg_type = 0;
u8 error = 0;
- u64 cookie = 0LL;
+ u64 cookie = 0ULL;
if (context != NULL) {
CDEBUG(D_NETERROR, "unexpected receive with non-NULL context\n");
CDEBUG(D_NET, "unexpected_recv() bits=0x%llx length=%d\n", match_value, length);
#endif
+ mxlnd_parse_match(match_value, &msg_type, &error, &cookie);
+ if (msg_type == MXLND_MSG_BYE) {
+ struct kmx_peer *peer = NULL;
+
+ mx_get_endpoint_addr_context(source, (void **) &peer);
+ if (peer && peer->mxp_conn) {
+ CDEBUG(D_NET, "peer %s sent BYE msg\n",
+ libcfs_nid2str(peer->mxp_nid));
+ mxlnd_conn_disconnect(peer->mxp_conn, 1, 0);
+ }
+
+ return MX_RECV_FINISHED;
+ }
+
rx = mxlnd_get_idle_rx();
if (rx != NULL) {
- mxlnd_parse_match(match_value, &msg_type, &error, &cookie);
if (length <= MXLND_EAGER_SIZE) {
ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, length);
} else {
/* NOTE to avoid a peer disappearing out from under us,
* read lock the peers lock first */
- read_lock(&kmxlnd_data.kmx_peers_lock);
+ read_lock(&kmxlnd_data.kmx_global_lock);
mx_get_endpoint_addr_context(source, (void **) &peer);
if (peer != NULL) {
mxlnd_peer_addref(peer); /* add a ref... */
- spin_lock(&peer->mxp_lock);
conn = peer->mxp_conn;
if (conn) {
mxlnd_conn_addref(conn); /* add ref until rx completed */
mxlnd_peer_decref(peer); /* and drop peer ref */
rx->mxc_conn = conn;
}
- spin_unlock(&peer->mxp_lock);
rx->mxc_peer = peer;
rx->mxc_nid = peer->mxp_nid;
}
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
} else {
CDEBUG(D_NETERROR, "could not post receive\n");
mxlnd_put_idle_rx(rx);
/* ret != 0 */
CDEBUG(D_NETERROR, "disconnected peer - dropping rx\n");
}
- seg.segment_ptr = 0LL;
+ seg.segment_ptr = 0ULL;
seg.segment_length = 0;
mx_kirecv(kmxlnd_data.kmx_endpt, &seg, 1, MX_PIN_PHYSICAL,
- match_value, 0xFFFFFFFFFFFFFFFFLL, NULL, NULL);
+ match_value, ~0ULL, NULL, NULL);
}
return MX_RECV_CONTINUE;
int ret = -ENOENT;
struct kmx_peer *peer = NULL;
- read_lock(&kmxlnd_data.kmx_peers_lock);
+ read_lock(&kmxlnd_data.kmx_global_lock);
for (i = 0; i < MXLND_HASH_SIZE; i++) {
list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) {
- if (index-- > 0)
- continue;
-
- *nidp = peer->mxp_nid;
- *count = atomic_read(&peer->mxp_refcount);
- ret = 0;
- break;
+ if (index-- == 0) {
+ *nidp = peer->mxp_nid;
+ *count = atomic_read(&peer->mxp_refcount);
+ ret = 0;
+ break;
+ }
}
}
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
return ret;
}
mxlnd_del_peer_locked(struct kmx_peer *peer)
{
list_del_init(&peer->mxp_peers); /* remove from the global list */
- if (peer->mxp_conn) mxlnd_conn_disconnect(peer->mxp_conn, 1, 0);
+ if (peer->mxp_conn) mxlnd_conn_disconnect(peer->mxp_conn, 1, 1);
mxlnd_peer_decref(peer); /* drop global list ref */
return;
}
if (nid != LNET_NID_ANY) {
peer = mxlnd_find_peer_by_nid(nid); /* adds peer ref */
}
- write_lock(&kmxlnd_data.kmx_peers_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
if (nid != LNET_NID_ANY) {
if (peer == NULL) {
ret = -ENOENT;
+                } else if (peer == kmxlnd_data.kmx_localhost) {
+ mxlnd_peer_decref(peer); /* and drops it */
+ CERROR("cannot free this host's NID 0x%llx\n", nid);
} else {
mxlnd_peer_decref(peer); /* and drops it */
mxlnd_del_peer_locked(peer);
for (i = 0; i < MXLND_HASH_SIZE; i++) {
list_for_each_entry_safe(peer, next,
&kmxlnd_data.kmx_peers[i], mxp_peers) {
- mxlnd_del_peer_locked(peer);
+ if (peer != kmxlnd_data.kmx_localhost)
+ mxlnd_del_peer_locked(peer);
}
}
}
- write_unlock(&kmxlnd_data.kmx_peers_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
return ret;
}
struct kmx_peer *peer = NULL;
struct kmx_conn *conn = NULL;
- read_lock(&kmxlnd_data.kmx_peers_lock);
+ read_lock(&kmxlnd_data.kmx_global_lock);
for (i = 0; i < MXLND_HASH_SIZE; i++) {
list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) {
- spin_lock(&peer->mxp_lock);
list_for_each_entry(conn, &peer->mxp_conns, mxk_list) {
if (index-- > 0) {
continue;
}
mxlnd_conn_addref(conn); /* add ref here, dec in ctl() */
- spin_unlock(&peer->mxp_lock);
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
return conn;
}
- spin_unlock(&peer->mxp_lock);
}
}
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
return NULL;
}
struct kmx_conn *conn = NULL;
struct kmx_conn *next = NULL;
- spin_lock(&peer->mxp_lock);
- list_for_each_entry_safe(conn, next, &peer->mxp_conns, mxk_list) {
- mxlnd_conn_disconnect(conn, 0 , 0);
- }
- spin_unlock(&peer->mxp_lock);
+ if (peer == kmxlnd_data.kmx_localhost) return;
+
+ list_for_each_entry_safe(conn, next, &peer->mxp_conns, mxk_list)
+ mxlnd_conn_disconnect(conn, 0, 1);
+
return;
}
int ret = 0;
struct kmx_peer *peer = NULL;
- read_lock(&kmxlnd_data.kmx_peers_lock);
+ read_lock(&kmxlnd_data.kmx_global_lock);
if (nid != LNET_NID_ANY) {
peer = mxlnd_find_peer_by_nid(nid); /* adds peer ref */
if (peer == NULL) {
mxlnd_close_matching_conns_locked(peer);
}
}
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
return ret;
}
mxlnd_init_tx_msg(tx, type, sizeof(kmx_putack_msg_t), tx->mxc_nid);
tx->mxc_cookie = cookie;
tx->mxc_msg->mxm_u.put_ack.mxpam_src_cookie = cookie;
- tx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie = ((u64) status << 52); /* error code */
+ tx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie = ((u64) status << MXLND_ERROR_OFFSET); /* error code */
tx->mxc_match = mxlnd_create_match(tx, status);
mxlnd_queue_tx(tx);
LASSERT(lntmsg != NULL);
LASSERT(peer != NULL);
LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA);
- LASSERT((cookie>>52) == 0);
+ LASSERT((cookie>>MXLND_ERROR_OFFSET) == 0);
tx = mxlnd_get_idle_tx();
if (tx == NULL) {
int
mxlnd_recv_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, struct kmx_ctx *rx, u8 msg_type, u64 cookie)
{
- int ret = 0;
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- mx_return_t mxret = MX_SUCCESS;
+ int ret = 0;
+ lnet_process_id_t target = lntmsg->msg_target;
+ unsigned int niov = lntmsg->msg_niov;
+ struct iovec *iov = lntmsg->msg_iov;
+ lnet_kiov_t *kiov = lntmsg->msg_kiov;
+ unsigned int offset = lntmsg->msg_offset;
+ unsigned int nob = lntmsg->msg_len;
+ mx_return_t mxret = MX_SUCCESS;
+ u64 mask = ~(MXLND_ERROR_MASK);
/* above assumes MXLND_MSG_PUT_DATA */
if (msg_type == MXLND_MSG_GET_DATA) {
LASSERT(lntmsg != NULL);
LASSERT(rx != NULL);
LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA);
- LASSERT((cookie>>52) == 0); /* ensure top 12 bits are 0 */
+ LASSERT((cookie>>MXLND_ERROR_OFFSET) == 0); /* ensure top 12 bits are 0 */
rx->mxc_msg_type = msg_type;
rx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT;
mxret = mx_kirecv(kmxlnd_data.kmx_endpt,
rx->mxc_seg_list, rx->mxc_nseg,
rx->mxc_pin_type, rx->mxc_match,
- 0xF00FFFFFFFFFFFFFLL, (void *) rx,
+ mask, (void *) rx,
&rx->mxc_mxreq);
if (mxret != MX_SUCCESS) {
if (rx->mxc_conn != NULL) {
if (unlikely(peer->mxp_incompatible)) {
mxlnd_peer_decref(peer); /* drop ref taken above */
} else {
- spin_lock(&peer->mxp_lock);
+ read_lock(&kmxlnd_data.kmx_global_lock);
conn = peer->mxp_conn;
if (conn) {
mxlnd_conn_addref(conn);
mxlnd_peer_decref(peer); /* drop peer ref taken above */
}
- spin_unlock(&peer->mxp_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
}
}
+ CDEBUG(D_NET, "%s: peer 0x%llx is 0x%p\n", __func__, nid, peer);
if (conn == NULL && peer != NULL) {
- CDEBUG(D_NETERROR, "conn==NULL peer=0x%p nid=0x%llx payload_nob=%d type=%s\n",
+ CDEBUG(D_NET, "conn==NULL peer=0x%p nid=0x%llx payload_nob=%d type=%s\n",
peer, nid, payload_nob, mxlnd_lnetmsg_to_str(type));
}
struct kmx_msg *txmsg = NULL;
struct kmx_peer *peer = rx->mxc_peer;
struct kmx_conn *conn = peer->mxp_conn;
- u64 cookie = 0LL;
+ u64 cookie = 0ULL;
int msg_type = rxmsg->mxm_type;
int repost = 1;
int credit = 0;
peer = mxlnd_find_peer_by_nid(tx->mxc_nid); /* adds peer ref */
if (peer != NULL) {
tx->mxc_peer = peer;
- spin_lock(&peer->mxp_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
if (peer->mxp_conn == NULL) {
ret = mxlnd_conn_alloc_locked(&peer->mxp_conn, peer);
if (ret != 0) {
/* out of memory, give up and fail tx */
tx->mxc_status.code = -ENOMEM;
- spin_unlock(&peer->mxp_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
mxlnd_peer_decref(peer);
mxlnd_put_idle_tx(tx);
continue;
}
tx->mxc_conn = peer->mxp_conn;
mxlnd_conn_addref(tx->mxc_conn); /* for this tx */
- spin_unlock(&peer->mxp_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
mxlnd_peer_decref(peer); /* drop peer ref taken above */
mxlnd_queue_tx(tx);
found = 1;
tx->mxc_msg_type != MXLND_MSG_GET_DATA);
/* create peer */
/* adds conn ref for this function */
- ret = mxlnd_peer_alloc(&peer, tx->mxc_nid);
+ ret = mxlnd_peer_alloc(&peer, tx->mxc_nid,
+ *kmxlnd_tunables.kmx_board,
+ *kmxlnd_tunables.kmx_ep_id, 0ULL);
if (ret != 0) {
/* finalize message */
tx->mxc_status.code = ret;
/* add peer to global peer list, but look to see
* if someone already created it after we released
* the read lock */
- write_lock(&kmxlnd_data.kmx_peers_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
list_for_each_entry(old, &kmxlnd_data.kmx_peers[hash], mxp_peers) {
if (old->mxp_nid == peer->mxp_nid) {
/* somebody beat us here, we created a duplicate */
atomic_inc(&kmxlnd_data.kmx_npeers);
} else {
tx->mxc_peer = old;
- spin_lock(&old->mxp_lock);
tx->mxc_conn = old->mxp_conn;
/* FIXME can conn be NULL? */
LASSERT(old->mxp_conn != NULL);
mxlnd_conn_addref(old->mxp_conn);
- spin_unlock(&old->mxp_lock);
mxlnd_reduce_idle_rxs(*kmxlnd_tunables.kmx_credits - 1);
mxlnd_conn_decref(peer->mxp_conn); /* drop ref taken above.. */
mxlnd_conn_decref(peer->mxp_conn); /* drop peer's ref */
mxlnd_peer_decref(peer);
}
- write_unlock(&kmxlnd_data.kmx_peers_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
mxlnd_queue_tx(tx);
}
void
mxlnd_iconnect(struct kmx_peer *peer, u64 mask)
{
- mx_return_t mxret = MX_SUCCESS;
- mx_request_t request;
- struct kmx_conn *conn = peer->mxp_conn;
+ mx_return_t mxret = MX_SUCCESS;
+ mx_request_t request;
+ struct kmx_conn *conn = peer->mxp_conn;
+ u8 msg_type = (u8) MXLND_MSG_TYPE(mask);
/* NOTE we are holding a conn ref every time we call this function,
* we do not need to lock the peer before taking another ref */
mxlnd_conn_addref(conn); /* hold until CONN_REQ or CONN_ACK completes */
- LASSERT(mask == MXLND_MASK_ICON_REQ ||
- mask == MXLND_MASK_ICON_ACK);
+ LASSERT(msg_type == MXLND_MSG_ICON_REQ || msg_type == MXLND_MSG_ICON_ACK);
if (peer->mxp_reconnect_time == 0) {
peer->mxp_reconnect_time = jiffies;
}
- if (peer->mxp_nic_id == 0LL) {
- mxlnd_peer_hostname_to_nic_id(peer);
- if (peer->mxp_nic_id == 0LL) {
+ if (peer->mxp_nic_id == 0ULL) {
+ int ret = 0;
+
+ ret = mxlnd_ip2nic_id(peer->mxp_ip, &peer->mxp_nic_id);
+ if (ret == 0) {
+ mx_nic_id_to_board_number(peer->mxp_nic_id, &peer->mxp_board);
+ }
+ if (peer->mxp_nic_id == 0ULL) {
/* not mapped yet, return */
spin_lock(&conn->mxk_lock);
conn->mxk_status = MXLND_CONN_INIT;
spin_unlock(&conn->mxk_lock);
if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) {
/* give up and notify LNET */
- mxlnd_conn_disconnect(conn, 0, 1);
+ mxlnd_conn_disconnect(conn, 0, 0);
mxlnd_conn_alloc(&peer->mxp_conn, peer); /* adds ref for this
function... */
- mxlnd_conn_decref(peer->mxp_conn); /* which we no
+ mxlnd_conn_decref(peer->mxp_conn); /* which we no
longer need */
}
mxlnd_conn_decref(conn);
}
mxret = mx_iconnect(kmxlnd_data.kmx_endpt, peer->mxp_nic_id,
- peer->mxp_host->mxh_ep_id, MXLND_MSG_MAGIC, mask,
+ peer->mxp_ep_id, MXLND_MSG_MAGIC, mask,
(void *) peer, &request);
if (unlikely(mxret != MX_SUCCESS)) {
spin_lock(&conn->mxk_lock);
LASSERT(peer != NULL);
return -1;
}
- spin_lock(&peer->mxp_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
conn = peer->mxp_conn;
/* NOTE take a ref for the duration of this function since it is called
* when there might not be any queued txs for this peer */
if (conn) mxlnd_conn_addref(conn); /* for duration of this function */
- spin_unlock(&peer->mxp_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
/* do not add another ref for this tx */
if (unlikely(conn->mxk_status == MXLND_CONN_INIT ||
conn->mxk_status == MXLND_CONN_FAIL ||
conn->mxk_status == MXLND_CONN_REQ)) {
+ u64 match = (u64) MXLND_MSG_ICON_REQ << MXLND_MSG_OFFSET;
CDEBUG(D_NET, "status=%s\n", mxlnd_connstatus_to_str(conn->mxk_status));
conn->mxk_status = MXLND_CONN_WAIT;
spin_unlock(&conn->mxk_lock);
- mxlnd_iconnect(peer, MXLND_MASK_ICON_REQ);
+ mxlnd_iconnect(peer, match);
goto done;
}
spin_unlock(&conn->mxk_lock);
struct kmx_peer *peer = rx->mxc_peer;
struct kmx_conn *conn = rx->mxc_conn;
u8 type = rx->mxc_msg_type;
- u64 seq = 0LL;
+ u64 seq = 0ULL;
lnet_msg_t *lntmsg[2];
int result = 0;
- u64 nic_id = 0LL;
+ u64 nic_id = 0ULL;
u32 ep_id = 0;
+ u32 sid = 0;
int peer_ref = 0;
int conn_ref = 0;
int incompatible = 0;
+ u64 match = 0ULL;
/* NOTE We may only know the peer's nid if it is a PUT_REQ, GET_REQ,
* failed GET reply, CONN_REQ, or a CONN_ACK */
#endif
if (conn == NULL && peer != NULL) {
- spin_lock(&peer->mxp_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
conn = peer->mxp_conn;
if (conn) {
mxlnd_conn_addref(conn); /* conn takes ref... */
conn_ref = 1;
peer_ref = 0;
}
- spin_unlock(&peer->mxp_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
rx->mxc_conn = conn;
}
if (nob == 0) {
/* this may be a failed GET reply */
if (type == MXLND_MSG_GET_DATA) {
- bits = rx->mxc_status.match_info & 0x0FF0000000000000LL;
- ret = (u32) (bits>>52);
+ /* get the error (52-59) bits from the match bits */
+ ret = (u32) MXLND_ERROR_VAL(rx->mxc_status.match_info);
lntmsg[0] = rx->mxc_lntmsg[0];
result = -ret;
goto cleanup;
if (cookie > MXLND_MAX_COOKIE) {
CDEBUG(D_NETERROR, "NAK for msg_type %d from %s\n", rx->mxc_msg_type,
libcfs_nid2str(rx->mxc_nid));
- result = -((cookie >> 52) & 0xff);
+ result = -((u32) MXLND_ERROR_VAL(cookie));
lntmsg[0] = rx->mxc_lntmsg[0];
} else {
mxlnd_send_data(kmxlnd_data.kmx_ni, rx->mxc_lntmsg[0],
(int) MXLND_EAGER_SIZE);
incompatible = 1;
}
+ mx_decompose_endpoint_addr2(rx->mxc_status.source, &nic_id, &ep_id, &sid);
if (peer == NULL) {
peer = mxlnd_find_peer_by_nid(msg->mxm_srcnid); /* adds peer ref */
if (peer == NULL) {
struct kmx_peer *existing_peer = NULL;
hash = mxlnd_nid_to_hash(msg->mxm_srcnid);
- mx_decompose_endpoint_addr(rx->mxc_status.source,
- &nic_id, &ep_id);
rx->mxc_nid = msg->mxm_srcnid;
/* adds conn ref for peer and one for this function */
- ret = mxlnd_peer_alloc(&peer, msg->mxm_srcnid);
+ ret = mxlnd_peer_alloc(&peer, msg->mxm_srcnid,
+ *kmxlnd_tunables.kmx_board,
+ *kmxlnd_tunables.kmx_ep_id, 0ULL);
if (ret != 0) {
goto cleanup;
}
- LASSERT(peer->mxp_host->mxh_ep_id == ep_id);
- write_lock(&kmxlnd_data.kmx_peers_lock);
+ peer->mxp_sid = sid;
+ LASSERT(peer->mxp_ep_id == ep_id);
+ write_lock(&kmxlnd_data.kmx_global_lock);
existing_peer = mxlnd_find_peer_by_nid_locked(msg->mxm_srcnid);
if (existing_peer) {
mxlnd_conn_decref(peer->mxp_conn);
} else {
list_add_tail(&peer->mxp_peers,
&kmxlnd_data.kmx_peers[hash]);
- write_unlock(&kmxlnd_data.kmx_peers_lock);
atomic_inc(&kmxlnd_data.kmx_npeers);
}
+ write_unlock(&kmxlnd_data.kmx_global_lock);
} else {
+ /* FIXME should write lock here */
ret = mxlnd_conn_alloc(&conn, peer); /* adds 2nd ref */
mxlnd_peer_decref(peer); /* drop ref taken above */
if (ret != 0) {
}
conn_ref = 1; /* peer/conn_alloc() added ref for this function */
conn = peer->mxp_conn;
- } else {
+ } else { /* found peer */
struct kmx_conn *old_conn = conn;
- /* do not call mx_disconnect() */
- mxlnd_conn_disconnect(old_conn, 0, 0);
+ if (sid != peer->mxp_sid) {
+ /* do not call mx_disconnect() or send a BYE */
+ mxlnd_conn_disconnect(old_conn, 0, 0);
- /* the ref for this rx was taken on the old_conn */
- mxlnd_conn_decref(old_conn);
+ /* the ref for this rx was taken on the old_conn */
+ mxlnd_conn_decref(old_conn);
- /* This allocs a conn, points peer->mxp_conn to this one.
- * The old conn is still on the peer->mxp_conns list.
- * As the pending requests complete, they will call
- * conn_decref() which will eventually free it. */
- ret = mxlnd_conn_alloc(&conn, peer);
- if (ret != 0) {
- CDEBUG(D_NETERROR, "Cannot allocate peer->mxp_conn\n");
- goto cleanup;
+ /* This allocs a conn, points peer->mxp_conn to this one.
+ * The old conn is still on the peer->mxp_conns list.
+ * As the pending requests complete, they will call
+ * conn_decref() which will eventually free it. */
+ ret = mxlnd_conn_alloc(&conn, peer);
+ if (ret != 0) {
+ CDEBUG(D_NETERROR, "Cannot allocate peer->mxp_conn\n");
+ goto cleanup;
+ }
+ /* conn_alloc() adds one ref for the peer and one
+ * for this function */
+ conn_ref = 1;
+
+ peer->mxp_sid = sid;
}
- /* conn_alloc() adds one ref for the peer and one for this function */
- conn_ref = 1;
}
- spin_lock(&peer->mxp_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
peer->mxp_incarnation = msg->mxm_srcstamp;
peer->mxp_incompatible = incompatible;
- spin_unlock(&peer->mxp_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
spin_lock(&conn->mxk_lock);
conn->mxk_incarnation = msg->mxm_srcstamp;
conn->mxk_status = MXLND_CONN_WAIT;
spin_unlock(&conn->mxk_lock);
/* handle_conn_ack() will create the CONN_ACK msg */
- mxlnd_iconnect(peer, MXLND_MASK_ICON_ACK);
+ match = (u64) MXLND_MSG_ICON_ACK << MXLND_MSG_OFFSET;
+ mxlnd_iconnect(peer, match);
break;
incompatible = 1;
ret = -1;
}
- spin_lock(&peer->mxp_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
peer->mxp_incarnation = msg->mxm_srcstamp;
peer->mxp_incompatible = incompatible;
- spin_unlock(&peer->mxp_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
spin_lock(&conn->mxk_lock);
conn->mxk_credits = *kmxlnd_tunables.kmx_credits;
conn->mxk_outstanding = 0;
failed:
if (ret < 0) {
- MXLND_PRINT("setting PEER_CONN_FAILED\n");
+ CDEBUG(D_NET, "setting PEER_CONN_FAILED\n");
spin_lock(&conn->mxk_lock);
conn->mxk_status = MXLND_CONN_FAIL;
spin_unlock(&conn->mxk_lock);
if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) {
struct kmx_conn *new_conn = NULL;
CDEBUG(D_NETERROR, "timeout, calling conn_disconnect()\n");
- mxlnd_conn_disconnect(conn, 0, 1);
+ /* FIXME write lock here ? */
+ mxlnd_conn_disconnect(conn, 0, 0);
mxlnd_conn_alloc(&new_conn, peer); /* adds a ref for this function */
mxlnd_conn_decref(new_conn); /* which we no longer need */
- spin_lock(&peer->mxp_lock);
peer->mxp_reconnect_time = 0;
- spin_unlock(&peer->mxp_lock);
}
mxlnd_conn_decref(conn);
mx_set_endpoint_addr_context(conn->mxk_epa, (void *) peer);
/* mx_iconnect() succeeded, reset delay to 0 */
- spin_lock(&peer->mxp_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
peer->mxp_reconnect_time = 0;
- spin_unlock(&peer->mxp_lock);
+ write_unlock(&kmxlnd_data.kmx_global_lock);
/* marshal CONN_REQ msg */
/* we are still using the conn ref from iconnect() - do not take another */
struct kmx_ctx *tx = NULL;
struct kmx_msg *txmsg = NULL;
struct kmx_conn *conn = peer->mxp_conn;
+ u64 nic_id = 0ULL;
+ u32 ep_id = 0;
+ u32 sid = 0;
/* a conn ref was taken when calling mx_iconnect(),
* hold it until CONN_REQ or CONN_ACK completes */
CDEBUG(D_NET, "entering\n");
if (status.code != MX_STATUS_SUCCESS) {
CDEBUG(D_NETERROR, "mx_iconnect() failed for CONN_ACK with %s (%d) "
- "to %s mxp_nid = 0x%llx mxp_nic_id = 0x%0llx mxh_ep_id = %d\n",
+ "to %s mxp_nid = 0x%llx mxp_nic_id = 0x%0llx mxp_ep_id = %d\n",
mx_strstatus(status.code), status.code,
libcfs_nid2str(peer->mxp_nid),
peer->mxp_nid,
peer->mxp_nic_id,
- peer->mxp_host->mxh_ep_id);
+ peer->mxp_ep_id);
spin_lock(&conn->mxk_lock);
conn->mxk_status = MXLND_CONN_FAIL;
spin_unlock(&conn->mxk_lock);
if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) {
struct kmx_conn *new_conn = NULL;
CDEBUG(D_NETERROR, "timeout, calling conn_disconnect()\n");
+ /* FIXME write lock here? */
mxlnd_conn_disconnect(conn, 0, 1);
mxlnd_conn_alloc(&new_conn, peer); /* adds ref for
this function... */
mxlnd_conn_decref(new_conn); /* which we no longer need */
- spin_lock(&peer->mxp_lock);
peer->mxp_reconnect_time = 0;
- spin_unlock(&peer->mxp_lock);
}
mxlnd_conn_decref(conn);
return;
}
+ mx_decompose_endpoint_addr2(status.source, &nic_id, &ep_id, &sid);
spin_lock(&conn->mxk_lock);
conn->mxk_epa = status.source;
if (likely(!peer->mxp_incompatible)) {
mx_set_endpoint_addr_context(conn->mxk_epa, (void *) peer);
/* mx_iconnect() succeeded, reset delay to 0 */
- spin_lock(&peer->mxp_lock);
+ write_lock(&kmxlnd_data.kmx_global_lock);
peer->mxp_reconnect_time = 0;
- spin_unlock(&peer->mxp_lock);
+ peer->mxp_sid = sid;
+ write_unlock(&kmxlnd_data.kmx_global_lock);
/* marshal CONN_ACK msg */
tx = mxlnd_get_idle_tx();
CDEBUG(D_NET, "%s starting\n", name);
while (!kmxlnd_data.kmx_shutdown) {
+ u8 msg_type = 0;
+
mxret = MX_SUCCESS;
result = 0;
#if MXLND_POLLING
if (id == 0 && count++ < *kmxlnd_tunables.kmx_polling) {
- mxret = mx_test_any(kmxlnd_data.kmx_endpt, 0LL, 0LL,
+ mxret = mx_test_any(kmxlnd_data.kmx_endpt, 0ULL, 0ULL,
&status, &result);
} else {
count = 0;
mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT,
- 0LL, 0LL, &status, &result);
+ 0ULL, 0ULL, &status, &result);
}
#else
mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT,
- 0LL, 0LL, &status, &result);
+ 0ULL, 0ULL, &status, &result);
#endif
if (unlikely(kmxlnd_data.kmx_shutdown))
break;
(u64) status.match_info, status.msg_length);
}
+ msg_type = MXLND_MSG_TYPE(status.match_info);
+
/* This may be a mx_iconnect() request completing,
* check the bit mask for CONN_REQ and CONN_ACK */
- if (status.match_info == MXLND_MASK_ICON_REQ ||
- status.match_info == MXLND_MASK_ICON_ACK) {
+ if (msg_type == MXLND_MSG_ICON_REQ ||
+ msg_type == MXLND_MSG_ICON_ACK) {
peer = (struct kmx_peer*) status.context;
- if (status.match_info == MXLND_MASK_ICON_REQ) {
+ if (msg_type == MXLND_MSG_ICON_REQ) {
mxlnd_handle_conn_req(peer, status);
} else {
mxlnd_handle_conn_ack(peer, status);
struct kmx_peer *peer = NULL;
struct kmx_conn *conn = NULL;
- read_lock(&kmxlnd_data.kmx_peers_lock);
+ read_lock(&kmxlnd_data.kmx_global_lock);
for (i = 0; i < MXLND_HASH_SIZE; i++) {
list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) {
if (unlikely(kmxlnd_data.kmx_shutdown)) {
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
return next;
}
- spin_lock(&peer->mxp_lock);
conn = peer->mxp_conn;
if (conn) {
mxlnd_conn_addref(conn);
- spin_unlock(&peer->mxp_lock);
} else {
- spin_unlock(&peer->mxp_lock);
continue;
}
+                        /* FIXME is this needed? */
spin_lock(&conn->mxk_lock);
/* if nothing pending (timeout == 0) or
* skip this conn */
if (conn->mxk_timeout == 0 ||
conn->mxk_status == MXLND_CONN_DISCONNECT) {
+ /* FIXME is this needed? */
spin_unlock(&conn->mxk_lock);
mxlnd_conn_decref(conn);
continue;
mxlnd_conn_decref(conn);
}
}
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
if (next == 0) next = now + MXLND_COMM_TIMEOUT;
return next;
next = mxlnd_check_timeouts(now);
}
- read_lock(&kmxlnd_data.kmx_peers_lock);
+ read_lock(&kmxlnd_data.kmx_global_lock);
for (i = 0; i < MXLND_HASH_SIZE; i++) {
list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) {
- spin_lock(&peer->mxp_lock);
+ /* FIXME upgrade to write lock?
+ * is any lock needed? */
conn = peer->mxp_conn;
if (conn) mxlnd_conn_addref(conn); /* take ref... */
- spin_unlock(&peer->mxp_lock);
if (conn == NULL)
continue;
if (conn->mxk_status != MXLND_CONN_DISCONNECT &&
time_after(now, conn->mxk_last_tx + HZ)) {
+ /* FIXME drop lock or call check_sends_locked */
+ read_unlock(&kmxlnd_data.kmx_global_lock);
mxlnd_check_sends(peer);
+ read_lock(&kmxlnd_data.kmx_global_lock);
}
mxlnd_conn_decref(conn); /* until here */
}
}
- read_unlock(&kmxlnd_data.kmx_peers_lock);
+ read_unlock(&kmxlnd_data.kmx_global_lock);
mxlnd_sleep(delay);
}