Description:
Details :
+Severity : major
+Bugzilla : 15093
+Description: Support zero-copy receive for Chelsio devices
+Details    : The Chelsio driver can perform zero-copy receive into iov[1]
+             when it is virtually contiguous and large enough.
+
Severity : normal
Bugzilla : 13490
Description: fix credit flow deadlock in uptllnd
}
memset (conn, 0, sizeof (*conn));
+
conn->ksnc_peer = NULL;
conn->ksnc_route = NULL;
conn->ksnc_sock = sock;
struct list_head kss_zombie_noop_txs; /* zombie noop tx list */
cfs_waitq_t kss_waitq; /* where scheduler sleeps */
int kss_nconns; /* # connections assigned to this scheduler */
+#if !SOCKNAL_SINGLE_FRAG_RX
+ struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
+#endif
+#if !SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_SINGLE_FRAG_RX
+ struct iovec kss_scratch_iov[LNET_MAX_IOV];
+#endif
+
} ksock_sched_t;
typedef struct
int *ksnd_enable_csum; /* enable check sum */
int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
unsigned int *ksnd_zc_min_frag; /* minimum zero copy frag size */
+ int *ksnd_zc_recv; /* enable ZC receive (for Chelsio TOE) */
+ int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to enable ZC receive */
#ifdef CPU_AFFINITY
int *ksnd_irq_affinity; /* enable IRQ affinity? */
#endif
atomic_t ksnc_tx_nob; /* # bytes queued */
int ksnc_tx_ready; /* write space */
int ksnc_tx_scheduled; /* being progressed */
-
-#if !SOCKNAL_SINGLE_FRAG_RX
- struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV];
-#endif
-#if !SOCKNAL_SINGLE_FRAG_TX
- struct iovec ksnc_tx_scratch_iov[LNET_MAX_IOV];
-#endif
} ksock_conn_t;
typedef struct ksock_route
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_niov;
#endif
struct msghdr msg = {
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_nkiov;
#endif
struct msghdr msg = {
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
#endif
struct iovec *iov = conn->ksnc_rx_iov;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_nkiov;
#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_niov;
#endif
struct socket *sock = conn->ksnc_sock;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_nkiov;
#endif
struct socket *sock = conn->ksnc_sock;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
#endif
struct iovec *iov = conn->ksnc_rx_iov;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_nkiov;
#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
#include "socklnd.h"
# if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-static cfs_sysctl_table_t ksocknal_ctl_table[21];
+static cfs_sysctl_table_t ksocknal_ctl_table[23];
cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
{
int i = 0;
int j = 1;
+ if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
+ *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
+
+ if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
+ *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
+
ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
.ctl_name = j++,
.procname = "timeout",
};
ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
.ctl_name = j++,
+ .procname = "zero_copy_recv",
+ .data = ksocknal_tunables.ksnd_zc_recv,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
+ .procname = "zero_copy_recv_min_nfrags",
+ .data = ksocknal_tunables.ksnd_zc_recv_min_nfrags,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ };
+ ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+ .ctl_name = j++,
.procname = "typed",
.data = ksocknal_tunables.ksnd_typed_conns,
.maxlen = sizeof (int),
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_niov;
#endif
struct msghdr msg = {
#ifdef CONFIG_HIGHMEM
#warning "XXX risk of kmap deadlock on multiple frags..."
#endif
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_nkiov;
#endif
struct msghdr msg = {
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
#endif
struct iovec *iov = conn->ksnc_rx_iov;
return rc;
}
+/* Release a mapping obtained from ksocknal_lib_kiov_vmap().  NULL is a
+ * no-op so callers can pass the "vmap unused" result unconditionally. */
+static void
+ksocknal_lib_kiov_vunmap(void *addr)
+{
+        if (addr == NULL)
+                return;
+
+        vunmap(addr);
+}
+
+/* Try to map the pages of a kiov into one virtually-contiguous region so
+ * the whole receive can be described by a single iovec (needed for
+ * zero-copy receive on the Chelsio TOE — see bug 15093).
+ *
+ * kiov/niov: source page fragments; niov must be <= LNET_MAX_IOV
+ * iov      : single output iovec, filled in on success
+ * pages    : scratch page array (scheduler's kss_rx_scratch_pgs);
+ *            NULL disables the optimisation (single-frag build)
+ *
+ * Returns the vmap() base address — caller must release it with
+ * ksocknal_lib_kiov_vunmap() — or NULL when zero-copy receive is
+ * disabled, the fragment list is below the tunable threshold, the
+ * fragments are not page-contiguous, or vmap() itself fails. */
+static void *
+ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
+                       struct iovec *iov, struct page **pages)
+{
+        void *addr;
+        int nob;
+        int i;
+
+        /* feature off, or no scratch array in this configuration */
+        if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL)
+                return NULL;
+
+        LASSERT (niov <= LNET_MAX_IOV);
+
+        /* vmap has real overhead; only worthwhile above the tunable
+         * minimum fragment count (clamped to >= 2 at startup) */
+        if (niov < 2 ||
+            niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
+                return NULL;
+
+        /* fragments must tile pages exactly: only the first may start
+         * mid-page and only the last may end mid-page, otherwise the
+         * mapped region would not be contiguous */
+        for (nob = i = 0; i < niov; i++) {
+                if ((kiov[i].kiov_offset != 0 && i > 0) ||
+                    (kiov[i].kiov_offset + kiov[i].kiov_len != CFS_PAGE_SIZE && i < niov - 1))
+                        return NULL;
+
+                pages[i] = kiov[i].kiov_page;
+                nob += kiov[i].kiov_len;
+        }
+
+        addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
+        if (addr == NULL)
+                return NULL;
+
+        /* single iovec covering all fragments, honouring the first
+         * fragment's in-page offset */
+        iov->iov_base = addr + kiov[0].kiov_offset;
+        iov->iov_len = nob;
+
+        return addr;
+}
+
int
ksocknal_lib_recv_kiov (ksock_conn_t *conn)
{
#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ struct page **pages = NULL;
+ unsigned int niov = 1;
#else
#ifdef CONFIG_HIGHMEM
#warning "XXX risk of kmap deadlock on multiple frags..."
#endif
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- unsigned int niov = conn->ksnc_rx_nkiov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+ struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs;
+ unsigned int niov = conn->ksnc_rx_nkiov;
#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
struct msghdr msg = {
.msg_name = NULL,
.msg_namelen = 0,
.msg_iov = scratchiov,
- .msg_iovlen = niov,
.msg_control = NULL,
.msg_controllen = 0,
.msg_flags = 0
int i;
int rc;
void *base;
+ void *addr;
int sum;
int fragnob;
/* NB we can't trust socket ops to either consume our iovs
* or leave them alone. */
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
+ nob = scratchiov[0].iov_len;
+ msg.msg_iovlen = 1;
+
+ } else {
+ for (nob = i = 0; i < niov; i++) {
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+ kiov[i].kiov_offset;
+ }
+ msg.msg_iovlen = niov;
}
+
+
LASSERT (nob <= conn->ksnc_rx_nob_wanted);
set_fs (KERNEL_DS);
kunmap(kiov[i].kiov_page);
}
}
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
+
+ if (addr != NULL) {
+ ksocknal_lib_kiov_vunmap(addr);
+ } else {
+ for (i = 0; i < niov; i++)
+ kunmap(kiov[i].kiov_page);
+ }
return (rc);
}
CFS_MODULE_PARM(zc_min_frag, "i", int, 0644,
"minimum fragment to zero copy");
+/* Declared int (not unsigned int): CFS_MODULE_PARM registers these with
+ * type "i"/int, and their addresses are stored in the int * tunable
+ * fields ksnd_zc_recv / ksnd_zc_recv_min_nfrags — an unsigned int here
+ * is an incompatible-pointer-type violation at the assignment site. */
+static int zc_recv = 0;
+CFS_MODULE_PARM(zc_recv, "i", int, 0444,
+                "enable ZC recv for Chelsio driver");
+
+/* Minimum fragment count before vmap-based ZC receive is attempted;
+ * clamped to [2, LNET_MAX_IOV] at tunables init. */
+static int zc_recv_min_nfrags = 16;
+CFS_MODULE_PARM(zc_recv_min_nfrags, "i", int, 0444,
+                "minimum # of fragments to enable ZC recv");
+
#ifdef SOCKNAL_BACKOFF
static int backoff_init = 3;
CFS_MODULE_PARM(backoff_init, "i", int, 0644,
{
/* initialize ksocknal_tunables structure */
- ksocknal_tunables.ksnd_timeout = &sock_timeout;
- ksocknal_tunables.ksnd_nconnds = &nconnds;
- ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms;
- ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms;
- ksocknal_tunables.ksnd_eager_ack = &eager_ack;
- ksocknal_tunables.ksnd_typed_conns = &typed_conns;
- ksocknal_tunables.ksnd_min_bulk = &min_bulk;
- ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size;
- ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size;
- ksocknal_tunables.ksnd_nagle = &nagle;
- ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle;
- ksocknal_tunables.ksnd_keepalive_count = &keepalive_count;
- ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl;
- ksocknal_tunables.ksnd_credits = &credits;
- ksocknal_tunables.ksnd_peercredits = &peer_credits;
- ksocknal_tunables.ksnd_enable_csum = &enable_csum;
- ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error;
- ksocknal_tunables.ksnd_zc_min_frag = &zc_min_frag;
+ ksocknal_tunables.ksnd_timeout = &sock_timeout;
+ ksocknal_tunables.ksnd_nconnds = &nconnds;
+ ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms;
+ ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms;
+ ksocknal_tunables.ksnd_eager_ack = &eager_ack;
+ ksocknal_tunables.ksnd_typed_conns = &typed_conns;
+ ksocknal_tunables.ksnd_min_bulk = &min_bulk;
+ ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size;
+ ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size;
+ ksocknal_tunables.ksnd_nagle = &nagle;
+ ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle;
+ ksocknal_tunables.ksnd_keepalive_count = &keepalive_count;
+ ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl;
+ ksocknal_tunables.ksnd_credits = &credits;
+ ksocknal_tunables.ksnd_peercredits = &peer_credits;
+ ksocknal_tunables.ksnd_enable_csum = &enable_csum;
+ ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error;
+ ksocknal_tunables.ksnd_zc_min_frag = &zc_min_frag;
+ ksocknal_tunables.ksnd_zc_recv = &zc_recv;
+ ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
+
#ifdef CPU_AFFINITY
- ksocknal_tunables.ksnd_irq_affinity = &enable_irq_affinity;
+ ksocknal_tunables.ksnd_irq_affinity = &enable_irq_affinity;
#endif
#ifdef SOCKNAL_BACKOFF
- ksocknal_tunables.ksnd_backoff_init = &backoff_init;
- ksocknal_tunables.ksnd_backoff_max = &backoff_max;
+ ksocknal_tunables.ksnd_backoff_init = &backoff_init;
+ ksocknal_tunables.ksnd_backoff_max = &backoff_max;
#endif
#if SOCKNAL_VERSION_DEBUG
- ksocknal_tunables.ksnd_protocol = &protocol;
+ ksocknal_tunables.ksnd_protocol = &protocol;
#endif
#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- ksocknal_tunables.ksnd_sysctl = NULL;
+ ksocknal_tunables.ksnd_sysctl = NULL;
#endif
/* initialize platform-sepcific tunables */