Whamcloud - gitweb
b=17087
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_lib-linux.c
index 5b851c4..5b97271 100644 (file)
 #include "socklnd.h"
 
 # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-static cfs_sysctl_table_t ksocknal_ctl_table[21];
 
-cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
-        {
-                .ctl_name = 200,
-                .procname = "socknal",
-                .data     = NULL,
-                .maxlen   = 0,
-                .mode     = 0555,
-                .child    = ksocknal_ctl_table
-        },
-        { 0 }
+#ifndef HAVE_SYSCTL_UNNUMBERED
+
+/* Distinct ctl_name ids for the socklnd sysctl entries; compiled when
+ * HAVE_SYSCTL_UNNUMBERED is not defined. */
+enum {
+        SOCKLND_TIMEOUT                   = 1,
+        SOCKLND_CREDITS                   = 2,
+        SOCKLND_PEER_CREDITS              = 3,
+        SOCKLND_NCONNDS                   = 4,
+        SOCKLND_RECONNECTS_MIN            = 5,
+        SOCKLND_RECONNECTS_MAX            = 6,
+        SOCKLND_EAGER_ACK                 = 7,
+        SOCKLND_ZERO_COPY                 = 8,
+        SOCKLND_TYPED                     = 9,
+        SOCKLND_BULK_MIN                  = 10,
+        SOCKLND_RX_BUFFER_SIZE            = 11,
+        SOCKLND_TX_BUFFER_SIZE            = 12,
+        SOCKLND_NAGLE                     = 13,
+        SOCKLND_IRQ_AFFINITY              = 14,
+        SOCKLND_ROUND_ROBIN               = 15,
+        SOCKLND_KEEPALIVE                 = 16,
+        SOCKLND_KEEPALIVE_IDLE            = 17,
+        SOCKLND_KEEPALIVE_COUNT           = 18,
+        SOCKLND_KEEPALIVE_INTVL           = 19,
+        SOCKLND_BACKOFF_INIT              = 20,
+        SOCKLND_BACKOFF_MAX               = 21,
+        SOCKLND_PROTOCOL                  = 22,
+        SOCKLND_ZERO_COPY_RECV            = 23,
+        SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS = 24
 };
+#else
 
-int
-ksocknal_lib_tunables_init ()
-{
-        int    i = 0;
-        int    j = 1;
+/* HAVE_SYSCTL_UNNUMBERED is defined: every socklnd sysctl entry uses
+ * CTL_UNNUMBERED as its ctl_name instead of a distinct id. */
+#define SOCKLND_TIMEOUT         CTL_UNNUMBERED
+#define SOCKLND_CREDITS         CTL_UNNUMBERED
+#define SOCKLND_PEER_CREDITS    CTL_UNNUMBERED
+#define SOCKLND_NCONNDS         CTL_UNNUMBERED
+#define SOCKLND_RECONNECTS_MIN  CTL_UNNUMBERED
+#define SOCKLND_RECONNECTS_MAX  CTL_UNNUMBERED
+#define SOCKLND_EAGER_ACK       CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY       CTL_UNNUMBERED
+#define SOCKLND_TYPED           CTL_UNNUMBERED
+#define SOCKLND_BULK_MIN        CTL_UNNUMBERED
+#define SOCKLND_RX_BUFFER_SIZE  CTL_UNNUMBERED
+#define SOCKLND_TX_BUFFER_SIZE  CTL_UNNUMBERED
+#define SOCKLND_NAGLE           CTL_UNNUMBERED
+#define SOCKLND_IRQ_AFFINITY    CTL_UNNUMBERED
+#define SOCKLND_ROUND_ROBIN     CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE       CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE_IDLE  CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE_COUNT CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE_INTVL CTL_UNNUMBERED
+#define SOCKLND_BACKOFF_INIT    CTL_UNNUMBERED
+#define SOCKLND_BACKOFF_MAX     CTL_UNNUMBERED
+#define SOCKLND_PROTOCOL        CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY_RECV  CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS CTL_UNNUMBERED
+#endif
 
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+static cfs_sysctl_table_t ksocknal_ctl_table[] = {
+        {
+                .ctl_name = SOCKLND_TIMEOUT,
                 .procname = "timeout",
-                .data     = ksocknal_tunables.ksnd_timeout,
+                .data     = &ksocknal_tunables.ksnd_timeout,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_CREDITS,
                 .procname = "credits",
-                .data     = ksocknal_tunables.ksnd_credits,
+                .data     = &ksocknal_tunables.ksnd_credits,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+         {
+                .ctl_name = SOCKLND_PEER_CREDITS,
                 .procname = "peer_credits",
-                .data     = ksocknal_tunables.ksnd_peercredits,
+                .data     = &ksocknal_tunables.ksnd_peercredits,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_NCONNDS,
                 .procname = "nconnds",
-                .data     = ksocknal_tunables.ksnd_nconnds,
+                .data     = &ksocknal_tunables.ksnd_nconnds,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_RECONNECTS_MIN,
                 .procname = "min_reconnectms",
-                .data     = ksocknal_tunables.ksnd_min_reconnectms,
+                .data     = &ksocknal_tunables.ksnd_min_reconnectms,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_RECONNECTS_MAX,
                 .procname = "max_reconnectms",
-                .data     = ksocknal_tunables.ksnd_max_reconnectms,
+                .data     = &ksocknal_tunables.ksnd_max_reconnectms,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_EAGER_ACK,
                 .procname = "eager_ack",
-                .data     = ksocknal_tunables.ksnd_eager_ack,
+                .data     = &ksocknal_tunables.ksnd_eager_ack,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_ZERO_COPY,
                 .procname = "zero_copy",
-                .data     = ksocknal_tunables.ksnd_zc_min_frag,
+                .data     = &ksocknal_tunables.ksnd_zc_min_payload,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_ZERO_COPY_RECV,
+                .procname = "zero_copy_recv",
+                .data     = &ksocknal_tunables.ksnd_zc_recv,
+                .maxlen   = sizeof (int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+
+        {
+                /* Fragment-count threshold tested by
+                 * ksocknal_lib_kiov_vmap(); it must not share a procname
+                 * with the "zero_copy_recv" on/off entry. */
+                .ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
+                .procname = "zero_copy_recv_min_nfrags",
+                .data     = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
+                .maxlen   = sizeof (int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_TYPED,
                 .procname = "typed",
-                .data     = ksocknal_tunables.ksnd_typed_conns,
+                .data     = &ksocknal_tunables.ksnd_typed_conns,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_BULK_MIN,
                 .procname = "min_bulk",
-                .data     = ksocknal_tunables.ksnd_min_bulk,
+                .data     = &ksocknal_tunables.ksnd_min_bulk,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_RX_BUFFER_SIZE,
                 .procname = "rx_buffer_size",
-                .data     = ksocknal_tunables.ksnd_rx_buffer_size,
+                .data     = &ksocknal_tunables.ksnd_rx_buffer_size,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_TX_BUFFER_SIZE,
                 .procname = "tx_buffer_size",
-                .data     = ksocknal_tunables.ksnd_tx_buffer_size,
+                .data     = &ksocknal_tunables.ksnd_tx_buffer_size,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_NAGLE,
                 .procname = "nagle",
-                .data     = ksocknal_tunables.ksnd_nagle,
+                .data     = &ksocknal_tunables.ksnd_nagle,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #ifdef CPU_AFFINITY
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+        {
+                .ctl_name = SOCKLND_IRQ_AFFINITY,
                 .procname = "irq_affinity",
-                .data     = ksocknal_tunables.ksnd_irq_affinity,
+                .data     = &ksocknal_tunables.ksnd_irq_affinity,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #endif
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+        {
+                .ctl_name = SOCKLND_ROUND_ROBIN,
+                .procname = "round_robin",
+                .data     = &ksocknal_tunables.ksnd_round_robin,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE,
+                .procname = "keepalive",
+                .data     = &ksocknal_tunables.ksnd_keepalive,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE_IDLE,
                 .procname = "keepalive_idle",
-                .data     = ksocknal_tunables.ksnd_keepalive_idle,
+                .data     = &ksocknal_tunables.ksnd_keepalive_idle,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE_COUNT,
                 .procname = "keepalive_count",
-                .data     = ksocknal_tunables.ksnd_keepalive_count,
+                .data     = &ksocknal_tunables.ksnd_keepalive_count,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE_INTVL,
                 .procname = "keepalive_intvl",
-                .data     = ksocknal_tunables.ksnd_keepalive_intvl,
+                .data     = &ksocknal_tunables.ksnd_keepalive_intvl,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #ifdef SOCKNAL_BACKOFF
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+        {
+                .ctl_name = SOCKLND_BACKOFF_INIT,
                 .procname = "backoff_init",
-                .data     = ksocknal_tunables.ksnd_backoff_init,
+                .data     = &ksocknal_tunables.ksnd_backoff_init,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_BACKOFF_MAX,
                 .procname = "backoff_max",
-                .data     = ksocknal_tunables.ksnd_backoff_max,
+                .data     = &ksocknal_tunables.ksnd_backoff_max,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #endif
 #if SOCKNAL_VERSION_DEBUG
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+        {
+                .ctl_name = SOCKLND_PROTOCOL,
                 .procname = "protocol",
-                .data     = ksocknal_tunables.ksnd_protocol,
+                .data     = &ksocknal_tunables.ksnd_protocol,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #endif
-        ksocknal_ctl_table[i++] =  (cfs_sysctl_table_t) { 0 };
+        {0}
+};
+
 
-        LASSERT (j == i);
-        LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
+/* Top-level sysctl table: a single "socknal" node (mode 0555) whose
+ * children are the entries of ksocknal_ctl_table. */
+cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
+        {
+                .ctl_name = CTL_SOCKLND,
+                .procname = "socknal",
+                .mode     = 0555,
+                .child    = ksocknal_ctl_table
+                /* .data and .maxlen are left zero-initialised */
+        },
+        { 0 }   /* terminator */
+};
+
+int
+ksocknal_lib_tunables_init ()
+{
+        if (!*ksocknal_tunables.ksnd_typed_conns) {
+                int rc = -EINVAL;
+#if SOCKNAL_VERSION_DEBUG
+                if (*ksocknal_tunables.ksnd_protocol < 3)
+                        rc = 0;
+#endif
+                if (rc != 0) {
+                        CERROR("Protocol V3.x MUST have typed connections\n");
+                        return rc;
+                }
+        }
+
+        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
+                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
+        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
+                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
 
         ksocknal_tunables.ksnd_sysctl =
                 cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
@@ -278,13 +404,13 @@ ksocknal_lib_bind_irq (unsigned int irq)
 
         info = &ksocknal_data.ksnd_irqinfo[irq];
 
-        write_lock_bh (&ksocknal_data.ksnd_global_lock);
+        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
 
         LASSERT (info->ksni_valid);
         bind = !info->ksni_bound;
         info->ksni_bound = 1;
 
-        write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
 
         if (!bind)                              /* bound already */
                 return;
@@ -355,9 +481,12 @@ ksocknal_lib_sock_irq (struct socket *sock)
 }
 
 int
-ksocknal_lib_zc_capable(struct socket *sock)
+ksocknal_lib_zc_capable(ksock_conn_t *conn)
 {
-        int  caps = sock->sk->sk_route_caps;
+        int  caps = conn->ksnc_sock->sk->sk_route_caps;
+
+        if (conn->ksnc_proto == &ksocknal_protocol_v1x)
+                return 0;
 
         /* ZC if the socket supports scatter/gather and doesn't need software
          * checksums */
@@ -387,7 +516,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
                 struct iovec   *scratchiov = &scratch;
                 unsigned int    niov = 1;
 #else
-                struct iovec   *scratchiov = conn->ksnc_tx_scratch_iov;
+                struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
                 unsigned int    niov = tx->tx_niov;
 #endif
                 struct msghdr msg = {
@@ -422,15 +551,16 @@ int
 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         struct socket *sock = conn->ksnc_sock;
-        lnet_kiov_t    *kiov = tx->tx_kiov;
+        lnet_kiov_t   *kiov = tx->tx_kiov;
         int            rc;
         int            nob;
 
+        /* Not NOOP message */
+        LASSERT (tx->tx_lnetmsg != NULL);
+
         /* NB we can't trust socket ops to either consume our iovs
          * or leave them alone. */
-
-        if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag &&
-            tx->tx_msg.ksm_zc_req_cookie != 0) {
+        if (tx->tx_msg.ksm_zc_cookies[0] != 0) {
                 /* Zero copy is enabled */
                 struct sock   *sk = sock->sk;
                 struct page   *page = kiov->kiov_page;
@@ -460,7 +590,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 #ifdef CONFIG_HIGHMEM
 #warning "XXX risk of kmap deadlock on multiple frags..."
 #endif
-                struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+                struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
                 unsigned int  niov = tx->tx_nkiov;
 #endif
                 struct msghdr msg = {
@@ -521,7 +651,7 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
         struct iovec *scratchiov = &scratch;
         unsigned int  niov = 1;
 #else
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
         unsigned int  niov = conn->ksnc_rx_niov;
 #endif
         struct iovec *iov = conn->ksnc_rx_iov;
@@ -581,26 +711,72 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
         return rc;
 }
 
+/* Release a mapping obtained from ksocknal_lib_kiov_vmap(); a NULL @addr
+ * (nothing was mapped) is ignored. */
+static void
+ksocknal_lib_kiov_vunmap(void *addr)
+{
+        if (addr != NULL)
+                vunmap(addr);
+}
+
+/* Try to present @niov page fragments as one virtually contiguous buffer.
+ * On success @iov describes the whole payload and the vmap() address is
+ * returned; NULL means the caller has to map the fragments one by one. */
+static void *
+ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
+                       struct iovec *iov, struct page **pages)
+{
+        void *vaddr;
+        int   total = 0;
+        int   idx;
+
+        if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL)
+                return NULL;
+        LASSERT (niov <= LNET_MAX_IOV);
+        if (niov < 2 || niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
+                return NULL;
+
+        for (idx = 0; idx < niov; idx++) {
+                /* only the first frag may start mid-page... */
+                if (idx > 0 && kiov[idx].kiov_offset != 0)
+                        return NULL;
+                /* ...and only the last may stop short of the page end */
+                if (idx < niov - 1 &&
+                    kiov[idx].kiov_offset + kiov[idx].kiov_len != CFS_PAGE_SIZE)
+                        return NULL;
+                pages[idx] = kiov[idx].kiov_page;
+                total += kiov[idx].kiov_len;
+        }
+        vaddr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
+        if (vaddr != NULL) {
+                iov->iov_base = vaddr + kiov[0].kiov_offset;
+                iov->iov_len = total;
+        }
+        return vaddr;
+}
+
 int
 ksocknal_lib_recv_kiov (ksock_conn_t *conn)
 {
 #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
-        struct iovec  scratch;
-        struct iovec *scratchiov = &scratch;
-        unsigned int  niov = 1;
+        struct iovec   scratch;
+        struct iovec  *scratchiov = &scratch;
+        struct page  **pages      = NULL;
+        unsigned int   niov       = 1;
 #else
 #ifdef CONFIG_HIGHMEM
 #warning "XXX risk of kmap deadlock on multiple frags..."
 #endif
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
-        unsigned int  niov = conn->ksnc_rx_nkiov;
+        struct iovec  *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+        struct page  **pages      = conn->ksnc_scheduler->kss_rx_scratch_pgs;
+        unsigned int   niov       = conn->ksnc_rx_nkiov;
 #endif
         lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
         struct msghdr msg = {
                 .msg_name       = NULL,
                 .msg_namelen    = 0,
                 .msg_iov        = scratchiov,
-                .msg_iovlen     = niov,
                 .msg_control    = NULL,
                 .msg_controllen = 0,
                 .msg_flags      = 0
@@ -610,15 +786,25 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
         int          i;
         int          rc;
         void        *base;
+        void        *addr;
         int          sum;
         int          fragnob;
 
         /* NB we can't trust socket ops to either consume our iovs
          * or leave them alone. */
-        for (nob = i = 0; i < niov; i++) {
-                scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
-                nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+        if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
+                nob = scratchiov[0].iov_len;
+                msg.msg_iovlen = 1;
+
+        } else {
+                for (nob = i = 0; i < niov; i++) {
+                        nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+                        scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+                                                 kiov[i].kiov_offset;
+                }
+                msg.msg_iovlen = niov;
         }
+
         LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
         set_fs (KERNEL_DS);
@@ -645,8 +831,13 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
                         kunmap(kiov[i].kiov_page);
                 }
         }
-        for (i = 0; i < niov; i++)
-                kunmap(kiov[i].kiov_page);
+
+        if (addr != NULL) {
+                ksocknal_lib_kiov_vunmap(addr);
+        } else {
+                for (i = 0; i < niov; i++)
+                        kunmap(kiov[i].kiov_page);
+        }
 
         return (rc);
 }
@@ -943,7 +1134,7 @@ ksocknal_data_ready (struct sock *sk, int n)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        read_lock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
@@ -952,7 +1143,7 @@ ksocknal_data_ready (struct sock *sk, int n)
         } else
                 ksocknal_read_callback(conn);
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
 
         EXIT;
 }
@@ -966,7 +1157,7 @@ ksocknal_write_space (struct sock *sk)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        read_lock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         wspace = SOCKNAL_WSPACE(sk);
@@ -985,7 +1176,7 @@ ksocknal_write_space (struct sock *sk)
                 LASSERT (sk->sk_write_space != &ksocknal_write_space);
                 sk->sk_write_space (sk);
 
-                read_unlock (&ksocknal_data.ksnd_global_lock);
+                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
                 return;
         }
 
@@ -999,7 +1190,7 @@ ksocknal_write_space (struct sock *sk)
                 clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
         }
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
 }
 
 void
@@ -1034,3 +1225,49 @@ ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
 
         return ;
 }
+
+/* Sample @conn's socket state under its scheduler's kss_lock.
+ * Returns -ENOMEM when neither SOCK_NOSPACE nor ksnc_tx_ready is set,
+ * and 0 otherwise. */
+int
+ksocknal_lib_memory_pressure(ksock_conn_t *conn)
+{
+        ksock_sched_t *sched = conn->ksnc_scheduler;
+        int            rc;
+
+        cfs_spin_lock_bh (&sched->kss_lock);
+
+        /* SOCK_NOSPACE is set when the socket fills and cleared in the
+         * write_space callback (which also sets ksnc_tx_ready).  If
+         * SOCK_NOSPACE and ksnc_tx_ready are BOTH zero, I didn't fill
+         * the socket and write_space won't reschedule me, so I return
+         * -ENOMEM to get my caller to retry after a timeout */
+        if (SOCK_TEST_NOSPACE(conn->ksnc_sock) ||
+            conn->ksnc_tx_ready)
+                rc = 0;
+        else
+                rc = -ENOMEM;
+
+        cfs_spin_unlock_bh (&sched->kss_lock);
+
+        return rc;
+}
+
+/* Restrict the current task to the CPU that ksocknal_sched2cpu() maps @id
+ * to.  Returns -1 if that CPU is not online; returns 0 after issuing the
+ * affinity request, or always when built without CONFIG_SMP/CPU_AFFINITY. */
+int
+ksocknal_lib_bind_thread_to_cpu(int id)
+{
+#if defined(CONFIG_SMP) && defined(CPU_AFFINITY)
+        int       cpu  = ksocknal_sched2cpu(id);
+        cpumask_t mask = CPU_MASK_NONE;
+
+        if (!cpu_online(cpu))
+                return -1;
+
+        cpu_set(cpu, mask);
+        set_cpus_allowed(current, mask);
+#endif
+        return 0;
+}