Whamcloud - gitweb
LU-1346 libcfs: cleanup macros in kp30.h
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_lib-linux.c
index c4dc1e1..1dff915 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #include "socklnd.h"
 
 # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-static cfs_sysctl_table_t ksocknal_ctl_table[23];
 
-cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
-        {
-                .ctl_name = 200,
-                .procname = "socknal",
-                .data     = NULL,
-                .maxlen   = 0,
-                .mode     = 0555,
-                .child    = ksocknal_ctl_table
-        },
-        { 0 }
+#ifndef HAVE_SYSCTL_UNNUMBERED
+
+enum {
+        SOCKLND_TIMEOUT = 1,
+        SOCKLND_CREDITS,
+        SOCKLND_PEER_TXCREDITS,
+        SOCKLND_PEER_RTRCREDITS,
+        SOCKLND_PEER_TIMEOUT,
+        SOCKLND_NCONNDS,
+        SOCKLND_RECONNECTS_MIN,
+        SOCKLND_RECONNECTS_MAX,
+        SOCKLND_EAGER_ACK,
+        SOCKLND_ZERO_COPY,
+        SOCKLND_TYPED,
+        SOCKLND_BULK_MIN,
+        SOCKLND_RX_BUFFER_SIZE,
+        SOCKLND_TX_BUFFER_SIZE,
+        SOCKLND_NAGLE,
+        SOCKLND_IRQ_AFFINITY,
+        SOCKLND_ROUND_ROBIN,
+        SOCKLND_KEEPALIVE,
+        SOCKLND_KEEPALIVE_IDLE,
+        SOCKLND_KEEPALIVE_COUNT,
+        SOCKLND_KEEPALIVE_INTVL,
+        SOCKLND_BACKOFF_INIT,
+        SOCKLND_BACKOFF_MAX,
+        SOCKLND_PROTOCOL,
+        SOCKLND_ZERO_COPY_RECV,
+        SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS
 };
+#else
 
-int
-ksocknal_lib_tunables_init ()
-{
-        int    i = 0;
-        int    j = 1;
-
-        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
-                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
-
-        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
-                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
+#define SOCKLND_TIMEOUT         CTL_UNNUMBERED
+#define SOCKLND_CREDITS         CTL_UNNUMBERED
+#define SOCKLND_PEER_TXCREDITS  CTL_UNNUMBERED
+#define SOCKLND_PEER_RTRCREDITS  CTL_UNNUMBERED
+#define SOCKLND_PEER_TIMEOUT    CTL_UNNUMBERED
+#define SOCKLND_NCONNDS         CTL_UNNUMBERED
+#define SOCKLND_RECONNECTS_MIN  CTL_UNNUMBERED
+#define SOCKLND_RECONNECTS_MAX  CTL_UNNUMBERED
+#define SOCKLND_EAGER_ACK       CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY       CTL_UNNUMBERED
+#define SOCKLND_TYPED           CTL_UNNUMBERED
+#define SOCKLND_BULK_MIN        CTL_UNNUMBERED
+#define SOCKLND_RX_BUFFER_SIZE  CTL_UNNUMBERED
+#define SOCKLND_TX_BUFFER_SIZE  CTL_UNNUMBERED
+#define SOCKLND_NAGLE           CTL_UNNUMBERED
+#define SOCKLND_IRQ_AFFINITY    CTL_UNNUMBERED
+#define SOCKLND_ROUND_ROBIN     CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE       CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE_IDLE  CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE_COUNT CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE_INTVL CTL_UNNUMBERED
+#define SOCKLND_BACKOFF_INIT    CTL_UNNUMBERED
+#define SOCKLND_BACKOFF_MAX     CTL_UNNUMBERED
+#define SOCKLND_PROTOCOL        CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY_RECV  CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS CTL_UNNUMBERED
+#endif
 
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+static cfs_sysctl_table_t ksocknal_ctl_table[] = {
+        {
+                .ctl_name = SOCKLND_TIMEOUT,
                 .procname = "timeout",
-                .data     = ksocknal_tunables.ksnd_timeout,
+                .data     = &ksocknal_tunables.ksnd_timeout,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_CREDITS,
                 .procname = "credits",
-                .data     = ksocknal_tunables.ksnd_credits,
+                .data     = &ksocknal_tunables.ksnd_credits,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+         {
+                .ctl_name = SOCKLND_PEER_TXCREDITS,
                 .procname = "peer_credits",
-                .data     = ksocknal_tunables.ksnd_peercredits,
+                .data     = &ksocknal_tunables.ksnd_peertxcredits,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+         {
+                .ctl_name = SOCKLND_PEER_RTRCREDITS,
+                .procname = "peer_buffer_credits",
+                .data     = &ksocknal_tunables.ksnd_peerrtrcredits,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                /* exported read-only (mode 0444) */
+                .ctl_name = SOCKLND_PEER_TIMEOUT,
+                .procname = "peer_timeout",
+                .data     = &ksocknal_tunables.ksnd_peertimeout,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_NCONNDS,
                 .procname = "nconnds",
-                .data     = ksocknal_tunables.ksnd_nconnds,
+                .data     = &ksocknal_tunables.ksnd_nconnds,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_RECONNECTS_MIN,
                 .procname = "min_reconnectms",
-                .data     = ksocknal_tunables.ksnd_min_reconnectms,
+                .data     = &ksocknal_tunables.ksnd_min_reconnectms,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_RECONNECTS_MAX,
                 .procname = "max_reconnectms",
-                .data     = ksocknal_tunables.ksnd_max_reconnectms,
+                .data     = &ksocknal_tunables.ksnd_max_reconnectms,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_EAGER_ACK,
                 .procname = "eager_ack",
-                .data     = ksocknal_tunables.ksnd_eager_ack,
+                .data     = &ksocknal_tunables.ksnd_eager_ack,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_ZERO_COPY,
                 .procname = "zero_copy",
-                .data     = ksocknal_tunables.ksnd_zc_min_frag,
+                .data     = &ksocknal_tunables.ksnd_zc_min_payload,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_ZERO_COPY_RECV,
                 .procname = "zero_copy_recv",
-                .data     = ksocknal_tunables.ksnd_zc_recv,
+                .data     = &ksocknal_tunables.ksnd_zc_recv,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
-                .procname = "zero_copy_recv_min_nfrags",
-                .data     = ksocknal_tunables.ksnd_zc_recv_min_nfrags,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+
+        {
+                /* needs its own name: "zero_copy_recv" is already taken by
+                 * the preceding entry.  The value is clamped to
+                 * [2, LNET_MAX_IOV] in ksocknal_lib_tunables_init() */
+                .ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
+                .procname = "zero_copy_recv_min_nfrags",
+                .data     = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_TYPED,
                 .procname = "typed",
-                .data     = ksocknal_tunables.ksnd_typed_conns,
+                .data     = &ksocknal_tunables.ksnd_typed_conns,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_BULK_MIN,
                 .procname = "min_bulk",
-                .data     = ksocknal_tunables.ksnd_min_bulk,
+                .data     = &ksocknal_tunables.ksnd_min_bulk,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_RX_BUFFER_SIZE,
                 .procname = "rx_buffer_size",
-                .data     = ksocknal_tunables.ksnd_rx_buffer_size,
+                .data     = &ksocknal_tunables.ksnd_rx_buffer_size,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_TX_BUFFER_SIZE,
                 .procname = "tx_buffer_size",
-                .data     = ksocknal_tunables.ksnd_tx_buffer_size,
+                .data     = &ksocknal_tunables.ksnd_tx_buffer_size,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_NAGLE,
                 .procname = "nagle",
-                .data     = ksocknal_tunables.ksnd_nagle,
+                .data     = &ksocknal_tunables.ksnd_nagle,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #ifdef CPU_AFFINITY
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+        {
+                .ctl_name = SOCKLND_IRQ_AFFINITY,
                 .procname = "irq_affinity",
-                .data     = ksocknal_tunables.ksnd_irq_affinity,
+                .data     = &ksocknal_tunables.ksnd_irq_affinity,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #endif
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+        {
+                .ctl_name = SOCKLND_ROUND_ROBIN,
+                .procname = "round_robin",
+                .data     = &ksocknal_tunables.ksnd_round_robin,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE,
+                .procname = "keepalive",
+                .data     = &ksocknal_tunables.ksnd_keepalive,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE_IDLE,
                 .procname = "keepalive_idle",
-                .data     = ksocknal_tunables.ksnd_keepalive_idle,
+                .data     = &ksocknal_tunables.ksnd_keepalive_idle,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE_COUNT,
                 .procname = "keepalive_count",
-                .data     = ksocknal_tunables.ksnd_keepalive_count,
+                .data     = &ksocknal_tunables.ksnd_keepalive_count,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE_INTVL,
                 .procname = "keepalive_intvl",
-                .data     = ksocknal_tunables.ksnd_keepalive_intvl,
+                .data     = &ksocknal_tunables.ksnd_keepalive_intvl,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #ifdef SOCKNAL_BACKOFF
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+        {
+                .ctl_name = SOCKLND_BACKOFF_INIT,
                 .procname = "backoff_init",
-                .data     = ksocknal_tunables.ksnd_backoff_init,
+                .data     = &ksocknal_tunables.ksnd_backoff_init,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_BACKOFF_MAX,
                 .procname = "backoff_max",
-                .data     = ksocknal_tunables.ksnd_backoff_max,
+                .data     = &ksocknal_tunables.ksnd_backoff_max,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 #endif
 #if SOCKNAL_VERSION_DEBUG
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
-                .ctl_name = j++,
+        {
+                .ctl_name = SOCKLND_PROTOCOL,
                 .procname = "protocol",
-                .data     = ksocknal_tunables.ksnd_protocol,
+                .data     = &ksocknal_tunables.ksnd_protocol,
                 .maxlen   = sizeof(int),
                 .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        };
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+#endif
+        {0}
+};
+
+
+cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
+        {
+                .ctl_name = CTL_SOCKLND,
+                .procname = "socknal",
+                .data     = NULL,
+                .maxlen   = 0,
+                .mode     = 0555,
+                .child    = ksocknal_ctl_table
+        },
+        { 0 }
+};
+
+int
+ksocknal_lib_tunables_init ()
+{
+        if (!*ksocknal_tunables.ksnd_typed_conns) {
+                int rc = -EINVAL;
+#if SOCKNAL_VERSION_DEBUG
+                if (*ksocknal_tunables.ksnd_protocol < 3)
+                        rc = 0;
 #endif
-        ksocknal_ctl_table[i++] =  (cfs_sysctl_table_t) { 0 };
+                if (rc != 0) {
+                        CERROR("Protocol V3.x MUST have typed connections\n");
+                        return rc;
+                }
+        }
 
-        LASSERT (j == i);
-        LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
+        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
+                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
+        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
+                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
 
         ksocknal_tunables.ksnd_sysctl =
                 cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
@@ -278,53 +404,6 @@ ksocknal_lib_tunables_fini ()
 }
 #endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
 
-void
-ksocknal_lib_bind_irq (unsigned int irq)
-{
-#if (defined(CONFIG_SMP) && defined(CPU_AFFINITY))
-        int              bind;
-        int              cpu;
-        char             cmdline[64];
-        ksock_irqinfo_t *info;
-        char            *argv[] = {"/bin/sh",
-                                   "-c",
-                                   cmdline,
-                                   NULL};
-        char            *envp[] = {"HOME=/",
-                                   "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-                                   NULL};
-
-        LASSERT (irq < NR_IRQS);
-        if (irq == 0)              /* software NIC or affinity disabled */
-                return;
-
-        info = &ksocknal_data.ksnd_irqinfo[irq];
-
-        write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
-        LASSERT (info->ksni_valid);
-        bind = !info->ksni_bound;
-        info->ksni_bound = 1;
-
-        write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
-        if (!bind)                              /* bound already */
-                return;
-
-        cpu = ksocknal_irqsched2cpu(info->ksni_sched);
-        snprintf (cmdline, sizeof (cmdline),
-                  "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq);
-
-        LCONSOLE_INFO("Binding irq %u to CPU %d with cmd: %s\n",
-                      irq, cpu, cmdline);
-
-        /* FIXME: Find a better method of setting IRQ affinity...
-         */
-
-        USERMODEHELPER(argv[0], argv, envp);
-#endif
-}
-
 int
 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
 {
@@ -350,41 +429,17 @@ ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
         return 0;
 }
 
-unsigned int
-ksocknal_lib_sock_irq (struct socket *sock)
-{
-        int                irq = 0;
-#ifdef CPU_AFFINITY
-        struct dst_entry  *dst;
-
-        if (!*ksocknal_tunables.ksnd_irq_affinity)
-                return 0;
-
-        dst = sk_dst_get (sock->sk);
-        if (dst != NULL) {
-                if (dst->dev != NULL) {
-                        irq = dst->dev->irq;
-                        if (irq >= NR_IRQS) {
-                                CERROR ("Unexpected IRQ %x\n", irq);
-                                irq = 0;
-                        }
-                }
-                dst_release (dst);
-        }
-
-#endif
-        return irq;
-}
-
 int
-ksocknal_lib_zc_capable(struct socket *sock)
+ksocknal_lib_zc_capable(ksock_conn_t *conn)
 {
-        int  caps = sock->sk->sk_route_caps;
+       int  caps = conn->ksnc_sock->sk->sk_route_caps;
+
+       if (conn->ksnc_proto == &ksocknal_protocol_v1x)
+               return 0;
 
-        /* ZC if the socket supports scatter/gather and doesn't need software
-         * checksums */
-        return ((caps & NETIF_F_SG) != 0 &&
-                (caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) != 0);
+       /* ZC if the socket supports scatter/gather and doesn't need software
+        * checksums */
+       return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0);
 }
 
 int
@@ -429,7 +484,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
                         nob += scratchiov[i].iov_len;
                 }
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
+                if (!cfs_list_empty(&conn->ksnc_tx_queue) ||
                     nob < tx->tx_resid)
                         msg.msg_flags |= MSG_MORE;
 
@@ -444,15 +499,16 @@ int
 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         struct socket *sock = conn->ksnc_sock;
-        lnet_kiov_t    *kiov = tx->tx_kiov;
+        lnet_kiov_t   *kiov = tx->tx_kiov;
         int            rc;
         int            nob;
 
+        /* Not NOOP message */
+        LASSERT (tx->tx_lnetmsg != NULL);
+
         /* NB we can't trust socket ops to either consume our iovs
          * or leave them alone. */
-
-        if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag &&
-            tx->tx_msg.ksm_zc_req_cookie != 0) {
+        if (tx->tx_msg.ksm_zc_cookies[0] != 0) {
                 /* Zero copy is enabled */
                 struct sock   *sk = sock->sk;
                 struct page   *page = kiov->kiov_page;
@@ -463,7 +519,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                 CDEBUG(D_NET, "page %p + offset %x for %d\n",
                                page, offset, kiov->kiov_len);
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
+                if (!cfs_list_empty(&conn->ksnc_tx_queue) ||
                     fragsize < tx->tx_resid)
                         msgflg |= MSG_MORE;
 
@@ -471,7 +527,8 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                         rc = sk->sk_prot->sendpage(sk, page,
                                                    offset, fragsize, msgflg);
                 } else {
-                        rc = tcp_sendpage(sock, page, offset, fragsize, msgflg);
+                        rc = cfs_tcp_sendpage(sk, page, offset, fragsize,
+                                              msgflg);
                 }
         } else {
 #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
@@ -503,7 +560,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                         nob += scratchiov[i].iov_len = kiov[i].kiov_len;
                 }
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
+                if (!cfs_list_empty(&conn->ksnc_tx_queue) ||
                     nob < tx->tx_resid)
                         msg.msg_flags |= MSG_MORE;
 
@@ -631,7 +688,8 @@ ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
 
         for (nob = i = 0; i < niov; i++) {
                 if ((kiov[i].kiov_offset != 0 && i > 0) ||
-                    (kiov[i].kiov_offset + kiov[i].kiov_len != CFS_PAGE_SIZE && i < niov - 1))
+                   (kiov[i].kiov_offset + kiov[i].kiov_len !=
+                    PAGE_CACHE_SIZE && i < niov - 1))
                         return NULL;
 
                 pages[i] = kiov[i].kiov_page;
@@ -697,7 +755,6 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
                 msg.msg_iovlen = niov;
         }
 
-
         LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
         set_fs (KERNEL_DS);
@@ -957,30 +1014,11 @@ ksocknal_lib_setup_sock (struct socket *sock)
         return (0);
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
-        return &(sk->tp_pinfo.af_tcp);
-}
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-#define sock2tcp_opt(sk) tcp_sk(sk)
-#else
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
-        struct tcp_sock *s = (struct tcp_sock *)sk;
-        return &s->tcp;
-}
-#endif
-
 void
 ksocknal_lib_push_conn (ksock_conn_t *conn)
 {
         struct sock    *sk;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
-        struct tcp_opt *tp;
-#else
         struct tcp_sock *tp;
-#endif
         int             nonagle;
         int             val = 1;
         int             rc;
@@ -990,8 +1028,8 @@ ksocknal_lib_push_conn (ksock_conn_t *conn)
         if (rc != 0)                            /* being shut down */
                 return;
 
-        sk = conn->ksnc_sock->sk;
-        tp = sock2tcp_opt(sk);
+       sk = conn->ksnc_sock->sk;
+       tp = tcp_sk(sk);
 
         lock_sock (sk);
         nonagle = tp->nonagle;
@@ -1027,7 +1065,7 @@ ksocknal_data_ready (struct sock *sk, int n)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
@@ -1036,7 +1074,7 @@ ksocknal_data_ready (struct sock *sk, int n)
         } else
                 ksocknal_read_callback(conn);
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
 
         EXIT;
 }
@@ -1050,7 +1088,7 @@ ksocknal_write_space (struct sock *sk)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         wspace = SOCKNAL_WSPACE(sk);
@@ -1062,14 +1100,14 @@ ksocknal_write_space (struct sock *sk)
                                       " ready" : " blocked"),
                (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
                                       " scheduled" : " idle"),
-               (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
+               (conn == NULL) ? "" : (cfs_list_empty (&conn->ksnc_tx_queue) ?
                                       " empty" : " queued"));
 
         if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
                 LASSERT (sk->sk_write_space != &ksocknal_write_space);
                 sk->sk_write_space (sk);
 
-                read_unlock (&ksocknal_data.ksnd_global_lock);
+               read_unlock(&ksocknal_data.ksnd_global_lock);
                 return;
         }
 
@@ -1083,7 +1121,7 @@ ksocknal_write_space (struct sock *sk)
                 clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
         }
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
 }
 
 void
@@ -1118,3 +1156,30 @@ ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
 
         return ;
 }
+
+int
+ksocknal_lib_memory_pressure(ksock_conn_t *conn)
+{
+       int            rc = 0;
+       ksock_sched_t *sched;
+
+       sched = conn->ksnc_scheduler;
+       spin_lock_bh(&sched->kss_lock);
+
+        if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
+            !conn->ksnc_tx_ready) {
+                /* SOCK_NOSPACE is set when the socket fills
+                 * and cleared in the write_space callback
+                 * (which also sets ksnc_tx_ready).  If
+                 * SOCK_NOSPACE and ksnc_tx_ready are BOTH
+                 * zero, I didn't fill the socket and
+                 * write_space won't reschedule me, so I
+                 * return -ENOMEM to get my caller to retry
+                 * after a timeout */
+                rc = -ENOMEM;
+        }
+
+       spin_unlock_bh(&sched->kss_lock);
+
+       return rc;
+}