Whamcloud - gitweb
LU-5322 socklnd: handle sk_data_ready change in 3.15+ kernels
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_lib-linux.c
index 6ce7d03..2815a36 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 
 # if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
 
-#ifndef HAVE_SYSCTL_UNNUMBERED
-
-enum {
-        SOCKLND_TIMEOUT = 1,
-        SOCKLND_CREDITS,
-        SOCKLND_PEER_CREDITS,
-        SOCKLND_NCONNDS,
-        SOCKLND_RECONNECTS_MIN,
-        SOCKLND_RECONNECTS_MAX,
-        SOCKLND_EAGER_ACK,
-        SOCKLND_ZERO_COPY,
-        SOCKLND_TYPED,
-        SOCKLND_BULK_MIN,
-        SOCKLND_RX_BUFFER_SIZE,
-        SOCKLND_TX_BUFFER_SIZE,
-        SOCKLND_NAGLE,
-        SOCKLND_IRQ_AFFINITY,
-        SOCKLND_ROUND_ROBIN,
-        SOCKLND_KEEPALIVE,
-        SOCKLND_KEEPALIVE_IDLE,
-        SOCKLND_KEEPALIVE_COUNT,
-        SOCKLND_KEEPALIVE_INTVL,
-        SOCKLND_BACKOFF_INIT,
-        SOCKLND_BACKOFF_MAX,
-        SOCKLND_PROTOCOL,
-        SOCKLND_ZERO_COPY_RECV,
-        SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS
-};
-#else
-
-#define SOCKLND_TIMEOUT         CTL_UNNUMBERED
-#define SOCKLND_CREDITS         CTL_UNNUMBERED
-#define SOCKLND_PEER_CREDITS    CTL_UNNUMBERED
-#define SOCKLND_NCONNDS         CTL_UNNUMBERED
-#define SOCKLND_RECONNECTS_MIN  CTL_UNNUMBERED
-#define SOCKLND_RECONNECTS_MAX  CTL_UNNUMBERED
-#define SOCKLND_EAGER_ACK       CTL_UNNUMBERED
-#define SOCKLND_ZERO_COPY       CTL_UNNUMBERED
-#define SOCKLND_TYPED           CTL_UNNUMBERED
-#define SOCKLND_BULK_MIN        CTL_UNNUMBERED
-#define SOCKLND_RX_BUFFER_SIZE  CTL_UNNUMBERED
-#define SOCKLND_TX_BUFFER_SIZE  CTL_UNNUMBERED
-#define SOCKLND_NAGLE           CTL_UNNUMBERED
-#define SOCKLND_IRQ_AFFINITY    CTL_UNNUMBERED
-#define SOCKLND_ROUND_ROBIN     CTL_UNNUMBERED
-#define SOCKLND_KEEPALIVE       CTL_UNNUMBERED
-#define SOCKLND_KEEPALIVE_IDLE  CTL_UNNUMBERED
-#define SOCKLND_KEEPALIVE_COUNT CTL_UNNUMBERED
-#define SOCKLND_KEEPALIVE_INTVL CTL_UNNUMBERED
-#define SOCKLND_BACKOFF_INIT    CTL_UNNUMBERED
-#define SOCKLND_BACKOFF_MAX     CTL_UNNUMBERED
-#define SOCKLND_PROTOCOL        CTL_UNNUMBERED
-#define SOCKLND_ZERO_COPY_RECV  CTL_UNNUMBERED
-#define SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS CTL_UNNUMBERED
-#endif
-
-static cfs_sysctl_table_t ksocknal_ctl_table[] = {
-        {
-                .ctl_name = SOCKLND_TIMEOUT,
-                .procname = "timeout",
-                .data     = &ksocknal_tunables.ksnd_timeout,
-                .maxlen   = sizeof (int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_CREDITS,
-                .procname = "credits",
-                .data     = &ksocknal_tunables.ksnd_credits,
-                .maxlen   = sizeof (int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-         {
-                .ctl_name = SOCKLND_PEER_CREDITS,
-                .procname = "peer_credits",
-                .data     = &ksocknal_tunables.ksnd_peercredits,
-                .maxlen   = sizeof (int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_NCONNDS,
-                .procname = "nconnds",
-                .data     = &ksocknal_tunables.ksnd_nconnds,
-                .maxlen   = sizeof (int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_RECONNECTS_MIN,
-                .procname = "min_reconnectms",
-                .data     = &ksocknal_tunables.ksnd_min_reconnectms,
-                .maxlen   = sizeof (int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_RECONNECTS_MAX,
-                .procname = "max_reconnectms",
-                .data     = &ksocknal_tunables.ksnd_max_reconnectms,
-                .maxlen   = sizeof (int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_EAGER_ACK,
-                .procname = "eager_ack",
-                .data     = &ksocknal_tunables.ksnd_eager_ack,
-                .maxlen   = sizeof (int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_ZERO_COPY,
-                .procname = "zero_copy",
-                .data     = &ksocknal_tunables.ksnd_zc_min_payload,
-                .maxlen   = sizeof (int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_ZERO_COPY_RECV,
-                .procname = "zero_copy_recv",
-                .data     = &ksocknal_tunables.ksnd_zc_recv,
-                .maxlen   = sizeof (int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-
-        {
-                .ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
-                .procname = "zero_copy_recv",
-                .data     = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
-                .maxlen   = sizeof (int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_TYPED,
-                .procname = "typed",
-                .data     = &ksocknal_tunables.ksnd_typed_conns,
-                .maxlen   = sizeof (int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_BULK_MIN,
-                .procname = "min_bulk",
-                .data     = &ksocknal_tunables.ksnd_min_bulk,
-                .maxlen   = sizeof (int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_RX_BUFFER_SIZE,
-                .procname = "rx_buffer_size",
-                .data     = &ksocknal_tunables.ksnd_rx_buffer_size,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_TX_BUFFER_SIZE,
-                .procname = "tx_buffer_size",
-                .data     = &ksocknal_tunables.ksnd_tx_buffer_size,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_NAGLE,
-                .procname = "nagle",
-                .data     = &ksocknal_tunables.ksnd_nagle,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
+static struct ctl_table ksocknal_ctl_table[] = {
+       {
+               INIT_CTL_NAME
+               .procname       = "timeout",
+               .data           = &ksocknal_tunables.ksnd_timeout,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "credits",
+               .data           = &ksocknal_tunables.ksnd_credits,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "peer_credits",
+               .data           = &ksocknal_tunables.ksnd_peertxcredits,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "peer_buffer_credits",
+               .data           = &ksocknal_tunables.ksnd_peerrtrcredits,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "peer_timeout",
+               .data           = &ksocknal_tunables.ksnd_peertimeout,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "nconnds",
+               .data           = &ksocknal_tunables.ksnd_nconnds,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "min_reconnectms",
+               .data           = &ksocknal_tunables.ksnd_min_reconnectms,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "max_reconnectms",
+               .data           = &ksocknal_tunables.ksnd_max_reconnectms,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "eager_ack",
+               .data           = &ksocknal_tunables.ksnd_eager_ack,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "zero_copy",
+               .data           = &ksocknal_tunables.ksnd_zc_min_payload,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "zero_copy_recv",
+               .data           = &ksocknal_tunables.ksnd_zc_recv,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "zero_copy_recv",
+               .data           = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "typed",
+               .data           = &ksocknal_tunables.ksnd_typed_conns,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "min_bulk",
+               .data           = &ksocknal_tunables.ksnd_min_bulk,
+               .maxlen         = sizeof (int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "rx_buffer_size",
+               .data           = &ksocknal_tunables.ksnd_rx_buffer_size,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "tx_buffer_size",
+               .data           = &ksocknal_tunables.ksnd_tx_buffer_size,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "nagle",
+               .data           = &ksocknal_tunables.ksnd_nagle,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
 #ifdef CPU_AFFINITY
-        {
-                .ctl_name = SOCKLND_IRQ_AFFINITY,
-                .procname = "irq_affinity",
-                .data     = &ksocknal_tunables.ksnd_irq_affinity,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
+       {
+               INIT_CTL_NAME
+               .procname       = "irq_affinity",
+               .data           = &ksocknal_tunables.ksnd_irq_affinity,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
 #endif
-        {
-                .ctl_name = SOCKLND_ROUND_ROBIN,
-                .procname = "round_robin",
-                .data     = &ksocknal_tunables.ksnd_round_robin,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_KEEPALIVE,
-                .procname = "keepalive",
-                .data     = &ksocknal_tunables.ksnd_keepalive,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_KEEPALIVE_IDLE,
-                .procname = "keepalive_idle",
-                .data     = &ksocknal_tunables.ksnd_keepalive_idle,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_KEEPALIVE_COUNT,
-                .procname = "keepalive_count",
-                .data     = &ksocknal_tunables.ksnd_keepalive_count,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_KEEPALIVE_INTVL,
-                .procname = "keepalive_intvl",
-                .data     = &ksocknal_tunables.ksnd_keepalive_intvl,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
+       {
+               INIT_CTL_NAME
+               .procname       = "round_robin",
+               .data           = &ksocknal_tunables.ksnd_round_robin,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "keepalive",
+               .data           = &ksocknal_tunables.ksnd_keepalive,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "keepalive_idle",
+               .data           = &ksocknal_tunables.ksnd_keepalive_idle,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "keepalive_count",
+               .data           = &ksocknal_tunables.ksnd_keepalive_count,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "keepalive_intvl",
+               .data           = &ksocknal_tunables.ksnd_keepalive_intvl,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
 #ifdef SOCKNAL_BACKOFF
-        {
-                .ctl_name = SOCKLND_BACKOFF_INIT,
-                .procname = "backoff_init",
-                .data     = &ksocknal_tunables.ksnd_backoff_init,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
-        {
-                .ctl_name = SOCKLND_BACKOFF_MAX,
-                .procname = "backoff_max",
-                .data     = &ksocknal_tunables.ksnd_backoff_max,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
+       {
+               INIT_CTL_NAME
+               .procname       = "backoff_init",
+               .data           = &ksocknal_tunables.ksnd_backoff_init,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
+       {
+               INIT_CTL_NAME
+               .procname       = "backoff_max",
+               .data           = &ksocknal_tunables.ksnd_backoff_max,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
 #endif
 #if SOCKNAL_VERSION_DEBUG
-        {
-                .ctl_name = SOCKLND_PROTOCOL,
-                .procname = "protocol",
-                .data     = &ksocknal_tunables.ksnd_protocol,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec,
-                .strategy = &sysctl_intvec,
-        },
+       {
+               INIT_CTL_NAME
+               .procname       = "protocol",
+               .data           = &ksocknal_tunables.ksnd_protocol,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+               INIT_STRATEGY
+       },
 #endif
-        {0}
+       { 0 }
 };
 
-
-cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
-        {
-                .ctl_name = CTL_SOCKLND,
-                .procname = "socknal",
-                .data     = NULL,
-                .maxlen   = 0,
-                .mode     = 0555,
-                .child    = ksocknal_ctl_table
-        },
-        { 0 }
+struct ctl_table ksocknal_top_ctl_table[] = {
+       {
+               INIT_CTL_NAME
+               .procname       = "socknal",
+               .data           = NULL,
+               .maxlen         = 0,
+               .mode           = 0555,
+               .child          = ksocknal_ctl_table /* parent node: its children are the ksocknal_ctl_table entries */
+       },
+       { 0 }
 };
 
 int
 ksocknal_lib_tunables_init ()
 {
-        if (!*ksocknal_tunables.ksnd_typed_conns) {
-                int rc = -EINVAL;
+       if (!*ksocknal_tunables.ksnd_typed_conns) {
+               int rc = -EINVAL; /* untyped connections are rejected unless allowed below */
 #if SOCKNAL_VERSION_DEBUG
-                if (*ksocknal_tunables.ksnd_protocol < 3)
-                        rc = 0;
+               if (*ksocknal_tunables.ksnd_protocol < 3)
+                       rc = 0; /* debug builds only: protocol < 3 may run untyped */
 #endif
-                if (rc != 0) {
-                        CERROR("Protocol V3.x MUST have typed connections\n");
-                        return rc;
-                }
-        }
+               if (rc != 0) {
+                       CERROR("Protocol V3.x MUST have typed connections\n");
+                       return rc;
+               }
+       }
 
-        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
-                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
-        if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
-                *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
+       if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2) /* clamp into [2, LNET_MAX_IOV] */
+               *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
+       if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
+               *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
 
-        ksocknal_tunables.ksnd_sysctl =
-                cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
+       ksocknal_tunables.ksnd_sysctl =
+               register_sysctl_table(ksocknal_top_ctl_table);
 
-        if (ksocknal_tunables.ksnd_sysctl == NULL)
-                CWARN("Can't setup /proc tunables\n");
+       if (ksocknal_tunables.ksnd_sysctl == NULL)
+               CWARN("Can't setup /proc tunables\n"); /* non-fatal: only warned about, 0 is still returned */
 
-        return 0;
+       return 0;
 }
 
 void
 ksocknal_lib_tunables_fini ()
 {
-        if (ksocknal_tunables.ksnd_sysctl != NULL)
-                cfs_unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
+       if (ksocknal_tunables.ksnd_sysctl != NULL) /* NULL when registration failed in ksocknal_lib_tunables_init() */
+               unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
 }
 #else
 int
@@ -382,53 +342,6 @@ ksocknal_lib_tunables_fini ()
 }
 #endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
 
-void
-ksocknal_lib_bind_irq (unsigned int irq)
-{
-#if (defined(CONFIG_SMP) && defined(CPU_AFFINITY))
-        int              bind;
-        int              cpu;
-        char             cmdline[64];
-        ksock_irqinfo_t *info;
-        char            *argv[] = {"/bin/sh",
-                                   "-c",
-                                   cmdline,
-                                   NULL};
-        char            *envp[] = {"HOME=/",
-                                   "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-                                   NULL};
-
-        LASSERT (irq < NR_IRQS);
-        if (irq == 0)              /* software NIC or affinity disabled */
-                return;
-
-        info = &ksocknal_data.ksnd_irqinfo[irq];
-
-        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
-        LASSERT (info->ksni_valid);
-        bind = !info->ksni_bound;
-        info->ksni_bound = 1;
-
-        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
-        if (!bind)                              /* bound already */
-                return;
-
-        cpu = ksocknal_irqsched2cpu(info->ksni_sched);
-        snprintf (cmdline, sizeof (cmdline),
-                  "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq);
-
-        LCONSOLE_INFO("Binding irq %u to CPU %d with cmd: %s\n",
-                      irq, cpu, cmdline);
-
-        /* FIXME: Find a better method of setting IRQ affinity...
-         */
-
-        USERMODEHELPER(argv[0], argv, envp);
-#endif
-}
-
 int
 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
 {
@@ -454,101 +367,64 @@ ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
         return 0;
 }
 
-unsigned int
-ksocknal_lib_sock_irq (struct socket *sock)
-{
-        int                irq = 0;
-#ifdef CPU_AFFINITY
-        struct dst_entry  *dst;
-
-        if (!*ksocknal_tunables.ksnd_irq_affinity)
-                return 0;
-
-        dst = sk_dst_get (sock->sk);
-        if (dst != NULL) {
-                if (dst->dev != NULL) {
-                        irq = dst->dev->irq;
-                        if (irq >= NR_IRQS) {
-                                CERROR ("Unexpected IRQ %x\n", irq);
-                                irq = 0;
-                        }
-                }
-                dst_release (dst);
-        }
-
-#endif
-        return irq;
-}
-
 int
 ksocknal_lib_zc_capable(ksock_conn_t *conn)
 {
-        int  caps = conn->ksnc_sock->sk->sk_route_caps;
+       int  caps = conn->ksnc_sock->sk->sk_route_caps;
 
-        if (conn->ksnc_proto == &ksocknal_protocol_v1x)
-                return 0;
+       if (conn->ksnc_proto == &ksocknal_protocol_v1x)
+               return 0; /* V1.x connections are always reported as not ZC-capable */
 
-        /* ZC if the socket supports scatter/gather and doesn't need software
-         * checksums */
-        return ((caps & NETIF_F_SG) != 0 &&
-                (caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) != 0);
+       /* ZC if the socket supports scatter/gather and doesn't need software
+        * checksums: the result is non-zero only when the socket's route
+        * capabilities contain NETIF_F_SG and at least one bit of
+        * NETIF_F_ALL_CSUM */
+       return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0);
 }
 
 int
-ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+ksocknal_lib_send_iov(ksock_conn_t *conn, ksock_tx_t *tx)
 {
-        struct socket *sock = conn->ksnc_sock;
-        int            nob;
-        int            rc;
+       struct socket  *sock = conn->ksnc_sock;
+       int             nob;
+       int             rc;
 
-        if (*ksocknal_tunables.ksnd_enable_csum        && /* checksum enabled */
-            conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection  */
-            tx->tx_nob == tx->tx_resid                 && /* frist sending    */
-            tx->tx_msg.ksm_csum == 0)                     /* not checksummed  */
-                ksocknal_lib_csum_tx(tx);
+       if (*ksocknal_tunables.ksnd_enable_csum        && /* checksum enabled */
+           conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection  */
+           tx->tx_nob == tx->tx_resid                 && /* first sending    */
+           tx->tx_msg.ksm_csum == 0)                     /* not checksummed  */
+               ksocknal_lib_csum_tx(tx);
 
-        /* NB we can't trust socket ops to either consume our iovs
-         * or leave them alone. */
+       /* NB we can't trust socket ops to either consume our iovs
+        * or leave them alone, so the tx iovs are copied into a scratch
+        * array below and only that copy is handed to the socket. */
 
-        {
+       {
 #if SOCKNAL_SINGLE_FRAG_TX
-                struct iovec    scratch;
-                struct iovec   *scratchiov = &scratch;
-                unsigned int    niov = 1;
+               struct iovec    scratch;
+               struct iovec   *scratchiov = &scratch;
+               unsigned int    niov = 1;
 #else
                 struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
-                unsigned int    niov = tx->tx_niov;
+               unsigned int    niov = tx->tx_niov;
 #endif
-                struct msghdr msg = {
-                        .msg_name       = NULL,
-                        .msg_namelen    = 0,
-                        .msg_iov        = scratchiov,
-                        .msg_iovlen     = niov,
-                        .msg_control    = NULL,
-                        .msg_controllen = 0,
-                        .msg_flags      = MSG_DONTWAIT
-                };
-                mm_segment_t oldmm = get_fs();
+               struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
                 int  i;
 
-                for (nob = i = 0; i < niov; i++) {
-                        scratchiov[i] = tx->tx_iov[i];
-                        nob += scratchiov[i].iov_len;
-                }
+               for (nob = i = 0; i < niov; i++) {
+                       scratchiov[i] = tx->tx_iov[i];
+                       nob += scratchiov[i].iov_len;
+               }
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
-                    nob < tx->tx_resid)
-                        msg.msg_flags |= MSG_MORE;
+               if (!list_empty(&conn->ksnc_tx_queue) ||
+                   nob < tx->tx_resid)
+                       msg.msg_flags |= MSG_MORE; /* more txs are queued, or this call won't cover all of tx_resid */
 
-                set_fs (KERNEL_DS);
-                rc = sock_sendmsg(sock, &msg, nob);
-                set_fs (oldmm);
-        }
-        return rc;
+               rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov,
+                                   niov, nob);
+       }
+       return rc;
 }
 
 int
-ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+ksocknal_lib_send_kiov(ksock_conn_t *conn, ksock_tx_t *tx)
 {
         struct socket *sock = conn->ksnc_sock;
         lnet_kiov_t   *kiov = tx->tx_kiov;
@@ -571,7 +447,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                 CDEBUG(D_NET, "page %p + offset %x for %d\n",
                                page, offset, kiov->kiov_len);
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
+               if (!list_empty(&conn->ksnc_tx_queue) ||
                     fragsize < tx->tx_resid)
                         msgflg |= MSG_MORE;
 
@@ -579,50 +455,40 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                         rc = sk->sk_prot->sendpage(sk, page,
                                                    offset, fragsize, msgflg);
                 } else {
-                        rc = tcp_sendpage(sock, page, offset, fragsize, msgflg);
+                        rc = cfs_tcp_sendpage(sk, page, offset, fragsize,
+                                              msgflg);
                 }
         } else {
 #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
-                struct iovec  scratch;
-                struct iovec *scratchiov = &scratch;
-                unsigned int  niov = 1;
+               struct iovec    scratch;
+               struct iovec   *scratchiov = &scratch;
+               unsigned int    niov = 1;
 #else
 #ifdef CONFIG_HIGHMEM
 #warning "XXX risk of kmap deadlock on multiple frags..."
 #endif
-                struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
-                unsigned int  niov = tx->tx_nkiov;
+               struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+               unsigned int  niov = tx->tx_nkiov;
 #endif
-                struct msghdr msg = {
-                        .msg_name       = NULL,
-                        .msg_namelen    = 0,
-                        .msg_iov        = scratchiov,
-                        .msg_iovlen     = niov,
-                        .msg_control    = NULL,
-                        .msg_controllen = 0,
-                        .msg_flags      = MSG_DONTWAIT
-                };
-                mm_segment_t  oldmm = get_fs();
-                int           i;
-
-                for (nob = i = 0; i < niov; i++) {
-                        scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
-                                                 kiov[i].kiov_offset;
-                        nob += scratchiov[i].iov_len = kiov[i].kiov_len;
-                }
+               struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
+               int           i;
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
-                    nob < tx->tx_resid)
-                        msg.msg_flags |= MSG_MORE;
+               for (nob = i = 0; i < niov; i++) {
+                       scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+                                                kiov[i].kiov_offset;
+                       nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+               }
 
-                set_fs (KERNEL_DS);
-                rc = sock_sendmsg(sock, &msg, nob);
-                set_fs (oldmm);
+               if (!list_empty(&conn->ksnc_tx_queue) ||
+                   nob < tx->tx_resid)
+                       msg.msg_flags |= MSG_MORE;
 
-                for (i = 0; i < niov; i++)
-                        kunmap(kiov[i].kiov_page);
-        }
-        return rc;
+               rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
+
+               for (i = 0; i < niov; i++)
+                       kunmap(kiov[i].kiov_page);
+       }
+       return rc;
 }
 
 void
@@ -647,24 +513,17 @@ int
 ksocknal_lib_recv_iov (ksock_conn_t *conn)
 {
 #if SOCKNAL_SINGLE_FRAG_RX
-        struct iovec  scratch;
-        struct iovec *scratchiov = &scratch;
-        unsigned int  niov = 1;
+       struct iovec  scratch;
+       struct iovec *scratchiov = &scratch;
+       unsigned int  niov = 1;
 #else
-        struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
-        unsigned int  niov = conn->ksnc_rx_niov;
+       struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+       unsigned int  niov = conn->ksnc_rx_niov;
 #endif
-        struct iovec *iov = conn->ksnc_rx_iov;
-        struct msghdr msg = {
-                .msg_name       = NULL,
-                .msg_namelen    = 0,
-                .msg_iov        = scratchiov,
-                .msg_iovlen     = niov,
-                .msg_control    = NULL,
-                .msg_controllen = 0,
-                .msg_flags      = 0
-        };
-        mm_segment_t oldmm = get_fs();
+       struct iovec *iov = conn->ksnc_rx_iov;
+       struct msghdr msg = {
+               .msg_flags      = 0
+       };
         int          nob;
         int          i;
         int          rc;
@@ -682,10 +541,8 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
         }
         LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
-        set_fs (KERNEL_DS);
-        rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-        /* NB this is just a boolean..........................^ */
-        set_fs (oldmm);
+       rc = kernel_recvmsg(conn->ksnc_sock, &msg,
+               (struct kvec *)scratchiov, niov, nob, MSG_DONTWAIT);
 
         saved_csum = 0;
         if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
@@ -739,7 +596,8 @@ ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
 
         for (nob = i = 0; i < niov; i++) {
                 if ((kiov[i].kiov_offset != 0 && i > 0) ||
-                    (kiov[i].kiov_offset + kiov[i].kiov_len != CFS_PAGE_SIZE && i < niov - 1))
+                   (kiov[i].kiov_offset + kiov[i].kiov_len !=
+                    PAGE_CACHE_SIZE && i < niov - 1))
                         return NULL;
 
                 pages[i] = kiov[i].kiov_page;
@@ -768,20 +626,14 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
 #ifdef CONFIG_HIGHMEM
 #warning "XXX risk of kmap deadlock on multiple frags..."
 #endif
-        struct iovec  *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
-        struct page  **pages      = conn->ksnc_scheduler->kss_rx_scratch_pgs;
-        unsigned int   niov       = conn->ksnc_rx_nkiov;
+       struct iovec  *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+       struct page  **pages      = conn->ksnc_scheduler->kss_rx_scratch_pgs;
+       unsigned int   niov       = conn->ksnc_rx_nkiov;
 #endif
-        lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
-        struct msghdr msg = {
-                .msg_name       = NULL,
-                .msg_namelen    = 0,
-                .msg_iov        = scratchiov,
-                .msg_control    = NULL,
-                .msg_controllen = 0,
-                .msg_flags      = 0
-        };
-        mm_segment_t oldmm = get_fs();
+       lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
+       struct msghdr msg = {
+               .msg_flags      = 0
+       };
         int          nob;
         int          i;
         int          rc;
@@ -789,28 +641,27 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
         void        *addr;
         int          sum;
         int          fragnob;
+       int n;
 
         /* NB we can't trust socket ops to either consume our iovs
          * or leave them alone. */
-        if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
-                nob = scratchiov[0].iov_len;
-                msg.msg_iovlen = 1;
+       if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
+               nob = scratchiov[0].iov_len;
+               n = 1;
 
-        } else {
-                for (nob = i = 0; i < niov; i++) {
-                        nob += scratchiov[i].iov_len = kiov[i].kiov_len;
-                        scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
-                                                 kiov[i].kiov_offset;
-                }
-                msg.msg_iovlen = niov;
-        }
+       } else {
+               for (nob = i = 0; i < niov; i++) {
+                       nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+                       scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+                                                kiov[i].kiov_offset;
+               }
+               n = niov;
+       }
 
-        LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+       LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
-        set_fs (KERNEL_DS);
-        rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-        /* NB this is just a boolean.......................^ */
-        set_fs (oldmm);
+       rc = kernel_recvmsg(conn->ksnc_sock, &msg,
+                       (struct kvec *)scratchiov, n, nob, MSG_DONTWAIT);
 
         if (conn->ksnc_msg.ksm_csum != 0) {
                 for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
@@ -1064,30 +915,11 @@ ksocknal_lib_setup_sock (struct socket *sock)
         return (0);
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
-        return &(sk->tp_pinfo.af_tcp);
-}
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-#define sock2tcp_opt(sk) tcp_sk(sk)
-#else
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
-        struct tcp_sock *s = (struct tcp_sock *)sk;
-        return &s->tcp;
-}
-#endif
-
 void
 ksocknal_lib_push_conn (ksock_conn_t *conn)
 {
         struct sock    *sk;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
-        struct tcp_opt *tp;
-#else
         struct tcp_sock *tp;
-#endif
         int             nonagle;
         int             val = 1;
         int             rc;
@@ -1097,8 +929,8 @@ ksocknal_lib_push_conn (ksock_conn_t *conn)
         if (rc != 0)                            /* being shut down */
                 return;
 
-        sk = conn->ksnc_sock->sk;
-        tp = sock2tcp_opt(sk);
+       sk = conn->ksnc_sock->sk;
+       tp = tcp_sk(sk);
 
         lock_sock (sk);
         nonagle = tp->nonagle;
@@ -1127,25 +959,33 @@ extern void ksocknal_write_callback (ksock_conn_t *conn);
  * socket call back in Linux
  */
 static void
-ksocknal_data_ready (struct sock *sk, int n)
+#ifdef HAVE_SK_DATA_READY_ONE_ARG
+ksocknal_data_ready(struct sock *sk)
+#else
+ksocknal_data_ready(struct sock *sk, int n)
+#endif
 {
-        ksock_conn_t  *conn;
-        ENTRY;
+       ksock_conn_t  *conn;
+       ENTRY;
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
-        conn = sk->sk_user_data;
-        if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
-                LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
-                sk->sk_data_ready (sk, n);
-        } else
-                ksocknal_read_callback(conn);
+       conn = sk->sk_user_data;
+       if (conn == NULL) {     /* raced with ksocknal_terminate_conn */
+               LASSERT(sk->sk_data_ready != &ksocknal_data_ready);
+#ifdef HAVE_SK_DATA_READY_ONE_ARG
+               sk->sk_data_ready(sk);
+#else
+               sk->sk_data_ready(sk, n);
+#endif
+       } else
+               ksocknal_read_callback(conn);
 
-        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
 
-        EXIT;
+       EXIT;
 }
 
 static void
@@ -1157,7 +997,7 @@ ksocknal_write_space (struct sock *sk)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         wspace = SOCKNAL_WSPACE(sk);
@@ -1169,14 +1009,14 @@ ksocknal_write_space (struct sock *sk)
                                       " ready" : " blocked"),
                (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
                                       " scheduled" : " idle"),
-               (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
+              (conn == NULL) ? "" : (list_empty(&conn->ksnc_tx_queue) ?
                                       " empty" : " queued"));
 
         if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
                 LASSERT (sk->sk_write_space != &ksocknal_write_space);
                 sk->sk_write_space (sk);
 
-                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+               read_unlock(&ksocknal_data.ksnd_global_lock);
                 return;
         }
 
@@ -1190,7 +1030,7 @@ ksocknal_write_space (struct sock *sk)
                 clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
         }
 
-        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
 }
 
 void
@@ -1229,12 +1069,12 @@ ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
 int
 ksocknal_lib_memory_pressure(ksock_conn_t *conn)
 {
-        int            rc = 0;
-        ksock_sched_t *sched;
-        
-        sched = conn->ksnc_scheduler;
-        cfs_spin_lock_bh (&sched->kss_lock);
-        
+       int            rc = 0;
+       ksock_sched_t *sched;
+
+       sched = conn->ksnc_scheduler;
+       spin_lock_bh(&sched->kss_lock);
+
         if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
             !conn->ksnc_tx_ready) {
                 /* SOCK_NOSPACE is set when the socket fills
@@ -1247,42 +1087,8 @@ ksocknal_lib_memory_pressure(ksock_conn_t *conn)
                  * after a timeout */
                 rc = -ENOMEM;
         }
-        
-        cfs_spin_unlock_bh (&sched->kss_lock);
-
-        return rc;
-}
-
-__u64
-ksocknal_lib_new_incarnation(void)
-{
-        struct timeval tv;
-
-        /* The incarnation number is the time this module loaded and it
-         * identifies this particular instance of the socknal.  Hopefully
-         * we won't be able to reboot more frequently than 1MHz for the
-         * forseeable future :) */
-
-        do_gettimeofday(&tv);
-
-        return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-}
-
-int
-ksocknal_lib_bind_thread_to_cpu(int id)
-{
-#if defined(CONFIG_SMP) && defined(CPU_AFFINITY)
-        id = ksocknal_sched2cpu(id);
-        if (cpu_online(id)) {
-                cpumask_t m = CPU_MASK_NONE;
-                cpu_set(id, m);
-                set_cpus_allowed(current, m);
-                return 0;
-        }
 
-        return -1;
+       spin_unlock_bh(&sched->kss_lock);
 
-#else
-        return 0;
-#endif
+       return rc;
 }