Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_lib-linux.c
index d4f0502..0eb0997 100644 (file)
@@ -1,10 +1,42 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see [sun.com URL with a
+ * copy of GPLv2].
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
 #include "socklnd.h"
 
-# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
+# if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
 static cfs_sysctl_table_t ksocknal_ctl_table[21];
 
 cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
@@ -25,184 +57,176 @@ ksocknal_lib_tunables_init ()
         int    i = 0;
         int    j = 1;
 
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "timeout",
-                        .data     = ksocknal_tunables.ksnd_timeout,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "credits",
-                        .data     = ksocknal_tunables.ksnd_credits,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0444,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "peer_credits",
-                        .data     = ksocknal_tunables.ksnd_peercredits,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0444,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "nconnds",
-                        .data     = ksocknal_tunables.ksnd_nconnds,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0444,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "min_reconnectms",
-                        .data     = ksocknal_tunables.ksnd_min_reconnectms,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0444,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "max_reconnectms",
-                        .data     = ksocknal_tunables.ksnd_max_reconnectms,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0444,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "eager_ack",
-                        .data     = ksocknal_tunables.ksnd_eager_ack,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "zero_copy",
-                        .data     = ksocknal_tunables.ksnd_zc_min_frag,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "typed",
-                        .data     = ksocknal_tunables.ksnd_typed_conns,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0444,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "min_bulk",
-                        .data     = ksocknal_tunables.ksnd_min_bulk,
-                        .maxlen   = sizeof (int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "rx_buffer_size",
-                        .data     = ksocknal_tunables.ksnd_rx_buffer_size,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "tx_buffer_size",
-                        .data     = ksocknal_tunables.ksnd_tx_buffer_size,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "nagle",
-                        .data     = ksocknal_tunables.ksnd_nagle,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-#if CPU_AFFINITY
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "irq_affinity",
-                        .data     = ksocknal_tunables.ksnd_irq_affinity,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "timeout",
+                .data     = ksocknal_tunables.ksnd_timeout,
+                .maxlen   = sizeof (int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "credits",
+                .data     = ksocknal_tunables.ksnd_credits,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "peer_credits",
+                .data     = ksocknal_tunables.ksnd_peercredits,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "nconnds",
+                .data     = ksocknal_tunables.ksnd_nconnds,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "min_reconnectms",
+                .data     = ksocknal_tunables.ksnd_min_reconnectms,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "max_reconnectms",
+                .data     = ksocknal_tunables.ksnd_max_reconnectms,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "eager_ack",
+                .data     = ksocknal_tunables.ksnd_eager_ack,
+                .maxlen   = sizeof (int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "zero_copy",
+                .data     = ksocknal_tunables.ksnd_zc_min_frag,
+                .maxlen   = sizeof (int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "typed",
+                .data     = ksocknal_tunables.ksnd_typed_conns,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "min_bulk",
+                .data     = ksocknal_tunables.ksnd_min_bulk,
+                .maxlen   = sizeof (int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "rx_buffer_size",
+                .data     = ksocknal_tunables.ksnd_rx_buffer_size,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "tx_buffer_size",
+                .data     = ksocknal_tunables.ksnd_tx_buffer_size,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "nagle",
+                .data     = ksocknal_tunables.ksnd_nagle,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+#ifdef CPU_AFFINITY
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "irq_affinity",
+                .data     = ksocknal_tunables.ksnd_irq_affinity,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
 #endif
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "keepalive_idle",
-                        .data     = ksocknal_tunables.ksnd_keepalive_idle,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "keepalive_count",
-                        .data     = ksocknal_tunables.ksnd_keepalive_count,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "keepalive_intvl",
-                        .data     = ksocknal_tunables.ksnd_keepalive_intvl,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "keepalive_idle",
+                .data     = ksocknal_tunables.ksnd_keepalive_idle,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "keepalive_count",
+                .data     = ksocknal_tunables.ksnd_keepalive_count,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "keepalive_intvl",
+                .data     = ksocknal_tunables.ksnd_keepalive_intvl,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
 #ifdef SOCKNAL_BACKOFF
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "backoff_init",
-                        .data     = ksocknal_tunables.ksnd_backoff_init,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
-        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t)
-                {
-                        .ctl_name = j++,
-                        .procname = "backoff_max",
-                        .data     = ksocknal_tunables.ksnd_backoff_max,
-                        .maxlen   = sizeof(int),
-                        .mode     = 0644,
-                        .proc_handler = &proc_dointvec
-                };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "backoff_init",
+                .data     = ksocknal_tunables.ksnd_backoff_init,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "backoff_max",
+                .data     = ksocknal_tunables.ksnd_backoff_max,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
 #endif
+#if SOCKNAL_VERSION_DEBUG
+        ksocknal_ctl_table[i++] = (cfs_sysctl_table_t) {
+                .ctl_name = j++,
+                .procname = "protocol",
+                .data     = ksocknal_tunables.ksnd_protocol,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        };
+#endif
+        ksocknal_ctl_table[i++] =  (cfs_sysctl_table_t) { 0 };
 
-        LASSERT (j == i+1);
-        LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
+        LASSERT (j == i);
+        LASSERT (i <= sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
 
         ksocknal_tunables.ksnd_sysctl =
                 cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
@@ -235,7 +259,7 @@ ksocknal_lib_tunables_fini ()
 void
 ksocknal_lib_bind_irq (unsigned int irq)
 {
-#if (defined(CONFIG_SMP) && CPU_AFFINITY)
+#if (defined(CONFIG_SMP) && defined(CPU_AFFINITY))
         int              bind;
         int              cpu;
         char             cmdline[64];
@@ -308,7 +332,7 @@ unsigned int
 ksocknal_lib_sock_irq (struct socket *sock)
 {
         int                irq = 0;
-#if CPU_AFFINITY
+#ifdef CPU_AFFINITY
         struct dst_entry  *dst;
 
         if (!*ksocknal_tunables.ksnd_irq_affinity)
@@ -408,6 +432,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
         if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag &&
             tx->tx_msg.ksm_zc_req_cookie != 0) {
                 /* Zero copy is enabled */
+                struct sock   *sk = sock->sk;
                 struct page   *page = kiov->kiov_page;
                 int            offset = kiov->kiov_offset;
                 int            fragsize = kiov->kiov_len;
@@ -420,7 +445,12 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                     fragsize < tx->tx_resid)
                         msgflg |= MSG_MORE;
 
-                rc = tcp_sendpage(sock, page, offset, fragsize, msgflg);
+                if (sk->sk_prot->sendpage != NULL) {
+                        rc = sk->sk_prot->sendpage(sk, page,
+                                                   offset, fragsize, msgflg);
+                } else {
+                        rc = tcp_sendpage(sock, page, offset, fragsize, msgflg);
+                }
         } else {
 #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
                 struct iovec  scratch;
@@ -621,7 +651,8 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
         return (rc);
 }
 
-void ksocknal_lib_csum_tx(ksock_tx_t *tx)
+void
+ksocknal_lib_csum_tx(ksock_tx_t *tx)
 {
         int          i;
         __u32        csum;
@@ -759,6 +790,9 @@ ksocknal_lib_setup_sock (struct socket *sock)
 #ifdef SOCKNAL_BACKOFF
         if (*ksocknal_tunables.ksnd_backoff_init > 0) {
                 option = *ksocknal_tunables.ksnd_backoff_init;
+#ifdef SOCKNAL_BACKOFF_MS
+                option *= 1000;
+#endif
 
                 set_fs (KERNEL_DS);
                 rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_BACKOFF_INIT,
@@ -773,6 +807,9 @@ ksocknal_lib_setup_sock (struct socket *sock)
 
         if (*ksocknal_tunables.ksnd_backoff_max > 0) {
                 option = *ksocknal_tunables.ksnd_backoff_max;
+#ifdef SOCKNAL_BACKOFF_MS
+                option *= 1000;
+#endif
 
                 set_fs (KERNEL_DS);
                 rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_BACKOFF_MAX,