Whamcloud - gitweb
LU-1346 libcfs: cleanup macros in kp30.h
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_lib-linux.c
index 70c9b39..1dff915 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -43,7 +43,9 @@
 enum {
         SOCKLND_TIMEOUT = 1,
         SOCKLND_CREDITS,
-        SOCKLND_PEER_CREDITS,
+        SOCKLND_PEER_TXCREDITS,
+        SOCKLND_PEER_RTRCREDITS,
+        SOCKLND_PEER_TIMEOUT,
         SOCKLND_NCONNDS,
         SOCKLND_RECONNECTS_MIN,
         SOCKLND_RECONNECTS_MAX,
@@ -55,6 +57,8 @@ enum {
         SOCKLND_TX_BUFFER_SIZE,
         SOCKLND_NAGLE,
         SOCKLND_IRQ_AFFINITY,
+        SOCKLND_ROUND_ROBIN,
+        SOCKLND_KEEPALIVE,
         SOCKLND_KEEPALIVE_IDLE,
         SOCKLND_KEEPALIVE_COUNT,
         SOCKLND_KEEPALIVE_INTVL,
@@ -68,7 +72,9 @@ enum {
 
 #define SOCKLND_TIMEOUT         CTL_UNNUMBERED
 #define SOCKLND_CREDITS         CTL_UNNUMBERED
-#define SOCKLND_PEER_CREDITS    CTL_UNNUMBERED
+#define SOCKLND_PEER_TXCREDITS  CTL_UNNUMBERED
+#define SOCKLND_PEER_RTRCREDITS  CTL_UNNUMBERED
+#define SOCKLND_PEER_TIMEOUT    CTL_UNNUMBERED
 #define SOCKLND_NCONNDS         CTL_UNNUMBERED
 #define SOCKLND_RECONNECTS_MIN  CTL_UNNUMBERED
 #define SOCKLND_RECONNECTS_MAX  CTL_UNNUMBERED
@@ -80,6 +86,8 @@ enum {
 #define SOCKLND_TX_BUFFER_SIZE  CTL_UNNUMBERED
 #define SOCKLND_NAGLE           CTL_UNNUMBERED
 #define SOCKLND_IRQ_AFFINITY    CTL_UNNUMBERED
+#define SOCKLND_ROUND_ROBIN     CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE       CTL_UNNUMBERED
 #define SOCKLND_KEEPALIVE_IDLE  CTL_UNNUMBERED
 #define SOCKLND_KEEPALIVE_COUNT CTL_UNNUMBERED
 #define SOCKLND_KEEPALIVE_INTVL CTL_UNNUMBERED
@@ -110,15 +118,33 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = {
                 .strategy = &sysctl_intvec,
         },
          {
-                .ctl_name = SOCKLND_PEER_CREDITS,
+                .ctl_name = SOCKLND_PEER_TXCREDITS,
                 .procname = "peer_credits",
-                .data     = &ksocknal_tunables.ksnd_peercredits,
+                .data     = &ksocknal_tunables.ksnd_peertxcredits,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+         {
+                .ctl_name = SOCKLND_PEER_RTRCREDITS,
+                .procname = "peer_buffer_credits",
+                .data     = &ksocknal_tunables.ksnd_peerrtrcredits,
                 .maxlen   = sizeof (int),
                 .mode     = 0444,
                 .proc_handler = &proc_dointvec,
                 .strategy = &sysctl_intvec,
         },
         {
+                .ctl_name = SOCKLND_PEER_TIMEOUT,
+                .procname = "peer_timeout",
+                .data     = &ksocknal_tunables.ksnd_peertimeout,
+                .maxlen   = sizeof (int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+                .strategy = &sysctl_intvec,
+        },
+        {
                 .ctl_name = SOCKLND_NCONNDS,
                 .procname = "nconnds",
                 .data     = &ksocknal_tunables.ksnd_nconnds,
@@ -157,7 +183,7 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = {
         {
                 .ctl_name = SOCKLND_ZERO_COPY,
                 .procname = "zero_copy",
-                .data     = &ksocknal_tunables.ksnd_zc_min_frag,
+                .data     = &ksocknal_tunables.ksnd_zc_min_payload,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
                 .proc_handler = &proc_dointvec,
@@ -239,6 +265,24 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = {
         },
 #endif
         {
+                .ctl_name = SOCKLND_ROUND_ROBIN,
+                .procname = "round_robin",
+                .data     = &ksocknal_tunables.ksnd_round_robin,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE,
+                .procname = "keepalive",
+                .data     = &ksocknal_tunables.ksnd_keepalive,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
                 .ctl_name = SOCKLND_KEEPALIVE_IDLE,
                 .procname = "keepalive_idle",
                 .data     = &ksocknal_tunables.ksnd_keepalive_idle,
@@ -315,6 +359,18 @@ cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
 int
 ksocknal_lib_tunables_init ()
 {
+        if (!*ksocknal_tunables.ksnd_typed_conns) {
+                int rc = -EINVAL;
+#if SOCKNAL_VERSION_DEBUG
+                if (*ksocknal_tunables.ksnd_protocol < 3)
+                        rc = 0;
+#endif
+                if (rc != 0) {
+                        CERROR("Protocol V3.x MUST have typed connections\n");
+                        return rc;
+                }
+        }
+
         if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
                 *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
         if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
@@ -348,53 +404,6 @@ ksocknal_lib_tunables_fini ()
 }
 #endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */
 
-void
-ksocknal_lib_bind_irq (unsigned int irq)
-{
-#if (defined(CONFIG_SMP) && defined(CPU_AFFINITY))
-        int              bind;
-        int              cpu;
-        char             cmdline[64];
-        ksock_irqinfo_t *info;
-        char            *argv[] = {"/bin/sh",
-                                   "-c",
-                                   cmdline,
-                                   NULL};
-        char            *envp[] = {"HOME=/",
-                                   "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-                                   NULL};
-
-        LASSERT (irq < NR_IRQS);
-        if (irq == 0)              /* software NIC or affinity disabled */
-                return;
-
-        info = &ksocknal_data.ksnd_irqinfo[irq];
-
-        write_lock_bh (&ksocknal_data.ksnd_global_lock);
-
-        LASSERT (info->ksni_valid);
-        bind = !info->ksni_bound;
-        info->ksni_bound = 1;
-
-        write_unlock_bh (&ksocknal_data.ksnd_global_lock);
-
-        if (!bind)                              /* bound already */
-                return;
-
-        cpu = ksocknal_irqsched2cpu(info->ksni_sched);
-        snprintf (cmdline, sizeof (cmdline),
-                  "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq);
-
-        LCONSOLE_INFO("Binding irq %u to CPU %d with cmd: %s\n",
-                      irq, cpu, cmdline);
-
-        /* FIXME: Find a better method of setting IRQ affinity...
-         */
-
-        USERMODEHELPER(argv[0], argv, envp);
-#endif
-}
-
 int
 ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
 {
@@ -420,41 +429,17 @@ ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
         return 0;
 }
 
-unsigned int
-ksocknal_lib_sock_irq (struct socket *sock)
-{
-        int                irq = 0;
-#ifdef CPU_AFFINITY
-        struct dst_entry  *dst;
-
-        if (!*ksocknal_tunables.ksnd_irq_affinity)
-                return 0;
-
-        dst = sk_dst_get (sock->sk);
-        if (dst != NULL) {
-                if (dst->dev != NULL) {
-                        irq = dst->dev->irq;
-                        if (irq >= NR_IRQS) {
-                                CERROR ("Unexpected IRQ %x\n", irq);
-                                irq = 0;
-                        }
-                }
-                dst_release (dst);
-        }
-
-#endif
-        return irq;
-}
-
 int
-ksocknal_lib_zc_capable(struct socket *sock)
+ksocknal_lib_zc_capable(ksock_conn_t *conn)
 {
-        int  caps = sock->sk->sk_route_caps;
+       int  caps = conn->ksnc_sock->sk->sk_route_caps;
+
+       if (conn->ksnc_proto == &ksocknal_protocol_v1x)
+               return 0;       /* never ZC on a V1.x connection */
 
-        /* ZC if the socket supports scatter/gather and doesn't need software
-         * checksums */
-        return ((caps & NETIF_F_SG) != 0 &&
-                (caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) != 0);
+       /* Zero-copy (ZC) only if the socket's route supports scatter/gather
+        * and doesn't need software checksums; returns non-zero if so */
+       return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0);
 }
 
 int
@@ -499,7 +484,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
                         nob += scratchiov[i].iov_len;
                 }
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
+                if (!cfs_list_empty(&conn->ksnc_tx_queue) ||
                     nob < tx->tx_resid)
                         msg.msg_flags |= MSG_MORE;
 
@@ -514,15 +499,16 @@ int
 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         struct socket *sock = conn->ksnc_sock;
-        lnet_kiov_t    *kiov = tx->tx_kiov;
+        lnet_kiov_t   *kiov = tx->tx_kiov;
         int            rc;
         int            nob;
 
+        /* Not NOOP message */
+        LASSERT (tx->tx_lnetmsg != NULL);
+
         /* NB we can't trust socket ops to either consume our iovs
          * or leave them alone. */
-
-        if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag &&
-            tx->tx_msg.ksm_zc_req_cookie != 0) {
+        if (tx->tx_msg.ksm_zc_cookies[0] != 0) {
                 /* Zero copy is enabled */
                 struct sock   *sk = sock->sk;
                 struct page   *page = kiov->kiov_page;
@@ -533,7 +519,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                 CDEBUG(D_NET, "page %p + offset %x for %d\n",
                                page, offset, kiov->kiov_len);
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
+                if (!cfs_list_empty(&conn->ksnc_tx_queue) ||
                     fragsize < tx->tx_resid)
                         msgflg |= MSG_MORE;
 
@@ -541,7 +527,8 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                         rc = sk->sk_prot->sendpage(sk, page,
                                                    offset, fragsize, msgflg);
                 } else {
-                        rc = tcp_sendpage(sock, page, offset, fragsize, msgflg);
+                        rc = cfs_tcp_sendpage(sk, page, offset, fragsize,
+                                              msgflg);
                 }
         } else {
 #if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
@@ -573,7 +560,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                         nob += scratchiov[i].iov_len = kiov[i].kiov_len;
                 }
 
-                if (!list_empty(&conn->ksnc_tx_queue) ||
+                if (!cfs_list_empty(&conn->ksnc_tx_queue) ||
                     nob < tx->tx_resid)
                         msg.msg_flags |= MSG_MORE;
 
@@ -701,7 +688,8 @@ ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
 
         for (nob = i = 0; i < niov; i++) {
                 if ((kiov[i].kiov_offset != 0 && i > 0) ||
-                    (kiov[i].kiov_offset + kiov[i].kiov_len != CFS_PAGE_SIZE && i < niov - 1))
+                   (kiov[i].kiov_offset + kiov[i].kiov_len !=
+                    PAGE_CACHE_SIZE && i < niov - 1))
                         return NULL;
 
                 pages[i] = kiov[i].kiov_page;
@@ -1026,30 +1014,11 @@ ksocknal_lib_setup_sock (struct socket *sock)
         return (0);
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
-        return &(sk->tp_pinfo.af_tcp);
-}
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10))
-#define sock2tcp_opt(sk) tcp_sk(sk)
-#else
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
-        struct tcp_sock *s = (struct tcp_sock *)sk;
-        return &s->tcp;
-}
-#endif
-
 void
 ksocknal_lib_push_conn (ksock_conn_t *conn)
 {
         struct sock    *sk;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11))
-        struct tcp_opt *tp;
-#else
         struct tcp_sock *tp;
-#endif
         int             nonagle;
         int             val = 1;
         int             rc;
@@ -1059,8 +1028,8 @@ ksocknal_lib_push_conn (ksock_conn_t *conn)
         if (rc != 0)                            /* being shut down */
                 return;
 
-        sk = conn->ksnc_sock->sk;
-        tp = sock2tcp_opt(sk);
+       sk = conn->ksnc_sock->sk;
+       tp = tcp_sk(sk);
 
         lock_sock (sk);
         nonagle = tp->nonagle;
@@ -1096,7 +1065,7 @@ ksocknal_data_ready (struct sock *sk, int n)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
@@ -1105,7 +1074,7 @@ ksocknal_data_ready (struct sock *sk, int n)
         } else
                 ksocknal_read_callback(conn);
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
 
         EXIT;
 }
@@ -1119,7 +1088,7 @@ ksocknal_write_space (struct sock *sk)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        read_lock (&ksocknal_data.ksnd_global_lock);
+       read_lock(&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         wspace = SOCKNAL_WSPACE(sk);
@@ -1131,14 +1100,14 @@ ksocknal_write_space (struct sock *sk)
                                       " ready" : " blocked"),
                (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ?
                                       " scheduled" : " idle"),
-               (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ?
+               (conn == NULL) ? "" : (cfs_list_empty (&conn->ksnc_tx_queue) ?
                                       " empty" : " queued"));
 
         if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
                 LASSERT (sk->sk_write_space != &ksocknal_write_space);
                 sk->sk_write_space (sk);
 
-                read_unlock (&ksocknal_data.ksnd_global_lock);
+               read_unlock(&ksocknal_data.ksnd_global_lock);
                 return;
         }
 
@@ -1152,7 +1121,7 @@ ksocknal_write_space (struct sock *sk)
                 clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
         }
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+       read_unlock(&ksocknal_data.ksnd_global_lock);
 }
 
 void
@@ -1187,3 +1156,30 @@ ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
 
         return ;
 }
+
+int
+ksocknal_lib_memory_pressure(ksock_conn_t *conn)
+{
+       int            rc = 0;  /* stays 0 unless the caller must retry */
+       ksock_sched_t *sched;
+
+       sched = conn->ksnc_scheduler;
+       spin_lock_bh(&sched->kss_lock); /* ksnc_tx_ready is read under kss_lock */
+
+        if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
+            !conn->ksnc_tx_ready) {
+                /* SOCK_NOSPACE is set when the socket fills
+                 * and cleared in the write_space callback
+                 * (which also sets ksnc_tx_ready).  If
+                 * SOCK_NOSPACE and ksnc_tx_ready are BOTH
+                 * zero, I didn't fill the socket and
+                 * write_space won't reschedule me, so I
+                 * return -ENOMEM to get my caller to retry
+                 * after a timeout */
+                rc = -ENOMEM;
+        }
+
+       spin_unlock_bh(&sched->kss_lock);
+
+       return rc;      /* 0 or -ENOMEM */
+}