Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_lib-linux.c
index 70c9b39..8595e4c 100644 (file)
@@ -55,6 +55,8 @@ enum {
         SOCKLND_TX_BUFFER_SIZE,
         SOCKLND_NAGLE,
         SOCKLND_IRQ_AFFINITY,
+        SOCKLND_ROUND_ROBIN,
+        SOCKLND_KEEPALIVE,
         SOCKLND_KEEPALIVE_IDLE,
         SOCKLND_KEEPALIVE_COUNT,
         SOCKLND_KEEPALIVE_INTVL,
@@ -80,6 +82,8 @@ enum {
 #define SOCKLND_TX_BUFFER_SIZE  CTL_UNNUMBERED
 #define SOCKLND_NAGLE           CTL_UNNUMBERED
 #define SOCKLND_IRQ_AFFINITY    CTL_UNNUMBERED
+#define SOCKLND_ROUND_ROBIN     CTL_UNNUMBERED
+#define SOCKLND_KEEPALIVE       CTL_UNNUMBERED
 #define SOCKLND_KEEPALIVE_IDLE  CTL_UNNUMBERED
 #define SOCKLND_KEEPALIVE_COUNT CTL_UNNUMBERED
 #define SOCKLND_KEEPALIVE_INTVL CTL_UNNUMBERED
@@ -157,7 +161,7 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = {
         {
                 .ctl_name = SOCKLND_ZERO_COPY,
                 .procname = "zero_copy",
-                .data     = &ksocknal_tunables.ksnd_zc_min_frag,
+                .data     = &ksocknal_tunables.ksnd_zc_min_payload,
                 .maxlen   = sizeof (int),
                 .mode     = 0644,
                 .proc_handler = &proc_dointvec,
@@ -239,6 +243,24 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = {
         },
 #endif
         {
+                .ctl_name = SOCKLND_ROUND_ROBIN,
+                .procname = "round_robin",
+                .data     = &ksocknal_tunables.ksnd_round_robin,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = SOCKLND_KEEPALIVE,
+                .procname = "keepalive",
+                .data     = &ksocknal_tunables.ksnd_keepalive,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+                .strategy = &sysctl_intvec,
+        },
+        {
                 .ctl_name = SOCKLND_KEEPALIVE_IDLE,
                 .procname = "keepalive_idle",
                 .data     = &ksocknal_tunables.ksnd_keepalive_idle,
@@ -315,6 +337,18 @@ cfs_sysctl_table_t ksocknal_top_ctl_table[] = {
 int
 ksocknal_lib_tunables_init ()
 {
+        if (!*ksocknal_tunables.ksnd_typed_conns) {
+                int rc = -EINVAL;
+#if SOCKNAL_VERSION_DEBUG
+                if (*ksocknal_tunables.ksnd_protocol < 3)
+                        rc = 0;
+#endif
+                if (rc != 0) {
+                        CERROR("Protocol V3.x MUST have typed connections\n");
+                        return rc;
+                }
+        }
+
         if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
                 *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
         if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
@@ -370,13 +404,13 @@ ksocknal_lib_bind_irq (unsigned int irq)
 
         info = &ksocknal_data.ksnd_irqinfo[irq];
 
-        write_lock_bh (&ksocknal_data.ksnd_global_lock);
+        cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
 
         LASSERT (info->ksni_valid);
         bind = !info->ksni_bound;
         info->ksni_bound = 1;
 
-        write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+        cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
 
         if (!bind)                              /* bound already */
                 return;
@@ -447,9 +481,12 @@ ksocknal_lib_sock_irq (struct socket *sock)
 }
 
 int
-ksocknal_lib_zc_capable(struct socket *sock)
+ksocknal_lib_zc_capable(ksock_conn_t *conn)
 {
-        int  caps = sock->sk->sk_route_caps;
+        int  caps = conn->ksnc_sock->sk->sk_route_caps;
+
+        if (conn->ksnc_proto == &ksocknal_protocol_v1x)
+                return 0;
 
         /* ZC if the socket supports scatter/gather and doesn't need software
          * checksums */
@@ -514,15 +551,16 @@ int
 ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 {
         struct socket *sock = conn->ksnc_sock;
-        lnet_kiov_t    *kiov = tx->tx_kiov;
+        lnet_kiov_t   *kiov = tx->tx_kiov;
         int            rc;
         int            nob;
 
+        /* Not NOOP message */
+        LASSERT (tx->tx_lnetmsg != NULL);
+
         /* NB we can't trust socket ops to either consume our iovs
          * or leave them alone. */
-
-        if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag &&
-            tx->tx_msg.ksm_zc_req_cookie != 0) {
+        if (tx->tx_msg.ksm_zc_cookies[0] != 0) {
                 /* Zero copy is enabled */
                 struct sock   *sk = sock->sk;
                 struct page   *page = kiov->kiov_page;
@@ -1096,7 +1134,7 @@ ksocknal_data_ready (struct sock *sk, int n)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        read_lock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
@@ -1105,7 +1143,7 @@ ksocknal_data_ready (struct sock *sk, int n)
         } else
                 ksocknal_read_callback(conn);
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
 
         EXIT;
 }
@@ -1119,7 +1157,7 @@ ksocknal_write_space (struct sock *sk)
 
         /* interleave correctly with closing sockets... */
         LASSERT(!in_irq());
-        read_lock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_lock (&ksocknal_data.ksnd_global_lock);
 
         conn = sk->sk_user_data;
         wspace = SOCKNAL_WSPACE(sk);
@@ -1138,7 +1176,7 @@ ksocknal_write_space (struct sock *sk)
                 LASSERT (sk->sk_write_space != &ksocknal_write_space);
                 sk->sk_write_space (sk);
 
-                read_unlock (&ksocknal_data.ksnd_global_lock);
+                cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
                 return;
         }
 
@@ -1152,7 +1190,7 @@ ksocknal_write_space (struct sock *sk)
                 clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
         }
 
-        read_unlock (&ksocknal_data.ksnd_global_lock);
+        cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
 }
 
 void
@@ -1187,3 +1225,64 @@ ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
 
         return ;
 }
+
+int
+ksocknal_lib_memory_pressure(ksock_conn_t *conn)
+{
+        int            rc = 0;
+        ksock_sched_t *sched;
+        
+        sched = conn->ksnc_scheduler;
+        cfs_spin_lock_bh (&sched->kss_lock);
+        
+        if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) &&
+            !conn->ksnc_tx_ready) {
+                /* SOCK_NOSPACE is set when the socket fills
+                 * and cleared in the write_space callback
+                 * (which also sets ksnc_tx_ready).  If
+                 * SOCK_NOSPACE and ksnc_tx_ready are BOTH
+                 * zero, I didn't fill the socket and
+                 * write_space won't reschedule me, so I
+                 * return -ENOMEM to get my caller to retry
+                 * after a timeout */
+                rc = -ENOMEM;
+        }
+        
+        cfs_spin_unlock_bh (&sched->kss_lock);
+
+        return rc;
+}
+
+__u64
+ksocknal_lib_new_incarnation(void)
+{
+        struct timeval tv;
+
+        /* The incarnation number is the time this module loaded and it
+         * identifies this particular instance of the socknal.  Hopefully
+         * we won't be able to reboot more frequently than 1MHz for the
+         * forseeable future :) */
+
+        do_gettimeofday(&tv);
+
+        return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+}
+
+int
+ksocknal_lib_bind_thread_to_cpu(int id)
+{
+#if defined(CONFIG_SMP) && defined(CPU_AFFINITY)
+        id = ksocknal_sched2cpu(id);
+        if (cpu_online(id)) {
+                cpumask_t m = CPU_MASK_NONE;
+                cpu_set(id, m);
+                set_cpus_allowed(current, m);
+                return 0;
+        }
+
+        return -1;
+
+#else
+        return 0;
+#endif
+}