Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_cb.c
index 85e687a..8c2c956 100644 (file)
@@ -107,8 +107,8 @@ ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
         do {
                 LASSERT (tx->tx_niov > 0);
 
-                if (nob < iov->iov_len) {
-                        iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
+                if (nob < (int) iov->iov_len) {
+                        iov->iov_base = (void *)((char *)iov->iov_base + nob);
                         iov->iov_len -= nob;
                         return (rc);
                 }
@@ -145,13 +145,13 @@ ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
         do {
                 LASSERT(tx->tx_nkiov > 0);
 
-                if (nob < kiov->kiov_len) {
+                if (nob < (int)kiov->kiov_len) {
                         kiov->kiov_offset += nob;
                         kiov->kiov_len -= nob;
                         return rc;
                 }
 
-                nob -= kiov->kiov_len;
+                nob -= (int)kiov->kiov_len;
                 tx->tx_kiov = ++kiov;
                 tx->tx_nkiov--;
         } while (nob != 0);
@@ -274,9 +274,9 @@ ksocknal_recv_iov (ksock_conn_t *conn)
         do {
                 LASSERT (conn->ksnc_rx_niov > 0);
 
-                if (nob < iov->iov_len) {
+                if (nob < (int)iov->iov_len) {
                         iov->iov_len -= nob;
-                        iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
+                        iov->iov_base = (void *)((char *)iov->iov_base + nob);
                         return (-EAGAIN);
                 }
 
@@ -318,7 +318,7 @@ ksocknal_recv_kiov (ksock_conn_t *conn)
         do {
                 LASSERT (conn->ksnc_rx_nkiov > 0);
 
-                if (nob < kiov->kiov_len) {
+                if (nob < (int) kiov->kiov_len) {
                         kiov->kiov_offset += nob;
                         kiov->kiov_len -= nob;
                         return -EAGAIN;
@@ -463,6 +463,10 @@ ksocknal_check_zc_req(ksock_tx_t *tx)
 
         spin_lock(&peer->ksnp_lock);
 
+        /* ZC_REQ is going to be pinned to the peer */
+        tx->tx_deadline =
+                cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
         LASSERT (tx->tx_msg.ksm_zc_req_cookie == 0);
         tx->tx_msg.ksm_zc_req_cookie = peer->ksnp_zc_next_cookie++;
         list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
@@ -630,8 +634,7 @@ ksocknal_find_conn_locked (int payload_nob, ksock_peer_t *peer)
                 } else {
                         /* lnet packet */
                         hdr_nob = (c->ksnc_proto == &ksocknal_protocol_v2x)?
-                                  offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload):
-                                  sizeof(lnet_hdr_t);
+                                  sizeof(ksock_msg_t) : sizeof(lnet_hdr_t);
                 }
 
                 switch (c->ksnc_type) {
@@ -744,7 +747,8 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
          * We always expect at least 1 mapped fragment containing the
          * complete ksocknal message header. */
         LASSERT (lnet_iov_nob (tx->tx_niov, tx->tx_iov) +
-                 lnet_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob);
+                 lnet_kiov_nob(tx->tx_nkiov, tx->tx_kiov) ==
+                 (unsigned int)tx->tx_nob);
         LASSERT (tx->tx_niov >= 1);
         LASSERT (tx->tx_resid == tx->tx_nob);
 
@@ -757,10 +761,9 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
         tx->tx_conn = conn;
         ksocknal_conn_addref(conn); /* +1 ref for tx */
 
-        /* 
-         * NB Darwin: SOCK_WMEM_QUEUED()->sock_getsockopt() will take
-         * a blockable lock(socket lock), so SOCK_WMEM_QUEUED can't be
-         * put in spinlock. 
+        /*
+         * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
+         * but they're used inside spinlocks a lot.
          */
         bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock);
         spin_lock_bh (&sched->kss_lock);
@@ -980,6 +983,10 @@ ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id)
 
         if (peer->ksnp_accepting > 0 ||
             ksocknal_find_connecting_route_locked (peer) != NULL) {
+                /* the message is going to be pinned to the peer */
+                tx->tx_deadline =
+                        cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+                
                 /* Queue the message until a connection is established */
                 list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
                 write_unlock_bh (g_lock);
@@ -1310,6 +1317,16 @@ ksocknal_process_receive (ksock_conn_t *conn)
                         __swab64s(&conn->ksnc_msg.ksm_zc_ack_cookie);
                 }
 
+                if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
+                    conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
+                        CERROR("%s: Unknown message type: %x\n",
+                               libcfs_id2str(conn->ksnc_peer->ksnp_id),
+                               conn->ksnc_msg.ksm_type);
+                        ksocknal_new_packet(conn, 0);
+                        ksocknal_close_conn_and_siblings(conn, -EPROTO);
+                        return (-EPROTO);
+                }
+
                 if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
                     conn->ksnc_msg.ksm_csum != 0 &&     /* has checksum */
                     conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
@@ -1341,7 +1358,6 @@ ksocknal_process_receive (ksock_conn_t *conn)
                         ksocknal_new_packet (conn, 0);
                         return 0;       /* NOOP is done and just return */
                 }
-                LASSERT (conn->ksnc_msg.ksm_type == KSOCK_MSG_LNET);
 
                 conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
                 conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t);
@@ -1518,7 +1534,7 @@ int ksocknal_scheduler (void *arg)
         ksock_tx_t        *tx;
         int                rc;
         int                nloops = 0;
-        int                id = sched - ksocknal_data.ksnd_schedulers;
+        int                id = (int)(sched - ksocknal_data.ksnd_schedulers);
         char               name[16];
 
         snprintf (name, sizeof (name),"socknal_sd%02d", id);
@@ -1663,9 +1679,9 @@ int ksocknal_scheduler (void *arg)
                         nloops = 0;
 
                         if (!did_something) {   /* wait for something to do */
-                                rc = wait_event_interruptible_exclusive(
+                                cfs_wait_event_interruptible_exclusive(
                                         sched->kss_waitq,
-                                        !ksocknal_sched_cansleep(sched));
+                                        !ksocknal_sched_cansleep(sched), rc);
                                 LASSERT (rc == 0);
                         } else {
                                 our_cond_resched();
@@ -1825,7 +1841,7 @@ ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
         if (hello->kshm_nips == 0)
                 goto out;
 
-        for (i = 0; i < hello->kshm_nips; i++) {
+        for (i = 0; i < (int) hello->kshm_nips; i++) {
                 hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]);
         }
 
@@ -1946,7 +1962,7 @@ ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout)
                 goto out;
         }
 
-        for (i = 0; i < hello->kshm_nips; i++) {
+        for (i = 0; i < (int) hello->kshm_nips; i++) {
                 hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
 
                 if (hello->kshm_ips[i] == 0) {
@@ -2014,7 +2030,7 @@ ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeou
                 return rc;
         }
 
-        for (i = 0; i < hello->kshm_nips; i++) {
+        for (i = 0; i < (int) hello->kshm_nips; i++) {
                 if (conn->ksnc_flip)
                         __swab32s(&hello->kshm_ips[i]);
 
@@ -2050,9 +2066,8 @@ ksocknal_pack_msg_v2(ksock_tx_t *tx)
                 LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
 
                 tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
-                tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload);
-                tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t,  ksm_u.lnetmsg.ksnm_payload) +
-                                            tx->tx_lnetmsg->msg_len;
+                tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
+                tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
         } else {
                 LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
 
@@ -2101,16 +2116,13 @@ ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
 {
         /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
         ksock_net_t         *net = (ksock_net_t *)ni->ni_data;
-        lnet_nid_t           srcnid;
 
-        LASSERT (0 <= hello->kshm_nips && hello->kshm_nips <= LNET_MAX_INTERFACES);
+        LASSERT (hello->kshm_nips <= LNET_MAX_INTERFACES);
 
         /* rely on caller to hold a ref on socket so it wouldn't disappear */
         LASSERT (conn->ksnc_proto != NULL);
 
-        srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, peer_nid);
-
-        hello->kshm_src_nid         = srcnid;
+        hello->kshm_src_nid         = ni->ni_nid;
         hello->kshm_dst_nid         = peer_nid;
         hello->kshm_src_pid         = the_lnet.ln_pid;
 
@@ -2173,42 +2185,11 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
             hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) &&
             hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) {
                 /* Unexpected magic! */
-                if (active ||
-                    the_lnet.ln_ptlcompat == 0) {
-                        CERROR ("Bad magic(1) %#08x (%#08x expected) from "
-                                "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic),
-                                LNET_PROTO_TCP_MAGIC,
-                                HIPQUAD(conn->ksnc_ipaddr));
-                        return -EPROTO;
-                }
-
-                /* When portals compatibility is set, I may be passed a new
-                 * connection "blindly" by the acceptor, and I have to
-                 * determine if my peer has sent an acceptor connection request
-                 * or not.  This isn't a 'hello', so I'll get the acceptor to
-                 * look at it... */
-                rc = lnet_accept(ni, sock, hello->kshm_magic);
-                if (rc != 0)
-                        return -EPROTO;
-
-                /* ...and if it's OK I'm back to looking for a 'hello'... */
-                rc = libcfs_sock_read(sock, &hello->kshm_magic, 
-                                      sizeof (hello->kshm_magic), timeout);
-                if (rc != 0) {
-                        CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
-                                rc, HIPQUAD(conn->ksnc_ipaddr));
-                        LASSERT (rc < 0);
-                        return rc;
-                }
-
-                /* Only need to check V1.x magic */
-                if (hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) {
-                        CERROR ("Bad magic(2) %#08x (%#08x expected) from "
-                                "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic),
-                                LNET_PROTO_TCP_MAGIC,
-                                HIPQUAD(conn->ksnc_ipaddr));
-                        return -EPROTO;
-                }
+                CERROR ("Bad magic(1) %#08x (%#08x expected) from "
+                        "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic),
+                        LNET_PROTO_TCP_MAGIC,
+                        HIPQUAD(conn->ksnc_ipaddr));
+                return -EPROTO;
         }
 
         rc = libcfs_sock_read(sock, &hello->kshm_version,
@@ -2268,13 +2249,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
                 recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
         } else {
                 recv_id.nid = hello->kshm_src_nid;
-
-                if (the_lnet.ln_ptlcompat > 1 && /* portals peers may exist */
-                    LNET_NIDNET(recv_id.nid) == 0) /* this is one */
-                        recv_id.pid = the_lnet.ln_pid; /* give it a sensible pid */
-                else
-                        recv_id.pid = hello->kshm_src_pid;
-
+                recv_id.pid = hello->kshm_src_pid;
         }
 
         if (!active) {
@@ -2293,7 +2268,7 @@ ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
         }
 
         if (peerid->pid != recv_id.pid ||
-            !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) {
+            peerid->nid != recv_id.nid) {
                 LCONSOLE_ERROR_MSG(0x130, "Connected successfully to %s on host"
                                    " %u.%u.%u.%u, but they claimed they were "
                                    "%s; please check your Lustre "
@@ -2495,10 +2470,11 @@ ksocknal_connd_ready(void)
 int
 ksocknal_connd (void *arg)
 {
-        long               id = (long)arg;
+        long               id = (long)(long_ptr_t)arg;
         char               name[16];
         ksock_connreq_t   *cr;
         ksock_route_t     *route;
+        int                rc = 0;
 
         snprintf (name, sizeof (name), "socknal_cd%02ld", id);
         cfs_daemonize (name);
@@ -2542,9 +2518,9 @@ ksocknal_connd (void *arg)
 
                 spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
 
-                wait_event_interruptible_exclusive(
+                cfs_wait_event_interruptible_exclusive(
                         ksocknal_data.ksnd_connd_waitq,
-                        ksocknal_connd_ready());
+                        ksocknal_connd_ready(), rc);
 
                 spin_lock_bh (&ksocknal_data.ksnd_connd_lock);
         }
@@ -2641,6 +2617,31 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer)
         return (NULL);
 }
 
+static inline void
+ksocknal_flush_stale_txs(ksock_peer_t *peer)
+{
+        ksock_tx_t        *tx;
+        CFS_LIST_HEAD      (stale_txs);
+        
+        write_lock_bh (&ksocknal_data.ksnd_global_lock);
+
+        while (!list_empty (&peer->ksnp_tx_queue)) {
+                tx = list_entry (peer->ksnp_tx_queue.next,
+                                 ksock_tx_t, tx_list);
+
+                if (!cfs_time_aftereq(cfs_time_current(),
+                                      tx->tx_deadline))
+                        break;
+                
+                list_del (&tx->tx_list);
+                list_add_tail (&tx->tx_list, &stale_txs);
+        }
+
+        write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+
+        ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
+}
+
 void
 ksocknal_check_peer_timeouts (int idx)
 {
@@ -2670,8 +2671,51 @@ ksocknal_check_peer_timeouts (int idx)
                         ksocknal_conn_decref(conn);
                         goto again;
                 }
+
+                /* we can't process stale txs right here because we're
+                 * holding only shared lock */
+                if (!list_empty (&peer->ksnp_tx_queue)) {
+                        ksock_tx_t *tx = list_entry (peer->ksnp_tx_queue.next,
+                                                     ksock_tx_t, tx_list);
+
+                        if (cfs_time_aftereq(cfs_time_current(),
+                                             tx->tx_deadline)) {
+
+                                ksocknal_peer_addref(peer);
+                                read_unlock (&ksocknal_data.ksnd_global_lock);
+                                
+                                ksocknal_flush_stale_txs(peer);
+
+                                ksocknal_peer_decref(peer);
+                                goto again;
+                        }
+                }
         }
 
+        /* print out warnings about stale ZC_REQs */
+        cfs_list_for_each_entry_typed(peer, peers, ksock_peer_t, ksnp_list) {
+                ksock_tx_t *tx;
+                int         n = 0;
+                
+                cfs_list_for_each_entry_typed(tx, &peer->ksnp_zc_req_list,
+                                              ksock_tx_t, tx_zc_list) {
+                        if (!cfs_time_aftereq(cfs_time_current(),
+                                              tx->tx_deadline))
+                                break;
+                        n++;
+                }
+
+                if (n != 0) {
+                        tx = list_entry (peer->ksnp_zc_req_list.next,
+                                         ksock_tx_t, tx_zc_list);
+                        CWARN("Stale ZC_REQs for peer %s detected: %d; the "
+                              "oldest (%p) timed out %ld secs ago\n",
+                              libcfs_nid2str(peer->ksnp_id.nid), n, tx,
+                              cfs_duration_sec(cfs_time_current() -
+                                               tx->tx_deadline));
+                }
+        }
+        
         read_unlock (&ksocknal_data.ksnd_global_lock);
 }