Whamcloud - gitweb
LU-10391 socklnd: don't deref lnet_hdr in LNDs
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_cb.c
index c0d626a..3ad0eee 100644 (file)
@@ -425,12 +425,12 @@ ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist, int error)
 
        while ((tx = list_first_entry_or_null(txlist, struct ksock_tx,
                                              tx_list)) != NULL) {
-               if (error && tx->tx_lnetmsg != NULL) {
+               if (error && tx->tx_lnetmsg) {
                        CNETERR("Deleting packet type %d len %d %s->%s\n",
-                               le32_to_cpu(tx->tx_lnetmsg->msg_hdr.type),
-                               le32_to_cpu(tx->tx_lnetmsg->msg_hdr.payload_length),
-                               libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
-                               libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.dest_nid)));
+                               tx->tx_lnetmsg->msg_type,
+                               tx->tx_lnetmsg->msg_len,
+                               libcfs_nidstr(&tx->tx_lnetmsg->msg_initiator),
+                               libcfs_nidstr(&tx->tx_lnetmsg->msg_target.nid));
                } else if (error) {
                        CNETERR("Deleting noop packet\n");
                }
@@ -617,7 +617,7 @@ simulate_error:
                        break;
                }
                CDEBUG(D_NET, "[%p] Error %d on write to %s ip %pISp\n",
-                      conn, rc, libcfs_id2str(conn->ksnc_peer->ksnp_id),
+                      conn, rc, libcfs_idstr(&conn->ksnc_peer->ksnp_id),
                       &conn->ksnc_peeraddr);
        }
 
@@ -674,16 +674,14 @@ ksocknal_launch_all_connections_locked(struct ksock_peer_ni *peer_ni)
 struct ksock_conn *
 ksocknal_find_conn_locked(struct ksock_peer_ni *peer_ni, struct ksock_tx *tx, int nonblk)
 {
-       struct list_head *tmp;
+       struct ksock_conn *c;
        struct ksock_conn *conn;
        struct ksock_conn *typed = NULL;
        struct ksock_conn *fallback = NULL;
        int tnob = 0;
        int fnob = 0;
 
-       list_for_each(tmp, &peer_ni->ksnp_conns) {
-               struct ksock_conn *c = list_entry(tmp, struct ksock_conn,
-                                                 ksnc_list);
+       list_for_each_entry(c, &peer_ni->ksnp_conns, ksnc_list) {
                int nob = atomic_read(&c->ksnc_tx_nob) +
                          c->ksnc_sock->sk->sk_wmem_queued;
                int rc;
@@ -754,7 +752,7 @@ ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn)
         LASSERT(!conn->ksnc_closing);
 
        CDEBUG(D_NET, "Sending to %s ip %pISp\n",
-              libcfs_id2str(conn->ksnc_peer->ksnp_id),
+              libcfs_idstr(&conn->ksnc_peer->ksnp_id),
               &conn->ksnc_peeraddr);
 
         ksocknal_tx_prep(conn, tx);
@@ -772,10 +770,9 @@ ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn)
        LASSERT(tx->tx_niov >= 1);
        LASSERT(tx->tx_resid == tx->tx_nob);
 
-        CDEBUG (D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
-                tx, (tx->tx_lnetmsg != NULL) ? tx->tx_lnetmsg->msg_hdr.type:
-                                               KSOCK_MSG_NOOP,
-                tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
+       CDEBUG(D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n",
+              tx, tx->tx_lnetmsg ? tx->tx_lnetmsg->msg_type : KSOCK_MSG_NOOP,
+              tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
 
        bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
        spin_lock_bh(&sched->kss_lock);
@@ -877,7 +874,7 @@ ksocknal_find_connecting_conn_cb_locked(struct ksock_peer_ni *peer_ni)
 
 int
 ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
-                      struct lnet_process_id id)
+                      struct lnet_processid *id)
 {
        struct ksock_peer_ni *peer_ni;
        struct ksock_conn *conn;
@@ -901,46 +898,52 @@ ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
                                         * connecting and I do have an actual
                                         * connection...
                                         */
-                                        ksocknal_queue_tx_locked (tx, conn);
+                                       ksocknal_queue_tx_locked(tx, conn);
                                        read_unlock(g_lock);
-                                        return (0);
+                                       return 0;
                                }
                        }
                }
 
-                /* I'll need a write lock... */
+               /* I'll need a write lock... */
                read_unlock(g_lock);
 
                write_lock_bh(g_lock);
 
-                peer_ni = ksocknal_find_peer_locked(ni, id);
-                if (peer_ni != NULL)
-                        break;
+               peer_ni = ksocknal_find_peer_locked(ni, id);
+               if (peer_ni != NULL)
+                       break;
 
                write_unlock_bh(g_lock);
 
-                if ((id.pid & LNET_PID_USERFLAG) != 0) {
-                        CERROR("Refusing to create a connection to "
-                               "userspace process %s\n", libcfs_id2str(id));
-                        return -EHOSTUNREACH;
-                }
+               if ((id->pid & LNET_PID_USERFLAG) != 0) {
+                       CERROR("Refusing to create a connection to userspace process %s\n",
+                              libcfs_idstr(id));
+                       return -EHOSTUNREACH;
+               }
 
-                if (retry) {
-                        CERROR("Can't find peer_ni %s\n", libcfs_id2str(id));
-                        return -EHOSTUNREACH;
-                }
+               if (retry) {
+                       CERROR("Can't find peer_ni %s\n", libcfs_idstr(id));
+                       return -EHOSTUNREACH;
+               }
 
                memset(&sa, 0, sizeof(sa));
                sa.sin_family = AF_INET;
-               sa.sin_addr.s_addr = htonl(LNET_NIDADDR(id.nid));
+               sa.sin_addr.s_addr = id->nid.nid_addr[0];
                sa.sin_port = htons(lnet_acceptor_port());
-               rc = ksocknal_add_peer(ni, id, (struct sockaddr *)&sa);
-                if (rc != 0) {
-                        CERROR("Can't add peer_ni %s: %d\n",
-                               libcfs_id2str(id), rc);
-                        return rc;
-                }
-        }
+               {
+                       struct lnet_process_id id4 = {
+                               .pid = id->pid,
+                               .nid = lnet_nid_to_nid4(&id->nid),
+                       };
+                       rc = ksocknal_add_peer(ni, id4, (struct sockaddr *)&sa);
+               }
+               if (rc != 0) {
+                       CERROR("Can't add peer_ni %s: %d\n",
+                              libcfs_idstr(id), rc);
+                       return rc;
+               }
+       }
 
         ksocknal_launch_all_connections_locked(peer_ni);
 
@@ -967,7 +970,7 @@ ksocknal_launch_packet(struct lnet_ni *ni, struct ksock_tx *tx,
        write_unlock_bh(g_lock);
 
         /* NB Routes may be ignored if connections to them failed recently */
-        CNETERR("No usable routes to %s\n", libcfs_id2str(id));
+       CNETERR("No usable routes to %s\n", libcfs_idstr(id));
        tx->tx_hstatus = LNET_MSG_STATUS_REMOTE_ERROR;
         return (-EHOSTUNREACH);
 }
@@ -978,7 +981,7 @@ ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
        /* '1' for consistency with code that checks !mpflag to restore */
        unsigned int mpflag = 1;
        int type = lntmsg->msg_type;
-       struct lnet_process_id target = lntmsg->msg_target;
+       struct lnet_processid *target = &lntmsg->msg_target;
        unsigned int payload_niov = lntmsg->msg_niov;
        struct bio_vec *payload_kiov = lntmsg->msg_kiov;
        unsigned int payload_offset = lntmsg->msg_offset;
@@ -987,11 +990,12 @@ ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
        int desc_size;
        int rc;
 
-        /* NB 'private' is different depending on what we're sending.
-         * Just ignore it... */
+       /* NB 'private' is different depending on what we're sending.
+        * Just ignore it...
+        */
 
-        CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
-               payload_nob, payload_niov, libcfs_id2str(target));
+       CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
+              payload_nob, payload_niov, libcfs_idstr(target));
 
        LASSERT (payload_nob == 0 || payload_niov > 0);
        LASSERT (payload_niov <= LNET_MAX_IOV);
@@ -1038,24 +1042,12 @@ ksocknal_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
        if (!mpflag)
                memalloc_noreclaim_restore(mpflag);
 
-        if (rc == 0)
-                return (0);
+       if (rc == 0)
+               return (0);
 
        lntmsg->msg_health_status = tx->tx_hstatus;
-        ksocknal_free_tx(tx);
-        return (-EIO);
-}
-
-int
-ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-       struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
-       if (IS_ERR(task))
-               return PTR_ERR(task);
-
-       atomic_inc(&ksocknal_data.ksnd_nthreads);
-       return 0;
+       ksocknal_free_tx(tx);
+       return -EIO;
 }
 
 void
@@ -1087,14 +1079,15 @@ ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip)
                switch (conn->ksnc_proto->pro_version) {
                case  KSOCK_PROTO_V2:
                case  KSOCK_PROTO_V3:
-                        conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
+                       conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER;
                        conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
-                        conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg;
+                       conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg;
 
-                       conn->ksnc_rx_nob_wanted = offsetof(struct ksock_msg, ksm_u);
-                       conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u);
-                       conn->ksnc_rx_iov[0].iov_len  = offsetof(struct ksock_msg, ksm_u);
-                        break;
+                       conn->ksnc_rx_nob_wanted = sizeof(struct ksock_msg_hdr);
+                       conn->ksnc_rx_nob_left = sizeof(struct ksock_msg_hdr);
+                       conn->ksnc_rx_iov[0].iov_len =
+                               sizeof(struct ksock_msg_hdr);
+                       break;
 
                case KSOCK_PROTO_V1:
                        /* Receiving bare struct lnet_hdr */
@@ -1152,7 +1145,7 @@ ksocknal_process_receive(struct ksock_conn *conn,
                         struct kvec *scratch_iov)
 {
        struct lnet_hdr *lhdr;
-       struct lnet_process_id *id;
+       struct lnet_processid *id;
        int rc;
 
        LASSERT(refcount_read(&conn->ksnc_conn_refcount) > 0);
@@ -1169,18 +1162,18 @@ ksocknal_process_receive(struct ksock_conn *conn,
                                      scratch_iov);
 
                if (rc <= 0) {
-                       struct lnet_process_id ksnp_id;
+                       struct lnet_processid *ksnp_id;
 
-                       ksnp_id = conn->ksnc_peer->ksnp_id;
+                       ksnp_id = &conn->ksnc_peer->ksnp_id;
 
                        LASSERT(rc != -EAGAIN);
                        if (rc == 0)
                                CDEBUG(D_NET, "[%p] EOF from %s ip %pISp\n",
-                                      conn, libcfs_id2str(ksnp_id),
+                                      conn, libcfs_idstr(ksnp_id),
                                       &conn->ksnc_peeraddr);
                        else if (!conn->ksnc_closing)
                                CERROR("[%p] Error %d on read from %s ip %pISp\n",
-                                      conn, rc, libcfs_id2str(ksnp_id),
+                                      conn, rc, libcfs_idstr(ksnp_id),
                                       &conn->ksnc_peeraddr);
 
                         /* it's not an error if conn is being closed */
@@ -1194,161 +1187,167 @@ ksocknal_process_receive(struct ksock_conn *conn,
                         return (-EAGAIN);
                 }
         }
-        switch (conn->ksnc_rx_state) {
-        case SOCKNAL_RX_KSM_HEADER:
-                if (conn->ksnc_flip) {
-                        __swab32s(&conn->ksnc_msg.ksm_type);
-                        __swab32s(&conn->ksnc_msg.ksm_csum);
-                        __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]);
-                        __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]);
-                }
-
-                if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
-                    conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
-                        CERROR("%s: Unknown message type: %x\n",
-                               libcfs_id2str(conn->ksnc_peer->ksnp_id),
-                               conn->ksnc_msg.ksm_type);
-                        ksocknal_new_packet(conn, 0);
-                        ksocknal_close_conn_and_siblings(conn, -EPROTO);
-                        return (-EPROTO);
-                }
+       switch (conn->ksnc_rx_state) {
+       case SOCKNAL_RX_KSM_HEADER:
+               if (conn->ksnc_flip) {
+                       __swab32s(&conn->ksnc_msg.ksm_type);
+                       __swab32s(&conn->ksnc_msg.ksm_csum);
+                       __swab64s(&conn->ksnc_msg.ksm_zc_cookies[0]);
+                       __swab64s(&conn->ksnc_msg.ksm_zc_cookies[1]);
+               }
 
-                if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
-                    conn->ksnc_msg.ksm_csum != 0 &&     /* has checksum */
-                    conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
-                        /* NOOP Checksum error */
-                        CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
-                               libcfs_id2str(conn->ksnc_peer->ksnp_id),
-                               conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
-                        ksocknal_new_packet(conn, 0);
-                        ksocknal_close_conn_and_siblings(conn, -EPROTO);
-                        return (-EIO);
-                }
+               if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
+                   conn->ksnc_msg.ksm_csum != 0 &&     /* has checksum */
+                   conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
+                       /* NOOP Checksum error */
+                       CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
+                              libcfs_idstr(&conn->ksnc_peer->ksnp_id),
+                              conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
+                       ksocknal_new_packet(conn, 0);
+                       ksocknal_close_conn_and_siblings(conn, -EPROTO);
+                       return (-EIO);
+               }
 
-                if (conn->ksnc_msg.ksm_zc_cookies[1] != 0) {
-                        __u64 cookie = 0;
+               if (conn->ksnc_msg.ksm_zc_cookies[1] != 0) {
+                       __u64 cookie = 0;
 
-                        LASSERT (conn->ksnc_proto != &ksocknal_protocol_v1x);
+                       LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
 
-                        if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP)
-                                cookie = conn->ksnc_msg.ksm_zc_cookies[0];
+                       if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP)
+                               cookie = conn->ksnc_msg.ksm_zc_cookies[0];
 
-                        rc = conn->ksnc_proto->pro_handle_zcack(conn, cookie,
-                                               conn->ksnc_msg.ksm_zc_cookies[1]);
+                       rc = conn->ksnc_proto->pro_handle_zcack(
+                               conn, cookie, conn->ksnc_msg.ksm_zc_cookies[1]);
 
-                        if (rc != 0) {
+                       if (rc != 0) {
                                CERROR("%s: Unknown ZC-ACK cookie: %llu, %llu\n",
-                                       libcfs_id2str(conn->ksnc_peer->ksnp_id),
-                                       cookie, conn->ksnc_msg.ksm_zc_cookies[1]);
-                                ksocknal_new_packet(conn, 0);
-                                ksocknal_close_conn_and_siblings(conn, -EPROTO);
-                                return (rc);
-                        }
-                }
+                                      libcfs_idstr(&conn->ksnc_peer->ksnp_id),
+                                      cookie,
+                                      conn->ksnc_msg.ksm_zc_cookies[1]);
+                               ksocknal_new_packet(conn, 0);
+                               ksocknal_close_conn_and_siblings(conn, -EPROTO);
+                               return rc;
+                       }
+               }
 
-                if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) {
-                        ksocknal_new_packet (conn, 0);
-                        return 0;       /* NOOP is done and just return */
-                }
+               switch (conn->ksnc_msg.ksm_type) {
+               case KSOCK_MSG_NOOP:
+                       ksocknal_new_packet(conn, 0);
+                       return 0;       /* NOOP is done and just return */
+
+               case KSOCK_MSG_LNET:
+
+                       conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
+                       conn->ksnc_rx_nob_wanted = sizeof(struct lnet_hdr);
+                       conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr);
 
-                conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
-               conn->ksnc_rx_nob_wanted = sizeof(struct ksock_lnet_msg);
-               conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg);
+                       conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
+                       conn->ksnc_rx_iov[0].iov_base =
+                               (void *)&conn->ksnc_msg.ksm_u.lnetmsg;
+                       conn->ksnc_rx_iov[0].iov_len = sizeof(struct lnet_hdr);
 
-               conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space;
-                conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg;
-               conn->ksnc_rx_iov[0].iov_len  = sizeof(struct ksock_lnet_msg);
+                       conn->ksnc_rx_niov = 1;
+                       conn->ksnc_rx_kiov = NULL;
+                       conn->ksnc_rx_nkiov = 0;
 
-                conn->ksnc_rx_niov = 1;
-                conn->ksnc_rx_kiov = NULL;
-                conn->ksnc_rx_nkiov = 0;
+                       goto again;     /* read lnet header now */
 
-                goto again;     /* read lnet header now */
+               default:
+                       CERROR("%s: Unknown message type: %x\n",
+                              libcfs_idstr(&conn->ksnc_peer->ksnp_id),
+                              conn->ksnc_msg.ksm_type);
+                       ksocknal_new_packet(conn, 0);
+                       ksocknal_close_conn_and_siblings(conn, -EPROTO);
+                       return -EPROTO;
+               }
 
-        case SOCKNAL_RX_LNET_HEADER:
-                /* unpack message header */
-                conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
+       case SOCKNAL_RX_LNET_HEADER:
+               /* unpack message header */
+               conn->ksnc_proto->pro_unpack(&conn->ksnc_msg);
 
-                if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) {
-                        /* Userspace peer_ni */
-                        lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
-                        id   = &conn->ksnc_peer->ksnp_id;
+               if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) {
+                       /* Userspace peer_ni */
+                       lhdr = &conn->ksnc_msg.ksm_u.lnetmsg;
+                       id = &conn->ksnc_peer->ksnp_id;
 
-                        /* Substitute process ID assigned at connection time */
-                        lhdr->src_pid = cpu_to_le32(id->pid);
-                        lhdr->src_nid = cpu_to_le64(id->nid);
-                }
+                       /* Substitute process ID assigned at connection time */
+                       lhdr->src_pid = cpu_to_le32(id->pid);
+                       lhdr->src_nid = cpu_to_le64(lnet_nid_to_nid4(&id->nid));
+               }
 
-                conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
-                ksocknal_conn_addref(conn);     /* ++ref while parsing */
-
-                rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
-                                &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr,
-                                conn->ksnc_peer->ksnp_id.nid, conn, 0);
-                if (rc < 0) {
-                        /* I just received garbage: give up on this conn */
-                        ksocknal_new_packet(conn, 0);
-                        ksocknal_close_conn_and_siblings (conn, rc);
-                        ksocknal_conn_decref(conn);
-                        return (-EPROTO);
-                }
+               conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
+               ksocknal_conn_addref(conn);     /* ++ref while parsing */
 
-                /* I'm racing with ksocknal_recv() */
-                LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
-                         conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
+               rc = lnet_parse(conn->ksnc_peer->ksnp_ni,
+                               &conn->ksnc_msg.ksm_u.lnetmsg,
+                               lnet_nid_to_nid4(&conn->ksnc_peer->ksnp_id.nid),
+                               conn, 0);
+               if (rc < 0) {
+                       /* I just received garbage: give up on this conn */
+                       ksocknal_new_packet(conn, 0);
+                       ksocknal_close_conn_and_siblings(conn, rc);
+                       ksocknal_conn_decref(conn);
+                       return (-EPROTO);
+               }
 
-                if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
-                        return 0;
+               /* I'm racing with ksocknal_recv() */
+               LASSERT(conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
+                       conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD);
 
-                /* ksocknal_recv() got called */
-                goto again;
+               if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD)
+                       return 0;
 
-        case SOCKNAL_RX_LNET_PAYLOAD:
-                /* payload all received */
-                rc = 0;
-
-                if (conn->ksnc_rx_nob_left == 0 &&   /* not truncating */
-                    conn->ksnc_msg.ksm_csum != 0 &&  /* has checksum */
-                    conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
-                        CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
-                               libcfs_id2str(conn->ksnc_peer->ksnp_id),
-                               conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
-                        rc = -EIO;
-                }
+               /* ksocknal_recv() got called */
+               goto again;
 
-                if (rc == 0 && conn->ksnc_msg.ksm_zc_cookies[0] != 0) {
-                        LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
+       case SOCKNAL_RX_LNET_PAYLOAD:
+               /* payload all received */
+               rc = 0;
 
-                        lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr;
-                        id   = &conn->ksnc_peer->ksnp_id;
+               if (conn->ksnc_rx_nob_left == 0 &&   /* not truncating */
+                   conn->ksnc_msg.ksm_csum != 0 &&  /* has checksum */
+                   conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
+                       CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n",
+                              libcfs_idstr(&conn->ksnc_peer->ksnp_id),
+                              conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum);
+                       rc = -EIO;
+               }
 
-                        rc = conn->ksnc_proto->pro_handle_zcreq(conn,
-                                        conn->ksnc_msg.ksm_zc_cookies[0],
-                                        *ksocknal_tunables.ksnd_nonblk_zcack ||
-                                        le64_to_cpu(lhdr->src_nid) != id->nid);
-                }
+               if (rc == 0 && conn->ksnc_msg.ksm_zc_cookies[0] != 0) {
+                       LASSERT(conn->ksnc_proto != &ksocknal_protocol_v1x);
+
+                       lhdr = &conn->ksnc_msg.ksm_u.lnetmsg;
+                       id = &conn->ksnc_peer->ksnp_id;
+
+                       rc = conn->ksnc_proto->pro_handle_zcreq(
+                               conn,
+                               conn->ksnc_msg.ksm_zc_cookies[0],
+                               *ksocknal_tunables.ksnd_nonblk_zcack ||
+                               le64_to_cpu(lhdr->src_nid) !=
+                               lnet_nid_to_nid4(&id->nid));
+               }
 
                if (rc && conn->ksnc_lnet_msg)
                        conn->ksnc_lnet_msg->msg_health_status =
                                LNET_MSG_STATUS_REMOTE_ERROR;
                lnet_finalize(conn->ksnc_lnet_msg, rc);
 
-                if (rc != 0) {
-                        ksocknal_new_packet(conn, 0);
-                        ksocknal_close_conn_and_siblings (conn, rc);
-                        return (-EPROTO);
-                }
-                /* Fall through */
+               if (rc != 0) {
+                       ksocknal_new_packet(conn, 0);
+                       ksocknal_close_conn_and_siblings(conn, rc);
+                       return (-EPROTO);
+               }
+               /* Fall through */
 
-        case SOCKNAL_RX_SLOP:
-                /* starting new packet? */
-                if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
-                        return 0;       /* come back later */
-                goto again;             /* try to finish reading slop now */
+       case SOCKNAL_RX_SLOP:
+               /* starting new packet? */
+               if (ksocknal_new_packet(conn, conn->ksnc_rx_nob_left))
+                       return 0;       /* come back later */
+               goto again;             /* try to finish reading slop now */
 
-        default:
-                break;
-        }
+       default:
+               break;
+       }
 
         /* Not Reached */
         LBUG ();
@@ -1718,7 +1717,7 @@ ksocknal_send_hello(struct lnet_ni *ni, struct ksock_conn *conn,
        /* rely on caller to hold a ref on socket so it wouldn't disappear */
        LASSERT(conn->ksnc_proto != NULL);
 
-       hello->kshm_src_nid         = ni->ni_nid;
+       hello->kshm_src_nid         = lnet_nid_to_nid4(&ni->ni_nid);
        hello->kshm_dst_nid         = peer_nid;
        hello->kshm_src_pid         = the_lnet.ln_pid;
 
@@ -1751,16 +1750,16 @@ ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
                    struct lnet_process_id *peerid,
                    __u64 *incarnation)
 {
-        /* Return < 0        fatal error
-         *        0          success
-         *        EALREADY   lost connection race
-         *        EPROTO     protocol version mismatch
-         */
+       /* Return < 0        fatal error
+        *        0          success
+        *        EALREADY   lost connection race
+        *        EPROTO     protocol version mismatch
+        */
        struct socket        *sock = conn->ksnc_sock;
-        int                  active = (conn->ksnc_proto != NULL);
-        int                  timeout;
-        int                  proto_match;
-        int                  rc;
+       int                  active = (conn->ksnc_proto != NULL);
+       int                  timeout;
+       int                  proto_match;
+       int                  rc;
        const struct ksock_proto *proto;
        struct lnet_process_id recv_id;
 
@@ -1768,7 +1767,7 @@ ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
        LASSERT(!active == !(conn->ksnc_type != SOCKLND_CONN_NONE));
 
        timeout = active ? ksocknal_timeout() :
-                           lnet_acceptor_timeout();
+               lnet_acceptor_timeout();
 
        rc = lnet_sock_read(sock, &hello->kshm_magic,
                            sizeof(hello->kshm_magic), timeout);
@@ -1791,47 +1790,51 @@ ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
 
        rc = lnet_sock_read(sock, &hello->kshm_version,
                            sizeof(hello->kshm_version), timeout);
-        if (rc != 0) {
+       if (rc != 0) {
                CERROR("Error %d reading HELLO from %pIS\n",
                       rc, &conn->ksnc_peeraddr);
                LASSERT(rc < 0);
-                return rc;
-        }
+               return rc;
+       }
 
-        proto = ksocknal_parse_proto_version(hello);
-        if (proto == NULL) {
-                if (!active) {
-                        /* unknown protocol from peer_ni, tell peer_ni my protocol */
-                        conn->ksnc_proto = &ksocknal_protocol_v3x;
+       proto = ksocknal_parse_proto_version(hello);
+       if (proto == NULL) {
+               if (!active) {
+                       /* unknown protocol from peer_ni,
+                        * tell peer_ni my protocol.
+                        */
+                       conn->ksnc_proto = &ksocknal_protocol_v3x;
 #if SOCKNAL_VERSION_DEBUG
-                        if (*ksocknal_tunables.ksnd_protocol == 2)
-                                conn->ksnc_proto = &ksocknal_protocol_v2x;
-                        else if (*ksocknal_tunables.ksnd_protocol == 1)
-                                conn->ksnc_proto = &ksocknal_protocol_v1x;
+                       if (*ksocknal_tunables.ksnd_protocol == 2)
+                               conn->ksnc_proto = &ksocknal_protocol_v2x;
+                       else if (*ksocknal_tunables.ksnd_protocol == 1)
+                               conn->ksnc_proto = &ksocknal_protocol_v1x;
 #endif
-                        hello->kshm_nips = 0;
-                        ksocknal_send_hello(ni, conn, ni->ni_nid, hello);
-                }
+                       hello->kshm_nips = 0;
+                       ksocknal_send_hello(ni, conn,
+                                           lnet_nid_to_nid4(&ni->ni_nid),
+                                           hello);
+               }
 
                CERROR("Unknown protocol version (%d.x expected) from %pIS\n",
                       conn->ksnc_proto->pro_version, &conn->ksnc_peeraddr);
 
-                return -EPROTO;
-        }
+               return -EPROTO;
+       }
 
-        proto_match = (conn->ksnc_proto == proto);
-        conn->ksnc_proto = proto;
+       proto_match = (conn->ksnc_proto == proto);
+       conn->ksnc_proto = proto;
 
-        /* receive the rest of hello message anyway */
-        rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
-        if (rc != 0) {
+       /* receive the rest of hello message anyway */
+       rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout);
+       if (rc != 0) {
                CERROR("Error %d reading or checking hello from from %pIS\n",
                       rc, &conn->ksnc_peeraddr);
-                LASSERT (rc < 0);
-                return rc;
-        }
+               LASSERT(rc < 0);
+               return rc;
+       }
 
-        *incarnation = hello->kshm_src_incarnation;
+       *incarnation = hello->kshm_src_incarnation;
 
        if (hello->kshm_src_nid == LNET_NID_ANY) {
                CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY from %pIS\n",
@@ -1848,7 +1851,7 @@ ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
                        LNET_PID_USERFLAG;
                LASSERT(conn->ksnc_peeraddr.ss_family == AF_INET);
                recv_id.nid = LNET_MKNID(
-                       LNET_NIDNET(ni->ni_nid),
+                       LNET_NID_NET(&ni->ni_nid),
                        ntohl(((struct sockaddr_in *)
                               &conn->ksnc_peeraddr)->sin_addr.s_addr));
        } else {
@@ -1856,8 +1859,8 @@ ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
                recv_id.pid = hello->kshm_src_pid;
        }
 
-        if (!active) {
-                *peerid = recv_id;
+       if (!active) {
+               *peerid = recv_id;
 
                /* peer_ni determines type */
                conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype);
@@ -1880,10 +1883,10 @@ ksocknal_recv_hello(struct lnet_ni *ni, struct ksock_conn *conn,
                return -EPROTO;
        }
 
-        if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
-                /* Possible protocol mismatch or I lost the connection race */
-                return proto_match ? EALREADY : EPROTO;
-        }
+       if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
+               /* Possible protocol mismatch or I lost the connection race */
+               return proto_match ? EALREADY : EPROTO;
+       }
 
        if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) {
                CERROR("Mismatched types: me %d, %s ip %pIS %d\n",
@@ -1932,7 +1935,8 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
                if (peer_ni->ksnp_accepting > 0) {
                        CDEBUG(D_NET,
                               "peer_ni %s(%d) already connecting to me, retry later.\n",
-                              libcfs_nid2str(peer_ni->ksnp_id.nid), peer_ni->ksnp_accepting);
+                              libcfs_nidstr(&peer_ni->ksnp_id.nid),
+                              peer_ni->ksnp_accepting);
                        retry_later = true;
                }
 
@@ -1955,13 +1959,13 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
 
                if (ktime_get_seconds() >= deadline) {
                        rc = -ETIMEDOUT;
-                       lnet_connect_console_error(rc, peer_ni->ksnp_id.nid,
-                                                  (struct sockaddr *)
-                                                  &conn_cb->ksnr_addr);
+                       lnet_connect_console_error(
+                               rc, &peer_ni->ksnp_id.nid,
+                               (struct sockaddr *)&conn_cb->ksnr_addr);
                        goto failed;
                }
 
-               sock = lnet_connect(peer_ni->ksnp_id.nid,
+               sock = lnet_connect(&peer_ni->ksnp_id.nid,
                                    conn_cb->ksnr_myiface,
                                    (struct sockaddr *)&conn_cb->ksnr_addr,
                                    peer_ni->ksnp_ni->ni_net_ns);
@@ -1973,18 +1977,19 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
                rc = ksocknal_create_conn(peer_ni->ksnp_ni, conn_cb, sock,
                                          type);
                if (rc < 0) {
-                       lnet_connect_console_error(rc, peer_ni->ksnp_id.nid,
-                                                  (struct sockaddr *)
-                                                  &conn_cb->ksnr_addr);
+                       lnet_connect_console_error(
+                               rc, &peer_ni->ksnp_id.nid,
+                               (struct sockaddr *)&conn_cb->ksnr_addr);
                        goto failed;
                }
 
                /* A +ve RC means I have to retry because I lost the connection
-                * race or I have to renegotiate protocol version */
+                * race or I have to renegotiate protocol version
+                */
                retry_later = (rc != 0);
                if (retry_later)
                        CDEBUG(D_NET, "peer_ni %s: conn race, retry later.\n",
-                              libcfs_nid2str(peer_ni->ksnp_id.nid));
+                              libcfs_nidstr(&peer_ni->ksnp_id.nid));
 
                write_lock_bh(&ksocknal_data.ksnd_global_lock);
        }
@@ -2070,7 +2075,6 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
 static int
 ksocknal_connd_check_start(time64_t sec, long *timeout)
 {
-       char name[16];
         int rc;
         int total = ksocknal_data.ksnd_connd_starting +
                     ksocknal_data.ksnd_connd_running;
@@ -2108,8 +2112,8 @@ ksocknal_connd_check_start(time64_t sec, long *timeout)
        spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
 
        /* NB: total is the next id */
-       snprintf(name, sizeof(name), "socknal_cd%02d", total);
-       rc = ksocknal_thread_start(ksocknal_connd, NULL, name);
+       rc = ksocknal_thread_start(ksocknal_connd, NULL,
+                                  "socknal_cd%02d", total);
 
        spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
         if (rc == 0)
@@ -2303,16 +2307,15 @@ ksocknal_find_timed_out_conn(struct ksock_peer_ni *peer_ni)
 {
         /* We're called with a shared lock on ksnd_global_lock */
        struct ksock_conn *conn;
-       struct list_head *ctmp;
        struct ksock_tx *tx;
+       struct ksock_sched *sched;
 
-       list_for_each(ctmp, &peer_ni->ksnp_conns) {
+       list_for_each_entry(conn, &peer_ni->ksnp_conns, ksnc_list) {
                int error;
 
-               conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
-
                 /* Don't need the {get,put}connsock dance to deref ksnc_sock */
                 LASSERT (!conn->ksnc_closing);
+               sched = conn->ksnc_scheduler;
 
                error = conn->ksnc_sock->sk->sk_err;
                 if (error != 0) {
@@ -2321,18 +2324,18 @@ ksocknal_find_timed_out_conn(struct ksock_peer_ni *peer_ni)
                        switch (error) {
                        case ECONNRESET:
                                CNETERR("A connection with %s (%pISp) was reset; it may have rebooted.\n",
-                                       libcfs_id2str(peer_ni->ksnp_id),
+                                       libcfs_idstr(&peer_ni->ksnp_id),
                                        &conn->ksnc_peeraddr);
                                break;
                        case ETIMEDOUT:
                                CNETERR("A connection with %s (%pISp) timed out; the network or node may be down.\n",
-                                       libcfs_id2str(peer_ni->ksnp_id),
+                                       libcfs_idstr(&peer_ni->ksnp_id),
                                        &conn->ksnc_peeraddr);
                                break;
                        default:
                                CNETERR("An unexpected network error %d occurred with %s (%pISp\n",
                                        error,
-                                       libcfs_id2str(peer_ni->ksnp_id),
+                                       libcfs_idstr(&peer_ni->ksnp_id),
                                        &conn->ksnc_peeraddr);
                                break;
                        }
@@ -2345,7 +2348,7 @@ ksocknal_find_timed_out_conn(struct ksock_peer_ni *peer_ni)
                        /* Timed out incomplete incoming message */
                        ksocknal_conn_addref(conn);
                        CNETERR("Timeout receiving from %s (%pISp), state %d wanted %d left %d\n",
-                               libcfs_id2str(peer_ni->ksnp_id),
+                               libcfs_idstr(&peer_ni->ksnp_id),
                                &conn->ksnc_peeraddr,
                                conn->ksnc_rx_state,
                                conn->ksnc_rx_nob_wanted,
@@ -2353,6 +2356,7 @@ ksocknal_find_timed_out_conn(struct ksock_peer_ni *peer_ni)
                        return conn;
                }
 
+               spin_lock_bh(&sched->kss_lock);
                if ((!list_empty(&conn->ksnc_tx_queue) ||
                     conn->ksnc_sock->sk->sk_wmem_queued != 0) &&
                    ktime_get_seconds() >= conn->ksnc_tx_deadline) {
@@ -2365,10 +2369,12 @@ ksocknal_find_timed_out_conn(struct ksock_peer_ni *peer_ni)
                                tx->tx_hstatus =
                                        LNET_MSG_STATUS_LOCAL_TIMEOUT;
                        CNETERR("Timeout sending data to %s (%pISp) the network or that node may be down.\n",
-                               libcfs_id2str(peer_ni->ksnp_id),
+                               libcfs_idstr(&peer_ni->ksnp_id),
                                &conn->ksnc_peeraddr);
+                               spin_unlock_bh(&sched->kss_lock);
                                return conn;
                }
+               spin_unlock_bh(&sched->kss_lock);
        }
 
        return (NULL);
@@ -2448,7 +2454,8 @@ __must_hold(&ksocknal_data.ksnd_global_lock)
                return -ENOMEM;
        }
 
-       if (ksocknal_launch_packet(peer_ni->ksnp_ni, tx, peer_ni->ksnp_id) == 0) {
+       if (ksocknal_launch_packet(peer_ni->ksnp_ni, tx, &peer_ni->ksnp_id)
+           == 0) {
                read_lock(&ksocknal_data.ksnd_global_lock);
                return 1;
        }
@@ -2548,7 +2555,7 @@ ksocknal_check_peer_timeouts(int idx)
                CERROR("Total %d stale ZC_REQs for peer_ni %s detected; the "
                       "oldest(%p) timed out %lld secs ago, "
                       "resid: %d, wmem: %d\n",
-                      n, libcfs_nid2str(peer_ni->ksnp_id.nid), tx_stale,
+                      n, libcfs_nidstr(&peer_ni->ksnp_id.nid), tx_stale,
                       ktime_get_seconds() - deadline,
                       resid, conn->ksnc_sock->sk->sk_wmem_queued);