Whamcloud - gitweb
LU-9078 lnet: Fix route hops print
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd_cb.c
index 5d8e924..bcf5c6d 100644 (file)
@@ -237,7 +237,7 @@ kgnilnd_free_tx(kgn_tx_t *tx)
 
        /* Only free the buffer if we used it */
        if (tx->tx_buffer_copy != NULL) {
-               vfree(tx->tx_buffer_copy);
+               kgnilnd_vfree(tx->tx_buffer_copy, tx->tx_rdma_desc.length);
                tx->tx_buffer_copy = NULL;
                CDEBUG(D_MALLOC, "vfreed buffer2\n");
        }
@@ -302,7 +302,7 @@ kgnilnd_cksum(void *ptr, size_t nob)
        return sum;
 }
 
-inline __u16
+__u16
 kgnilnd_cksum_kiov(unsigned int nkiov, lnet_kiov_t *kiov,
                    unsigned int offset, unsigned int nob, int dump_blob)
 {
@@ -474,7 +474,7 @@ kgnilnd_nak_rdma(kgn_conn_t *conn, int rx_type, int error, __u64 cookie, lnet_ni
                LBUG();
        }
        /* only allow NAK on error and truncate to zero */
-       LASSERTF(error <= 0, "error %d conn 0x%p, cookie "LPU64"\n",
+       LASSERTF(error <= 0, "error %d conn 0x%p, cookie %llu\n",
                 error, conn, cookie);
 
        tx = kgnilnd_new_tx_msg(nak_type, source);
@@ -507,7 +507,8 @@ kgnilnd_setup_immediate_buffer(kgn_tx_t *tx, unsigned int niov,
        } else if (kiov != NULL) {
 
                if ((niov > 0) && unlikely(niov > (nob/PAGE_SIZE))) {
-                       niov = ((nob + offset + PAGE_SIZE - 1) / PAGE_SIZE);
+                       niov = ((nob + offset + kiov->kiov_offset + PAGE_SIZE - 1) /
+                               PAGE_SIZE);
                }
 
                LASSERTF(niov > 0 && niov < GNILND_MAX_IMMEDIATE/PAGE_SIZE,
@@ -946,7 +947,7 @@ kgnilnd_mem_add_map_list(kgn_device_t *dev, kgn_tx_t *tx)
        if (tx->tx_msg.gnm_type == GNILND_MSG_PUT_ACK ||
            tx->tx_msg.gnm_type == GNILND_MSG_GET_REQ) {
                atomic64_add(bytes, &dev->gnd_rdmaq_bytes_out);
-               GNIDBG_TX(D_NETTRACE, tx, "rdma ++ %d to "LPD64"",
+               GNIDBG_TX(D_NETTRACE, tx, "rdma ++ %d to %lld",
                          bytes, atomic64_read(&dev->gnd_rdmaq_bytes_out));
        }
 
@@ -997,7 +998,7 @@ kgnilnd_mem_del_map_list(kgn_device_t *dev, kgn_tx_t *tx)
                atomic64_sub(bytes, &dev->gnd_rdmaq_bytes_out);
                LASSERTF(atomic64_read(&dev->gnd_rdmaq_bytes_out) >= 0,
                         "bytes_out negative! %ld\n", atomic64_read(&dev->gnd_rdmaq_bytes_out));
-               GNIDBG_TX(D_NETTRACE, tx, "rdma -- %d to "LPD64"",
+               GNIDBG_TX(D_NETTRACE, tx, "rdma -- %d to %lld",
                          bytes, atomic64_read(&dev->gnd_rdmaq_bytes_out));
        }
 
@@ -1056,7 +1057,7 @@ kgnilnd_map_buffer(kgn_tx_t *tx)
                 *  GART resource, etc starvation handling */
                if (rrc != GNI_RC_SUCCESS) {
                        GNIDBG_TX(D_NET, tx, "Can't map %d pages: dev %d "
-                               "phys %u pp %u, virt %u nob "LPU64"",
+                               "phys %u pp %u, virt %u nob %llu",
                                tx->tx_phys_npages, dev->gnd_id,
                                dev->gnd_map_nphys, dev->gnd_map_physnop,
                                dev->gnd_map_nvirt, dev->gnd_map_virtnob);
@@ -1073,7 +1074,7 @@ kgnilnd_map_buffer(kgn_tx_t *tx)
                        NULL, flags, &tx->tx_map_key);
                if (rrc != GNI_RC_SUCCESS) {
                        GNIDBG_TX(D_NET, tx, "Can't map %u bytes: dev %d "
-                               "phys %u pp %u, virt %u nob "LPU64"",
+                               "phys %u pp %u, virt %u nob %llu",
                                tx->tx_nob, dev->gnd_id,
                                dev->gnd_map_nphys, dev->gnd_map_physnop,
                                dev->gnd_map_nvirt, dev->gnd_map_virtnob);
@@ -1174,7 +1175,7 @@ kgnilnd_unmap_buffer(kgn_tx_t *tx, int error)
                        hold_timeout = GNILND_TIMEOUT2DEADMAN;
 
                        GNIDBG_TX(D_NET, tx,
-                                "dev %p delaying MDD release for %dms key "LPX64"."LPX64"",
+                                "dev %p delaying MDD release for %dms key %#llx.%#llx",
                                 tx->tx_conn->gnc_device, hold_timeout,
                                 tx->tx_map_key.qword1, tx->tx_map_key.qword2);
                }
@@ -1655,7 +1656,7 @@ kgnilnd_sendmsg_trylock(kgn_tx_t *tx, void *immediate, unsigned int immediatenob
 }
 
 /* lets us know if we can push this RDMA through now */
-inline int
+static int
 kgnilnd_auth_rdma_bytes(kgn_device_t *dev, kgn_tx_t *tx)
 {
        long    bytes_left;
@@ -1804,7 +1805,7 @@ kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, lnet_process_id_t *target)
                }
 
                /* don't create a connection if the peer is marked down */
-               if (peer->gnp_down == GNILND_RCA_NODE_DOWN) {
+               if (peer->gnp_state != GNILND_PEER_UP) {
                        read_unlock(&kgnilnd_data.kgn_peer_conn_lock);
                        rc = -ENETRESET;
                        GOTO(no_peer, rc);
@@ -1843,7 +1844,7 @@ kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, lnet_process_id_t *target)
        kgnilnd_add_peer_locked(target->nid, new_peer, &peer);
 
        /* don't create a connection if the peer is not up */
-       if (peer->gnp_down != GNILND_RCA_NODE_UP) {
+       if (peer->gnp_state != GNILND_PEER_UP) {
                write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
                rc = -ENETRESET;
                GOTO(no_peer, rc);
@@ -1935,7 +1936,7 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
                        if (tx->tx_buffer_copy == NULL) {
                                /* Allocate the largest copy buffer we will need, this will prevent us from overwriting data
                                 * and require at most we allocate a few extra bytes. */
-                               tx->tx_buffer_copy = vmalloc(desc_nob);
+                               tx->tx_buffer_copy = kgnilnd_vzalloc(desc_nob);
 
                                if (!tx->tx_buffer_copy) {
                                        /* allocation of buffer failed nak the rdma */
@@ -1947,7 +1948,8 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
                                rc = kgnilnd_mem_register(conn->gnc_device->gnd_handle, (__u64)tx->tx_buffer_copy, desc_nob, NULL, GNI_MEM_READWRITE, &tx->tx_buffer_copy_map_key);
                                if (rc != GNI_RC_SUCCESS) {
                                        /* Registration Failed nak rdma and kill the tx. */
-                                       vfree(tx->tx_buffer_copy);
+                                       kgnilnd_vfree(tx->tx_buffer_copy,
+                                                     desc_nob);
                                        tx->tx_buffer_copy = NULL;
                                        kgnilnd_nak_rdma(tx->tx_conn, tx->tx_msg.gnm_type, -EFAULT, cookie, tx->tx_msg.gnm_srcnid);
                                        kgnilnd_tx_done(tx, -EFAULT);
@@ -1969,8 +1971,10 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
        tx->tx_rdma_desc.remote_mem_hndl = sink->gnrd_key;
        tx->tx_rdma_desc.length = desc_nob;
        tx->tx_nob_rdma = nob;
-       if (*kgnilnd_tunables.kgn_bte_dlvr_mode)
-               tx->tx_rdma_desc.dlvr_mode = *kgnilnd_tunables.kgn_bte_dlvr_mode;
+       if (post_type == GNI_POST_RDMA_PUT && *kgnilnd_tunables.kgn_bte_put_dlvr_mode)
+               tx->tx_rdma_desc.dlvr_mode = *kgnilnd_tunables.kgn_bte_put_dlvr_mode;
+       if (post_type == GNI_POST_RDMA_GET && *kgnilnd_tunables.kgn_bte_get_dlvr_mode)
+               tx->tx_rdma_desc.dlvr_mode = *kgnilnd_tunables.kgn_bte_get_dlvr_mode;
        /* prep final completion message */
        kgnilnd_init_msg(&tx->tx_msg, type, tx->tx_msg.gnm_srcnid);
        tx->tx_msg.gnm_u.completion.gncm_cookie = cookie;
@@ -1989,7 +1993,7 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
                 tx, conn, conn->gnc_close_sent);
 
        GNIDBG_TX(D_NET, tx, "Post RDMA type 0x%02x conn %p dlvr_mode "
-               "0x%x cookie:"LPX64,
+               "0x%x cookie:%#llx",
                type, conn, tx->tx_rdma_desc.dlvr_mode, cookie);
 
        /* set CQ dedicated for RDMA */
@@ -2011,7 +2015,7 @@ kgnilnd_rdma(kgn_tx_t *tx, int type,
                kgnilnd_unmap_buffer(tx, 0);
 
                if (tx->tx_buffer_copy != NULL) {
-                       vfree(tx->tx_buffer_copy);
+                       kgnilnd_vfree(tx->tx_buffer_copy, desc_nob);
                        tx->tx_buffer_copy = NULL;
                }
 
@@ -2376,7 +2380,7 @@ kgnilnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
                CERROR("Couldnt find matching peer %p or conn %p / %p\n",
                        peer, conn, found_conn);
                if (found_conn) {
-                       CERROR("Unexpected connstamp "LPX64"("LPX64" expected)"
+                       CERROR("Unexpected connstamp %#llx(%#llx expected)"
                                " from %s", rxmsg->gnm_connstamp,
                                found_conn->gnc_peer_connstamp,
                                libcfs_nid2str(peer->gnp_nid));
@@ -2745,7 +2749,7 @@ kgnilnd_check_conn_timeouts_locked(kgn_conn_t *conn)
        if (time_after_eq(now, newest_last_rx + timeout)) {
                uint32_t level = D_CONSOLE|D_NETERROR;
 
-               if (conn->gnc_peer->gnp_down == GNILND_RCA_NODE_DOWN) {
+               if (conn->gnc_peer->gnp_state == GNILND_PEER_DOWN) {
                        level = D_NET;
                }
                        GNIDBG_CONN(level, conn,
@@ -2821,6 +2825,14 @@ kgnilnd_check_peer_timeouts_locked(kgn_peer_t *peer, struct list_head *todie,
                                conn->gnc_close_recvd = GNILND_CLOSE_INJECT1;
                                conn->gnc_peer_error = -ETIMEDOUT;
                        }
+
+                       if (*kgnilnd_tunables.kgn_to_reconn_disable &&
+                           rc == -ETIMEDOUT) {
+                               peer->gnp_state = GNILND_PEER_TIMED_OUT;
+                               CDEBUG(D_WARNING, "%s conn timed out, will "
+                                      "reconnect upon request from peer\n",
+                                      libcfs_nid2str(conn->gnc_peer->gnp_nid));
+                       }
                        /* Once we mark closed, any of the scheduler threads could
                         * get it and move through before we hit the fail loc code */
                        kgnilnd_close_conn_locked(conn, rc);
@@ -2864,7 +2876,7 @@ kgnilnd_check_peer_timeouts_locked(kgn_peer_t *peer, struct list_head *todie,
        /* Don't reconnect if we are still trying to clear out old conns.
         * This prevents us sending traffic on the new mbox before ensuring we are done
         * with the old one */
-       reconnect = (peer->gnp_down == GNILND_RCA_NODE_UP) &&
+       reconnect = (peer->gnp_state == GNILND_PEER_UP) &&
                    (atomic_read(&peer->gnp_dirty_eps) == 0);
 
        /* fast reconnect after a timeout */
@@ -3192,7 +3204,7 @@ kgnilnd_check_rdma_cq(kgn_device_t *dev)
                        "this is bad, somehow our credits didn't protect us"
                        " from CQ overrun\n");
                LASSERTF(GNI_CQ_GET_TYPE(event_data) == GNI_CQ_EVENT_TYPE_POST,
-                       "rrc %d, GNI_CQ_GET_TYPE("LPX64") = "LPX64"\n", rrc,
+                       "rrc %d, GNI_CQ_GET_TYPE(%#llx) = %#llx\n", rrc,
                        event_data, GNI_CQ_GET_TYPE(event_data));
 
                rrc = kgnilnd_get_completed(dev->gnd_snd_rdma_cqh, event_data,
@@ -3345,7 +3357,7 @@ kgnilnd_check_fma_send_cq(kgn_device_t *dev)
 
                if (rrc == GNI_RC_NOT_DONE) {
                        CDEBUG(D_INFO,
-                              "SMSG send CQ %d not ready (data "LPX64") "
+                              "SMSG send CQ %d not ready (data %#llx) "
                               "processed %ld\n", dev->gnd_id, event_data,
                               num_processed);
                        return num_processed;
@@ -3358,7 +3370,7 @@ kgnilnd_check_fma_send_cq(kgn_device_t *dev)
                        "this is bad, somehow our credits didn't "
                        "protect us from CQ overrun\n");
                LASSERTF(GNI_CQ_GET_TYPE(event_data) == GNI_CQ_EVENT_TYPE_SMSG,
-                       "rrc %d, GNI_CQ_GET_TYPE("LPX64") = "LPX64"\n", rrc,
+                       "rrc %d, GNI_CQ_GET_TYPE(%#llx) = %#llx\n", rrc,
                        event_data, GNI_CQ_GET_TYPE(event_data));
 
                /* if SMSG couldn't handle an error, time for conn to die */
@@ -3372,7 +3384,7 @@ kgnilnd_check_fma_send_cq(kgn_device_t *dev)
                        if (conn == NULL) {
                                /* Conn was destroyed? */
                                CDEBUG(D_NET,
-                                       "SMSG CQID lookup "LPX64" failed\n",
+                                       "SMSG CQID lookup %#llx failed\n",
                                        GNI_CQ_GET_INST_ID(event_data));
                                write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
                                continue;
@@ -3500,7 +3512,7 @@ kgnilnd_check_fma_rcv_cq(kgn_device_t *dev)
                kgnilnd_gl_mutex_unlock(&dev->gnd_cq_mutex);
 
                if (rrc == GNI_RC_NOT_DONE) {
-                       CDEBUG(D_INFO, "SMSG RX CQ %d empty data "LPX64" "
+                       CDEBUG(D_INFO, "SMSG RX CQ %d empty data %#llx "
                                "processed %ld\n",
                                dev->gnd_id, event_data, num_processed);
                        return num_processed;
@@ -3524,7 +3536,7 @@ kgnilnd_check_fma_rcv_cq(kgn_device_t *dev)
                /* sender should get error event too and take care
                of failed transaction by re-transmitting */
                if (rrc == GNI_RC_TRANSACTION_ERROR) {
-                       CDEBUG(D_NET, "SMSG RX CQ error "LPX64"\n", event_data);
+                       CDEBUG(D_NET, "SMSG RX CQ error %#llx\n", event_data);
                        continue;
                }
 
@@ -3533,12 +3545,12 @@ kgnilnd_check_fma_rcv_cq(kgn_device_t *dev)
                        conn = kgnilnd_cqid2conn_locked(
                                                 GNI_CQ_GET_INST_ID(event_data));
                        if (conn == NULL) {
-                               CDEBUG(D_NET, "SMSG RX CQID lookup "LPU64" "
-                                       "failed, dropping event "LPX64"\n",
+                               CDEBUG(D_NET, "SMSG RX CQID lookup %llu "
+                                       "failed, dropping event %#llx\n",
                                        GNI_CQ_GET_INST_ID(event_data),
                                        event_data);
                        } else {
-                               CDEBUG(D_NET, "SMSG RX: CQID "LPU64" "
+                               CDEBUG(D_NET, "SMSG RX: CQID %llu "
                                       "conn %p->%s\n",
                                        GNI_CQ_GET_INST_ID(event_data),
                                        conn, conn->gnc_peer ?
@@ -3798,7 +3810,7 @@ kgnilnd_process_fmaq(kgn_conn_t *conn)
        GNITX_ASSERTF(tx, tx->tx_id.txe_smsg_id != 0,
                      "tx with zero id", NULL);
 
-       CDEBUG(D_NET, "sending regular msg: %p, type %s(0x%02x), cookie "LPX64"\n",
+       CDEBUG(D_NET, "sending regular msg: %p, type %s(0x%02x), cookie %#llx\n",
               tx, kgnilnd_msgtype2str(tx->tx_msg.gnm_type),
               tx->tx_msg.gnm_type, tx->tx_id.txe_cookie);
 
@@ -3996,8 +4008,8 @@ _kgnilnd_match_reply(kgn_conn_t *conn, int type1, int type2, __u64 cookie)
                GNITX_ASSERTF(tx, ((tx->tx_id.txe_idx == ev_id.txe_idx) &&
                                  (tx->tx_id.txe_cookie = cookie)),
                              "conn 0x%p->%s tx_ref_table hosed: wanted "
-                             "txe_cookie "LPX64" txe_idx %d "
-                             "found tx %p cookie "LPX64" txe_idx %d\n",
+                             "txe_cookie %#llx txe_idx %d "
+                             "found tx %p cookie %#llx txe_idx %d\n",
                              conn, libcfs_nid2str(conn->gnc_peer->gnp_nid),
                              cookie, ev_id.txe_idx,
                              tx, tx->tx_id.txe_cookie, tx->tx_id.txe_idx);
@@ -4011,7 +4023,7 @@ _kgnilnd_match_reply(kgn_conn_t *conn, int type1, int type2, __u64 cookie)
                        tx->tx_state, GNILND_TX_WAITING_REPLY,
                        libcfs_nid2str(conn->gnc_peer->gnp_nid));
        } else {
-               CWARN("Unmatched reply %02x, or %02x/"LPX64" from %s\n",
+               CWARN("Unmatched reply %02x, or %02x/%#llx from %s\n",
                      type1, type2, cookie, libcfs_nid2str(conn->gnc_peer->gnp_nid));
        }
        return tx;
@@ -4039,7 +4051,7 @@ kgnilnd_complete_tx(kgn_tx_t *tx, int rc)
        tx->tx_state &= ~GNILND_TX_WAITING_REPLY;
 
        if (rc == -EFAULT) {
-               CDEBUG(D_NETERROR, "Error %d TX data: TX %p tx_id %x nob %16"LPF64"u physnop %8d buffertype %#8x MemHandle "LPX64"."LPX64"x\n",
+               CDEBUG(D_NETERROR, "Error %d TX data: TX %p tx_id %x nob %16llu physnop %8d buffertype %#8x MemHandle %#llx.%#llxx\n",
                        rc, tx, id, nob, physnop, buftype, hndl.qword1, hndl.qword2);
 
                if(*kgnilnd_tunables.kgn_efault_lbug) {
@@ -4286,7 +4298,7 @@ kgnilnd_check_fma_rx(kgn_conn_t *conn)
        }
 
        if (msg->gnm_connstamp != conn->gnc_peer_connstamp) {
-               GNIDBG_MSG(D_NETERROR, msg, "Unexpected connstamp "LPX64"("LPX64
+               GNIDBG_MSG(D_NETERROR, msg, "Unexpected connstamp %#llx(%#llx"
                       " expected) from %s",
                       msg->gnm_connstamp, conn->gnc_peer_connstamp,
                       libcfs_nid2str(peer->gnp_nid));
@@ -4843,7 +4855,7 @@ kgnilnd_process_mapped_tx(kgn_device_t *dev)
                } else {
                       GNIDBG_TX(log_retrans_level, tx,
                                "transient map failure #%d %d pages/%d bytes phys %u@%u "
-                               "virt %u@"LPU64" "
+                               "virt %u@%llu "
                                "nq_map %d mdd# %d/%d GART %ld",
                                dev->gnd_map_attempt, tx->tx_phys_npages, tx->tx_nob,
                                dev->gnd_map_nphys, dev->gnd_map_physnop * PAGE_SIZE,