From 5bb421cdfd4ce6a29202ce9d680c5d95d04a5254 Mon Sep 17 00:00:00 2001 From: Mr NeilBrown Date: Thu, 25 Jun 2020 16:29:10 +1000 Subject: [PATCH] LU-10391 lnet: change lnet_hdr to store large nids. 'struct lnet_hdr' now has large-addr nids. They are converted to 4-byte-addr on transmit, and converted back on receive. Test-Parameters: trivial Test-Parameters: serverversion=2.12 serverdistro=el7.9 testlist=runtests Test-Parameters: clientversion=2.12 testlist=runtests Signed-off-by: Mr NeilBrown Change-Id: Icb333e7b62f8151ad103db0a16aa7685a33071e1 Reviewed-on: https://review.whamcloud.com/43604 Reviewed-by: Serguei Smirnov Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 8 +++--- lnet/include/uapi/linux/lnet/lnet-idl.h | 4 +-- lnet/klnds/o2iblnd/o2iblnd_cb.c | 2 +- lnet/klnds/socklnd/socklnd_cb.c | 2 +- lnet/lnet/lib-move.c | 46 ++++++++++++++------------------- lnet/lnet/lib-msg.c | 16 ++++++------ lnet/lnet/lib-ptl.c | 4 +-- lnet/lnet/net_fault.c | 8 +++--- 8 files changed, 42 insertions(+), 48 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 4b2f270..8c1b244 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -522,8 +522,8 @@ static inline void lnet_hdr_from_nid4(struct lnet_hdr *hdr, { const struct _lnet_hdr_nid4 *hdr_nid4 = (void *)vhdr; - hdr->dest_nid = le64_to_cpu(hdr_nid4->dest_nid); - hdr->src_nid = le64_to_cpu(hdr_nid4->src_nid); + lnet_nid4_to_nid(le64_to_cpu(hdr_nid4->dest_nid), &hdr->dest_nid); + lnet_nid4_to_nid(le64_to_cpu(hdr_nid4->src_nid), &hdr->src_nid); hdr->dest_pid = le32_to_cpu(hdr_nid4->dest_pid); hdr->src_pid = le32_to_cpu(hdr_nid4->src_pid); hdr->type = le32_to_cpu(hdr_nid4->type); @@ -537,8 +537,8 @@ static inline void lnet_hdr_to_nid4(const struct lnet_hdr *hdr, { struct _lnet_hdr_nid4 *hdr_nid4 = (void *)vhdr; - hdr_nid4->dest_nid = cpu_to_le64(hdr->dest_nid); - hdr_nid4->src_nid = cpu_to_le64(hdr->src_nid); + hdr_nid4->dest_nid = cpu_to_le64(lnet_nid_to_nid4(&hdr->dest_nid)); + hdr_nid4->src_nid = cpu_to_le64(lnet_nid_to_nid4(&hdr->src_nid)); hdr_nid4->dest_pid = cpu_to_le32(hdr->dest_pid); hdr_nid4->src_pid = cpu_to_le32(hdr->src_pid); hdr_nid4->type = cpu_to_le32(hdr->type); diff --git a/lnet/include/uapi/linux/lnet/lnet-idl.h b/lnet/include/uapi/linux/lnet/lnet-idl.h index 457e6d6..7ed58a3 100644 --- a/lnet/include/uapi/linux/lnet/lnet-idl.h +++ b/lnet/include/uapi/linux/lnet/lnet-idl.h @@ -153,8 +153,8 @@ union lnet_cmd_hdr { * All fields before the union are in host-byte-order. */ struct lnet_hdr { - lnet_nid_t dest_nid; - lnet_nid_t src_nid; + struct lnet_nid dest_nid; + struct lnet_nid src_nid; lnet_pid_t dest_pid; lnet_pid_t src_pid; __u32 type; /* enum lnet_msg_type */ diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index f1cea6d..78a772b 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1867,7 +1867,7 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg, nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[rlen]); if (nob > rx->rx_nob) { CERROR("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(lntmsg->msg_hdr.src_nid), + libcfs_nidstr(&lntmsg->msg_hdr.src_nid), nob, rx->rx_nob); rc = -EPROTO; break; diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 7d5d48a..05567ae 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1274,7 +1274,7 @@ ksocknal_process_receive(struct ksock_conn *conn, /* Substitute process ID assigned at connection time */ hdr.src_pid = id->pid; - hdr.src_nid = lnet_nid_to_nid4(&id->nid); + hdr.src_nid = id->nid; } conn->ksnc_rx_state = SOCKNAL_RX_PARSE; diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index d20145a..ed7f102 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -724,7 +724,7 @@ lnet_prep_send(struct lnet_msg *msg, int type, struct lnet_process_id target, memset (&msg->msg_hdr, 0, sizeof (msg->msg_hdr)); msg->msg_hdr.type = type; /* dest_nid will be overwritten by lnet_select_pathway() */ - msg->msg_hdr.dest_nid = target.nid; + lnet_nid4_to_nid(target.nid, &msg->msg_hdr.dest_nid); msg->msg_hdr.dest_pid = target.pid; /* src_nid will be set later */ msg->msg_hdr.src_pid = the_lnet.ln_pid; @@ -1813,11 +1813,9 @@ lnet_handle_lo_send(struct lnet_send_data *sd) /* No send credit hassles with LOLND */ lnet_ni_addref_locked(the_lnet.ln_loni, cpt); - msg->msg_hdr.dest_nid = - lnet_nid_to_nid4(&the_lnet.ln_loni->ni_nid); + msg->msg_hdr.dest_nid = the_lnet.ln_loni->ni_nid; if (!msg->msg_routing) - msg->msg_hdr.src_nid = - lnet_nid_to_nid4(&the_lnet.ln_loni->ni_nid); + msg->msg_hdr.src_nid = the_lnet.ln_loni->ni_nid; msg->msg_target.nid = the_lnet.ln_loni->ni_nid; lnet_msg_commit(msg, cpt); msg->msg_txni = the_lnet.ln_loni; @@ -1918,8 +1916,7 @@ lnet_handle_send(struct lnet_send_data *sd) * originator and set it here. */ if (!msg->msg_routing) - msg->msg_hdr.src_nid = - lnet_nid_to_nid4(&msg->msg_txni->ni_nid); + msg->msg_hdr.src_nid = msg->msg_txni->ni_nid; if (routing) { msg->msg_target_is_router = 1; @@ -1934,16 +1931,13 @@ lnet_handle_send(struct lnet_send_data *sd) * lnet_select_pathway() function and is never changed. * It's safe to use it here. */ - /* FIXME handle large-addr nid */ - msg->msg_hdr.dest_nid = - lnet_nid_to_nid4(&final_dst_lpni->lpni_nid); + msg->msg_hdr.dest_nid = final_dst_lpni->lpni_nid; } else { /* * if we're not routing set the dest_nid to the best peer * ni NID that we picked earlier in the algorithm. */ - msg->msg_hdr.dest_nid = - lnet_nid_to_nid4(&msg->msg_txpeer->lpni_nid); + msg->msg_hdr.dest_nid = msg->msg_txpeer->lpni_nid; } /* @@ -1964,10 +1958,10 @@ lnet_handle_send(struct lnet_send_data *sd) if (!rc) CDEBUG(D_NET, "TRACE: %s(%s:%s) -> %s(%s:%s) %s : %s try# %d\n", - libcfs_nid2str(msg->msg_hdr.src_nid), + libcfs_nidstr(&msg->msg_hdr.src_nid), libcfs_nidstr(&msg->msg_txni->ni_nid), libcfs_nidstr(&sd->sd_src_nid), - libcfs_nid2str(msg->msg_hdr.dest_nid), + libcfs_nidstr(&msg->msg_hdr.dest_nid), libcfs_nidstr(&sd->sd_dst_nid), libcfs_nidstr(&msg->msg_txpeer->lpni_nid), libcfs_nidstr(&sd->sd_rtr_nid), @@ -3029,8 +3023,8 @@ again: struct lnet_peer *src_lp; struct lnet_peer_ni *src_lpni; - src_lpni = lnet_nid2peerni_locked(msg->msg_hdr.src_nid, - LNET_NID_ANY, cpt); + src_lpni = lnet_peerni_by_nid_locked(&msg->msg_hdr.src_nid, + NULL, cpt); /* We don't fail the send if we hit any errors here. We'll just * try to send it via non-multi-rail criteria */ @@ -3351,11 +3345,11 @@ lnet_resend_pending_msgs_locked(struct list_head *resendq, int cpt) list_del_init(&msg->msg_list); - lpni = lnet_find_peer_ni_locked(msg->msg_hdr.dest_nid); + lpni = lnet_peer_ni_find_locked(&msg->msg_hdr.dest_nid); if (!lpni) { lnet_net_unlock(cpt); CERROR("Expected that a peer is already created for %s\n", - libcfs_nid2str(msg->msg_hdr.dest_nid)); + libcfs_nidstr(&msg->msg_hdr.dest_nid)); msg->msg_no_resend = true; lnet_finalize(msg, -EFAULT); lnet_net_lock(cpt); @@ -4254,7 +4248,7 @@ lnet_parse_get(struct lnet_ni *ni, struct lnet_msg *msg, int rdma_get) hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length); hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset); - source_id.nid = hdr->src_nid; + source_id.nid = lnet_nid_to_nid4(&hdr->src_nid); source_id.pid = hdr->src_pid; /* Primary peer NID */ info.mi_id.nid = msg->msg_initiator; @@ -4323,7 +4317,7 @@ lnet_parse_reply(struct lnet_ni *ni, struct lnet_msg *msg) cpt = lnet_cpt_of_cookie(hdr->msg.reply.dst_wmd.wh_object_cookie); lnet_res_lock(cpt); - src.nid = hdr->src_nid; + src.nid = lnet_nid_to_nid4(&hdr->src_nid); src.pid = hdr->src_pid; /* NB handles only looked up by creator (no flips) */ @@ -4384,7 +4378,7 @@ lnet_parse_ack(struct lnet_ni *ni, struct lnet_msg *msg) struct lnet_libmd *md; int cpt; - src.nid = hdr->src_nid; + src.nid = lnet_nid_to_nid4(&hdr->src_nid); src.pid = hdr->src_pid; /* Convert ack fields to host byte order */ @@ -4524,8 +4518,8 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid4, lnet_nid4_to_nid(from_nid4, &from_nid); type = hdr->type; - src_nid = hdr->src_nid; - dest_nid = hdr->dest_nid; + src_nid = lnet_nid_to_nid4(&hdr->src_nid); + dest_nid = lnet_nid_to_nid4(&hdr->dest_nid); dest_pid = hdr->dest_pid; payload_length = hdr->payload_length; @@ -4819,7 +4813,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason) msg = list_entry(head->next, struct lnet_msg, msg_list); list_del(&msg->msg_list); - id.nid = msg->msg_hdr.src_nid; + id.nid = lnet_nid_to_nid4(&msg->msg_hdr.src_nid); id.pid = msg->msg_hdr.src_pid; LASSERT(msg->msg_md == NULL); @@ -4866,7 +4860,7 @@ lnet_recv_delayed_msg_list(struct list_head *head) /* md won't disappear under me, since each msg * holds a ref on it */ - id.nid = msg->msg_hdr.src_nid; + id.nid = lnet_nid_to_nid4(&msg->msg_hdr.src_nid); id.pid = msg->msg_hdr.src_pid; LASSERT(msg->msg_rx_delayed); @@ -5137,7 +5131,7 @@ lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg) getmsg->msg_txpeer->lpni_peer_net->lpn_peer->lp_primary_nid; msg->msg_from = peer_id->nid; msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */ - msg->msg_hdr.src_nid = lnet_nid_to_nid4(&peer_id->nid); + msg->msg_hdr.src_nid = peer_id->nid; msg->msg_hdr.payload_length = getmd->md_length; msg->msg_receiving = 1; /* required by lnet_msg_attach_md */ diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index 6373d6f..bea9883 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -68,7 +68,7 @@ lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type) if (ev_type == LNET_EVENT_SEND) { /* event for active message */ - lnet_nid4_to_nid(hdr->dest_nid, &ev->target.nid); + ev->target.nid = hdr->dest_nid; ev->target.pid = hdr->dest_pid; ev->initiator.nid = LNET_ANY_NID; ev->initiator.pid = the_lnet.ln_pid; @@ -78,13 +78,13 @@ lnet_build_msg_event(struct lnet_msg *msg, enum lnet_event_kind ev_type) } else { /* event for passive message */ ev->target.pid = hdr->dest_pid; - lnet_nid4_to_nid(hdr->dest_nid, &ev->target.nid); + ev->target.nid = hdr->dest_nid; ev->initiator.pid = hdr->src_pid; /* Multi-Rail: resolve src_nid to "primary" peer NID */ ev->initiator.nid = msg->msg_initiator; /* Multi-Rail: track source NID. */ ev->source.pid = hdr->src_pid; - lnet_nid4_to_nid(hdr->src_nid, &ev->source.nid); + ev->source.nid = hdr->src_nid; ev->rlength = hdr->payload_length; ev->sender = msg->msg_from; ev->mlength = msg->msg_wanted; @@ -635,15 +635,15 @@ lnet_resend_msg_locked(struct lnet_msg *msg) * this message consumed. The message will * consume another credit when it gets resent. */ - lnet_nid4_to_nid(msg->msg_hdr.dest_nid, &msg->msg_target.nid); + msg->msg_target.nid = msg->msg_hdr.dest_nid; lnet_msg_decommit_tx(msg, -EAGAIN); msg->msg_sending = 0; msg->msg_receiving = 0; msg->msg_target_is_router = 0; CDEBUG(D_NET, "%s->%s:%s:%s - queuing msg (%p) for resend\n", - libcfs_nid2str(msg->msg_hdr.src_nid), - libcfs_nid2str(msg->msg_hdr.dest_nid), + libcfs_nidstr(&msg->msg_hdr.src_nid), + libcfs_nidstr(&msg->msg_hdr.dest_nid), lnet_msgtyp2str(msg->msg_type), lnet_health_error2str(msg->msg_health_status), msg); @@ -1114,9 +1114,9 @@ lnet_send_error_simulation(struct lnet_msg *msg, return false; CDEBUG(D_NET, "src %s(%s)->dst %s: %s simulate health error: %s\n", - libcfs_nid2str(msg->msg_hdr.src_nid), + libcfs_nidstr(&msg->msg_hdr.src_nid), libcfs_nidstr(&msg->msg_txni->ni_nid), - libcfs_nid2str(msg->msg_hdr.dest_nid), + libcfs_nidstr(&msg->msg_hdr.dest_nid), lnet_msgtyp2str(msg->msg_type), lnet_health_error2str(*hstatus)); diff --git a/lnet/lnet/lib-ptl.c b/lnet/lnet/lib-ptl.c index 95b541f..cbe7a30 100644 --- a/lnet/lnet/lib-ptl.c +++ b/lnet/lnet/lib-ptl.c @@ -279,8 +279,8 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg) return mtable; /* it's a wildcard portal */ - routed = LNET_NIDNET(msg->msg_hdr.src_nid) != - LNET_NIDNET(msg->msg_hdr.dest_nid); + routed = LNET_NID_NET(&msg->msg_hdr.src_nid) != + LNET_NID_NET(&msg->msg_hdr.dest_nid); if (portal_rotor == LNET_PTL_ROTOR_OFF || (portal_rotor != LNET_PTL_ROTOR_ON && !routed)) { diff --git a/lnet/lnet/net_fault.c b/lnet/lnet/net_fault.c index a4fdf97..ebdde6f 100644 --- a/lnet/lnet/net_fault.c +++ b/lnet/lnet/net_fault.c @@ -429,8 +429,8 @@ lnet_drop_rule_match(struct lnet_hdr *hdr, lnet_nid_t local_nid, enum lnet_msg_hstatus *hstatus) { - lnet_nid_t src = hdr->src_nid; - lnet_nid_t dst = hdr->dest_nid; + lnet_nid_t src = lnet_nid_to_nid4(&hdr->src_nid); + lnet_nid_t dst = lnet_nid_to_nid4(&hdr->dest_nid); unsigned int typ = hdr->type; struct lnet_drop_rule *rule; unsigned int ptl = -1; @@ -605,8 +605,8 @@ bool lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg) { struct lnet_delay_rule *rule; - lnet_nid_t src = hdr->src_nid; - lnet_nid_t dst = hdr->dest_nid; + lnet_nid_t src = lnet_nid_to_nid4(&hdr->src_nid); + lnet_nid_t dst = lnet_nid_to_nid4(&hdr->dest_nid); unsigned int typ = hdr->type; unsigned int ptl = -1; -- 1.8.3.1