From 3e37ac8bb7e068a3070f5770570766ad87270186 Mon Sep 17 00:00:00 2001 From: Mr NeilBrown Date: Thu, 9 Jul 2020 16:35:58 +1000 Subject: [PATCH] LU-10391 lnet: Convert ping to support 16-bytes address Now that ksocknal can send hello messages with 16-byte address, we can change lnet_send_ping() to ping hosts with large-address nids. Note that this doesn't change the addresses in the ping message sent, only the sending and receiving of the message. Test-Parameters: trivial Test-Parameters: serverversion=2.12 serverdistro=el7.9 testlist=runtests Test-Parameters: clientversion=2.12 testlist=runtests Signed-off-by: Mr NeilBrown Change-Id: I6f591c2f053698876195575c71da42f64788637e Reviewed-on: https://review.whamcloud.com/43612 Reviewed-by: James Simmons Reviewed-by: Serguei Smirnov Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 5 +++-- lnet/lnet/lib-move.c | 40 +++++++++++++++++++--------------------- lnet/lnet/peer.c | 3 +-- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 7d11672..577bfae 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -685,8 +685,9 @@ void lnet_prep_send(struct lnet_msg *msg, int type, unsigned int len); int lnet_send(struct lnet_nid *nid, struct lnet_msg *msg, struct lnet_nid *rtr_nid); -int lnet_send_ping(lnet_nid_t dest_nid, struct lnet_handle_md *mdh, int nnis, - void *user_ptr, lnet_handler_t handler, bool recovery); +int lnet_send_ping(struct lnet_nid *dest_nid, struct lnet_handle_md *mdh, + int nnis, void *user_ptr, lnet_handler_t handler, + bool recovery); void lnet_return_tx_credits_locked(struct lnet_msg *msg); void lnet_return_rx_credits_locked(struct lnet_msg *msg); void lnet_schedule_blocked_locked(struct lnet_rtrbufpool *rbp); diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index b85cdf5..e0c2d8e 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -3136,7 +3136,7 @@ enum lnet_mt_event_type { struct lnet_mt_event_info { enum lnet_mt_event_type mt_type; - lnet_nid_t mt_nid; + struct lnet_nid mt_nid; }; /* called with res_lock held */ @@ -3422,7 +3422,7 @@ lnet_recover_local_nis(void) struct lnet_handle_md mdh; struct lnet_ni *tmp; struct lnet_ni *ni; - lnet_nid_t nid; + struct lnet_nid nid; int healthv; int rc; time64_t now; @@ -3509,8 +3509,7 @@ lnet_recover_local_nis(void) * We'll unlink the mdh in this case below. */ LNetInvalidateMDHandle(&ni->ni_ping_mdh); - /* FIXME need to handle large-addr nid */ - nid = lnet_nid_to_nid4(&ni->ni_nid); + nid = ni->ni_nid; /* * remove the NI from the local queue and drop the @@ -3536,12 +3535,12 @@ lnet_recover_local_nis(void) ev_info->mt_type = MT_TYPE_LOCAL_NI; ev_info->mt_nid = nid; - rc = lnet_send_ping(nid, &mdh, LNET_INTERFACES_MIN, + rc = lnet_send_ping(&nid, &mdh, LNET_INTERFACES_MIN, ev_info, the_lnet.ln_mt_handler, true); /* lookup the nid again */ lnet_net_lock(0); - ni = lnet_nid2ni_locked(nid, 0); + ni = lnet_nid_to_ni_locked(&nid, 0); if (!ni) { /* * the NI has been deleted when we dropped @@ -3683,7 +3682,7 @@ lnet_recover_peer_nis(void) struct lnet_handle_md mdh; struct lnet_peer_ni *lpni; struct lnet_peer_ni *tmp; - lnet_nid_t nid; + struct lnet_nid nid; int healthv; int rc; time64_t now; @@ -3760,9 +3759,8 @@ lnet_recover_peer_nis(void) /* look at the comments in lnet_recover_local_nis() */ mdh = lpni->lpni_recovery_ping_mdh; + nid = lpni->lpni_nid; LNetInvalidateMDHandle(&lpni->lpni_recovery_ping_mdh); - /* FIXME handle large-addr nid */ - nid = lnet_nid_to_nid4(&lpni->lpni_nid); lnet_net_lock(0); list_del_init(&lpni->lpni_recovery); lnet_peer_ni_decref_locked(lpni); @@ -3770,7 +3768,7 @@ lnet_recover_peer_nis(void) ev_info->mt_type = MT_TYPE_PEER_NI; ev_info->mt_nid = nid; - rc = lnet_send_ping(nid, &mdh, LNET_INTERFACES_MIN, + rc = lnet_send_ping(&nid, &mdh, LNET_INTERFACES_MIN, ev_info, the_lnet.ln_mt_handler, true); lnet_net_lock(0); @@ -3778,7 +3776,7 @@ lnet_recover_peer_nis(void) * lnet_find_peer_ni_locked() grabs a refcount for * us. No need to take it explicitly. */ - lpni = lnet_find_peer_ni_locked(nid); + lpni = lnet_peer_ni_find_locked(&nid); if (!lpni) { lnet_net_unlock(0); LNetMDUnlink(mdh); @@ -3881,7 +3879,7 @@ lnet_monitor_thread(void *arg) * Returns < 0 if LNetGet fails */ int -lnet_send_ping(lnet_nid_t dest_nid, +lnet_send_ping(struct lnet_nid *dest_nid, struct lnet_handle_md *mdh, int nnis, void *user_data, lnet_handler_t handler, bool recovery) { @@ -3890,7 +3888,7 @@ lnet_send_ping(lnet_nid_t dest_nid, struct lnet_ping_buffer *pbuf; int rc; - if (dest_nid == LNET_NID_ANY) { + if (LNET_NID_IS_ANY(dest_nid)) { rc = -EHOSTUNREACH; goto fail_error; } @@ -3918,7 +3916,7 @@ lnet_send_ping(lnet_nid_t dest_nid, goto fail_error; } id.pid = LNET_PID_LUSTRE; - id.nid = dest_nid; + id.nid = lnet_nid_to_nid4(dest_nid); rc = LNetGet(LNET_NID_ANY, *mdh, id, LNET_RESERVED_PORTAL, @@ -3940,13 +3938,13 @@ static void lnet_handle_recovery_reply(struct lnet_mt_event_info *ev_info, int status, bool send, bool unlink_event) { - lnet_nid_t nid = ev_info->mt_nid; + struct lnet_nid *nid = &ev_info->mt_nid; if (ev_info->mt_type == MT_TYPE_LOCAL_NI) { struct lnet_ni *ni; lnet_net_lock(0); - ni = lnet_nid2ni_locked(nid, 0); + ni = lnet_nid_to_ni_locked(nid, 0); if (!ni) { lnet_net_unlock(0); return; @@ -3961,7 +3959,7 @@ lnet_handle_recovery_reply(struct lnet_mt_event_info *ev_info, if (status != 0) { CERROR("local NI (%s) recovery failed with %d\n", - libcfs_nid2str(nid), status); + libcfs_nidstr(nid), status); return; } /* @@ -3979,7 +3977,7 @@ lnet_handle_recovery_reply(struct lnet_mt_event_info *ev_info, int cpt; cpt = lnet_net_lock_current(); - lpni = lnet_find_peer_ni_locked(nid); + lpni = lnet_peer_ni_find_locked(nid); if (!lpni) { lnet_net_unlock(cpt); return; @@ -3995,7 +3993,7 @@ lnet_handle_recovery_reply(struct lnet_mt_event_info *ev_info, if (status != 0) CERROR("peer NI (%s) recovery failed with %d\n", - libcfs_nid2str(nid), status); + libcfs_nidstr(nid), status); } } @@ -4016,7 +4014,7 @@ lnet_mt_event_handler(struct lnet_event *event) switch (event->type) { case LNET_EVENT_UNLINK: CDEBUG(D_NET, "%s recovery ping unlinked\n", - libcfs_nid2str(ev_info->mt_nid)); + libcfs_nidstr(&ev_info->mt_nid)); /* fallthrough */ case LNET_EVENT_REPLY: lnet_handle_recovery_reply(ev_info, event->status, false, @@ -4024,7 +4022,7 @@ lnet_mt_event_handler(struct lnet_event *event) break; case LNET_EVENT_SEND: CDEBUG(D_NET, "%s recovery message sent %s:%d\n", - libcfs_nid2str(ev_info->mt_nid), + libcfs_nidstr(&ev_info->mt_nid), (event->status) ? "unsuccessfully" : "successfully", event->status); lnet_handle_recovery_reply(ev_info, event->status, true, false); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index b126515..3e280b7 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -3491,8 +3491,7 @@ __must_hold(&lp->lp_lock) nnis = max(lp->lp_data_nnis, LNET_INTERFACES_MIN); - rc = lnet_send_ping(lnet_nid_to_nid4(&lp->lp_primary_nid), - &lp->lp_ping_mdh, nnis, lp, + rc = lnet_send_ping(&lp->lp_primary_nid, &lp->lp_ping_mdh, nnis, lp, the_lnet.ln_dc_handler, false); /* -- 1.8.3.1