From: Amir Shehata
Date: Fri, 19 Apr 2019 00:19:22 +0000 (-0700)
Subject: LU-12200 lnet: check peer timeout on a router
X-Git-Tag: 2.12.55~25^2
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=41f3c27adf160d3661db252e9046a783301a9830;ds=sidebyside

LU-12200 lnet: check peer timeout on a router

On a router assume that a peer is alive and attempt to send it
messages as long as the peer_timeout hasn't expired.

Signed-off-by: Amir Shehata
Change-Id: I0806a52c8ad7acc1c93dcf32353f1c4467c618b1
Reviewed-on: https://review.whamcloud.com/34772
Reviewed-by: Sebastien Buisson
Reviewed-by: Olaf Weber
Tested-by: Jenkins
---

diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h
index ae1a6390..0550090 100644
--- a/lnet/include/lnet/lib-types.h
+++ b/lnet/include/lnet/lib-types.h
@@ -584,6 +584,8 @@ struct lnet_peer_ni {
 	__u32			lpni_gw_seq;
 	/* returned RC ping features. Protected with lpni_lock */
 	unsigned int		lpni_ping_feats;
+	/* when (in seconds) a message was last received from this peer */
+	time64_t		lpni_last_alive;
 	/* preferred local nids: if only one, use lpni_pref.nid */
 	union lpni_pref {
 		lnet_nid_t	nid;
diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c
index 21c13c9..d3cf14b 100644
--- a/lnet/lnet/lib-move.c
+++ b/lnet/lnet/lib-move.c
@@ -796,12 +796,32 @@ lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg)
 	return rc;
 }
 
+static bool
+lnet_is_peer_deadline_passed(struct lnet_peer_ni *lpni, time64_t now)
+{
+	time64_t deadline;
+
+	deadline = lpni->lpni_last_alive +
+		   lpni->lpni_net->net_tunables.lct_peer_timeout;
+
+	/*
+	 * The deadline is lpni_last_alive plus the net's lct_peer_timeout;
+	 * until it is reached the peer_ni is still assumed to be alive.
+	 */
+	if (deadline > now)
+		return false;
+
+	return true;
+}
+
 /* NB: returns 1 when alive, 0 when dead, negative when error;
  * may drop the lnet_net_lock */
 static int
 lnet_peer_alive_locked(struct lnet_ni *ni, struct lnet_peer_ni *lpni,
 		       struct lnet_msg *msg)
 {
+	time64_t now = ktime_get_seconds();
+
 	if (!lnet_peer_aliveness_enabled(lpni))
 		return -ENODEV;
 
@@ -821,6 +841,9 @@ lnet_peer_alive_locked(struct lnet_ni *ni, struct lnet_peer_ni *lpni,
 	    msg->msg_type == LNET_MSG_REPLY)
 		return 1;
 
+	if (!lnet_is_peer_deadline_passed(lpni, now))
+		return true;
+
 	return lnet_is_peer_ni_alive(lpni);
 }
 
@@ -4420,6 +4443,10 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,
 			return 0;
 		goto drop;
 	}
+
+	if (the_lnet.ln_routing)
+		lpni->lpni_last_alive = ktime_get_seconds();
+
 	msg->msg_rxpeer = lpni;
 	msg->msg_rxni = ni;
 	lnet_ni_addref_locked(ni, cpt);