From: Chris Horn Date: Thu, 15 Oct 2020 22:33:33 +0000 (-0500) Subject: LU-13569 lnet: Add health ping stats X-Git-Tag: 2.14.53~109 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=4c7e4aa57629660386ae2849151a0639b6177200 LU-13569 lnet: Add health ping stats Add the NI and peer NI ping count and next ping timestamp to the detailed lnetctl peer and net output. Test-Parameters: trivial HPE-bug-id: LUS-9109 Signed-off-by: Chris Horn Change-Id: I208cb3ea0b08a2984572cf0ec9874dbd09f6168e Reviewed-on: https://review.whamcloud.com/40314 Reviewed-by: Alexander Boyko Tested-by: jenkins Tested-by: Maloo Reviewed-by: Serguei Smirnov Reviewed-by: Oleg Drokin --- diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index 6481457..1017dd3 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -191,6 +191,8 @@ struct lnet_ioctl_local_ni_hstats { __u32 hlni_local_timeout; __u32 hlni_local_error; __s32 hlni_health_value; + __u32 hlni_ping_count; + __u64 hlni_next_ping; }; struct lnet_ioctl_peer_ni_hstats { @@ -199,6 +201,8 @@ struct lnet_ioctl_peer_ni_hstats { __u32 hlpni_remote_error; __u32 hlpni_network_timeout; __s32 hlpni_health_value; + __u32 hlpni_ping_count; + __u64 hlpni_next_ping; }; struct lnet_ioctl_element_msg_stats { diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 9ea4719..75cad7e 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -3702,6 +3702,8 @@ lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats) stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout); stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error); stats->hlni_health_value = atomic_read(&ni->ni_healthv); + stats->hlni_ping_count = ni->ni_ping_count; + stats->hlni_next_ping = ni->ni_next_ping; unlock: lnet_net_unlock(cpt); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 92701fa..dadc8ab 100644 ---
a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -4018,6 +4018,8 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) atomic_read(&lpni->lpni_hstats.hlt_remote_error); lpni_hstats->hlpni_health_value = atomic_read(&lpni->lpni_healthv); + lpni_hstats->hlpni_ping_count = lpni->lpni_ping_count; + lpni_hstats->hlpni_next_ping = lpni->lpni_next_ping; if (copy_to_user(bulk, lpni_hstats, sizeof(*lpni_hstats))) goto out_free_hstats; bulk += sizeof(*lpni_hstats); @@ -4112,7 +4114,7 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all) lnet_net_unlock(LNET_LOCK_EX); return; } - atomic_set(&lpni->lpni_healthv, value); + lnet_set_lpni_healthv_locked(lpni, value); lnet_peer_ni_add_to_recoveryq_locked(lpni, &the_lnet.ln_mt_peerNIRecovq, now); lnet_peer_ni_decref_locked(lpni); @@ -4133,7 +4135,8 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all) list_for_each_entry(lpn, &lp->lp_peer_nets, lpn_peer_nets) { list_for_each_entry(lpni, &lpn->lpn_peer_nis, lpni_peer_nis) { - atomic_set(&lpni->lpni_healthv, value); + lnet_set_lpni_healthv_locked(lpni, + value); lnet_peer_ni_add_to_recoveryq_locked(lpni, &the_lnet.ln_mt_peerNIRecovq, now); } diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index e371771..dd59a33 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -2250,6 +2250,14 @@ continue_without_udsp_info: hstats.hlni_local_error) == NULL) goto out; + if (cYAML_create_number(yhstats, "ping_count", + hstats.hlni_ping_count) + == NULL) + goto out; + if (cYAML_create_number(yhstats, "next_ping", + hstats.hlni_next_ping) + == NULL) + goto out; continue_without_msg_stats: tunables = cYAML_create_object(item, "tunables"); @@ -3134,6 +3142,16 @@ continue_without_udsp_info: hstats->hlpni_network_timeout) == NULL) goto out; + if (cYAML_create_number(yhstats, "ping_count", + hstats->hlpni_ping_count) + == NULL) + goto out; + + if (cYAML_create_number(yhstats, 
"next_ping", + hstats->hlpni_next_ping) + == NULL) + goto out; + } }