From d852004a0945ba4fc09c2f3da46246282c18828f Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Thu, 15 Oct 2020 17:33:33 -0500 Subject: [PATCH] LU-13569 lnet: Add health ping stats Add the NI and peer NI ping count and next ping timestamp to the detailed lnetctl net and peer output. Lustre-change: https://review.whamcloud.com/40314 Lustre-commit: 4c7e4aa57629660386ae2849151a0639b6177200 Test-Parameters: trivial HPE-bug-id: LUS-9109 Signed-off-by: Chris Horn Change-Id: I208cb3ea0b08a2984572cf0ec9874dbd09f6168e Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54405 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage Reviewed-by: Andreas Dilger --- lnet/include/uapi/linux/lnet/lnet-dlc.h | 4 ++++ lnet/lnet/api-ni.c | 2 ++ lnet/lnet/peer.c | 7 +++++-- lnet/utils/lnetconfig/liblnetconfig.c | 18 ++++++++++++++++++ 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index 7ee051f..7c427fc 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -200,6 +200,8 @@ struct lnet_ioctl_local_ni_hstats { __u32 hlni_local_error; __s32 hlni_fatal_error; __s32 hlni_health_value; + __u32 hlni_ping_count; + __u64 hlni_next_ping; }; struct lnet_ioctl_peer_ni_hstats { @@ -208,6 +210,8 @@ struct lnet_ioctl_peer_ni_hstats { __u32 hlpni_remote_error; __u32 hlpni_network_timeout; __s32 hlpni_health_value; + __u32 hlpni_ping_count; + __u64 hlpni_next_ping; }; struct lnet_ioctl_element_msg_stats { diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 66b091c..45f3d89 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -3667,6 +3667,8 @@ lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats) stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error); stats->hlni_fatal_error = atomic_read(&ni->ni_fatal_error_on); stats->hlni_health_value = atomic_read(&ni->ni_healthv); 
+ stats->hlni_ping_count = ni->ni_ping_count; + stats->hlni_next_ping = ni->ni_next_ping; unlock: lnet_net_unlock(cpt); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 5b307b1..35bc4b9 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -4154,6 +4154,8 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) atomic_read(&lpni->lpni_hstats.hlt_remote_error); lpni_hstats->hlpni_health_value = atomic_read(&lpni->lpni_healthv); + lpni_hstats->hlpni_ping_count = lpni->lpni_ping_count; + lpni_hstats->hlpni_next_ping = lpni->lpni_next_ping; if (copy_to_user(bulk, lpni_hstats, sizeof(*lpni_hstats))) goto out_free_hstats; bulk += sizeof(*lpni_hstats); @@ -4248,7 +4250,7 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all) lnet_net_unlock(LNET_LOCK_EX); return; } - atomic_set(&lpni->lpni_healthv, value); + lnet_set_lpni_healthv_locked(lpni, value); lnet_peer_ni_add_to_recoveryq_locked(lpni, &the_lnet.ln_mt_peerNIRecovq, now); lnet_peer_ni_decref_locked(lpni); @@ -4269,7 +4271,8 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all) list_for_each_entry(lpn, &lp->lp_peer_nets, lpn_peer_nets) { list_for_each_entry(lpni, &lpn->lpn_peer_nis, lpni_peer_nis) { - atomic_set(&lpni->lpni_healthv, value); + lnet_set_lpni_healthv_locked(lpni, + value); lnet_peer_ni_add_to_recoveryq_locked(lpni, &the_lnet.ln_mt_peerNIRecovq, now); } diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 66743af..bf62252 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -2252,6 +2252,14 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no, hstats.hlni_local_error) == NULL) goto out; + if (cYAML_create_number(yhstats, "ping_count", + hstats.hlni_ping_count) + == NULL) + goto out; + if (cYAML_create_number(yhstats, "next_ping", + hstats.hlni_next_ping) + == NULL) + goto out; continue_without_msg_stats: tunables = cYAML_create_object(item, "tunables"); @@ 
-3115,6 +3123,16 @@ int lustre_lnet_show_peer(char *knid, int detail, int seq_no, hstats->hlpni_network_timeout) == NULL) goto out; + if (cYAML_create_number(yhstats, "ping_count", + hstats->hlpni_ping_count) + == NULL) + goto out; + + if (cYAML_create_number(yhstats, "next_ping", + hstats->hlpni_next_ping) + == NULL) + goto out; + } } -- 1.8.3.1