Whamcloud - gitweb
LU-13569 lnet: Add health ping stats 14/40314/12
author: Chris Horn <chris.horn@hpe.com>
Thu, 15 Oct 2020 22:33:33 +0000 (17:33 -0500)
committer: Oleg Drokin <green@whamcloud.com>
Mon, 14 Jun 2021 16:44:11 +0000 (16:44 +0000)
Add the NI and peer NI ping count and next-ping timestamp to the
detailed output of the lnetctl peer and net commands.

Test-Parameters: trivial
HPE-bug-id: LUS-9109
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I208cb3ea0b08a2984572cf0ec9874dbd09f6168e
Reviewed-on: https://review.whamcloud.com/40314
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/uapi/linux/lnet/lnet-dlc.h
lnet/lnet/api-ni.c
lnet/lnet/peer.c
lnet/utils/lnetconfig/liblnetconfig.c

index 6481457..1017dd3 100644 (file)
@@ -191,6 +191,8 @@ struct lnet_ioctl_local_ni_hstats {
        __u32 hlni_local_timeout;
        __u32 hlni_local_error;
        __s32 hlni_health_value;
+       __u32 hlni_ping_count;
+       __u64 hlni_next_ping;
 };
 
 struct lnet_ioctl_peer_ni_hstats {
@@ -199,6 +201,8 @@ struct lnet_ioctl_peer_ni_hstats {
        __u32 hlpni_remote_error;
        __u32 hlpni_network_timeout;
        __s32 hlpni_health_value;
+       __u32 hlpni_ping_count;
+       __u64 hlpni_next_ping;
 };
 
 struct lnet_ioctl_element_msg_stats {
index 9ea4719..75cad7e 100644 (file)
@@ -3702,6 +3702,8 @@ lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats)
        stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout);
        stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error);
        stats->hlni_health_value = atomic_read(&ni->ni_healthv);
+       stats->hlni_ping_count = ni->ni_ping_count;
+       stats->hlni_next_ping = ni->ni_next_ping;
 
 unlock:
        lnet_net_unlock(cpt);
index 92701fa..dadc8ab 100644 (file)
@@ -4018,6 +4018,8 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
                  atomic_read(&lpni->lpni_hstats.hlt_remote_error);
                lpni_hstats->hlpni_health_value =
                  atomic_read(&lpni->lpni_healthv);
+               lpni_hstats->hlpni_ping_count = lpni->lpni_ping_count;
+               lpni_hstats->hlpni_next_ping = lpni->lpni_next_ping;
                if (copy_to_user(bulk, lpni_hstats, sizeof(*lpni_hstats)))
                        goto out_free_hstats;
                bulk += sizeof(*lpni_hstats);
@@ -4112,7 +4114,7 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all)
                        lnet_net_unlock(LNET_LOCK_EX);
                        return;
                }
-               atomic_set(&lpni->lpni_healthv, value);
+               lnet_set_lpni_healthv_locked(lpni, value);
                lnet_peer_ni_add_to_recoveryq_locked(lpni,
                                             &the_lnet.ln_mt_peerNIRecovq, now);
                lnet_peer_ni_decref_locked(lpni);
@@ -4133,7 +4135,8 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all)
                        list_for_each_entry(lpn, &lp->lp_peer_nets, lpn_peer_nets) {
                                list_for_each_entry(lpni, &lpn->lpn_peer_nis,
                                                    lpni_peer_nis) {
-                                       atomic_set(&lpni->lpni_healthv, value);
+                                       lnet_set_lpni_healthv_locked(lpni,
+                                                                    value);
                                        lnet_peer_ni_add_to_recoveryq_locked(lpni,
                                             &the_lnet.ln_mt_peerNIRecovq, now);
                                }
index e371771..dd59a33 100644 (file)
@@ -2250,6 +2250,14 @@ continue_without_udsp_info:
                                                hstats.hlni_local_error)
                                                        == NULL)
                                goto out;
+                       if (cYAML_create_number(yhstats, "ping_count",
+                                               hstats.hlni_ping_count)
+                                                       == NULL)
+                               goto out;
+                       if (cYAML_create_number(yhstats, "next_ping",
+                                               hstats.hlni_next_ping)
+                                                       == NULL)
+                               goto out;
 
 continue_without_msg_stats:
                        tunables = cYAML_create_object(item, "tunables");
@@ -3134,6 +3142,16 @@ continue_without_udsp_info:
                                                hstats->hlpni_network_timeout)
                                                        == NULL)
                                goto out;
+                       if (cYAML_create_number(yhstats, "ping_count",
+                                               hstats->hlpni_ping_count)
+                                                       == NULL)
+                               goto out;
+
+                       if (cYAML_create_number(yhstats, "next_ping",
+                                               hstats->hlpni_next_ping)
+                                                       == NULL)
+                               goto out;
+
                }
        }