From f75ff33d9fbefd6995a26693032a32a0ba211b51 Mon Sep 17 00:00:00 2001 From: Cyril Bordage Date: Wed, 7 Jul 2021 15:27:54 +0200 Subject: [PATCH] LU-14114 lnet: print device status in net show command A device can be in a fatal state if the cable was disconnected, or the port brought down on the switch side. In these cases, the LND (o2iblnd for now) will flag the device as being in a fatal state. That device will not be used any further. However, its health will not be decremented. This causes some confusion when examining the state of the node. It is better to print the device status in the output of the lnetctl net show command. Signed-off-by: Cyril Bordage Change-Id: I7c635ab1062f6153449fcec1bc07585065818a72 Reviewed-on: https://review.whamcloud.com/44169 Tested-by: jenkins Reviewed-by: Amir Shehata Reviewed-by: Chris Horn Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/include/uapi/linux/lnet/lnet-dlc.h | 1 + lnet/lnet/api-ni.c | 1 + lnet/utils/lnetconfig/liblnetconfig.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index 1017dd3..0ed61bd 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -190,6 +190,7 @@ struct lnet_ioctl_local_ni_hstats { __u32 hlni_local_no_route; __u32 hlni_local_timeout; __u32 hlni_local_error; + __s32 hlni_fatal_error; __s32 hlni_health_value; __u32 hlni_ping_count; __u64 hlni_next_ping; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 4b47457..033e418 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -3759,6 +3759,7 @@ lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats) stats->hlni_local_no_route = atomic_read(&ni->ni_hstats.hlt_local_no_route); stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout); stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error); + stats->hlni_fatal_error = atomic_read(&ni->ni_fatal_error_on); 
stats->hlni_health_value = atomic_read(&ni->ni_healthv); stats->hlni_ping_count = ni->ni_ping_count; stats->hlni_next_ping = ni->ni_next_ping; diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index dd59a33..6a30c59 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -2222,6 +2222,10 @@ continue_without_udsp_info: yhstats = cYAML_create_object(item, "health stats"); if (!yhstats) goto out; + if (cYAML_create_number(yhstats, "fatal_error", + hstats.hlni_fatal_error) + == NULL) + goto out; if (cYAML_create_number(yhstats, "health value", hstats.hlni_health_value) == NULL) -- 1.8.3.1