From: Amir Shehata Date: Tue, 3 Jul 2018 23:27:10 +0000 (-0700) Subject: LU-9120 lnet: Add ioctl to get health stats X-Git-Tag: 2.11.55~65^2^2~10 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=10958cac798db5c384f64ee1d48fc2b5ba423f0a;p=fs%2Flustre-release.git LU-9120 lnet: Add ioctl to get health stats At the time of this patch the sysfs statistics feature is still in development. Therefore, an ioctl is used to get the stats from LNet. Test-Parameters: forbuildonly Signed-off-by: Amir Shehata Change-Id: Ia216484f9e6ee062c766c1043f456e38a27e4d39 Reviewed-on: https://review.whamcloud.com/32776 Tested-by: Jenkins Reviewed-by: Sonia Sharma Reviewed-by: Olaf Weber --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 78eee36..a5b66e1 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -931,6 +931,7 @@ int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid, __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits, __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis, __u32 *peer_tx_qnob); +int lnet_get_peer_ni_hstats(struct lnet_ioctl_peer_ni_hstats *stats); static inline bool lnet_is_peer_ni_healthy_locked(struct lnet_peer_ni *lpni) diff --git a/lnet/include/uapi/linux/lnet/libcfs_ioctl.h b/lnet/include/uapi/linux/lnet/libcfs_ioctl.h index 9ab8d99..d8080a8 100644 --- a/lnet/include/uapi/linux/lnet/libcfs_ioctl.h +++ b/lnet/include/uapi/linux/lnet/libcfs_ioctl.h @@ -148,7 +148,8 @@ struct libcfs_debug_ioctl_data { #define IOC_LIBCFS_GET_PEER_LIST _IOWR(IOC_LIBCFS_TYPE, 100, IOCTL_CONFIG_SIZE) #define IOC_LIBCFS_GET_LOCAL_NI_MSG_STATS _IOWR(IOC_LIBCFS_TYPE, 101, IOCTL_CONFIG_SIZE) #define IOC_LIBCFS_SET_HEALHV _IOWR(IOC_LIBCFS_TYPE, 102, IOCTL_CONFIG_SIZE) -#define IOC_LIBCFS_MAX_NR 102 +#define IOC_LIBCFS_GET_LOCAL_HSTATS _IOWR(IOC_LIBCFS_TYPE, 103, IOCTL_CONFIG_SIZE) +#define IOC_LIBCFS_MAX_NR 103 extern int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data); diff --git 
a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index 69ab4a6..1d4b98d 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -172,6 +172,31 @@ struct lnet_ioctl_element_stats { __u32 iel_drop_count; }; +enum lnet_health_type { + LNET_HEALTH_TYPE_LOCAL_NI = 0, + LNET_HEALTH_TYPE_PEER_NI, +}; + +struct lnet_ioctl_local_ni_hstats { + struct libcfs_ioctl_hdr hlni_hdr; + lnet_nid_t hlni_nid; + __u32 hlni_local_interrupt; + __u32 hlni_local_dropped; + __u32 hlni_local_aborted; + __u32 hlni_local_no_route; + __u32 hlni_local_timeout; + __u32 hlni_local_error; + __s32 hlni_health_value; +}; + +struct lnet_ioctl_peer_ni_hstats { + __u32 hlpni_remote_dropped; + __u32 hlpni_remote_timeout; + __u32 hlpni_remote_error; + __u32 hlpni_network_timeout; + __s32 hlpni_health_value; +}; + struct lnet_ioctl_element_msg_stats { struct libcfs_ioctl_hdr im_hdr; __u32 im_idx; @@ -239,12 +264,6 @@ struct lnet_ioctl_peer_cfg { void __user *prcfg_bulk; }; - -enum lnet_health_type { - LNET_HEALTH_TYPE_LOCAL_NI = 0, - LNET_HEALTH_TYPE_PEER_NI, -}; - struct lnet_ioctl_reset_health_cfg { struct libcfs_ioctl_hdr rh_hdr; enum lnet_health_type rh_type; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index d89ea55..d5120d5 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -3270,6 +3270,35 @@ lnet_ni_set_healthv(lnet_nid_t nid, int value, bool all) lnet_net_unlock(LNET_LOCK_EX); } +static int +lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats) +{ + int cpt, rc = 0; + struct lnet_ni *ni; + lnet_nid_t nid = stats->hlni_nid; + + cpt = lnet_net_lock_current(); + ni = lnet_nid2ni_locked(nid, cpt); + + if (!ni) { + rc = -ENOENT; + goto unlock; + } + + stats->hlni_local_interrupt = atomic_read(&ni->ni_hstats.hlt_local_interrupt); + stats->hlni_local_dropped = atomic_read(&ni->ni_hstats.hlt_local_dropped); + stats->hlni_local_aborted = atomic_read(&ni->ni_hstats.hlt_local_aborted); + 
stats->hlni_local_no_route = atomic_read(&ni->ni_hstats.hlt_local_no_route); + stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout); + stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error); + stats->hlni_health_value = atomic_read(&ni->ni_healthv); + +unlock: + lnet_net_unlock(cpt); + + return rc; +} + /** * LNet ioctl handler. * @@ -3480,6 +3509,19 @@ LNetCtl(unsigned int cmd, void *arg) return rc; } + case IOC_LIBCFS_GET_LOCAL_HSTATS: { + struct lnet_ioctl_local_ni_hstats *stats = arg; + + if (stats->hlni_hdr.ioc_len < sizeof(*stats)) + return -EINVAL; + + mutex_lock(&the_lnet.ln_api_mutex); + rc = lnet_get_local_ni_hstats(stats); + mutex_unlock(&the_lnet.ln_api_mutex); + + return rc; + } + case IOC_LIBCFS_ADD_PEER_NI: { struct lnet_ioctl_peer_cfg *cfg = arg; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 9d87f52..cb71514 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -3351,6 +3351,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) { struct lnet_ioctl_element_stats *lpni_stats; struct lnet_ioctl_element_msg_stats *lpni_msg_stats; + struct lnet_ioctl_peer_ni_hstats *lpni_hstats; struct lnet_peer_ni_credit_info *lpni_info; struct lnet_peer_ni *lpni; struct lnet_peer *lp; @@ -3366,7 +3367,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) } size = sizeof(nid) + sizeof(*lpni_info) + sizeof(*lpni_stats) - + sizeof(*lpni_msg_stats); + + sizeof(*lpni_msg_stats) + sizeof(*lpni_hstats); size *= lp->lp_nnis; if (size > cfg->prcfg_size) { cfg->prcfg_size = size; @@ -3392,6 +3393,9 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) LIBCFS_ALLOC(lpni_msg_stats, sizeof(*lpni_msg_stats)); if (!lpni_msg_stats) goto out_free_stats; + LIBCFS_ALLOC(lpni_hstats, sizeof(*lpni_hstats)); + if (!lpni_hstats) + goto out_free_msg_stats; lpni = NULL; @@ -3399,7 +3403,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) while 
((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) { nid = lpni->lpni_nid; if (copy_to_user(bulk, &nid, sizeof(nid))) - goto out_free_msg_stats; + goto out_free_hstats; bulk += sizeof(nid); memset(lpni_info, 0, sizeof(*lpni_info)); @@ -3418,7 +3422,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) lpni_info->cr_peer_min_tx_credits = lpni->lpni_mintxcredits; lpni_info->cr_peer_tx_qnob = lpni->lpni_txqnob; if (copy_to_user(bulk, lpni_info, sizeof(*lpni_info))) - goto out_free_msg_stats; + goto out_free_hstats; bulk += sizeof(*lpni_info); memset(lpni_stats, 0, sizeof(*lpni_stats)); @@ -3429,15 +3433,30 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk) lpni_stats->iel_drop_count = lnet_sum_stats(&lpni->lpni_stats, LNET_STATS_TYPE_DROP); if (copy_to_user(bulk, lpni_stats, sizeof(*lpni_stats))) - goto out_free_msg_stats; + goto out_free_hstats; bulk += sizeof(*lpni_stats); lnet_usr_translate_stats(lpni_msg_stats, &lpni->lpni_stats); if (copy_to_user(bulk, lpni_msg_stats, sizeof(*lpni_msg_stats))) - goto out_free_msg_stats; + goto out_free_hstats; bulk += sizeof(*lpni_msg_stats); + lpni_hstats->hlpni_network_timeout = + atomic_read(&lpni->lpni_hstats.hlt_network_timeout); + lpni_hstats->hlpni_remote_dropped = + atomic_read(&lpni->lpni_hstats.hlt_remote_dropped); + lpni_hstats->hlpni_remote_timeout = + atomic_read(&lpni->lpni_hstats.hlt_remote_timeout); + lpni_hstats->hlpni_remote_error = + atomic_read(&lpni->lpni_hstats.hlt_remote_error); + lpni_hstats->hlpni_health_value = + atomic_read(&lpni->lpni_healthv); + if (copy_to_user(bulk, lpni_hstats, sizeof(*lpni_hstats))) + goto out_free_hstats; + bulk += sizeof(*lpni_hstats); } rc = 0; +out_free_hstats: + LIBCFS_FREE(lpni_hstats, sizeof(*lpni_hstats)); out_free_msg_stats: LIBCFS_FREE(lpni_msg_stats, sizeof(*lpni_msg_stats)); out_free_stats: @@ -3512,3 +3531,4 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all) } 
lnet_net_unlock(LNET_LOCK_EX); } +