Whamcloud - gitweb
LU-9120 lnet: Add ioctl to get health stats 76/32776/16
author: Amir Shehata <amir.shehata@intel.com>
Tue, 3 Jul 2018 23:27:10 +0000 (16:27 -0700)
committer: Amir Shehata <ashehata@whamcloud.com>
Fri, 17 Aug 2018 20:18:17 +0000 (20:18 +0000)
At the time of this patch the sysfs statistics feature is
still in development. Therefore, an ioctl is used to get the
health stats from LNet.

Test-Parameters: forbuildonly
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: Ia216484f9e6ee062c766c1043f456e38a27e4d39
Reviewed-on: https://review.whamcloud.com/32776
Tested-by: Jenkins
Reviewed-by: Sonia Sharma <sharmaso@whamcloud.com>
Reviewed-by: Olaf Weber <olaf.weber@hpe.com>
lnet/include/lnet/lib-lnet.h
lnet/include/uapi/linux/lnet/libcfs_ioctl.h
lnet/include/uapi/linux/lnet/lnet-dlc.h
lnet/lnet/api-ni.c
lnet/lnet/peer.c

index 78eee36..a5b66e1 100644 (file)
@@ -931,6 +931,7 @@ int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
                          __u32 *ni_peer_tx_credits, __u32 *peer_tx_credits,
                          __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credtis,
                          __u32 *peer_tx_qnob);
+int lnet_get_peer_ni_hstats(struct lnet_ioctl_peer_ni_hstats *stats);
 
 static inline bool
 lnet_is_peer_ni_healthy_locked(struct lnet_peer_ni *lpni)
index 9ab8d99..d8080a8 100644 (file)
@@ -148,7 +148,8 @@ struct libcfs_debug_ioctl_data {
 #define IOC_LIBCFS_GET_PEER_LIST          _IOWR(IOC_LIBCFS_TYPE, 100, IOCTL_CONFIG_SIZE)
 #define IOC_LIBCFS_GET_LOCAL_NI_MSG_STATS  _IOWR(IOC_LIBCFS_TYPE, 101, IOCTL_CONFIG_SIZE)
 #define IOC_LIBCFS_SET_HEALHV             _IOWR(IOC_LIBCFS_TYPE, 102, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_MAX_NR                                        102
+#define IOC_LIBCFS_GET_LOCAL_HSTATS       _IOWR(IOC_LIBCFS_TYPE, 103, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_MAX_NR                                        103
 
 extern int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
 
index 69ab4a6..1d4b98d 100644 (file)
@@ -172,6 +172,31 @@ struct lnet_ioctl_element_stats {
        __u32 iel_drop_count;
 };
 
+enum lnet_health_type { /* type of rh_type in struct lnet_ioctl_reset_health_cfg */
+       LNET_HEALTH_TYPE_LOCAL_NI = 0, /* explicitly 0 */
+       LNET_HEALTH_TYPE_PEER_NI, /* implicitly 1 */
+};
+
+struct lnet_ioctl_local_ni_hstats { /* argument of IOC_LIBCFS_GET_LOCAL_HSTATS */
+       struct libcfs_ioctl_hdr hlni_hdr; /* ioc_len must be >= sizeof(this struct) */
+       lnet_nid_t hlni_nid; /* in: NID used to look up the local NI */
+       __u32 hlni_local_interrupt; /* out: ni_hstats.hlt_local_interrupt */
+       __u32 hlni_local_dropped; /* out: ni_hstats.hlt_local_dropped */
+       __u32 hlni_local_aborted; /* out: ni_hstats.hlt_local_aborted */
+       __u32 hlni_local_no_route; /* out: ni_hstats.hlt_local_no_route */
+       __u32 hlni_local_timeout; /* out: ni_hstats.hlt_local_timeout */
+       __u32 hlni_local_error; /* out: ni_hstats.hlt_local_error */
+       __s32 hlni_health_value; /* out: ni_healthv of the NI */
+};
+
+struct lnet_ioctl_peer_ni_hstats { /* copied to the user bulk buffer once per peer NI by lnet_get_peer_info() */
+       __u32 hlpni_remote_dropped; /* from lpni_hstats.hlt_remote_dropped */
+       __u32 hlpni_remote_timeout; /* from lpni_hstats.hlt_remote_timeout */
+       __u32 hlpni_remote_error; /* from lpni_hstats.hlt_remote_error */
+       __u32 hlpni_network_timeout; /* from lpni_hstats.hlt_network_timeout */
+       __s32 hlpni_health_value; /* from lpni_healthv of the peer NI */
+};
+
 struct lnet_ioctl_element_msg_stats {
        struct libcfs_ioctl_hdr im_hdr;
        __u32 im_idx;
@@ -239,12 +264,6 @@ struct lnet_ioctl_peer_cfg {
        void __user *prcfg_bulk;
 };
 
-
-enum lnet_health_type {
-       LNET_HEALTH_TYPE_LOCAL_NI = 0,
-       LNET_HEALTH_TYPE_PEER_NI,
-};
-
 struct lnet_ioctl_reset_health_cfg {
        struct libcfs_ioctl_hdr rh_hdr;
        enum lnet_health_type rh_type;
index d89ea55..d5120d5 100644 (file)
@@ -3270,6 +3270,35 @@ lnet_ni_set_healthv(lnet_nid_t nid, int value, bool all)
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
+static int /* 0 on success, -ENOENT when lnet_nid2ni_locked() returns NULL */
+lnet_get_local_ni_hstats(struct lnet_ioctl_local_ni_hstats *stats)
+{ /* Fill *stats with the health counters of the NI looked up by stats->hlni_nid. */
+       int cpt, rc = 0;
+       struct lnet_ni *ni;
+       lnet_nid_t nid = stats->hlni_nid;
+
+       cpt = lnet_net_lock_current(); /* held until the unlock label below */
+       ni = lnet_nid2ni_locked(nid, cpt);
+
+       if (!ni) { /* lookup gave no NI for this NID; leave *stats untouched */
+               rc = -ENOENT;
+               goto unlock;
+       }
+
+       stats->hlni_local_interrupt = atomic_read(&ni->ni_hstats.hlt_local_interrupt);
+       stats->hlni_local_dropped = atomic_read(&ni->ni_hstats.hlt_local_dropped);
+       stats->hlni_local_aborted = atomic_read(&ni->ni_hstats.hlt_local_aborted);
+       stats->hlni_local_no_route = atomic_read(&ni->ni_hstats.hlt_local_no_route);
+       stats->hlni_local_timeout = atomic_read(&ni->ni_hstats.hlt_local_timeout);
+       stats->hlni_local_error = atomic_read(&ni->ni_hstats.hlt_local_error);
+       stats->hlni_health_value = atomic_read(&ni->ni_healthv);
+
+unlock: /* common exit: both the success and -ENOENT paths drop the net lock */
+       lnet_net_unlock(cpt);
+
+       return rc;
+}
+
 /**
  * LNet ioctl handler.
  *
@@ -3480,6 +3509,19 @@ LNetCtl(unsigned int cmd, void *arg)
                return rc;
        }
 
+       case IOC_LIBCFS_GET_LOCAL_HSTATS: {
+               struct lnet_ioctl_local_ni_hstats *stats = arg;
+
+               if (stats->hlni_hdr.ioc_len < sizeof(*stats))
+                       return -EINVAL;
+
+               mutex_lock(&the_lnet.ln_api_mutex);
+               rc = lnet_get_local_ni_hstats(stats);
+               mutex_unlock(&the_lnet.ln_api_mutex);
+
+               return rc;
+       }
+
        case IOC_LIBCFS_ADD_PEER_NI: {
                struct lnet_ioctl_peer_cfg *cfg = arg;
 
index 9d87f52..cb71514 100644 (file)
@@ -3351,6 +3351,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
 {
        struct lnet_ioctl_element_stats *lpni_stats;
        struct lnet_ioctl_element_msg_stats *lpni_msg_stats;
+       struct lnet_ioctl_peer_ni_hstats *lpni_hstats;
        struct lnet_peer_ni_credit_info *lpni_info;
        struct lnet_peer_ni *lpni;
        struct lnet_peer *lp;
@@ -3366,7 +3367,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
        }
 
        size = sizeof(nid) + sizeof(*lpni_info) + sizeof(*lpni_stats)
-               + sizeof(*lpni_msg_stats);
+               + sizeof(*lpni_msg_stats) + sizeof(*lpni_hstats);
        size *= lp->lp_nnis;
        if (size > cfg->prcfg_size) {
                cfg->prcfg_size = size;
@@ -3392,6 +3393,9 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
        LIBCFS_ALLOC(lpni_msg_stats, sizeof(*lpni_msg_stats));
        if (!lpni_msg_stats)
                goto out_free_stats;
+       LIBCFS_ALLOC(lpni_hstats, sizeof(*lpni_hstats));
+       if (!lpni_hstats)
+               goto out_free_msg_stats;
 
 
        lpni = NULL;
@@ -3399,7 +3403,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
        while ((lpni = lnet_get_next_peer_ni_locked(lp, NULL, lpni)) != NULL) {
                nid = lpni->lpni_nid;
                if (copy_to_user(bulk, &nid, sizeof(nid)))
-                       goto out_free_msg_stats;
+                       goto out_free_hstats;
                bulk += sizeof(nid);
 
                memset(lpni_info, 0, sizeof(*lpni_info));
@@ -3418,7 +3422,7 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
                lpni_info->cr_peer_min_tx_credits = lpni->lpni_mintxcredits;
                lpni_info->cr_peer_tx_qnob = lpni->lpni_txqnob;
                if (copy_to_user(bulk, lpni_info, sizeof(*lpni_info)))
-                       goto out_free_msg_stats;
+                       goto out_free_hstats;
                bulk += sizeof(*lpni_info);
 
                memset(lpni_stats, 0, sizeof(*lpni_stats));
@@ -3429,15 +3433,30 @@ int lnet_get_peer_info(struct lnet_ioctl_peer_cfg *cfg, void __user *bulk)
                lpni_stats->iel_drop_count = lnet_sum_stats(&lpni->lpni_stats,
                                                            LNET_STATS_TYPE_DROP);
                if (copy_to_user(bulk, lpni_stats, sizeof(*lpni_stats)))
-                       goto out_free_msg_stats;
+                       goto out_free_hstats;
                bulk += sizeof(*lpni_stats);
                lnet_usr_translate_stats(lpni_msg_stats, &lpni->lpni_stats);
                if (copy_to_user(bulk, lpni_msg_stats, sizeof(*lpni_msg_stats)))
-                       goto out_free_msg_stats;
+                       goto out_free_hstats;
                bulk += sizeof(*lpni_msg_stats);
+               lpni_hstats->hlpni_network_timeout =
+                 atomic_read(&lpni->lpni_hstats.hlt_network_timeout);
+               lpni_hstats->hlpni_remote_dropped =
+                 atomic_read(&lpni->lpni_hstats.hlt_remote_dropped);
+               lpni_hstats->hlpni_remote_timeout =
+                 atomic_read(&lpni->lpni_hstats.hlt_remote_timeout);
+               lpni_hstats->hlpni_remote_error =
+                 atomic_read(&lpni->lpni_hstats.hlt_remote_error);
+               lpni_hstats->hlpni_health_value =
+                 atomic_read(&lpni->lpni_healthv);
+               if (copy_to_user(bulk, lpni_hstats, sizeof(*lpni_hstats)))
+                       goto out_free_hstats;
+               bulk += sizeof(*lpni_hstats);
        }
        rc = 0;
 
+out_free_hstats:
+       LIBCFS_FREE(lpni_hstats, sizeof(*lpni_hstats));
 out_free_msg_stats:
        LIBCFS_FREE(lpni_msg_stats, sizeof(*lpni_msg_stats));
 out_free_stats:
@@ -3512,3 +3531,4 @@ lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all)
        }
        lnet_net_unlock(LNET_LOCK_EX);
 }
+