Whamcloud - gitweb
LU-7734 lnet: Add peer_ni and NI stats for DLC
author Doug Oucharek <doug.s.oucharek@intel.com>
Fri, 13 May 2016 00:25:21 +0000 (17:25 -0700)
committer Amir Shehata <amir.shehata@intel.com>
Wed, 25 Jan 2017 03:10:15 +0000 (19:10 -0800)
This patch adds three stats to the peer_ni and NI structures:
send_count, recv_count, and drop_count. These stats get printed
when you do an "lnetctl net show -v" (for NI) and
"lnetctl peer show" (for peer_ni).

Signed-off-by: Doug Oucharek <doug.s.oucharek@intel.com>
Change-Id: Ic41c88cbc68dba677151d87a1fab53a48d36ea29
Reviewed-on: http://review.whamcloud.com/20170
Reviewed-by: Amir Shehata <amir.shehata@intel.com>
Tested-by: Amir Shehata <amir.shehata@intel.com>
lnet/include/lnet/lib-dlc.h
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-types.h
lnet/lnet/api-ni.c
lnet/lnet/lib-move.c
lnet/lnet/lib-msg.c
lnet/lnet/peer.c
lnet/utils/lnetconfig/liblnetconfig.c

index 016e392..454b962 100644 (file)
@@ -141,6 +141,12 @@ struct lnet_ioctl_config_data {
        char cfg_bulk[0];
 };
 
+struct lnet_ioctl_element_stats {      /* stats snapshot carried in the ioctl bulk buffer */
+       __u32   send_count;     /* filled from the kernel-side send_count counter */
+       __u32   recv_count;     /* filled from the kernel-side recv_count counter */
+       __u32   drop_count;     /* filled from the kernel-side drop_count counter */
+};
+
 /*
  * lnet_ioctl_config_ni
  *  This structure describes an NI configuration. There are multiple components
index 2ef7187..89efd70 100644 (file)
@@ -818,7 +818,8 @@ bool lnet_peer_is_ni_pref_locked(struct lnet_peer_ni *lpni,
 int lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid, bool mr);
 int lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid);
 int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
-                      struct lnet_peer_ni_credit_info *peer_ni_info);
+                      struct lnet_peer_ni_credit_info *peer_ni_info,
+                      struct lnet_ioctl_element_stats *peer_ni_stats);
 int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
                          char alivness[LNET_MAX_STR_LEN],
                          __u32 *cpt_iter, __u32 *refcount,
index 84b1064..77b3564 100644 (file)
@@ -292,6 +292,12 @@ enum lnet_ni_state {
        LNET_NI_STATE_DELETING
 };
 
+struct lnet_element_stats {    /* counters embedded in each NI and each peer NI */
+       atomic_t        send_count;     /* incremented in lnet_msg_decommit_tx() */
+       atomic_t        recv_count;     /* incremented in lnet_msg_decommit_rx() */
+       atomic_t        drop_count;     /* incremented in lnet_post_send_locked() when a message is dropped */
+};
+
 struct lnet_net {
        /* chain on the ln_nets */
        struct list_head        net_list;
@@ -381,6 +387,9 @@ typedef struct lnet_ni {
        /* lnd tunables set explicitly */
        bool ni_lnd_tunables_set;
 
+       /* NI statistics */
+       struct lnet_element_stats ni_stats;
+
        /* physical device CPT */
        int                     dev_cpt;
 
@@ -427,7 +436,7 @@ typedef struct {
 } lnet_rc_data_t;
 
 struct lnet_peer_ni {
-       /* cahian on peer_net */
+       /* chain on peer_net */
        struct list_head        lpni_on_peer_net_list;
        /* chain on remote peer list */
        struct list_head        lpni_on_remote_peer_ni_list;
@@ -441,6 +450,8 @@ struct lnet_peer_ni {
        struct list_head        lpni_rtr_list;
        /* pointer to peer net I'm part of */
        struct lnet_peer_net    *lpni_peer_net;
+       /* statistics kept on each peer NI */
+       struct lnet_element_stats lpni_stats;
        /* # tx credits available */
        int                     lpni_txcredits;
        /* low water mark */
index c6b0907..3be384e 100644 (file)
@@ -1952,6 +1952,7 @@ static int lnet_handle_dbg_task(struct lnet_ioctl_dbg *dbg,
 static void
 lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
                   struct lnet_ioctl_config_lnd_tunables *tun,
+                  struct lnet_ioctl_element_stats *stats,
                   __u32 tun_size)
 {
        size_t min_size = 0;
@@ -1977,6 +1978,13 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni,
 
        memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn));
 
+       /* export every NI counter; drop_count is printed by user space too */
+       if (stats) {
+               stats->send_count = atomic_read(&ni->ni_stats.send_count);
+               stats->recv_count = atomic_read(&ni->ni_stats.recv_count);
+               stats->drop_count = atomic_read(&ni->ni_stats.drop_count);
+       }
+
        /*
         * tun->lt_tun will always be present, but in order to be
         * backwards compatible, we need to deal with the cases when
@@ -2173,13 +2179,14 @@ lnet_get_net_config(struct lnet_ioctl_config_data *config)
 int
 lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
                   struct lnet_ioctl_config_lnd_tunables *tun,
+                  struct lnet_ioctl_element_stats *stats,
                   __u32 tun_size)
 {
        struct lnet_ni          *ni;
        int                     cpt;
        int                     rc = -ENOENT;
 
-       if (!cfg_ni || !tun)
+       if (!cfg_ni || !tun || !stats)
                return -EINVAL;
 
        cpt = lnet_net_lock_current();
@@ -2189,7 +2196,7 @@ lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni,
        if (ni) {
                rc = 0;
                lnet_ni_lock(ni);
-               lnet_fill_ni_info(ni, cfg_ni, tun, tun_size);
+               lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size);
                lnet_ni_unlock(ni);
        }
 
@@ -2662,20 +2669,24 @@ LNetCtl(unsigned int cmd, void *arg)
        case IOC_LIBCFS_GET_LOCAL_NI: {
                struct lnet_ioctl_config_ni *cfg_ni;
                struct lnet_ioctl_config_lnd_tunables *tun = NULL;
+               struct lnet_ioctl_element_stats *stats;
                __u32 tun_size;
 
                cfg_ni = arg;
                /* get the tunables if they are available */
                if (cfg_ni->lic_cfg_hdr.ioc_len <
-                   sizeof(*cfg_ni) + sizeof(*tun))
+                   sizeof(*cfg_ni) + sizeof(*stats)+ sizeof(*tun))
                        return -EINVAL;
 
+               stats = (struct lnet_ioctl_element_stats *)
+                       cfg_ni->lic_bulk;
                tun = (struct lnet_ioctl_config_lnd_tunables *)
-                               cfg_ni->lic_bulk;
+                               (cfg_ni->lic_bulk + sizeof(*stats));
 
-               tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni);
+               tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) -
+                       sizeof(*stats);
 
-               return lnet_get_ni_config(cfg_ni, tun, tun_size);
+               return lnet_get_ni_config(cfg_ni, tun, stats, tun_size);
        }
 
        case IOC_LIBCFS_GET_NET: {
@@ -2806,15 +2817,20 @@ LNetCtl(unsigned int cmd, void *arg)
        case IOC_LIBCFS_GET_PEER_NI: {
                struct lnet_ioctl_peer_cfg *cfg = arg;
                struct lnet_peer_ni_credit_info *lpni_cri;
-               size_t total = sizeof(*cfg) + sizeof(*lpni_cri);
+               struct lnet_ioctl_element_stats *lpni_stats;
+               size_t total = sizeof(*cfg) + sizeof(*lpni_cri) +
+                              sizeof(*lpni_stats);
 
                if (cfg->prcfg_hdr.ioc_len < total)
                        return -EINVAL;
 
                lpni_cri = (struct lnet_peer_ni_credit_info*) cfg->prcfg_bulk;
+               lpni_stats = (struct lnet_ioctl_element_stats *)
+                            (cfg->prcfg_bulk + sizeof(*lpni_cri));
 
                return lnet_get_peer_info(cfg->prcfg_idx, &cfg->prcfg_key_nid,
-                                         &cfg->prcfg_cfg_nid, lpni_cri);
+                                         &cfg->prcfg_cfg_nid, lpni_cri,
+                                         lpni_stats);
        }
 
        case IOC_LIBCFS_NOTIFY_ROUTER: {
index f42857a..938e536 100644 (file)
@@ -805,6 +805,10 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send)
                the_lnet.ln_counters[cpt]->drop_count++;
                the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
                lnet_net_unlock(cpt);
+               if (msg->msg_txpeer)
+                       atomic_inc(&msg->msg_txpeer->lpni_stats.drop_count);
+               if (msg->msg_txni)
+                       atomic_inc(&msg->msg_txni->ni_stats.drop_count);
 
                CNETERR("Dropping message for %s: peer not alive\n",
                        libcfs_id2str(msg->msg_target));
index 42099cc..a6ffe8e 100644 (file)
@@ -216,6 +216,10 @@ lnet_msg_decommit_tx(lnet_msg_t *msg, int status)
        }
 
        counters->send_count++;
+       if (msg->msg_txpeer)
+               atomic_inc(&msg->msg_txpeer->lpni_stats.send_count);
+       if (msg->msg_txni)
+               atomic_inc(&msg->msg_txni->ni_stats.send_count);
  out:
        lnet_return_tx_credits_locked(msg);
        msg->msg_tx_committed = 0;
@@ -267,6 +271,10 @@ lnet_msg_decommit_rx(lnet_msg_t *msg, int status)
        }
 
        counters->recv_count++;
+       if (msg->msg_rxpeer)
+               atomic_inc(&msg->msg_rxpeer->lpni_stats.recv_count);
+       if (msg->msg_rxni)
+               atomic_inc(&msg->msg_rxni->ni_stats.recv_count);
        if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
                counters->recv_length += msg->msg_wanted;
 
index c9e93c5..86b56ae 100644 (file)
@@ -965,7 +965,8 @@ int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid,
 }
 
 int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
-                      struct lnet_peer_ni_credit_info *peer_ni_info)
+                      struct lnet_peer_ni_credit_info *peer_ni_info,
+                      struct lnet_ioctl_element_stats *peer_ni_stats)
 {
        struct lnet_peer_ni *lpni = NULL;
        struct lnet_peer_net *lpn = NULL;
@@ -992,5 +993,9 @@ int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid,
        peer_ni_info->cr_peer_min_rtr_credits = lpni->lpni_mintxcredits;
        peer_ni_info->cr_peer_tx_qnob = lpni->lpni_txqnob;
 
+       peer_ni_stats->send_count = atomic_read(&lpni->lpni_stats.send_count);
+       peer_ni_stats->recv_count = atomic_read(&lpni->lpni_stats.recv_count);
+       peer_ni_stats->drop_count = atomic_read(&lpni->lpni_stats.drop_count);
+
        return 0;
 }
index 335072d..917ca44 100644 (file)
@@ -1405,6 +1405,7 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
        char *buf;
        struct lnet_ioctl_config_ni *ni_data;
        struct lnet_ioctl_config_lnd_tunables *lnd;
+       struct lnet_ioctl_element_stats *stats;
        __u32 net = LNET_NIDNET(LNET_NID_ANY);
        __u32 prev_net = LNET_NIDNET(LNET_NID_ANY);
        int rc = LUSTRE_CFG_RC_OUT_OF_MEM, i, j;
@@ -1412,14 +1413,14 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
        struct cYAML *root = NULL, *tunables = NULL,
                *net_node = NULL, *interfaces = NULL,
                *item = NULL, *first_seq = NULL,
-               *tmp = NULL;
+               *tmp = NULL, *statistics = NULL;
        int str_buf_len = LNET_MAX_SHOW_NUM_CPT * 2;
        char str_buf[str_buf_len];
        char *pos;
        char err_str[LNET_MAX_STR_LEN];
        bool exist = false, new_net = true;
        int net_num = 0;
-       size_t buf_size = sizeof(*ni_data) + sizeof(*lnd);
+       size_t buf_size = sizeof(*ni_data) + sizeof(*lnd) + sizeof(*stats);
 
        snprintf(err_str, sizeof(err_str), "\"out of memory\"");
 
@@ -1479,7 +1480,9 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
                rc = -1;
                exist = true;
 
-               lnd = (struct lnet_ioctl_config_lnd_tunables *)ni_data->lic_bulk;
+               stats = (struct lnet_ioctl_element_stats *)ni_data->lic_bulk;
+               lnd = (struct lnet_ioctl_config_lnd_tunables *)
+                       (ni_data->lic_bulk + sizeof(*stats));
 
                if (rc_net != prev_net) {
                        prev_net = rc_net;
@@ -1540,6 +1543,25 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no,
                if (detail) {
                        char *limit;
 
+                       statistics = cYAML_create_object(item, "statistics");
+                       if (statistics == NULL)
+                               goto out;
+
+                       if (cYAML_create_number(statistics, "send_count",
+                                               stats->send_count)
+                                                       == NULL)
+                               goto out;
+
+                       if (cYAML_create_number(statistics, "recv_count",
+                                               stats->recv_count)
+                                                       == NULL)
+                               goto out;
+
+                       if (cYAML_create_number(statistics, "drop_count",
+                                               stats->drop_count)
+                                                       == NULL)
+                               goto out;
+
                        tunables = cYAML_create_object(item, "tunables");
                        if (!tunables)
                                goto out;
@@ -1889,13 +1911,16 @@ int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc,
 {
        struct lnet_ioctl_peer_cfg *peer_info;
        struct lnet_peer_ni_credit_info *lpni_cri;
+       struct lnet_ioctl_element_stats *lpni_stats;
        int rc = LUSTRE_CFG_RC_OUT_OF_MEM, ncpt = 0, i = 0, j = 0;
        int l_errno = 0;
        struct cYAML *root = NULL, *peer = NULL, *peer_ni = NULL,
                     *first_seq = NULL, *peer_root = NULL, *tmp = NULL;
        char err_str[LNET_MAX_STR_LEN];
        lnet_nid_t prev_primary_nid = LNET_NID_ANY, primary_nid = LNET_NID_ANY;
-       char *data = calloc(sizeof(*peer_info) + sizeof(*lpni_cri), 1);
+       int data_size = sizeof(*peer_info) + sizeof(*lpni_cri) +
+                       sizeof(*lpni_stats);
+       char *data = calloc(data_size, 1);
        bool new_peer = true;
 
        snprintf(err_str, sizeof(err_str),
@@ -1920,10 +1945,9 @@ int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc,
 
        do {
                for (i = 0;; i++) {
-                       memset(data, 0, sizeof(*peer_info) + sizeof(*lpni_cri));
+                       memset(data, 0, data_size);
                        LIBCFS_IOC_INIT_V2(*peer_info, prcfg_hdr);
-                       peer_info->prcfg_hdr.ioc_len = sizeof(*peer_info) +
-                                                      sizeof(*lpni_cri);
+                       peer_info->prcfg_hdr.ioc_len = data_size;
                        peer_info->prcfg_idx = i;
 
                        rc = l_ioctl(LNET_DEV_ID,
@@ -1938,6 +1962,9 @@ int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc,
                                        continue;
 
                        lpni_cri = (struct lnet_peer_ni_credit_info*)peer_info->prcfg_bulk;
+                       lpni_stats = (struct lnet_ioctl_element_stats *)
+                                    (peer_info->prcfg_bulk +
+                                    sizeof(*lpni_cri));
 
                        peer = cYAML_create_seq_item(peer_root);
                        if (peer == NULL)
@@ -2006,6 +2033,21 @@ int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc,
                                                lpni_cri->cr_peer_tx_qnob)
                            == NULL)
                                goto out;
+
+                       if (cYAML_create_number(peer_ni, "send_count",
+                                               lpni_stats->send_count)
+                           == NULL)
+                               goto out;
+
+                       if (cYAML_create_number(peer_ni, "recv_count",
+                                               lpni_stats->recv_count)
+                           == NULL)
+                               goto out;
+
+                       if (cYAML_create_number(peer_ni, "drop_count",
+                                               lpni_stats->drop_count)
+                           == NULL)
+                               goto out;
                }
 
                if (l_errno != ENOENT) {