From d704b0ec46ed5b77ff955f87044569dd98484333 Mon Sep 17 00:00:00 2001 From: Doug Oucharek Date: Thu, 12 May 2016 17:25:21 -0700 Subject: [PATCH] LU-7734 lnet: Add peer_ni and NI stats for DLC This patch adds three stats to the peer_ni and NI structures: send_count, recv_count, and drop_count. These stats get printed when you do an "lnetctl net show -v" (for NI) and "lnetctl peer show" (for peer_ni). Signed-off-by: Doug Oucharek Change-Id: Ic41c88cbc68dba677151d87a1fab53a48d36ea29 Reviewed-on: http://review.whamcloud.com/20170 Reviewed-by: Amir Shehata Tested-by: Amir Shehata --- lnet/include/lnet/lib-dlc.h | 6 ++++ lnet/include/lnet/lib-lnet.h | 3 +- lnet/include/lnet/lib-types.h | 13 +++++++- lnet/lnet/api-ni.c | 32 +++++++++++++++----- lnet/lnet/lib-move.c | 4 +++ lnet/lnet/lib-msg.c | 8 +++++ lnet/lnet/peer.c | 7 ++++- lnet/utils/lnetconfig/liblnetconfig.c | 56 ++++++++++++++++++++++++++++++----- 8 files changed, 111 insertions(+), 18 deletions(-) diff --git a/lnet/include/lnet/lib-dlc.h b/lnet/include/lnet/lib-dlc.h index 016e392..454b962 100644 --- a/lnet/include/lnet/lib-dlc.h +++ b/lnet/include/lnet/lib-dlc.h @@ -141,6 +141,12 @@ struct lnet_ioctl_config_data { char cfg_bulk[0]; }; +struct lnet_ioctl_element_stats { + __u32 send_count; + __u32 recv_count; + __u32 drop_count; +}; + /* * lnet_ioctl_config_ni * This structure describes an NI configuration. 
There are multiple components diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 2ef7187..89efd70 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -818,7 +818,8 @@ bool lnet_peer_is_ni_pref_locked(struct lnet_peer_ni *lpni, int lnet_add_peer_ni_to_peer(lnet_nid_t key_nid, lnet_nid_t nid, bool mr); int lnet_del_peer_ni_from_peer(lnet_nid_t key_nid, lnet_nid_t nid); int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid, - struct lnet_peer_ni_credit_info *peer_ni_info); + struct lnet_peer_ni_credit_info *peer_ni_info, + struct lnet_ioctl_element_stats *peer_ni_stats); int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid, char alivness[LNET_MAX_STR_LEN], __u32 *cpt_iter, __u32 *refcount, diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 84b1064..77b3564 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -292,6 +292,12 @@ enum lnet_ni_state { LNET_NI_STATE_DELETING }; +struct lnet_element_stats { + atomic_t send_count; + atomic_t recv_count; + atomic_t drop_count; +}; + struct lnet_net { /* chain on the ln_nets */ struct list_head net_list; @@ -381,6 +387,9 @@ typedef struct lnet_ni { /* lnd tunables set explicitly */ bool ni_lnd_tunables_set; + /* NI statistics */ + struct lnet_element_stats ni_stats; + /* physical device CPT */ int dev_cpt; @@ -427,7 +436,7 @@ typedef struct { } lnet_rc_data_t; struct lnet_peer_ni { - /* cahian on peer_net */ + /* chain on peer_net */ struct list_head lpni_on_peer_net_list; /* chain on remote peer list */ struct list_head lpni_on_remote_peer_ni_list; @@ -441,6 +450,8 @@ struct lnet_peer_ni { struct list_head lpni_rtr_list; /* pointer to peer net I'm part of */ struct lnet_peer_net *lpni_peer_net; + /* statistics kept on each peer NI */ + struct lnet_element_stats lpni_stats; /* # tx credits available */ int lpni_txcredits; /* low water mark */ diff --git a/lnet/lnet/api-ni.c 
b/lnet/lnet/api-ni.c index c6b0907..3be384e 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -1952,6 +1952,7 @@ static int lnet_handle_dbg_task(struct lnet_ioctl_dbg *dbg, static void lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni, struct lnet_ioctl_config_lnd_tunables *tun, + struct lnet_ioctl_element_stats *stats, __u32 tun_size) { size_t min_size = 0; @@ -1977,6 +1978,13 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_ni *cfg_ni, memcpy(&tun->lt_cmn, &ni->ni_net->net_tunables, sizeof(tun->lt_cmn)); + if (stats) { + /* snapshot all three NI counters for the ioctl reply */ + stats->send_count = atomic_read(&ni->ni_stats.send_count); + stats->recv_count = atomic_read(&ni->ni_stats.recv_count); + stats->drop_count = atomic_read(&ni->ni_stats.drop_count); + } + /* * tun->lt_tun will always be present, but in order to be * backwards compatible, we need to deal with the cases when @@ -2173,13 +2181,14 @@ lnet_get_net_config(struct lnet_ioctl_config_data *config) int lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni, struct lnet_ioctl_config_lnd_tunables *tun, + struct lnet_ioctl_element_stats *stats, __u32 tun_size) { struct lnet_ni *ni; int cpt; int rc = -ENOENT; - if (!cfg_ni || !tun) + if (!cfg_ni || !tun || !stats) return -EINVAL; cpt = lnet_net_lock_current(); @@ -2189,7 +2198,7 @@ lnet_get_ni_config(struct lnet_ioctl_config_ni *cfg_ni, if (ni) { rc = 0; lnet_ni_lock(ni); - lnet_fill_ni_info(ni, cfg_ni, tun, tun_size); + lnet_fill_ni_info(ni, cfg_ni, tun, stats, tun_size); lnet_ni_unlock(ni); } @@ -2662,20 +2671,24 @@ LNetCtl(unsigned int cmd, void *arg) case IOC_LIBCFS_GET_LOCAL_NI: { struct lnet_ioctl_config_ni *cfg_ni; struct lnet_ioctl_config_lnd_tunables *tun = NULL; + struct lnet_ioctl_element_stats *stats; __u32 tun_size; cfg_ni = arg; /* get the tunables if they are available */ if (cfg_ni->lic_cfg_hdr.ioc_len < - sizeof(*cfg_ni) + sizeof(*tun)) + sizeof(*cfg_ni) + sizeof(*stats) + sizeof(*tun)) return -EINVAL; + stats = (struct lnet_ioctl_element_stats *) + cfg_ni->lic_bulk; tun = (struct
lnet_ioctl_config_lnd_tunables *) - cfg_ni->lic_bulk; + (cfg_ni->lic_bulk + sizeof(*stats)); - tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni); + tun_size = cfg_ni->lic_cfg_hdr.ioc_len - sizeof(*cfg_ni) - + sizeof(*stats); - return lnet_get_ni_config(cfg_ni, tun, tun_size); + return lnet_get_ni_config(cfg_ni, tun, stats, tun_size); } case IOC_LIBCFS_GET_NET: { @@ -2806,15 +2819,20 @@ LNetCtl(unsigned int cmd, void *arg) case IOC_LIBCFS_GET_PEER_NI: { struct lnet_ioctl_peer_cfg *cfg = arg; struct lnet_peer_ni_credit_info *lpni_cri; - size_t total = sizeof(*cfg) + sizeof(*lpni_cri); + struct lnet_ioctl_element_stats *lpni_stats; + size_t total = sizeof(*cfg) + sizeof(*lpni_cri) + + sizeof(*lpni_stats); if (cfg->prcfg_hdr.ioc_len < total) return -EINVAL; lpni_cri = (struct lnet_peer_ni_credit_info*) cfg->prcfg_bulk; + lpni_stats = (struct lnet_ioctl_element_stats *) + (cfg->prcfg_bulk + sizeof(*lpni_cri)); return lnet_get_peer_info(cfg->prcfg_idx, &cfg->prcfg_key_nid, - &cfg->prcfg_cfg_nid, lpni_cri); + &cfg->prcfg_cfg_nid, lpni_cri, + lpni_stats); } case IOC_LIBCFS_NOTIFY_ROUTER: { diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index f42857a..938e536 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -805,6 +805,10 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) the_lnet.ln_counters[cpt]->drop_count++; the_lnet.ln_counters[cpt]->drop_length += msg->msg_len; lnet_net_unlock(cpt); + if (msg->msg_txpeer) + atomic_inc(&msg->msg_txpeer->lpni_stats.drop_count); + if (msg->msg_txni) + atomic_inc(&msg->msg_txni->ni_stats.drop_count); CNETERR("Dropping message for %s: peer not alive\n", libcfs_id2str(msg->msg_target)); diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index 42099cc..a6ffe8e 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -216,6 +216,10 @@ lnet_msg_decommit_tx(lnet_msg_t *msg, int status) } counters->send_count++; + if (msg->msg_txpeer) + atomic_inc(&msg->msg_txpeer->lpni_stats.send_count); + if
(msg->msg_txni) + atomic_inc(&msg->msg_txni->ni_stats.send_count); out: lnet_return_tx_credits_locked(msg); msg->msg_tx_committed = 0; @@ -267,6 +271,10 @@ lnet_msg_decommit_rx(lnet_msg_t *msg, int status) } counters->recv_count++; + if (msg->msg_rxpeer) + atomic_inc(&msg->msg_rxpeer->lpni_stats.recv_count); + if (msg->msg_rxni) + atomic_inc(&msg->msg_rxni->ni_stats.recv_count); if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY) counters->recv_length += msg->msg_wanted; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index c9e93c5..86b56ae 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -965,7 +965,8 @@ int lnet_get_peer_ni_info(__u32 peer_index, __u64 *nid, } int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid, - struct lnet_peer_ni_credit_info *peer_ni_info) + struct lnet_peer_ni_credit_info *peer_ni_info, + struct lnet_ioctl_element_stats *peer_ni_stats) { struct lnet_peer_ni *lpni = NULL; struct lnet_peer_net *lpn = NULL; @@ -992,5 +993,9 @@ int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid, peer_ni_info->cr_peer_min_rtr_credits = lpni->lpni_mintxcredits; peer_ni_info->cr_peer_tx_qnob = lpni->lpni_txqnob; + peer_ni_stats->send_count = atomic_read(&lpni->lpni_stats.send_count); + peer_ni_stats->recv_count = atomic_read(&lpni->lpni_stats.recv_count); + peer_ni_stats->drop_count = atomic_read(&lpni->lpni_stats.drop_count); + return 0; } diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index 335072d..917ca44 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -1405,6 +1405,7 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no, char *buf; struct lnet_ioctl_config_ni *ni_data; struct lnet_ioctl_config_lnd_tunables *lnd; + struct lnet_ioctl_element_stats *stats; __u32 net = LNET_NIDNET(LNET_NID_ANY); __u32 prev_net = LNET_NIDNET(LNET_NID_ANY); int rc = LUSTRE_CFG_RC_OUT_OF_MEM, i, j; @@ -1412,14 +1413,14 
@@ int lustre_lnet_show_net(char *nw, int detail, int seq_no, struct cYAML *root = NULL, *tunables = NULL, *net_node = NULL, *interfaces = NULL, *item = NULL, *first_seq = NULL, - *tmp = NULL; + *tmp = NULL, *statistics = NULL; int str_buf_len = LNET_MAX_SHOW_NUM_CPT * 2; char str_buf[str_buf_len]; char *pos; char err_str[LNET_MAX_STR_LEN]; bool exist = false, new_net = true; int net_num = 0; - size_t buf_size = sizeof(*ni_data) + sizeof(*lnd); + size_t buf_size = sizeof(*ni_data) + sizeof(*lnd) + sizeof(*stats); snprintf(err_str, sizeof(err_str), "\"out of memory\""); @@ -1479,7 +1480,9 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no, rc = -1; exist = true; - lnd = (struct lnet_ioctl_config_lnd_tunables *)ni_data->lic_bulk; + stats = (struct lnet_ioctl_element_stats *)ni_data->lic_bulk; + lnd = (struct lnet_ioctl_config_lnd_tunables *) + (ni_data->lic_bulk + sizeof(*stats)); if (rc_net != prev_net) { prev_net = rc_net; @@ -1540,6 +1543,25 @@ int lustre_lnet_show_net(char *nw, int detail, int seq_no, if (detail) { char *limit; + statistics = cYAML_create_object(item, "statistics"); + if (statistics == NULL) + goto out; + + if (cYAML_create_number(statistics, "send_count", + stats->send_count) + == NULL) + goto out; + + if (cYAML_create_number(statistics, "recv_count", + stats->recv_count) + == NULL) + goto out; + + if (cYAML_create_number(statistics, "drop_count", + stats->drop_count) + == NULL) + goto out; + tunables = cYAML_create_object(item, "tunables"); if (!tunables) goto out; @@ -1889,13 +1911,16 @@ int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc, { struct lnet_ioctl_peer_cfg *peer_info; struct lnet_peer_ni_credit_info *lpni_cri; + struct lnet_ioctl_element_stats *lpni_stats; int rc = LUSTRE_CFG_RC_OUT_OF_MEM, ncpt = 0, i = 0, j = 0; int l_errno = 0; struct cYAML *root = NULL, *peer = NULL, *peer_ni = NULL, *first_seq = NULL, *peer_root = NULL, *tmp = NULL; char err_str[LNET_MAX_STR_LEN]; lnet_nid_t prev_primary_nid = 
LNET_NID_ANY, primary_nid = LNET_NID_ANY; - char *data = calloc(sizeof(*peer_info) + sizeof(*lpni_cri), 1); + size_t data_size = sizeof(*peer_info) + sizeof(*lpni_cri) + + sizeof(*lpni_stats); + char *data = calloc(data_size, 1); bool new_peer = true; snprintf(err_str, sizeof(err_str), @@ -1920,10 +1945,9 @@ int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc, do { for (i = 0;; i++) { - memset(data, 0, sizeof(*peer_info) + sizeof(*lpni_cri)); + memset(data, 0, data_size); LIBCFS_IOC_INIT_V2(*peer_info, prcfg_hdr); - peer_info->prcfg_hdr.ioc_len = sizeof(*peer_info) + - sizeof(*lpni_cri); + peer_info->prcfg_hdr.ioc_len = data_size; peer_info->prcfg_idx = i; rc = l_ioctl(LNET_DEV_ID, @@ -1938,6 +1962,9 @@ int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc, continue; lpni_cri = (struct lnet_peer_ni_credit_info*)peer_info->prcfg_bulk; + lpni_stats = (struct lnet_ioctl_element_stats *) + (peer_info->prcfg_bulk + + sizeof(*lpni_cri)); peer = cYAML_create_seq_item(peer_root); if (peer == NULL) @@ -2006,6 +2033,21 @@ int lustre_lnet_show_peer(char *knid, int seq_no, struct cYAML **show_rc, lpni_cri->cr_peer_tx_qnob) == NULL) goto out; + + if (cYAML_create_number(peer_ni, "send_count", + lpni_stats->send_count) + == NULL) + goto out; + + if (cYAML_create_number(peer_ni, "recv_count", + lpni_stats->recv_count) + == NULL) + goto out; + + if (cYAML_create_number(peer_ni, "drop_count", + lpni_stats->drop_count) + == NULL) + goto out; } if (l_errno != ENOENT) { -- 1.8.3.1