Whamcloud - gitweb
LU-11422 lnet: Fix selftest backward compatibility post health 42/33242/8
authorSonia Sharma <sharmaso@whamcloud.com>
Sun, 23 Sep 2018 16:15:31 +0000 (12:15 -0400)
committerOleg Drokin <green@whamcloud.com>
Wed, 10 Oct 2018 01:51:17 +0000 (01:51 +0000)
After the LNet health feature landed, lnet-selftest lost
backward compatibility. This patch fixes that by
adding a new structure, lnet_counters_common, similar
to the pre-health version of lnet_counters. Selftest now
depends on lnet_counters_common instead of lnet_counters.

Also add a struct lnet_counters_health specifically
for the health stats.

Change-Id: Ia066ff7b3522738080d192e80c14cf5ac55a33e2
Test-Parameters: trivial testlist=lnet-selftest clientjob=lustre-b2_10 clientbuildno=136
Signed-off-by: Sonia Sharma <sharmaso@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/33242
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Amir Shehata <ashehata@whamcloud.com>
Tested-by: Jenkins
Tested-by: Andreas Dilger <adilger@whamcloud.com>
12 files changed:
lnet/include/lnet/lib-lnet.h
lnet/include/uapi/linux/lnet/lnet-types.h
lnet/lnet/api-ni.c
lnet/lnet/lib-move.c
lnet/lnet/lib-msg.c
lnet/lnet/router_proc.c
lnet/selftest/console.c
lnet/selftest/framework.c
lnet/selftest/module.c
lnet/selftest/rpc.h
lnet/utils/lnetconfig/liblnetconfig.c
lnet/utils/lst.c

index 2b52cb7..4441009 100644 (file)
@@ -496,7 +496,7 @@ lnet_rspt_alloc(int cpt)
        struct lnet_rsp_tracker *rspt;
        LIBCFS_ALLOC(rspt, sizeof(*rspt));
        lnet_net_lock(cpt);
-       the_lnet.ln_counters[cpt]->rst_alloc++;
+       the_lnet.ln_counters[cpt]->lct_health.lch_rst_alloc++;
        lnet_net_unlock(cpt);
        return rspt;
 }
@@ -506,7 +506,7 @@ lnet_rspt_free(struct lnet_rsp_tracker *rspt, int cpt)
 {
        LIBCFS_FREE(rspt, sizeof(*rspt));
        lnet_net_lock(cpt);
-       the_lnet.ln_counters[cpt]->rst_alloc--;
+       the_lnet.ln_counters[cpt]->lct_health.lch_rst_alloc--;
        lnet_net_unlock(cpt);
 }
 
@@ -729,6 +729,7 @@ bool lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg);
 
 /** @} lnet_fault_simulation */
 
+void lnet_counters_get_common(struct lnet_counters_common *common);
 void lnet_counters_get(struct lnet_counters *counters);
 void lnet_counters_reset(void);
 
index 3f5b8bd..c7e779a 100644 (file)
@@ -223,33 +223,41 @@ struct lnet_acceptor_connreq {
 
 #define LNET_PROTO_ACCEPTOR_VERSION    1
 
-struct lnet_counters {
-       __u32   msgs_alloc;
-       __u32   msgs_max;
-       __u32   rst_alloc;
-       __u32   errors;
-       __u32   send_count;
-       __u32   recv_count;
-       __u32   route_count;
-       __u32   drop_count;
-       __u32   resend_count;
-       __u32   response_timeout_count;
-       __u32   local_interrupt_count;
-       __u32   local_dropped_count;
-       __u32   local_aborted_count;
-       __u32   local_no_route_count;
-       __u32   local_timeout_count;
-       __u32   local_error_count;
-       __u32   remote_dropped_count;
-       __u32   remote_error_count;
-       __u32   remote_timeout_count;
-       __u32   network_timeout_count;
-       __u64   send_length;
-       __u64   recv_length;
-       __u64   route_length;
-       __u64   drop_length;
+struct lnet_counters_common {
+       __u32   lcc_msgs_alloc;
+       __u32   lcc_msgs_max;
+       __u32   lcc_errors;
+       __u32   lcc_send_count;
+       __u32   lcc_recv_count;
+       __u32   lcc_route_count;
+       __u32   lcc_drop_count;
+       __u64   lcc_send_length;
+       __u64   lcc_recv_length;
+       __u64   lcc_route_length;
+       __u64   lcc_drop_length;
 } WIRE_ATTR;
 
+struct lnet_counters_health {
+       __u32   lch_rst_alloc;
+       __u32   lch_resend_count;
+       __u32   lch_response_timeout_count;
+       __u32   lch_local_interrupt_count;
+       __u32   lch_local_dropped_count;
+       __u32   lch_local_aborted_count;
+       __u32   lch_local_no_route_count;
+       __u32   lch_local_timeout_count;
+       __u32   lch_local_error_count;
+       __u32   lch_remote_dropped_count;
+       __u32   lch_remote_error_count;
+       __u32   lch_remote_timeout_count;
+       __u32   lch_network_timeout_count;
+};
+
+struct lnet_counters {
+       struct lnet_counters_common lct_common;
+       struct lnet_counters_health lct_health;
+};
+
 #define LNET_NI_STATUS_UP      0x15aac0de
 #define LNET_NI_STATUS_DOWN    0xdeadface
 #define LNET_NI_STATUS_INVALID 0x00000000
index 059aa41..b2d4c25 100644 (file)
@@ -739,41 +739,70 @@ lnet_unregister_lnd(struct lnet_lnd *lnd)
 EXPORT_SYMBOL(lnet_unregister_lnd);
 
 void
+lnet_counters_get_common(struct lnet_counters_common *common)
+{
+       struct lnet_counters *ctr;
+       int i;
+
+       memset(common, 0, sizeof(*common));
+
+       lnet_net_lock(LNET_LOCK_EX);
+
+       cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
+               common->lcc_msgs_max     += ctr->lct_common.lcc_msgs_max;
+               common->lcc_msgs_alloc   += ctr->lct_common.lcc_msgs_alloc;
+               common->lcc_errors       += ctr->lct_common.lcc_errors;
+               common->lcc_send_count   += ctr->lct_common.lcc_send_count;
+               common->lcc_recv_count   += ctr->lct_common.lcc_recv_count;
+               common->lcc_route_count  += ctr->lct_common.lcc_route_count;
+               common->lcc_drop_count   += ctr->lct_common.lcc_drop_count;
+               common->lcc_send_length  += ctr->lct_common.lcc_send_length;
+               common->lcc_recv_length  += ctr->lct_common.lcc_recv_length;
+               common->lcc_route_length += ctr->lct_common.lcc_route_length;
+               common->lcc_drop_length  += ctr->lct_common.lcc_drop_length;
+       }
+       lnet_net_unlock(LNET_LOCK_EX);
+}
+EXPORT_SYMBOL(lnet_counters_get_common);
+
+void
 lnet_counters_get(struct lnet_counters *counters)
 {
        struct lnet_counters *ctr;
+       struct lnet_counters_health *health = &counters->lct_health;
        int             i;
 
        memset(counters, 0, sizeof(*counters));
 
+       lnet_counters_get_common(&counters->lct_common);
+
        lnet_net_lock(LNET_LOCK_EX);
 
        cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
-               counters->msgs_max     += ctr->msgs_max;
-               counters->msgs_alloc   += ctr->msgs_alloc;
-               counters->rst_alloc    += ctr->rst_alloc;
-               counters->errors       += ctr->errors;
-               counters->resend_count += ctr->resend_count;
-               counters->response_timeout_count += ctr->response_timeout_count;
-               counters->local_interrupt_count += ctr->local_interrupt_count;
-               counters->local_dropped_count += ctr->local_dropped_count;
-               counters->local_aborted_count += ctr->local_aborted_count;
-               counters->local_no_route_count += ctr->local_no_route_count;
-               counters->local_timeout_count += ctr->local_timeout_count;
-               counters->local_error_count += ctr->local_error_count;
-               counters->remote_dropped_count += ctr->remote_dropped_count;
-               counters->remote_error_count += ctr->remote_error_count;
-               counters->remote_timeout_count += ctr->remote_timeout_count;
-               counters->network_timeout_count += ctr->network_timeout_count;
-               counters->send_count   += ctr->send_count;
-               counters->recv_count   += ctr->recv_count;
-               counters->route_count  += ctr->route_count;
-               counters->drop_count   += ctr->drop_count;
-               counters->send_length  += ctr->send_length;
-               counters->recv_length  += ctr->recv_length;
-               counters->route_length += ctr->route_length;
-               counters->drop_length  += ctr->drop_length;
-
+               health->lch_rst_alloc    += ctr->lct_health.lch_rst_alloc;
+               health->lch_resend_count += ctr->lct_health.lch_resend_count;
+               health->lch_response_timeout_count +=
+                               ctr->lct_health.lch_response_timeout_count;
+               health->lch_local_interrupt_count +=
+                               ctr->lct_health.lch_local_interrupt_count;
+               health->lch_local_dropped_count +=
+                               ctr->lct_health.lch_local_dropped_count;
+               health->lch_local_aborted_count +=
+                               ctr->lct_health.lch_local_aborted_count;
+               health->lch_local_no_route_count +=
+                               ctr->lct_health.lch_local_no_route_count;
+               health->lch_local_timeout_count +=
+                               ctr->lct_health.lch_local_timeout_count;
+               health->lch_local_error_count +=
+                               ctr->lct_health.lch_local_error_count;
+               health->lch_remote_dropped_count +=
+                               ctr->lct_health.lch_remote_dropped_count;
+               health->lch_remote_error_count +=
+                               ctr->lct_health.lch_remote_error_count;
+               health->lch_remote_timeout_count +=
+                               ctr->lct_health.lch_remote_timeout_count;
+               health->lch_network_timeout_count +=
+                               ctr->lct_health.lch_network_timeout_count;
        }
        lnet_net_unlock(LNET_LOCK_EX);
 }
index c572751..d5f1132 100644 (file)
@@ -942,8 +942,9 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send)
        /* NB 'lp' is always the next hop */
        if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
            lnet_peer_alive_locked(ni, lp, msg) == 0) {
-               the_lnet.ln_counters[cpt]->drop_count++;
-               the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
+               the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++;
+               the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length +=
+                       msg->msg_len;
                lnet_net_unlock(cpt);
                if (msg->msg_txpeer)
                        lnet_incr_stats(&msg->msg_txpeer->lpni_stats,
@@ -2746,7 +2747,7 @@ lnet_finalize_expired_responses(bool force)
                                lnet_res_unlock(i);
 
                                lnet_net_lock(i);
-                               the_lnet.ln_counters[i]->response_timeout_count++;
+                               the_lnet.ln_counters[i]->lct_health.lch_response_timeout_count++;
                                lnet_net_unlock(i);
 
                                list_del_init(&rspt->rspt_on_list);
@@ -2832,7 +2833,7 @@ lnet_resend_pending_msgs_locked(struct list_head *resendq, int cpt)
                        }
                        lnet_net_lock(cpt);
                        if (!rc)
-                               the_lnet.ln_counters[cpt]->resend_count++;
+                               the_lnet.ln_counters[cpt]->lct_health.lch_resend_count++;
                }
        }
 }
@@ -3600,8 +3601,8 @@ lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob,
 {
        lnet_net_lock(cpt);
        lnet_incr_stats(&ni->ni_stats, msg_type, LNET_STATS_TYPE_DROP);
-       the_lnet.ln_counters[cpt]->drop_count++;
-       the_lnet.ln_counters[cpt]->drop_length += nob;
+       the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++;
+       the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length += nob;
        lnet_net_unlock(cpt);
 
        lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob);
@@ -4591,8 +4592,9 @@ lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg)
 
        lnet_net_lock(cpt);
        lnet_incr_stats(&ni->ni_stats, LNET_MSG_GET, LNET_STATS_TYPE_DROP);
-       the_lnet.ln_counters[cpt]->drop_count++;
-       the_lnet.ln_counters[cpt]->drop_length += getmd->md_length;
+       the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++;
+       the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length +=
+               getmd->md_length;
        lnet_net_unlock(cpt);
 
        if (msg != NULL)
index 3bd6946..2f3b689 100644 (file)
@@ -142,7 +142,7 @@ void
 lnet_msg_commit(struct lnet_msg *msg, int cpt)
 {
        struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
-       struct lnet_counters *counters = the_lnet.ln_counters[cpt];
+       struct lnet_counters_common *common;
        s64 timeout_ns;
 
        /* set the message deadline */
@@ -171,30 +171,31 @@ lnet_msg_commit(struct lnet_msg *msg, int cpt)
        msg->msg_onactivelist = 1;
        list_add_tail(&msg->msg_activelist, &container->msc_active);
 
-       counters->msgs_alloc++;
-       if (counters->msgs_alloc > counters->msgs_max)
-               counters->msgs_max = counters->msgs_alloc;
+       common = &the_lnet.ln_counters[cpt]->lct_common;
+       common->lcc_msgs_alloc++;
+       if (common->lcc_msgs_alloc > common->lcc_msgs_max)
+               common->lcc_msgs_max = common->lcc_msgs_alloc;
 }
 
 static void
 lnet_msg_decommit_tx(struct lnet_msg *msg, int status)
 {
-       struct lnet_counters *counters;
+       struct lnet_counters_common *common;
        struct lnet_event *ev = &msg->msg_ev;
 
        LASSERT(msg->msg_tx_committed);
        if (status != 0)
                goto out;
 
-       counters = the_lnet.ln_counters[msg->msg_tx_cpt];
+       common = &(the_lnet.ln_counters[msg->msg_tx_cpt]->lct_common);
        switch (ev->type) {
        default: /* routed message */
                LASSERT(msg->msg_routing);
                LASSERT(msg->msg_rx_committed);
                LASSERT(ev->type == 0);
 
-               counters->route_length += msg->msg_len;
-               counters->route_count++;
+               common->lcc_route_length += msg->msg_len;
+               common->lcc_route_count++;
                goto incr_stats;
 
        case LNET_EVENT_PUT:
@@ -208,7 +209,7 @@ lnet_msg_decommit_tx(struct lnet_msg *msg, int status)
        case LNET_EVENT_SEND:
                LASSERT(!msg->msg_rx_committed);
                if (msg->msg_type == LNET_MSG_PUT)
-                       counters->send_length += msg->msg_len;
+                       common->lcc_send_length += msg->msg_len;
                break;
 
        case LNET_EVENT_GET:
@@ -220,7 +221,7 @@ lnet_msg_decommit_tx(struct lnet_msg *msg, int status)
                break;
        }
 
-       counters->send_count++;
+       common->lcc_send_count++;
 
 incr_stats:
        if (msg->msg_txpeer)
@@ -239,7 +240,7 @@ incr_stats:
 static void
 lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
 {
-       struct lnet_counters *counters;
+       struct lnet_counters_common *common;
        struct lnet_event *ev = &msg->msg_ev;
 
        LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */
@@ -248,7 +249,7 @@ lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
        if (status != 0)
                goto out;
 
-       counters = the_lnet.ln_counters[msg->msg_rx_cpt];
+       common = &(the_lnet.ln_counters[msg->msg_rx_cpt]->lct_common);
        switch (ev->type) {
        default:
                LASSERT(ev->type == 0);
@@ -266,7 +267,7 @@ lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
                 * lnet_msg_decommit_tx(), see details in lnet_parse_get() */
                LASSERT(msg->msg_type == LNET_MSG_REPLY ||
                        msg->msg_type == LNET_MSG_GET);
-               counters->send_length += msg->msg_wanted;
+               common->lcc_send_length += msg->msg_wanted;
                break;
 
        case LNET_EVENT_PUT:
@@ -281,7 +282,7 @@ lnet_msg_decommit_rx(struct lnet_msg *msg, int status)
                break;
        }
 
-       counters->recv_count++;
+       common->lcc_recv_count++;
 
 incr_stats:
        if (msg->msg_rxpeer)
@@ -293,7 +294,7 @@ incr_stats:
                                msg->msg_type,
                                LNET_STATS_TYPE_RECV);
        if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY)
-               counters->recv_length += msg->msg_wanted;
+               common->lcc_recv_length += msg->msg_wanted;
 
  out:
        lnet_return_rx_credits_locked(msg);
@@ -326,7 +327,7 @@ lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status)
        list_del(&msg->msg_activelist);
        msg->msg_onactivelist = 0;
 
-       the_lnet.ln_counters[cpt2]->msgs_alloc--;
+       the_lnet.ln_counters[cpt2]->lct_common.lcc_msgs_alloc--;
 
        if (cpt2 != cpt) {
                lnet_net_unlock(cpt2);
@@ -541,52 +542,54 @@ lnet_incr_hstats(struct lnet_msg *msg, enum lnet_msg_hstatus hstatus)
 {
        struct lnet_ni *ni = msg->msg_txni;
        struct lnet_peer_ni *lpni = msg->msg_txpeer;
-       struct lnet_counters *counters = the_lnet.ln_counters[0];
+       struct lnet_counters_health *health;
+
+       health = &the_lnet.ln_counters[0]->lct_health;
 
        switch (hstatus) {
        case LNET_MSG_STATUS_LOCAL_INTERRUPT:
                atomic_inc(&ni->ni_hstats.hlt_local_interrupt);
-               counters->local_interrupt_count++;
+               health->lch_local_interrupt_count++;
                break;
        case LNET_MSG_STATUS_LOCAL_DROPPED:
                atomic_inc(&ni->ni_hstats.hlt_local_dropped);
-               counters->local_dropped_count++;
+               health->lch_local_dropped_count++;
                break;
        case LNET_MSG_STATUS_LOCAL_ABORTED:
                atomic_inc(&ni->ni_hstats.hlt_local_aborted);
-               counters->local_aborted_count++;
+               health->lch_local_aborted_count++;
                break;
        case LNET_MSG_STATUS_LOCAL_NO_ROUTE:
                atomic_inc(&ni->ni_hstats.hlt_local_no_route);
-               counters->local_no_route_count++;
+               health->lch_local_no_route_count++;
                break;
        case LNET_MSG_STATUS_LOCAL_TIMEOUT:
                atomic_inc(&ni->ni_hstats.hlt_local_timeout);
-               counters->local_timeout_count++;
+               health->lch_local_timeout_count++;
                break;
        case LNET_MSG_STATUS_LOCAL_ERROR:
                atomic_inc(&ni->ni_hstats.hlt_local_error);
-               counters->local_error_count++;
+               health->lch_local_error_count++;
                break;
        case LNET_MSG_STATUS_REMOTE_DROPPED:
                if (lpni)
                        atomic_inc(&lpni->lpni_hstats.hlt_remote_dropped);
-               counters->remote_dropped_count++;
+               health->lch_remote_dropped_count++;
                break;
        case LNET_MSG_STATUS_REMOTE_ERROR:
                if (lpni)
                        atomic_inc(&lpni->lpni_hstats.hlt_remote_error);
-               counters->remote_error_count++;
+               health->lch_remote_error_count++;
                break;
        case LNET_MSG_STATUS_REMOTE_TIMEOUT:
                if (lpni)
                        atomic_inc(&lpni->lpni_hstats.hlt_remote_timeout);
-               counters->remote_timeout_count++;
+               health->lch_remote_timeout_count++;
                break;
        case LNET_MSG_STATUS_NETWORK_TIMEOUT:
                if (lpni)
                        atomic_inc(&lpni->lpni_hstats.hlt_network_timeout);
-               counters->network_timeout_count++;
+               health->lch_network_timeout_count++;
                break;
        case LNET_MSG_STATUS_OK:
                break;
index b4e4d7b..c0c5c25 100644 (file)
@@ -82,6 +82,7 @@ static int __proc_lnet_stats(void *data, int write,
 {
        int              rc;
        struct lnet_counters *ctrs;
+       struct lnet_counters_common common;
        int              len;
        char            *tmpstr;
        const int        tmpsiz = 256; /* 7 %u and 4 __u64 */
@@ -104,16 +105,17 @@ static int __proc_lnet_stats(void *data, int write,
        }
 
        lnet_counters_get(ctrs);
+       common = ctrs->lct_common;
 
        len = snprintf(tmpstr, tmpsiz,
                       "%u %u %u %u %u %u %u %llu %llu "
                       "%llu %llu",
-                      ctrs->msgs_alloc, ctrs->msgs_max,
-                      ctrs->errors,
-                      ctrs->send_count, ctrs->recv_count,
-                      ctrs->route_count, ctrs->drop_count,
-                      ctrs->send_length, ctrs->recv_length,
-                      ctrs->route_length, ctrs->drop_length);
+                      common.lcc_msgs_alloc, common.lcc_msgs_max,
+                      common.lcc_errors,
+                      common.lcc_send_count, common.lcc_recv_count,
+                      common.lcc_route_count, common.lcc_drop_count,
+                      common.lcc_send_length, common.lcc_recv_length,
+                      common.lcc_route_length, common.lcc_drop_length);
 
        if (pos >= min_t(int, len, strlen(tmpstr)))
                rc = 0;
index a595476..1e37454 100644 (file)
@@ -1472,7 +1472,7 @@ lstcon_statrpc_readent(int transop, struct srpc_msg *msg,
        struct srpc_stat_reply *rep = &msg->msg_body.stat_reply;
        struct sfw_counters __user *sfwk_stat;
        struct srpc_counters __user *srpc_stat;
-       struct lnet_counters __user *lnet_stat;
+       struct lnet_counters_common __user *lnet_stat;
 
         if (rep->str_status != 0)
                 return 0;
@@ -1480,7 +1480,7 @@ lstcon_statrpc_readent(int transop, struct srpc_msg *msg,
        sfwk_stat = (struct sfw_counters __user *)&ent_up->rpe_payload[0];
        srpc_stat = (struct srpc_counters __user *)
                ((char __user *)sfwk_stat + sizeof(*sfwk_stat));
-       lnet_stat = (struct lnet_counters __user *)
+       lnet_stat = (struct lnet_counters_common __user *)
                ((char __user *)srpc_stat + sizeof(*srpc_stat));
 
        if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) ||
index 7e10c09..000fca9 100644 (file)
@@ -51,49 +51,49 @@ MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never
 
 #define sfw_unpack_id(id)               \
 do {                                    \
-        __swab64s(&(id).nid);           \
-        __swab32s(&(id).pid);           \
+       __swab64s(&(id).nid);           \
+       __swab32s(&(id).pid);           \
 } while (0)
 
 #define sfw_unpack_sid(sid)             \
 do {                                    \
-        __swab64s(&(sid).ses_nid);      \
-        __swab64s(&(sid).ses_stamp);    \
+       __swab64s(&(sid).ses_nid);      \
+       __swab64s(&(sid).ses_stamp);    \
 } while (0)
 
 #define sfw_unpack_fw_counters(fc)        \
 do {                                      \
-        __swab32s(&(fc).running_ms);      \
-        __swab32s(&(fc).active_batches);  \
-        __swab32s(&(fc).zombie_sessions); \
-        __swab32s(&(fc).brw_errors);      \
-        __swab32s(&(fc).ping_errors);     \
+       __swab32s(&(fc).running_ms);      \
+       __swab32s(&(fc).active_batches);  \
+       __swab32s(&(fc).zombie_sessions); \
+       __swab32s(&(fc).brw_errors);      \
+       __swab32s(&(fc).ping_errors);     \
 } while (0)
 
 #define sfw_unpack_rpc_counters(rc)     \
 do {                                    \
-        __swab32s(&(rc).errors);        \
-        __swab32s(&(rc).rpcs_sent);     \
-        __swab32s(&(rc).rpcs_rcvd);     \
-        __swab32s(&(rc).rpcs_dropped);  \
-        __swab32s(&(rc).rpcs_expired);  \
-        __swab64s(&(rc).bulk_get);      \
-        __swab64s(&(rc).bulk_put);      \
+       __swab32s(&(rc).errors);        \
+       __swab32s(&(rc).rpcs_sent);     \
+       __swab32s(&(rc).rpcs_rcvd);     \
+       __swab32s(&(rc).rpcs_dropped);  \
+       __swab32s(&(rc).rpcs_expired);  \
+       __swab64s(&(rc).bulk_get);      \
+       __swab64s(&(rc).bulk_put);      \
 } while (0)
 
 #define sfw_unpack_lnet_counters(lc)    \
 do {                                    \
-        __swab32s(&(lc).errors);        \
-        __swab32s(&(lc).msgs_max);      \
-        __swab32s(&(lc).msgs_alloc);    \
-        __swab32s(&(lc).send_count);    \
-        __swab32s(&(lc).recv_count);    \
-        __swab32s(&(lc).drop_count);    \
-        __swab32s(&(lc).route_count);   \
-        __swab64s(&(lc).send_length);   \
-        __swab64s(&(lc).recv_length);   \
-        __swab64s(&(lc).drop_length);   \
-        __swab64s(&(lc).route_length);  \
+       __swab32s(&(lc).lcc_errors);        \
+       __swab32s(&(lc).lcc_msgs_max);      \
+       __swab32s(&(lc).lcc_msgs_alloc);    \
+       __swab32s(&(lc).lcc_send_count);    \
+       __swab32s(&(lc).lcc_recv_count);    \
+       __swab32s(&(lc).lcc_drop_count);    \
+       __swab32s(&(lc).lcc_route_count);   \
+       __swab64s(&(lc).lcc_send_length);   \
+       __swab64s(&(lc).lcc_recv_length);   \
+       __swab64s(&(lc).lcc_drop_length);   \
+       __swab64s(&(lc).lcc_route_length);  \
 } while (0)
 
 #define sfw_test_active(t)      (atomic_read(&(t)->tsi_nactive) != 0)
@@ -390,7 +390,7 @@ sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply)
                 return 0;
         }
 
-       lnet_counters_get(&reply->str_lnet);
+       lnet_counters_get_common(&reply->str_lnet);
        srpc_get_counters(&reply->str_rpc);
 
         /* send over the msecs since the session was started
index 112f5bf..df66eab 100644 (file)
@@ -87,13 +87,13 @@ lnet_selftest_exit(void)
 void
 lnet_selftest_structure_assertion(void)
 {
-/*     CLASSERT(sizeof(struct srpc_msg) == 160);
+       CLASSERT(sizeof(struct srpc_msg) == 160);
        CLASSERT(sizeof(struct srpc_test_reqst) == 70);
        CLASSERT(offsetof(struct srpc_msg, msg_body.tes_reqst.tsr_concur) == 72);
        CLASSERT(offsetof(struct srpc_msg, msg_body.tes_reqst.tsr_ndest) == 78);
        CLASSERT(sizeof(struct srpc_stat_reply) == 136);
        CLASSERT(sizeof(struct srpc_stat_reqst) == 28);
-*/
+
 }
 
 static int __init
index c3a543a..8cc8c43 100644 (file)
@@ -157,11 +157,11 @@ struct srpc_stat_reqst {
 } WIRE_ATTR;
 
 struct srpc_stat_reply {
-        __u32                   str_status;
-       struct lst_sid          str_sid;
-       struct sfw_counters     str_fw;
-       struct srpc_counters    str_rpc;
-       struct lnet_counters    str_lnet;
+       __u32                    str_status;
+       struct lst_sid           str_sid;
+       struct sfw_counters      str_fw;
+       struct srpc_counters     str_rpc;
+       struct lnet_counters_common str_lnet;
 } WIRE_ATTR;
 
 struct test_bulk_req {
index ecc72e2..4978c42 100644 (file)
@@ -3607,6 +3607,7 @@ int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc,
                           struct cYAML **err_rc)
 {
        struct lnet_ioctl_lnet_stats data;
+       struct lnet_counters *cntrs;
        int rc;
        int l_errno;
        char err_str[LNET_MAX_STR_LEN];
@@ -3617,7 +3618,7 @@ int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc,
        LIBCFS_IOC_INIT_V2(data, st_hdr);
 
        rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_LNET_STATS, &data);
-       if (rc != 0) {
+       if (rc) {
                l_errno = errno;
                snprintf(err_str,
                         sizeof(err_str),
@@ -3629,111 +3630,113 @@ int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc,
 
        rc = LUSTRE_CFG_RC_OUT_OF_MEM;
 
+       cntrs = &data.st_cntrs;
+
        root = cYAML_create_object(NULL, NULL);
-       if (root == NULL)
+       if (!root)
                goto out;
 
        stats = cYAML_create_object(root, "statistics");
-       if (stats == NULL)
+       if (!stats)
                goto out;
 
-       if (cYAML_create_number(stats, "msgs_alloc",
-                               data.st_cntrs.msgs_alloc) == NULL)
+       if (!cYAML_create_number(stats, "msgs_alloc",
+                                cntrs->lct_common.lcc_msgs_alloc))
                goto out;
 
-       if (cYAML_create_number(stats, "msgs_max",
-                               data.st_cntrs.msgs_max) == NULL)
+       if (!cYAML_create_number(stats, "msgs_max",
+                                cntrs->lct_common.lcc_msgs_max))
                goto out;
 
-       if (cYAML_create_number(stats, "rst_alloc",
-                               data.st_cntrs.rst_alloc) == NULL)
+       if (!cYAML_create_number(stats, "rst_alloc",
+                                cntrs->lct_health.lch_rst_alloc))
                goto out;
 
-       if (cYAML_create_number(stats, "errors",
-                               data.st_cntrs.errors) == NULL)
+       if (!cYAML_create_number(stats, "errors",
+                                cntrs->lct_common.lcc_errors))
                goto out;
 
-       if (cYAML_create_number(stats, "send_count",
-                               data.st_cntrs.send_count) == NULL)
+       if (!cYAML_create_number(stats, "send_count",
+                                cntrs->lct_common.lcc_send_count))
                goto out;
 
-       if (cYAML_create_number(stats, "resend_count",
-                               data.st_cntrs.resend_count) == NULL)
+       if (!cYAML_create_number(stats, "resend_count",
+                                cntrs->lct_health.lch_resend_count))
                goto out;
 
-       if (cYAML_create_number(stats, "response_timeout_count",
-                               data.st_cntrs.response_timeout_count) == NULL)
+       if (!cYAML_create_number(stats, "response_timeout_count",
+                                cntrs->lct_health.lch_response_timeout_count))
                goto out;
 
-       if (cYAML_create_number(stats, "local_interrupt_count",
-                               data.st_cntrs.local_interrupt_count) == NULL)
+       if (!cYAML_create_number(stats, "local_interrupt_count",
+                                cntrs->lct_health.lch_local_interrupt_count))
                goto out;
 
-       if (cYAML_create_number(stats, "local_dropped_count",
-                               data.st_cntrs.local_dropped_count) == NULL)
+       if (!cYAML_create_number(stats, "local_dropped_count",
+                                cntrs->lct_health.lch_local_dropped_count))
                goto out;
 
-       if (cYAML_create_number(stats, "local_aborted_count",
-                               data.st_cntrs.local_aborted_count) == NULL)
+       if (!cYAML_create_number(stats, "local_aborted_count",
+                                cntrs->lct_health.lch_local_aborted_count))
                goto out;
 
-       if (cYAML_create_number(stats, "local_no_route_count",
-                               data.st_cntrs.local_no_route_count) == NULL)
+       if (!cYAML_create_number(stats, "local_no_route_count",
+                                cntrs->lct_health.lch_local_no_route_count))
                goto out;
 
-       if (cYAML_create_number(stats, "local_timeout_count",
-                               data.st_cntrs.local_timeout_count) == NULL)
+       if (!cYAML_create_number(stats, "local_timeout_count",
+                                cntrs->lct_health.lch_local_timeout_count))
                goto out;
 
-       if (cYAML_create_number(stats, "local_error_count",
-                               data.st_cntrs.local_error_count) == NULL)
+       if (!cYAML_create_number(stats, "local_error_count",
+                                cntrs->lct_health.lch_local_error_count))
                goto out;
 
-       if (cYAML_create_number(stats, "remote_dropped_count",
-                               data.st_cntrs.remote_dropped_count) == NULL)
+       if (!cYAML_create_number(stats, "remote_dropped_count",
+                                cntrs->lct_health.lch_remote_dropped_count))
                goto out;
 
-       if (cYAML_create_number(stats, "remote_error_count",
-                               data.st_cntrs.remote_error_count) == NULL)
+       if (!cYAML_create_number(stats, "remote_error_count",
+                                cntrs->lct_health.lch_remote_error_count))
                goto out;
 
-       if (cYAML_create_number(stats, "remote_timeout_count",
-                               data.st_cntrs.remote_timeout_count) == NULL)
+       if (!cYAML_create_number(stats, "remote_timeout_count",
+                                cntrs->lct_health.lch_remote_timeout_count))
                goto out;
 
-       if (cYAML_create_number(stats, "network_timeout_count",
-                               data.st_cntrs.network_timeout_count) == NULL)
+       if (!cYAML_create_number(stats, "network_timeout_count",
+                                cntrs->lct_health.lch_network_timeout_count))
                goto out;
 
-       if (cYAML_create_number(stats, "recv_count",
-                               data.st_cntrs.recv_count) == NULL)
+       if (!cYAML_create_number(stats, "recv_count",
+                                cntrs->lct_common.lcc_recv_count))
                goto out;
 
-       if (cYAML_create_number(stats, "route_count",
-                               data.st_cntrs.route_count) == NULL)
+       if (!cYAML_create_number(stats, "route_count",
+                                cntrs->lct_common.lcc_route_count))
                goto out;
 
-       if (cYAML_create_number(stats, "drop_count",
-                               data.st_cntrs.drop_count) == NULL)
+       if (!cYAML_create_number(stats, "drop_count",
+                                cntrs->lct_common.lcc_drop_count))
                goto out;
 
-       if (cYAML_create_number(stats, "send_length",
-                               data.st_cntrs.send_length) == NULL)
+       if (!cYAML_create_number(stats, "send_length",
+                                cntrs->lct_common.lcc_send_length))
                goto out;
 
-       if (cYAML_create_number(stats, "recv_length",
-                               data.st_cntrs.recv_length) == NULL)
+       if (!cYAML_create_number(stats, "recv_length",
+                                cntrs->lct_common.lcc_recv_length))
                goto out;
 
-       if (cYAML_create_number(stats, "route_length",
-                               data.st_cntrs.route_length) == NULL)
+       if (!cYAML_create_number(stats, "route_length",
+                                cntrs->lct_common.lcc_route_length))
                goto out;
 
-       if (cYAML_create_number(stats, "drop_length",
-                               data.st_cntrs.drop_length) == NULL)
+       if (!cYAML_create_number(stats, "drop_length",
+                                cntrs->lct_common.lcc_drop_length))
                goto out;
 
-       if (show_rc == NULL)
+       if (!show_rc)
                cYAML_print_tree(root);
 
        snprintf(err_str, sizeof(err_str), "\"success\"");
index 0b6d40a..7d11a24 100644 (file)
@@ -1555,27 +1555,27 @@ lst_stat_req_param_alloc(char *name, lst_stat_req_param_t **srpp, int save_old)
                 return rc;
         }
 
-        srp->srp_name = name;
+       srp->srp_name = name;
 
-        for (i = 0; i < count; i++) {
-                rc = lst_alloc_rpcent(&srp->srp_result[i], srp->srp_count,
+       for (i = 0; i < count; i++) {
+               rc = lst_alloc_rpcent(&srp->srp_result[i], srp->srp_count,
                                      sizeof(struct sfw_counters)  +
                                      sizeof(struct srpc_counters) +
-                                     sizeof(struct lnet_counters));
-                if (rc != 0) {
-                        fprintf(stderr, "Out of memory\n");
-                        break;
-                }
-        }
+                                     sizeof(struct lnet_counters_common));
+               if (rc != 0) {
+                       fprintf(stderr, "Out of memory\n");
+                       break;
+               }
+       }
 
-        if (rc == 0) {
-                *srpp = srp;
-                return 0;
-        }
+       if (rc == 0) {
+               *srpp = srp;
+               return 0;
+       }
 
-        lst_stat_req_param_free(srp);
+       lst_stat_req_param_free(srp);
 
-        return rc;
+       return rc;
 }
 
 typedef struct {
@@ -1646,16 +1646,16 @@ lst_timeval_diff(struct timeval *tv1,
 }
 
 static void
-lst_cal_lnet_stat(float delta, struct lnet_counters *lnet_new,
-                 struct lnet_counters *lnet_old, int mbs)
+lst_cal_lnet_stat(float delta, struct lnet_counters_common *lnet_new,
+                 struct lnet_counters_common *lnet_old, int mbs)
 {
        float perf;
        float rate;
        unsigned int unit_divisor;
 
        unit_divisor = (mbs) ? (1000 * 1000) : (1024 * 1024);
-       perf = (float)(lnet_new->send_length -
-                      lnet_old->send_length) / unit_divisor / delta;
+       perf = (float)(lnet_new->lcc_send_length -
+                      lnet_old->lcc_send_length) / unit_divisor / delta;
        lnet_stat_result.lnet_total_sndperf += perf;
 
        if (lnet_stat_result.lnet_min_sndperf > perf ||
@@ -1665,8 +1665,8 @@ lst_cal_lnet_stat(float delta, struct lnet_counters *lnet_new,
        if (lnet_stat_result.lnet_max_sndperf < perf)
                lnet_stat_result.lnet_max_sndperf = perf;
 
-       perf = (float)(lnet_new->recv_length -
-                      lnet_old->recv_length) / unit_divisor / delta;
+       perf = (float)(lnet_new->lcc_recv_length -
+                      lnet_old->lcc_recv_length) / unit_divisor / delta;
        lnet_stat_result.lnet_total_rcvperf += perf;
 
        if (lnet_stat_result.lnet_min_rcvperf > perf ||
@@ -1676,7 +1676,7 @@ lst_cal_lnet_stat(float delta, struct lnet_counters *lnet_new,
        if (lnet_stat_result.lnet_max_rcvperf < perf)
                lnet_stat_result.lnet_max_rcvperf = perf;
 
-       rate = (lnet_new->send_count - lnet_old->send_count) / delta;
+       rate = (lnet_new->lcc_send_count - lnet_old->lcc_send_count) / delta;
        lnet_stat_result.lnet_total_sndrate += rate;
 
        if (lnet_stat_result.lnet_min_sndrate > rate ||
@@ -1686,7 +1686,7 @@ lst_cal_lnet_stat(float delta, struct lnet_counters *lnet_new,
        if (lnet_stat_result.lnet_max_sndrate < rate)
                lnet_stat_result.lnet_max_sndrate = rate;
 
-       rate = (lnet_new->recv_count - lnet_old->recv_count) / delta;
+       rate = (lnet_new->lcc_recv_count - lnet_old->lcc_recv_count) / delta;
        lnet_stat_result.lnet_total_rcvrate += rate;
 
        if (lnet_stat_result.lnet_min_rcvrate > rate ||
@@ -1772,17 +1772,17 @@ lst_print_stat(char *name, struct list_head *resultp,
               int idx, int lnet, int bwrt, int rdwr, int type,
               int mbs)
 {
-       struct list_head        tmp[2];
+       struct list_head tmp[2];
        struct lstcon_rpc_ent *new;
        struct lstcon_rpc_ent *old;
-       struct sfw_counters   *sfwk_new;
-       struct sfw_counters   *sfwk_old;
-       struct srpc_counters  *srpc_new;
-       struct srpc_counters  *srpc_old;
-       struct lnet_counters  *lnet_new;
-       struct lnet_counters  *lnet_old;
-        float             delta;
-        int               errcount = 0;
+       struct sfw_counters *sfwk_new;
+       struct sfw_counters *sfwk_old;
+       struct srpc_counters *srpc_new;
+       struct srpc_counters *srpc_old;
+       struct lnet_counters_common *lnet_new;
+       struct lnet_counters_common *lnet_old;
+       float delta;
+       int errcount = 0;
 
        INIT_LIST_HEAD(&tmp[0]);
        INIT_LIST_HEAD(&tmp[1]);
@@ -1825,28 +1825,32 @@ lst_print_stat(char *name, struct list_head *resultp,
                sfwk_new = (struct sfw_counters *)&new->rpe_payload[0];
                sfwk_old = (struct sfw_counters *)&old->rpe_payload[0];
 
-               srpc_new = (struct srpc_counters *)((char *)sfwk_new + sizeof(*sfwk_new));
-               srpc_old = (struct srpc_counters *)((char *)sfwk_old + sizeof(*sfwk_old));
+               srpc_new = (struct srpc_counters *)((char *)sfwk_new +
+                                                   sizeof(*sfwk_new));
+               srpc_old = (struct srpc_counters *)((char *)sfwk_old +
+                                                   sizeof(*sfwk_old));
 
-               lnet_new = (struct lnet_counters *)((char *)srpc_new + sizeof(*srpc_new));
-               lnet_old = (struct lnet_counters *)((char *)srpc_old + sizeof(*srpc_old));
+               lnet_new = (struct lnet_counters_common *)((char *)srpc_new +
+                                                          sizeof(*srpc_new));
+               lnet_old = (struct lnet_counters_common *)((char *)srpc_old +
+                                                          sizeof(*srpc_old));
 
                /* Prior to version 2.3, the running_ms field was a counter for
                 * the number of running tests.  We are looking at this value
                 * to determine if it is a millisecond timestamp (>= 2.3) or a
                 * test counter (< 2.3).  The number 500 is being used for this
                 * barrier as the test counter should never get this high, and
-                * the timestamp should never get this low. */
-
+                * the timestamp should never get this low.
+                */
                if (sfwk_new->running_ms > 500) {
                        /* use the timestamp from the remote node, not our
                         * rpe_stamp from when we copied up the data out of
-                        * the kernel */
-
-                       delta = (float) (sfwk_new->running_ms -
+                        * the kernel.
+                        */
+                       delta = (float)(sfwk_new->running_ms -
                                        sfwk_old->running_ms) / 1000;
                } else {
-                       struct timeval    tv;
+                       struct timeval tv;
 
                        lst_timeval_diff(&new->rpe_stamp, &old->rpe_stamp, &tv);
                        delta = tv.tv_sec + (float)tv.tv_usec / 1000000;