From: Sonia Sharma Date: Sun, 23 Sep 2018 16:15:31 +0000 (-0400) Subject: LU-11422 lnet: Fix selftest backward compatibility post health X-Git-Tag: 2.11.56~2 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=60f6f2b480b482f2022cbea416d8bea87f848bec;p=fs%2Flustre-release.git LU-11422 lnet: Fix selftest backward compatibility post health After the LNet health feature landed, lnet-selftest lost backward compatibility: the health counters were added to struct lnet_counters, which selftest embeds in its srpc_stat_reply wire structure, so the size of the stat reply changed. This patch fixes that by adding a new structure, lnet_counters_common, which is similar to the pre-health version of lnet_counters. lnet_counters_common is now the struct that selftest depends on. The patch also adds a struct lnet_counters_health specifically for the health stats. Change-Id: Ia066ff7b3522738080d192e80c14cf5ac55a33e2 Test-Parameters: trivial testlist=lnet-selftest clientjob=lustre-b2_10 clientbuildno=136 Signed-off-by: Sonia Sharma Reviewed-on: https://review.whamcloud.com/33242 Reviewed-by: Andreas Dilger Reviewed-by: Amir Shehata Tested-by: Jenkins Tested-by: Andreas Dilger --- diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 2b52cb7..4441009 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -496,7 +496,7 @@ lnet_rspt_alloc(int cpt) struct lnet_rsp_tracker *rspt; LIBCFS_ALLOC(rspt, sizeof(*rspt)); lnet_net_lock(cpt); - the_lnet.ln_counters[cpt]->rst_alloc++; + the_lnet.ln_counters[cpt]->lct_health.lch_rst_alloc++; lnet_net_unlock(cpt); return rspt; } @@ -506,7 +506,7 @@ lnet_rspt_free(struct lnet_rsp_tracker *rspt, int cpt) { LIBCFS_FREE(rspt, sizeof(*rspt)); lnet_net_lock(cpt); - the_lnet.ln_counters[cpt]->rst_alloc--; + the_lnet.ln_counters[cpt]->lct_health.lch_rst_alloc--; lnet_net_unlock(cpt); } @@ -729,6 +729,7 @@ bool lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg); /** @} lnet_fault_simulation */ +void lnet_counters_get_common(struct lnet_counters_common *common); void lnet_counters_get(struct lnet_counters *counters); void lnet_counters_reset(void); diff --git 
a/lnet/include/uapi/linux/lnet/lnet-types.h b/lnet/include/uapi/linux/lnet/lnet-types.h index 3f5b8bd..c7e779a 100644 --- a/lnet/include/uapi/linux/lnet/lnet-types.h +++ b/lnet/include/uapi/linux/lnet/lnet-types.h @@ -223,33 +223,41 @@ struct lnet_acceptor_connreq { #define LNET_PROTO_ACCEPTOR_VERSION 1 -struct lnet_counters { - __u32 msgs_alloc; - __u32 msgs_max; - __u32 rst_alloc; - __u32 errors; - __u32 send_count; - __u32 recv_count; - __u32 route_count; - __u32 drop_count; - __u32 resend_count; - __u32 response_timeout_count; - __u32 local_interrupt_count; - __u32 local_dropped_count; - __u32 local_aborted_count; - __u32 local_no_route_count; - __u32 local_timeout_count; - __u32 local_error_count; - __u32 remote_dropped_count; - __u32 remote_error_count; - __u32 remote_timeout_count; - __u32 network_timeout_count; - __u64 send_length; - __u64 recv_length; - __u64 route_length; - __u64 drop_length; +struct lnet_counters_common { + __u32 lcc_msgs_alloc; + __u32 lcc_msgs_max; + __u32 lcc_errors; + __u32 lcc_send_count; + __u32 lcc_recv_count; + __u32 lcc_route_count; + __u32 lcc_drop_count; + __u64 lcc_send_length; + __u64 lcc_recv_length; + __u64 lcc_route_length; + __u64 lcc_drop_length; } WIRE_ATTR; +struct lnet_counters_health { + __u32 lch_rst_alloc; + __u32 lch_resend_count; + __u32 lch_response_timeout_count; + __u32 lch_local_interrupt_count; + __u32 lch_local_dropped_count; + __u32 lch_local_aborted_count; + __u32 lch_local_no_route_count; + __u32 lch_local_timeout_count; + __u32 lch_local_error_count; + __u32 lch_remote_dropped_count; + __u32 lch_remote_error_count; + __u32 lch_remote_timeout_count; + __u32 lch_network_timeout_count; +}; + +struct lnet_counters { + struct lnet_counters_common lct_common; + struct lnet_counters_health lct_health; +}; + #define LNET_NI_STATUS_UP 0x15aac0de #define LNET_NI_STATUS_DOWN 0xdeadface #define LNET_NI_STATUS_INVALID 0x00000000 diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 059aa41..b2d4c25 100644 --- 
a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -739,41 +739,70 @@ lnet_unregister_lnd(struct lnet_lnd *lnd) EXPORT_SYMBOL(lnet_unregister_lnd); void +lnet_counters_get_common(struct lnet_counters_common *common) +{ + struct lnet_counters *ctr; + int i; + + memset(common, 0, sizeof(*common)); + + lnet_net_lock(LNET_LOCK_EX); + + cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) { + common->lcc_msgs_max += ctr->lct_common.lcc_msgs_max; + common->lcc_msgs_alloc += ctr->lct_common.lcc_msgs_alloc; + common->lcc_errors += ctr->lct_common.lcc_errors; + common->lcc_send_count += ctr->lct_common.lcc_send_count; + common->lcc_recv_count += ctr->lct_common.lcc_recv_count; + common->lcc_route_count += ctr->lct_common.lcc_route_count; + common->lcc_drop_count += ctr->lct_common.lcc_drop_count; + common->lcc_send_length += ctr->lct_common.lcc_send_length; + common->lcc_recv_length += ctr->lct_common.lcc_recv_length; + common->lcc_route_length += ctr->lct_common.lcc_route_length; + common->lcc_drop_length += ctr->lct_common.lcc_drop_length; + } + lnet_net_unlock(LNET_LOCK_EX); +} +EXPORT_SYMBOL(lnet_counters_get_common); + +void lnet_counters_get(struct lnet_counters *counters) { struct lnet_counters *ctr; + struct lnet_counters_health *health = &counters->lct_health; int i; memset(counters, 0, sizeof(*counters)); + lnet_counters_get_common(&counters->lct_common); + lnet_net_lock(LNET_LOCK_EX); cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) { - counters->msgs_max += ctr->msgs_max; - counters->msgs_alloc += ctr->msgs_alloc; - counters->rst_alloc += ctr->rst_alloc; - counters->errors += ctr->errors; - counters->resend_count += ctr->resend_count; - counters->response_timeout_count += ctr->response_timeout_count; - counters->local_interrupt_count += ctr->local_interrupt_count; - counters->local_dropped_count += ctr->local_dropped_count; - counters->local_aborted_count += ctr->local_aborted_count; - counters->local_no_route_count += ctr->local_no_route_count; - 
counters->local_timeout_count += ctr->local_timeout_count; - counters->local_error_count += ctr->local_error_count; - counters->remote_dropped_count += ctr->remote_dropped_count; - counters->remote_error_count += ctr->remote_error_count; - counters->remote_timeout_count += ctr->remote_timeout_count; - counters->network_timeout_count += ctr->network_timeout_count; - counters->send_count += ctr->send_count; - counters->recv_count += ctr->recv_count; - counters->route_count += ctr->route_count; - counters->drop_count += ctr->drop_count; - counters->send_length += ctr->send_length; - counters->recv_length += ctr->recv_length; - counters->route_length += ctr->route_length; - counters->drop_length += ctr->drop_length; - + health->lch_rst_alloc += ctr->lct_health.lch_rst_alloc; + health->lch_resend_count += ctr->lct_health.lch_resend_count; + health->lch_response_timeout_count += + ctr->lct_health.lch_response_timeout_count; + health->lch_local_interrupt_count += + ctr->lct_health.lch_local_interrupt_count; + health->lch_local_dropped_count += + ctr->lct_health.lch_local_dropped_count; + health->lch_local_aborted_count += + ctr->lct_health.lch_local_aborted_count; + health->lch_local_no_route_count += + ctr->lct_health.lch_local_no_route_count; + health->lch_local_timeout_count += + ctr->lct_health.lch_local_timeout_count; + health->lch_local_error_count += + ctr->lct_health.lch_local_error_count; + health->lch_remote_dropped_count += + ctr->lct_health.lch_remote_dropped_count; + health->lch_remote_error_count += + ctr->lct_health.lch_remote_error_count; + health->lch_remote_timeout_count += + ctr->lct_health.lch_remote_timeout_count; + health->lch_network_timeout_count += + ctr->lct_health.lch_network_timeout_count; } lnet_net_unlock(LNET_LOCK_EX); } diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index c572751..d5f1132 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -942,8 +942,9 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send) /* NB 
'lp' is always the next hop */ if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 && lnet_peer_alive_locked(ni, lp, msg) == 0) { - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += msg->msg_len; + the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++; + the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length += + msg->msg_len; lnet_net_unlock(cpt); if (msg->msg_txpeer) lnet_incr_stats(&msg->msg_txpeer->lpni_stats, @@ -2746,7 +2747,7 @@ lnet_finalize_expired_responses(bool force) lnet_res_unlock(i); lnet_net_lock(i); - the_lnet.ln_counters[i]->response_timeout_count++; + the_lnet.ln_counters[i]->lct_health.lch_response_timeout_count++; lnet_net_unlock(i); list_del_init(&rspt->rspt_on_list); @@ -2832,7 +2833,7 @@ lnet_resend_pending_msgs_locked(struct list_head *resendq, int cpt) } lnet_net_lock(cpt); if (!rc) - the_lnet.ln_counters[cpt]->resend_count++; + the_lnet.ln_counters[cpt]->lct_health.lch_resend_count++; } } } @@ -3600,8 +3601,8 @@ lnet_drop_message(struct lnet_ni *ni, int cpt, void *private, unsigned int nob, { lnet_net_lock(cpt); lnet_incr_stats(&ni->ni_stats, msg_type, LNET_STATS_TYPE_DROP); - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += nob; + the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++; + the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length += nob; lnet_net_unlock(cpt); lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob); @@ -4591,8 +4592,9 @@ lnet_create_reply_msg(struct lnet_ni *ni, struct lnet_msg *getmsg) lnet_net_lock(cpt); lnet_incr_stats(&ni->ni_stats, LNET_MSG_GET, LNET_STATS_TYPE_DROP); - the_lnet.ln_counters[cpt]->drop_count++; - the_lnet.ln_counters[cpt]->drop_length += getmd->md_length; + the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++; + the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length += + getmd->md_length; lnet_net_unlock(cpt); if (msg != NULL) diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index 3bd6946..2f3b689 100644 --- 
a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -142,7 +142,7 @@ void lnet_msg_commit(struct lnet_msg *msg, int cpt) { struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt]; - struct lnet_counters *counters = the_lnet.ln_counters[cpt]; + struct lnet_counters_common *common; s64 timeout_ns; /* set the message deadline */ @@ -171,30 +171,31 @@ lnet_msg_commit(struct lnet_msg *msg, int cpt) msg->msg_onactivelist = 1; list_add_tail(&msg->msg_activelist, &container->msc_active); - counters->msgs_alloc++; - if (counters->msgs_alloc > counters->msgs_max) - counters->msgs_max = counters->msgs_alloc; + common = &the_lnet.ln_counters[cpt]->lct_common; + common->lcc_msgs_alloc++; + if (common->lcc_msgs_alloc > common->lcc_msgs_max) + common->lcc_msgs_max = common->lcc_msgs_alloc; } static void lnet_msg_decommit_tx(struct lnet_msg *msg, int status) { - struct lnet_counters *counters; + struct lnet_counters_common *common; struct lnet_event *ev = &msg->msg_ev; LASSERT(msg->msg_tx_committed); if (status != 0) goto out; - counters = the_lnet.ln_counters[msg->msg_tx_cpt]; + common = &(the_lnet.ln_counters[msg->msg_tx_cpt]->lct_common); switch (ev->type) { default: /* routed message */ LASSERT(msg->msg_routing); LASSERT(msg->msg_rx_committed); LASSERT(ev->type == 0); - counters->route_length += msg->msg_len; - counters->route_count++; + common->lcc_route_length += msg->msg_len; + common->lcc_route_count++; goto incr_stats; case LNET_EVENT_PUT: @@ -208,7 +209,7 @@ lnet_msg_decommit_tx(struct lnet_msg *msg, int status) case LNET_EVENT_SEND: LASSERT(!msg->msg_rx_committed); if (msg->msg_type == LNET_MSG_PUT) - counters->send_length += msg->msg_len; + common->lcc_send_length += msg->msg_len; break; case LNET_EVENT_GET: @@ -220,7 +221,7 @@ lnet_msg_decommit_tx(struct lnet_msg *msg, int status) break; } - counters->send_count++; + common->lcc_send_count++; incr_stats: if (msg->msg_txpeer) @@ -239,7 +240,7 @@ incr_stats: static void lnet_msg_decommit_rx(struct lnet_msg 
*msg, int status) { - struct lnet_counters *counters; + struct lnet_counters_common *common; struct lnet_event *ev = &msg->msg_ev; LASSERT(!msg->msg_tx_committed); /* decommitted or never committed */ @@ -248,7 +249,7 @@ lnet_msg_decommit_rx(struct lnet_msg *msg, int status) if (status != 0) goto out; - counters = the_lnet.ln_counters[msg->msg_rx_cpt]; + common = &(the_lnet.ln_counters[msg->msg_rx_cpt]->lct_common); switch (ev->type) { default: LASSERT(ev->type == 0); @@ -266,7 +267,7 @@ lnet_msg_decommit_rx(struct lnet_msg *msg, int status) * lnet_msg_decommit_tx(), see details in lnet_parse_get() */ LASSERT(msg->msg_type == LNET_MSG_REPLY || msg->msg_type == LNET_MSG_GET); - counters->send_length += msg->msg_wanted; + common->lcc_send_length += msg->msg_wanted; break; case LNET_EVENT_PUT: @@ -281,7 +282,7 @@ lnet_msg_decommit_rx(struct lnet_msg *msg, int status) break; } - counters->recv_count++; + common->lcc_recv_count++; incr_stats: if (msg->msg_rxpeer) @@ -293,7 +294,7 @@ incr_stats: msg->msg_type, LNET_STATS_TYPE_RECV); if (ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_REPLY) - counters->recv_length += msg->msg_wanted; + common->lcc_recv_length += msg->msg_wanted; out: lnet_return_rx_credits_locked(msg); @@ -326,7 +327,7 @@ lnet_msg_decommit(struct lnet_msg *msg, int cpt, int status) list_del(&msg->msg_activelist); msg->msg_onactivelist = 0; - the_lnet.ln_counters[cpt2]->msgs_alloc--; + the_lnet.ln_counters[cpt2]->lct_common.lcc_msgs_alloc--; if (cpt2 != cpt) { lnet_net_unlock(cpt2); @@ -541,52 +542,54 @@ lnet_incr_hstats(struct lnet_msg *msg, enum lnet_msg_hstatus hstatus) { struct lnet_ni *ni = msg->msg_txni; struct lnet_peer_ni *lpni = msg->msg_txpeer; - struct lnet_counters *counters = the_lnet.ln_counters[0]; + struct lnet_counters_health *health; + + health = &the_lnet.ln_counters[0]->lct_health; switch (hstatus) { case LNET_MSG_STATUS_LOCAL_INTERRUPT: atomic_inc(&ni->ni_hstats.hlt_local_interrupt); - counters->local_interrupt_count++; + 
health->lch_local_interrupt_count++; break; case LNET_MSG_STATUS_LOCAL_DROPPED: atomic_inc(&ni->ni_hstats.hlt_local_dropped); - counters->local_dropped_count++; + health->lch_local_dropped_count++; break; case LNET_MSG_STATUS_LOCAL_ABORTED: atomic_inc(&ni->ni_hstats.hlt_local_aborted); - counters->local_aborted_count++; + health->lch_local_aborted_count++; break; case LNET_MSG_STATUS_LOCAL_NO_ROUTE: atomic_inc(&ni->ni_hstats.hlt_local_no_route); - counters->local_no_route_count++; + health->lch_local_no_route_count++; break; case LNET_MSG_STATUS_LOCAL_TIMEOUT: atomic_inc(&ni->ni_hstats.hlt_local_timeout); - counters->local_timeout_count++; + health->lch_local_timeout_count++; break; case LNET_MSG_STATUS_LOCAL_ERROR: atomic_inc(&ni->ni_hstats.hlt_local_error); - counters->local_error_count++; + health->lch_local_error_count++; break; case LNET_MSG_STATUS_REMOTE_DROPPED: if (lpni) atomic_inc(&lpni->lpni_hstats.hlt_remote_dropped); - counters->remote_dropped_count++; + health->lch_remote_dropped_count++; break; case LNET_MSG_STATUS_REMOTE_ERROR: if (lpni) atomic_inc(&lpni->lpni_hstats.hlt_remote_error); - counters->remote_error_count++; + health->lch_remote_error_count++; break; case LNET_MSG_STATUS_REMOTE_TIMEOUT: if (lpni) atomic_inc(&lpni->lpni_hstats.hlt_remote_timeout); - counters->remote_timeout_count++; + health->lch_remote_timeout_count++; break; case LNET_MSG_STATUS_NETWORK_TIMEOUT: if (lpni) atomic_inc(&lpni->lpni_hstats.hlt_network_timeout); - counters->network_timeout_count++; + health->lch_network_timeout_count++; break; case LNET_MSG_STATUS_OK: break; diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index b4e4d7b..c0c5c25 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -82,6 +82,7 @@ static int __proc_lnet_stats(void *data, int write, { int rc; struct lnet_counters *ctrs; + struct lnet_counters_common common; int len; char *tmpstr; const int tmpsiz = 256; /* 7 %u and 4 __u64 */ @@ -104,16 +105,17 @@ static int 
__proc_lnet_stats(void *data, int write, } lnet_counters_get(ctrs); + common = ctrs->lct_common; len = snprintf(tmpstr, tmpsiz, "%u %u %u %u %u %u %u %llu %llu " "%llu %llu", - ctrs->msgs_alloc, ctrs->msgs_max, - ctrs->errors, - ctrs->send_count, ctrs->recv_count, - ctrs->route_count, ctrs->drop_count, - ctrs->send_length, ctrs->recv_length, - ctrs->route_length, ctrs->drop_length); + common.lcc_msgs_alloc, common.lcc_msgs_max, + common.lcc_errors, + common.lcc_send_count, common.lcc_recv_count, + common.lcc_route_count, common.lcc_drop_count, + common.lcc_send_length, common.lcc_recv_length, + common.lcc_route_length, common.lcc_drop_length); if (pos >= min_t(int, len, strlen(tmpstr))) rc = 0; diff --git a/lnet/selftest/console.c b/lnet/selftest/console.c index a595476..1e37454 100644 --- a/lnet/selftest/console.c +++ b/lnet/selftest/console.c @@ -1472,7 +1472,7 @@ lstcon_statrpc_readent(int transop, struct srpc_msg *msg, struct srpc_stat_reply *rep = &msg->msg_body.stat_reply; struct sfw_counters __user *sfwk_stat; struct srpc_counters __user *srpc_stat; - struct lnet_counters __user *lnet_stat; + struct lnet_counters_common __user *lnet_stat; if (rep->str_status != 0) return 0; @@ -1480,7 +1480,7 @@ lstcon_statrpc_readent(int transop, struct srpc_msg *msg, sfwk_stat = (struct sfw_counters __user *)&ent_up->rpe_payload[0]; srpc_stat = (struct srpc_counters __user *) ((char __user *)sfwk_stat + sizeof(*sfwk_stat)); - lnet_stat = (struct lnet_counters __user *) + lnet_stat = (struct lnet_counters_common __user *) ((char __user *)srpc_stat + sizeof(*srpc_stat)); if (copy_to_user(sfwk_stat, &rep->str_fw, sizeof(*sfwk_stat)) || diff --git a/lnet/selftest/framework.c b/lnet/selftest/framework.c index 7e10c09..000fca9 100644 --- a/lnet/selftest/framework.c +++ b/lnet/selftest/framework.c @@ -51,49 +51,49 @@ MODULE_PARM_DESC(rpc_timeout, "rpc timeout in seconds (64 by default, 0 == never #define sfw_unpack_id(id) \ do { \ - __swab64s(&(id).nid); \ - __swab32s(&(id).pid); 
\ + __swab64s(&(id).nid); \ + __swab32s(&(id).pid); \ } while (0) #define sfw_unpack_sid(sid) \ do { \ - __swab64s(&(sid).ses_nid); \ - __swab64s(&(sid).ses_stamp); \ + __swab64s(&(sid).ses_nid); \ + __swab64s(&(sid).ses_stamp); \ } while (0) #define sfw_unpack_fw_counters(fc) \ do { \ - __swab32s(&(fc).running_ms); \ - __swab32s(&(fc).active_batches); \ - __swab32s(&(fc).zombie_sessions); \ - __swab32s(&(fc).brw_errors); \ - __swab32s(&(fc).ping_errors); \ + __swab32s(&(fc).running_ms); \ + __swab32s(&(fc).active_batches); \ + __swab32s(&(fc).zombie_sessions); \ + __swab32s(&(fc).brw_errors); \ + __swab32s(&(fc).ping_errors); \ } while (0) #define sfw_unpack_rpc_counters(rc) \ do { \ - __swab32s(&(rc).errors); \ - __swab32s(&(rc).rpcs_sent); \ - __swab32s(&(rc).rpcs_rcvd); \ - __swab32s(&(rc).rpcs_dropped); \ - __swab32s(&(rc).rpcs_expired); \ - __swab64s(&(rc).bulk_get); \ - __swab64s(&(rc).bulk_put); \ + __swab32s(&(rc).errors); \ + __swab32s(&(rc).rpcs_sent); \ + __swab32s(&(rc).rpcs_rcvd); \ + __swab32s(&(rc).rpcs_dropped); \ + __swab32s(&(rc).rpcs_expired); \ + __swab64s(&(rc).bulk_get); \ + __swab64s(&(rc).bulk_put); \ } while (0) #define sfw_unpack_lnet_counters(lc) \ do { \ - __swab32s(&(lc).errors); \ - __swab32s(&(lc).msgs_max); \ - __swab32s(&(lc).msgs_alloc); \ - __swab32s(&(lc).send_count); \ - __swab32s(&(lc).recv_count); \ - __swab32s(&(lc).drop_count); \ - __swab32s(&(lc).route_count); \ - __swab64s(&(lc).send_length); \ - __swab64s(&(lc).recv_length); \ - __swab64s(&(lc).drop_length); \ - __swab64s(&(lc).route_length); \ + __swab32s(&(lc).lcc_errors); \ + __swab32s(&(lc).lcc_msgs_max); \ + __swab32s(&(lc).lcc_msgs_alloc); \ + __swab32s(&(lc).lcc_send_count); \ + __swab32s(&(lc).lcc_recv_count); \ + __swab32s(&(lc).lcc_drop_count); \ + __swab32s(&(lc).lcc_route_count); \ + __swab64s(&(lc).lcc_send_length); \ + __swab64s(&(lc).lcc_recv_length); \ + __swab64s(&(lc).lcc_drop_length); \ + __swab64s(&(lc).lcc_route_length); \ } while (0) #define 
sfw_test_active(t) (atomic_read(&(t)->tsi_nactive) != 0) @@ -390,7 +390,7 @@ sfw_get_stats(struct srpc_stat_reqst *request, struct srpc_stat_reply *reply) return 0; } - lnet_counters_get(&reply->str_lnet); + lnet_counters_get_common(&reply->str_lnet); srpc_get_counters(&reply->str_rpc); /* send over the msecs since the session was started diff --git a/lnet/selftest/module.c b/lnet/selftest/module.c index 112f5bf..df66eab 100644 --- a/lnet/selftest/module.c +++ b/lnet/selftest/module.c @@ -87,13 +87,13 @@ lnet_selftest_exit(void) void lnet_selftest_structure_assertion(void) { -/* CLASSERT(sizeof(struct srpc_msg) == 160); + CLASSERT(sizeof(struct srpc_msg) == 160); CLASSERT(sizeof(struct srpc_test_reqst) == 70); CLASSERT(offsetof(struct srpc_msg, msg_body.tes_reqst.tsr_concur) == 72); CLASSERT(offsetof(struct srpc_msg, msg_body.tes_reqst.tsr_ndest) == 78); CLASSERT(sizeof(struct srpc_stat_reply) == 136); CLASSERT(sizeof(struct srpc_stat_reqst) == 28); -*/ + } static int __init diff --git a/lnet/selftest/rpc.h b/lnet/selftest/rpc.h index c3a543a..8cc8c43 100644 --- a/lnet/selftest/rpc.h +++ b/lnet/selftest/rpc.h @@ -157,11 +157,11 @@ struct srpc_stat_reqst { } WIRE_ATTR; struct srpc_stat_reply { - __u32 str_status; - struct lst_sid str_sid; - struct sfw_counters str_fw; - struct srpc_counters str_rpc; - struct lnet_counters str_lnet; + __u32 str_status; + struct lst_sid str_sid; + struct sfw_counters str_fw; + struct srpc_counters str_rpc; + struct lnet_counters_common str_lnet; } WIRE_ATTR; struct test_bulk_req { diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index ecc72e2..4978c42 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -3607,6 +3607,7 @@ int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc, struct cYAML **err_rc) { struct lnet_ioctl_lnet_stats data; + struct lnet_counters *cntrs; int rc; int l_errno; char err_str[LNET_MAX_STR_LEN]; @@ -3617,7 +3618,7 @@ 
int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc, LIBCFS_IOC_INIT_V2(data, st_hdr); rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_LNET_STATS, &data); - if (rc != 0) { + if (rc) { l_errno = errno; snprintf(err_str, sizeof(err_str), @@ -3629,111 +3630,113 @@ int lustre_lnet_show_stats(int seq_no, struct cYAML **show_rc, rc = LUSTRE_CFG_RC_OUT_OF_MEM; + cntrs = &data.st_cntrs; + root = cYAML_create_object(NULL, NULL); - if (root == NULL) + if (!root) goto out; stats = cYAML_create_object(root, "statistics"); - if (stats == NULL) + if (!stats) goto out; - if (cYAML_create_number(stats, "msgs_alloc", - data.st_cntrs.msgs_alloc) == NULL) + if (!cYAML_create_number(stats, "msgs_alloc", + cntrs->lct_common.lcc_msgs_alloc)) goto out; - if (cYAML_create_number(stats, "msgs_max", - data.st_cntrs.msgs_max) == NULL) + if (!cYAML_create_number(stats, "msgs_max", + cntrs->lct_common.lcc_msgs_max)) goto out; - if (cYAML_create_number(stats, "rst_alloc", - data.st_cntrs.rst_alloc) == NULL) + if (!cYAML_create_number(stats, "rst_alloc", + cntrs->lct_health.lch_rst_alloc)) goto out; - if (cYAML_create_number(stats, "errors", - data.st_cntrs.errors) == NULL) + if (!cYAML_create_number(stats, "errors", + cntrs->lct_common.lcc_errors)) goto out; - if (cYAML_create_number(stats, "send_count", - data.st_cntrs.send_count) == NULL) + if (!cYAML_create_number(stats, "send_count", + cntrs->lct_common.lcc_send_count)) goto out; - if (cYAML_create_number(stats, "resend_count", - data.st_cntrs.resend_count) == NULL) + if (!cYAML_create_number(stats, "resend_count", + cntrs->lct_health.lch_resend_count)) goto out; - if (cYAML_create_number(stats, "response_timeout_count", - data.st_cntrs.response_timeout_count) == NULL) + if (!cYAML_create_number(stats, "response_timeout_count", + cntrs->lct_health.lch_response_timeout_count)) goto out; - if (cYAML_create_number(stats, "local_interrupt_count", - data.st_cntrs.local_interrupt_count) == NULL) + if (!cYAML_create_number(stats, 
"local_interrupt_count", + cntrs->lct_health.lch_local_interrupt_count)) goto out; - if (cYAML_create_number(stats, "local_dropped_count", - data.st_cntrs.local_dropped_count) == NULL) + if (!cYAML_create_number(stats, "local_dropped_count", + cntrs->lct_health.lch_local_dropped_count)) goto out; - if (cYAML_create_number(stats, "local_aborted_count", - data.st_cntrs.local_aborted_count) == NULL) + if (!cYAML_create_number(stats, "local_aborted_count", + cntrs->lct_health.lch_local_aborted_count)) goto out; - if (cYAML_create_number(stats, "local_no_route_count", - data.st_cntrs.local_no_route_count) == NULL) + if (!cYAML_create_number(stats, "local_no_route_count", + cntrs->lct_health.lch_local_no_route_count)) goto out; - if (cYAML_create_number(stats, "local_timeout_count", - data.st_cntrs.local_timeout_count) == NULL) + if (!cYAML_create_number(stats, "local_timeout_count", + cntrs->lct_health.lch_local_timeout_count)) goto out; - if (cYAML_create_number(stats, "local_error_count", - data.st_cntrs.local_error_count) == NULL) + if (!cYAML_create_number(stats, "local_error_count", + cntrs->lct_health.lch_local_error_count)) goto out; - if (cYAML_create_number(stats, "remote_dropped_count", - data.st_cntrs.remote_dropped_count) == NULL) + if (!cYAML_create_number(stats, "remote_dropped_count", + cntrs->lct_health.lch_remote_dropped_count)) goto out; - if (cYAML_create_number(stats, "remote_error_count", - data.st_cntrs.remote_error_count) == NULL) + if (!cYAML_create_number(stats, "remote_error_count", + cntrs->lct_health.lch_remote_error_count)) goto out; - if (cYAML_create_number(stats, "remote_timeout_count", - data.st_cntrs.remote_timeout_count) == NULL) + if (!cYAML_create_number(stats, "remote_timeout_count", + cntrs->lct_health.lch_remote_timeout_count)) goto out; - if (cYAML_create_number(stats, "network_timeout_count", - data.st_cntrs.network_timeout_count) == NULL) + if (!cYAML_create_number(stats, "network_timeout_count", + 
cntrs->lct_health.lch_network_timeout_count)) goto out; - if (cYAML_create_number(stats, "recv_count", - data.st_cntrs.recv_count) == NULL) + if (!cYAML_create_number(stats, "recv_count", + cntrs->lct_common.lcc_recv_count)) goto out; - if (cYAML_create_number(stats, "route_count", - data.st_cntrs.route_count) == NULL) + if (!cYAML_create_number(stats, "route_count", + cntrs->lct_common.lcc_route_count)) goto out; - if (cYAML_create_number(stats, "drop_count", - data.st_cntrs.drop_count) == NULL) + if (!cYAML_create_number(stats, "drop_count", + cntrs->lct_common.lcc_drop_count)) goto out; - if (cYAML_create_number(stats, "send_length", - data.st_cntrs.send_length) == NULL) + if (!cYAML_create_number(stats, "send_length", + cntrs->lct_common.lcc_send_length)) goto out; - if (cYAML_create_number(stats, "recv_length", - data.st_cntrs.recv_length) == NULL) + if (!cYAML_create_number(stats, "recv_length", + cntrs->lct_common.lcc_recv_length)) goto out; - if (cYAML_create_number(stats, "route_length", - data.st_cntrs.route_length) == NULL) + if (!cYAML_create_number(stats, "route_length", + cntrs->lct_common.lcc_route_length)) goto out; - if (cYAML_create_number(stats, "drop_length", - data.st_cntrs.drop_length) == NULL) + if (!cYAML_create_number(stats, "drop_length", + cntrs->lct_common.lcc_drop_length)) goto out; - if (show_rc == NULL) + if (!show_rc) cYAML_print_tree(root); snprintf(err_str, sizeof(err_str), "\"success\""); diff --git a/lnet/utils/lst.c b/lnet/utils/lst.c index 0b6d40a..7d11a24 100644 --- a/lnet/utils/lst.c +++ b/lnet/utils/lst.c @@ -1555,27 +1555,27 @@ lst_stat_req_param_alloc(char *name, lst_stat_req_param_t **srpp, int save_old) return rc; } - srp->srp_name = name; + srp->srp_name = name; - for (i = 0; i < count; i++) { - rc = lst_alloc_rpcent(&srp->srp_result[i], srp->srp_count, + for (i = 0; i < count; i++) { + rc = lst_alloc_rpcent(&srp->srp_result[i], srp->srp_count, sizeof(struct sfw_counters) + sizeof(struct srpc_counters) + - sizeof(struct 
lnet_counters)); - if (rc != 0) { - fprintf(stderr, "Out of memory\n"); - break; - } - } + sizeof(struct lnet_counters_common)); + if (rc != 0) { + fprintf(stderr, "Out of memory\n"); + break; + } + } - if (rc == 0) { - *srpp = srp; - return 0; - } + if (rc == 0) { + *srpp = srp; + return 0; + } - lst_stat_req_param_free(srp); + lst_stat_req_param_free(srp); - return rc; + return rc; } typedef struct { @@ -1646,16 +1646,16 @@ lst_timeval_diff(struct timeval *tv1, } static void -lst_cal_lnet_stat(float delta, struct lnet_counters *lnet_new, - struct lnet_counters *lnet_old, int mbs) +lst_cal_lnet_stat(float delta, struct lnet_counters_common *lnet_new, + struct lnet_counters_common *lnet_old, int mbs) { float perf; float rate; unsigned int unit_divisor; unit_divisor = (mbs) ? (1000 * 1000) : (1024 * 1024); - perf = (float)(lnet_new->send_length - - lnet_old->send_length) / unit_divisor / delta; + perf = (float)(lnet_new->lcc_send_length - + lnet_old->lcc_send_length) / unit_divisor / delta; lnet_stat_result.lnet_total_sndperf += perf; if (lnet_stat_result.lnet_min_sndperf > perf || @@ -1665,8 +1665,8 @@ lst_cal_lnet_stat(float delta, struct lnet_counters *lnet_new, if (lnet_stat_result.lnet_max_sndperf < perf) lnet_stat_result.lnet_max_sndperf = perf; - perf = (float)(lnet_new->recv_length - - lnet_old->recv_length) / unit_divisor / delta; + perf = (float)(lnet_new->lcc_recv_length - + lnet_old->lcc_recv_length) / unit_divisor / delta; lnet_stat_result.lnet_total_rcvperf += perf; if (lnet_stat_result.lnet_min_rcvperf > perf || @@ -1676,7 +1676,7 @@ lst_cal_lnet_stat(float delta, struct lnet_counters *lnet_new, if (lnet_stat_result.lnet_max_rcvperf < perf) lnet_stat_result.lnet_max_rcvperf = perf; - rate = (lnet_new->send_count - lnet_old->send_count) / delta; + rate = (lnet_new->lcc_send_count - lnet_old->lcc_send_count) / delta; lnet_stat_result.lnet_total_sndrate += rate; if (lnet_stat_result.lnet_min_sndrate > rate || @@ -1686,7 +1686,7 @@ lst_cal_lnet_stat(float 
delta, struct lnet_counters *lnet_new, if (lnet_stat_result.lnet_max_sndrate < rate) lnet_stat_result.lnet_max_sndrate = rate; - rate = (lnet_new->recv_count - lnet_old->recv_count) / delta; + rate = (lnet_new->lcc_recv_count - lnet_old->lcc_recv_count) / delta; lnet_stat_result.lnet_total_rcvrate += rate; if (lnet_stat_result.lnet_min_rcvrate > rate || @@ -1772,17 +1772,17 @@ lst_print_stat(char *name, struct list_head *resultp, int idx, int lnet, int bwrt, int rdwr, int type, int mbs) { - struct list_head tmp[2]; + struct list_head tmp[2]; struct lstcon_rpc_ent *new; struct lstcon_rpc_ent *old; - struct sfw_counters *sfwk_new; - struct sfw_counters *sfwk_old; - struct srpc_counters *srpc_new; - struct srpc_counters *srpc_old; - struct lnet_counters *lnet_new; - struct lnet_counters *lnet_old; - float delta; - int errcount = 0; + struct sfw_counters *sfwk_new; + struct sfw_counters *sfwk_old; + struct srpc_counters *srpc_new; + struct srpc_counters *srpc_old; + struct lnet_counters_common *lnet_new; + struct lnet_counters_common *lnet_old; + float delta; + int errcount = 0; INIT_LIST_HEAD(&tmp[0]); INIT_LIST_HEAD(&tmp[1]); @@ -1825,28 +1825,32 @@ lst_print_stat(char *name, struct list_head *resultp, sfwk_new = (struct sfw_counters *)&new->rpe_payload[0]; sfwk_old = (struct sfw_counters *)&old->rpe_payload[0]; - srpc_new = (struct srpc_counters *)((char *)sfwk_new + sizeof(*sfwk_new)); - srpc_old = (struct srpc_counters *)((char *)sfwk_old + sizeof(*sfwk_old)); + srpc_new = (struct srpc_counters *)((char *)sfwk_new + + sizeof(*sfwk_new)); + srpc_old = (struct srpc_counters *)((char *)sfwk_old + + sizeof(*sfwk_old)); - lnet_new = (struct lnet_counters *)((char *)srpc_new + sizeof(*srpc_new)); - lnet_old = (struct lnet_counters *)((char *)srpc_old + sizeof(*srpc_old)); + lnet_new = (struct lnet_counters_common *)((char *)srpc_new + + sizeof(*srpc_new)); + lnet_old = (struct lnet_counters_common *)((char *)srpc_old + + sizeof(*srpc_old)); /* Prior to version 2.3, the 
running_ms field was a counter for * the number of running tests. We are looking at this value * to determine if it is a millisecond timestamep (>= 2.3) or a * test counter (< 2.3). The number 500 is being used for this * barrier as the test counter should never get this high, and - * the timestamp should never get this low. */ - + * the timestamp should never get this low. + */ if (sfwk_new->running_ms > 500) { /* use the timestamp from the remote node, not our * rpe_stamp from when we copied up the data out of - * the kernel */ - - delta = (float) (sfwk_new->running_ms - + * the kernel. + */ + delta = (float)(sfwk_new->running_ms - sfwk_old->running_ms) / 1000; } else { - struct timeval tv; + struct timeval tv; lst_timeval_diff(&new->rpe_stamp, &old->rpe_stamp, &tv); delta = tv.tv_sec + (float)tv.tv_usec / 1000000;