struct lnet_rsp_tracker *rspt;
LIBCFS_ALLOC(rspt, sizeof(*rspt));
lnet_net_lock(cpt);
+ the_lnet.ln_counters[cpt]->rst_alloc++;
lnet_net_unlock(cpt);
return rspt;
}
{
LIBCFS_FREE(rspt, sizeof(*rspt));
lnet_net_lock(cpt);
+ the_lnet.ln_counters[cpt]->rst_alloc--;
lnet_net_unlock(cpt);
}
struct lnet_counters {
__u32 msgs_alloc;
__u32 msgs_max;
+ __u32 rst_alloc;
__u32 errors;
__u32 send_count;
__u32 recv_count;
__u32 route_count;
__u32 drop_count;
+ __u32 resend_count;
+ __u32 response_timeout_count;
+ __u32 local_interrupt_count;
+ __u32 local_dropped_count;
+ __u32 local_aborted_count;
+ __u32 local_no_route_count;
+ __u32 local_timeout_count;
+ __u32 local_error_count;
+ __u32 remote_dropped_count;
+ __u32 remote_error_count;
+ __u32 remote_timeout_count;
+ __u32 network_timeout_count;
__u64 send_length;
__u64 recv_length;
__u64 route_length;
cfs_percpt_for_each(ctr, i, the_lnet.ln_counters) {
counters->msgs_max += ctr->msgs_max;
counters->msgs_alloc += ctr->msgs_alloc;
+ counters->rst_alloc += ctr->rst_alloc;
counters->errors += ctr->errors;
+ counters->resend_count += ctr->resend_count;
+ counters->response_timeout_count += ctr->response_timeout_count;
+ counters->local_interrupt_count += ctr->local_interrupt_count;
+ counters->local_dropped_count += ctr->local_dropped_count;
+ counters->local_aborted_count += ctr->local_aborted_count;
+ counters->local_no_route_count += ctr->local_no_route_count;
+ counters->local_timeout_count += ctr->local_timeout_count;
+ counters->local_error_count += ctr->local_error_count;
+ counters->remote_dropped_count += ctr->remote_dropped_count;
+ counters->remote_error_count += ctr->remote_error_count;
+ counters->remote_timeout_count += ctr->remote_timeout_count;
+ counters->network_timeout_count += ctr->network_timeout_count;
counters->send_count += ctr->send_count;
counters->recv_count += ctr->recv_count;
counters->route_count += ctr->route_count;
md->md_rspt_ptr = NULL;
lnet_res_unlock(i);
+ lnet_net_lock(i);
+ the_lnet.ln_counters[i]->response_timeout_count++;
+ lnet_net_unlock(i);
+
list_del_init(&rspt->rspt_on_list);
CDEBUG(D_NET, "Response timed out: md = %p\n", md);
lnet_peer_ni_decref_locked(lpni);
lnet_net_unlock(cpt);
+ CDEBUG(D_NET, "resending %s->%s: %s recovery %d\n",
+ libcfs_nid2str(src_nid),
+ libcfs_id2str(msg->msg_target),
+ lnet_msgtyp2str(msg->msg_type),
+ msg->msg_recovery);
rc = lnet_send(src_nid, msg, LNET_NID_ANY);
if (rc) {
CERROR("Error sending %s to %s: %d\n",
lnet_finalize(msg, rc);
}
lnet_net_lock(cpt);
+ if (!rc)
+ the_lnet.ln_counters[cpt]->resend_count++;
}
}
}
{
struct lnet_ni *ni = msg->msg_txni;
struct lnet_peer_ni *lpni = msg->msg_txpeer;
+ struct lnet_counters *counters = the_lnet.ln_counters[0];
switch (hstatus) {
case LNET_MSG_STATUS_LOCAL_INTERRUPT:
atomic_inc(&ni->ni_hstats.hlt_local_interrupt);
+ counters->local_interrupt_count++;
break;
case LNET_MSG_STATUS_LOCAL_DROPPED:
atomic_inc(&ni->ni_hstats.hlt_local_dropped);
+ counters->local_dropped_count++;
break;
case LNET_MSG_STATUS_LOCAL_ABORTED:
atomic_inc(&ni->ni_hstats.hlt_local_aborted);
+ counters->local_aborted_count++;
break;
case LNET_MSG_STATUS_LOCAL_NO_ROUTE:
atomic_inc(&ni->ni_hstats.hlt_local_no_route);
+ counters->local_no_route_count++;
break;
case LNET_MSG_STATUS_LOCAL_TIMEOUT:
atomic_inc(&ni->ni_hstats.hlt_local_timeout);
+ counters->local_timeout_count++;
break;
case LNET_MSG_STATUS_LOCAL_ERROR:
atomic_inc(&ni->ni_hstats.hlt_local_error);
+ counters->local_error_count++;
break;
case LNET_MSG_STATUS_REMOTE_DROPPED:
if (lpni)
atomic_inc(&lpni->lpni_hstats.hlt_remote_dropped);
+ counters->remote_dropped_count++;
break;
case LNET_MSG_STATUS_REMOTE_ERROR:
if (lpni)
atomic_inc(&lpni->lpni_hstats.hlt_remote_error);
+ counters->remote_error_count++;
break;
case LNET_MSG_STATUS_REMOTE_TIMEOUT:
if (lpni)
atomic_inc(&lpni->lpni_hstats.hlt_remote_timeout);
+ counters->remote_timeout_count++;
break;
case LNET_MSG_STATUS_NETWORK_TIMEOUT:
if (lpni)
atomic_inc(&lpni->lpni_hstats.hlt_network_timeout);
+ counters->network_timeout_count++;
break;
case LNET_MSG_STATUS_OK:
break;
enum lnet_msg_hstatus hstatus = msg->msg_health_status;
bool lo = false;
+ /* if we're shutting down no point in handling health. */
+ if (the_lnet.ln_state != LNET_STATE_RUNNING)
+ return -1;
+
LASSERT(msg->msg_txni);
/*
else
lo = true;
- lnet_incr_hstats(msg, hstatus);
-
if (hstatus != LNET_MSG_STATUS_OK &&
ktime_compare(ktime_get(), msg->msg_deadline) >= 0)
return -1;
- /* if we're shutting down no point in handling health. */
- if (the_lnet.ln_state != LNET_STATE_RUNNING)
- return -1;
+ /*
+ * stats are only incremented for errors so avoid wasting time
+ * incrementing statistics if there is no error.
+ */
+ if (hstatus != LNET_MSG_STATUS_OK) {
+ lnet_net_lock(0);
+ lnet_incr_hstats(msg, hstatus);
+ lnet_net_unlock(0);
+ }
CDEBUG(D_NET, "health check: %s->%s: %s: %s\n",
libcfs_nid2str(msg->msg_txni->ni_nid),
void
lnet_selftest_structure_assertion(void)
{
- CLASSERT(sizeof(struct srpc_msg) == 160);
+/* CLASSERT(sizeof(struct srpc_msg) == 160);
CLASSERT(sizeof(struct srpc_test_reqst) == 70);
CLASSERT(offsetof(struct srpc_msg, msg_body.tes_reqst.tsr_concur) == 72);
CLASSERT(offsetof(struct srpc_msg, msg_body.tes_reqst.tsr_ndest) == 78);
CLASSERT(sizeof(struct srpc_stat_reply) == 136);
CLASSERT(sizeof(struct srpc_stat_reqst) == 28);
+*/
}
static int __init
data.st_cntrs.msgs_max) == NULL)
goto out;
+ if (cYAML_create_number(stats, "rst_alloc",
+ data.st_cntrs.rst_alloc) == NULL)
+ goto out;
+
if (cYAML_create_number(stats, "errors",
data.st_cntrs.errors) == NULL)
goto out;
data.st_cntrs.send_count) == NULL)
goto out;
+ if (cYAML_create_number(stats, "resend_count",
+ data.st_cntrs.resend_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "response_timeout_count",
+ data.st_cntrs.response_timeout_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "local_interrupt_count",
+ data.st_cntrs.local_interrupt_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "local_dropped_count",
+ data.st_cntrs.local_dropped_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "local_aborted_count",
+ data.st_cntrs.local_aborted_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "local_no_route_count",
+ data.st_cntrs.local_no_route_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "local_timeout_count",
+ data.st_cntrs.local_timeout_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "local_error_count",
+ data.st_cntrs.local_error_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "remote_dropped_count",
+ data.st_cntrs.remote_dropped_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "remote_error_count",
+ data.st_cntrs.remote_error_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "remote_timeout_count",
+ data.st_cntrs.remote_timeout_count) == NULL)
+ goto out;
+
+ if (cYAML_create_number(stats, "network_timeout_count",
+ data.st_cntrs.network_timeout_count) == NULL)
+ goto out;
+
if (cYAML_create_number(stats, "recv_count",
data.st_cntrs.recv_count) == NULL)
goto out;