kiblnd_peer_decref(peer_ni);
}
+
+static void
+kiblnd_debug_rx(struct kib_rx *rx)
+{
+ CDEBUG(D_CONSOLE, " %p msg_type %x cred %d\n",
+ rx, rx->rx_msg->ibm_type,
+ rx->rx_msg->ibm_credits);
+}
+
+static void
+kiblnd_debug_tx(struct kib_tx *tx)
+{
+ CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lld "
+ "cookie %#llx msg %s%s type %x cred %d\n",
+ tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting,
+ tx->tx_status, ktime_to_ns(tx->tx_deadline), tx->tx_cookie,
+ tx->tx_lntmsg[0] == NULL ? "-" : "!",
+ tx->tx_lntmsg[1] == NULL ? "-" : "!",
+ tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits);
+}
+
+static void
+kiblnd_debug_conn(struct kib_conn *conn)
+{
+ struct list_head *tmp;
+ int i;
+
+ spin_lock(&conn->ibc_lock);
+
+ CDEBUG(D_CONSOLE, "conn[%d] %p [version %x] -> %s:\n",
+ atomic_read(&conn->ibc_refcount), conn,
+ conn->ibc_version, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ CDEBUG(D_CONSOLE, " state %d nposted %d/%d cred %d o_cred %d "
+ " r_cred %d\n", conn->ibc_state, conn->ibc_noops_posted,
+ conn->ibc_nsends_posted, conn->ibc_credits,
+ conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
+ CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error);
+
+ CDEBUG(D_CONSOLE, " early_rxs:\n");
+ list_for_each(tmp, &conn->ibc_early_rxs)
+ kiblnd_debug_rx(list_entry(tmp, struct kib_rx, rx_list));
+
+ CDEBUG(D_CONSOLE, " tx_noops:\n");
+ list_for_each(tmp, &conn->ibc_tx_noops)
+ kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
+
+ CDEBUG(D_CONSOLE, " tx_queue_nocred:\n");
+ list_for_each(tmp, &conn->ibc_tx_queue_nocred)
+ kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
+
+ CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n");
+ list_for_each(tmp, &conn->ibc_tx_queue_rsrvd)
+ kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
+
+ CDEBUG(D_CONSOLE, " tx_queue:\n");
+ list_for_each(tmp, &conn->ibc_tx_queue)
+ kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
+
+ CDEBUG(D_CONSOLE, " active_txs:\n");
+ list_for_each(tmp, &conn->ibc_active_txs)
+ kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
+
+ CDEBUG(D_CONSOLE, " rxs:\n");
+ for (i = 0; i < IBLND_RX_MSGS(conn); i++)
+ kiblnd_debug_rx(&conn->ibc_rxs[i]);
+
+ spin_unlock(&conn->ibc_lock);
+}
+
+static void
+kiblnd_dump_peer_debug_info(struct kib_peer_ni *peer_ni)
+{
+ struct kib_conn *conn;
+ struct kib_conn *cnxt;
+ int count = 0;
+
+ CDEBUG(D_CONSOLE, "[last_alive, races, reconnected, error]: %lld, %d, %d, %d\n",
+ peer_ni->ibp_last_alive,
+ peer_ni->ibp_races,
+ peer_ni->ibp_reconnected,
+ peer_ni->ibp_error);
+ list_for_each_entry_safe(conn, cnxt, &peer_ni->ibp_conns,
+ ibc_list) {
+ CDEBUG(D_CONSOLE, "Conn %d:\n", count);
+ kiblnd_debug_conn(conn);
+ count++;
+ }
+}
+
+
static int
-kiblnd_get_peer_info(struct lnet_ni *ni, int index,
+kiblnd_get_peer_info(struct lnet_ni *ni, lnet_nid_t nid, int index,
lnet_nid_t *nidp, int *count)
{
struct kib_peer_ni *peer_ni;
if (peer_ni->ibp_ni != ni)
continue;
+ if (peer_ni->ibp_nid == nid)
+ kiblnd_dump_peer_debug_info(peer_ni);
+
if (index-- > 0)
continue;
}
static void
-kiblnd_debug_rx(struct kib_rx *rx)
-{
- CDEBUG(D_CONSOLE, " %p msg_type %x cred %d\n",
- rx, rx->rx_msg->ibm_type,
- rx->rx_msg->ibm_credits);
-}
-
-static void
-kiblnd_debug_tx(struct kib_tx *tx)
-{
- CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lld "
- "cookie %#llx msg %s%s type %x cred %d\n",
- tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting,
- tx->tx_status, ktime_to_ns(tx->tx_deadline), tx->tx_cookie,
- tx->tx_lntmsg[0] == NULL ? "-" : "!",
- tx->tx_lntmsg[1] == NULL ? "-" : "!",
- tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits);
-}
-
-void
-kiblnd_debug_conn(struct kib_conn *conn)
-{
- struct list_head *tmp;
- int i;
-
- spin_lock(&conn->ibc_lock);
-
- CDEBUG(D_CONSOLE, "conn[%d] %p [version %x] -> %s:\n",
- atomic_read(&conn->ibc_refcount), conn,
- conn->ibc_version, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- CDEBUG(D_CONSOLE, " state %d nposted %d/%d cred %d o_cred %d "
- " r_cred %d\n", conn->ibc_state, conn->ibc_noops_posted,
- conn->ibc_nsends_posted, conn->ibc_credits,
- conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
- CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error);
-
- CDEBUG(D_CONSOLE, " early_rxs:\n");
- list_for_each(tmp, &conn->ibc_early_rxs)
- kiblnd_debug_rx(list_entry(tmp, struct kib_rx, rx_list));
-
- CDEBUG(D_CONSOLE, " tx_noops:\n");
- list_for_each(tmp, &conn->ibc_tx_noops)
- kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue_nocred:\n");
- list_for_each(tmp, &conn->ibc_tx_queue_nocred)
- kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n");
- list_for_each(tmp, &conn->ibc_tx_queue_rsrvd)
- kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
-
- CDEBUG(D_CONSOLE, " tx_queue:\n");
- list_for_each(tmp, &conn->ibc_tx_queue)
- kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
-
- CDEBUG(D_CONSOLE, " active_txs:\n");
- list_for_each(tmp, &conn->ibc_active_txs)
- kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list));
-
- CDEBUG(D_CONSOLE, " rxs:\n");
- for (i = 0; i < IBLND_RX_MSGS(conn); i++)
- kiblnd_debug_rx(&conn->ibc_rxs[i]);
-
- spin_unlock(&conn->ibc_lock);
-}
-
-static void
kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid)
{
/* XXX There is no path record for iWARP, set by netdev->change_mtu? */
lnet_nid_t nid = 0;
int count = 0;
- rc = kiblnd_get_peer_info(ni, data->ioc_count,
+ rc = kiblnd_get_peer_info(ni, data->ioc_nid, data->ioc_count,
&nid, &count);
data->ioc_nid = nid;
data->ioc_count = count;
ksocknal_peer_decref(peer_ni);
}
+
+static void
+ksocknal_dump_peer_debug_info(struct ksock_peer_ni *peer_ni)
+{
+ struct ksock_conn *conn;
+ struct list_head *ctmp;
+ struct list_head *txtmp;
+ int ccount = 0;
+ int txcount = 0;
+
+ list_for_each(ctmp, &peer_ni->ksnp_conns) {
+ conn = list_entry(ctmp, struct ksock_conn, ksnc_list);
+
+ if (!list_empty(&conn->ksnc_tx_queue))
+ list_for_each(txtmp, &conn->ksnc_tx_queue) txcount++;
+
+ CDEBUG(D_CONSOLE, "Conn %d [type, closing, crefcnt, srefcnt]: %d, %d, %d, %d\n",
+ ccount,
+ conn->ksnc_type,
+ conn->ksnc_closing,
+ refcount_read(&conn->ksnc_conn_refcount),
+ refcount_read(&conn->ksnc_sock_refcount));
+ CDEBUG(D_CONSOLE, "Conn %d rx [scheduled, ready, state]: %d, %d, %d\n",
+ ccount,
+ conn->ksnc_rx_scheduled,
+ conn->ksnc_rx_ready,
+ conn->ksnc_rx_state);
+ CDEBUG(D_CONSOLE, "Conn %d tx [txqcnt, scheduled, last_post, ready, deadline]: %d, %d, %lld, %d, %lld\n",
+ ccount,
+ txcount,
+ conn->ksnc_tx_scheduled,
+ conn->ksnc_tx_last_post,
+ conn->ksnc_rx_ready,
+ conn->ksnc_rx_deadline);
+
+ if (conn->ksnc_scheduler)
+ CDEBUG(D_CONSOLE, "Conn %d sched [nconns, cpt]: %d, %d\n",
+ ccount,
+ conn->ksnc_scheduler->kss_nconns,
+ conn->ksnc_scheduler->kss_cpt);
+
+ txcount = 0;
+ ccount++;
+ }
+}
+
static int
ksocknal_get_peer_info(struct lnet_ni *ni, int index,
struct lnet_processid *id, __u32 *myip, __u32 *peer_ip,
if (index-- > 0)
continue;
+ *id = peer_ni->ksnp_id;
conn_cb = peer_ni->ksnp_conn_cb;
if (conn_cb == NULL) {
- *id = peer_ni->ksnp_id;
*myip = 0;
*peer_ip = 0;
*port = 0;
*share_count = 0;
rc = 0;
} else {
- *id = peer_ni->ksnp_id;
+ ksocknal_dump_peer_debug_info(peer_ni);
+
if (conn_cb->ksnr_addr.ss_family == AF_INET) {
struct sockaddr_in *sa =
(void *)&conn_cb->ksnr_addr;
ni->ni_net_ns);
*peer_ip = ntohl(sa->sin_addr.s_addr);
*port = ntohs(sa->sin_port);
+
} else {
*myip = 0xFFFFFFFF;
*peer_ip = 0xFFFFFFFF;
}
case IOC_LIBCFS_GET_PEER: {
- __u32 myip = 0;
- __u32 ip = 0;
- int port = 0;
- int conn_count = 0;
- int share_count = 0;
+ __u32 myip = 0;
+ __u32 ip = 0;
+ int port = 0;
+ int conn_count = 0;
+ int share_count = 0;
rc = ksocknal_get_peer_info(ni, data->ioc_count,
&id, &myip, &ip, &port,
&conn_count, &share_count);
if (rc != 0)
return rc;
+
if (!nid_is_nid4(&id.nid))
return -EINVAL;
data->ioc_nid = lnet_nid_to_nid4(&id.nid);
return rc;
}
+int lustre_lnet_show_peer_debug_info(char *peer_nid, int seq_no,
+ struct cYAML **err_rc)
+{
+ struct libcfs_ioctl_data data;
+ int rc;
+ char err_str[LNET_MAX_STR_LEN] = "Error";
+ lnet_nid_t pnid = LNET_NID_ANY;
+
+ if (!peer_nid) {
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ snprintf(err_str, LNET_MAX_STR_LEN,
+ "--nid must be specified");
+ goto out;
+ }
+
+ pnid = libcfs_str2nid(peer_nid);
+ if (pnid == LNET_NID_ANY) {
+ rc = LUSTRE_CFG_RC_BAD_PARAM;
+ snprintf(err_str, LNET_MAX_STR_LEN,
+ "badly formatted primary NID: %s", peer_nid);
+ goto out;
+ }
+
+ LIBCFS_IOC_INIT(data);
+ data.ioc_net = LNET_NIDNET(pnid);
+ data.ioc_nid = pnid;
+
+ rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_PEER, &data);
+
+out:
+ if (rc != 0)
+ cYAML_build_error(rc, seq_no, "debug", "peer", err_str,
+ err_rc);
+
+ return rc;
+}
+
int lustre_lnet_show_local_ni_recovq(int seq_no, struct cYAML **show_rc,
struct cYAML **err_rc)
{
int lustre_lnet_show_udsp(int idx, int seq_no, struct cYAML **show_rc,
struct cYAML **err_rc);
+/* lustre_lnet_show_peer_debug_info
+ * dump peer debug info
+ * peer_nid - nid of the peer to show
+ * seq_no - sequence number of the request
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_show_peer_debug_info(char *peer_nid, int seq_no,
+ struct cYAML **err_rc);
+
#endif /* LIB_LNET_CONFIG_API_H */
static int jt_udsp(int argc, char **argv);
static int jt_setup_mrrouting(int argc, char **argv);
static int jt_calc_cpt_of_nid(int argc, char **argv);
+static int jt_show_peer_debug_info(int argc, char **argv);
command_t cmd_list[] = {
{"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"},
{"import", jt_import, 0, "import FILE.yaml"},
{"export", jt_export, 0, "export FILE.yaml"},
{"stats", jt_stats, 0, "stats {show | help}"},
- {"debug", jt_debug, 0, "debug recovery {local | peer}"},
+ {"debug", jt_debug, 0, "debug {recovery {local | peer} | peer}"},
{"global", jt_global, 0, "global {show | help}"},
{"peer", jt_peers, 0, "peer {add | del | show | list | set | help}"},
{"ping", jt_ping, 0, "ping nid,[nid,...]"},
{"recovery", jt_show_recovery, 0, "list recovery queues\n"
"\t--local : list local recovery queue\n"
"\t--peer : list peer recovery queue\n"},
+ {"peer", jt_show_peer_debug_info, 0, "show peer debug info\n"
+ "\t--nid: peer's NID\n"},
{ 0, 0, 0, NULL }
};
{
int rc, opt;
struct cYAML *err_rc = NULL, *show_rc = NULL;
-
const char *const short_options = "lp";
static const struct option long_options[] = {
{ .name = "local", .has_arg = no_argument, .val = 'l' },
{ .name = "peer", .has_arg = no_argument, .val = 'p' },
{ .name = NULL } };
- rc = check_cmd(debug_cmds, "recovery", NULL, 0, argc, argv);
+ rc = check_cmd(debug_cmds, "debug", "recovery", 0, argc, argv);
if (rc)
return rc;
return rc;
}
+static int jt_show_peer_debug_info(int argc, char **argv)
+{
+ int rc, opt;
+ struct cYAML *err_rc = NULL;
+ char *peer_nid = optarg;
+ const char *const short_opts = "k";
+ const struct option long_opts[] = {
+ { .name = "nid", .has_arg = required_argument, .val = 'k' },
+ { .name = NULL } };
+
+ rc = check_cmd(debug_cmds, "debug", "peer", 0, argc, argv);
+
+ if (rc)
+ return rc;
+
+ while ((opt = getopt_long(argc, argv, short_opts,
+ long_opts, NULL)) != -1) {
+ switch (opt) {
+ case 'k':
+ peer_nid = optarg;
+ break;
+ default:
+ return 0;
+ }
+ }
+
+ rc = lustre_lnet_show_peer_debug_info(peer_nid, -1, &err_rc);
+
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
+ cYAML_print_tree2file(stderr, err_rc);
+
+ cYAML_free_tree(err_rc);
+
+ return rc;
+}
+
static int jt_show_net(int argc, char **argv)
{
char *network = NULL;
{
int rc;
- rc = check_cmd(debug_cmds, "recovery", NULL, 2, argc, argv);
+ rc = check_cmd(debug_cmds, "debug", NULL, 2, argc, argv);
if (rc)
return rc;
.SS "LNet Statistics"
.
.TP
-\fBlnetctl stats\fR
+\fBlnetctl stats\fR show
Show LNET statistics
.
.br
\fBlnetctl udsp\fR show
Show all user-defined selection policies in the system\. The policies are dumped in YAML form\.
.
+.SS "Debugging"
+.
+.TP
+\fBlnetctl debug\fR recovery
+List NIDs on recovery queue
+.
+.br
+\-\-local : list NIDs on local recovery queue
+.
+.br
+\-\-peer : list NIDs on peer recovery queue
+.
+.br
+.
+.TP
+\fBlnetctl debug\fR peer
+Dump peer debug info to debug log and console
+.
+.br
+\-\-nid : peer's NID
+.
.br
.
.SH "OPTIONS"
}
run_test 301 "Check for dynamic adds of same/wrong interface (memory leak)"
+test_302() {
+ ! [[ $NETTYPE =~ (tcp|o2ib) ]] && skip "Need tcp or o2ib NETTYPE"
+ reinit_dlc || return $?
+
+ add_net "${NETTYPE}" "${INTERFACES[0]}" || return $?
+
+ local nid=$($LCTL list_nids)
+
+ do_lnetctl ping ${nid} ||
+ error "pinging self failed $?"
+ do_lnetctl debug peer --nid ${nid} ||
+ error "failed to dump peer debug info $?"
+}
+run_test 302 "Check that peer debug info can be dumped"
+
complete $SECONDS
cleanup_testsuite