From 950e59ced18d49e9fdd31c1e9de43b89a0bc1c1d Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Mon, 28 Feb 2022 11:04:00 -0800 Subject: [PATCH] LU-15234 lnet: add mechanism for dumping lnd peer debug info Add ability to dump lnd peer debug info: lnetctl debug peer --nid= The debug info is dumped to the log as D_CONSOLE by the respective lnd and can be retrieved with "lctl dk" or seen in syslog. This mechanism has been added for socklnd and o2iblnd peers. Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Serguei Smirnov Change-Id: Ia9c4d59143206bcb7ec43806594cf0cfaed5f0a9 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48566 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin --- lnet/klnds/o2iblnd/o2iblnd.c | 165 +++++++++++++++++++--------------- lnet/klnds/socklnd/socklnd.c | 63 +++++++++++-- lnet/utils/lnetconfig/liblnetconfig.c | 37 ++++++++ lnet/utils/lnetconfig/liblnetconfig.h | 10 +++ lnet/utils/lnetctl.c | 46 +++++++++- lustre/doc/lnetctl.8 | 23 ++++- lustre/tests/sanity-lnet.sh | 15 ++++ 7 files changed, 277 insertions(+), 82 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index e31d0d6..30cadfa 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -424,8 +424,98 @@ kiblnd_unlink_peer_locked(struct kib_peer_ni *peer_ni) kiblnd_peer_decref(peer_ni); } + +static void +kiblnd_debug_rx(struct kib_rx *rx) +{ + CDEBUG(D_CONSOLE, " %p msg_type %x cred %d\n", + rx, rx->rx_msg->ibm_type, + rx->rx_msg->ibm_credits); +} + +static void +kiblnd_debug_tx(struct kib_tx *tx) +{ + CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lld " + "cookie %#llx msg %s%s type %x cred %d\n", + tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting, + tx->tx_status, ktime_to_ns(tx->tx_deadline), tx->tx_cookie, + tx->tx_lntmsg[0] == NULL ? "-" : "!", + tx->tx_lntmsg[1] == NULL ? "-" : "!", + tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits); +} + +static void +kiblnd_debug_conn(struct kib_conn *conn) +{ + struct list_head *tmp; + int i; + + spin_lock(&conn->ibc_lock); + + CDEBUG(D_CONSOLE, "conn[%d] %p [version %x] -> %s:\n", + atomic_read(&conn->ibc_refcount), conn, + conn->ibc_version, libcfs_nid2str(conn->ibc_peer->ibp_nid)); + CDEBUG(D_CONSOLE, " state %d nposted %d/%d cred %d o_cred %d " + " r_cred %d\n", conn->ibc_state, conn->ibc_noops_posted, + conn->ibc_nsends_posted, conn->ibc_credits, + conn->ibc_outstanding_credits, conn->ibc_reserved_credits); + CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error); + + CDEBUG(D_CONSOLE, " early_rxs:\n"); + list_for_each(tmp, &conn->ibc_early_rxs) + kiblnd_debug_rx(list_entry(tmp, struct kib_rx, rx_list)); + + CDEBUG(D_CONSOLE, " tx_noops:\n"); + list_for_each(tmp, &conn->ibc_tx_noops) + kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); + + CDEBUG(D_CONSOLE, " tx_queue_nocred:\n"); + list_for_each(tmp, &conn->ibc_tx_queue_nocred) + kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); + + CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n"); + list_for_each(tmp, &conn->ibc_tx_queue_rsrvd) + kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); + + CDEBUG(D_CONSOLE, " tx_queue:\n"); + list_for_each(tmp, &conn->ibc_tx_queue) + kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); + + CDEBUG(D_CONSOLE, " active_txs:\n"); + list_for_each(tmp, &conn->ibc_active_txs) + kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); + + CDEBUG(D_CONSOLE, " rxs:\n"); + for (i = 0; i < IBLND_RX_MSGS(conn); i++) + kiblnd_debug_rx(&conn->ibc_rxs[i]); + + spin_unlock(&conn->ibc_lock); +} + +static void +kiblnd_dump_peer_debug_info(struct kib_peer_ni *peer_ni) +{ + struct kib_conn *conn; + struct kib_conn *cnxt; + int count = 0; + + CDEBUG(D_CONSOLE, "[last_alive, races, reconnected, error]: %lld, %d, %d, %d\n", + peer_ni->ibp_last_alive, + peer_ni->ibp_races, + peer_ni->ibp_reconnected, + peer_ni->ibp_error); + list_for_each_entry_safe(conn, cnxt, &peer_ni->ibp_conns, + ibc_list) { + CDEBUG(D_CONSOLE, "Conn %d:\n", count); + kiblnd_debug_conn(conn); + count++; + } +} + + static int -kiblnd_get_peer_info(struct lnet_ni *ni, int index, +kiblnd_get_peer_info(struct lnet_ni *ni, lnet_nid_t nid, int index, lnet_nid_t *nidp, int *count) { struct kib_peer_ni *peer_ni; @@ -440,6 +530,9 @@ kiblnd_get_peer_info(struct lnet_ni *ni, int index, if (peer_ni->ibp_ni != ni) continue; + if (peer_ni->ibp_nid == nid) + kiblnd_dump_peer_debug_info(peer_ni); + if (index-- > 0) continue; @@ -557,74 +650,6 @@ kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index) } static void -kiblnd_debug_rx(struct kib_rx *rx) -{ - CDEBUG(D_CONSOLE, " %p msg_type %x cred %d\n", - rx, rx->rx_msg->ibm_type, - rx->rx_msg->ibm_credits); -} - -static void -kiblnd_debug_tx(struct kib_tx *tx) -{ - CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lld " - "cookie %#llx msg %s%s type %x cred %d\n", - tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting, - tx->tx_status, ktime_to_ns(tx->tx_deadline), tx->tx_cookie, - tx->tx_lntmsg[0] == NULL ? "-" : "!", - tx->tx_lntmsg[1] == NULL ? "-" : "!", - tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits); -} - -void -kiblnd_debug_conn(struct kib_conn *conn) -{ - struct list_head *tmp; - int i; - - spin_lock(&conn->ibc_lock); - - CDEBUG(D_CONSOLE, "conn[%d] %p [version %x] -> %s:\n", - atomic_read(&conn->ibc_refcount), conn, - conn->ibc_version, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - CDEBUG(D_CONSOLE, " state %d nposted %d/%d cred %d o_cred %d " - " r_cred %d\n", conn->ibc_state, conn->ibc_noops_posted, - conn->ibc_nsends_posted, conn->ibc_credits, - conn->ibc_outstanding_credits, conn->ibc_reserved_credits); - CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error); - - CDEBUG(D_CONSOLE, " early_rxs:\n"); - list_for_each(tmp, &conn->ibc_early_rxs) - kiblnd_debug_rx(list_entry(tmp, struct kib_rx, rx_list)); - - CDEBUG(D_CONSOLE, " tx_noops:\n"); - list_for_each(tmp, &conn->ibc_tx_noops) - kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_nocred:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_nocred) - kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_rsrvd) - kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue:\n"); - list_for_each(tmp, &conn->ibc_tx_queue) - kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); - - CDEBUG(D_CONSOLE, " active_txs:\n"); - list_for_each(tmp, &conn->ibc_active_txs) - kiblnd_debug_tx(list_entry(tmp, struct kib_tx, tx_list)); - - CDEBUG(D_CONSOLE, " rxs:\n"); - for (i = 0; i < IBLND_RX_MSGS(conn); i++) - kiblnd_debug_rx(&conn->ibc_rxs[i]); - - spin_unlock(&conn->ibc_lock); -} - -static void kiblnd_setup_mtu_locked(struct rdma_cm_id *cmid) { /* XXX There is no path record for iWARP, set by netdev->change_mtu? */ @@ -1148,7 +1173,7 @@ kiblnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) lnet_nid_t nid = 0; int count = 0; - rc = kiblnd_get_peer_info(ni, data->ioc_count, + rc = kiblnd_get_peer_info(ni, data->ioc_nid, data->ioc_count, &nid, &count); data->ioc_nid = nid; data->ioc_count = count; diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 216a578..6e800f1 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -277,6 +277,52 @@ ksocknal_unlink_peer_locked(struct ksock_peer_ni *peer_ni) ksocknal_peer_decref(peer_ni); } + +static void +ksocknal_dump_peer_debug_info(struct ksock_peer_ni *peer_ni) +{ + struct ksock_conn *conn; + struct list_head *ctmp; + struct list_head *txtmp; + int ccount = 0; + int txcount = 0; + + list_for_each(ctmp, &peer_ni->ksnp_conns) { + conn = list_entry(ctmp, struct ksock_conn, ksnc_list); + + if (!list_empty(&conn->ksnc_tx_queue)) + list_for_each(txtmp, &conn->ksnc_tx_queue) txcount++; + + CDEBUG(D_CONSOLE, "Conn %d [type, closing, crefcnt, srefcnt]: %d, %d, %d, %d\n", + ccount, + conn->ksnc_type, + conn->ksnc_closing, + refcount_read(&conn->ksnc_conn_refcount), + refcount_read(&conn->ksnc_sock_refcount)); + CDEBUG(D_CONSOLE, "Conn %d rx [scheduled, ready, state]: %d, %d, %d\n", + ccount, + conn->ksnc_rx_scheduled, + conn->ksnc_rx_ready, + conn->ksnc_rx_state); + CDEBUG(D_CONSOLE, "Conn %d tx [txqcnt, scheduled, last_post, ready, deadline]: %d, %d, %lld, %d, %lld\n", + ccount, + txcount, + conn->ksnc_tx_scheduled, + conn->ksnc_tx_last_post, + conn->ksnc_rx_ready, + conn->ksnc_rx_deadline); + + if (conn->ksnc_scheduler) + CDEBUG(D_CONSOLE, "Conn %d sched [nconns, cpt]: %d, %d\n", + ccount, + conn->ksnc_scheduler->kss_nconns, + conn->ksnc_scheduler->kss_cpt); + + txcount = 0; + ccount++; + } +} + static int ksocknal_get_peer_info(struct lnet_ni *ni, int index, struct lnet_processid *id, __u32 *myip, __u32 *peer_ip, @@ -296,9 +342,9 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, if (index-- > 0) continue; + *id = peer_ni->ksnp_id; conn_cb = peer_ni->ksnp_conn_cb; if (conn_cb == NULL) { - *id = peer_ni->ksnp_id; *myip = 0; *peer_ip = 0; *port = 0; @@ -306,7 +352,8 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, *share_count = 0; rc = 0; } else { - *id = peer_ni->ksnp_id; + ksocknal_dump_peer_debug_info(peer_ni); + if (conn_cb->ksnr_addr.ss_family == AF_INET) { struct sockaddr_in *sa = (void *)&conn_cb->ksnr_addr; @@ -317,6 +364,7 @@ ksocknal_get_peer_info(struct lnet_ni *ni, int index, ni->ni_net_ns); *peer_ip = ntohl(sa->sin_addr.s_addr); *port = ntohs(sa->sin_port); + } else { *myip = 0xFFFFFFFF; *peer_ip = 0xFFFFFFFF; @@ -1745,17 +1793,18 @@ ksocknal_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg) } case IOC_LIBCFS_GET_PEER: { - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - int conn_count = 0; - int share_count = 0; + __u32 myip = 0; + __u32 ip = 0; + int port = 0; + int conn_count = 0; + int share_count = 0; rc = ksocknal_get_peer_info(ni, data->ioc_count, &id, &myip, &ip, &port, &conn_count, &share_count); if (rc != 0) return rc; + if (!nid_is_nid4(&id.nid)) return -EINVAL; data->ioc_nid = lnet_nid_to_nid4(&id.nid); diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index bbb412a..4ee7832 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -3986,6 +3986,43 @@ out: return rc; } +int lustre_lnet_show_peer_debug_info(char *peer_nid, int seq_no, + struct cYAML **err_rc) +{ + struct libcfs_ioctl_data data; + int rc; + char err_str[LNET_MAX_STR_LEN] = "Error"; + lnet_nid_t pnid = LNET_NID_ANY; + + if (!peer_nid) { + rc = LUSTRE_CFG_RC_BAD_PARAM; + snprintf(err_str, LNET_MAX_STR_LEN, + "--nid must be specified"); + goto out; + } + + pnid = libcfs_str2nid(peer_nid); + if (pnid == LNET_NID_ANY) { + rc = LUSTRE_CFG_RC_BAD_PARAM; + snprintf(err_str, LNET_MAX_STR_LEN, + "badly formatted primary NID: %s", peer_nid); + goto out; + } + + LIBCFS_IOC_INIT(data); + data.ioc_net = LNET_NIDNET(pnid); + data.ioc_nid = pnid; + + rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_PEER, &data); + +out: + if (rc != 0) + cYAML_build_error(rc, seq_no, "debug", "peer", err_str, + err_rc); + + return rc; +} + int lustre_lnet_show_local_ni_recovq(int seq_no, struct cYAML **show_rc, struct cYAML **err_rc) { diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h index bea08c5..990d3cf 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.h +++ b/lnet/utils/lnetconfig/liblnetconfig.h @@ -946,4 +946,14 @@ int lustre_lnet_del_udsp(unsigned int idx, int seq_no, struct cYAML **err_rc); int lustre_lnet_show_udsp(int idx, int seq_no, struct cYAML **show_rc, struct cYAML **err_rc); +/* lustre_lnet_show_peer_debug_info + * dump peer debug info + * peer_nid - nid of the peer to show + * seq_no - sequence number of the request + * err_rc - [OUT] struct cYAML tree describing the error. Freed by + * caller + */ +int lustre_lnet_show_peer_debug_info(char *peer_nid, int seq_no, + struct cYAML **err_rc); + #endif /* LIB_LNET_CONFIG_API_H */ diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index 8d09520..380d60d 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -93,6 +93,7 @@ static int jt_set_recovery_limit(int argc, char **argv); static int jt_udsp(int argc, char **argv); static int jt_setup_mrrouting(int argc, char **argv); static int jt_calc_cpt_of_nid(int argc, char **argv); +static int jt_show_peer_debug_info(int argc, char **argv); command_t cmd_list[] = { {"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"}, @@ -108,7 +109,7 @@ command_t cmd_list[] = { {"import", jt_import, 0, "import FILE.yaml"}, {"export", jt_export, 0, "export FILE.yaml"}, {"stats", jt_stats, 0, "stats {show | help}"}, - {"debug", jt_debug, 0, "debug recovery {local | peer}"}, + {"debug", jt_debug, 0, "debug {recovery {local | peer} | peer}"}, {"global", jt_global, 0, "global {show | help}"}, {"peer", jt_peers, 0, "peer {add | del | show | list | set | help}"}, {"ping", jt_ping, 0, "ping nid,[nid,...]"}, @@ -197,6 +198,8 @@ command_t debug_cmds[] = { {"recovery", jt_show_recovery, 0, "list recovery queues\n" "\t--local : list local recovery queue\n" "\t--peer : list peer recovery queue\n"}, + {"peer", jt_show_peer_debug_info, 0, "show peer debug info\n" + "\t--nid: peer's NID\n"}, { 0, 0, 0, NULL } }; @@ -1499,14 +1502,13 @@ static int jt_show_recovery(int argc, char **argv) { int rc, opt; struct cYAML *err_rc = NULL, *show_rc = NULL; - const char *const short_options = "lp"; static const struct option long_options[] = { { .name = "local", .has_arg = no_argument, .val = 'l' }, { .name = "peer", .has_arg = no_argument, .val = 'p' }, { .name = NULL } }; - rc = check_cmd(debug_cmds, "recovery", NULL, 0, argc, argv); + rc = check_cmd(debug_cmds, "debug", "recovery", 0, argc, argv); if (rc) return rc; @@ -1535,6 +1537,42 @@ static int jt_show_recovery(int argc, char **argv) return rc; } +static int jt_show_peer_debug_info(int argc, char **argv) +{ + int rc, opt; + struct cYAML *err_rc = NULL; + char *peer_nid = optarg; + const char *const short_opts = "k"; + const struct option long_opts[] = { + { .name = "nid", .has_arg = required_argument, .val = 'k' }, + { .name = NULL } }; + + rc = check_cmd(debug_cmds, "debug", "peer", 0, argc, argv); + + if (rc) + return rc; + + while ((opt = getopt_long(argc, argv, short_opts, + long_opts, NULL)) != -1) { + switch (opt) { + case 'k': + peer_nid = optarg; + break; + default: + return 0; + } + } + + rc = lustre_lnet_show_peer_debug_info(peer_nid, -1, &err_rc); + + if (rc != LUSTRE_CFG_RC_NO_ERR) + cYAML_print_tree2file(stderr, err_rc); + + cYAML_free_tree(err_rc); + + return rc; +} + static int jt_show_net(int argc, char **argv) { char *network = NULL; @@ -1830,7 +1868,7 @@ static int jt_debug(int argc, char **argv) { int rc; - rc = check_cmd(debug_cmds, "recovery", NULL, 2, argc, argv); + rc = check_cmd(debug_cmds, "debug", NULL, 2, argc, argv); if (rc) return rc; diff --git a/lustre/doc/lnetctl.8 b/lustre/doc/lnetctl.8 index d88cf18..381ecea 100644 --- a/lustre/doc/lnetctl.8 +++ b/lustre/doc/lnetctl.8 @@ -329,7 +329,7 @@ of the \fBexport\fR command can be used as input to the \fBimport\fR command\. .SS "LNet Statistics" . .TP -\fBlnetctl stats\fR +\fBlnetctl stats\fR show Show LNET statistics . .br @@ -501,6 +501,27 @@ Delete user-defined selection policy. \fBlnetctl udsp\fR show Show all user-defined selection policies in the system\. The policies are dumped in YAML form\. . +.SS "Debugging" +. +.TP +\fBlnetctl debug\fR recovery +List NIDs on recovery queue +. +.br +\-\-local : list NIDs on local recovery queue +. +.br +\-\-peer : list NIDs on peer recovery queue +. +.br +. +.TP +\fBlnetctl debug\fR peer +Dump peer debug info to debug log and console +. +.br +\-\-nid : peer's NID +. .br . .SH "OPTIONS" diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index 6fdee82..1336d13 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -3121,6 +3121,21 @@ test_301() { } run_test 301 "Check for dynamic adds of same/wrong interface (memory leak)" +test_302() { + ! [[ $NETTYPE =~ (tcp|o2ib) ]] && skip "Need tcp or o2ib NETTYPE" + reinit_dlc || return $? + + add_net "${NETTYPE}" "${INTERFACES[0]}" || return $? + + local nid=$($LCTL list_nids) + + do_lnetctl ping ${nid} || + error "pinging self failed $?" + do_lnetctl debug peer --nid ${nid} || + error "failed to dump peer debug info $?" +} +run_test 302 "Check that peer debug info can be dumped" + complete $SECONDS cleanup_testsuite -- 1.8.3.1