From: Chris Horn Date: Thu, 12 Aug 2021 16:26:07 +0000 (-0500) Subject: LU-14939 lnet: Allow specifying a source NID for lnetctl ping X-Git-Tag: 2.14.56~45 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=48ef9982c474a02c460293bce17c9e45f9829eab LU-14939 lnet: Allow specifying a source NID for lnetctl ping Add a new --source option for lnetctl ping command. This allows the user to specify a local NI from which to send the ping. This also ensures that the specified destination NID is also used. Otherwise, pings to multi-rail peers may end up going to a different peer NI based on the multi-rail selection algorithm. The ability to specify a source NI, and thus fix the destination NI, is a great help in troubleshooting communication issues between multi-rail peers. Add test to exercise lnetctl ping --source option. HPE-bug-id: LUS-10296 Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Chris Horn Change-Id: I454217b30a92414de537880f076a11a693b1f0b3 Reviewed-on: https://review.whamcloud.com/44727 Reviewed-by: Serguei Smirnov Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andriy Skulysh Reviewed-by: Oleg Drokin --- diff --git a/lnet/include/uapi/linux/lnet/lnet-dlc.h b/lnet/include/uapi/linux/lnet/lnet-dlc.h index fccadbe..2b2c05f 100644 --- a/lnet/include/uapi/linux/lnet/lnet-dlc.h +++ b/lnet/include/uapi/linux/lnet/lnet-dlc.h @@ -132,6 +132,7 @@ struct lnet_ioctl_ping_data { __u32 mr_info; struct lnet_process_id ping_id; struct lnet_process_id __user *ping_buf; + lnet_nid_t ping_src; }; struct lnet_ioctl_config_data { diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index f7be169..7598c8d 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -261,8 +261,9 @@ static void lnet_set_lnd_timeout(void) */ static atomic_t lnet_dlc_seq_no = ATOMIC_INIT(0); -static int lnet_ping(struct lnet_process_id id, signed long timeout, - struct lnet_process_id __user *ids, int n_ids); +static int lnet_ping(struct lnet_process_id id, lnet_nid_t src_nid, + signed long timeout, struct lnet_process_id __user *ids, + int n_ids); static int lnet_discover(struct lnet_process_id id, __u32 force, struct lnet_process_id __user *ids, int n_ids); @@ -4334,7 +4335,7 @@ LNetCtl(unsigned int cmd, void *arg) else timeout = nsecs_to_jiffies(data->ioc_u32[1] * NSEC_PER_MSEC); - rc = lnet_ping(id, timeout, data->ioc_pbuf1, + rc = lnet_ping(id, LNET_NID_ANY, timeout, data->ioc_pbuf1, data->ioc_plen1 / sizeof(struct lnet_process_id)); if (rc < 0) @@ -4348,6 +4349,18 @@ LNetCtl(unsigned int cmd, void *arg) struct lnet_ioctl_ping_data *ping = arg; struct lnet_peer *lp; signed long timeout; + lnet_nid_t src_nid = LNET_NID_ANY; + + /* Check if the supplied ping data supports source nid + * NB: This check is sufficient if lnet_ioctl_ping_data has + * additional fields added, but if they are re-ordered or + * fields removed then this will break. It is expected that + * these ioctls will be replaced with netlink implementation, so + * it is probably not worth coming up with a more robust version + * compatibility scheme. + */ + if (ping->ping_hdr.ioc_len >= sizeof(struct lnet_ioctl_ping_data)) + src_nid = ping->ping_src; /* If timeout is negative then set default of 3 minutes */ if (((s32)ping->op_param) <= 0 || @@ -4356,7 +4369,7 @@ LNetCtl(unsigned int cmd, void *arg) else timeout = nsecs_to_jiffies(ping->op_param * NSEC_PER_MSEC); - rc = lnet_ping(ping->ping_id, timeout, + rc = lnet_ping(ping->ping_id, src_nid, timeout, ping->ping_buf, ping->ping_count); if (rc < 0) @@ -4622,8 +4635,9 @@ lnet_ping_event_handler(struct lnet_event *event) complete(&pd->completion); } -static int lnet_ping(struct lnet_process_id id, signed long timeout, - struct lnet_process_id __user *ids, int n_ids) +static int lnet_ping(struct lnet_process_id id, lnet_nid_t src_nid, + signed long timeout, struct lnet_process_id __user *ids, + int n_ids) { struct lnet_md md = { NULL }; struct ping_data pd = { 0 }; @@ -4669,7 +4683,7 @@ static int lnet_ping(struct lnet_process_id id, signed long timeout, goto fail_ping_buffer_decref; } - rc = LNetGet(LNET_NID_ANY, pd.mdh, id, + rc = LNetGet(src_nid, pd.mdh, id, LNET_RESERVED_PORTAL, LNET_PROTO_PING_MATCHBITS, 0, false); diff --git a/lnet/utils/lnetconfig/liblnetconfig.c b/lnet/utils/lnetconfig/liblnetconfig.c index e9541f4..0b8b110 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.c +++ b/lnet/utils/lnetconfig/liblnetconfig.c @@ -420,9 +420,9 @@ static int dispatch_peer_ni_cmd(__u32 cmd, struct lnet_ioctl_peer_cfg *data, return rc; } -static int infra_ping_nid(char *ping_nids, char *oper, int param, int ioc_call, - int seq_no, struct cYAML **show_rc, - struct cYAML **err_rc) +static int infra_ping_nid(char *ping_nids, char *src_nidstr, char *oper, + int param, int ioc_call, int seq_no, + struct cYAML **show_rc, struct cYAML **err_rc) { void *data = NULL; struct lnet_ioctl_ping_data ping; @@ -436,6 +436,7 @@ static int infra_ping_nid(char *ping_nids, char *oper, int param, int ioc_call, int rc = LUSTRE_CFG_RC_OUT_OF_MEM; int i; bool flag = false; + lnet_nid_t src; len = (sizeof(struct lnet_process_id) * LNET_INTERFACES_MAX_DEFAULT); @@ -452,6 +453,21 @@ static int infra_ping_nid(char *ping_nids, char *oper, int param, int ioc_call, if (ping_node == NULL) goto out; + if (src_nidstr) { + src = libcfs_str2nid(src_nidstr); + if (src == LNET_NID_ANY) { + snprintf(err_str, sizeof(err_str), + "\"cannot parse source NID '%s'\"", + src_nidstr); + rc = LUSTRE_CFG_RC_BAD_PARAM; + cYAML_build_error(rc, seq_no, MANAGE_CMD, + oper, err_str, err_rc); + goto out; + } + } else { + src = LNET_NID_ANY; + } + /* tokenise each nid in string ping_nids */ token = strtok(ping_nids, ","); @@ -516,6 +532,7 @@ static int infra_ping_nid(char *ping_nids, char *oper, int param, int ioc_call, LIBCFS_IOC_INIT_V2(ping, ping_hdr); ping.ping_hdr.ioc_len = sizeof(ping); ping.ping_id = id; + ping.ping_src = src; ping.op_param = param; ping.ping_count = LNET_INTERFACES_MAX_DEFAULT; ping.ping_buf = data; @@ -591,13 +608,14 @@ out: return rc; } -int lustre_lnet_ping_nid(char *ping_nids, int timeout, int seq_no, - struct cYAML **show_rc, struct cYAML **err_rc) +int lustre_lnet_ping_nid(char *ping_nids, char *src_nidstr, int timeout, + int seq_no, struct cYAML **show_rc, + struct cYAML **err_rc) { int rc; - rc = infra_ping_nid(ping_nids, "ping", timeout, IOC_LIBCFS_PING_PEER, - seq_no, show_rc, err_rc); + rc = infra_ping_nid(ping_nids, src_nidstr, "ping", timeout, + IOC_LIBCFS_PING_PEER, seq_no, show_rc, err_rc); return rc; } @@ -606,8 +624,8 @@ int lustre_lnet_discover_nid(char *ping_nids, int force, int seq_no, { int rc; - rc = infra_ping_nid(ping_nids, "discover", force, IOC_LIBCFS_DISCOVER, - seq_no, show_rc, err_rc); + rc = infra_ping_nid(ping_nids, NULL, "discover", force, + IOC_LIBCFS_DISCOVER, seq_no, show_rc, err_rc); return rc; } @@ -5183,13 +5201,15 @@ static int handle_yaml_show_global_settings(struct cYAML *tree, static int handle_yaml_ping(struct cYAML *tree, struct cYAML **show_rc, struct cYAML **err_rc) { - struct cYAML *seq_no, *nid, *timeout; + struct cYAML *seq_no, *nid, *timeout, *src_nid; seq_no = cYAML_get_object_item(tree, "seq_no"); nid = cYAML_get_object_item(tree, "primary nid"); timeout = cYAML_get_object_item(tree, "timeout"); + src_nid = cYAML_get_object_item(tree, "source_nid"); return lustre_lnet_ping_nid((nid) ? nid->cy_valuestring : NULL, + (src_nid) ? src_nid->cy_valuestring : NULL, (timeout) ? timeout->cy_valueint : 1000, (seq_no) ? seq_no->cy_valueint : -1, show_rc, err_rc); diff --git a/lnet/utils/lnetconfig/liblnetconfig.h b/lnet/utils/lnetconfig/liblnetconfig.h index 67403ec..dae9268 100644 --- a/lnet/utils/lnetconfig/liblnetconfig.h +++ b/lnet/utils/lnetconfig/liblnetconfig.h @@ -698,13 +698,14 @@ int lustre_lnet_list_peer(int seq_no, * Ping the nid list, pnids. * * pnids - NID list to ping. + * src_nidstr - source NID * timeout - timeout(seconds) for ping. * seq_no - sequence number of the command. * show_rc - YAML structure of the resultant show. * err_rc - YAML strucutre of the resultant return code. * */ -int lustre_lnet_ping_nid(char *pnid, int timeout, int seq_no, +int lustre_lnet_ping_nid(char *pnid, char *src_nidstr, int timeout, int seq_no, struct cYAML **show_rc, struct cYAML **err_rc); /* lustre_lnet_discover_nid diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c index 23888fb..6c76812 100644 --- a/lnet/utils/lnetctl.c +++ b/lnet/utils/lnetctl.c @@ -2178,21 +2178,27 @@ static int jt_ping(int argc, char **argv) struct cYAML *show_rc = NULL; int timeout = 1000; int rc = 0, opt; + char *src_nidstr = NULL; - const char *const short_options = "ht:"; + const char *const short_options = "hs:t:"; const struct option long_options[] = { { .name = "help", .has_arg = no_argument, .val = 'h' }, { .name = "timeout", .has_arg = required_argument, .val = 't' }, + { .name = "source", .has_arg = required_argument, .val = 's' }, { .name = NULL } }; while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) != -1) { switch (opt) { + case 's': + src_nidstr = optarg; + break; case 't': timeout = 1000 * atol(optarg); break; case 'h': printf("ping nid[,nid,...]\n" + "\t --source: source nid\n" "\t --timeout: ping timeout\n" "\t --help: display this help\n"); return 0; @@ -2202,7 +2208,8 @@ static int jt_ping(int argc, char **argv) } for (; optind < argc; optind++) - rc = lustre_lnet_ping_nid(argv[optind], timeout, -1, &show_rc, &err_rc); + rc = lustre_lnet_ping_nid(argv[optind], src_nidstr, timeout, -1, + &show_rc, &err_rc); if (show_rc) cYAML_print_tree(show_rc); diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh index f64fcec..ea76ab1 100755 --- a/lustre/tests/sanity-lnet.sh +++ b/lustre/tests/sanity-lnet.sh @@ -2072,6 +2072,126 @@ test_214() { } run_test 214 "Check local NI status when link is downed" +get_ni_stat() { + local nid=$1 + local stat=$2 + + $LNETCTL net show -v 2 | + egrep -e nid -e $stat | + grep -wA 1 $nid | + awk '/'$stat':/{print $NF}' +} + +ni_stats_pre() { + local nidvar s + for nidvar in nid1 nid2; do + for stat in send_count recv_count; do + s=$(get_ni_stat ${!nidvar} $stat) + eval ${nidvar}_pre_${stat}=$s + done + done +} + +ni_stats_post() { + local nidvar s + for nidvar in nid1 nid2; do + for stat in send_count recv_count; do + s=$(get_ni_stat ${!nidvar} $stat) + eval ${nidvar}_post_${stat}=$s + done + done +} + +ni_stat_changed() { + local nidvar=$1 + local stat=$2 + + local pre post + eval pre=\${${nidvar}_pre_${stat}} + eval post=\${${nidvar}_post_${stat}} + + echo "${!nidvar} pre ${stat} $pre post ${stat} $post" + + [[ $pre -ne $post ]] +} + +test_215() { + have_interface "eth0" || skip "Need eth0 interface with ipv4 configured" + + cleanup_netns || error "Failed to cleanup netns before test execution" + cleanup_lnet || error "Failed to unload modules before test execution" + + reinit_dlc || return $? + + add_net "tcp1" "eth0" || return $? + add_net "tcp2" "eth0" || return $? + + local nid1=$($LCTL list_nids | head -n 1) + local nid2=$($LCTL list_nids | tail --lines 1) + + do_lnetctl peer add --prim $nid1 --nid $nid2 || + error "Failed to add peer" + + local npings=25 + + for nidvarA in nid1 nid2; do + src=${!nidvarA} + dst=${!nidvarA} + for nidvarB in nid1 nid2; do + [[ $nidvarA == $nidvarB ]] && continue + + ni_stats_pre + + echo "$LNETCTL ping $dst x $npings" + for i in $(seq 1 $npings); do + $LNETCTL ping $dst &>/dev/null || + error "$LNETCTL ping $dst failed" + done + + ni_stats_post + + # No source specified, sends to either NID should cause + # counts to increase across both NIs + for nidvar in nid1 nid2; do + for stat in send_count recv_count; do + ni_stat_changed $nidvar $stat || + error "$stat unchanged for ${!nidvar}" + done + done + + ni_stats_pre + + echo "$LNETCTL ping --source $src $dst x $npings" + for i in $(seq 1 $npings); do + $LNETCTL ping --source $src $dst &>/dev/null || + error "$LNETCTL ping --source $src $dst failed" + done + + ni_stats_post + + # src nid == dest nid means stats for the _other_ NI + # should be unchanged + for nidvar in nid1 nid2; do + for stat in send_count recv_count; do + if [[ ${!nidvar} == $src ]]; then + ni_stat_changed $nidvar $stat || + error "$stat unchanged for ${!nidvar}" + else + ni_stat_changed $nidvar $stat && + error "$stat changed for ${!nidvar}" + fi + done + done + done + # Double number of pings for next iteration because the net + # sequence numbers will have diverged + npings=$(($npings * 2)) + done + + return 0 +} +run_test 215 "Test lnetctl ping --source option" + test_230() { # LU-12815 echo "Check valid values; Should succeed"