From 277b07e1750ec101e0fe238cada651668a57e39e Mon Sep 17 00:00:00 2001
From: Chris Horn
Date: Thu, 12 Dec 2024 11:56:17 -0700
Subject: [PATCH] LU-18540 lnet: Fix lnetctl ping timeout parsing

The src_nid argument to yaml_lnet_ping() is never null, so we need
to check it with nid_addr_is_set() to determine whether the source
key should be added to the ping command yaml.

sanity-lnet/256 is updated to verify that all ping commands complete
successfully. This verifies that lnetctl ping --timeout works
correctly.

Test-Parameters: trivial testlist=sanity-lnet
Fixes: 5ddc054540 ("LU-17629 utils: support hostname with lustre_lnet_parse_nid_range()")
Signed-off-by: Chris Horn
Change-Id: Id6617b38edc68e49800ae37982390e17bc95e141
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57405
Reviewed-by: James Simmons
Reviewed-by: Serguei Smirnov
Reviewed-by: Oleg Drokin
Tested-by: jenkins
Tested-by: Maloo
---
 lnet/utils/lnetctl.c        |  2 +-
 lustre/tests/sanity-lnet.sh | 21 ++++++++++++++++-----
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/lnet/utils/lnetctl.c b/lnet/utils/lnetctl.c
index 701696d..2e0fbc2 100644
--- a/lnet/utils/lnetctl.c
+++ b/lnet/utils/lnetctl.c
@@ -5190,7 +5190,7 @@ static int yaml_lnet_ping(char *group, int timeout, struct lnet_nid *src_nid,
 		goto emitter_error;
 
 	if (timeout != 1000 || (src_nid && nid_addr_is_set(src_nid))) {
-		if (src_nid) {
+		if (src_nid && nid_addr_is_set(src_nid)) {
 			char *src_nidstr = libcfs_nidstr(src_nid);
 
 			yaml_scalar_event_initialize(&event, NULL,
diff --git a/lustre/tests/sanity-lnet.sh b/lustre/tests/sanity-lnet.sh
index e842191..0990236 100755
--- a/lustre/tests/sanity-lnet.sh
+++ b/lustre/tests/sanity-lnet.sh
@@ -3982,25 +3982,36 @@ test_256() {
 		done
 	done
 
-	local i
+	local idx
+	declare -a pids
 
-	for i in $(seq 1 ${rtr_pc}); do
+	for idx in $(seq 1 ${rtr_pc}); do
+		echo "$LNETCTL ping --timeout $((delay+2)) $rpnid"
 		$LNETCTL ping --timeout $((delay+2)) $rpnid 1>/dev/null &
+		pids[$idx]=$!
 	done
 
-	echo "Issued ${rtr_pc} pings to $rpnid"
+	((idx++))
 
-	local pid
+	echo "Issued ${rtr_pc} pings to $rpnid"
 
 	# This ping should be queued on the router's peer NI tx credit queue
 	$LNETCTL ping --timeout $((delay+2)) $rpnid &
+	pids[$idx]=$!
 
 	echo "Issued last ping - sleep $delay"
 	sleep ${delay}
 
 	do_node $router $LCTL net_delay_del -a
 
-	wait
+	local rc=0 rcsum=0
+	for idx in $(seq 1 $((rtr_pc + 1))); do
+		wait ${pids[$idx]} || rc=$?
+		((rc != 0)) && echo "ping pid ${pids[$idx]} returned rc=$rc"
+		((rcsum += rc))
+	done
+
+	((rcsum == 0)) || error "Detected ping failures"
 
 	do_node $router $LNETCTL set transaction_timeout ${old_tto}
 	do_node $router $LNETCTL set retry_count ${old_retry}
-- 
1.8.3.1