Whamcloud - gitweb
LU-18540 lnet: Fix lnetctl ping timeout parsing 05/57405/2
author Chris Horn <chris.horn@hpe.com>
Thu, 12 Dec 2024 18:56:17 +0000 (11:56 -0700)
committer Oleg Drokin <green@whamcloud.com>
Sat, 18 Jan 2025 22:05:18 +0000 (22:05 +0000)
The src_nid argument to yaml_lnet_ping() is never null, so we need to
check it with nid_addr_is_set() to determine whether the source key
should be added to the ping command yaml.

sanity-lnet/256 is updated to verify that all ping commands complete
successfully. This verifies that lnetctl ping --timeout works
correctly.

Test-Parameters: trivial testlist=sanity-lnet
Fixes: 5ddc054540 ("LU-17629 utils: support hostname with lustre_lnet_parse_nid_range()")
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: Id6617b38edc68e49800ae37982390e17bc95e141
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57405
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lnet/utils/lnetctl.c
lustre/tests/sanity-lnet.sh

index 701696d..2e0fbc2 100644 (file)
@@ -5190,7 +5190,7 @@ static int yaml_lnet_ping(char *group, int timeout, struct lnet_nid *src_nid,
                goto emitter_error;
 
        if (timeout != 1000 || (src_nid && nid_addr_is_set(src_nid))) {
-               if (src_nid) {
+               if (src_nid && nid_addr_is_set(src_nid)) {
                        char *src_nidstr = libcfs_nidstr(src_nid);
 
                        yaml_scalar_event_initialize(&event, NULL,
index e842191..0990236 100755 (executable)
@@ -3982,25 +3982,36 @@ test_256() {
                done
        done
 
-       local i
+       local idx
+       declare -a pids
 
-       for i in $(seq 1 ${rtr_pc}); do
+       for idx in $(seq 1 ${rtr_pc}); do
+               echo "$LNETCTL ping --timeout $((delay+2)) $rpnid"
                $LNETCTL ping --timeout $((delay+2)) $rpnid 1>/dev/null &
+               pids[$idx]=$!
        done
 
-       echo "Issued ${rtr_pc} pings to $rpnid"
+       ((idx++))
 
-       local pid
+       echo "Issued ${rtr_pc} pings to $rpnid"
 
        # This ping should be queued on the router's peer NI tx credit queue
        $LNETCTL ping --timeout $((delay+2)) $rpnid &
+       pids[$idx]=$!
 
        echo "Issued last ping - sleep $delay"
        sleep ${delay}
 
        do_node $router $LCTL net_delay_del -a
 
-       wait
+       local rc=0 rcsum=0
+       for idx in $(seq 1 $((rtr_pc + 1))); do
+               wait ${pids[$idx]} || rc=$?
+               ((rc != 0)) && echo "ping pid ${pids[$idx]} returned rc=$rc"
+               ((rcsum += rc))
+       done
+
+       ((rcsum == 0)) || error "Detected ping failures"
 
        do_node $router $LNETCTL set transaction_timeout ${old_tto}
        do_node $router $LNETCTL set retry_count ${old_retry}