Whamcloud - gitweb
LU-16551 tests: Ensure all peer credits used in MR 79/49979/2
author Chris Horn <chris.horn@hpe.com>
Mon, 13 Feb 2023 21:15:31 +0000 (15:15 -0600)
committer Oleg Drokin <green@whamcloud.com>
Thu, 23 Feb 2023 06:34:24 +0000 (06:34 +0000)
sanity-lnet test_254 needs to ensure that all peer credits are
consumed. Because of the raciness of the round robin code in LNet,
we cannot rely on just issuing the appropriate number of pings.
Instead we should use the --source argument to lnetctl ping to ensure
that we send the correct number of pings over each interface.

To simplify matters, only perform this test, and the other tests that
call setup_health_test(), in non-routed configurations.

Test-Parameters: trivial testlist=sanity-lnet
Fixes: 52db11cdce ("LU-16303 lnet: Drop LNet message if deadline exceeded")
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I05a7ffec37d16c14711fe696232708f927357b1c
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49979
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/sanity-lnet.sh

index 5d1a70f..a1ef920 100755 (executable)
@@ -1475,6 +1475,20 @@ setup_health_test() {
        [[ ${#RNIDS[@]} -lt 1 ]] &&
                error "No NIDs configured for remote host $RNODE"
 
+       # Ensure all peer NIs are local (i.e. non-routed config)
+       local rnid rnet lnid lnet
+
+       for rnid in ${RNIDS[@]}; do
+               rnet=${rnid##*@}
+               for lnid in ${LNIDS[@]}; do
+                       lnet=${lnid##*@}
+                       [[ ${lnet} == ${rnet} ]] &&
+                               break
+               done
+               [[ ${lnet} != ${rnet} ]] &&
+                       skip "Need non-routed configuration"
+       done
+
        do_lnetctl discover ${RNIDS[0]} ||
                error "Unable to discover ${RNIDS[0]}"
 
@@ -3199,22 +3213,35 @@ do_expired_message_drop_test() {
                done
        done
 
-       local pc
+       declare -a pcs
 
-       pc=$($LNETCTL peer show -v --nid "${RNIDS[0]}" |
-                       awk '/max_ni_tx_credits:/{print $NF}' |
-                       xargs echo |
-                       sed 's/ /\+/g' | bc)
+       pcs=( $($LNETCTL peer show -v --nid "${RNIDS[0]}" |
+               awk '/max_ni_tx_credits:/{print $NF}' |
+               xargs echo) )
 
-       echo "Found $pc peer_credits for ${RNIDS[0]}"
+       [[ ${#RNIDS[@]} -ne ${#pcs[@]} ]] &&
+               error "Expect ${#RNIDS[@]} peer credit values found ${#pcs[@]}"
 
-       local i
+       local rnet lnid lnet i j
 
-       for i in $(seq 1 "${pc}"); do
-               $LNETCTL ping --timeout $((delay+2)) "${RNIDS[0]}" 1>/dev/null &
-       done
+       # Need to use --source for multi-rail configs to ensure we consume
+       # all available peer credits
+       for ((i = 0; i < ${#RNIDS[@]}; i++)); do
+               local ping_args="--timeout $((delay+2))"
+
+               rnet=${RNIDS[i]##*@}
+               for lnid in ${LNIDS[@]}; do
+                       lnet=${lnid##*@}
+                       [[ $rnet == $lnet ]] && break
+               done
 
-       echo "Issued ${pc} pings to ${RNIDS[0]}"
+               ping_args+=" --source ${lnid} ${RNIDS[i]}"
+               for j in $(seq 1 "${pcs[i]}"); do
+                       $LNETCTL ping ${ping_args} 1>/dev/null &
+               done
+
+               echo "Issued ${pcs[i]} pings to ${RNIDS[i]} from $lnid"
+       done
 
        # This ping should be queued on peer NI tx credit
        $LNETCTL ping --timeout $((delay+2)) "${RNIDS[0]}" &