ALWAYS_EXCEPT="$SANITY_LNET_EXCEPT "
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-# skip the grant tests for ARM until they are fixed
-if [[ $(uname -m) = aarch64 ]]; then
- # bug number: LU-14067
- ALWAYS_EXCEPT+=" 300"
-fi
-
[ "$SLOW" = "no" ] && EXCEPT_SLOW=""
LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
. $LUSTRE/tests/test-framework.sh
CLEANUP=${CLEANUP:-:}
SETUP=${SETUP:-:}
-init_test_env $@
+init_test_env "$@"
. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
init_logging
return 0
}
-load_lnet() {
- load_module ../libcfs/libcfs/libcfs
- # Prevent local MODOPTS_LIBCFS being passed as part of environment
- # variable to remote nodes
- unset MODOPTS_LIBCFS
-
- set_default_debug "neterror net nettrace malloc"
- load_module ../lnet/lnet/lnet "$@"
-
- LNDPATH=${LNDPATH:-"../lnet/klnds"}
- if [ -z "$LNETLND" ]; then
- case $NETTYPE in
- o2ib*) LNETLND="o2iblnd/ko2iblnd" ;;
- tcp*) LNETLND="socklnd/ksocklnd" ;;
- *) local lnd="${NETTYPE%%[0-9]}lnd"
- [ -f "$LNDPATH/$lnd/k$lnd.ko" ] &&
- LNETLND="$lnd/k$lnd" ||
- LNETLND="socklnd/ksocklnd"
- esac
- fi
- load_module ../lnet/klnds/$LNETLND
-}
-
-do_lnetctl() {
- $LCTL mark "$LNETCTL $@"
- echo "$LNETCTL $@"
- $LNETCTL "$@"
-}
-
TESTNS='test_ns'
FAKE_IF="test1pg"
FAKE_IP="10.1.2.3"
do_ns() {
- echo "ip netns exec $TESTNS $@"
+ echo "ip netns exec $TESTNS $*" # "$*" joins the args into one string
ip netns exec $TESTNS "$@"
}
local num_re='[0-9]\+'
local ip_re="[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}"
- if [[ $net =~ gni[0-9]* ]]; then
+ if [[ $net =~ gni[0-9]* ]] || [[ $net =~ kfi[0-9]* ]]; then
[[ $addr =~ ${num_re} ]] && return 0
else
[[ $addr =~ ${ip_re} ]] && return 0
setup_netns || error "setup_netns failed with $?"
# Determine the local interface(s) used for LNet
-load_modules || error "Failed to load modules"
+load_lnet "config_on_load=1" || error "Failed to load modules"
do_lnetctl net show
ip a
- nid: 6.6.3.6@o2ib
- nid: 6@gni
- nid: 10@gni
+ - nid: 6@kfi
+ - nid: 10@kfi
EOF
append_global_yaml
- compare_peer_add "6.6.6.6@tcp" \
- "6.6.[6-7].[0-4/2]@tcp,6.6.[1-4/2].[0-6/3]@o2ib,[6-12/4]@gni"
+
+ local nid_expr="6.6.[6-7].[0-4/2]@tcp"
+ nid_expr+=",6.6.[1-4/2].[0-6/3]@o2ib"
+ nid_expr+=",[6-12/4]@gni"
+ nid_expr+=",[6-12/4]@kfi"
+
+ compare_peer_add "6.6.6.6@tcp" "${nid_expr}"
}
run_test 6 "Add peer with multiple nidranges"
error "Peer add failed $?"
compare_peer_del "7@gni"
- echo "Delete peer that has tcp, o2ib and gni nids"
+ echo "Delete peer with single nid (kfi)"
+ do_lnetctl peer add --prim_nid 7@kfi || error "Peer add failed $?"
+ compare_peer_del "7@kfi"
+
+ echo "Delete peer that has multiple nids (kfi)"
+ do_lnetctl peer add --prim_nid 7@kfi --nid [8-12]@kfi ||
+ error "Peer add failed $?"
+ compare_peer_del "7@kfi"
+
+ echo "Delete peer that has tcp, o2ib, gni and kfi nids"
do_lnetctl peer add --prim_nid 7@gni \
- --nid [8-12]@gni,7.7.7.[9-12]@tcp,7.7.7.[13-15]@o2ib ||
+ --nid [8-12]@gni,7.7.7.[1-4]@tcp,7.7.7.[5-9]@o2ib,[1-5]@kfi ||
error "Peer add failed $?"
compare_peer_del "7@gni"
}
local num="$1"
local net="$2"
- if [[ $net =~ gni* ]]; then
+ if [[ $net =~ gni* ]] || [[ $net =~ kfi* ]]; then
echo "${num}@${net}"
else
echo "${num}.${num}.${num}.${num}@${net}"
# Loading modules should configure LNet with the appropriate
# test-framework configuration
- load_modules || error "Failed to load modules"
+ load_lnet "config_on_load=1" || error "Failed to load modules"
LNIDS=( $($LCTL list_nids | xargs echo) )
RNIDS=( $(do_node $RNODE $LCTL list_nids | xargs echo) )
if [[ -z ${RNIDS[@]} ]]; then
- do_rpc_nodes $RNODE load_modules_local
+ do_rpc_nodes $RNODE load_lnet "config_on_load=1"
RLOADED=true
RNIDS=( $(do_node $RNODE $LCTL list_nids | xargs echo) )
fi
local hstatus=$1
local lnid rnid
- for lnid in ${LNIDS[@]}; do
- for rnid in ${RNIDS[@]}; do
+ for lnid in "${LNIDS[@]}"; do
+ for rnid in "${RNIDS[@]}"; do
$LCTL net_drop_add -s $lnid -d $rnid -m GET -r 1 -e ${hstatus}
done
done
# If the recovery limit is 10 seconds, then when the 5th enqueue happens
# we expect the peer NI to have aged out, so it will not actually be
# queued.
+# If max_recovery_ping_interval is set to 2 then:
+# First enqueue happens at time 0.
+# 2nd at 0 + 2^0 = 1
+# 3rd at 1 + 2^1 = 3
+# 4th at 3 + 2^1 = 5
+# 5th at 5 + 2^1 = 7
+# 6th at 7 + 2^1 = 9
+# 7th at 9 + 2^1 = 11
+# e.g. after 4 seconds we would expect to have seen the 3rd enqueue,
+# (2 pings sent, 3rd about to happen), and the 4th enqueue is yet to happen
+# e.g. after 10 seconds we would expect to have seen the 6th enqueue,
+# (5 pings sent, 6th about to happen), and the 7th enqueue is yet to happen
check_ping_count() {
local queue="$1"
local expect="$2"
local count
local found=false
- for count in ${ping_count[@]}; do
+ for count in "${ping_count[@]}"; do
if [[ $count -eq $expect ]]; then
if [[ $expect -ne 0 ]] && $found ; then
error "Found more than one interface matching \"$expect\" ping count"
do_lnetctl discover $prim_nid ||
error "failed to discover myself"
+ local default=$($LNETCTL global show |
+ awk '/recovery_limit/{print $NF}')
# Set recovery limit to 10 seconds.
do_lnetctl set recovery_limit 10 ||
error "failed to set recovery_limit"
# Use local_error so LNet doesn't attempt to resend the discovery ping
$LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e local_error
$LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e local_error
- do_lnetctl discover $($LCTL list_nids | head -n 1) &&
+ do_lnetctl discover $prim_nid &&
error "Expected discovery to fail"
+ # See comment for check_ping_count()
sleep 5
- check_nid_in_recovq "-l" 1
+ check_nid_in_recovq "-l" "1"
check_ping_count "ni" "2"
sleep 5
- check_nid_in_recovq "-l" 1
+ check_nid_in_recovq "-l" "1"
check_ping_count "ni" "3"
$LCTL net_drop_del -a
+ reinit_dlc || return $?
+ add_net "tcp" "${INTERFACES[0]}" || return $?
+ add_net "tcp1" "${INTERFACES[0]}" || return $?
+
+ local prim_nid=$($LCTL list_nids | head -n 1)
+
+ do_lnetctl discover $prim_nid ||
+ error "failed to discover myself"
+
+ do_lnetctl set recovery_limit $default ||
+ error "failed to set recovery_limit"
+
+ default=$($LNETCTL global show |
+ awk '/max_recovery_ping_interval/{print $NF}')
+ do_lnetctl set max_recovery_ping_interval 2 ||
+ error "failed to set max_recovery_ping_interval"
+
+ $LCTL set_param debug=+net
+ # Use local_error so LNet doesn't attempt to resend the discovery ping
+ $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e local_error
+ $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e local_error
+ do_lnetctl discover $prim_nid &&
+ error "Expected discovery to fail"
+
+ # See comment for check_ping_count()
+ sleep 4
+ check_nid_in_recovq "-l" "1"
+ check_ping_count "ni" "2"
+
+ sleep 6
+ check_nid_in_recovq "-l" "1"
+ check_ping_count "ni" "5"
+
+ $LCTL net_drop_del -a
+
+ do_lnetctl set max_recovery_ping_interval $default ||
+ error "failed to set max_recovery_ping_interval"
+
return 0
}
run_test 210 "Local NI recovery checks"
do_lnetctl discover $prim_nid ||
error "failed to discover myself"
+ local default=$($LNETCTL global show |
+ awk '/recovery_limit/{print $NF}')
# Set recovery limit to 10 seconds.
do_lnetctl set recovery_limit 10 ||
error "failed to set recovery_limit"
check_nid_in_recovq "-p" 0
check_ping_count "peer_ni" "0"
+ reinit_dlc || return $?
+ add_net "tcp" "${INTERFACES[0]}" || return $?
+ add_net "tcp1" "${INTERFACES[0]}" || return $?
+
+ local prim_nid=$($LCTL list_nids | head -n 1)
+
+ do_lnetctl discover $prim_nid ||
+ error "failed to discover myself"
+
+ do_lnetctl set recovery_limit $default ||
+ error "failed to set recovery_limit"
+
+ default=$($LNETCTL global show |
+ awk '/max_recovery_ping_interval/{print $NF}')
+ do_lnetctl set max_recovery_ping_interval 2 ||
+ error "failed to set max_recovery_ping_interval"
+
+ $LCTL net_drop_add -s *@tcp -d *@tcp -m GET -r 1 -e remote_error
+ $LCTL net_drop_add -s *@tcp1 -d *@tcp1 -m GET -r 1 -e remote_error
+
+ # Set health to 0 on one interface. This forces it onto the recovery
+ # queue.
+ $LNETCTL peer set --nid $prim_nid --health 0
+
+ # See comment for check_ping_count()
+ sleep 4
+ check_nid_in_recovq "-p" "1"
+ check_ping_count "peer_ni" "2"
+
+ sleep 6
+ check_nid_in_recovq "-p" "1"
+ check_ping_count "peer_ni" "5"
+
+ $LCTL net_drop_del -a
+
+ do_lnetctl set max_recovery_ping_interval $default ||
+ error "failed to set max_recovery_ping_interval"
+
return 0
}
run_test 211 "Remote NI recovery checks"
# Loading modules should configure LNet with the appropriate
# test-framework configuration
- load_modules || error "Failed to load modules"
+ load_lnet "config_on_load=1" || error "Failed to load modules"
local my_nid=$($LCTL list_nids | head -n 1)
[[ -z $my_nid ]] &&
local rloaded=false
if [[ -z $rnodenids ]]; then
- do_rpc_nodes $rnode load_modules_local
+ do_rpc_nodes $rnode load_lnet "config_on_load=1"
rloaded=true
rnodenids=$(do_node $rnode $LCTL list_nids | xargs echo)
fi
error "Initial ping failed $?"
local src dst
- for src in ${nids[@]}; do
- for dst in ${nids[@]}; do
+ for src in "${nids[@]}"; do
+ for dst in "${nids[@]}"; do
$LCTL net_drop_add -r 1 -s $src -d $dst -e network_timeout
done
done
}
run_test 218 "Local recovery pings should exercise all available paths"
+test_219() {
+ reinit_dlc || return $?
+ add_net "tcp" "${INTERFACES[0]}" || return $?
+ add_net "tcp1" "${INTERFACES[0]}" || return $?
+
+ local nid1=$(lctl list_nids | head -n 1)
+ local nid2=$(lctl list_nids | tail --lines 1)
+
+ do_lnetctl ping $nid1 ||
+ error "Ping failed $?"
+ do_lnetctl ping $nid2 ||
+ error "Ping failed $?"
+
+ do_lnetctl discover $nid2 ||
+ error "Discovery failed"
+
+ $LNETCTL peer show --nid $nid1 | grep -q $nid2 ||
+ error "$nid2 is not listed under $nid1"
+}
+run_test 219 "Consolidate peer entries"
+
test_230() {
# LU-12815
echo "Check valid values; Should succeed"