3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 # bug number for skipped test:
11 ALWAYS_EXCEPT="$SANITY_LNET_EXCEPT "
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
17 . $LUSTRE/tests/test-framework.sh
21 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
26 [[ -z $LNETCTL ]] && skip "Need lnetctl"
29 load_module ../libcfs/libcfs/libcfs
30 # Prevent the local MODOPTS_LIBCFS from being passed to remote nodes
31 # as part of the environment
35 load_module ../lnet/lnet/lnet "$@"
37 LNDPATH=${LNDPATH:-"../lnet/klnds"}
38 if [ -z "$LNETLND" ]; then
40 o2ib*) LNETLND="o2iblnd/ko2iblnd" ;;
41 tcp*) LNETLND="socklnd/ksocklnd" ;;
42 *) local lnd="${NETTYPE%%[0-9]}lnd"
43 [ -f "$LNDPATH/$lnd/k$lnd.ko" ] &&
44 LNETLND="$lnd/k$lnd" ||
45 LNETLND="socklnd/ksocklnd"
48 load_module ../lnet/klnds/$LNETLND
52 echo "Cleaning up LNet"
53 lsmod | grep -q lnet &&
54 $LNETCTL lnet unconfigure 2>/dev/null
59 RUN_NS="ip netns exec $TESTNS"
67 ip link add 'test1pl' type veth peer name $FAKE_IF netns $TESTNS
68 ip link set 'test1pl' up
69 $RUN_NS ip addr add "${FAKE_IP}/31" dev $FAKE_IF
70 $RUN_NS ip link set $FAKE_IF up
74 (ip netns list | grep -q $TESTNS) && ip netns del $TESTNS
75 if ip link show test1pl >/dev/null 2>&1; then
86 $LNETCTL lnet configure
88 run_test 1 "configure lnet with lnetctl"
91 ### load lnet in default namespace, configure in target namespace
94 cleanup_lnet || exit 1
95 load_lnet "networks=\"\""
96 $RUN_NS $LNETCTL lnet configure --all || exit 1
97 $LNETCTL net show --net tcp | grep -q "nid: ${FAKE_IP}@tcp$"
99 run_test 2 "load lnet w/o module option, configure in a non-default namespace"
102 cleanup_lnet || exit 1
103 load_lnet "networks=tcp($FAKE_IF)"
104 $RUN_NS $LNETCTL lnet configure --all || exit 1
105 $LNETCTL net show --net tcp | grep -q "nid: ${FAKE_IP}@tcp$"
107 run_test 3 "load lnet using networks module options in a non-default namespace"
110 cleanup_lnet || exit 1
111 load_lnet "networks=\"\" ip2nets=\"tcp0($FAKE_IF) ${FAKE_IP}\""
112 $RUN_NS $LNETCTL lnet configure --all || exit 1
113 $LNETCTL net show | grep -q "nid: ${FAKE_IP}@tcp$"
115 run_test 4 "load lnet using ip2nets in a non-default namespace"
118 ### Add the interfaces in the target namespace
121 cleanup_lnet || exit 1
123 $LNETCTL lnet configure || exit 1
124 $RUN_NS $LNETCTL net add --net tcp0 --if $FAKE_IF
126 run_test 5 "add a network using an interface in the non-default namespace"
129 local rnodes=$(remote_nodes_list)
130 [[ -z $rnodes ]] && skip "Need at least 1 remote node"
132 cleanup_lnet || error "Failed to cleanup before test execution"
134 # Loading modules should configure LNet with the appropriate
135 # test-framework configuration
136 load_modules || error "Failed to load modules"
138 local my_nid=$($LCTL list_nids | head -n 1)
140 error "Failed to get primary NID for local host $HOSTNAME"
142 local rnode=$(awk '{print $1}' <<<$rnodes)
143 local rnodenids=$(do_node $rnode $LCTL list_nids | xargs echo)
146 if [[ -z $rnodenids ]]; then
147 do_rpc_nodes $rnode load_modules_local
149 rnodenids=$(do_node $rnode $LCTL list_nids | xargs echo)
152 local rnodepnid=$(awk '{print $1}' <<< $rnodenids)
154 [[ -z $rnodepnid ]] &&
155 error "Failed to get primary NID for remote host $rnode"
157 log "Initial discovery"
158 $LNETCTL discover --force $rnodepnid ||
159 error "Failed to discover $rnodepnid"
161 do_node $rnode "$LNETCTL discover --force $my_nid" ||
162 error "$rnode failed to discover $my_nid"
164 log "Fail local discover ping to set LNET_PEER_REDISCOVER flag"
165 $LCTL net_drop_add -s "*@$NETTYPE" -d "*@$NETTYPE" -r 1 -e local_error
166 $LNETCTL discover --force $rnodepnid &&
167 error "Discovery should have failed"
168 $LCTL net_drop_del -a
171 for nid in $rnodenids; do
172 # We need a GET (PING) delay just long enough that we can trigger
173 # discovery on the remote peer
174 $LCTL net_delay_add -s "*@$NETTYPE" -d $nid -r 1 -m GET -l 3
175 $LCTL net_drop_add -s "*@$NETTYPE" -d $nid -r 1 -m GET -e local_error
176 # We need PUT (PUSH) delay just long enough so we can process
178 $LCTL net_delay_add -s "*@$NETTYPE" -d $nid -r 1 -m PUT -l 6
181 log "Force $HOSTNAME to discover $rnodepnid (in background)"
182 # We want to get a PING sent that we know will eventually fail.
183 # The delay rules we added will ensure the ping is not sent until
184 # the PUSH is also in flight (see below), and the drop rule ensures that
185 # when the PING is eventually sent it will error out
186 $LNETCTL discover --force $rnodepnid &
189 # We want a discovery PUSH from rnode to put rnode back on our
190 # discovery queue. This should cause us to try to send a PUSH to rnode
191 # while the PING is still outstanding.
192 log "Force $rnode to discover $my_nid"
193 do_node $rnode $LNETCTL discover --force $my_nid
195 # At this point we'll have both PING_SENT and PUSH_SENT set for the
196 # rnode peer. Wait for the PING to error out which should terminate the
197 # discovery process that we backgrounded.
200 log "Finished wait on $pid1"
202 # The PING send failure clears the PING_SENT flag and puts the peer back
203 # on the discovery queue. When the discovery thread processes the peer it
204 # will mistakenly clear the PUSH_SENT flag (and set PUSH_FAILED).
205 # Discovery will then complete for this peer even though we have an
207 # When PUSH is actually unlinked it will be forced back onto the
208 # discovery queue, but we no longer have a ref on the peer. When
209 # discovery completes again, we'll trip the ASSERT in
210 # lnet_destroy_peer_locked()
212 # Delete the delay rules to send the PUSH
213 $LCTL net_delay_del -a
214 # Delete the drop rules
215 $LCTL net_drop_del -a
218 error "Failed to unload modules"
220 do_rpc_nodes $rnode unload_modules_local ||
221 error "Failed to unload modules on $rnode"
226 run_test 212 "Check discovery refcount loss bug (LU-14627)"