From: Andreas Dilger Date: Thu, 11 Mar 2021 09:39:57 +0000 (-0700) Subject: LU-10632 tests: recovery-small test_26 idle_timeout X-Git-Tag: 2.14.52~188 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=b4391fcdaf392a50bd1419342eca3b730c077ed2 LU-10632 tests: recovery-small test_26 idle_timeout In recovery-small test_26() use "lfs df" instead of plain "df" since statfs may be fetched from the MDS cache and will not ensure that the client->OST connections are currently active. Also, check a few entries further back in the OSC state log for an EVICTED message, in case the client idle disconnects from the server again while checking all of the imports. Test-Parameters: trivial testlist=recovery-small env=ONLY=26a,ONLY_REPEAT=100 Fixes: 5a6ceb664f07 ("LU-7236 ptlrpc: idle connections can disconnect") Signed-off-by: Andreas Dilger Change-Id: I8c370cb75f4e06258ef3c032630fc20354a15dcc Reviewed-on: https://review.whamcloud.com/42006 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: James Nunez Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 8b4b594..77992a7 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1069,6 +1069,8 @@ test_26a() { # was test_26 bug 5921 - evict dead exports by pinger check_timeout || return 1 + # make sure all imports are connected and not IDLE + do_facet client lfs df > /dev/null # OBD_FAIL_PTLRPC_DROP_RPC 0x505 do_facet client lctl set_param fail_loc=0x505 local before=$(date +%s) @@ -1079,7 +1081,7 @@ test_26a() { # was test_26 bug 5921 - evict dead exports by pinger # the loser might have to wait for the next ping. sleep $((TIMEOUT * 2 + TIMEOUT * 3 / 4)) do_facet client lctl set_param fail_loc=0x0 - do_facet client df > /dev/null + do_facet client lfs df > /dev/null local oscs=$(lctl dl | awk '/-osc-/ {print $4}') check_clients_evicted $before ${oscs[@]} @@ -1117,10 +1119,9 @@ test_26b() { # bug 10140 - evict dead exports by pinger # = 9 * PING_INTERVAL + PING_INTERVAL # = 10 PING_INTERVAL = 10 obd_timeout / 4 = 2.5 obd_timeout # let's wait $((TIMEOUT * 3)) # bug 19887 - local rc=0 - wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 3)) || \ - error "Client was not evicted by ost" rc=1 - wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 3)) || \ + wait_client_evicted ost1 $OST_NEXP $((TIMEOUT * 3)) || + error "Client was not evicted by ost" + wait_client_evicted $SINGLEMDS $MDS_NEXP $((TIMEOUT * 3)) || error "Client was not evicted by mds" } run_test 26b "evict dead exports" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 46601ad..71d4aa3 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -9572,14 +9572,17 @@ check_clients_evicted() { local rc=0 for osc in $oscs; do - ((rc++)) echo "Check state for $osc" local evicted=$(do_facet client $LCTL get_param osc.$osc.state | - tail -n 3 | awk -F"[ [,]" \ - '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + tail -n 5 | awk -F"[ ,]" \ + '/EVICTED/ { if (mx<$4) { mx=$4; } } END { print mx }') if (($? == 0)) && (($evicted > $before)); then echo "$osc is evicted at $evicted" - ((rc--)) + else + ((rc++)) + echo "$osc was not evicted after $before:" + do_facet client $LCTL get_param osc.$osc.state | + tail -n 8 fi done