From: Alex Zhuravlev Date: Mon, 16 Aug 2021 17:22:00 +0000 (+0300) Subject: LU-14938 tests: fail_abort() in t-f to take care of MDTs X-Git-Tag: 2.14.56~137 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=436cd4fd21ffee5830c9b4e75055db80c47547d5 LU-14938 tests: fail_abort() in t-f to take care of MDTs fail_abort() in test-framework ensures that the clients are back after evictions. The same should be done for MDTs, as otherwise any subsequent test may fail due to another MDT observing an eviction and interrupting the current request with -EIO. Signed-off-by: Alex Zhuravlev Change-Id: I0a00ece52d28c6d28eef029a4f87a348efaa041c Reviewed-on: https://review.whamcloud.com/44671 Reviewed-by: Mike Pershin Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 7f0ef15..4506cce 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2269,6 +2269,7 @@ test_110k() { error "cleanup: start mds2 failed" zconf_mount $(hostname) $MOUNT || error "cleanup: mount failed" client_up || error "post-failover df failed" + all_mds_up } run_test 110k "FID_QUERY failed during recovery" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 97a73dc..e337b9d 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -3556,6 +3556,19 @@ clients_up() { lfs_df_check } +all_mds_up() { + (( MDSCOUNT == 1 )) && return + + # wait so that statfs data on MDT expire + local delay=$(do_facet $SINGLEMDS lctl \ + get_param -n osp.*MDT0000*MDT0001.maxage) + sleep $delay + local nodes=$(comma_list $(mdts_nodes)) + # initiate statfs RPC, all to all MDTs + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null + do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null +} + client_up() { # usually checked on particular client or locally sleep 1 @@ -3789,6 
+3802,7 @@ fail_abort() { mount_facet $facet -o $abort_type clients_up || echo "first stat failed: $?" clients_up || error "post-failover stat: $?" + all_mds_up } host_nids_address() {