From e0143a820daab5be1f1635e50a2485feda5ae94c Mon Sep 17 00:00:00 2001
From: Alex Zhuravlev
Date: Mon, 16 Aug 2021 20:22:00 +0300
Subject: [PATCH] LU-14938 tests: fail_abort() in t-f to take care of MDTs

fail_abort() in test-framework ensures that the clients are
back after evictions. The same should be done for MDTs as
otherwise any subsequent test may fail due to another MDT
observing eviction and interrupting current request with -EIO.

Lustre-change: https://review.whamcloud.com/44671
Lustre-commit: 436cd4fd21ffee5830c9b4e75055db80c47547d5

Signed-off-by: Alex Zhuravlev
Change-Id: I0a00ece52d28c6d28eef029a4f87a348efaa041c
Reviewed-by: Mike Pershin
Reviewed-by: Andreas Dilger
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49598
Tested-by: jenkins
Tested-by: Maloo
---
Review notes (not part of the commit message):
 * all_mds_up(): use $LCTL consistently, wait for the largest maxage
   and skip the wait with a message instead of running sleep without
   an operand when no maxage value is returned.
 * Hunk offsets and diffstat are adjusted accordingly; the post-image
   blob id on the test-framework.sh index line was not regenerated.

 lustre/tests/recovery-small.sh |  1 +
 lustre/tests/test-framework.sh | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index 7793e3f..26fa2ca 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -2269,6 +2269,7 @@ test_110k() {
 		error "cleanup: start mds2 failed"
 	zconf_mount $(hostname) $MOUNT || error "cleanup: mount failed"
 	client_up || error "post-failover df failed"
+	all_mds_up
 }
 run_test 110k "FID_QUERY failed during recovery"
 
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index e007420..724b618 100755
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -3563,6 +3563,32 @@ clients_up() {
 	lfs_df_check
 }
 
+# MDT-to-MDT counterpart of clients_up(), no-op with a single MDT.
+# Waits for the cached statfs data to expire, then reads filesfree of
+# every MDT-to-MDT osp device on all MDS nodes to trigger statfs RPCs.
+all_mds_up() {
+	(( MDSCOUNT == 1 )) && return
+
+	# wait so that statfs data on MDT expire; with several matching
+	# devices the largest maxage covers them all
+	local delay
+
+	delay=$(do_facet $SINGLEMDS $LCTL \
+		get_param -n osp.*MDT0000*MDT0001.maxage | sort -n | tail -1)
+	# an empty $delay would make sleep fail with "missing operand"
+	if [[ -n "$delay" ]]; then
+		sleep "$delay"
+	else
+		echo "all_mds_up: no osp maxage found, not waiting"
+	fi
+
+	local nodes=$(comma_list $(mdts_nodes))
+	# initiate statfs RPC, all to all MDTs; done twice like the double
+	# clients_up in fail_abort() (presumably the first may fail - confirm)
+	do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null
+	do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null
+}
+
 client_up() {
 	# usually checked on particular client or locally
 	sleep 1
@@ -3800,6 +3826,7 @@ fail_abort() {
 	mount_facet $facet -o $abort_type
 	clients_up || echo "first stat failed: $?"
 	clients_up || error "post-failover stat: $?"
+	all_mds_up
 }
 
 host_nids_address() {
-- 
1.8.3.1