From 55fb2ba1187e7c566e218df4f3311d39de204fe8 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Tue, 6 Dec 2022 23:04:42 -0500 Subject: [PATCH] LU-16335 test: add fail_abort_cleanup() Add helper fail_abort_cleanup() to unlink test directories (call lfs rm_entry if directory is broken) after fail_abort because after LU-16159 update logs will be canceled upon recovery abort, which may leave broken directories. Update replay-single.sh in places where fail_abort is called and directory may become broken. Lustre-change: https://review.whamcloud.com/49335 Lustre-commit: d5fe41a02a6ed57bcbfc4a4c695bb509c9c7c313 Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Test-Parameters: trivial mdscount=2 mdtcount=4 testlist=replay-single Signed-off-by: Lai Siyao Change-Id: I260689b1a6fa5b0b4db5aab5095cb062ae57d612 Reviewed-by: Andreas Dilger Reviewed-by: Arshad Hussain Reviewed-by: Mikhail Pershin Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49713 Tested-by: jenkins Tested-by: Maloo --- lustre/tests/replay-single.sh | 15 +++++++-------- lustre/tests/test-framework.sh | 9 +++++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 3d044a7..4ff685a 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -3545,6 +3545,7 @@ test_100c() { replay_barrier mds2 $LFS mkdir -i1 -c2 $striped_dir + stack_trap fail_abort_cleanup RETURN fail_abort mds2 abort_recov_mdt createmany -o $striped_dir/f-%d 20 && @@ -3552,10 +3553,10 @@ test_100c() { fail mds2 - # $striped_dir creation partly fails due to abort_recov_mdt, - # but at least this directory should be able to be deleted - #$LFS rm_entry $striped_dir - #rm -rf $DIR/$tdir || error "rmdir failed" + # LU-16159 abort_recovery will cancel update logs, the second recovery + # won't replay $striped_dir creation + (( $MDS1_VERSION >= $(version_code 2.14.0.73) )) || + striped_dir_check_100 || error "striped dir check failed" } run_test 100c "DNE: create striped dir, abort_recov_mdt mds2" @@ -3584,6 +3585,7 @@ test_100d() { count=$(do_facet $mdt "$LCTL --device $devname llog_print update_log | grep -c index") (( count > 0 )) || error "no update logs found" + stack_trap fail_abort_cleanup RETURN fail_abort $mdt || error "fail_abort $mdt failed" wait_update_facet $mdt "$LCTL --device $devname llog_print update_log | grep -c index" 0 60 || error "update logs not canceled" @@ -4779,6 +4781,7 @@ test_120() { } done + stack_trap fail_abort_cleanup RETURN fail_abort mds1 for ((i = 0; i < 20; i++)); do @@ -5018,10 +5021,6 @@ test_135() { } run_test 135 "Server failure in lock replay phase" -# LU-16159 abort_recovery may cause directory unlink fail, now that LFSCK can't -# fix all the inconsistencies, formatall so it won't fail in cleanup -(( $MDS1_VERSION >= $(version_code 2.14.0) )) && formatall - complete $SECONDS check_and_cleanup_lustre exit_status diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index d529f91..19f6ef2 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -3816,6 +3816,15 @@ fail_abort() { all_mds_up } +# LU-16159: abort recovery will cancel update logs, which may leave broken +# directories in the system, remove name entry if necessary +fail_abort_cleanup() { + rm -rf $DIR/$tdir/* + find $DIR/$tdir -depth | while read D; do + rmdir "$D" || $LFS rm_entry "$D" || error "rm $D failed" + done +} + host_nids_address() { local nodes=$1 local net=${2:-"."} -- 1.8.3.1