From d345a0171be80d0448dd128bbe8bbbd239c26f59 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Fri, 20 Jan 2023 13:34:42 -0700 Subject: [PATCH] LU-11170 tests: add debugging to sanity/415 Add a loop of renames without the concurrent 'touch' operation to measure the test time, and then a second loop that has the 'touch' so that we can see whether slow renames are because of COS (which would make the test failure a kernel bug to be fixed), or because the test is running in a VM and the server/disk is slow (which is something to be fixed in the test, e.g. by making "slow" relative to the non-touch baseline time). Lustre-change: https://review.whamcloud.com/49724 Lustre-commit: 6594babc73851fab335c514cd1fee018425e7bb3 Test-Parameters: trivial testlist=sanity env=ONLY=415,ONLY_REPEAT=120 mdscount=2 mdtcount=4 Signed-off-by: Andreas Dilger Change-Id: Ic1a952be0b861005b46da3e673216e455f3ebbe5 Reviewed-by: Arshad Hussain Reviewed-by: Alex Deiter Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50203 Tested-by: jenkins Tested-by: Maloo --- lustre/tests/sanity.sh | 44 ++++++++++++++++++++++++++---------------- lustre/tests/test-framework.sh | 15 +++++++------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index ce6221f..b29851f 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -26040,45 +26040,55 @@ test_414() { run_test 414 "simulate ENOMEM in ptlrpc_register_bulk()" test_415() { - [ $PARALLEL == "yes" ] && skip "skip parallel run" - [ $MDS1_VERSION -lt $(version_code 2.11.52) ] && + [[ $PARALLEL == "yes" ]] && skip "skip parallel run" + (( $MDS1_VERSION >= $(version_code 2.11.52) )) || skip "Need server version at least 2.11.52" # LU-11102 - local total - local setattr_pid - local start_time - local end_time - local duration + local total=500 + local max=120 - total=500 # this test may be slow on ZFS - [ "$mds1_FSTYPE" == "zfs" ] && total=100 + [[ "$mds1_FSTYPE" == "zfs" 
]] && total=50 # though this test is designed for striped directory, let's test normal # directory too since lock is always saved as CoS lock. test_mkdir $DIR/$tdir || error "mkdir $tdir" createmany -o $DIR/$tdir/$tfile. $total || error "createmany" + stack_trap "unlinkmany $DIR/$tdir/$tfile. $total || true" + # if looping with ONLY_REPEAT, wait for previous deletions to finish + wait_delete_completed_mds + + # run a loop without concurrent touch to measure rename duration. + # only for test debug/robustness, NOT part of COS functional test. + local start_time=$SECONDS + for ((i = 0; i < total; i++)); do + mrename $DIR/$tdir/$tfile.$i $DIR/$tdir/$tfile-new.$i \ + > /dev/null + done + local baseline=$((SECONDS - start_time)) + echo "rename $total files without 'touch' took $baseline sec" ( while true; do touch $DIR/$tdir done ) & - setattr_pid=$! + local setattr_pid=$! - start_time=$(date +%s) - for i in $(seq $total); do - mrename $DIR/$tdir/$tfile.$i $DIR/$tdir/$tfile-new.$i \ + # rename files back to original name so unlinkmany works + start_time=$SECONDS + for ((i = 0; i < total; i++)); do + mrename $DIR/$tdir/$tfile-new.$i $DIR/$tdir/$tfile.$i\ > /dev/null done - end_time=$(date +%s) - duration=$((end_time - start_time)) + local duration=$((SECONDS - start_time)) kill -9 $setattr_pid - echo "rename $total files took $duration sec" - [ $duration -lt 100 ] || error "rename took $duration sec" + echo "rename $total files with 'touch' took $duration sec" + (( max > 2 * baseline )) || max=$((2 * baseline + 5)) + (( duration <= max )) || error "rename took $duration > $max sec" } run_test 415 "lock revoke is not missing" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 64b3ce2..2425164 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -10835,16 +10835,15 @@ is_rmentry_supported() { function createmany() { local count=${!#} - (( count > 100 )) && { - local saved_debug=$($LCTL get_param -n debug) - 
local list=$(comma_list $(all_nodes)) - - do_nodes $list $LCTL set_param -n debug=0 - } + if (( count > 100 )); then + debugsave + do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0 + fi $LUSTRE/tests/createmany $* local rc=$? - (( count > 100 )) && - do_nodes $list "$LCTL set_param -n debug=\\\"$saved_debug\\\"" + if (( count > 100 )); then + debugrestore + fi return $rc } -- 1.8.3.1