Whamcloud - gitweb
LU-11170 tests: add debugging to sanity/415 24/49724/4
author Andreas Dilger <adilger@whamcloud.com>
Fri, 20 Jan 2023 20:34:42 +0000 (13:34 -0700)
committer Oleg Drokin <green@whamcloud.com>
Tue, 31 Jan 2023 02:33:23 +0000 (02:33 +0000)
Add a loop of renames without the concurrent 'touch' operation to
measure the baseline test time, and then a second loop that has the
'touch' so that we can see whether slow renames are because of COS
(which would make the test failure a kernel bug to be fixed), or
because the test is running in a VM and the server/disk is slow
(which is something to be fixed in the test, e.g. by making "slow"
relative to the non-touch baseline time).

Test-Parameters: trivial testlist=sanity env=ONLY=415,ONLY_REPEAT=120 mdscount=2 mdtcount=4
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Ic1a952be0b861005b46da3e673216e455f3ebbe5
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49724
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: Alex Deiter <alex.deiter@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/sanity.sh
lustre/tests/test-framework.sh

index d94511a..439d2db 100755 (executable)
@@ -26712,45 +26712,55 @@ test_414() {
 run_test 414 "simulate ENOMEM in ptlrpc_register_bulk()"
 
 test_415() {
-       [ $PARALLEL == "yes" ] && skip "skip parallel run"
-       [ $MDS1_VERSION -lt $(version_code 2.11.52) ] &&
+       [[ $PARALLEL == "yes" ]] && skip "skip parallel run"
+       (( $MDS1_VERSION >= $(version_code 2.11.52) )) ||
                skip "Need server version at least 2.11.52"
 
        # LU-11102
-       local total
-       local setattr_pid
-       local start_time
-       local end_time
-       local duration
+       local total=500
+       local max=120
 
-       total=500
        # this test may be slow on ZFS
-       [ "$mds1_FSTYPE" == "zfs" ] && total=50
+       [[ "$mds1_FSTYPE" == "zfs" ]] && total=50
 
        # though this test is designed for striped directory, let's test normal
        # directory too since lock is always saved as CoS lock.
        test_mkdir $DIR/$tdir || error "mkdir $tdir"
        createmany -o $DIR/$tdir/$tfile. $total || error "createmany"
+       stack_trap "unlinkmany $DIR/$tdir/$tfile. $total || true"
+       # if looping with ONLY_REPEAT, wait for previous deletions to finish
+       wait_delete_completed_mds
+
+       # run a loop without concurrent touch to measure rename duration.
+       # only for test debug/robustness, NOT part of COS functional test.
+       local start_time=$SECONDS
+       for ((i = 0; i < total; i++)); do
+               mrename $DIR/$tdir/$tfile.$i $DIR/$tdir/$tfile-new.$i \
+                       > /dev/null
+       done
+       local baseline=$((SECONDS - start_time))
+       echo "rename $total files without 'touch' took $baseline sec"
 
        (
                while true; do
                        touch $DIR/$tdir
                done
        ) &
-       setattr_pid=$!
+       local setattr_pid=$!
 
-       start_time=$(date +%s)
-       for i in $(seq $total); do
-               mrename $DIR/$tdir/$tfile.$i $DIR/$tdir/$tfile-new.$i \
+       # rename files back to original name so unlinkmany works
+       start_time=$SECONDS
+       for ((i = 0; i < total; i++)); do
+               mrename $DIR/$tdir/$tfile-new.$i $DIR/$tdir/$tfile.$i\
                        > /dev/null
        done
-       end_time=$(date +%s)
-       duration=$((end_time - start_time))
+       local duration=$((SECONDS - start_time))
 
        kill -9 $setattr_pid
 
-       echo "rename $total files took $duration sec"
-       [ $duration -lt 100 ] || error "rename took $duration sec"
+       echo "rename $total files with 'touch' took $duration sec"
+       (( max > 2 * baseline )) || max=$((2 * baseline + 5))
+       (( duration <= max )) || error "rename took $duration > $max sec"
 }
 run_test 415 "lock revoke is not missing"
 
index 75ce9f4..7970291 100755 (executable)
@@ -10839,7 +10839,9 @@ function createmany() {
        fi
        $LUSTRE/tests/createmany $*
        rc=$?
-       debugrestore
+       if (( count > 100 )); then
+               debugrestore
+       fi
 
        return $rc
 }