Whamcloud - gitweb
LU-11170 tests: add debugging to sanity/415
author Andreas Dilger <adilger@whamcloud.com>
Fri, 20 Jan 2023 20:34:42 +0000 (13:34 -0700)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 10 Mar 2023 19:53:18 +0000 (19:53 +0000)
Add a loop of renames without the concurrent 'touch' operation to
measure the baseline test time, and then a second loop that has the
'touch', so that we can see whether slow renames are because of COS
(which would make the test failure a kernel bug to be fixed), or
because the test is running in a VM and the server/disk is slow
(which is something to be fixed in the test, e.g. by making "slow"
relative to the non-touch baseline time).

Lustre-change: https://review.whamcloud.com/49724
Lustre-commit: 6594babc73851fab335c514cd1fee018425e7bb3

Test-Parameters: trivial testlist=sanity env=ONLY=415,ONLY_REPEAT=120 mdscount=2 mdtcount=4
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Ic1a952be0b861005b46da3e673216e455f3ebbe5
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: Alex Deiter <alex.deiter@gmail.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50203
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/tests/sanity.sh
lustre/tests/test-framework.sh

index ce6221f..b29851f 100755 (executable)
@@ -26040,45 +26040,55 @@ test_414() {
 run_test 414 "simulate ENOMEM in ptlrpc_register_bulk()"
 
 test_415() {
-       [ $PARALLEL == "yes" ] && skip "skip parallel run"
-       [ $MDS1_VERSION -lt $(version_code 2.11.52) ] &&
+       [[ $PARALLEL == "yes" ]] && skip "skip parallel run"
+       (( $MDS1_VERSION >= $(version_code 2.11.52) )) ||
                skip "Need server version at least 2.11.52"
 
        # LU-11102
-       local total
-       local setattr_pid
-       local start_time
-       local end_time
-       local duration
+       local total=500
+       local max=120
 
-       total=500
        # this test may be slow on ZFS
-       [ "$mds1_FSTYPE" == "zfs" ] && total=100
+       [[ "$mds1_FSTYPE" == "zfs" ]] && total=50
 
        # though this test is designed for striped directory, let's test normal
        # directory too since lock is always saved as CoS lock.
        test_mkdir $DIR/$tdir || error "mkdir $tdir"
        createmany -o $DIR/$tdir/$tfile. $total || error "createmany"
+       stack_trap "unlinkmany $DIR/$tdir/$tfile. $total || true"
+       # if looping with ONLY_REPEAT, wait for previous deletions to finish
+       wait_delete_completed_mds
+
+       # run a loop without concurrent touch to measure rename duration.
+       # only for test debug/robustness, NOT part of COS functional test.
+       local start_time=$SECONDS
+       for ((i = 0; i < total; i++)); do
+               mrename $DIR/$tdir/$tfile.$i $DIR/$tdir/$tfile-new.$i \
+                       > /dev/null
+       done
+       local baseline=$((SECONDS - start_time))
+       echo "rename $total files without 'touch' took $baseline sec"
 
        (
                while true; do
                        touch $DIR/$tdir
                done
        ) &
-       setattr_pid=$!
+       local setattr_pid=$!
 
-       start_time=$(date +%s)
-       for i in $(seq $total); do
-               mrename $DIR/$tdir/$tfile.$i $DIR/$tdir/$tfile-new.$i \
+       # rename files back to original name so unlinkmany works
+       start_time=$SECONDS
+       for ((i = 0; i < total; i++)); do
+               mrename $DIR/$tdir/$tfile-new.$i $DIR/$tdir/$tfile.$i\
                        > /dev/null
        done
-       end_time=$(date +%s)
-       duration=$((end_time - start_time))
+       local duration=$((SECONDS - start_time))
 
        kill -9 $setattr_pid
 
-       echo "rename $total files took $duration sec"
-       [ $duration -lt 100 ] || error "rename took $duration sec"
+       echo "rename $total files with 'touch' took $duration sec"
+       (( max > 2 * baseline )) || max=$((2 * baseline + 5))
+       (( duration <= max )) || error "rename took $duration > $max sec"
 }
 run_test 415 "lock revoke is not missing"
 
index 64b3ce2..2425164 100755 (executable)
@@ -10835,16 +10835,15 @@ is_rmentry_supported() {
 function createmany() {
        local count=${!#}
 
-       (( count > 100 )) && {
-               local saved_debug=$($LCTL get_param -n debug)
-               local list=$(comma_list $(all_nodes))
-
-               do_nodes $list $LCTL set_param -n debug=0
-       }
+       if (( count > 100 )); then
+               debugsave
+               do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0
+       fi
        $LUSTRE/tests/createmany $*
        local rc=$?
-       (( count > 100 )) &&
-               do_nodes $list "$LCTL set_param -n debug=\\\"$saved_debug\\\""
+       if (( count > 100 )); then
+               debugrestore
+       fi
        return $rc
 }