Whamcloud - gitweb
LU-17428 tests: restore recovery-small/10a lru_max_age 77/56377/2
author: Andreas Dilger <adilger@whamcloud.com>
Tue, 17 Sep 2024 00:23:16 +0000 (18:23 -0600)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 25 Sep 2024 04:08:33 +0000 (04:08 +0000)
Restore the longer lru_max_age in recovery-small test_10a, since the
reduced default lru_max_age otherwise prevents the client from being evicted.

Skip the console message check for stuck MDS threads on subsequent
iterations of test_10a when the test is run in a loop, since message
ratelimiting may prevent the console message from being printed.

Test-Parameters: trivial testlist=recovery-small env=ONLY=10a,ONLY_REPEAT=10
Fixes: 357cae970c ("LU-17428 ldlm: reduce default lru_max_age")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Ia8ba5f83aa001d3c810e13637754b0e169dc3b9b
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56377
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Timothy Day <timday@amazon.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/recovery-small.sh
lustre/tests/test-framework.sh

index 64856f4..674b8e8 100755 (executable)
@@ -139,6 +139,12 @@ run_test 9 "pause bulk on OST (bug 1420)"
 test_10a() {
        local before=$(date +%s)
        local evict
+       local lru_param="ldlm.namespaces.*mdc*.lru_max_age"
+       local old_max_age=($($LCTL get_param -n $lru_param))
+       local old_ratelimit=$($LCTL get_param console_ratelimit)
+
+       $LCTL set_param $lru_param=3900s console_ratelimit=0
+       stack_trap "$LCTL set_param $lru_param=$old_max_age $old_ratelimit"
 
        do_facet client "stat $DIR > /dev/null"  ||
                error "failed to stat $DIR: $?"
@@ -149,13 +155,16 @@ test_10a() {
        client_reconnect
        evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state |
          awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
-       [ ! -z "$evict" ] && [[ $evict -gt $before ]] ||
+       [[ -n "$evict" ]] && (( $evict > $before )) ||
                (do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state;
                    error "no eviction: $evict before:$before")
 
        do_facet client checkstat -v -p 0777 $DIR ||
                error "client checkstat failed: $?"
 
+       # console messages may be ratelimited on later iterations
+       (( ONLY_REPEAT_ITER == 1 )) || return 0
+
        # check that the thread watchdog is working properly
        do_facet mds1 dmesg | tac | sed "/${TESTNAME/_/ }/,$ d" |
                grep "[Ss]ervice thread pid .* was inactive" ||
index c352747..3439af0 100755 (executable)
@@ -7569,7 +7569,7 @@ run_one_logged() {
                local repeat_end_sec=$((SECONDS + ONLY_MINUTES * 60))
        fi
 
-       local testiter=1
+       export ONLY_REPEAT_ITER=1
        while true; do
                local before_sub=$SECONDS
 
@@ -7578,7 +7578,7 @@ run_one_logged() {
                if [[ -n "$append" ]]; then
                        [[ -n "$tdir" ]] && rm -rvf $DIR/$tdir*
                        [[ -n "$tfile" ]] && rm -vf $DIR/$tfile*
-                       echo "subtest iteration $testiter/$repeat " \
+                       echo "subtest iteration $ONLY_REPEAT_ITER/$repeat " \
                                "($(((SECONDS-before)/60))/$ONLY_MINUTES min)"
                fi
                # loop around subshell so stack_trap EXIT triggers each time
@@ -7620,10 +7620,10 @@ run_one_logged() {
                # no repeat options were set, break after the first iteration
                [[ -z "$repeat" && -z "$repeat_end_sec" ]] && break
                # break if any repeat options were set and have been met
-               [[ -n "$repeat" ]] && (( $testiter >= $repeat )) && break
+               [[ -n "$repeat" ]] && (( ONLY_REPEAT_ITER >= repeat )) && break
                [[ -n "$repeat_end_sec" ]] &&
                        (( $SECONDS >= $repeat_end_sec )) && break
-               ((testiter++))
+               ((ONLY_REPEAT_ITER++))
        done
 
        [[ $KPTR_ON_MOUNT ]] || kptr_restore