Whamcloud - gitweb
EX-8584 tests: check and wait lpcc_purge scanning ends
authorLei Feng <flei@whamcloud.com>
Fri, 17 Nov 2023 07:53:21 +0000 (15:53 +0800)
committerAndreas Dilger <adilger@whamcloud.com>
Wed, 22 Nov 2023 20:49:37 +0000 (20:49 +0000)
Check lpcc_purge status to make sure it finishes at least
one round of scanning.

Signed-off-by: Lei Feng <flei@whamcloud.com>
Test-Parameters: trivial testlist=sanity-pcc env=ONLY="200 201 202",ONLY_REPEAT=50
Change-Id: I8e6f50393d1a3cbb7a1bc976942631db6ecceb67
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53167
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/tests/sanity-pcc.sh

index 706ac0c..09ba79f 100755 (executable)
@@ -4993,6 +4993,30 @@ test_107() {
 }
 run_test 107 "Wait for PCC atatch finished on PCC detach command"
 
+# Wait until lpcc_purge reports that it finished at least one scan.
+#
+# Each poll sends SIGUSR1 to lpcc_purge through do_facet on $SINGLEAGT and
+# then looks for a positive "end_time_secs" value in the stats file.
+# NOTE(review): the signal is presumably what makes lpcc_purge write the
+# stats file (callers start it with "-w $statsfile") - confirm.
+# NOTE(review): the stats file is read on the local node while the signal
+# goes through do_facet - confirm both see the same file when $SINGLEAGT
+# is a remote node.
+#
+# Arguments:
+#   $1 - pid file of the running lpcc_purge, handed to pkill --pidfile
+#   $2 - stats file to poll
+#
+# Polls at most 50 times, 0.7 sec apart.  A timeout is reported on stderr
+# but is not fatal; callers go on to verify the purge result themselves.
+wait_lpcc_purge_scan_end()  {
+       local pidfile=$1
+       local statsfile=$2
+       local i
+       local end_time_secs
+
+       for (( i = 0; i < 50; i++ )); do
+               do_facet $SINGLEAGT pkill --pidfile "$pidfile" --signal USR1 \
+                       -- lpcc_purge
+               # second field of the "end_time_secs" line, minus the comma;
+               # keep the pipeline so a missing stats file is not an error
+               end_time_secs=$(grep "end_time_secs" "$statsfile" |
+                               awk '{print $2}')
+               end_time_secs=${end_time_secs/,/}
+
+               if [[ -n "$end_time_secs" && "$end_time_secs" -gt 0 ]]; then
+                       echo "finished at least 1 scanning"
+                       break
+               fi
+               echo "wait for 0.7 sec..."
+               sleep 0.7
+       done
+       # the loop only runs to completion when no finished scan was seen
+       if (( i >= 50 )); then
+               echo "lpcc_purge did not finish a scan after $i checks" >&2
+       fi
+       # purging files is async, so wait a little longer
+       sleep 5
+}
+
 test_200() {
        local loopfile="$TMP/$tfile"
        local mntpt="/mnt/pcc.$tdir"
@@ -5039,11 +5063,13 @@ test_200() {
                echo "cache device usage($usage) >= high_usage($high_usage)"
        fi
 
-       pidfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.pid
+       local pidfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.pid
+       local statsfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.stats
        do_facet $SINGLEAGT $LPCC_PURGE --mount $MOUNT --cache $hsm_root \
                --high-usage $high_usage --low-usage $low_usage \
-               --roid $HSM_ARCHIVE_NUMBER --pidfile $pidfile --interval 1 -b &
-       sleep 5
+               --roid $HSM_ARCHIVE_NUMBER --pidfile $pidfile -w $statsfile \
+               --interval 1 -b &
+       wait_lpcc_purge_scan_end $pidfile $statsfile
        do_facet $SINGLEAGT pkill --pidfile $pidfile -- lpcc_purge
 
        # Get cache usage
@@ -5110,11 +5136,13 @@ test_201() {
                echo "cache device usage ($usage)"
        fi
 
-       pidfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.pid
+       local pidfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.pid
+       local statsfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.stats
        do_facet $SINGLEAGT $LPCC_PURGE --mount $MOUNT --cache $hsm_root \
                --high-usage $high_usage --low-usage $low_usage \
-               --roid $HSM_ARCHIVE_NUMBER --pidfile $pidfile --interval 1 -b &
-       sleep 5
+               --roid $HSM_ARCHIVE_NUMBER --pidfile $pidfile -w $statsfile \
+               --interval 1 -b &
+       wait_lpcc_purge_scan_end $pidfile $statsfile
        do_facet $SINGLEAGT pkill --pidfile $pidfile -- lpcc_purge
 
        # Get cache usage
@@ -5175,11 +5203,13 @@ test_202() {
                sleep 0.1
        done
 
-       pidfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.pid
+       local pidfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.pid
+       local statsfile=lpcc_purge-${HSM_ARCHIVE_NUMBER}.stats
        do_facet $SINGLEAGT $LPCC_PURGE --mount $MOUNT --cache $hsm_root \
                --high-usage $high_usage --low-usage $low_usage \
-               --roid $HSM_ARCHIVE_NUMBER --pidfile $pidfile --interval 1 -b &
-       sleep 5
+               --roid $HSM_ARCHIVE_NUMBER --pidfile $pidfile -w $statsfile \
+               --interval 1 -b &
+       wait_lpcc_purge_scan_end $pidfile $statsfile
        do_facet $SINGLEAGT pkill --pidfile $pidfile -- lpcc_purge
 
        # verify that the independent file was not removed
@@ -5189,10 +5219,10 @@ test_202() {
 
        # there should be 54 files been purged
        # in theroy, file 1-54 should be purge, 55-100 should be kept.
-       # But since we are an approxomate LRU algorithm, we test f.50 being
+       # But since we are an approxomate LRU algorithm, we test f.40 being
        # purged and f.60 not
 
-       file=$DIR/$tdir/$tfile.50.dat
+       file=$DIR/$tdir/$tfile.40.dat
        check_lpcc_state $file "none"
        file=$DIR/$tdir/$tfile.60.dat
        check_lpcc_state $file "readonly"