Whamcloud - gitweb
LU-17165 tests: stable count in recovery-small/141 45/56945/3
author Andreas Dilger <adilger@whamcloud.com>
Fri, 8 Nov 2024 23:35:55 +0000 (16:35 -0700)
committer Oleg Drokin <green@whamcloud.com>
Mon, 9 Dec 2024 06:15:19 +0000 (06:15 +0000)
Cancelling the MGC locks and then fetching the lock count on the
OST is racy: the count can randomly be "0" or "23" either before
or after the MGS is restarted.  Wait until the count has stabilized
to ensure the test can pass consistently.

Test-Parameters: trivial testlist=recovery-small
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Ib6d6fdeb721d6ff85a366e58c621ed7b883ebbe5
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56945
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Tested-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/recovery-small.sh

index 55e54ae..2ef1681 100755 (executable)
@@ -3043,7 +3043,7 @@ test_140b() {
 run_test 140b "local mount is excluded from recovery"
 
 test_141() {
-       local oldc
+       local oldc=0
        local newc
        local oldgen
 
@@ -3056,18 +3056,28 @@ test_141() {
        oldgen=$(do_facet ost1 $LCTL get_param -n mgc.*.import |
                 awk '/generation:/{print $NF}')
        do_rpc_nodes $(facet_active_host ost1) cancel_lru_locks MGC
-       oldc=$(do_facet ost1 $LCTL get_param -n \
-               'ldlm.namespaces.MGC*.lock_count')
+       local end=$((SECONDS + 30))
+       local tmpc=1
+
+       while (( oldc == 0 || tmpc != oldc )) && (( SECONDS < end )); do
+               tmpc=$oldc
+               sleep 1
+               oldc=$(do_facet ost1 \
+                      $LCTL "get_param -n 'ldlm.namespaces.MGC*.lock_count'")
+       done
+
        fail $SINGLEMDS
        wait_mgc_import_state ost1 FULL
        wait_update_facet_cond ost1 "$LCTL get_param -n mgc.*.import |
-               awk '/generation:/{print}' | cut -d':' -f2" != " $oldgen"
+               awk '/generation:/{print}' | cut -d':' -f2" "!=" " $oldgen"
        do_rpc_nodes $(facet_active_host ost1) cancel_lru_locks MGC
-       newc=$(do_facet ost1 $LCTL get_param -n \
-               'ldlm.namespaces.MGC*.lock_count')
-
-       [ $oldc -eq $newc ] || error "mgc lost locks ($oldc != $newc)"
-       return 0
+       wait_update_facet ost1 \
+               "$LCTL get_param -n 'ldlm.namespaces.MGC*.lock_count'" $oldc ||
+       {
+               newc=$(do_facet ost1 \
+                      $LCTL get_param -n 'ldlm.namespaces.MGC*.lock_count')
+               error "mgc lost locks ($oldc != $newc)"
+       }
 }
 run_test 141 "do not lose locks on MGS restart"