LU-5338 tests: sanity-lfsck 11b allow larger last_id

author Andreas Dilger <adilger@whamcloud.com>

Fri, 1 Mar 2019 00:54:41 +0000 (17:54 -0700)

committer Oleg Drokin <green@whamcloud.com>

Fri, 10 Jul 2020 16:52:10 +0000 (16:52 +0000)
author Andreas Dilger <adilger@whamcloud.com>
Fri, 1 Mar 2019 00:54:41 +0000 (17:54 -0700)
committer Oleg Drokin <green@whamcloud.com>
Fri, 10 Jul 2020 16:52:10 +0000 (16:52 +0000)
diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh

index 6d377d9..499f96e 100755 (executable)
--- a/lustre/tests/sanity-hsm.sh
+++ b/lustre/tests/sanity-hsm.sh
@@ -506,7 +506,7 @@ wait_all_done() {
         [[ -n $fid ]] && cmd+=" | grep '$fid'"
         cmd+=" | egrep 'WAITING|STARTED'"
  
-       wait_result $SINGLEMDS "$cmd" "" $timeout ||
+       wait_update_facet --verbose mds1 "$cmd" "" $timeout ||
                 error "requests did not complete"
  }
  
diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh

index f19d464..bddd2ba 100644 (file)
--- a/lustre/tests/sanity-lfsck.sh
+++ b/lustre/tests/sanity-lfsck.sh
@@ -1381,9 +1381,8 @@ test_11b() {
         local proc_path="${FSNAME}-OST0000-osc-MDT0000"
         local seq=$(do_facet mds1 $LCTL get_param -n \
                     osp.${proc_path}.prealloc_last_seq)
-       local lastid1=$(do_facet ost1 "lctl get_param -n \
-               obdfilter.${ost1_svc}.last_id" | grep $seq |
-               awk -F: '{ print $2 }')
+       local id_used=$(do_facet mds1 $LCTL get_param -n \
+                       osp.${proc_path}.prealloc_last_id)
  
         umount_client $MOUNT
         stop ost1 || error "(1) Fail to stop ost1"
@@ -1395,23 +1394,23 @@ test_11b() {
                 error "(2) Fail to start ost1"
  
         for ((i = 0; i < 60; i++)); do
-               lastid2=$(do_facet ost1 "lctl get_param -n \
-                       obdfilter.${ost1_svc}.last_id" | grep $seq |
-                       awk -F: '{ print $2 }')
-               [ ! -z $lastid2 ] && break;
+               id_ost1=$(do_facet ost1 \
+                         "$LCTL get_param -n obdfilter.$ost1_svc.last_id" |
+                         awk -F: "/$seq/ { print \$2 }")
+               [ -n "$id_ost1" ] && break
                 sleep 1
         done
  
         echo "the on-disk LAST_ID should be smaller than the expected one"
-       [ $lastid1 -gt $lastid2 ] ||
-               error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]"
+       [ $id_used -gt $id_ost1 ] ||
+               error "(4) expect id_used '$id_used' > id_ost1 '$id_ost1'"
  
         echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID"
         $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!"
  
-       wait_update_facet ost1 "$LCTL get_param -n \
-               obdfilter.${OST_DEV}.lfsck_layout |
-               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+       wait_update_facet ost1 \
+               "$LCTL get_param -n obdfilter.$ost1_svc.lfsck_layout |
+                awk '/^status/ { print \\\$2 }'" "completed" 32 || {
                 $SHOW_LAYOUT_ON_OST
                 error "(6) unexpected status"
         }
@@ -1422,12 +1421,12 @@ test_11b() {
                 error "(8) Fail to start ost1"
  
         echo "the on-disk LAST_ID should have been rebuilt"
-       wait_update_facet ost1 "$LCTL get_param -n \
-               obdfilter.${ost1_svc}.last_id | grep $seq |
-               awk -F: '{ print \\\$2 }'" "$lastid1" 60 || {
-               do_facet ost1 $LCTL get_param -n \
-               obdfilter.${ost1_svc}.last_id
-               error "(9) expect lastid1 $seq:$lastid1"
+       # last_id may be larger than $id_used if objects were created/skipped
+       wait_update_facet_cond ost1 \
+               "$LCTL get_param -n obdfilter.$ost1_svc.last_id |
+                awk -F: '/$seq/ { print \\\$2 }'" "-ge" "$id_used" 60 || {
+               do_facet ost1 $LCTL get_param obdfilter.$ost1_svc.last_id
+               error "(9) expect last_id >= id_used $seq:$id_used"
         }
  
         do_facet ost1 $LCTL set_param fail_loc=0
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index 34c8753..31b36c7 100755 (executable)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -8769,9 +8769,8 @@ test_77k() { # LU-10906
         local i
  
         [ "$ORIG_CSUM" ] || ORIG_CSUM=$(eval $get_checksum)
-       stack_trap "wait_update $HOSTNAME '$get_checksum' $ORIG_CSUM" EXIT
-       stack_trap "do_facet mgs $LCTL set_param -P $cksum_param=$ORIG_CSUM" \
-               EXIT
+       stack_trap "wait_update $HOSTNAME '$get_checksum' $ORIG_CSUM || true"
+       stack_trap "do_facet mgs $LCTL set_param -P $cksum_param=$ORIG_CSUM"
  
         for i in 0 1; do
                 do_facet mgs $LCTL set_param -P $cksum_param=$i ||
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh

index fcbd4d1..84d2671 100755 (executable)
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -2983,54 +2983,103 @@ cleanup_check() {
         return 0
  }
  
-wait_update () {
+##
+# wait for a command to return the expected result
+#
+# This will run @check on @node repeatedly until the output matches @expect
+# based on the supplied condition, or until @max_wait seconds have elapsed,
+# whichever comes first.  @cond may be one of the normal bash operators,
+# "-gt", "-ge", "-eq", "-le", "-lt", "==", "!=", or "=~", and must be quoted
+# in the caller to avoid unintentional evaluation by the shell in the caller.
+#
+# If @max_wait is not specified, the condition will be checked for up to 90s.
+#
+# If --verbose is passed as the first argument, the result is printed on each
+# value change, otherwise it is only printed after every 10s interval.
+#
+# Using wait_update_cond() or related helper function is preferable to adding
+# a "long enough" wait for some state to change in the background, since
+# "long enough" may be too short due to tunables, system config, or running in
+# a VM, and must by necessity wait too long for most cases or risk failure.
+#
+# usage: wait_update_cond [--verbose] node check cond expect [max_wait]
+wait_update_cond() {
         local verbose=false
-       if [[ "$1" == "--verbose" ]]; then
-               shift
-               verbose=true
-       fi
+       [[ "$1" == "--verbose" ]] && verbose=true && shift
  
         local node=$1
-       local TEST=$2
-       local FINAL=$3
-       local MAX=${4:-90}
-       local RESULT
-       local PREV_RESULT
-       local WAIT=0
+       local check="$2"
+       local cond="$3"
+       local expect="$4"
+       local max_wait=${5:-90}
+       local result
+       local prev_result
+       local waited=0
+       local begin=$SECONDS
         local sleep=1
         local print=10
  
-       PREV_RESULT=$(do_node $node "$TEST")
-       while [ true ]; do
-               RESULT=$(do_node $node "$TEST")
-               if [[ "$RESULT" == "$FINAL" ]]; then
-                       [[ -z "$RESULT" || $WAIT -le $sleep ]] ||
-                               echo "Updated after ${WAIT}s: wanted '$FINAL'"\
-                                    "got '$RESULT'"
+       while (( $waited <= $max_wait )); do
+               result=$(do_node $node "$check")
+
+               eval [[ "'$result'" $cond "'$expect'" ]]
+               if [[ $? == 0 ]]; then
+                       [[ -z "$result" || $waited -le $sleep ]] ||
+                               echo "Updated after ${waited}s: want '$expect' got '$result'"
                         return 0
                 fi
-               if [[ $verbose && "$RESULT" != "$PREV_RESULT" ]]; then
-                       echo "Changed after ${WAIT}s: from '$PREV_RESULT'"\
-                            "to '$RESULT'"
-                       PREV_RESULT=$RESULT
+               if $verbose && [[ "$result" != "$prev_result" ]]; then
+                       [[ -n "$prev_result" ]] &&
+                               echo "Changed after ${waited}s: from '$prev_result' to '$result'"
+                       prev_result="$result"
                 fi
-               [[ $WAIT -ge $MAX ]] && break
-               [[ $((WAIT % print)) -eq 0 ]] &&
-                       echo "Waiting $((MAX - WAIT)) secs for update"
-               WAIT=$((WAIT + sleep))
+               (( $waited % $print == 0 )) &&
+                       echo "Waiting $((max_wait - waited))s for '$expect'"
                 sleep $sleep
+               waited=$((SECONDS - begin))
         done
-       echo "Update not seen after ${MAX}s: wanted '$FINAL' got '$RESULT'"
+       echo "Update not seen after ${max_wait}s: want '$expect' got '$result'"
         return 3
  }
  
+# usage: wait_update [--verbose] node check expect [max_wait]
+wait_update() {
+       local verbose=
+       [ "$1" = "--verbose" ] && verbose="$1" && shift
+
+       local node="$1"
+       local check="$2"
+       local expect="$3"
+       local max_wait=$4
+
+       wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait
+}
+
+# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait]
+wait_update_facet_cond() {
+       local verbose=
+       [ "$1" = "--verbose" ] && verbose="$1" && shift
+
+       local node=$(facet_active_host $1)
+       local check="$2"
+       local cond="$3"
+       local expect="$4"
+       local max_wait=$5
+
+       wait_update_cond $verbose $node "$check" "$cond" "$expect" $max_wait
+}
+
+# usage: wait_update_facet [--verbose] facet check expect [max_wait]
  wait_update_facet() {
         local verbose=
         [ "$1" = "--verbose" ] && verbose="$1" && shift
  
-       local facet=$1
-       shift
-       wait_update $verbose $(facet_active_host $facet) "$@"
+       local node=$(facet_active_host $1)
+       local check="$2"
+       local expect="$3"
+       local max_wait=$4
+
+       wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait
  }
  
  sync_all_data() {
@@ -10270,20 +10319,15 @@ mdts_set_param() {
         return $rc
  }
  
-wait_result() {
-       local facet=$1
-       shift
-       wait_update --verbose $(facet_active_host $facet) "$@"
-}
-
  mdts_check_param() {
         local key="$1"
         local target="$2"
         local timeout="$3"
         local mdtno
+
         for mdtno in $(seq 1 $MDSCOUNT); do
                 local idx=$(($mdtno - 1))
-               wait_result mds${mdtno} \
+               wait_update_facet --verbose mds${mdtno} \
                         "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \
                         $timeout ||
                         error "$key state is not '$target' on mds${mdtno}"
@@ -10334,7 +10378,7 @@ wait_request_state() {
         local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions"
         cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
  
-       wait_result $mds "$cmd" "$state" 200 ||
+       wait_update_facet --verbose $mds "$cmd" "$state" 200 ||
                 error "request on $fid is not $state on $mds"
  }
author	Andreas Dilger <adilger@whamcloud.com>
	Fri, 1 Mar 2019 00:54:41 +0000 (17:54 -0700)
committer	Oleg Drokin <green@whamcloud.com>
	Fri, 10 Jul 2020 16:52:10 +0000 (16:52 +0000)
lustre/tests/sanity-hsm.sh		patch \| blob \| history
lustre/tests/sanity-lfsck.sh		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history
lustre/tests/test-framework.sh		patch \| blob \| history