Whamcloud - gitweb
LU-13585 tests: add mustfail check 62/38662/4
authorElena Gryaznova <elena.gryaznova@hpe.com>
Tue, 19 May 2020 14:21:59 +0000 (17:21 +0300)
committerOleg Drokin <green@whamcloud.com>
Wed, 27 May 2020 05:05:43 +0000 (05:05 +0000)
This patch adds the ability to ignore MPI load failures
for particular instances.

This is useful for Quota Pools stress tests which are supposed
to randomly hit QP limits.
The subset of expected failures is set by specifying NINSTMUSTFAIL:
      0 - mpi tests from all clients must pass (default)
      1 - mpi tests from all clients must fail
      N - mpi tests from one client out of every N must fail.
Set NINSTMUSTFAIL=2 to expect every 2nd mpi instance to fail and
NINSTMUSTFAIL=3 to expect every 3rd mpi instance to fail.

For the QP test, different limits are set for users per pool: half
of the users have a small limit, which makes IOR fail:
  small limit is set for user1, user3, user5
  large limit is set for user2, user4
Run N IOR instances on N clients; each client/instance uses its own
user{1..N}. The test is considered passed if the IOR instances failed
on client1, client3, client5.

Test-Parameters: trivial
Signed-off-by: Elena Gryaznova <elena.gryaznova@hpe.com>
HPE-bug-id: LUS-8844, LUS-8504, LUS-8602
Reviewed-by: Vladimir Saveliev <vladimir.saveliev@hpe.com>
Reviewed-by: Sergey Cheremencev <c17829@cray.com>
Change-Id: Ia7c4e394c3724190d6cff9f086f8837e54f6110d
Reviewed-on: https://review.whamcloud.com/38662
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Reviewed-by: Vladimir Saveliev <c17830@cray.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/ha.sh

index 13af59f..d30424a 100755 (executable)
@@ -223,6 +223,7 @@ declare     ha_mdtest_params=${MDTESTP:-'" -i 1 -n 1000"'}
 declare     ha_mpirun_options=${MPIRUN_OPTIONS:-""}
 declare     ha_clients_stripe=${CLIENTSSTRIPE:-'"$STRIPEPARAMS"'}
 declare     ha_nclientsset=${NCLIENTSSET:-1}
+declare     ha_ninstmustfail=${NINSTMUSTFAIL:-0}
 
 declare     ha_racer_params=${RACERP:-"MDSCOUNT=1"}
 
@@ -420,6 +421,7 @@ ha_repeat_mpi_load()
        local machines=$5
        local stripeparams=$6
        local mpiuser=$7
+       local mustpass=$8
        local tag=${ha_mpi_load_tags[$load]}
        local cmd=${ha_mpi_load_cmds[$tag]}
        local dir=$ha_test_dir/$client-$tag
@@ -432,10 +434,12 @@ ha_repeat_mpi_load()
        cmd=${cmd//"{}"/$dir}
        cmd=${cmd//"{params}"/$parameter}
 
+       [[ -n "$ha_postcmd" ]] && ha_postcmd=${ha_postcmd//"{}"/$dir}
        ha_info "Starting $tag"
 
        machines="-machinefile $machines"
        while [ ! -e "$ha_stop_file" ] && ((rc == 0)); do
+               ha_info "$client Starts: $cmd" 2>&1 |  tee -a $log
                {
                local mdt_index
                if $ha_mdt_index_random && [ $ha_mdt_index -ne 0 ]; then
@@ -450,18 +454,32 @@ ha_repeat_mpi_load()
                ha_on $client chmod a+xwr $dir &&
                ha_on $client "su $mpiuser sh -c \" $mpirun $ha_mpirun_options \
                        -np $((${#ha_clients[@]} * mpi_threads_per_client )) \
-                       $machines $cmd \" " &&
+                       $machines $cmd \" " || rc=$?
+               [[ -n "$ha_postcmd" ]] && ha_info "$ha_postcmd" &&
+                       ha_on $client $ha_postcmd >>"$log" 2>&1
+               (( ((rc == 0)) && (( mustpass != 0 )) )) ||
+               (( ((rc != 0)) && (( mustpass == 0 )) )) &&
                        ha_on $client rm -rf "$dir";
                } >>"$log" 2>&1 || rc=$?
 
-               ha_info rc=$rc
-
-               if ((rc != 0)); then
+               ha_info rc=$rc mustpass=$mustpass
+
+               # mustpass=0 means that failure is expected
+               if (( rc !=0 )); then
+                       if (( mustpass != 0 )); then
+                               touch "$ha_fail_file"
+                               touch "$ha_stop_file"
+                               ha_dump_logs "${ha_clients[*]} ${ha_servers[*]}"
+                       else
+                               # Ok to fail
+                               rc=0
+                       fi
+               elif (( mustpass == 0 )); then
                        touch "$ha_fail_file"
                        touch "$ha_stop_file"
                        ha_dump_logs "${ha_clients[*]} ${ha_servers[*]}"
                fi
-               echo $rc >"$status"
+               echo rc=$rc mustpass=$mustpass >"$status"
 
                nr_loops=$((nr_loops + 1))
        done
@@ -469,7 +487,8 @@ ha_repeat_mpi_load()
        [ $nr_loops -ne 0 ] &&
                avg_loop_time=$((($(date +%s) - start_time) / nr_loops))
 
-       ha_info "$tag stopped: rc $rc avg loop time $avg_loop_time"
+       ha_info "$tag stopped: rc=$rc mustpass=$mustpass \
+               avg loop time $avg_loop_time"
 }
 
 ha_start_mpi_loads()
@@ -532,7 +551,11 @@ ha_start_mpi_loads()
                        local stripe=${!aref}
                        local m=$(( n % ha_nclientsset))
                        machines=${mach[m]}
-                       ha_repeat_mpi_load $client $load $status "$parameter" $machines "$stripe" "$mpiuser" &
+                       local mustpass=1
+                       [[ $ha_ninstmustfail == 0 ]] ||
+                               mustpass=$(( n % ha_ninstmustfail ))
+                       ha_repeat_mpi_load $client $load $status "$parameter" \
+                               $machines "$stripe" "$mpiuser" "$mustpass" &
                                ha_status_files+=("$status")
                done
        done
@@ -552,11 +575,12 @@ ha_repeat_nonmpi_load()
        local avg_loop_time=0
        local start_time=$(date +%s)
 
-    cmd=${cmd//"{}"/$dir}
+       cmd=${cmd//"{}"/$dir}
 
-    ha_info "Starting $tag on $client"
+       ha_info "Starting $tag on $client"
 
        while [ ! -e "$ha_stop_file" ] && ((rc == 0)); do
+               ha_info "$client Starts: $cmd" 2>&1 |  tee -a $log
                ha_on $client "mkdir -p $dir &&                              \
                        $cmd &&                                              \
                        rm -rf $dir" >>"$log" 2>&1 || rc=$?