From: Elena Gryaznova Date: Tue, 19 May 2020 14:21:59 +0000 (+0300) Subject: LU-13585 tests: add mustfail check X-Git-Tag: 2.13.54~2 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=d4bba4c239ce18d2955b5f39a0ea6bac2413d49f LU-13585 tests: add mustfail check This patch adds the possibility to ignore MPI load failures for particular instances. This is useful for Quota Pools (QP) stress tests, which are supposed to randomly hit QP limits. The subset of expected failures is set by specifying NINSTMUSTFAIL: 0 - mpi tests from all clients must pass (default); 1 - mpi tests from all clients must fail; N - mpi tests from one client out of every N must fail. Set NINSTMUSTFAIL=2 to expect every 2nd mpi instance to fail and NINSTMUSTFAIL=3 to expect every 3rd mpi instance to fail. For the QP test, different limits are set for users per pool: half of the users have a small limit, which makes IOR fail: a small limit is set for user1, user3, user5; a large limit is set for user2, user4. Run N IOR instances on N clients; each client/instance uses its own user{1..N}. The test is considered passed if the IOR instances fail on client1, client3, client5. 
Test-Parameters: trivial Signed-off-by: Elena Gryaznova HPE-bug-id: LUS-8844, LUS-8504, LUS-8602 Reviewed-by: Vladimir Saveliev Reviewed-by: Sergey Cheremencev Change-Id: Ia7c4e394c3724190d6cff9f086f8837e54f6110d Reviewed-on: https://review.whamcloud.com/38662 Tested-by: jenkins Reviewed-by: Sergey Cheremencev Reviewed-by: Vladimir Saveliev Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/ha.sh b/lustre/tests/ha.sh index 13af59f..d30424a 100755 --- a/lustre/tests/ha.sh +++ b/lustre/tests/ha.sh @@ -223,6 +223,7 @@ declare ha_mdtest_params=${MDTESTP:-'" -i 1 -n 1000"'} declare ha_mpirun_options=${MPIRUN_OPTIONS:-""} declare ha_clients_stripe=${CLIENTSSTRIPE:-'"$STRIPEPARAMS"'} declare ha_nclientsset=${NCLIENTSSET:-1} +declare ha_ninstmustfail=${NINSTMUSTFAIL:-0} declare ha_racer_params=${RACERP:-"MDSCOUNT=1"} @@ -420,6 +421,7 @@ ha_repeat_mpi_load() local machines=$5 local stripeparams=$6 local mpiuser=$7 + local mustpass=$8 local tag=${ha_mpi_load_tags[$load]} local cmd=${ha_mpi_load_cmds[$tag]} local dir=$ha_test_dir/$client-$tag @@ -432,10 +434,12 @@ ha_repeat_mpi_load() cmd=${cmd//"{}"/$dir} cmd=${cmd//"{params}"/$parameter} + [[ -n "$ha_postcmd" ]] && ha_postcmd=${ha_postcmd//"{}"/$dir} ha_info "Starting $tag" machines="-machinefile $machines" while [ ! -e "$ha_stop_file" ] && ((rc == 0)); do + ha_info "$client Starts: $cmd" 2>&1 | tee -a $log { local mdt_index if $ha_mdt_index_random && [ $ha_mdt_index -ne 0 ]; then @@ -450,18 +454,32 @@ ha_repeat_mpi_load() ha_on $client chmod a+xwr $dir && ha_on $client "su $mpiuser sh -c \" $mpirun $ha_mpirun_options \ -np $((${#ha_clients[@]} * mpi_threads_per_client )) \ - $machines $cmd \" " && + $machines $cmd \" " || rc=$? + [[ -n "$ha_postcmd" ]] && ha_info "$ha_postcmd" && + ha_on $client $ha_postcmd >>"$log" 2>&1 + (( ((rc == 0)) && (( mustpass != 0 )) )) || + (( ((rc != 0)) && (( mustpass == 0 )) )) && ha_on $client rm -rf "$dir"; } >>"$log" 2>&1 || rc=$? 
- ha_info rc=$rc - - if ((rc != 0)); then + ha_info rc=$rc mustpass=$mustpass + + # mustpass=0 means that failure is expected + if (( rc !=0 )); then + if (( mustpass != 0 )); then + touch "$ha_fail_file" + touch "$ha_stop_file" + ha_dump_logs "${ha_clients[*]} ${ha_servers[*]}" + else + # Ok to fail + rc=0 + fi + elif (( mustpass == 0 )); then touch "$ha_fail_file" touch "$ha_stop_file" ha_dump_logs "${ha_clients[*]} ${ha_servers[*]}" fi - echo $rc >"$status" + echo rc=$rc mustpass=$mustpass >"$status" nr_loops=$((nr_loops + 1)) done @@ -469,7 +487,8 @@ ha_repeat_mpi_load() [ $nr_loops -ne 0 ] && avg_loop_time=$((($(date +%s) - start_time) / nr_loops)) - ha_info "$tag stopped: rc $rc avg loop time $avg_loop_time" + ha_info "$tag stopped: rc=$rc mustpass=$mustpass \ + avg loop time $avg_loop_time" } ha_start_mpi_loads() @@ -532,7 +551,11 @@ ha_start_mpi_loads() local stripe=${!aref} local m=$(( n % ha_nclientsset)) machines=${mach[m]} - ha_repeat_mpi_load $client $load $status "$parameter" $machines "$stripe" "$mpiuser" & + local mustpass=1 + [[ $ha_ninstmustfail == 0 ]] || + mustpass=$(( n % ha_ninstmustfail )) + ha_repeat_mpi_load $client $load $status "$parameter" \ + $machines "$stripe" "$mpiuser" "$mustpass" & ha_status_files+=("$status") done done @@ -552,11 +575,12 @@ ha_repeat_nonmpi_load() local avg_loop_time=0 local start_time=$(date +%s) - cmd=${cmd//"{}"/$dir} + cmd=${cmd//"{}"/$dir} - ha_info "Starting $tag on $client" + ha_info "Starting $tag on $client" while [ ! -e "$ha_stop_file" ] && ((rc == 0)); do + ha_info "$client Starts: $cmd" 2>&1 | tee -a $log ha_on $client "mkdir -p $dir && \ $cmd && \ rm -rf $dir" >>"$log" 2>&1 || rc=$?