From: Elena Gryaznova Date: Mon, 7 Feb 2022 14:14:25 +0000 (+0300) Subject: LU-15493 tests: facet_failover() improvements X-Git-Tag: 2.15.51~169 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F59%2F46359%2F10;p=fs%2Flustre-release.git LU-15493 tests: facet_failover() improvements Fix pattern matching in affected facets accounting (when the == and != operators are used, the string to the right of the operator is considered a pattern). Reduce the duration of the failover performed by facet_failover(): a long failover requires increasing ldlm_enqueue_min to avoid evictions with striped objects, so do the node reboot and the mount on the failover node in parallel. Make wait_clients_import_state() work with a facet list. Test-Parameters: env=CONF_SANITY_EXCEPT=32a Signed-off-by: Elena Gryaznova Signed-off-by: Alexander Zarochentsev Signed-off-by: Andriy Skulysh HPE-bug-id: LUS-7112, LUS-9901, LUS-10718 Change-Id: Ibbeeea49632acce590219da53f322afb44fa4ffa Reviewed-on: https://review.whamcloud.com/46359 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 04f9599..9406876 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -3694,7 +3694,7 @@ facet_failover() { skip=0 #check whether facet has been included in other affected facets for ((index=0; index<$total; index++)); do - [[ *,$facet,* == ,${affecteds[index]}, ]] && skip=1 + [[ ,${affecteds[index]}, == *,$facet,* ]] && skip=1 done if [ $skip -eq 0 ]; then @@ -3710,20 +3710,52 @@ facet_failover() { shutdown_facet $facet done - $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \ - $(mdsdevname 1) "-n" || error "Running e2fsck") + echo "$(date +'%H:%M:%S (%s)') shut down" - local -a mountpids + local hostlist + local waithostlist - for ((index=0; index<$total; index++)); do - facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1) - echo reboot facets: 
${affecteds[index]} + for facet in ${facets//,/ }; do + local host=$(facet_active_host $facet) - reboot_facet $facet $sleep_time + hostlist=$(expand_list $hostlist $host) + if [ $(facet_host $facet) = \ + $(facet_failover_host $facet) ]; then + waithostlist=$(expand_list $waithostlist $host) + fi + done - change_active ${affecteds[index]} + if [ "$FAILURE_MODE" = HARD ]; then + for host in ${hostlist//,/ }; do + reboot_node $host + done + echo "$(date +'%H:%M:%S (%s)') $hostlist rebooted" + # We need to wait the rebooted hosts in case if + # facet_HOST == facetfailover_HOST + if ! [ -z "$waithostlist" ]; then + wait_for_host $waithostlist + if $LOAD_MODULES_REMOTE; then + echo "loading modules on $waithostlist" + do_rpc_nodes $waithostlist load_modules_local + fi + fi + else + sleep 10 + fi - wait_for_facet ${affecteds[index]} + if [[ " ${affecteds[@]} " =~ " $SINGLEMDS " ]]; then + change_active $SINGLEMDS + fi + + $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \ + $(facet_device $SINGLEMDS) "-n" || error "Running e2fsck") + + local -a mountpids + + for ((index=0; index<$total; index++)); do + if [[ ${affecteds[index]} != $SINGLEMDS ]]; then + change_active ${affecteds[index]} + fi if $GSS_SK; then init_gss init_facets_vars_simple @@ -3753,6 +3785,20 @@ facet_failover() { xargs -IX keyctl setperm X 0x3f3f3f3f" fi done + echo "$(date +'%H:%M:%S (%s)') targets are mounted" + + if [ "$FAILURE_MODE" = HARD ]; then + hostlist=$(exclude_items_from_list $hostlist $waithostlist) + if ! 
[ -z "$hostlist" ]; then + wait_for_host $hostlist + if $LOAD_MODULES_REMOTE; then + echo "loading modules on $hostlist" + do_rpc_nodes $hostlist load_modules_local + fi + fi + fi + + echo "$(date +'%H:%M:%S (%s)') facet_failover done" } replay_barrier() { @@ -7884,7 +7930,9 @@ wait_clients_import_state () { local facets="$facet" if [ "$FAILURE_MODE" = HARD ]; then - facets=$(facets_on_host $(facet_active_host $facet)) + facets=$(for f in ${facet//,/ }; do + facets_on_host $(facet_active_host $f) | tr "," "\n" + done | sort -u | paste -sd , ) fi for facet in ${facets//,/ }; do