From: Elena Gryaznova
Date: Tue, 24 Aug 2010 19:52:29 +0000 (+0400)
Subject: b=20407 TF: "HARD" failovers with multiple targets per server
X-Git-Tag: 2.0.51.0~36
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=66572cdeaefae2bcc7a3043a9b8de6ab7c37a642

b=20407 TF: "HARD" failovers with multiple targets per server
i=Brian.Murrell
i=Li.Wei
---

diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh
index 4bee2a5..298716a 100644
--- a/lustre/tests/cfg/local.sh
+++ b/lustre/tests/cfg/local.sh
@@ -75,15 +75,13 @@ MKFSOPT=""
 [ "x$L_GETIDENTITY" != "x" ] &&
         MDSOPT=$MDSOPT" --param mdt.identity_upcall=$L_GETIDENTITY"
-MDSn_MKFS_OPTS=$MDS_MKFS_OPTS
 MDS_MKFS_OPTS="--mdt --fsname=$FSNAME --device-size=$MDSSIZE --param sys.timeout=$TIMEOUT $MKFSOPT $MDSOPT $MDS_MKFS_OPTS"
 
 if [[ $mds1_HOST == $mgs_HOST ]] && [[ $MDSDEV1 == $MGSDEV ]]; then
     MDS_MKFS_OPTS="--mgs $MDS_MKFS_OPTS"
 else
     MDS_MKFS_OPTS="--mgsnode=$MGSNID $MDS_MKFS_OPTS"
-    mgs_MKFS_OPTS="--mgs --device-size=$MGSSIZE"
+    MGS_MKFS_OPTS="--mgs --device-size=$MGSSIZE"
 fi
-MDSn_MKFS_OPTS="--mgsnode=$MGSNID --mdt --fsname=$FSNAME --device-size=$MDSSIZE --param sys.timeout=$TIMEOUT $MKFSOPT $MDSOPT $MDSn_MKFS_OPTS"
 
 MKFSOPT=""
 [ "x$OSTJOURNALSIZE" != "x" ] &&
@@ -100,7 +98,7 @@ OST_MKFS_OPTS="--ost --fsname=$FSNAME --device-size=$OSTSIZE --mgsnode=$MGSNID -
 
 MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o loop,user_xattr,acl"}
 OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o loop"}
-mgs_MOUNT_OPTS=${mgs_MOUNT_OPTS:-$MDS_MOUNT_OPTS}
+MGS_MOUNT_OPTS=${MGS_MOUNT_OPTS:-$MDS_MOUNT_OPTS}
 
 #client
 MOUNT=${MOUNT:-/mnt/${FSNAME}}
diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh
index 1e7c8a6..f4d8546 100644
--- a/lustre/tests/conf-sanity.sh
+++ b/lustre/tests/conf-sanity.sh
@@ -16,6 +16,12 @@ ONLY=${ONLY:-"$*"}
 ALWAYS_EXCEPT="$CONF_SANITY_EXCEPT"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
+if [ "$FAILURE_MODE" = "HARD" ]; then
+    CONFIG_EXCEPTIONS="24a " && \
+    echo "Except the tests: $CONFIG_EXCEPTIONS for FAILURE_MODE=$FAILURE_MODE, bug 23573" && \
+    ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS"
+fi
+
 SRCDIR=`dirname $0`
 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
@@ -94,7 +100,7 @@ reformat_and_config() {
 
 start_mgs () {
     echo "start mgs"
-    start mgs $MGSDEV $mgs_MOUNT_OPTS
+    start mgs $MGSDEV $MGS_MOUNT_OPTS
 }
 
 start_mds() {
diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh
index 497887f..6ceecf8 100755
--- a/lustre/tests/insanity.sh
+++ b/lustre/tests/insanity.sh
@@ -14,10 +14,8 @@ init_logging
 ALWAYS_EXCEPT="10 $INSANITY_EXCEPT"
 
 if [ "$FAILURE_MODE" = "HARD" ]; then
-    mixed_ost_devs && CONFIG_EXCEPTIONS="0 2 4 5 6 8" && \
-        echo -n "Several ost services on one ost node are used with FAILURE_MODE=$FAILURE_MODE. " && \
-        echo "Except the tests: $CONFIG_EXCEPTIONS" && \
-        ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS"
+    skip_env "$0: is not functional with FAILURE_MODE = HARD, please use recovery-double-scale, bz20407"
+    exit 0
 fi
 
 #
@@ -191,10 +189,10 @@ test_2() {
     echo "Reintegrating OST"
     reboot_facet ost1
-    wait_for ost1
+    wait_for_facet ost1
     start_ost 1 || return 2
 
-    wait_for $SINGLEMDS
+    wait_for_facet $SINGLEMDS
     start $SINGLEMDS `mdsdevname 1` $MDS_MOUNT_OPTS || return $?
 
     #Check FS
@@ -267,10 +265,10 @@ test_4() {
     #Reintegration
     echo "Reintegrating OST"
     reboot_facet ost1
-    wait_for ost1
+    wait_for_facet ost1
     start_ost 1
-    
+
-    wait_for $SINGLEMDS
+    wait_for_facet $SINGLEMDS
     start $SINGLEMDS `mdsdevname 1` $MDS_MOUNT_OPTS
 
     #Check FS
@@ -317,9 +315,9 @@ test_5() {
 
     #Reintegration
     echo "Reintegrating OSTs"
-    wait_for ost1
+    wait_for_facet ost1
     start_ost 1
-    wait_for ost2
+    wait_for_facet ost2
     start_ost 2
 
     clients_recover_osts ost1
@@ -368,7 +366,7 @@ test_6() {
 
     #Reintegration
     echo "Reintegrating OST/CLIENTs"
-    wait_for ost1
+    wait_for_facet ost1
     start_ost 1
     reintegrate_clients || return 1
     sleep 5
@@ -485,7 +483,7 @@ test_8() {
     #Reintegration
     echo "Reintegrating CLIENTs/OST"
     reintegrate_clients || return 3
-    wait_for ost1
+    wait_for_facet ost1
     start_ost 1
     wait $DFPID
     clients_up || return 1
diff --git a/lustre/tests/mmp.sh b/lustre/tests/mmp.sh
index 2ac19df..fe0eafe 100755
--- a/lustre/tests/mmp.sh
+++ b/lustre/tests/mmp.sh
@@ -348,7 +348,7 @@ mount_after_reboot() {
     if [ "$FAILURE_MODE" = "HARD" ]; then
         shutdown_facet $facet
         reboot_facet $facet
-        wait_for $facet
+        wait_for_facet $facet
     else
         replay_barrier_nodf $facet
     fi
diff --git a/lustre/tests/recovery-double-scale.sh b/lustre/tests/recovery-double-scale.sh
index 4c0ea6f..bbf6534 100644
--- a/lustre/tests/recovery-double-scale.sh
+++ b/lustre/tests/recovery-double-scale.sh
@@ -77,7 +77,7 @@ reboot_recover_node () {
     # MDS, OST item contains the facet
     case $nodetype in
         MDS|OST )   facet_failover $item
-                    [ "$SERIAL" ] && wait_recovery_complete $item $((timeout * 4)) || true
+                    [ "$SERIAL" ] && wait_recovery_complete $item || true
                     ;;
         clients)    for c in ${item//,/ }; do
                         # make sure the client loads die
diff --git a/lustre/tests/recovery-mds-scale.sh b/lustre/tests/recovery-mds-scale.sh
index 2d9acb7..a4c874f 100644
--- a/lustre/tests/recovery-mds-scale.sh
+++ b/lustre/tests/recovery-mds-scale.sh
@@ -126,7 +126,7 @@ summary_and_cleanup () {
         fi
         rc=1
     fi
-    
+
     echo $(date +'%F %H:%M:%S') Terminating clients loads ...
     echo "$0" >> $END_RUN_FILE
     local result=PASS
@@ -171,7 +171,7 @@ Status: $result: rc=$rc"
 }
 
 #
-# MAIN 
+# MAIN
 #
 
 log "-----============= $0 starting =============-----"
@@ -203,38 +203,37 @@ CURRENT_TS=$START_TS
 
 while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
 
-    # In order to perform the 
+    # In order to perform the
     # expected number of failovers, we need to account the following :
     # 1) the time that has elapsed during the client load checking
     # 2) time takes for failover
 
     it_time_start=$(date +%s)
-    
+
     SERVERFACET=$(get_random_entry $SERVERS)
     var=${SERVERFACET}_numfailovers
 
-    # Check that our client loads are still running. If any have died, 
-    # that means they have died outside of recovery, which is unacceptable.    
+    # Check that our client loads are still running. If any have died,
+    # that means they have died outside of recovery, which is unacceptable.
 
     log "==== Checking the clients loads BEFORE failover -- failure NOT OK \
-    ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD" 
+    ELAPSED=$ELAPSED DURATION=$DURATION PERIOD=$SERVER_FAILOVER_PERIOD"
 
     if ! check_client_loads $NODES_TO_USE; then
         exit 4
     fi
 
     log "Wait $SERVERFACET recovery complete before doing next failover ...."
-    if [[ $(server_numfailovers $SERVERFACET) != 0 ]]; then
-        if ! wait_recovery_complete $SERVERFACET ; then
-            echo "$SERVERFACET recovery is not completed!"
-            exit 7
-        fi
+
+    if ! wait_recovery_complete $SERVERFACET ; then
+        echo "$SERVERFACET recovery is not completed!"
+        exit 7
     fi
 
     log "Checking clients are in FULL state before doing next failover"
     if ! wait_clients_import_state $NODES_TO_USE $SERVERFACET FULL; then
         echo "Clients import not FULL, please consider to increase SERVER_FAILOVER_PERIOD=$SERVER_FAILOVER_PERIOD !"
-        
+
     fi
 
     log "Starting failover on $SERVERFACET"
@@ -252,10 +251,10 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
     # Increment the number of failovers
     val=$((${!var} + 1))
     eval $var=$val
-    
+
     CURRENT_TS=$(date +%s)
     ELAPSED=$((CURRENT_TS - START_TS))
-    
+
     sleep=$((SERVER_FAILOVER_PERIOD-(CURRENT_TS - it_time_start)))
 
     # keep count the number of itterations when
@@ -269,8 +268,8 @@ This iteration, the load was only applied for sleep=$sleep seconds.
 Estimated max recovery time : $max_recov_time
 Probably the hardware is taking excessively long to boot.
 Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
-        [ $reqfail -gt $REQFAIL ] && exit 6 
-    fi  
+        [ $reqfail -gt $REQFAIL ] && exit 6
+    fi
 
     log "$SERVERFACET has failed over ${!var} times, and counting..."
 
@@ -278,7 +277,7 @@ Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug
         break
     fi
 
-    if [ $sleep -gt 0 ]; then 
+    if [ $sleep -gt 0 ]; then
         echo "sleeping $sleep seconds ... "
         sleep $sleep
     fi
diff --git a/lustre/tests/recovery-random-scale.sh b/lustre/tests/recovery-random-scale.sh
index 57fe798..ce1ba18 100644
--- a/lustre/tests/recovery-random-scale.sh
+++ b/lustre/tests/recovery-random-scale.sh
@@ -122,7 +122,7 @@ summary_and_cleanup () {
     # the one we are really interested in.
     if [ -n "$END_RUN_NODE" ]; then
         var=$(client_var_name $END_RUN_NODE)_load
-        echo "Client load failed on node $END_RUN_NODE" 
+        echo "Client load failed on node $END_RUN_NODE"
         echo
         echo "client $END_RUN_NODE load stdout and debug files :
               ${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}
@@ -245,11 +245,11 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
     log "Starting failover on $SERVERFACET"
 
     facet_failover "$SERVERFACET" || exit 1
-    if ! wait_recovery_complete $SERVERFACET $((TIMEOUT * 10)); then
+    if ! wait_recovery_complete $SERVERFACET ; then
         echo "$SERVERFACET recovery is not completed!"
         exit 7
     fi
-    
+
     boot_node $FAIL_CLIENT
     echo "Reintegrating $FAIL_CLIENT"
     zconf_mount $FAIL_CLIENT $MOUNT || exit $?
@@ -268,10 +268,10 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
     # not for all clients.
     if [ -e $END_RUN_FILE ]; then
         read END_RUN_NODE < $END_RUN_FILE
-        [[ $END_RUN_NODE = $FAIL_CLIENT ]] && 
+        [[ $END_RUN_NODE = $FAIL_CLIENT ]] &&
             rm -f $END_RUN_FILE || exit 13
     fi
-    
+
     restart_client_loads $FAIL_CLIENT $ERRORS_OK || exit $?
 
     # Check that not failed clients loads are still running.
@@ -285,7 +285,6 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do
 
     CURRENT_TS=$(date +%s)
     ELAPSED=$((CURRENT_TS - START_TS))
-
     sleep=$((SERVER_FAILOVER_PERIOD-(CURRENT_TS - it_time_start)))
 
     # keep count the number of itterations when
@@ -299,8 +298,8 @@ This iteration, the load was only applied for sleep=$sleep seconds.
 Estimated max recovery time : $max_recov_time
 Probably the hardware is taking excessively long to boot.
 Try to increase SERVER_FAILOVER_PERIOD (current is $SERVER_FAILOVER_PERIOD), bug 20918"
-        [ $reqfail -gt $REQFAIL ] && exit 6 
-    fi  
+        [ $reqfail -gt $REQFAIL ] && exit 6
+    fi
 
     log " Number of failovers:
 $(numfailovers) and counting..."
 
@@ -309,7 +308,7 @@ $(numfailovers) and counting..."
         break
     fi
 
-    if [ $sleep -gt 0 ]; then 
+    if [ $sleep -gt 0 ]; then
         echo "sleeping $sleep seconds ... "
         sleep $sleep
     fi
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index 562def5..3431fc8 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -12,13 +12,6 @@ init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 init_logging
 
-if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then
-    CONFIG_EXCEPTIONS="52"
-    echo -n "Several ost services on one ost node are used with FAILURE_MODE=$FAILURE_MODE. "
-    echo "Except the tests: $CONFIG_EXCEPTIONS"
-    ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS"
-fi
-
 require_dsh_mds || exit 0
 
 # also long tests: 19, 21a, 21e, 21f, 23, 27
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index 44e257a..c92bf35 100755
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -16,13 +16,6 @@ CLEANUP=${CLEANUP:-""}
 MOUNT_2=${MOUNT_2:-"yes"}
 
 . $LUSTRE/tests/test-framework.sh
-if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then
-    CONFIG_EXCEPTIONS="17"
-    echo -n "Several ost services on one ost node are used with FAILURE_MODE=$FAILURE_MODE. "
-    echo "Except the tests: $CONFIG_EXCEPTIONS"
-    ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS"
-fi
-
 init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 init_logging
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index 5eb0222..82f9836 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -23,13 +23,6 @@ require_dsh_mds || exit 0
 # bug number: 17466 18857
 ALWAYS_EXCEPT="61d 33a 33b $REPLAY_SINGLE_EXCEPT"
 
-if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then
-    CONFIG_EXCEPTIONS="0b 42 47 61a 61c"
-    echo -n "Several ost services on one ost node are used with FAILURE_MODE=$FAILURE_MODE. "
-    echo "Except the tests: $CONFIG_EXCEPTIONS"
-    ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS"
-fi
-
 # 63 min 7 min AT AT AT AT"
 [ "$SLOW" = "no" ] && EXCEPT_SLOW="1 2 3 4 6 12 16 44a 44b 65 66 67 68"
 
@@ -1538,9 +1531,9 @@ test_61d() { # bug 16002 # bug 17466 # bug 22137
     # OBD_FAIL_OBD_LLOG_SETUP 0x605
     stop mgs
     do_facet mgs "lctl set_param fail_loc=0x80000605"
-    start mgs $MGSDEV $mgs_MOUNT_OPTS && error "mgs start should have failed"
+    start mgs $MGSDEV $MGS_MOUNT_OPTS && error "mgs start should have failed"
     do_facet mgs "lctl set_param fail_loc=0"
-    start mgs $MGSDEV $mgs_MOUNT_OPTS || error "cannot restart mgs"
+    start mgs $MGSDEV $MGS_MOUNT_OPTS || error "cannot restart mgs"
 }
 run_test 61d "error in llog_setup should cleanup the llog context correctly"
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 67bb9f8..a56027f 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -859,12 +859,18 @@ fi
 fi"
 }
 
-shudown_node_hard () {
+shutdown_node () {
+    local node=$1
+    echo + $POWER_DOWN $node
+    $POWER_DOWN $node
+}
+
+shutdown_node_hard () {
     local host=$1
     local attempts=3
 
     for i in $(seq $attempts) ; do
-        $POWER_DOWN $host
+        shutdown_node $host
         sleep 1
         ping -w 3 -c 1 $host > /dev/null 2>&1 || return 0
         echo "waiting for $host to fail attempts=$attempts"
@@ -879,21 +885,44 @@ shutdown_client() {
     local attempts=3
 
     if [ "$FAILURE_MODE" = HARD ]; then
-        shudown_node_hard $client
+        shutdown_node_hard $client
     else
        zconf_umount_clients $client $mnt -f
     fi
 }
 
+facets_on_host () {
+    local host=$1
+    local facets="$(get_facets OST),$(get_facets MDS)"
+    local affected
+
+    combined_mgs_mds || facets="$facets,mgs"
+
+    for facet in ${facets//,/ }; do
+        if [ $(facet_active_host $facet) == $host ]; then
+           affected="$affected $facet"
+        fi
+    done
+
+    echo $(comma_list $affected)
+}
+
 shutdown_facet() {
     local facet=$1
+
     if [ "$FAILURE_MODE" = HARD ]; then
-        shudown_node_hard $(facet_active_host $facet)
-    elif [ "$FAILURE_MODE" = SOFT ]; then
+        shutdown_node_hard $(facet_active_host $facet)
+    else
         stop $facet
     fi
 }
 
+reboot_node() {
+    local node=$1
+    echo + $POWER_UP $node
+    $POWER_UP $node
+}
+
 remount_facet() {
     local facet=$1
 
@@ -902,9 +931,9 @@ remount_facet() {
 }
 
 reboot_facet() {
-    facet=$1
+    local facet=$1
     if [ "$FAILURE_MODE" = HARD ]; then
-        $POWER_UP `facet_active_host $facet`
+        reboot_node $(facet_active_host $facet)
     else
         sleep 10
     fi
@@ -913,7 +942,7 @@ boot_node() {
     local node=$1
     if [ "$FAILURE_MODE" = HARD ]; then
-        $POWER_UP $node
+        reboot_node $node
        wait_for_host $node
     fi
 }
@@ -1137,39 +1166,68 @@ wait_delete_completed () {
 }
 
 wait_for_host() {
-    local host=$1
-    check_network "$host" 900
-    while ! do_node $host hostname > /dev/null; do sleep 5; done
+    local hostlist=$1
+
+    # we can use "for" here because we are waiting the slowest
+    for host in ${hostlist//,/ }; do
+        check_network "$host" 900
+    done
+    while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done
 }
 
-wait_for() {
-    local facet=$1
-    local host=`facet_active_host $facet`
-    wait_for_host $host
+wait_for_facet() {
+    local facetlist=$1
+    local hostlist
+
+    for facet in ${facetlist//,/ }; do
+        hostlist=$(expand_list $hostlist $(facet_active_host $facet))
+    done
+    wait_for_host $hostlist
 }
 
-wait_recovery_complete () {
-    local facet=$1
+_wait_recovery_complete () {
+    local param=$1
 
     # Use default policy if $2 is not passed by caller.
     local MAX=${2:-$(max_recovery_time)}
 
-    local var_svc=${facet}_svc
-    local procfile="*.${!var_svc}.recovery_status"
 
     local WAIT=0
     local STATUS=
 
     while [ $WAIT -lt $MAX ]; do
-        STATUS=$(do_facet $facet lctl get_param -n $procfile | grep status)
-        [[ $STATUS = "status: COMPLETE" ]] && return 0
+        STATUS=$(lctl get_param -n $param | grep status)
+        echo $param $STATUS
+        [[ $STATUS = "status: COMPLETE" || $STATUS = "status: INACTIVE" ]] && return 0
         sleep 5
         WAIT=$((WAIT + 5))
-        echo "Waiting $((MAX - WAIT)) secs for $facet recovery done. $STATUS"
+        echo "Waiting $((MAX - WAIT)) secs for $param recovery done. $STATUS"
     done
-    echo "$facet recovery not done in $MAX sec. $STATUS"
+    echo "$param recovery not done in $MAX sec. $STATUS"
     return 1
 }
 
+wait_recovery_complete () {
+    local facet=$1
+
+    # with an assumption that at_max is the same on all nodes
+    local MAX=${2:-$(max_recovery_time)}
+
+    local facets=$facet
+    if [ "$FAILURE_MODE" = HARD ]; then
+        facets=$(facets_on_host $(facet_active_host $facet))
+    fi
+    echo affected facets: $facets
+
+    # we can use "for" here because we are waiting the slowest
+    for facet in ${facets//,/ }; do
+        local var_svc=${facet}_svc
+        local param="*.${!var_svc}.recovery_status"
+
+        local host=$(facet_active_host $facet)
+        do_rpc_nodes $host _wait_recovery_complete $param $MAX
+    done
+}
+
 wait_mds_ost_sync () {
     # just because recovery is done doesn't mean we've finished
     # orphan cleanup. Wait for llogs to get synchronized.
@@ -1316,15 +1374,36 @@ client_reconnect() {
 facet_failover() {
     local facet=$1
     local sleep_time=$2
-    echo "Failing $facet on node `facet_active_host $facet`"
+    local host=$(facet_active_host $facet)
+
+    echo "Failing $facet on node $host"
+
+    local affected=$facet
+
+    if [ "$FAILURE_MODE" = HARD ]; then
+        affected=$(facets_on_host $host)
+    fi
+
     shutdown_facet $facet
+
+    echo affected facets: $affected
+
     [ -n "$sleep_time" ] && sleep $sleep_time
+
     reboot_facet $facet
-    change_active $facet
-    local TO=`facet_active_host $facet`
-    echo "Failover $facet to $TO"
-    wait_for $facet
-    mount_facet $facet || error "Restart of $facet failed"
+
+    change_active $affected
+
+    wait_for_facet $affected
+    # start mgs first if it is affected
+    if ! combined_mgs_mds && list_member $affected mgs; then
+        mount_facet mgs || error "Restart of mgs failed"
+    fi
+    # FIXME; has to be changed to mount all facets concurrently
+    affected=$(exclude_items_from_list $affected mgs)
+    for facet in ${affected//,/ }; do
+        mount_facet $facet || error "Restart of $facet on node $host failed!"
+    done
 }
 
 obd_name() {
@@ -1482,10 +1561,16 @@ facet_active_host() {
 }
 
 change_active() {
-    local facet=$1
+    local facetlist=$1
+    local facet
+
+    facetlist=$(exclude_items_from_list $facetlist mgs)
+
+    for facet in ${facetlist//,/ }; do
     local failover=${facet}failover
-    host=`facet_host $failover`
+    local host=`facet_host $failover`
     [ -z "$host" ] && return
+
     local curactive=`facet_active $facet`
     if [ -z "${curactive}" -o "$curactive" == "$failover" ] ; then
         eval export ${facet}active=$facet
@@ -1495,6 +1580,9 @@ change_active() {
     # save the active host for this facet
     local activevar=${facet}active
     echo "$activevar=${!activevar}" > $TMP/$activevar
+    local TO=`facet_active_host $facet`
+    echo "Failover $facet to $TO"
+    done
 }
 
 do_node() {
@@ -1697,20 +1785,60 @@ cleanupall() {
     cleanup_gss
 }
 
-mdsmkfsopts()
-{
-    local nr=$1
-    test $nr = 1 && echo -n $MDS_MKFS_OPTS || echo -n $MDSn_MKFS_OPTS
-}
-
 combined_mgs_mds () {
     [[ $MDSDEV1 = $MGSDEV ]] && [[ $mds1_HOST = $mgs_HOST ]]
 }
 
+mkfs_opts () {
+    local facet=$1
+
+    local tgt=$(echo $facet | tr -d [:digit:] | tr "[:lower:]" "[:upper:]")
+    local optvar=${tgt}_MKFS_OPTS
+    local opt=${!optvar}
+
+    # FIXME: ! combo mgs/mds + mgsfailover is not supported yet
+    [[ $facet = mgs ]] && echo $opt && return
+
+    # 1.
+    # --failnode options
+    local var=${facet}failover_HOST
+    if [ x"${!var}" != x ] && [ x"${!var}" != x$(facet_host $facet) ] ; then
+        local failnode=$(h2$NETTYPE ${!var})
+        failnode="--failnode=$failnode"
+        # options does not contain
+        # or contains wrong --failnode=
+        if [[ $opt != *${failnode}* ]]; then
+            opt=$(echo $opt | sed 's/--failnode=.* / /')
+            opt="$opt $failnode"
+        fi
+    fi
+
+    # 2.
+    # --mgsnode options
+    # no additional mkfs mds "--mgsnode" option for this configuration
+    if [[ $facet = mds ]] && combined_mgs_mds; then
+        echo $opt
+        return
+    fi
+
+    # additional mkfs "--mgsnode"
+    local mgsnode="--mgsnode=$MGSNID"
+    opt=${opt//$mgsnode }
+    for nid in ${MGSNID//:/ }; do
+        local mgsnode="--mgsnode=$nid"
+        # options does not contain
+        # --mgsnode=$nid
+        if [[ $opt != *${mgsnode}" "* ]]; then
+            opt="$opt --mgsnode=$nid"
+        fi
+    done
+
+    echo $opt
+}
+
 formatall() {
     if [ "$IAMDIR" == "yes" ]; then
         MDS_MKFS_OPTS="$MDS_MKFS_OPTS --iam-dir"
-        MDSn_MKFS_OPTS="$MDSn_MKFS_OPTS --iam-dir"
     fi
 
     [ "$FSTYPE" ] && FSTYPE_OPT="--backfstype $FSTYPE"
@@ -1721,24 +1849,26 @@ formatall() {
     [ "$CLIENTONLY" ] && return
     echo Formatting mgs, mds, osts
     if ! combined_mgs_mds ; then
-        add mgs $mgs_MKFS_OPTS $FSTYPE_OPT --reformat $MGSDEV || exit 10
+        add mgs $(mkfs_opts mgs) $FSTYPE_OPT --reformat $MGSDEV || exit 10
     fi
 
     for num in `seq $MDSCOUNT`; do
        echo "Format mds$num: $(mdsdevname $num)"
        if $VERBOSE; then
-           add mds$num `mdsmkfsopts $num` $FSTYPE_OPT --reformat `mdsdevname $num` || exit 9
+           add mds$num $(mkfs_opts mds) $FSTYPE_OPT --reformat $(mdsdevname $num) || exit 10
        else
-           add mds$num `mdsmkfsopts $num` $FSTYPE_OPT --reformat `mdsdevname $num` > /dev/null || exit 9
+           add mds$num $(mkfs_opts mds) $FSTYPE_OPT --reformat $(mdsdevname $num) > /dev/null || exit 10
        fi
     done
 
+    # the ost-s could have different OST_MKFS_OPTS
+    # because of different failnode-s
    for num in `seq $OSTCOUNT`; do
        echo "Format ost$num: $(ostdevname $num)"
        if $VERBOSE; then
-           add ost$num $OST_MKFS_OPTS --reformat `ostdevname $num` || exit 10
+           add ost$num $(mkfs_opts ost${num}) $FSTYPE_OPT --reformat `ostdevname $num` || exit 10
        else
-           add ost$num $OST_MKFS_OPTS --reformat `ostdevname $num` > /dev/null || exit 10
+           add ost$num $(mkfs_opts ost${num}) $FSTYPE_OPT --reformat `ostdevname $num` > /dev/null || exit 10
        fi
    done
 }
@@ -1820,7 +1950,7 @@ setupall() {
         echo $WRITECONF | grep -q "writeconf" && \
             writeconf_all
         if ! combined_mgs_mds ; then
-            start mgs $MGSDEV $mgs_MOUNT_OPTS
+            start mgs $MGSDEV $MGS_MOUNT_OPTS
         fi
 
         for num in `seq $MDSCOUNT`; do
@@ -1929,6 +2059,8 @@ init_facets_vars () {
         done
     fi
 
+    combined_mgs_mds || init_facet_vars mgs $MGSDEV $MGS_MOUNT_OPTS
+
     remote_ost_nodsh && return
 
     for num in `seq $OSTCOUNT`; do
@@ -2350,6 +2482,12 @@ comma_list() {
     echo "$*" | tr -s " " "\n" | sort -b -u | tr "\n" " " | sed 's/ \([^$]\)/,\1/g'
 }
 
+list_member () {
+    local list=$1
+    local item=$2
+    echo $list | grep -qw $item
+}
+
 # list, excluded are the comma separated lists
 exclude_items_from_list () {
     local list=$1
@@ -2360,7 +2498,7 @@ exclude_items_from_list () {
     for item in ${excluded//,/ }; do
         list=$(echo " $list " | sed -re "s/\s+$item\s+/ /g")
     done
-    echo $(comma_list $list) 
+    echo $(comma_list $list)
 }
 
 # list, expand are the comma separated lists
@@ -2398,13 +2536,23 @@ absolute_path() {
 }
 
 get_facets () {
-    local name=$(echo $1 | tr "[:upper:]" "[:lower:]")
-    local type=$(echo $1 | tr "[:lower:]" "[:upper:]")
+    local types=${1:-"OST MDS MGS"}
     local list=""
-    local count=${type}COUNT
-    for ((i=1; i<=${!count}; i++)) do
-        list="$list ${name}$i"
+
+    for entry in $types; do
+        local name=$(echo $entry | tr "[:upper:]" "[:lower:]")
+        local type=$(echo $entry | tr "[:lower:]" "[:upper:]")
+
+        case $type in
+                MGS ) list="$list $name";;
+            MDS|OST ) local count=${type}COUNT
+                      for ((i=1; i<=${!count}; i++)) do
+                          list="$list ${name}$i"
+                      done;;
+                  * ) error "Invalid facet type"
+                      exit 1;;
+        esac
     done
     echo $(comma_list $list)
 }
@@ -3025,19 +3173,30 @@ remote_servers () {
     remote_ost && remote_mds
 }
 
-osts_nodes () {
-    local OSTNODES=$(facet_host ost1)
+facets_nodes () {
+    local facets=$1
+    local nodes
     local NODES_sort
 
-    for num in `seq $OSTCOUNT`; do
-        local myOST=$(facet_host ost$num)
-        OSTNODES="$OSTNODES $myOST"
+    for facet in ${facets//,/ }; do
+        if [ "$FAILURE_MODE" = HARD ]; then
+            nodes="$nodes $(facet_active_host $facet)"
+        else
+            nodes="$nodes $(facet_host $facet)"
+        fi
     done
-    NODES_sort=$(for i in $OSTNODES; do echo $i; done | sort -u)
+    NODES_sort=$(for i in $nodes; do echo $i; done | sort -u)
 
     echo $NODES_sort
 }
 
+osts_nodes () {
+    local facets=$(get_facets OST)
+    local nodes=$(facets_nodes $facets)
+
+    echo $nodes
+}
+
 nodes_list () {
     # FIXME. We need a list of clients
     local myNODES=$HOSTNAME
@@ -3047,7 +3206,7 @@ nodes_list () {
     [ -n "$CLIENTS" ] && myNODES=${CLIENTS//,/ }
 
     if [ "$PDSH" -a "$PDSH" != "no_dsh" ]; then
-        myNODES="$myNODES $(osts_nodes) $(mdts_nodes)"
+        myNODES="$myNODES $(facets_nodes $(get_facets))"
     fi
 
     myNODES_sort=$(for i in $myNODES; do echo $i; done | sort -u)
@@ -3407,7 +3566,7 @@ convert_facet2label() {
 get_clientosc_proc_path() {
     local ost=$1
 
-    echo "{$1}-osc-*"
+    echo "${1}-osc-*"
 }
 
 get_lustre_version () {
@@ -3474,33 +3633,37 @@ get_osc_import_name() {
     return 0
 }
 
-wait_import_state () {
+_wait_import_state () {
     local expected=$1
     local CONN_PROC=$2
+    local maxtime=${3:-max_recovery_time}
     local CONN_STATE
     local i=0
 
     CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2)
     while [ "${CONN_STATE}" != "${expected}" ]; do
-        if [ "${expected}" == "DISCONN" ]; then
-            # for disconn we can check after proc entry is removed
-            [ "x${CONN_STATE}" == "x" ] && return 0
-            # with AT we can have connect request timeout ~ reconnect timeout
-            # and test can't see real disconnect
-            [ "${CONN_STATE}" == "CONNECTING" ] && return 0
-        fi
-        # disconnect rpc should be wait not more obd_timeout
-        [ $i -ge $(($TIMEOUT * 3 / 2)) ] && \
-            error "can't put import for $CONN_PROC into ${expected} state" && return 1
+        [ $i -ge $maxtime ] && \
+            error "can't put import for $CONN_PROC into ${expected} state after $i sec, have ${CONN_STATE}" && \
+            return 1
        sleep 1
        CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2)
        i=$(($i + 1))
     done
 
-    log "$CONN_PROC now in ${CONN_STATE} state"
+    log "$CONN_PROC in ${CONN_STATE} state after $i sec"
     return 0
 }
 
+wait_import_state() {
+    local state=$1
+    local params=$2
+    local maxtime=${3:-max_recovery_time}
+    local param
+
+    for param in ${params//,/ }; do
+        _wait_import_state $state $param $maxtime || return
+    done
+}
 wait_osc_import_state() {
     local facet=$1
     local ost_facet=$2
@@ -3548,8 +3711,14 @@ wait_clients_import_state () {
     local list=$1
     local facet=$2
     local expected=$3
-    shift
 
+    local facets=$facet
+
+    if [ "$FAILURE_MODE" = HARD ]; then
+        facets=$(facets_on_host $(facet_active_host $facet))
+    fi
+
+    for facet in ${facets//,/ }; do
     local label=$(convert_facet2label $facet)
     local proc_path
     case $facet in
@@ -3557,8 +3726,10 @@ wait_clients_import_state () {
        mds* ) proc_path="mdc.$(get_clientmdc_proc_path $label).mds_server_uuid" ;;
        *) error "unknown facet!" ;;
     esac
+    local params=$(expand_list $params $proc_path)
+    done
 
-    if ! do_rpc_nodes $list wait_import_state $expected $proc_path; then
+    if ! do_rpc_nodes $list wait_import_state $expected $params; then
         error "import is not in ${expected} state"
         return 1
     fi