From 3c42a7fca2a101298be18733564e92549b35ef95 Mon Sep 17 00:00:00 2001 From: grev Date: Wed, 9 Sep 2009 08:59:22 +0000 Subject: [PATCH] b=20331 i=Robert.Read i=Alexey.Lyashkov recovery-mds-scale: wait FULL state --- lustre/tests/Makefile.am | 2 +- lustre/tests/recovery-mds-scale.sh | 13 +++++ lustre/tests/rpc.sh | 14 +++++ lustre/tests/test-framework.sh | 105 ++++++++++++++++++++++++++++++------- 4 files changed, 113 insertions(+), 21 deletions(-) create mode 100755 lustre/tests/rpc.sh diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 123bb93..1852609 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -23,7 +23,7 @@ noinst_SCRIPTS += sanity-sec.sh sanity-gss.sh krb5_login.sh setup_kerberos.sh noinst_SCRIPTS += recovery-mds-scale.sh run_dd.sh run_tar.sh run_iozone.sh noinst_SCRIPTS += run_dbench.sh run_IOR.sh recovery-double-scale.sh noinst_SCRIPTS += recovery-random-scale.sh parallel-scale.sh metadata-updates.sh -noinst_SCRIPTS += lreplicate-test.sh ost-pools.sh +noinst_SCRIPTS += lreplicate-test.sh ost-pools.sh rpc.sh nobase_noinst_SCRIPTS = cfg/local.sh nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh nobase_noinst_SCRIPTS += racer/dir_create.sh racer/file_create.sh racer/file_list.sh diff --git a/lustre/tests/recovery-mds-scale.sh b/lustre/tests/recovery-mds-scale.sh index fd8d32f..1c7cc84 100644 --- a/lustre/tests/recovery-mds-scale.sh +++ b/lustre/tests/recovery-mds-scale.sh @@ -207,6 +207,19 @@ while [ $ELAPSED -lt $DURATION -a ! -e $END_RUN_FILE ]; do exit 4 fi + log "Wait $SERVERFACET recovery complete before doing next failover ...." + if [[ $NUM_FAILOVERS != 0 ]]; then + if ! wait_recovery_complete $SERVERFACET ; then + echo "$SERVERFACET recovery is not completed!" + exit 7 + fi + fi + + log "Checking clients are in FULL state before doing next failover" + if ! wait_clients_import_state $NODES_TO_USE $SERVERFACET FULL; then + echo "Clients import not FULL, please consider to increase SERVER_FAILOVER_PERIOD=$SERVER_FAILOVER_PERIOD !" + + fi log "Starting failover on $SERVERFACET" facet_failover "$SERVERFACET" || exit 1 diff --git a/lustre/tests/rpc.sh b/lustre/tests/rpc.sh new file mode 100755 index 0000000..11c5181 --- /dev/null +++ b/lustre/tests/rpc.sh @@ -0,0 +1,14 @@ +#!/bin/bash +export PATH=`dirname $0`/../utils:$PATH +NAME=${NAME:-local} + +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} + +cmd=$1 +shift +$cmd $@ + +exit $? diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 31158df..6c7bfb0 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -2823,16 +2823,21 @@ delayed_recovery_enabled () { } ######################## -convert_facet2name() { - case "$1" in - "ost" ) echo "OST0000" ;; - "ost1") echo "OST0000" ;; - "ost2") echo "OST0001" ;; - "ost3") echo "OST0002" ;; - "ost4") echo "OST0003" ;; - "ost5") echo "OST0004" ;; - *) error "unknown facet!" ;; - esac + +convert_facet2label() { + local facet=$1 + + if [ x$facet = xost ]; then + facet=ost1 + fi + + local varsvc=${facet}_svc + + if [ -n ${!varsvc} ]; then + echo ${!varsvc} + else + error "No lablel for $facet!" + fi } get_clientosc_proc_path() { @@ -2868,30 +2873,57 @@ get_mdtosc_proc_path() { } get_osc_import_name() { - local node=$1 + local facet=$1 local ost=$2 - local name=$(convert_facet2name $ost) + local label=$(convert_facet2label $ost) - if [ "$node" == "mds" ]; then - get_mdtosc_proc_path $name + if [ "$facet" == "mds" ]; then + get_mdtosc_proc_path $label return 0 fi - get_clientosc_proc_path $name + get_clientosc_proc_path $label + return 0 +} + +wait_import_state () { + local expected=$1 + local CONN_PROC=$2 + local CONN_STATE + local i=0 + + CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2) + while [ "${CONN_STATE}" != "${expected}" ]; do + if [ "${expected}" == "DISCONN" ]; then + # for disconn we can check after proc entry is removed + [ "x${CONN_STATE}" == "x" ] && return 0 + # with AT we can have connect request timeout ~ reconnect timeout + # and test can't see real disconnect + [ "${CONN_STATE}" == "CONNECTING" ] && return 0 + fi + # disconnect rpc should be wait not more obd_timeout + [ $i -ge $(($TIMEOUT * 3 / 2)) ] && \ + error "can't put import for $CONN_PROC into ${expected} state" && return 1 + sleep 1 + CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2) + i=$(($i + 1)) + done + + log "$CONN_PROC now in ${CONN_STATE} state" return 0 } wait_osc_import_state() { - local node=$1 + local facet=$1 local ost_facet=$2 local expected=$3 - local ost=$(get_osc_import_name $node $ost_facet) + local ost=$(get_osc_import_name $facet $ost_facet) local CONN_PROC local CONN_STATE local i=0 - CONN_PROC="osc.${FSNAME}-${ost}.ost_server_uuid" - CONN_STATE=$(do_facet $node lctl get_param -n $CONN_PROC 2>/dev/null | cut -f2) + CONN_PROC="osc.${ost}.ost_server_uuid" + CONN_STATE=$(do_facet $facet lctl get_param -n $CONN_PROC 2>/dev/null | cut -f2) while [ "${CONN_STATE}" != "${expected}" ]; do if [ "${expected}" == "DISCONN" ]; then # for disconn we can check after proc entry is removed @@ -2904,10 +2936,43 @@ wait_osc_import_state() { [ $i -ge $(($TIMEOUT * 3 / 2)) ] && \ error "can't put import for ${ost}(${ost_facet}) into ${expected} state" && return 1 sleep 1 - CONN_STATE=$(do_facet $node lctl get_param -n $CONN_PROC 2>/dev/null | cut -f2) + CONN_STATE=$(do_facet $facet lctl get_param -n $CONN_PROC 2>/dev/null | cut -f2) i=$(($i + 1)) done log "${ost_facet} now in ${CONN_STATE} state" return 0 } + +get_clientmdc_proc_path() { + echo "${1}-mdc-*" +} + +do_rpc_nodes () { + local list=$1 + shift + + do_nodes --verbose $list "PATH=$LUSTRE/tests/:$PATH sh rpc.sh $@ " +} + +wait_clients_import_state () { + local list=$1 + local facet=$2 + local expected=$3 + shift + + local label=$(convert_facet2label $facet) + local proc_path + case $facet in + ost* ) proc_path="osc.$(get_clientosc_proc_path $label).ost_server_uuid" ;; + mds* ) proc_path="mdc.$(get_clientmdc_proc_path $label).mds_server_uuid" ;; + *) error "unknown facet!" ;; + esac + + + if ! do_rpc_nodes $list wait_import_state $expected $proc_path; then + error "import is not in ${expected} state" + return 1 + fi +} + -- 1.8.3.1