From: Elena Gryaznova Date: Wed, 8 Dec 2010 18:41:08 +0000 (+0300) Subject: b=23961 fix for setup with several network interfaces X-Git-Tag: 2.0.59.0~31 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=616ffccedb71bb7a670523f0ea737ef9da1f4d05;p=fs%2Flustre-release.git b=23961 fix for setup with several network interfaces i=Brian.Murrell i=Andrew.Perepechko - metadata-updates fix for setup when several interfaces are UP on host; hostname could be assigned to IP which is different from lnet network used, the hostname-s of NODES_TO_USE are now stored in HOSTS - new SHUTDOWN_ATTEMPTS: the tunable number of attempts to shutdown node - shutdown_node_hard () fix: do not call "power off" each time, wait until the node is not pingable before the next "power off" attempt - unused check_port() is removed --- diff --git a/lustre/tests/metadata-updates.sh b/lustre/tests/metadata-updates.sh index 2212a17..0e9f0eb 100755 --- a/lustre/tests/metadata-updates.sh +++ b/lustre/tests/metadata-updates.sh @@ -20,6 +20,11 @@ NODES_TO_USE=${NODES_TO_USE:-$CLIENTS} [ -z $CLIENTS ] && NODES_TO_USE=$(hostname) +# hostname could differ from a network interface +# configured for NODES_TO_USE, bug 23961 +# the test dir on each host is created based on `hostname` of this host +HOSTS=$(comma_list $(do_nodes $NODES_TO_USE "echo \\\$(hostname)")) + FILE=testfile FILE_SIZE=1024 CURRENT_MODE=0644 @@ -99,7 +104,7 @@ get_stat () { echo "Checking file(s) attributes ... " do_nodesv $NODES_TO_USE "set $TRACE; -for HOST in ${NODES_TO_USE//,/ } ; do +for HOST in ${HOSTS//,/ } ; do TESTFILE=$TESTDIR/\\\$HOST/$FILE; tmp=\\\$(stat -c \\\"%u %g %s 0%a\\\" \\\$TESTFILE); echo \\\"\\\$TESTFILE [ uid gid size mode ] expected : $attr ; got : \\\$tmp \\\"; @@ -142,7 +147,7 @@ do_check_timestamps () { echo "Checking atime, mtime ... 
" do_nodesv $NODES_TO_USE "set $TRACE; -for HOST in ${NODES_TO_USE//,/ } ; do +for HOST in ${HOSTS//,/ } ; do TESTFILE=$TESTDIR/\\\$HOST/$FILE; tmp=\\\$(stat -c \\\"%X %Y\\\" \\\$TESTFILE); if [ x\\\"\\\$tmp\\\" != x\\\"$times\\\" ] ; then @@ -172,7 +177,7 @@ check_dir_contents () { echo "Checking dir contents ... (should exist files : f$num_files ... f$NUM_FILES) ... " do_nodes $NODES_TO_USE "set $TRACE; -for HOST in ${NODES_TO_USE//,/ } ; do +for HOST in ${HOSTS//,/ } ; do DIR=$TESTDIR/\\\$HOST; for i in \\\$(seq $NUM_FILES -1 $num_files) ; do if ! [ -f \\\$DIR/f\\\$i ] ; then diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index ac78020..f2e7936 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -229,6 +229,8 @@ init_test_env() { export CLIENTMODSONLY=yes fi + export SHUTDOWN_ATTEMPTS=${SHUTDOWN_ATTEMPTS:-3} + # command line while getopts "rvwf:" opt $*; do @@ -891,12 +893,12 @@ shutdown_node () { shutdown_node_hard () { local host=$1 - local attempts=3 + local attempts=$SHUTDOWN_ATTEMPTS for i in $(seq $attempts) ; do shutdown_node $host sleep 1 - ping -w 3 -c 1 $host > /dev/null 2>&1 || return 0 + wait_for_function --quiet "! ping -w 3 -c 1 $host" 5 1 && return 0 echo "waiting for $host to fail attempts=$attempts" [ $i -lt $attempts ] || \ { echo "$host still pingable after power down! 
attempts=$attempts" && return 1; } @@ -2529,28 +2531,45 @@ check_and_cleanup_lustre() { ####### # General functions -check_network() { - local NETWORK=0 - local WAIT=0 - local MAX=$2 - while [ $NETWORK -eq 0 ]; do - if ping -c 1 -w 3 $1 > /dev/null; then - NETWORK=1 - else - WAIT=$((WAIT + 5)) - echo "waiting for $1, $((MAX - WAIT)) secs left" - sleep 5 - fi - if [ $WAIT -gt $MAX ]; then - echo "Network not available" - exit 1 - fi +wait_for_function () { + local quiet="" + + # suppress fn both stderr and stdout + if [ "$1" = "--quiet" ]; then + shift + quiet=" > /dev/null 2>&1" + + fi + + local fn=$1 + local max=${2:-900} + local sleep=${3:-5} + + local wait=0 + + while true; do + + eval $fn $quiet && return 0 + + wait=$((wait + sleep)) + [ $wait -lt $max ] || return 1 + echo waiting $fn, $((max - wait)) secs left ... + sleep $sleep done } -check_port() { - while( !($DSH2 $1 "netstat -tna | grep -q $2") ) ; do - sleep 9 - done + +check_network() { + local host=$1 + local max=$2 + local sleep=${3:-5} + + echo `date +"%H:%M:%S (%s)"` waiting for $host network $max secs ... + if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then + echo "Network not available!" + exit 1 + fi + + echo `date +"%H:%M:%S (%s)"` network interface is UP } no_dsh() {