From 6c57a70b2438d3b54b39832420b7f4671d92017e Mon Sep 17 00:00:00 2001
From: rread
Date: Fri, 21 Nov 2003 08:42:40 +0000
Subject: [PATCH] - integrate support for FAILURE_MODE into test-framework - add configs for mdev

---
Review notes (commentary only; nothing before the first "diff --git" line
is part of the change itself):

 * cfg/insanity-mdev.sh: the comment on FAILURE_MODE read "# or HARD"
   although HARD is already the default; corrected to "# or SOFT".
 * cfg/insanity-mdev.sh: OSTSIZE used ${OSTSIZE:=50000} while every other
   default in these configs uses ":-"; changed to ":-" for consistency
   (the resulting value is the same).
 * insanity.sh: shutdown_client() stores its argument in $client, but the
   SOFT branch called "stop $facet", a variable the function never sets.
   It now passes $client.  stop() is defined outside this patch, so
   please confirm that it accepts a client name.
 * Line breaks and leading whitespace were reconstructed from the hunk
   headers (4-space indentation assumed).  Apply with --ignore-whitespace
   if the target tree indents differently.  The "index" blob ids are
   those of the original commit and do not reflect the three edits above.

 lustre/tests/cfg/insanity-mdev.sh | 31 ++++++++++++++++++++++++++
 lustre/tests/cfg/local.sh         |  2 +-
 lustre/tests/cfg/mdev.sh          | 30 +++++++++++++++++++++++++
 lustre/tests/insanity.sh          | 46 ++++++++++++++-------------------------
 lustre/tests/replay-single.sh     |  2 +-
 5 files changed, 79 insertions(+), 32 deletions(-)
 create mode 100644 lustre/tests/cfg/insanity-mdev.sh
 create mode 100644 lustre/tests/cfg/mdev.sh

diff --git a/lustre/tests/cfg/insanity-mdev.sh b/lustre/tests/cfg/insanity-mdev.sh
new file mode 100644
index 0000000..5e69356
--- /dev/null
+++ b/lustre/tests/cfg/insanity-mdev.sh
@@ -0,0 +1,31 @@
+mds_HOST=${mds_HOST:-mdev4}
+mdsfailover_HOST=${mdsfailover_HOST:-mdev5}
+ost1_HOST=${ost1_HOST:-mdev2}
+ost2_HOST=${ost2_HOST:-mdev3}
+client_HOST=client
+LIVE_CLIENT=${LIVE_CLIENT:-mdev6}
+# This should always be a list, not a regexp
+FAIL_CLIENTS=${FAIL_CLIENTS:-mdev7}
+
+NETTYPE=${NETTYPE:-tcp}
+
+TIMEOUT=${TIMEOUT:-30}
+#PTLDEBUG=${PTLDEBUG:-'"ha|info|ioctl|malloc"'}
+PTLDEBUG=${PTLDEBUG:-0}
+MOUNT=${MOUNT:-"/mnt/lustre"}
+UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh}
+
+MDSDEV=${MDSDEV:-/dev/sda1}
+MDSSIZE=${MDSSIZE:-50000}
+
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
+OSTSIZE=${OSTSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
+STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
+
+FAILURE_MODE=${FAILURE_MODE:-HARD} # or SOFT
+POWER_DOWN=${POWER_DOWN:-"powerman --off"}
+POWER_UP=${POWER_UP:-"powerman --on"}
+
+PDSH="pdsh -S -w "
diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh
index 5dad6f5..f9c5198 100644
--- a/lustre/tests/cfg/local.sh
+++ b/lustre/tests/cfg/local.sh
@@ -14,7 +14,7 @@ MOUNT1=${MOUNT1:-$MOUNT}
 MOUNT2=${MOUNT2:-"/mnt/lustre2"}
 DIR=${DIR:-$MOUNT}
 DIR2=${DIR2:-$MOUNT1}
-
+PTLDEBUG=${PTLDEBUG:-0}
 PDSH=${PDSH:-no_dsh}
 
 MDSDEV=${MDSDEV:-$ROOT/tmp/mds-`hostname`}
diff --git a/lustre/tests/cfg/mdev.sh b/lustre/tests/cfg/mdev.sh
new file mode 100644
index 0000000..ec8edf2
--- /dev/null
+++ b/lustre/tests/cfg/mdev.sh
@@ -0,0 +1,30 @@
+
+mds_HOST=${mds_HOST:-mdev4}
+mdsfailover_HOST=${mdsfailover_HOST:-mdev5}
+ost_HOST=${ost_HOST:-mdev2}
+ost2_HOST=${ost2_HOST:-mdev3}
+client_HOST=${client_HOST:-client}
+NETTYPE=${NETTYPE:-tcp}
+
+MOUNT=${MOUNT:-"/mnt/lustre"}
+MOUNT1=${MOUNT1:-$MOUNT}
+MOUNT2=${MOUNT2:-"/mnt/lustre2"}
+DIR=${DIR:-$MOUNT}
+DIR2=${DIR2:-$MOUNT1}
+PTLDEBUG=${PTLDEBUG:-0}
+PDSH=${PDSH:-pdsh -S -w}
+
+MDSDEV=${MDSDEV:-/dev/sda1}
+MDSSIZE=${MDSSIZE:-50000}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
+OSTSIZE=${OSTSIZE:-20000}
+FSTYPE=${FSTYPE:-ext3}
+TIMEOUT=${TIMEOUT:-10}
+UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
+
+STRIPE_BYTES=${STRIPE_BYTES:-65536}
+STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
+
+FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD
+POWER_DOWN=${POWER_DOWN:-"powerman --off"}
+POWER_UP=${POWER_UP:-"powerman --on"}
diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh
index 9920dea..d59671e 100755
--- a/lustre/tests/insanity.sh
+++ b/lustre/tests/insanity.sh
@@ -39,6 +39,15 @@ fail_client() {
     echo $ret
 }
 
+shutdown_client() {
+    client=$1
+    if [ "$FAILURE_MODE" = HARD ]; then
+        $POWER_DOWN $client
+    elif [ "$FAILURE_MODE" = SOFT ]; then
+        stop $client --force --nomod
+    fi
+}
+
 fail_clients() {
     num=$1
     if [ -z "$num" -o $num -gt $((FAIL_NUM - DOWN_NUM)) ]; then
@@ -65,7 +74,7 @@ fail_clients() {
 
 reintegrate_clients() {
     for client in $DOWN_CLIENTS; do
-        wait_for $client
+        wait_for_host $client
         $PDSH $client "$LCONF --node client --select mds_svc=`facet_active mds` $CLIENTOPTS $XMLCONFIG"
     done
     DOWN_CLIENTS=""
@@ -96,7 +105,7 @@ setup() {
     wait_for mds
     start mds $MDSLCONFARGS ${REFORMAT}
     while ! $PDSH $HOST "ls -ld $LUSTRE"; do sleep 5; done
-    do_node $CLIENTS lconf --node client_facet --ptldebug $PTLDEBUG \
+    do_node $CLIENTS lconf --node client_facet \
         --select mds_service=$ACTIVEMDS $XMLCONFIG
 }
 
@@ -116,13 +125,6 @@ cleanup() {
     stop ost2 ${FORCE} --dump cleanup.log
 }
 
-wait_for() {
-    facet=$1
-    HOST=`facet_active_host $facet`
-    check_network $HOST 900
-    while ! $PDSH $HOST "ls -ld $LUSTRE"; do sleep 5; done
-}
-
 client_df() {
     $PDSH $CLIENTS "df $MOUNT" | dshbak -c
 }
@@ -133,22 +135,6 @@ client_mkdirs() {
     $PDSH $CLIENTLIST "mkdir $MOUNT/\`hostname\`; ls $MOUNT/\`hostname\` > /dev/null"
 }
 
-facet_failover() {
-    facet=$1
-    echo "Failing $facet node `facet_active_host $facet`"
-    shutdown_facet $facet
-    sleep 2
-    reboot_facet $facet
-    client_df &
-    DFPID=$!
-    change_active $facet
-    TO=`facet_active_host $facet`
-    echo "Failover MDS to $TO"
-    wait_for $facet
-    start $facet
-}
-
-
 clients_recover_osts() {
     facet=$1
     $PDSH $CLIENTS "$LCTL "'--device %OSC_`hostname`_OST_'"${facet}_svc_MNT_client recover"
@@ -291,7 +277,7 @@ test_4() {
 
     #Reintegration
     echo "Reintegrating OST"
-    reboot_node ost1
+    reboot_facet ost1
     wait_for ost1
     start ost1
 
@@ -319,7 +305,7 @@ test_5() {
     #OST Portion
     echo "Failing OST"
     shutdown_facet ost1
-    reboot_node ost1
+    reboot_facet ost1
 
     #Check FS
     echo "Test Lustre stability after OST failure"
@@ -328,7 +314,7 @@ test_5() {
     #OST Portion
     echo "Failing OST"
     shutdown_node ost2
-    reboot_node ost2
+    reboot_facet ost2
 
     #Check FS
     echo "Test Lustre stability after OST failure"
@@ -360,7 +346,7 @@ test_6() {
     #OST Portion
     echo "Failing OST"
     shutdown_node ost1
-    reboot_node ost1
+    reboot_facet ost1
 
     #Check FS
     echo "Test Lustre stability after OST failure"
@@ -469,7 +455,7 @@ test_8() {
     #OST Portion
     echo "Failing OST"
     shutdown_node ost1
-    reboot_node ost1
+    reboot_facet ost1
 
     #Check FS
     echo "Test Lustre stability after OST failure"
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index 68e135e..3fca893 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -47,7 +47,7 @@ cleanup() {
 }
 
 if [ "$ONLY" == "cleanup" ]; then
-    sysctl -w portals.debug=0
+    sysctl -w portals.debug=0 || true
     cleanup
     exit
 fi
-- 
1.8.3.1