X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Finsanity.sh;h=54ae83a70e72a535d486ccd88d66d9367a1b966f;hp=68d0ff9282e8fcca0148d3caa55da14820c8fc25;hb=9b73c02192b3e16c322402e8c080e660ba2c457c;hpb=2dc9c16e770415d56839e1996015fec5fab93f29 diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh index 68d0ff9..54ae83a 100755 --- a/lustre/tests/insanity.sh +++ b/lustre/tests/insanity.sh @@ -10,22 +10,17 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/insanity-local.sh} -ALWAYS_EXCEPT="10" +ALWAYS_EXCEPT="10 $INSANITY_EXCEPT" + +SETUP=${SETUP:-"setup"} +CLEANUP=${CLEANUP:-"cleanup"} build_test_filter -assert_env mds_HOST ost1_HOST ost2_HOST client_HOST LIVE_CLIENT +assert_env mds_HOST MDS_MKFS_OPTS +assert_env ost_HOST OST_MKFS_OPTS OSTCOUNT +assert_env LIVE_CLIENT FSNAME -#### -# Initialize all the ostN_HOST -NUMOST=2 -if [ "$EXTRA_OSTS" ]; then - for host in $EXTRA_OSTS; do - NUMOST=$((NUMOST + 1)) - OST=ost$NUMOST - eval ${OST}_HOST=$host - done -fi # This can be a regexp, to allow more clients CLIENTS=${CLIENTS:-"`comma_list $LIVE_CLIENT $FAIL_CLIENTS $EXTRA_CLIENTS`"} @@ -107,47 +102,24 @@ reintegrate_clients() { DOWN_NUM=0 } -gen_config() { - rm -f $XMLCONFIG - add_mds mds --dev $MDSDEV --size $MDSSIZE --journal-size $MDSJOURNALSIZE - - if [ ! -z "$mdsfailover_HOST" ]; then - add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE - fi - - add_lov lov1 mds --stripe_sz $STRIPE_BYTES\ - --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 - for i in `seq $NUMOST`; do - dev=`printf $OSTDEV $i` - add_ost ost$i --lov lov1 --dev $dev --size $OSTSIZE \ - --journal-size $OSTJOURNALSIZE - done - - - add_client client mds --lov lov1 --path $MOUNT +start_ost() { + start ost$1 `ostdevname $1` $OST_MOUNT_OPTS } setup() { + cleanup rm -rf logs/* - for i in `seq $NUMOST`; do - wait_for ost$i - start ost$i ${REFORMAT} $OSTLCONFARGS - done - [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE - wait_for mds - start mds $MDSLCONFARGS ${REFORMAT} + formatall + setupall + while ! do_node $CLIENTS "ls -d $LUSTRE" > /dev/null; do sleep 5; done grep " $MOUNT " /proc/mounts || zconf_mount $CLIENTS $MOUNT - } cleanup() { zconf_umount $CLIENTS $MOUNT - - stop mds ${FORCE} $MDSLCONFARGS || : - for i in `seq $NUMOST`; do - stop ost$i ${REFORMAT} ${FORCE} $OSTLCONFARGS || : - done + cleanupall + cleanup_check } trap exit INT @@ -187,38 +159,18 @@ clients_recover_osts() { # do_node $CLIENTS "$LCTL "'--device %OSC_`hostname`_'"${facet}_svc_MNT_client_facet recover" } -node_to_ost() { - node=$1 - retvar=$2 - for i in `seq $NUMOST`; do - ostvar="ost${i}_HOST" - if [ "${!ostvar}" == $node ]; then - eval $retvar=ost${i} - return 0 - fi - done - echo "No ost found for node; $node" - return 1 - -} - - - if [ "$ONLY" == "cleanup" ]; then - cleanup + $CLEANUP exit fi -if [ -z "$NOSETUP" ]; then - gen_config - setup -fi - if [ ! -z "$EVAL" ]; then eval "$EVAL" exit $? fi +$SETUP + if [ "$ONLY" == "setup" ]; then exit 0 fi @@ -227,20 +179,17 @@ fi echo "Starting Test 17 at `date`" test_0() { - echo "Failover MDS" - facet_failover mds + facet_failover $SINGLEMDS echo "Waiting for df pid: $DFPID" - wait $DFPID || return 1 + wait $DFPID || { echo "df returned $?" && return 1; } - echo "Failing OST1" - facet_failover ost1 + facet_failover ost1 || return 4 echo "Waiting for df pid: $DFPID" - wait $DFPID || return 2 + wait $DFPID || { echo "df returned $?" && return 2; } - echo "Failing OST2" - facet_failover ost2 + facet_failover ost2 || return 5 echo "Waiting for df pid: $DFPID" - wait $DFPID || return 3 + wait $DFPID || { echo "df returned $?" && return 3; } return 0 } run_test 0 "Fail all nodes, independently" @@ -258,29 +207,26 @@ test_2() { echo "Verify Lustre filesystem is up and running" client_df - echo "Failing MDS" - shutdown_facet mds - reboot_facet mds + shutdown_facet $SINGLEMDS + reboot_facet $SINGLEMDS # prepare for MDS failover - change_active mds - reboot_facet mds + change_active $SINGLEMDS + reboot_facet $SINGLEMDS client_df & DFPID=$! sleep 5 - echo "Failing OST" shutdown_facet ost1 echo "Reintegrating OST" reboot_facet ost1 wait_for ost1 - start ost1 + start_ost 1 || return 2 - echo "Failover MDS" - wait_for mds - start mds + wait_for $SINGLEMDS + start $SINGLEMDS `mdsdevname 1` $MDS_MOUNT_OPTS || return $? #Check FS wait $DFPID @@ -299,7 +245,7 @@ test_3() { echo "Verify Lustre filesystem is up and running" #MDS Portion - facet_failover mds + facet_failover $SINGLEMDS wait $DFPID || echo df failed: $? #Check FS @@ -319,6 +265,7 @@ test_3() { reintegrate_clients || return 1 client_df || return 3 + sleep 2 # give it a little time for fully recovered before next test } run_test 3 "Thirdb Failure Mode: MDS/CLIENT `date`" ################################################### @@ -328,38 +275,38 @@ test_4() { echo "Fourth Failure Mode: OST/MDS `date`" #OST Portion - echo "Failing OST ost1" shutdown_facet ost1 #Check FS echo "Test Lustre stability after OST failure" - client_df + client_df & + DFPIDA=$! + sleep 5 #MDS Portion - echo "Failing MDS" - shutdown_facet mds - reboot_facet mds + shutdown_facet $SINGLEMDS + reboot_facet $SINGLEMDS # prepare for MDS failover - change_active mds - reboot_facet mds + change_active $SINGLEMDS + reboot_facet $SINGLEMDS client_df & - DFPID=$! + DFPIDB=$! sleep 5 #Reintegration echo "Reintegrating OST" reboot_facet ost1 wait_for ost1 - start ost1 + start_ost 1 - echo "Failover MDS" - wait_for mds - start mds + wait_for $SINGLEMDS + start $SINGLEMDS `mdsdevname 1` $MDS_MOUNT_OPTS #Check FS - wait $DFPID + wait $DFPIDA + wait $DFPIDB clients_recover_osts ost1 echo "Test Lustre stability after MDS failover" client_df || return 1 @@ -376,34 +323,38 @@ test_5() { client_df #OST Portion - echo "Failing OST" shutdown_facet ost1 reboot_facet ost1 #Check FS echo "Test Lustre stability after OST failure" - client_df + client_df & + DFPIDA=$! + sleep 5 #OST Portion - echo "Failing OST" shutdown_facet ost2 reboot_facet ost2 #Check FS echo "Test Lustre stability after OST failure" - client_df + client_df & + DFPIDB=$! + sleep 5 #Reintegration echo "Reintegrating OSTs" wait_for ost1 - start ost1 + start_ost 1 wait_for ost2 - start ost2 + start_ost 2 clients_recover_osts ost1 clients_recover_osts ost2 sleep $TIMEOUT + wait $DFPIDA + wait $DFPIDB client_df || return 2 } run_test 5 "Fifth Failure Mode: OST/OST `date`" @@ -419,13 +370,14 @@ test_6() { client_touch testfile || return 2 #OST Portion - echo "Failing OST" shutdown_facet ost1 reboot_facet ost1 #Check FS echo "Test Lustre stability after OST failure" - client_df + client_df & + DFPIDA=$! + sleep 5 #CLIENT Portion echo "Failing CLIENTs" @@ -433,15 +385,19 @@ test_6() { #Check FS echo "Test Lustre stability after CLIENTs failure" - client_df + client_df & + DFPIDB=$! + sleep 5 #Reintegration echo "Reintegrating OST/CLIENTs" wait_for ost1 - start ost1 + start_ost 1 reintegrate_clients sleep 5 + wait $DFPIDA + wait $DFPIDB echo "Verifying mount" client_df || return 3 } @@ -478,8 +434,7 @@ test_7() { client_rm testfile #MDS Portion - echo "Failing MDS" - facet_failover mds + facet_failover $SINGLEMDS #Check FS echo "Test Lustre stability after MDS failover" @@ -530,21 +485,24 @@ test_8() { #OST Portion - echo "Failing OST" shutdown_facet ost1 reboot_facet ost1 #Check FS echo "Test Lustre stability after OST failure" - client_df - $PDSH $LIVE_CLIENT "ls -l $MOUNT" - $PDSH $LIVE_CLIENT "rm -f $MOUNT/*_testfile" + client_df & + DFPID=$! + sleep 5 + #non-failout hangs forever here + #$PDSH $LIVE_CLIENT "ls -l $MOUNT" + #$PDSH $LIVE_CLIENT "rm -f $MOUNT/*_testfile" #Reintegration echo "Reintegrating CLIENTs/OST" reintegrate_clients wait_for ost1 - start ost1 + start_ost 1 + wait $DFPID client_df || return 1 client_touch testfile2 || return 2 @@ -614,5 +572,6 @@ test_10() { } run_test 10 "Running Availability for 6 hours..." -equals_msg "Done, cleaning up" -cleanup +equals_msg `basename $0`: test complete, cleaning up +$CLEANUP +[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG || true