From 80d5140369708958c806953f9b7f049807480436 Mon Sep 17 00:00:00 2001 From: tappro Date: Thu, 19 Nov 2009 07:08:38 +0000 Subject: [PATCH] *** empty log message *** --- lustre/tests/conf-sanity.sh | 2 +- lustre/tests/insanity.sh | 62 ++++++++++++++--------------- lustre/tests/recovery-small.sh | 4 +- lustre/tests/replay-dual.sh | 25 +++++------- lustre/tests/replay-single.sh | 19 ++++----- lustre/tests/replay-vbr.sh | 90 +++++++++++++++++++++--------------------- lustre/tests/test-framework.sh | 29 ++++++++++---- 7 files changed, 118 insertions(+), 113 deletions(-) diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index e300c28..ec250fb 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1747,7 +1747,7 @@ test_47() { #17674 facet_failover ost1 facet_failover $SINGLEMDS - df -h $MOUNT || return 3 + client_up || return 3 count=0 for ns in $($LCTL get_param ldlm.namespaces.$FSNAME-*-*-*.lru_size); do diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh index a80d9cc..173c6ac 100755 --- a/lustre/tests/insanity.sh +++ b/lustre/tests/insanity.sh @@ -173,7 +173,7 @@ test_2() { echo "Verify Lustre filesystem is up and running" [ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running" - client_df + clients_up shutdown_facet $SINGLEMDS reboot_facet $SINGLEMDS @@ -182,7 +182,7 @@ test_2() { change_active $SINGLEMDS reboot_facet $SINGLEMDS - client_df & + clients_up & DFPID=$! sleep 5 @@ -200,7 +200,7 @@ test_2() { wait $DFPID clients_recover_osts ost1 echo "Verify reintegration" - client_df || return 1 + clients_up || return 1 } run_test 2 "Second Failure Mode: MDS/OST `date`" @@ -218,7 +218,7 @@ test_3() { #Check FS echo "Test Lustre stability after MDS failover" - client_df + clients_up #CLIENT Portion echo "Failing 2 CLIENTS" @@ -226,13 +226,13 @@ test_3() { #Check FS echo "Test Lustre stability after CLIENT failure" - client_df + clients_up #Reintegration echo "Reintegrating CLIENTS" reintegrate_clients || return 1 - client_df || return 3 + clients_up || return 3 sleep 2 # give it a little time for fully recovered before next test } run_test 3 "Thirdb Failure Mode: MDS/CLIENT `date`" @@ -247,7 +247,7 @@ test_4() { #Check FS echo "Test Lustre stability after OST failure" - client_df & + clients_up & DFPIDA=$! sleep 5 @@ -259,7 +259,7 @@ test_4() { change_active $SINGLEMDS reboot_facet $SINGLEMDS - client_df & + clients_up & DFPIDB=$! sleep 5 @@ -277,7 +277,7 @@ test_4() { wait $DFPIDB clients_recover_osts ost1 echo "Test Lustre stability after MDS failover" - client_df || return 1 + clients_up || return 1 } run_test 4 "Fourth Failure Mode: OST/MDS `date`" ################################################### @@ -292,7 +292,7 @@ test_5() { echo "Verify Lustre filesystem is up and running" [ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running" - client_df + clients_up #OST Portion shutdown_facet ost1 @@ -300,7 +300,7 @@ test_5() { #Check FS echo "Test Lustre stability after OST failure" - client_df & + clients_up & DFPIDA=$! sleep 5 @@ -310,7 +310,7 @@ test_5() { #Check FS echo "Test Lustre stability after OST failure" - client_df & + clients_up & DFPIDB=$! sleep 5 @@ -327,7 +327,7 @@ test_5() { wait $DFPIDA wait $DFPIDB - client_df || return 2 + clients_up || return 2 } run_test 5 "Fifth Failure Mode: OST/OST `date`" ################################################### @@ -340,7 +340,7 @@ test_6() { echo "Verify Lustre filesystem is up and running" [ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running" - client_df + clients_up client_touch testfile || return 2 #OST Portion @@ -349,7 +349,7 @@ test_6() { #Check FS echo "Test Lustre stability after OST failure" - client_df & + clients_up & DFPIDA=$! echo DFPIDA=$DFPIDA sleep 5 @@ -360,7 +360,7 @@ test_6() { #Check FS echo "Test Lustre stability after CLIENTs failure" - client_df & + clients_up & DFPIDB=$! echo DFPIDB=$DFPIDB sleep 5 @@ -378,7 +378,7 @@ test_6() { echo "Verifying mount" [ -z "$(mounted_lustre_filesystems)" ] && return 3 - client_df + clients_up } run_test 6 "Sixth Failure Mode: OST/CLIENT `date`" ################################################### @@ -392,7 +392,7 @@ test_7() { echo "Verify Lustre filesystem is up and running" [ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running" - client_df + clients_up client_touch testfile || return 1 #CLIENT Portion @@ -401,7 +401,7 @@ test_7() { #Check FS echo "Test Lustre stability after CLIENTs failure" - client_df + clients_up $PDSH $LIVE_CLIENT "ls -l $TESTDIR" $PDSH $LIVE_CLIENT "rm -f $TESTDIR/*_testfile" @@ -413,7 +413,7 @@ test_7() { echo "Verify Lustre filesystem is up and running" [ -z "$(mounted_lustre_filesystems)" ] && return 2 - client_df + clients_up client_rm testfile #MDS Portion @@ -425,7 +425,7 @@ test_7() { #Reintegration echo "Reintegrating CLIENTs" reintegrate_clients || return 2 - client_df + clients_up #Sleep echo "wait 1 minutes" @@ -443,7 +443,7 @@ test_8() { echo "Verify Lustre filesystem is up and running" [ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running" - client_df + clients_up client_touch testfile #CLIENT Portion @@ -452,7 +452,7 @@ test_8() { #Check FS echo "Test Lustre stability after CLIENTs failure" - client_df + clients_up $PDSH $LIVE_CLIENT "ls -l $TESTDIR" $PDSH $LIVE_CLIENT "rm -f $TESTDIR/*_testfile" @@ -464,7 +464,7 @@ test_8() { echo "Verify Lustre filesystem is up and running" [ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running" - client_df + clients_up client_touch testfile @@ -474,7 +474,7 @@ test_8() { #Check FS echo "Test Lustre stability after OST failure" - client_df & + clients_up & DFPID=$! sleep 5 #non-failout hangs forever here @@ -487,7 +487,7 @@ test_8() { wait_for ost1 start_ost 1 wait $DFPID - client_df || return 1 + clients_up || return 1 client_touch testfile2 || return 2 #Sleep @@ -506,7 +506,7 @@ test_9() { echo "Verify Lustre filesystem is up and running" [ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running" - client_df + clients_up client_touch testfile || return 1 #CLIENT Portion @@ -515,7 +515,7 @@ test_9() { #Check FS echo "Test Lustre stability after CLIENTs failure" - client_df + clients_up $PDSH $LIVE_CLIENT "ls -l $TESTDIR" || return 1 $PDSH $LIVE_CLIENT "rm -f $TESTDIR/*_testfile" || return 2 @@ -525,7 +525,7 @@ test_9() { #Create files echo "Verify Lustre filesystem is up and running" - $PDSH $LIVE_CLIENT df $MOUNT || return 3 + client_up $LIVE_CLIENT || return 3 client_touch testfile || return 4 #CLIENT Portion @@ -534,14 +534,14 @@ test_9() { #Check FS echo "Test Lustre stability after CLIENTs failure" - client_df + clients_up $PDSH $LIVE_CLIENT "ls -l $TESTDIR" || return 5 $PDSH $LIVE_CLIENT "rm -f $TESTDIR/*_testfile" || return 6 #Reintegration echo "Reintegrating CLIENTs/CLIENTs" reintegrate_clients || return 7 - client_df + clients_up #Sleep echo "Wait 1 minutes" diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 710a4a6..f4745eb 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -681,7 +681,7 @@ test_26b() { # bug 10140 - evict dead exports by pinger remote_ost_nodsh && skip "remote OST with nodsh" && return 0 check_timeout || return 1 - client_df + clients_up zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2" sleep 1 # wait connections being established @@ -741,7 +741,7 @@ test_28() { # bug 6086 - error adding new clients #define OBD_FAIL_MDS_CLIENT_ADD 0x12f do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012f" # fail once (evicted), reconnect fail (fail_loc), ok - df || (sleep 10; df) || (sleep 10; df) || error "reconnect failed" + client_up || (sleep 10; client_up) || (sleep 10; client_up) || error "reconnect failed" rm -f $DIR/$tfile fail $SINGLEMDS # verify MDS last_rcvd can be loaded } diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 11a3122..cdeae9f 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -215,7 +215,7 @@ test_12() { do_facet $SINGLEMDS lctl set_param fail_loc=0x80000302 facet_failover $SINGLEMDS do_facet $SINGLEMDS lctl set_param fail_loc=0 - df $MOUNT || return 1 + clients_up || return 1 ls $DIR/$tfile kill -USR1 $MULTIPID || return 3 @@ -240,7 +240,7 @@ test_13() { do_facet $SINGLEMDS lctl set_param fail_loc=0x80000115 facet_failover $SINGLEMDS do_facet $SINGLEMDS lctl set_param fail_loc=0 - df $MOUNT || return 1 + clients_up || return 1 ls $DIR/$tfile $CHECKSTAT -t file $DIR/$tfile || return 2 @@ -259,7 +259,7 @@ test_14a() { facet_failover $SINGLEMDS # expect failover to fail due to missing client 2 - df $MOUNT && return 1 + client_evicted || return 1 sleep 1 # first 25 files should have been replayed @@ -305,8 +305,7 @@ test_15a() { # was test_15 createmany -o $MOUNT2/$tfile-2- 1 umount $MOUNT2 - facet_failover $SINGLEMDS - df $MOUNT || return 1 + fail $SINGLEMDS unlinkmany $MOUNT1/$tfile- 25 || return 2 [ -e $MOUNT1/$tfile-2-0 ] && error "$tfile-2-0 exists" @@ -321,11 +320,9 @@ test_15c() { for ((i = 0; i < 2000; i++)); do echo "data" > "$MOUNT2/${tfile}-$i" || error "create ${tfile}-$i failed" done - umount $MOUNT2 - facet_failover $SINGLEMDS - df $MOUNT || return 1 + fail $SINGLEMDS zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail" return 0 @@ -340,8 +337,7 @@ test_16() { facet_failover $SINGLEMDS sleep $TIMEOUT - facet_failover $SINGLEMDS - df $MOUNT || return 1 + fail $SINGLEMDS unlinkmany $MOUNT1/$tfile- 25 || return 2 @@ -363,8 +359,7 @@ test_17() { facet_failover ost1 sleep $TIMEOUT - facet_failover ost1 - df $MOUNT || return 1 + fail ost1 unlinkmany $MOUNT1/$tfile- 25 || return 2 @@ -417,8 +412,7 @@ test_20() { #16389 touch $MOUNT1/a touch $MOUNT2/b umount $MOUNT2 - facet_failover $SINGLEMDS - df $MOUNT1 || return 1 + fail $SINGLEMDS rm $MOUNT1/a zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail" TIER1=$((`date +%s` - BEFORE)) @@ -427,8 +421,7 @@ test_20() { #16389 touch $MOUNT1/a touch $MOUNT2/b umount $MOUNT2 - facet_failover $SINGLEMDS - df $MOUNT1 || return 1 + fail $SINGLEMDS rm $MOUNT1/a zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail" TIER2=$((`date +%s` - BEFORE)) diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index d7ec206..acc4454 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -474,10 +474,9 @@ test_20b() { # bug 10480 lfs getstripe $DIR/$tfile || return 1 rm -f $DIR/$tfile || return 2 # make it an orphan mds_evict_client - df -P $DIR || df -P $DIR || true # reconnect + client_up || client_up || true # reconnect fail $SINGLEMDS # start orphan recovery - df -P $DIR || df -P $DIR || true # reconnect wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" wait_mds_ost_sync || return 3 AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'` @@ -495,8 +494,7 @@ test_20c() { # bug 10480 ls -la $DIR/$tfile mds_evict_client - - df -P $DIR || df -P $DIR || true # reconnect + client_up || client_up || true # reconnect kill -USR1 $pid wait $pid || return 1 @@ -710,7 +708,7 @@ test_32() { multiop_bg_pause $DIR/$tfile O_c || return 3 pid2=$! mds_evict_client - df $MOUNT || sleep 1 && df $MOUNT || return 1 + client_up || client_up || return 1 kill -USR1 $pid1 kill -USR1 $pid2 wait $pid1 || return 4 @@ -1031,7 +1029,7 @@ test_47() { # bug 2824 # OBD_FAIL_OST_CREATE_NET 0x204 fail ost1 do_facet ost1 "lctl set_param fail_loc=0x80000204" - df $MOUNT || return 2 + client_up || return 2 # let the MDS discover the OST failure, attempt to recover, fail # and recover again. @@ -1055,7 +1053,7 @@ test_48() { # OBD_FAIL_OST_EROFS 0x216 facet_failover $SINGLEMDS do_facet ost1 "lctl set_param fail_loc=0x80000216" - df $MOUNT || return 2 + client_up || return 2 createmany -o $DIR/$tfile 20 20 || return 2 unlinkmany $DIR/$tfile 40 || return 3 @@ -1525,8 +1523,7 @@ test_62() { # Bug 15756 - don't mis-drop resent replay createmany -o $DIR/$tdir/$tfile- 25 #define OBD_FAIL_TGT_REPLAY_DROP 0x707 do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000707" - facet_failover $SINGLEMDS - df $MOUNT || return 1 + fail $SINGLEMDS do_facet $SINGLEMDS "lctl set_param fail_loc=0" unlinkmany $DIR/$tdir/$tfile- 25 || return 2 return 0 @@ -1916,7 +1913,7 @@ test_74() { mount_facet ost1 touch $DIR/$tfile || return 1 rm $DIR/$tfile || return 2 - client_df || error "df failed: $?" + clients_up || error "client evicted: $?" return 0 } run_test 74 "Ensure applications don't fail waiting for OST recovery" @@ -2015,7 +2012,7 @@ test_84a() { PID=$! mds_evict_client wait $PID - df -P $DIR || df -P $DIR || true # reconnect + client_up || client_up || true # reconnect } run_test 84a "stale open during export disconnect" diff --git a/lustre/tests/replay-vbr.sh b/lustre/tests/replay-vbr.sh index c3db2ce..335281b 100644 --- a/lustre/tests/replay-vbr.sh +++ b/lustre/tests/replay-vbr.sh @@ -111,7 +111,7 @@ test_0b() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then error "open succeeded unexpectedly" fi @@ -132,8 +132,8 @@ test_0c() { rmultiop_start $CLIENT1 $DIR/$tdir/$tfile o_c zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS + client_up $CLIENT1 || error "$CLIENT1 evicted" - do_node $CLIENT1 df $MOUNT || error "$CLIENT1 evicted" rmultiop_stop $CLIENT1 || error "close failed" zconf_mount $CLIENT2 $MOUNT } @@ -164,7 +164,7 @@ test_0e() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then error "create succeeded unexpectedly" fi @@ -199,7 +199,7 @@ test_0g() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then error "unlink succeeded unexpectedly" fi @@ -250,7 +250,7 @@ test_0j() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if ! do_node $CLIENT1 $CHECKSTAT -u \\\#$UID $file; then error "setattr of UID succeeded unexpectedly" fi @@ -271,7 +271,7 @@ test_0k() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if ! do_node $CLIENT1 $CHECKSTAT -g \\\#$UID $file; then error "setattr of GID succeeded unexpectedly" fi @@ -307,7 +307,7 @@ test_0m() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if ! do_node $CLIENT1 $CHECKSTAT -p 0644 $file; then error "setattr of permission succeeded unexpectedly" fi @@ -357,7 +357,7 @@ test_0o() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" checkattr $CLIENT1 i $file rc=$? do_node $CLIENT1 chattr -i $file @@ -428,7 +428,7 @@ test_0r() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT || error "$CLIENT1 evicted" + client_up $CLIENT1 || error "$CLIENT1 evicted" if (($mtime_pre >= $mtime_post)); then error "time not changed: pre $mtime_pre, post $mtime_post" fi @@ -478,7 +478,7 @@ test_0t() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then error "link should fail" fi @@ -499,7 +499,7 @@ test_0u() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then error "link should fail" fi @@ -556,7 +556,7 @@ test_0x() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then error "rename should fail" fi @@ -577,7 +577,7 @@ test_0y() { zconf_umount $CLIENT2 $MOUNT facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted" + client_evicted $CLIENT1 || error "$CLIENT1 not evicted" if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then error "rename should fail" fi @@ -601,7 +601,7 @@ test_1a() { facet_failover $SINGLEMDS # recovery shouldn't fail due to missing client 2 - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 # All 50 files should have been replayed do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2 @@ -630,7 +630,7 @@ test_2a() { facet_failover $SINGLEMDS # recovery shouldn't fail due to missing client 2 - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 # All 50 files should have been replayed do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2 @@ -687,8 +687,8 @@ test_2b() { zconf_umount $CLIENT2 $MOUNT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $MOUNT && error "$CLIENT1:$MOUNT not evicted" - do_node $CLIENT2 df $MOUNT1 || error "$CLIENT2:$MOUNT1 evicted" + client_evicted $CLIENT1 || error "$CLIENT1:$MOUNT not evicted" + client_up $CLIENT2 || error "$CLIENT2:$MOUNT1 evicted" # # Check the MDT epoch. $post must be the first transaction @@ -738,7 +738,7 @@ test_3a() { facet_failover $SINGLEMDS # recovery shouldn't fail due to missing client 2 - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail" @@ -770,7 +770,7 @@ test_3b() { facet_failover $SINGLEMDS # recovery should fail due to missing client 2 - do_node $CLIENT1 df $DIR && return 1 + client_evicted $CLIENT1 || return 1 do_node $CLIENT1 $CHECKSTAT -p 0755 $DIR/$tfile && return 2 zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail" @@ -817,14 +817,14 @@ test_4a() { vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 # All 50 files should have been replayed do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2 do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3 vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 4 + client_up $CLIENT2 || return 4 # All 25 files from client2 should have been replayed do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5 @@ -848,13 +848,13 @@ test_4b(){ vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 # create another set of files do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25 vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 2 + client_up $CLIENT2 || return 2 # All files from should have been replayed do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3 @@ -880,13 +880,13 @@ test_4c() { vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 # create another set of files do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25 vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 2 + client_up $CLIENT2 || return 2 # All files from should have been replayed do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3 @@ -913,10 +913,10 @@ test_5a() { vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR && return 1 + client_evicted $CLIENT1 || return 1 vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 2 + client_up $CLIENT2 || return 2 # First 25 files should have been replayed do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3 @@ -944,14 +944,14 @@ test_5b() { vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists" # create another set of files do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25 vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR && return 4 + client_evicted $CLIENT2 || return 4 # file from client2 should fail do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists" @@ -984,13 +984,13 @@ test_6a() { do_node $CLIENT2 "sysctl -w lustre.fail_val=5" #define OBD_FAIL_PTLRPC_REPLAY 0x50e do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e" - do_node $CLIENT2 df $DIR + client_up $CLIENT2 # vbr_activate_client $CLIENT2 # need way to know that client stops replays sleep 5 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 # All files should have been replayed do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2 @@ -1020,10 +1020,10 @@ test_7a() { facet_failover $SINGLEMDS vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 4 + client_up $CLIENT2 || return 4 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 1 + client_up $CLIENT1 || return 1 # All files should have been replayed do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2 @@ -1051,10 +1051,10 @@ test_8a() { vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 3 + client_up $CLIENT1 || return 3 #client1 is back and will try to open orphan vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 4 + client_up $CLIENT2 || return 4 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists" zconf_umount_clients $CLIENTS $DIR @@ -1077,10 +1077,10 @@ test_8b() { vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 2 + client_up $CLIENT1 || return 2 #client1 is back and will try to open orphan vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 3 + client_up $CLIENT2 || return 3 rmultiop_stop $CLIENT2 || return 1 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists" @@ -1105,10 +1105,10 @@ test_8c() { vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 3 + client_up $CLIENT1 || return 3 #client1 is back and will try to open orphan vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 4 + client_up $CLIENT2 || return 4 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists" zconf_umount_clients $CLIENTS $DIR @@ -1134,11 +1134,11 @@ test_8d() { vbr_deactivate_client $CLIENT2 facet_failover $SINGLEMDS - do_node $CLIENT1 df $DIR || return 6 + client_up $CLIENT1 || return 6 #client1 is back and will try to open orphan vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 8 + client_up $CLIENT2 || return 8 do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists" zconf_umount_clients $CLIENTS $DIR @@ -1160,7 +1160,7 @@ test_8e() { zconf_umount $CLIENT1 $DIR facet_failover $SINGLEMDS - do_node $CLIENT2 df $DIR || return 6 + client_up $CLIENT2 || return 6 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists" zconf_umount_clients $CLIENTS $DIR @@ -1182,7 +1182,7 @@ test_8f() { zconf_umount $CLIENT1 $DIR facet_failover $SINGLEMDS - do_node $CLIENT2 df $DIR || return 6 + client_up $CLIENT2 || return 6 do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists" zconf_umount $CLIENT2 $DIR @@ -1204,7 +1204,7 @@ test_8g() { zconf_umount $CLIENT1 $DIR facet_failover $SINGLEMDS - do_node $CLIENT2 df $DIR || return 6 + client_up $CLIENT2 || return 6 do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists" zconf_umount $CLIENT2 $DIR @@ -1241,7 +1241,7 @@ test_10 () { sleep $TIMEOUT vbr_activate_client $CLIENT2 - do_node $CLIENT2 df $DIR || return 4 + client_up $CLIENT2 || return 4 for CLIENT in ${CLIENTS//,/ }; do PID=`cat pid.$CLIENT` diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 5dd44ef..d10de99 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -1171,15 +1171,31 @@ wait_remote_prog () { return $rc } -client_df() { +clients_up() { # not every config has many clients + sleep 1 if [ ! -z "$CLIENTS" ]; then - $PDSH $CLIENTS "df $MOUNT" > /dev/null + $PDSH $CLIENTS "stat -f $MOUNT" > /dev/null + else + stat -f $MOUNT > /dev/null + fi +} + +client_up() { + local client=$1 + # usually checked on particular client or locally + sleep 1 + if [ ! -z "$client" ]; then + $PDSH $client "stat -f $MOUNT" > /dev/null else - df $MOUNT > /dev/null + stat -f $MOUNT > /dev/null fi } +client_evicted() { + ! client_up $1 +} + client_reconnect() { uname -n >> $MOUNT/recon if [ -z "$CLIENTS" ]; then @@ -1255,7 +1271,7 @@ ost_evict_client() { fail() { facet_failover $* || error "failover: $?" - client_df || error "post-failover df: $?" + clients_up || error "post-failover df: $?" } fail_nodf() { @@ -1268,9 +1284,8 @@ fail_abort() { stop $facet change_active $facet mount_facet $facet -o abort_recovery - client_df || echo "first df failed: $?" - sleep 1 - client_df || error "post-failover df: $?" + clients_up || echo "first df failed: $?" + clients_up || error "post-failover df: $?" } do_lmc() { -- 1.8.3.1