echo "Verify Lustre filesystem is up and running"
[ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running"
- client_df
+ clients_up
shutdown_facet $SINGLEMDS
reboot_facet $SINGLEMDS
change_active $SINGLEMDS
reboot_facet $SINGLEMDS
- client_df &
+ clients_up &
DFPID=$!
sleep 5
wait $DFPID
clients_recover_osts ost1
echo "Verify reintegration"
- client_df || return 1
+ clients_up || return 1
}
run_test 2 "Second Failure Mode: MDS/OST `date`"
#Check FS
echo "Test Lustre stability after MDS failover"
- client_df
+ clients_up
#CLIENT Portion
echo "Failing 2 CLIENTS"
#Check FS
echo "Test Lustre stability after CLIENT failure"
- client_df
+ clients_up
#Reintegration
echo "Reintegrating CLIENTS"
reintegrate_clients || return 1
- client_df || return 3
+ clients_up || return 3
sleep 2 # give it a little time for fully recovered before next test
}
run_test 3 "Thirdb Failure Mode: MDS/CLIENT `date`"
#Check FS
echo "Test Lustre stability after OST failure"
- client_df &
+ clients_up &
DFPIDA=$!
sleep 5
change_active $SINGLEMDS
reboot_facet $SINGLEMDS
- client_df &
+ clients_up &
DFPIDB=$!
sleep 5
wait $DFPIDB
clients_recover_osts ost1
echo "Test Lustre stability after MDS failover"
- client_df || return 1
+ clients_up || return 1
}
run_test 4 "Fourth Failure Mode: OST/MDS `date`"
###################################################
echo "Verify Lustre filesystem is up and running"
[ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running"
- client_df
+ clients_up
#OST Portion
shutdown_facet ost1
#Check FS
echo "Test Lustre stability after OST failure"
- client_df &
+ clients_up &
DFPIDA=$!
sleep 5
#Check FS
echo "Test Lustre stability after OST failure"
- client_df &
+ clients_up &
DFPIDB=$!
sleep 5
wait $DFPIDA
wait $DFPIDB
- client_df || return 2
+ clients_up || return 2
}
run_test 5 "Fifth Failure Mode: OST/OST `date`"
###################################################
echo "Verify Lustre filesystem is up and running"
[ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running"
- client_df
+ clients_up
client_touch testfile || return 2
#OST Portion
#Check FS
echo "Test Lustre stability after OST failure"
- client_df &
+ clients_up &
DFPIDA=$!
echo DFPIDA=$DFPIDA
sleep 5
#Check FS
echo "Test Lustre stability after CLIENTs failure"
- client_df &
+ clients_up &
DFPIDB=$!
echo DFPIDB=$DFPIDB
sleep 5
echo "Verifying mount"
[ -z "$(mounted_lustre_filesystems)" ] && return 3
- client_df
+ clients_up
}
run_test 6 "Sixth Failure Mode: OST/CLIENT `date`"
###################################################
echo "Verify Lustre filesystem is up and running"
[ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running"
- client_df
+ clients_up
client_touch testfile || return 1
#CLIENT Portion
#Check FS
echo "Test Lustre stability after CLIENTs failure"
- client_df
+ clients_up
$PDSH $LIVE_CLIENT "ls -l $TESTDIR"
$PDSH $LIVE_CLIENT "rm -f $TESTDIR/*_testfile"
echo "Verify Lustre filesystem is up and running"
[ -z "$(mounted_lustre_filesystems)" ] && return 2
- client_df
+ clients_up
client_rm testfile
#MDS Portion
#Reintegration
echo "Reintegrating CLIENTs"
reintegrate_clients || return 2
- client_df
+ clients_up
#Sleep
echo "wait 1 minutes"
echo "Verify Lustre filesystem is up and running"
[ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running"
- client_df
+ clients_up
client_touch testfile
#CLIENT Portion
#Check FS
echo "Test Lustre stability after CLIENTs failure"
- client_df
+ clients_up
$PDSH $LIVE_CLIENT "ls -l $TESTDIR"
$PDSH $LIVE_CLIENT "rm -f $TESTDIR/*_testfile"
echo "Verify Lustre filesystem is up and running"
[ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running"
- client_df
+ clients_up
client_touch testfile
#Check FS
echo "Test Lustre stability after OST failure"
- client_df &
+ clients_up &
DFPID=$!
sleep 5
#non-failout hangs forever here
wait_for ost1
start_ost 1
wait $DFPID
- client_df || return 1
+ clients_up || return 1
client_touch testfile2 || return 2
#Sleep
echo "Verify Lustre filesystem is up and running"
[ -z "$(mounted_lustre_filesystems)" ] && error "Lustre is not running"
- client_df
+ clients_up
client_touch testfile || return 1
#CLIENT Portion
#Check FS
echo "Test Lustre stability after CLIENTs failure"
- client_df
+ clients_up
$PDSH $LIVE_CLIENT "ls -l $TESTDIR" || return 1
$PDSH $LIVE_CLIENT "rm -f $TESTDIR/*_testfile" || return 2
#Create files
echo "Verify Lustre filesystem is up and running"
- $PDSH $LIVE_CLIENT df $MOUNT || return 3
+ client_up $LIVE_CLIENT || return 3
client_touch testfile || return 4
#CLIENT Portion
#Check FS
echo "Test Lustre stability after CLIENTs failure"
- client_df
+ clients_up
$PDSH $LIVE_CLIENT "ls -l $TESTDIR" || return 5
$PDSH $LIVE_CLIENT "rm -f $TESTDIR/*_testfile" || return 6
#Reintegration
echo "Reintegrating CLIENTs/CLIENTs"
reintegrate_clients || return 7
- client_df
+ clients_up
#Sleep
echo "Wait 1 minutes"
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
error "open succeeded unexpectedly"
fi
rmultiop_start $CLIENT1 $DIR/$tdir/$tfile o_c
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
+ client_up $CLIENT1 || error "$CLIENT1 evicted"
- do_node $CLIENT1 df $MOUNT || error "$CLIENT1 evicted"
rmultiop_stop $CLIENT1 || error "close failed"
zconf_mount $CLIENT2 $MOUNT
}
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
error "create succeeded unexpectedly"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
error "unlink succeeded unexpectedly"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if ! do_node $CLIENT1 $CHECKSTAT -u \\\#$UID $file; then
error "setattr of UID succeeded unexpectedly"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if ! do_node $CLIENT1 $CHECKSTAT -g \\\#$UID $file; then
error "setattr of GID succeeded unexpectedly"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if ! do_node $CLIENT1 $CHECKSTAT -p 0644 $file; then
error "setattr of permission succeeded unexpectedly"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
checkattr $CLIENT1 i $file
rc=$?
do_node $CLIENT1 chattr -i $file
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT || error "$CLIENT1 evicted"
+ client_up $CLIENT1 || error "$CLIENT1 evicted"
if (($mtime_pre >= $mtime_post)); then
error "time not changed: pre $mtime_pre, post $mtime_post"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
error "link should fail"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if ! do_node $CLIENT1 $CHECKSTAT -a $DIR/$tdir/$tfile; then
error "link should fail"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
error "rename should fail"
fi
zconf_umount $CLIENT2 $MOUNT
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1 not evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1 not evicted"
if do_node $CLIENT1 $CHECKSTAT -a $DIR/$tfile; then
error "rename should fail"
fi
facet_failover $SINGLEMDS
# recovery shouldn't fail due to missing client 2
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
# All 50 files should have been replayed
do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
facet_failover $SINGLEMDS
# recovery shouldn't fail due to missing client 2
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
# All 50 files should have been replayed
do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
zconf_umount $CLIENT2 $MOUNT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $MOUNT && error "$CLIENT1:$MOUNT not evicted"
- do_node $CLIENT2 df $MOUNT1 || error "$CLIENT2:$MOUNT1 evicted"
+ client_evicted $CLIENT1 || error "$CLIENT1:$MOUNT not evicted"
+ client_up $CLIENT2 || error "$CLIENT2:$MOUNT1 evicted"
#
# Check the MDT epoch. $post must be the first transaction
facet_failover $SINGLEMDS
# recovery shouldn't fail due to missing client 2
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
do_node $CLIENT1 $CHECKSTAT $DIR/$tfile && return 2
zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
facet_failover $SINGLEMDS
# recovery should fail due to missing client 2
- do_node $CLIENT1 df $DIR && return 1
+ client_evicted $CLIENT1 || return 1
do_node $CLIENT1 $CHECKSTAT -p 0755 $DIR/$tfile && return 2
zconf_mount $CLIENT2 $DIR || error "mount $CLIENT2 $DIR fail"
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
# All 50 files should have been replayed
do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
do_node $CLIENT1 unlinkmany $DIR/$tfile-3- 25 || return 3
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 4
+ client_up $CLIENT2 || return 4
# All 25 files from client2 should have been replayed
do_node $CLIENT2 unlinkmany $DIR/$tdir/$tfile-2- 25 || return 5
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
# create another set of files
do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 2
+ client_up $CLIENT2 || return 2
# All files from should have been replayed
do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
# create another set of files
do_node $CLIENT1 createmany -m $DIR/$tfile-3- 25
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 2
+ client_up $CLIENT2 || return 2
# All files from should have been replayed
do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR && return 1
+ client_evicted $CLIENT1 || return 1
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 2
+ client_up $CLIENT2 || return 2
# First 25 files should have been replayed
do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 3
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
do_node $CLIENT1 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
# create another set of files
do_node $CLIENT1 createmany -o $DIR/$tfile-3- 25
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR && return 4
+ client_evicted $CLIENT2 || return 4
# file from client2 should fail
do_node $CLIENT2 $CHECKSTAT $DIR/$tfile-2-0 && error "$tfile-2-0 exists"
do_node $CLIENT2 "sysctl -w lustre.fail_val=5"
#define OBD_FAIL_PTLRPC_REPLAY 0x50e
do_node $CLIENT2 "sysctl -w lustre.fail_loc=0x2000050e"
- do_node $CLIENT2 df $DIR
+ client_up $CLIENT2
# vbr_activate_client $CLIENT2
# need way to know that client stops replays
sleep 5
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
# All files should have been replayed
do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
facet_failover $SINGLEMDS
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 4
+ client_up $CLIENT2 || return 4
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 1
+ client_up $CLIENT1 || return 1
# All files should have been replayed
do_node $CLIENT1 unlinkmany $DIR/$tfile- 25 || return 2
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 3
+ client_up $CLIENT1 || return 3
#client1 is back and will try to open orphan
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 4
+ client_up $CLIENT2 || return 4
do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
zconf_umount_clients $CLIENTS $DIR
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 2
+ client_up $CLIENT1 || return 2
#client1 is back and will try to open orphan
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 3
+ client_up $CLIENT2 || return 3
rmultiop_stop $CLIENT2 || return 1
do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 3
+ client_up $CLIENT1 || return 3
#client1 is back and will try to open orphan
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 4
+ client_up $CLIENT2 || return 4
do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
zconf_umount_clients $CLIENTS $DIR
vbr_deactivate_client $CLIENT2
facet_failover $SINGLEMDS
- do_node $CLIENT1 df $DIR || return 6
+ client_up $CLIENT1 || return 6
#client1 is back and will try to open orphan
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 8
+ client_up $CLIENT2 || return 8
do_node $CLIENT2 $CHECKSTAT $DIR/$tfile && error "$tfile exists"
zconf_umount_clients $CLIENTS $DIR
zconf_umount $CLIENT1 $DIR
facet_failover $SINGLEMDS
- do_node $CLIENT2 df $DIR || return 6
+ client_up $CLIENT2 || return 6
do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
zconf_umount_clients $CLIENTS $DIR
zconf_umount $CLIENT1 $DIR
facet_failover $SINGLEMDS
- do_node $CLIENT2 df $DIR || return 6
+ client_up $CLIENT2 || return 6
do_node $CLIENT2 rm $DIR/$tfile || error "$tfile doesn't exists"
zconf_umount $CLIENT2 $DIR
zconf_umount $CLIENT1 $DIR
facet_failover $SINGLEMDS
- do_node $CLIENT2 df $DIR || return 6
+ client_up $CLIENT2 || return 6
do_node $CLIENT2 rmdir $DIR/$tfile || error "$tfile doesn't exists"
zconf_umount $CLIENT2 $DIR
sleep $TIMEOUT
vbr_activate_client $CLIENT2
- do_node $CLIENT2 df $DIR || return 4
+ client_up $CLIENT2 || return 4
for CLIENT in ${CLIENTS//,/ }; do
PID=`cat pid.$CLIENT`