--- /dev/null
+# Test cluster settings. Most entries are defaults that apply only when the
+# variable is unset or empty. client_HOST and PDSH are always overwritten.
+mds_HOST=${mds_HOST:-mdev4}
+mdsfailover_HOST=${mdsfailover_HOST:-mdev5}
+ost1_HOST=${ost1_HOST:-mdev2}
+ost2_HOST=${ost2_HOST:-mdev3}
+client_HOST=client
+LIVE_CLIENT=${LIVE_CLIENT:-mdev6}
+# This should always be a list, not a regexp
+FAIL_CLIENTS=${FAIL_CLIENTS:-mdev7}
+
+NETTYPE=${NETTYPE:-tcp}
+
+TIMEOUT=${TIMEOUT:-30}
+#PTLDEBUG=${PTLDEBUG:-'"ha|info|ioctl|malloc"'}
+PTLDEBUG=${PTLDEBUG:-0}
+MOUNT=${MOUNT:-"/mnt/lustre"}
+# The upcall path is overridden through CLIENT_UPCALL, not UPCALL.
+UPCALL=${CLIENT_UPCALL:-$(pwd)/replay-single-upcall.sh}
+
+MDSDEV=${MDSDEV:-/dev/sda1}
+MDSSIZE=${MDSSIZE:-50000}
+
+# The default OST device path embeds the name of the local host.
+OSTDEV=${OSTDEV:-/tmp/ost-$(hostname)}
+OSTSIZE=${OSTSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
+STRIPE_BYTES=${STRIPE_BYTES:-1048576}
+STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0}
+
+FAILURE_MODE=${FAILURE_MODE:-HARD} # or SOFT
+POWER_DOWN=${POWER_DOWN:-"powerman --off"}
+POWER_UP=${POWER_UP:-"powerman --on"}
+
+# Trailing space is part of the value.
+PDSH="pdsh -S -w "
MOUNT2=${MOUNT2:-"/mnt/lustre2"}
DIR=${DIR:-$MOUNT}
DIR2=${DIR2:-$MOUNT1}
-
+PTLDEBUG=${PTLDEBUG:-0}
PDSH=${PDSH:-no_dsh}
MDSDEV=${MDSDEV:-$ROOT/tmp/mds-`hostname`}
--- /dev/null
+
+# Host names. A non-empty value already set by the caller is kept.
+mds_HOST=${mds_HOST:-"mdev4"}
+mdsfailover_HOST=${mdsfailover_HOST:-"mdev5"}
+ost_HOST=${ost_HOST:-"mdev2"}
+ost2_HOST=${ost2_HOST:-"mdev3"}
+client_HOST=${client_HOST:-"client"}
+NETTYPE=${NETTYPE:-"tcp"}
+
+# Mount points. MOUNT1 falls back to MOUNT, DIR to MOUNT and DIR2 to MOUNT1.
+MOUNT=${MOUNT:-/mnt/lustre}
+MOUNT2=${MOUNT2:-/mnt/lustre2}
+MOUNT1=${MOUNT1:-"$MOUNT"}
+DIR=${DIR:-"$MOUNT"}
+DIR2=${DIR2:-"$MOUNT1"}
+PTLDEBUG=${PTLDEBUG:-"0"}
+PDSH=${PDSH:-"pdsh -S -w"}
+
+# Device paths, sizes and filesystem type. The default OST path embeds the
+# name of the local host.
+MDSDEV=${MDSDEV:-"/dev/sda1"}
+MDSSIZE=${MDSSIZE:-"50000"}
+OSTDEV=${OSTDEV:-"/tmp/ost-$(hostname)"}
+OSTSIZE=${OSTSIZE:-"20000"}
+FSTYPE=${FSTYPE:-"ext3"}
+TIMEOUT=${TIMEOUT:-"10"}
+UPCALL=${UPCALL:-"$PWD/replay-single-upcall.sh"}
+
+STRIPE_BYTES=${STRIPE_BYTES:-"65536"}
+STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-"0"}
+
+FAILURE_MODE=${FAILURE_MODE:-"SOFT"} # the other mode is HARD
+POWER_DOWN=${POWER_DOWN:-"powerman --off"}
+POWER_UP=${POWER_UP:-"powerman --on"}
echo $ret
}
+# Take one client out of service.
+# Arguments: $1 - the client to shut down
+# Globals:   FAILURE_MODE and POWER_DOWN are read, client is written
+# With FAILURE_MODE=HARD the POWER_DOWN command is run against the client.
+# With FAILURE_MODE=SOFT stop is called with --force --nomod.
+# Any other FAILURE_MODE value does nothing.
+shutdown_client() {
+    client=$1
+    if [ "$FAILURE_MODE" = HARD ]; then
+        # POWER_DOWN is left unquoted on purpose so that a value made of a
+        # command plus options is split into separate words.
+        $POWER_DOWN $client
+    elif [ "$FAILURE_MODE" = SOFT ]; then
+        # Target the client passed in. $facet is not set by this function.
+        # NOTE(review): confirm that stop accepts a client name here.
+        stop $client --force --nomod
+    fi
+}
+
fail_clients() {
num=$1
if [ -z "$num" -o $num -gt $((FAIL_NUM - DOWN_NUM)) ]; then
reintegrate_clients() {
for client in $DOWN_CLIENTS; do
- wait_for $client
+ wait_for_host $client
$PDSH $client "$LCONF --node client --select mds_svc=`facet_active mds` $CLIENTOPTS $XMLCONFIG"
done
DOWN_CLIENTS=""
wait_for mds
start mds $MDSLCONFARGS ${REFORMAT}
while ! $PDSH $HOST "ls -ld $LUSTRE"; do sleep 5; done
- do_node $CLIENTS lconf --node client_facet --ptldebug $PTLDEBUG \
+ do_node $CLIENTS lconf --node client_facet \
--select mds_service=$ACTIVEMDS $XMLCONFIG
}
stop ost2 ${FORCE} --dump cleanup.log
}
-wait_for() {
- facet=$1
- HOST=`facet_active_host $facet`
- check_network $HOST 900
- while ! $PDSH $HOST "ls -ld $LUSTRE"; do sleep 5; done
-}
-
client_df() {
$PDSH $CLIENTS "df $MOUNT" | dshbak -c
}
$PDSH $CLIENTLIST "mkdir $MOUNT/\`hostname\`; ls $MOUNT/\`hostname\` > /dev/null"
}
-facet_failover() {
- facet=$1
- echo "Failing $facet node `facet_active_host $facet`"
- shutdown_facet $facet
- sleep 2
- reboot_facet $facet
- client_df &
- DFPID=$!
- change_active $facet
- TO=`facet_active_host $facet`
- echo "Failover MDS to $TO"
- wait_for $facet
- start $facet
-}
-
-
clients_recover_osts() {
facet=$1
$PDSH $CLIENTS "$LCTL "'--device %OSC_`hostname`_OST_'"${facet}_svc_MNT_client recover"
#Reintegration
echo "Reintegrating OST"
- reboot_node ost1
+ reboot_facet ost1
wait_for ost1
start ost1
#OST Portion
echo "Failing OST"
shutdown_facet ost1
- reboot_node ost1
+ reboot_facet ost1
#Check FS
echo "Test Lustre stability after OST failure"
#OST Portion
echo "Failing OST"
shutdown_node ost2
- reboot_node ost2
+ reboot_facet ost2
#Check FS
echo "Test Lustre stability after OST failure"
#OST Portion
echo "Failing OST"
shutdown_node ost1
- reboot_node ost1
+ reboot_facet ost1
#Check FS
echo "Test Lustre stability after OST failure"
#OST Portion
echo "Failing OST"
shutdown_node ost1
- reboot_node ost1
+ reboot_facet ost1
#Check FS
echo "Test Lustre stability after OST failure"
}
if [ "$ONLY" == "cleanup" ]; then
- sysctl -w portals.debug=0
+ sysctl -w portals.debug=0 || true
cleanup
exit
fi