build_test_filter
# cleanup: tear down the client mount and the mds/ost/ost2 facets.
# NOTE(review): this text is a patch fragment, not a runnable script; lines
# beginning with "-" are the old body and lines beginning with "+" the new one.
# Old body: fail back to "mds" when facet_active reports a different active
# MDS, unmount the client unconditionally, then stop mds, ost2, ost with
# ${FORCE}.
# New body: unmount the client only when $MOUNT is listed in /proc/mounts,
# then stop ost, ost2, mds with -f.
cleanup() {
- # make sure we are using the primary MDS, so the config log will
- # be able to clean up properly.
- activemds=`facet_active mds`
- if [ $activemds != "mds" ]; then
- fail mds $MDS_MOUNT_OPTS
- fi
- zconf_umount `hostname` $MOUNT
- stop mds ${FORCE}
- stop ost2 ${FORCE}
- stop ost ${FORCE}
+ grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
+ stop ost -f
+ stop ost2 -f
+ stop mds -f
#no dump option in mountconf...
#stop ost ${FORCE} --dump $TMP/replay-single-`hostname`.log
}
# Defaults for the setup/cleanup hook names ("setup" / "cleanup" unless the
# caller already set SETUP / CLEANUP). The patch moves these two assignments
# from below the ONLY=cleanup check to above it; the check now runs $CLEANUP.
+SETUP=${SETUP:-"setup"}
+CLEANUP=${CLEANUP:-"cleanup"}
+
# When ONLY is "cleanup": set lnet.debug to 0 (a sysctl failure is ignored),
# then exit. The old body set FORCE="-f" before exiting; the new body invokes
# the $CLEANUP hook instead.
if [ "$ONLY" == "cleanup" ]; then
sysctl -w lnet.debug=0 || true
- # failover is the default, '-f' is force
- FORCE="-f"
+ $CLEANUP
exit
fi
-SETUP=${SETUP:-"setup"}
-CLEANUP=${CLEANUP:-"cleanup"}
-
setup() {
- grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT
- stop ost -f || 1
- stop ost2 -f || 1
- stop mds -f || 1
+ cleanup
add mds $MDS_MKFS_OPTS --reformat $MDSDEV
add ost $OST_MKFS_OPTS --reformat $OSTDEV
add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2
# test_0: call replay_barrier on the mds and then fail it over with no
# filesystem operations in between.
# The patch reduces the fail call to the facet name only; the device and
# mount options are no longer passed by the test.
test_0() {
replay_barrier mds
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
}
run_test 0 "empty replay"
# test_0b: fail the ost, then run createmany and unlinkmany on $DIR/$tfile
# with a count of 20.
# Returns 1 if createmany fails, 2 if unlinkmany fails.
# The patch reduces the fail call to the facet name only.
test_0b() {
# this test attempts to trigger a race in the precreation code,
# and must run before any other objects are created on the filesystem
- fail ost $OSTDEV $OST_MOUNT_OPTS
+ fail ost
createmany -o $DIR/$tfile 20 || return 1
unlinkmany $DIR/$tfile 20 || return 2
}
# test_1: after replay_barrier on the mds, create $DIR/$tfile with mcreate,
# fail the mds, and check with $CHECKSTAT that the path is still a file.
# Returns 1 if the $CHECKSTAT check fails; removes the file afterwards.
# The patch reduces the fail call to the facet name only.
test_1() {
replay_barrier mds
mcreate $DIR/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t file $DIR/$tfile || return 1
rm $DIR/$tfile
}
# test_2a: same sequence as an mcreate-based replay test, but the file is
# created with touch: replay_barrier on the mds, touch $DIR/$tfile, fail the
# mds, then check with $CHECKSTAT that the path is a file.
# Returns 1 if the $CHECKSTAT check fails; removes the file afterwards.
# The patch reduces the fail call to the facet name only.
test_2a() {
replay_barrier mds
touch $DIR/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t file $DIR/$tfile || return 1
rm $DIR/$tfile
}
./mcreate $DIR/$tfile
replay_barrier mds
touch $DIR/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t file $DIR/$tfile || return 1
rm $DIR/$tfile
}
replay_barrier mds
mcreate $DIR/$tfile
o_directory $DIR/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t file $DIR/$tfile || return 2
rm $DIR/$tfile
}
do_facet mds "sysctl -w lustre.fail_loc=0x80000114"
touch $DIR/$tfile
do_facet mds "sysctl -w lustre.fail_loc=0"
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t file $DIR/$tfile && return 2
return 0
}
do_facet mds "sysctl -w lustre.fail_loc=0x80000128"
touch $DIR/$tfile
do_facet mds "sysctl -w lustre.fail_loc=0"
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t file $DIR/$tfile && return 2
return 0
for i in `seq 10`; do
echo "tag-$i" > $DIR/$tfile-$i
done
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
for i in `seq 10`; do
grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i"
done
# test_4b: after replay_barrier on the mds, remove everything matching
# $DIR/$tfile-*, fail the mds, and verify the files did not come back.
# Returns 1 if $CHECKSTAT still finds a file matching the pattern; the
# trailing "|| true" turns a failed $CHECKSTAT (nothing found) into success.
# The patch reduces the fail call to the facet name only.
test_4b() {
replay_barrier mds
rm -rf $DIR/$tfile-*
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
}
run_test 4b "|x| rm 10 files"
for i in `seq 220`; do
echo "tag-$i" > $DIR/$tfile-$i
done
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
for i in `seq 220`; do
grep -q "tag-$i" $DIR/$tfile-$i || error "f1c-$i"
done
replay_barrier mds
mkdir $DIR/$tdir
mcreate $DIR/$tdir/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t dir $DIR/$tdir || return 1
$CHECKSTAT -t file $DIR/$tdir/$tfile || return 2
sleep 2
# test_6b: after replay_barrier on the mds, remove $DIR/$tdir recursively,
# fail the mds, and verify the directory did not come back.
# Returns 1 if $CHECKSTAT still reports a directory at $DIR/$tdir; the
# trailing "|| true" turns a failed $CHECKSTAT (nothing found) into success.
# The patch reduces the fail call to the facet name only.
test_6b() {
replay_barrier mds
rm -rf $DIR/$tdir
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t dir $DIR/$tdir && return 1 || true
}
run_test 6b "|X| rmdir"
mkdir $DIR/$tdir
replay_barrier mds
mcreate $DIR/$tdir/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT -t dir $DIR/$tdir || return 1
$CHECKSTAT -t file $DIR/$tdir/$tfile || return 2
rm -fr $DIR/$tdir
multiop $DIR/$tfile mo_c &
MULTIPID=$!
sleep 1
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
ls $DIR/$tfile
$CHECKSTAT -t file $DIR/$tfile || return 1
kill -USR1 $MULTIPID || return 2
replay_barrier mds
mcreate $DIR/$tfile
local old_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
local new_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
echo " old_inum == $old_inum, new_inum == $new_inum"
replay_barrier mds
mv $DIR/$tfile $DIR/$tfile-2
rm -f $DIR/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
$CHECKSTAT $DIR/$tfile && return 1
$CHECKSTAT $DIR/$tfile-2 ||return 2
rm $DIR/$tfile-2
echo "new" > $DIR/$tfile
grep new $DIR/$tfile
grep old $DIR/$tfile-2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
grep new $DIR/$tfile || return 1
grep old $DIR/$tfile-2 || return 2
}
kill -USR1 $pid
wait $pid || return 1
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
[ -e $DIR/$tfile ] && return 2
return 0
}
chmod 0 $DIR/$tfile
$CHECKSTAT -p 0 $DIR/$tfile
replay_barrier mds
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid
wait $pid || return 1
kill -USR1 $pid || return 1
wait $pid || return 2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
[ -e $DIR/$tfile ] && return 3
return 0
}
kill -USR1 $pid
wait $pid || return 2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
[ -e $DIR/$tfile ] && return 3
touch $DIR/h11 || return 4
return 0
mcreate $DIR/$tfile
munlink $DIR/$tfile
mcreate $DIR/$tfile-2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
[ -e $DIR/$tfile ] && return 1
[ -e $DIR/$tfile-2 ] || return 2
munlink $DIR/$tfile-2 || return 3
pid=$!
# give multiop a chance to open
sleep 1
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid || return 1
wait $pid || return 2
$CHECKSTAT -t file $DIR/$tfile || return 3
kill -USR1 $pid
wait $pid || return 2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
[ -e $DIR/$tfile ] && return 3
[ -e $DIR/$tfile-2 ] || return 4
# this touch frequently fails
echo "old" > $DIR/$tfile
mv $DIR/$tfile $DIR/$tfile-2
grep old $DIR/$tfile-2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
grep old $DIR/$tfile-2 || return 2
}
run_test 19 "|X| mcreate, open, write, rename "
sleep 1
rm -f $DIR/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid
wait $pid || return 1
[ -e $DIR/$tfile ] && return 2
rm -f $DIR/$tfile
touch $DIR/g11 || return 1
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid
wait $pid || return 2
[ -e $DIR/$tfile ] && return 3
replay_barrier mds
rm -f $DIR/$tfile
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid
wait $pid || return 1
[ -e $DIR/$tfile ] && return 2
rm -f $DIR/$tfile
touch $DIR/g11 || return 1
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid
wait $pid || return 2
[ -e $DIR/$tfile ] && return 3
sleep 1
replay_barrier mds
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
rm -f $DIR/$tfile
kill -USR1 $pid
wait $pid || return 1
rm -f $DIR/$tfile
replay_barrier mds
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid
wait $pid || return 1
[ -e $DIR/$tfile ] && return 2
kill -USR1 $pid2
wait $pid2 || return 1
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid1
wait $pid1 || return 2
[ -e $DIR/$tfile-1 ] && return 3
rm -f $DIR/$tfile-1
rm -f $DIR/$tfile-2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid1
wait $pid1 || return 1
kill -USR1 $pid2
kill -USR1 $pid2
wait $pid2 || return 1
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid1
wait $pid1 || return 2
[ -e $DIR/$tfile-1 ] && return 3
rm -f $DIR/$tfile-1
rm -f $DIR/$tfile-2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid1
wait $pid1 || return 1
kill -USR1 $pid2
rm -f $DIR/$tfile-2
replay_barrier mds
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid1
wait $pid1 || return 1
kill -USR1 $pid2
replay_barrier mds
rm -f $DIR/$tfile-2
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
kill -USR1 $pid1
wait $pid1 || return 1
kill -USR1 $pid2
replay_barrier mds
touch $DIR/$tfile
checkstat $DIR/$tfile
- facet_failover mds $MDS_MOUNT_OPTS
+ facet_failover mds
cancel_lru_locks MDC
if dmesg | grep "unknown lock cookie"; then
echo "cancel after replay failed"
createmany -o $DIR/$tfile-%d 800
unlinkmany $DIR/$tfile-%d 0 400
replay_barrier mds
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
unlinkmany $DIR/$tfile-%d 400 400
sleep 2
$CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
createmany -o $DIR/$tfile-%d 800
replay_barrier mds
unlinkmany $DIR/$tfile-%d 0 400
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
unlinkmany $DIR/$tfile-%d 400 400
sleep 2
$CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
writeme -s $MOUNT/${tfile}-2 &
WRITE_PID=$!
sleep 1
- facet_failover mds $MDS_MOUNT_OPTS
+ facet_failover mds
#define OBD_FAIL_MDS_CONNECT_NET 0x117
do_facet mds "sysctl -w lustre.fail_loc=0x80000117"
kill -USR1 $PID
unlinkmany $DIR/$tfile-%d 0 400
DEBUG42=`sysctl -n lnet.debug`
sysctl -w lnet.debug=-1
- facet_failover ost $OST_MOUNT_OPTS
+ facet_failover ost
# osc is evicted, fs is smaller (but only with failout OSTs (bug 7287)
#blocks_after=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
# OBD_FAIL_OST_CREATE_NET 0x204
do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
sleep 10
do_facet ost "sysctl -w lustre.fail_loc=0"
test_46() {
dmesg -c >/dev/null
drop_reply "touch $DIR/$tfile"
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
# ironically, the previous test, 45, will cause a real forced close,
# so just look for one for this test
dmesg | grep -i "force closing client file handle for $tfile" && return 1
createmany -o $DIR/$tfile 20 || return 1
# OBD_FAIL_OST_CREATE_NET 0x204
- fail ost $OSTDEV $OST_MOUNT_OPTS
+ fail ost
do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
df $MOUNT || return 2
replay_barrier mds
createmany -o $DIR/$tfile 20 || return 1
# OBD_FAIL_OST_EROFS 0x216
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
do_facet ost "sysctl -w lustre.fail_loc=0x80000216"
df $MOUNT || return 2
multiop $DIR/$tfile s
replay_barrier mds
do_facet mds "sysctl -w lustre.fail_loc=0x8000030c"
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
do_facet mds "sysctl -w lustre.fail_loc=0x0"
$CHECKSTAT -t file $DIR/$tfile-* && return 3 || true
ln -s foo $DIR/$tfile
replay_barrier mds
#drop_reply "cat $DIR/$tfile"
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
sleep 10
}
run_test 56 "don't replay a symlink open request (3440)"
do_facet mds "sysctl -w lustre.fail_loc=0x8000012c"
touch $DIR/$tfile
replay_barrier mds
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
sleep 1
$CHECKSTAT -t file $DIR/$tfile || return 1
do_facet mds "sysctl -w lustre.fail_loc=0x0"
mkdir $DIR/$tdir
createmany -o $DIR/$tdir/$tfile-%d 2500
replay_barrier mds
- fail mds $MDSDEV $MDS_MOUNT_OPTS
+ fail mds
sleep 2
$CHECKSTAT -t file $DIR/$tdir/$tfile-* || return 1
do_facet mds "sysctl -w lustre.fail_loc=0x0"
shift
device=$1
shift
- active=`facet_active $facet`
- echo "mount active=${active}, facet=${facet}"
+ echo "Starting ${device} as /mnt/${facet} (opts:$@)"
mkdir -p /mnt/${facet}
do_facet ${facet} mount -t lustre $@ ${device} /mnt/${facet}
#do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \
# $@ $XMLCONFIG
RC=${PIPESTATUS[0]}
if [ $RC -ne 0 ]; then
+ echo mount -t lustre $@ ${device} /mnt/${facet}
echo Start of ${device} on ${facet} failed ${RC}
+ else
+ label=`do_facet ${facet} e2label ${device}`
+ eval export ${facet}_svc=${label}
+ eval export ${facet}_dev=${device}
+ eval export ${facet}_opt=\"$@\"
+ echo Started ${label}
fi
- label=`do_facet ${facet} e2label ${device}`
- eval export ${facet}_svc=${label}
- echo Started ${label}
return $RC
}
stop() {
facet=$1
shift
- active=`facet_active $facet`
- echo "umount active=${active}, facet=${facet}"
- do_facet ${facet} umount -d $@ /mnt/${facet}
+ local running=`do_facet ${facet} mount | grep -c /mnt/${facet}" "`
+ if [ $running -ne 0 ]; then
+ echo "Stopping /mnt/${facet} (opts:$@)"
+ do_facet ${facet} umount -d $@ /mnt/${facet}
+ fi
#do_facet $facet $LCONF --select ${facet}_svc=${active}_facet \
# --node ${active}_facet --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM \
# $@ --cleanup $XMLCONFIG
# facet_failover FACET
# Shut down and reboot the given facet, start client_df in the background,
# wait for the facet's active host, and start the facet again.
# Old body: forwarded all of its own arguments to start ("start $*"), so
# callers had to supply the device and mount options.
# New body: reads the device and options from the variables named
# ${facet}_dev and ${facet}_opt via indirect expansion (${!dev}, ${!opt}).
# NOTE(review): those variables are exported by the start helper in this same
# patch only after a successful mount - confirm they are always set before
# the first failover.
facet_failover() {
facet=$1
- echo "Failing $facet node `facet_active_host $facet`"
+ echo "Failing $facet on node `facet_active_host $facet`"
shutdown_facet $facet
reboot_facet $facet
client_df &
TO=`facet_active_host $facet`
echo "Failover $facet to $TO"
wait_for $facet
- start $*
+ local dev=${facet}_dev
+ local opt=${facet}_opt
+ start $facet ${!dev} ${!opt}
}
obd_name() {
local svc=${facet}_svc
do_facet $facet $LCTL --device %${!svc} readonly
do_facet $facet $LCTL --device %${!svc} notransno
- do_facet $facet $LCTL mark "$facet REPLAY BARRIER"
+ do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
$LCTL mark "local REPLAY BARRIER on ${!svc}"
}
echo Replay barrier on ${!svc}
do_facet $facet $LCTL --device %${!svc} readonly
do_facet $facet $LCTL --device %${!svc} notransno
- do_facet $facet $LCTL mark "$facet REPLAY BARRIER"
+ do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
$LCTL mark "local REPLAY BARRIER on ${!svc}"
}
}
# fail FACET [ARGS...]
# Run facet_failover with all arguments passed through, then run df on
# $MOUNT and report "post-failover df" through error if that df fails.
# The patch removes the local "facet" assignment, which the remaining body
# does not reference.
fail() {
- local facet=$1
facet_failover $*
df $MOUNT || error "post-failover df: $?"
}
local facet=$1
stop $facet
change_active $facet
- start $*
local svc=${facet}_svc
+ local dev=${facet}_dev
+ local opt=${facet}_opt
+ start $facet ${!dev} ${!opt}
do_facet $facet lctl --device %${!svc} abort_recovery
df $MOUNT || echo "first df failed: $?"
sleep 1
local facet=$1
shift
# failsafe
- umount -d -f /mnt/${facet} || true
+ stop ${facet} -f
rm -f ${facet}active
$MKFS $*
}