build_test_filter
-assert_env mds_HOST ost1_HOST ost2_HOST client_HOST LIVE_CLIENT
+assert_env MDSCOUNT mds1_HOST ost1_HOST ost2_HOST client_HOST LIVE_CLIENT
####
# Initialize all the ostN_HOST
gen_config() {
rm -f $XMLCONFIG
- add_mds mds --dev $MDSDEV --size $MDSSIZE --journal-size $MDSJOURNALSIZE
-
- if [ ! -z "$mdsfailover_HOST" ]; then
- add_mdsfailover mds --dev $MDSDEV --size $MDSSIZE
+ if [ "$MDSCOUNT" -gt 1 ]; then
+ add_lmv lmv1
+ for mds in `mds_list`; do
+ MDSDEV=$TMP/${mds}-`hostname`
+ add_mds $mds --dev $MDSDEV --size $MDSSIZE --lmv lmv1
+ done
+ MDS=lmv1
+ add_lov_to_lmv lov1 lmv1 --stripe_sz $STRIPE_BYTES \
+ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
+ else
+ add_mds mds1 --dev $MDSDEV --size $MDSSIZE
+ if [ ! -z "$mds1failover_HOST" ]; then
+ add_mdsfailover mds1 --dev $MDSDEV --size $MDSSIZE
+ fi
+ add_lov lov1 mds1 --stripe_sz $STRIPE_BYTES \
+ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
+ MDS=mds1_svc
fi
- add_lov lov1 mds --stripe_sz $STRIPE_BYTES\
- --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
for i in `seq $NUMOST`; do
dev=`printf $OSTDEV $i`
add_ost ost$i --lov lov1 --dev $dev --size $OSTSIZE \
--journal-size $OSTJOURNALSIZE
done
-
- add_client client mds --lov lov1 --path $MOUNT
+ add_client client --mds $MDS --lov lov1 --path $MOUNT
}
setup() {
start ost$i ${REFORMAT} $OSTLCONFARGS
done
[ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
- wait_for mds
- start mds $MDSLCONFARGS ${REFORMAT}
+ for mds in `mds_list`; do
+ wait_for $mds
+ start $mds $MDSLCONFARGS ${REFORMAT}
+ done
while ! do_node $CLIENTS "ls -d $LUSTRE" > /dev/null; do sleep 5; done
grep " $MOUNT " /proc/mounts || zconf_mount $CLIENTS $MOUNT
cleanup() {
zconf_umount $CLIENTS $MOUNT
- stop mds ${FORCE} $MDSLCONFARGS || :
+ for mds in `mds_list`; do
+ stop $mds ${FORCE} $MDSLCONFARGS || :
+ done
for i in `seq $NUMOST`; do
stop ost$i ${REFORMAT} ${FORCE} $OSTLCONFARGS || :
done
test_0() {
echo "Failover MDS"
- facet_failover mds
+ facet_failover mds1
echo "Waiting for df pid: $DFPID"
wait $DFPID || return 1
client_df
echo "Failing MDS"
- shutdown_facet mds
- reboot_facet mds
+ shutdown_facet mds1
+ reboot_facet mds1
# prepare for MDS failover
- change_active mds
- reboot_facet mds
+ change_active mds1
+ reboot_facet mds1
client_df &
DFPID=$!
start ost1
echo "Failover MDS"
- wait_for mds
- start mds
+ wait_for mds1
+ start mds1
#Check FS
wait $DFPID
echo "Verify Lustre filesystem is up and running"
#MDS Portion
- facet_failover mds
+ facet_failover mds1
wait $DFPID || echo df failed: $?
#Check FS
#MDS Portion
echo "Failing MDS"
- shutdown_facet mds
- reboot_facet mds
+ shutdown_facet mds1
+ reboot_facet mds1
# prepare for MDS failover
- change_active mds
- reboot_facet mds
+ change_active mds1
+ reboot_facet mds1
client_df &
DFPID=$!
start ost1
echo "Failover MDS"
- wait_for mds
- start mds
+ wait_for mds1
+ start mds1
#Check FS
wait $DFPID
#MDS Portion
echo "Failing MDS"
- facet_failover mds
+ facet_failover mds1
#Check FS
echo "Test Lustre stability after MDS failover"
--stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
else
add_mds mds1 --dev $MDSDEV --size $MDSSIZE
- if [ ! -z "$mdsfailover_HOST" ]; then
+ if [ ! -z "$mds1failover_HOST" ]; then
add_mdsfailover mds1 --dev $MDSDEV --size $MDSSIZE
fi
add_lov lov1 mds1 --stripe_sz $STRIPE_BYTES \
run_test 0b "ensure object created after recover exists. (3284)"
test_1() {
- replay_barrier mds2
+ replay_barrier mds1
mcreate $DIR/$tfile
- fail mds2
+ fail mds1
$CHECKSTAT -t file $DIR/$tfile || return 1
rm $DIR/$tfile
}
run_test 1 "simple create"
test_2a() {
- replay_barrier mds
+ replay_barrier mds1
touch $DIR/$tfile
fail mds1
$CHECKSTAT -t file $DIR/$tfile || return 1
test_2b() {
./mcreate $DIR/$tfile
- replay_barrier mds
+ replay_barrier mds1
touch $DIR/$tfile
fail mds1
$CHECKSTAT -t file $DIR/$tfile || return 1
run_test 2b "touch"
test_3a() {
- replay_barrier mds
+ replay_barrier mds1
mcreate $DIR/$tfile
o_directory $DIR/$tfile
fail mds1
run_test 3a "replay failed open(O_DIRECTORY)"
test_3b() {
- replay_barrier mds
+ replay_barrier mds1
#define OBD_FAIL_MDS_OPEN_PACK | OBD_FAIL_ONCE
- do_facet mds "sysctl -w lustre.fail_loc=0x80000114"
+ # set fail_loc on mds1, the same facet the replay barrier was placed on
+ do_facet mds1 "sysctl -w lustre.fail_loc=0x80000114"
touch $DIR/$tfile
run_test 3b "replay failed open -ENOMEM"
test_3c() {
- replay_barrier mds
+ replay_barrier mds1
#define OBD_FAIL_MDS_ALLOC_OBDO | OBD_FAIL_ONCE
- do_facet mds "sysctl -w lustre.fail_loc=0x80000128"
+ # set fail_loc on mds1, the same facet the replay barrier was placed on
+ do_facet mds1 "sysctl -w lustre.fail_loc=0x80000128"
touch $DIR/$tfile
run_test 3c "replay failed open -ENOMEM"
test_4() {
- replay_barrier mds
+ replay_barrier mds1
for i in `seq 10`; do
echo "tag-$i" > $DIR/$tfile-$i
done
run_test 4 "|x| 10 open(O_CREAT)s"
test_4b() {
- replay_barrier mds
+ replay_barrier mds1
rm -rf $DIR/$tfile-*
fail mds1
$CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
# The idea is to get past the first block of precreated files on both
# osts, and then replay.
test_5() {
- replay_barrier mds
+ replay_barrier mds1
for i in `seq 220`; do
echo "tag-$i" > $DIR/$tfile-$i
done
test_6() {
- replay_barrier mds
+ replay_barrier mds1
mkdir $DIR/$tdir
mcreate $DIR/$tdir/$tfile
fail mds1
run_test 6 "mkdir + contained create"
test_6b() {
- replay_barrier mds
+ replay_barrier mds1
rm -rf $DIR/$tdir
fail mds1
$CHECKSTAT -t dir $DIR/$tdir && return 1 || true
test_7() {
mkdir $DIR/$tdir
- replay_barrier mds
+ replay_barrier mds1
mcreate $DIR/$tdir/$tfile
fail mds1
$CHECKSTAT -t dir $DIR/$tdir || return 1
run_test 7 "mkdir |X| contained create"
test_8() {
- replay_barrier mds
+ replay_barrier mds1
multiop $DIR/$tfile mo_c &
MULTIPID=$!
sleep 1
run_test 8 "creat open |X| close"
test_9() {
- replay_barrier mds
+ replay_barrier mds1
mcreate $DIR/$tfile
local old_inum=`ls -i $DIR/$tfile | awk '{print $1}'`
fail mds1
test_10() {
mcreate $DIR/$tfile
- replay_barrier mds
+ replay_barrier mds1
mv $DIR/$tfile $DIR/$tfile-2
rm -f $DIR/$tfile
fail mds1
mcreate $DIR/$tfile
echo "old" > $DIR/$tfile
mv $DIR/$tfile $DIR/$tfile-2
- replay_barrier mds
+ replay_barrier mds1
echo "new" > $DIR/$tfile
grep new $DIR/$tfile
grep old $DIR/$tfile-2
# give multiop a chance to open
sleep 1
rm -f $DIR/$tfile
- replay_barrier mds
+ replay_barrier mds1
kill -USR1 $pid
wait $pid || return 1
sleep 1
chmod 0 $DIR/$tfile
$CHECKSTAT -p 0 $DIR/$tfile
- replay_barrier mds
+ replay_barrier mds1
fail mds1
kill -USR1 $pid
wait $pid || return 1
# give multiop a chance to open
sleep 1
rm -f $DIR/$tfile
- replay_barrier mds
+ replay_barrier mds1
kill -USR1 $pid || return 1
wait $pid || return 2
# give multiop a chance to open
sleep 1
rm -f $DIR/$tfile
- replay_barrier mds
+ replay_barrier mds1
touch $DIR/g11 || return 1
kill -USR1 $pid
wait $pid || return 2
test_16() {
- replay_barrier mds
+ replay_barrier mds1
mcreate $DIR/$tfile
munlink $DIR/$tfile
mcreate $DIR/$tfile-2
run_test 16 "|X| open(O_CREAT), unlink, touch new, unlink new"
test_17() {
- replay_barrier mds
+ replay_barrier mds1
multiop $DIR/$tfile O_c &
pid=$!
# give multiop a chance to open
run_test 17 "|X| open(O_CREAT), |replay| close"
test_18() {
- replay_barrier mds
+ replay_barrier mds1
multiop $DIR/$tfile O_tSc &
pid=$!
# give multiop a chance to open
# bug 1855 (a simpler form of test_11 above)
test_19() {
- replay_barrier mds
+ replay_barrier mds1
mcreate $DIR/$tfile
echo "old" > $DIR/$tfile
mv $DIR/$tfile $DIR/$tfile-2
run_test 19 "|X| mcreate, open, write, rename "
test_20() {
- replay_barrier mds
+ replay_barrier mds1
multiop $DIR/$tfile O_tSc &
pid=$!
# give multiop a chance to open
run_test 20 "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
test_21() {
- replay_barrier mds
+ replay_barrier mds1
multiop $DIR/$tfile O_tSc &
pid=$!
# give multiop a chance to open
# give multiop a chance to open
sleep 1
- replay_barrier mds
+ replay_barrier mds1
rm -f $DIR/$tfile
fail mds1
# give multiop a chance to open
sleep 1
- replay_barrier mds
+ replay_barrier mds1
rm -f $DIR/$tfile
touch $DIR/g11 || return 1
# give multiop a chance to open
sleep 1
- replay_barrier mds
+ replay_barrier mds1
fail mds1
rm -f $DIR/$tfile
kill -USR1 $pid
sleep 1
rm -f $DIR/$tfile
- replay_barrier mds
+ replay_barrier mds1
fail mds1
kill -USR1 $pid
wait $pid || return 1
run_test 25 "open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
test_26() {
- replay_barrier mds
+ replay_barrier mds1
multiop $DIR/$tfile-1 O_tSc &
pid1=$!
multiop $DIR/$tfile-2 O_tSc &
run_test 26 "|X| open(O_CREAT), unlink two, close one, replay, close one (test mds_cleanup_orphans)"
test_27() {
- replay_barrier mds
+ replay_barrier mds1
multiop $DIR/$tfile-1 O_tSc &
pid1=$!
multiop $DIR/$tfile-2 O_tSc &
pid2=$!
# give multiop a chance to open
sleep 1
- replay_barrier mds
+ replay_barrier mds1
rm -f $DIR/$tfile-1
rm -f $DIR/$tfile-2
kill -USR1 $pid2
pid2=$!
# give multiop a chance to open
sleep 1
- replay_barrier mds
+ replay_barrier mds1
rm -f $DIR/$tfile-1
rm -f $DIR/$tfile-2
rm -f $DIR/$tfile-1
rm -f $DIR/$tfile-2
- replay_barrier mds
+ replay_barrier mds1
fail mds1
kill -USR1 $pid1
wait $pid1 || return 1
sleep 1
rm -f $DIR/$tfile-1
- replay_barrier mds
+ replay_barrier mds1
rm -f $DIR/$tfile-2
fail mds1
kill -USR1 $pid1
# Abort recovery before client complete
test_33() {
- replay_barrier mds
+ replay_barrier mds1
touch $DIR/$tfile
- fail_abort mds
+ fail_abort mds1
# this file should be gone, because the replay was aborted
$CHECKSTAT -t file $DIR/$tfile && return 1
return 0
sleep 1
rm -f $DIR/$tfile
- replay_barrier mds
- fail_abort mds
+ replay_barrier mds1
+ fail_abort mds1
kill -USR1 $pid
[ -e $DIR/$tfile ] && return 1
sync
sync
sleep 1
# give a chance to remove from MDS
- fail_abort mds
+ fail_abort mds1
$CHECKSTAT -t file $DIR/$tfile && return 1 || true
}
run_test 35 "test recovery from llog for unlink op"
# b=2432 resent cancel after replay uses wrong cookie,
# so don't resend cancels
test_36() {
- replay_barrier mds
+ replay_barrier mds1
touch $DIR/$tfile
checkstat $DIR/$tfile
- facet_failover mds
+ facet_failover mds1
cancel_lru_locks MDC
if dmesg | grep "unknown lock cookie"; then
echo "cancel after replay failed"
sleep 1
rmdir $DIR/$tfile
- replay_barrier mds
+ replay_barrier mds1
# clear the dmesg buffer so we only see errors from this recovery
dmesg -c >/dev/null
- fail_abort mds
+ fail_abort mds1
kill -USR1 $pid
dmesg | grep "mds_unlink_orphan.*error .* unlinking orphan" && return 1
sync
test_38() {
createmany -o $DIR/$tfile-%d 800
unlinkmany $DIR/$tfile-%d 0 400
- replay_barrier mds
+ replay_barrier mds1
fail mds1
unlinkmany $DIR/$tfile-%d 400 400
sleep 2
test_39() {
createmany -o $DIR/$tfile-%d 800
- replay_barrier mds
+ replay_barrier mds1
unlinkmany $DIR/$tfile-%d 0 400
fail mds1
unlinkmany $DIR/$tfile-%d 400 400
writeme -s $MOUNT/${tfile}-2 &
WRITE_PID=$!
sleep 1
- facet_failover mds
+ facet_failover mds1
#define OBD_FAIL_MDS_CONNECT_NET 0x117
- do_facet mds "sysctl -w lustre.fail_loc=0x80000117"
+ # set fail_loc on mds1, the facet that was just failed over
+ do_facet mds1 "sysctl -w lustre.fail_loc=0x80000117"
kill -USR1 $PID
# b=2530
# timeout in MDS/OST recovery RPC will LBUG MDS
test_43() {
- replay_barrier mds
+ replay_barrier mds1
# OBD_FAIL_OST_CREATE_NET 0x204
do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
- facet_failover mds
+ facet_failover mds1
df $MOUNT || return 1
sleep 10
do_facet ost "sysctl -w lustre.fail_loc=0"