add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
add_client client --mds mds1_svc --lov lov1 --path $MOUNT
fi
+
+ add_lov lov1 mds --stripe_sz $STRIPE_BYTES \
+ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0
+ add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE
+ add_ost ost2 --lov lov1 --dev ${OSTDEV}-2 --size $OSTSIZE
+ add_client client mds --lov lov1 --path $MOUNT
}
build_test_filter
start ost --reformat $OSTLCONFARGS
start ost2 --reformat $OSTLCONFARGS
[ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
- if [ "$MDSCOUNT" -gt 1 ]; then
- for num in `seq $MDSCOUNT`; do
- start mds$num $MDSLCONFARGS --reformat
- done
- else
- start mds $MDSLCONFARGS --reformat
- fi
- zconf_mount `hostname` $MOUNT
- echo 0x3f0410 > /proc/sys/portals/debug
+ start mds $MDSLCONFARGS --reformat
+ grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
}
$SETUP
}
run_test 0 "empty replay"
+test_0b() {
+ # this test attempts to trigger a race in the precreation code,
+ # and must run before any other objects are created on the filesystem
+ fail ost
+ createmany -o $DIR/$tfile 20 || return 1
+ unlinkmany $DIR/$tfile 20 || return 2
+}
+run_test 0b "ensure object created after recover exists. (3284)"
+
test_1() {
replay_barrier mds2
mcreate $DIR/$tfile
sleep 1
rm -f $DIR/$tfile
touch $DIR/$tfile-2 || return 1
+ echo "pid: $pid will close"
kill -USR1 $pid
wait $pid || return 2
# test MDS recovery after ost failure
test_42() {
# Remember the first field of the last line that df prints for $MOUNT
# before the OST is failed over.
# NOTE(review): this is only a block count if df wrapped the device name
# onto a line of its own - confirm against real df output.
- blocks=`df $MOUNT | tail -1 | awk '{ print $1 }'`
+ blocks=`df $MOUNT | tail -n 1 | awk '{ print $1 }'`
createmany -o $DIR/$tfile-%d 800
replay_barrier ost
# presumably "0 400" is start index and count - verify against unlinkmany
unlinkmany $DIR/$tfile-%d 0 400
facet_failover ost
# osc is evicted, fs is smaller
- blocks_after=`df $MOUNT | tail -1 | awk '{ print $1 }'`
+ blocks_after=`df $MOUNT | tail -n 1 | awk '{ print $1 }'`
# return 1 unless the value sampled after failover is strictly smaller
[ $blocks_after -lt $blocks ] || return 1
echo wait for MDS to timeout and recover
sleep $((TIMEOUT * 2))
unlinkmany $DIR/$tfile-%d 400 400
# If $CHECKSTAT still succeeds for the $tfile-* pattern the test fails;
# the added line returns 2 so this case is distinguishable from the
# "return 1" above. Otherwise "|| true" forces a zero status.
- $CHECKSTAT -t file $DIR/$tfile-* && return 1 || true
+ $CHECKSTAT -t file $DIR/$tfile-* && return 2 || true
}
run_test 42 "recovery after ost failure"
# b=2530
-# directory orphans can't be unlinked from PENDING directory
+# timeout in MDS/OST recovery RPC will LBUG MDS
test_43() {
replay_barrier mds
}
run_test 44 "race in target handle connect"
+# Handle failed close
+#
+# Deactivates the MDC device before signalling multiop, so that the CLOSE
+# fails, then reactivates the device and checks that the file is there.
+# Returns 1 if the multiop process exits non-zero, 2 if $CHECKSTAT fails,
+# 0 otherwise.
+test_45() {
+ local rc=0
+
+ mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+ $LCTL --device $mdcdev recover
+
+ multiop $DIR/$tfile O_c &
+ pid=$!
+ sleep 1
+
+ # This will cause the CLOSE to fail before even
+ # allocating a reply buffer
+ $LCTL --device $mdcdev deactivate
+
+ # try the close; record the outcome instead of returning right away
+ kill -USR1 $pid
+ wait $pid || rc=1
+
+ # Always reactivate: returning while the device is still deactivated
+ # would leave it that way for everything that runs afterwards.
+ $LCTL --device $mdcdev activate
+ [ $rc -eq 0 ] || return $rc
+
+ $CHECKSTAT -t file $DIR/$tfile || return 2
+ return 0
+}
+run_test 45 "Handle failed close"
+
+test_46() {
+ dmesg -c >/dev/null
+ drop_reply "touch $DIR/$tfile"
+ fail mds
+ # ironically, the previous test, 45, will cause a real forced close,
+ # so just look for one for this test
+ dmesg | grep -i "force closing client file handle for $tfile" && return 1
+ return 0
+}
+run_test 46 "Don't leak file handle after open resend (3325)"
+
+# b=2824
+test_47() {
+
+ # create some files to make sure precreate has been done on all
+ # OSTs. (just in case this test is run independently)
+ createmany -o $DIR/$tfile 20 || return 1
+
+ # OBD_FAIL_OST_CREATE_NET 0x204
+ fail ost
+ do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
+ df $MOUNT || return 2
+
+ # let the MDS discover the OST failure, attempt to recover, fail
+ # and recover again.
+ sleep $((3 * TIMEOUT))
+
+ # Without 2824, this createmany would hang
+ createmany -o $DIR/$tfile 20 || return 3
+ unlinkmany $DIR/$tfile 20 || return 4
+
+ do_facet ost "sysctl -w lustre.fail_loc=0"
+ return 0
+}
+run_test 47 "MDS->OSC failure during precreate cleanup (2824)"
+
equals_msg test complete, cleaning up
$CLEANUP