[ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
start mds $MDSLCONFARGS --reformat
zconf_mount `hostname` $MOUNT
+ echo 0x3f0410 > /proc/sys/portals/debug
}
$SETUP
}
run_test 2b "touch"
-test_3() {
+test_3a() {
+ # Renamed from test_3 by this change. Creates a regular file with mcreate,
+ # runs o_directory on it (presumably an O_DIRECTORY open that is expected
+ # to fail, per the test title -- confirm against the o_directory helper),
+ # and returns 2 if $CHECKSTAT no longer reports a regular file.
replay_barrier mds
mcreate $DIR/$tfile
o_directory $DIR/$tfile
$CHECKSTAT -t file $DIR/$tfile || return 2
rm $DIR/$tfile
}
-run_test 3 "replay failed open"
+run_test 3a "replay failed open(O_DIRECTORY)"
+
+test_3b() {
+ # Sets an MDS fail_loc, touches a file while it is set, clears the
+ # fail_loc and then calls "fail mds". The test passes (returns 0) only
+ # when the file is NOT a regular file afterwards.
+ replay_barrier mds
+#define OBD_FAIL_MDS_OPEN_PACK | OBD_FAIL_ONCE
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000114"
+ # the touch runs while the fail_loc above is set; its status is ignored
+ touch $DIR/$tfile
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+ fail mds
+ # inverted check: a successful checkstat means the file exists -> return 2
+ $CHECKSTAT -t file $DIR/$tfile && return 2
+ return 0
+}
+run_test 3b "replay failed open -ENOMEM"
+
+test_3c() {
+ # Same sequence as test_3b but with a different fail_loc value
+ # (0x80000128): set the fail_loc on the MDS, touch a file, clear the
+ # fail_loc, call "fail mds", then require that the file is NOT a
+ # regular file afterwards.
+ replay_barrier mds
+#define OBD_FAIL_MDS_ALLOC_OBDO | OBD_FAIL_ONCE
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000128"
+ # the touch runs while the fail_loc above is set; its status is ignored
+ touch $DIR/$tfile
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+ fail mds
+
+ # inverted check: a successful checkstat means the file exists -> return 2
+ $CHECKSTAT -t file $DIR/$tfile && return 2
+ return 0
+}
+run_test 3c "replay failed open -ENOMEM"
test_4() {
replay_barrier mds
done
fail mds
for i in `seq 10`; do
- grep -q "tag-$i" $DIR/$tfile-$i || error "f1c-$i"
+ grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i"
done
}
run_test 4 "|x| 10 open(O_CREAT)s"
sleep 1
rm -f $DIR/$tfile
touch $DIR/$tfile-2 || return 1
+ echo "pid: $pid will close"
kill -USR1 $pid
wait $pid || return 2
}
run_test 40 "cause recovery in ptlrpc, ensure IO continues"
+
+#b=2814
+# make sure that a read to one osc doesn't try to double-unlock its page just
+# because another osc is invalid. trigger_group_io used to mistakenly return
+# an error if any oscs were invalid even after having successfully put rpcs
+# on valid oscs. This was fatal if the caller was ll_readpage who unlocked
+# the page, guaranteeing that the unlock from the RPC completion would
+# assert on trying to unlock the unlocked page.
+test_41() {
+ # Writes one 4k block to a file whose striping starts on the first OST,
+ # drops the cached locks, deactivates the osc device matching "ost2" and
+ # reads the block back.
+ # Returns 3 if either dd fails, 0 otherwise. The second osc is always
+ # re-activated before returning.
+ local f=$MOUNT/$tfile
+ local rc=0
+ # make sure the start of the file is ost1
+ lfs setstripe $f $((128 * 1024)) 0 0
+ do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3
+ cancel_lru_locks OSC
+ # fail ost2 and read from ost1
+ local osc2_dev=`$LCTL device_list | \
+ awk '(/ost2.*client_facet/){print $4}' `
+ $LCTL --device %$osc2_dev deactivate
+ # Record a read failure instead of returning at once: returning here
+ # would skip the activate below and leave the osc deactivated for every
+ # test that runs afterwards.
+ do_facet client dd if=$f of=/dev/null bs=4k count=1 || rc=3
+ $LCTL --device %$osc2_dev activate
+ return $rc
+}
+run_test 41 "read from a valid osc while other oscs are invalid"
+
+# test MDS recovery after ost failure
+test_42() {
+ # Creates 800 files, unlinks the first 400 after a replay barrier on the
+ # ost, fails the ost over and checks that the filesystem's reported size
+ # shrank. It then waits 2 * TIMEOUT, unlinks the remaining 400 files and
+ # verifies none of them is left.
+ # Returns 1 if the size did not shrink, 2 if a file survived.
+ #
+ # df -P prints each filesystem on exactly one line even when the device
+ # name is long, so the total block count is always field 2 of the last
+ # line. Plain df may or may not wrap the line, which moves the count
+ # between field 1 and field 2.
+ blocks=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
+ createmany -o $DIR/$tfile-%d 800
+ replay_barrier ost
+ unlinkmany $DIR/$tfile-%d 0 400
+ facet_failover ost
+
+ # osc is evicted, fs is smaller
+ blocks_after=`df -P $MOUNT | tail -n 1 | awk '{ print $2 }'`
+ # quoted so an empty value is reported as a failed comparison rather
+ # than a malformed test expression
+ [ "$blocks_after" -lt "$blocks" ] || return 1
+ echo wait for MDS to timeout and recover
+ sleep $((TIMEOUT * 2))
+ unlinkmany $DIR/$tfile-%d 400 400
+ # inverted check: any remaining file is a failure; "|| true" keeps the
+ # function's status 0 when checkstat (correctly) finds nothing
+ $CHECKSTAT -t file $DIR/$tfile-* && return 2 || true
+}
+run_test 42 "recovery after ost failure"
+
+# b=2530
+# directory orphans can't be unlinked from PENDING directory
+test_43() {
+ # Sets a fail_loc on the ost, fails the mds over while it is set and
+ # checks that df on the mount point still succeeds.
+ # Returns 1 if df fails, 0 otherwise. The ost fail_loc is reset on
+ # every path.
+ local rc=0
+ replay_barrier mds
+
+ # OBD_FAIL_OST_CREATE_NET 0x204
+ do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
+ facet_failover mds
+ # Record a df failure instead of returning immediately: returning here
+ # would leave the fail_loc armed on the ost for later tests.
+ if df $MOUNT; then
+  sleep 10
+ else
+  rc=1
+ fi
+ do_facet ost "sysctl -w lustre.fail_loc=0"
+
+ return $rc
+}
+run_test 43 "mds osc import failure during recovery; don't LBUG"
+
+test_44() {
+ # Sets a fail_loc on the mds, asks lctl to "recover" the device whose
+ # /proc/fs/lustre/devices entry matches mds_svc_MNT, runs df on the mount
+ # point and clears the fail_loc again. Always returns 0: the df status is
+ # not checked.
+ mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+ # NOTE(review): 0x80000701 presumably selects the connect-race fail point
+ # named in the test title -- confirm against the fail_loc definitions.
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
+ $LCTL --device $mdcdev recover
+ df $MOUNT
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+ return 0
+}
+run_test 44 "race in target handle connect"
+
+# Handle failed close
+test_45() {
+ # Starts multiop on a file in the background, deactivates the device
+ # whose /proc/fs/lustre/devices entry matches mds_svc_MNT, then signals
+ # multiop with USR1 so that it attempts the close while the device is
+ # down.
+ # Returns 1 if multiop exits non-zero, 2 if the file is not a regular
+ # file afterwards, 0 otherwise. The device is re-activated on every path.
+ local rc=0
+ mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+ $LCTL --device $mdcdev recover
+
+ multiop $DIR/$tfile O_c &
+ pid=$!
+ sleep 1
+
+ # This will cause the CLOSE to fail before even
+ # allocating a reply buffer
+ $LCTL --device $mdcdev deactivate
+
+ # try the close; record a failure instead of returning right away so
+ # the device is never left deactivated for later tests
+ kill -USR1 $pid
+ wait $pid || rc=1
+
+ $LCTL --device $mdcdev activate
+ [ $rc -eq 0 ] || return $rc
+
+ $CHECKSTAT -t file $DIR/$tfile || return 2
+ return 0
+}
+run_test 45 "Handle failed close"
+
equals_msg test complete, cleaning up
$CLEANUP