+
+# b=2814
+# Make sure that a read to one osc doesn't try to double-unlock its page just
+# because another osc is invalid.  trigger_group_io used to mistakenly return
+# an error if any oscs were invalid, even after having successfully put rpcs
+# on valid oscs.  This was fatal if the caller was ll_readpage, which unlocked
+# the page, guaranteeing that the unlock from the RPC completion would
+# assert on trying to unlock the already-unlocked page.
+test_41() {
+ local f=$MOUNT/$tfile
+ # make sure the start of the file is on ost1
+ lfs setstripe $f $((128 * 1024)) 0 0
+ do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3
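+ # cancel cached client locks so the read below must send new RPCs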
+ cancel_lru_locks OSC
+ # make the osc for ost2 invalid, then read back from ost1
+ local osc2_dev=`$LCTL device_list | \
+ awk '(/ost2.*client_facet/){print $4}' `
+ $LCTL --device %$osc2_dev deactivate
+ do_facet client dd if=$f of=/dev/null bs=4k count=1 || return 4
+ $LCTL --device %$osc2_dev activate
+ return 0
+}
+run_test 41 "read from a valid osc while other oscs are invalid"
+
+# test MDS recovery after ost failure
+test_42() {
+ # df -P keeps the output on one line, so $2 is always the total blocks
+ blocks=`df -P $MOUNT | tail -1 | awk '{ print $2 }'`
+ createmany -o $DIR/$tfile-%d 800
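+ # updates after the barrier are not committed on the OST and are lost on failover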
+ replay_barrier ost
+ unlinkmany $DIR/$tfile-%d 0 400
+ facet_failover ost
+
+ # osc is evicted, fs is smaller
+ blocks_after=`df -P $MOUNT | tail -1 | awk '{ print $2 }'`
+ [ $blocks_after -lt $blocks ] || return 1
+ echo "waiting for MDS to time out and recover"
+ sleep $((TIMEOUT * 2))
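+ # remove the remaining files (400-799) now that recovery is complete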
+ unlinkmany $DIR/$tfile-%d 400 400
+ $CHECKSTAT -t file $DIR/$tfile-* && return 2 || true
+}
+run_test 42 "recovery after ost failure"
+
+# b=2530
+# directory orphans can't be unlinked from the PENDING directory
+test_43() {
+ replay_barrier mds
+
+ # OBD_FAIL_OST_CREATE_NET 0x204
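+ # the 0x80000000 bit (OBD_FAIL_ONCE) makes the failure trigger only once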
+ do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
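+ # restart the MDS; its create RPCs to the OST during recovery will be dropped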
+ facet_failover mds
+ df $MOUNT || return 1
+ sleep 10
+ do_facet ost "sysctl -w lustre.fail_loc=0"
+
+ return 0
+}
+run_test 43 "mds osc import failure during recovery; don't LBUG"
+
+test_44() {
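+ # find the device number of the mdc for this mount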
+ mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
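+ # 0x701 is assumed to be OBD_FAIL_TGT_CONN_RACE, matching the connect race below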
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
+ $LCTL --device $mdcdev recover
+ df $MOUNT
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+ return 0
+}
+run_test 44 "race in target handle connect"
+
+# Handle failed close
+test_45() {
+ mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
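+ # reconnect first so the import starts this test in a known-good state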
+ $LCTL --device $mdcdev recover
+
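+ # open the file and hold it open; multiop closes it on SIGUSR1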
+ multiop $DIR/$tfile O_c &
+ pid=$!
+ sleep 1
+
+ # This will cause the CLOSE to fail before even
+ # allocating a reply buffer
+ $LCTL --device $mdcdev deactivate
+
+ # try the close
+ kill -USR1 $pid
+ wait $pid || return 1
+
+ $LCTL --device $mdcdev activate
+
+ $CHECKSTAT -t file $DIR/$tfile || return 2
+ return 0
+}
+run_test 45 "Handle failed close"
+