+ fail $SINGLEMDS
+ wait $pid || return 1
+ return 0
+}
+run_test 23 "client hang when closing a file after MDS crash"
+
+test_24() { # bug 2248 - eviction fails writeback but app doesn't see it
+ mkdir -p $DIR/$tdir
+ cancel_lru_locks osc
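+	# multiop op string (reading of multiop's mini-language; see the multiop
+	# helper): O=open(O_CREAT|O_RDWR), w=write, y=fsync, _=wait for SIGUSR1, c=close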
+ multiop_bg_pause $DIR/$tdir/$tfile Owy_wyc || return 1
+ MULTI_PID=$!
+ ost_evict_client
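+	# wake multiop from its pause; the pending fsync/close should now
+	# fail with the eviction error rather than succeed silently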
+ kill -USR1 $MULTI_PID
+ wait $MULTI_PID
+ rc=$?
+ lctl set_param fail_loc=0x0
+ client_reconnect
+ [ $rc -eq 0 ] && error_ignore 5494 "multiop didn't fail fsync: rc $rc" || true
+}
+run_test 24 "fsync error (should return error)"
+
+test_26a() { # was test_26 bug 5921 - evict dead exports by pinger
+# this test can only run from a client on a separate node.
+ remote_ost || { skip "local OST" && return 0; }
+ remote_mds || { skip "local MDS" && return 0; }
+ OST_FILE=obdfilter.${ost1_svc}.num_exports
+ OST_EXP="`do_facet ost1 lctl get_param -n $OST_FILE`"
+ OST_NEXP1=`echo $OST_EXP | cut -d' ' -f2`
+ echo starting with $OST_NEXP1 OST exports
+#define OBD_FAIL_PTLRPC_DROP_RPC 0x505
+ do_facet client lctl set_param fail_loc=0x505
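+	# with its ping RPCs dropped, this client looks dead to the OST
+	# pinger, which should then evict the stale export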
+ # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
+ # But if there's a race to start the evictor from various obds,
+ # the loser might have to wait for the next ping.
+ echo Waiting for $(($TIMEOUT * 8)) secs
+ sleep $(($TIMEOUT * 8))
+ OST_EXP="`do_facet ost1 lctl get_param -n $OST_FILE`"
+ OST_NEXP2=`echo $OST_EXP | cut -d' ' -f2`
+ echo ending with $OST_NEXP2 OST exports
+ do_facet client lctl set_param fail_loc=0x0
+ [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted"
+ return 0
+}
+run_test 26a "evict dead exports"
+
+test_26b() { # bug 10140 - evict dead exports by pinger
+ client_df
+ zconf_mount `hostname` $MOUNT2 || error "Failed to mount $MOUNT2"
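+	# the second mount creates a second export on both the MDS and the OST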
+	sleep 1 # wait for connections to be established
+ MDS_FILE=mdt.${mds1_svc}.num_exports
+ MDS_NEXP1="`do_facet $SINGLEMDS lctl get_param -n $MDS_FILE | cut -d' ' -f2`"
+ OST_FILE=obdfilter.${ost1_svc}.num_exports
+ OST_NEXP1="`do_facet ost1 lctl get_param -n $OST_FILE | cut -d' ' -f2`"
+ echo starting with $OST_NEXP1 OST and $MDS_NEXP1 MDS exports
+ zconf_umount `hostname` $MOUNT2 -f
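+	# the forced umount disconnects without notifying the servers,
+	# leaving dead exports behind for the pinger evictor to clean up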
+ # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
+ # But if there's a race to start the evictor from various obds,
+ # the loser might have to wait for the next ping.
+ echo Waiting for $(($TIMEOUT * 3)) secs
+ sleep $(($TIMEOUT * 3))
+ OST_NEXP2="`do_facet ost1 lctl get_param -n $OST_FILE | cut -d' ' -f2`"
+ MDS_NEXP2="`do_facet $SINGLEMDS lctl get_param -n $MDS_FILE | cut -d' ' -f2`"
+ echo ending with $OST_NEXP2 OST and $MDS_NEXP2 MDS exports
+ [ $OST_NEXP1 -le $OST_NEXP2 ] && error "client not evicted from OST"
+ [ $MDS_NEXP1 -le $MDS_NEXP2 ] && error "client not evicted from MDS"
+ return 0
+}
+run_test 26b "evict dead exports"
+
+test_27() {
+ remote_mds && { skip "remote MDS" && return 0; }
+ mkdir -p $DIR/$tdir
+ writemany -q -a $DIR/$tdir/$tfile 0 5 &
+ CLIENT_PID=$!
+ sleep 1
+ FAILURE_MODE="SOFT"
+ facet_failover $SINGLEMDS
+#define OBD_FAIL_OSC_SHUTDOWN 0x407
+ lctl set_param fail_loc=0x80000407
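+	# 0x80000000 is the "fail once" flag; -2147482617 below is simply
+	# 0x80000407 read as a signed 32-bit value, so the loop polls until
+	# the one-shot fail_loc has fired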
+ # need to wait for reconnect
+ echo -n waiting for fail_loc
+ while [ `lctl get_param -n fail_loc` -eq -2147482617 ]; do
+ sleep 1
+ echo -n .
+ done
+ facet_failover $SINGLEMDS
+	# no crashes allowed!
+ kill -USR1 $CLIENT_PID
+ wait $CLIENT_PID
+ true
+}
+run_test 27 "fail LOV while using OSCs"
+
+test_28() { # bug 6086 - error adding new clients
+ do_facet client mcreate $MOUNT/$tfile || return 1
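+	# dropping the blocking-AST callback reply gets this client evicted,
+	# so the chmod itself may fail; that is the expected outcome here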
+	drop_bl_callback "chmod 0777 $MOUNT/$tfile" || echo "evicted as expected"
+ #define OBD_FAIL_MDS_CLIENT_ADD 0x12f
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012f"
+ # fail once (evicted), reconnect fail (fail_loc), ok
+ df || (sleep 10; df) || (sleep 10; df) || error "reconnect failed"
+ rm -f $MOUNT/$tfile
+ fail $SINGLEMDS # verify MDS last_rcvd can be loaded
+}
+run_test 28 "handle error adding new clients (bug 6086)"
+
+test_50() {
+ mkdir -p $DIR/$tdir
+ # put a load of file creates/writes/deletes
+ writemany -q $DIR/$tdir/$tfile 0 5 &
+ CLIENT_PID=$!
+ echo writemany pid $CLIENT_PID
+ sleep 10
+ FAILURE_MODE="SOFT"
+ fail $SINGLEMDS
+ # wait for client to reconnect to MDS
+ sleep 60
+ fail $SINGLEMDS
+ sleep 60
+ fail $SINGLEMDS
+ # client process should see no problems even though MDS went down
+ sleep $TIMEOUT
+ kill -USR1 $CLIENT_PID
+ wait $CLIENT_PID
+ rc=$?
+ echo writemany returned $rc
+	# these may fail because of eviction due to slow AST response.
+ [ $rc -eq 0 ] || error_ignore 13652 "writemany returned rc $rc" || true
+}
+run_test 50 "failover MDS under load"
+
+test_51() {
+ mkdir -p $DIR/$tdir
+ # put a load of file creates/writes/deletes
+ writemany -q $DIR/$tdir/$tfile 0 5 &
+ CLIENT_PID=$!
+ sleep 1
+ FAILURE_MODE="SOFT"
+ facet_failover $SINGLEMDS
+ # failover at various points during recovery
+ SEQ="1 5 10 $(seq $TIMEOUT 5 $(($TIMEOUT+10)))"
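+	# e.g. with TIMEOUT=30 this yields "1 5 10 30 35 40": failovers early
+	# in recovery, around the recovery window, and just past it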
+ echo will failover at $SEQ
+ for i in $SEQ
+ do
+ echo failover in $i sec
+ sleep $i
+ facet_failover $SINGLEMDS
+ done
+ # client process should see no problems even though MDS went down
+ # and recovery was interrupted
+ sleep $TIMEOUT
+ kill -USR1 $CLIENT_PID
+ wait $CLIENT_PID
+ rc=$?
+ echo writemany returned $rc
+ [ $rc -eq 0 ] || error_ignore 13652 "writemany returned rc $rc" || true
+}
+run_test 51 "failover MDS during recovery"
+
+test_52_guts() {
+ do_facet client "mkdir -p $DIR/$tdir"
+ do_facet client "writemany -q -a $DIR/$tdir/$tfile 300 5" &
+ CLIENT_PID=$!
+ echo writemany pid $CLIENT_PID
+ sleep 10
+ FAILURE_MODE="SOFT"
+ fail ost1
+ rc=0
+ wait $CLIENT_PID || rc=$?
+ # active client process should see an EIO for down OST
+ [ $rc -eq 5 ] && { echo "writemany correctly failed $rc" && return 0; }
+ # but timing or failover setup may allow success
+ [ $rc -eq 0 ] && { echo "writemany succeeded" && return 0; }
+ echo "writemany returned $rc"
+ return $rc
+}