done
fail $SINGLEMDS
for i in `seq 220`; do
- grep -q "tag-$i" $DIR/$tfile-$i || error "f1c-$i"
+ grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i"
done
rm -rf $DIR/$tfile-*
sleep 3
run_test 20a "|X| open(O_CREAT), unlink, replay, close (test mds_cleanup_orphans)"
test_20b() { # bug 10480
+ # XXX temporarily increase the debug level
+ do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug=0x33f0406; $LCTL set_param debug_mb=150"
BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 &
log "before $BEFOREUSED, after $AFTERUSED"
[ $AFTERUSED -gt $((BEFOREUSED + 20)) ] && \
error "after $AFTERUSED > before $BEFOREUSED"
+ # XXX restore the debug mask and debug buffer size
+ do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug=$PTLDEBUG; $LCTL set_param debug_mb=$DEBUG_SIZE"
return 0
}
run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)"
test_61a() { # was test_61
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
- mkdir $DIR/$tdir
+ mkdir -p $DIR/$tdir
createmany -o $DIR/$tdir/$tfile-%d 800
- replay_barrier ost1
-# OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
- unlinkmany $DIR/$tdir/$tfile-%d 800
+ replay_barrier ost1
+# OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221
+ unlinkmany $DIR/$tdir/$tfile-%d 800
set_nodes_failloc "$(osts_nodes)" 0x80000221
facet_failover ost1
- sleep 10
+ sleep 10
fail ost1
sleep 30
set_nodes_failloc "$(osts_nodes)" 0x0
-
+
$CHECKSTAT -t file $DIR/$tdir/$tfile-* && return 1
rmdir $DIR/$tdir
}
test_61b() {
# OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a
do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000013a"
- facet_failover $SINGLEMDS
+ facet_failover $SINGLEMDS
sleep 10
fail $SINGLEMDS
do_facet client dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 || return 1
test_61c() {
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
-# OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
- touch $DIR/$tfile
+# OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222
+ touch $DIR/$tfile
set_nodes_failloc "$(osts_nodes)" 0x80000222
- rm $DIR/$tfile
+ rm $DIR/$tfile
sleep 10
fail ost1
set_nodes_failloc "$(osts_nodes)" 0x0
run_test 61c "test race mds llog sync vs llog cleanup"
test_61d() { # bug 16002 # bug 17466
-#define OBD_FAIL_OBD_LLOG_SETUP 0x605
shutdown_facet $SINGLEMDS
+#define OBD_FAIL_OBD_LLOG_SETUP 0x605
do_facet $SINGLEMDS "lctl set_param fail_loc=0x605"
start $SINGLEMDS `mdsdevname 1` $MDS_MOUNT_OPTS && error "mds start should have failed"
do_facet $SINGLEMDS "lctl set_param fail_loc=0"
mkdir -p $DIR/$tdir
replay_barrier $SINGLEMDS
createmany -o $DIR/$tdir/$tfile- 25
-#define OBD_FAIL_TGT_REPLAY_DROP 0x706
+#define OBD_FAIL_TGT_REPLAY_DROP 0x707
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000707"
facet_failover $SINGLEMDS
df $MOUNT || return 1
echo $TIMEOUT >> $ldlm_enqueue_min
do_facet ost1 "echo $TIMEOUT >> $ldlm_enqueue_min_r"
- rm -f $DIR/${tfile}_[1-2]
- lfs setstripe $DIR/$tfile --index=0 --count=1
+ rm -rf $DIR/$tdir
+ mkdir -p $DIR/$tdir
+ lfs setstripe $DIR/$tdir --index=0 --count=1
#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312
sysctl -w lustre.fail_val=$(($TIMEOUT - 1))
sysctl -w lustre.fail_loc=0x80000312
- cp /etc/profile $DIR/${tfile}_1 || error "1st cp failed $?"
- sysctl -w lustre.fail_val=$((TIMEOUT * 3 / 2))
+ cp /etc/profile $DIR/$tdir/${tfile}_1 || error "1st cp failed $?"
+ sysctl -w lustre.fail_val=$((TIMEOUT * 5 / 4))
sysctl -w lustre.fail_loc=0x80000312
- cp /etc/profile $DIR/${tfile}_2 || error "2nd cp failed $?"
+ cp /etc/profile $DIR/$tdir/${tfile}_2 || error "2nd cp failed $?"
sysctl -w lustre.fail_loc=0
echo $ENQ_MIN >> $ldlm_enqueue_min
do_facet ost1 "echo $ENQ_MIN_R >> $ldlm_enqueue_min_r"
+ rm -rf $DIR/$tdir
return 0
}
run_test 68 "AT: verify slowing locks"
error "dd failed on $CLIENT"
done
- local prev_client=$(echo $clients | sed 's/^.* \(.\+\)$/\1/')
+ local prev_client=$(echo $clients | sed 's/^.* \(.\+\)$/\1/')
for C in ${CLIENTS//,/ }; do
do_node $prev_client dd if=$DIR/${tfile}_${C} of=/dev/null 2>/dev/null || \
error "dd if=$DIR/${tfile}_${C} failed on $prev_client"
zconf_mount_clients $clients $DIR
- local duration=120
+ local duration=300
[ "$SLOW" = "no" ] && duration=60
local cmd="rundbench 1 -t $duration"
local PID=""
LCTL=$LCTL $cmd" &
PID=$!
log "Started rundbench load PID=$PID ..."
+ ELAPSED=0
+ NUM_FAILOVERS=0
+ START_TS=$(date +%s)
+ CURRENT_TS=$START_TS
+ while [ $ELAPSED -lt $duration ]; do
+ sleep 1
+ replay_barrier $SINGLEMDS
+ sleep 1 # give clients time to do operations
+ # Increment the number of failovers
+ NUM_FAILOVERS=$((NUM_FAILOVERS+1))
+ log "$TESTNAME fail mds1 $NUM_FAILOVERS times"
+ facet_failover $SINGLEMDS
+ CURRENT_TS=$(date +%s)
+ ELAPSED=$((CURRENT_TS - START_TS))
+ done
+ wait $PID || error "rundbench load on $CLIENTS failed!"
+}
+run_test 70b "mds recovery; $CLIENTCOUNT clients"
+# end multi-client tests
+
+test_73a() {
+ multiop_bg_pause $DIR/$tfile O_tSc || return 3
+ pid=$!
+ rm -f $DIR/$tfile
+
+ replay_barrier $SINGLEMDS
+#define OBD_FAIL_LDLM_ENQUEUE 0x302
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000302"
+ fail $SINGLEMDS
+ kill -USR1 $pid
+ wait $pid || return 1
+ [ -e $DIR/$tfile ] && return 2
+ return 0
+}
+run_test 73a "open(O_CREAT), unlink, replay, reconnect before open replay , close"
- sleep $((duration / 4))
- replay_barrier $SINGLEMDS
- sleep 3 # give clients a time to do operations
+test_73b() {
+ multiop_bg_pause $DIR/$tfile O_tSc || return 3
+ pid=$!
+ rm -f $DIR/$tfile
- log "$TESTNAME fail mds 1"
- fail $SINGLEMDS
+ replay_barrier $SINGLEMDS
+#define OBD_FAIL_LDLM_REPLY 0x30c
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000030c"
+ fail $SINGLEMDS
+ kill -USR1 $pid
+ wait $pid || return 1
+ [ -e $DIR/$tfile ] && return 2
+ return 0
+}
+run_test 73b "open(O_CREAT), unlink, replay, reconnect at open_replay reply, close"
- wait $PID || error "rundbench load on $CLIENTS failed!"
+test_73c() {
+ multiop_bg_pause $DIR/$tfile O_tSc || return 3
+ pid=$!
+ rm -f $DIR/$tfile
+
+ replay_barrier $SINGLEMDS
+#define OBD_FAIL_TGT_LAST_REPLAY 0x710
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000710"
+ fail $SINGLEMDS
+ kill -USR1 $pid
+ wait $pid || return 1
+ [ -e $DIR/$tfile ] && return 2
+ return 0
+}
+run_test 73c "open(O_CREAT), unlink, replay, reconnect at last_replay, close"
+# bug 18554
+test_74() {
+ stop ost1
+ zconf_umount $(hostname) $MOUNT
+ fail $SINGLEMDS
+ zconf_mount $(hostname) $MOUNT
+ mount_facet ost1
+ touch $DIR/$tfile || return 1
+ rm $DIR/$tfile || return 2
+ df $MOUNT || error "df failed: $?"
+ return 0
}
-run_test 70b "mds recovery; $CLIENTCOUNT clients"
-# end multi-client tests
+run_test 74 "Ensure applications don't fail waiting for OST recovery"
test_80a() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0