require_dsh_mds || exit 0
# Skip these tests
-# bug number: 17466 18857 LU1867
-ALWAYS_EXCEPT="61d 33a 33b 89 $REPLAY_SINGLE_EXCEPT"
+# bug number: 17466 18857 LU-1867 LU-1473
+ALWAYS_EXCEPT="61d 33a 33b 89 62 $REPLAY_SINGLE_EXCEPT"
+
+[ "$(facet_fstype $SINGLEMDS)" = "zfs" ] &&
+# bug number for skipped test: LU-951
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 73a"
# 63 min 7 min AT AT AT AT"
[ "$SLOW" = "no" ] && EXCEPT_SLOW="1 2 3 4 6 12 16 44a 44b 65 66 67 68"
+[ "$(facet_fstype $SINGLEMDS)" = "zfs" ] &&
+# bug number for skipped test: LU-3127
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 73b"
+
build_test_filter
check_and_setup_lustre
zconf_mount `hostname` $MOUNT || error "mount fails"
client_up || error "post-failover df failed"
# file shouldn't exist if replay-barrier works as expected
- rm $DIR/$tfile && return 1
+ rm $DIR/$tfile && error "File exists and it shouldn't"
return 0
}
run_test 0c "check replay-barrier"
wait_mds_ost_sync || return 3
AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
log "before $BEFOREUSED, after $AFTERUSED"
- [ $AFTERUSED -gt $((BEFOREUSED + 20)) ] && \
+ (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) &&
error "after $AFTERUSED > before $BEFOREUSED"
return 0
}
#b=2477,2532
test_40(){
- $LCTL mark multiop $MOUNT/$tfile OS_c
- multiop $MOUNT/$tfile OS_c &
- PID=$!
- writeme -s $MOUNT/${tfile}-2 &
- WRITE_PID=$!
- sleep 1
- facet_failover $SINGLEMDS
+ # With layout_lock the client always needs an MDS connection to verify
+ # the layout during IO (LU-2628), so skip when that flag is present.
+ # The pattern is quoted so the shell cannot expand the glob against
+ # the current directory before lctl sees it.
+ $LCTL get_param "mdc.*.connect_flags" | grep -q layout_lock &&
+ skip "layout_lock needs MDS connection for IO" && return 0
+
+ # start one multiop and one continuous writer, then fail over the MDS
+ $LCTL mark multiop $MOUNT/$tfile OS_c
+ multiop $MOUNT/$tfile OS_c &
+ PID=$!
+ writeme -s $MOUNT/${tfile}-2 &
+ WRITE_PID=$!
+ sleep 1
+ facet_failover $SINGLEMDS
#define OBD_FAIL_MDS_CONNECT_NET 0x117
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000117"
- kill -USR1 $PID
- stat1=`count_ost_writes`
- sleep $TIMEOUT
- stat2=`count_ost_writes`
- echo "$stat1, $stat2"
- if [ $stat1 -lt $stat2 ]; then
- echo "writes continuing during recovery"
- RC=0
- else
- echo "writes not continuing during recovery, bug 2477"
- RC=4
- fi
- echo "waiting for writeme $WRITE_PID"
- kill $WRITE_PID
- wait $WRITE_PID
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000117"
+ kill -USR1 $PID
+ # sample the OST write count twice, $TIMEOUT seconds apart; it must
+ # grow for the test to pass (RC=0), otherwise RC=4
+ stat1=`count_ost_writes`
+ sleep $TIMEOUT
+ stat2=`count_ost_writes`
+ echo "$stat1, $stat2"
+ if [ $stat1 -lt $stat2 ]; then
+ echo "writes continuing during recovery"
+ RC=0
+ else
+ echo "writes not continuing during recovery, bug 2477"
+ RC=4
+ fi
+ echo "waiting for writeme $WRITE_PID"
+ kill $WRITE_PID
+ wait $WRITE_PID
- echo "waiting for multiop $PID"
- wait $PID || return 2
- do_facet client munlink $MOUNT/$tfile || return 3
- do_facet client munlink $MOUNT/${tfile}-2 || return 3
- return $RC
+ echo "waiting for multiop $PID"
+ wait $PID || return 2
+ do_facet client munlink $MOUNT/$tfile || return 3
+ do_facet client munlink $MOUNT/${tfile}-2 || return 3
+ return $RC
}
run_test 40 "cause recovery in ptlrpc, ensure IO continues"
for i in `seq 1 10`; do
echo "$i of 10 ($(date +%s))"
- do_facet $SINGLEMDS "lctl get_param -n mdt.*.mdt.timeouts | grep service"
+ do_facet $SINGLEMDS \
+ "lctl get_param -n md[ts].*.mdt.timeouts | grep service"
#define OBD_FAIL_TGT_CONN_RACE 0x701
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000701"
# lctl below may fail, it is valid case
for i in `seq 1 10`; do
echo "$i of 10 ($(date +%s))"
- do_facet $SINGLEMDS "lctl get_param -n mdt.*.mdt.timeouts | grep service"
+ do_facet $SINGLEMDS \
+ "lctl get_param -n md[ts].*.mdt.timeouts | grep service"
#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000704"
# lctl below may fail, it is valid case
run_test 44b "race in target handle connect"
test_44c() {
- replay_barrier $SINGLEMDS
- createmany -m $DIR/$tfile-%d 100
+ # create 100 entries after a replay barrier on the MDS
+ replay_barrier $SINGLEMDS
+ createmany -m $DIR/$tfile-%d 100 || error "failed to create directories"
#define OBD_FAIL_TGT_RCVG_FLAG 0x712
- do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000712"
- fail_abort $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 100 && return 1
- fail $SINGLEMDS
- unlinkmany $DIR/$tfile-%d 100 && return 1
- return 0
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000712"
+ fail_abort $SINGLEMDS
+ # the unlink is expected to fail after each failover; a successful
+ # unlink is reported as a test error
+ unlinkmany $DIR/$tfile-%d 100 && error "unlinked after fail abort"
+ fail $SINGLEMDS
+ unlinkmany $DIR/$tfile-%d 100 && error "unlinked after fail"
+ return 0
}
run_test 44c "race in target handle connect"
zconf_mount_clients $clients $MOUNT
local duration=300
- [ "$SLOW" = "no" ] && duration=60
+ [ "$SLOW" = "no" ] && duration=120
# set duration to 900 because it takes some time to boot node
[ "$FAILURE_MODE" = HARD ] && duration=900
+ local elapsed
+ local start_ts=$(date +%s)
local cmd="rundbench 1 -t $duration"
local pid=""
do_nodesv $clients "set -x; MISSING_DBENCH_OK=$MISSING_DBENCH_OK \
DBENCH_LIB=$DBENCH_LIB TESTSUITE=$TESTSUITE TESTNAME=$TESTNAME \
MOUNT=$MOUNT DIR=$DIR/$tdir/\\\$(hostname) LCTL=$LCTL $cmd" &
pid=$!
+
+ #LU-1897 wait for all dbench copies to start
+ while ! check_for_process $clients dbench; do
+ elapsed=$(($(date +%s) - start_ts))
+ # give up once the whole run duration has passed without dbench
+ # showing up; note the space before "]", without it the test
+ # always errors out and the timeout never fires
+ if [ $elapsed -gt $duration ]; then
+ killall_process $clients dbench
+ error "dbench failed to start on $clients!"
+ fi
+ sleep 1
+ done
+
log "Started rundbench load pid=$pid ..."
- # give rundbench a chance to start, bug 24118
- sleep 12
- local elapsed=0
+ elapsed=$(($(date +%s) - start_ts))
local num_failovers=0
- local start_ts=$(date +%s)
while [ $elapsed -lt $duration ]; do
if ! check_for_process $clients dbench; then
- error_noexit "dbench not found on some of $clients!"
+ error_noexit "dbench stopped on some of $clients!"
killall_process $clients dbench
break
fi
local remote_dir=$DIR/$tdir/remote_dir
mkdir -p $DIR/$tdir
- # OBD_FAIL_MDS_DROP_OBJ_UPDATE 0x188
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x188
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds$((MDTIDX + 1))
local remote_dir=$DIR/$tdir/remote_dir
mkdir -p $DIR/$tdir
- # OBD_FAIL_MDS_DROP_OBJ_UPDATE 0x188
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x188
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds${MDTIDX}
local remote_dir=$DIR/$tdir/remote_dir
mkdir -p $DIR/$tdir
- # OBD_FAIL_MDS_DROP_OBJ_UPDATE 0x188
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x188
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds${MDTIDX}
fail mds$((MDTIDX + 1))
local remote_dir=$DIR/$tdir/remote_dir
mkdir -p $DIR/$tdir
- # OBD_FAIL_MDS_DROP_OBJ_UPDATE 0x188
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x188
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX}
do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds$((MDTIDX + 1))
do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX}
fail mds$((MDTIDX + 1))
do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
mkdir -p $DIR/$tdir
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_MDS_DROP_OBJ_UPDATE 0x188
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x188
+ touch $remote_dir
+ # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds$((MDTIDX + 1))
mkdir -p $DIR/$tdir
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_MDS_DROP_OBJ_UPDATE 0x188
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x188
+ # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX}
mkdir -p $DIR/$tdir
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_MDS_DROP_OBJ_UPDATE 0x188
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x188
+ # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX}
fail mds$((MDTIDX + 1))
mkdir -p $DIR/$tdir
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_MDS_DROP_OBJ_UPDATE 0x188
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x188
+ # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds${MDTIDX}
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds$((MDTIDX + 1))
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX}
fail mds$((MDTIDX + 1))
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
rmdir $remote_dir &
local CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
# Before failing an OST, get its obd name and index
local varsvc=${ostfail}_svc
local obd=$(do_facet $ostfail lctl get_param -n obdfilter.${!varsvc}.uuid)
- local index=${obd:(-6):1}
+ local index=$(($(facet_number $ostfail) - 1))
echo "Fail $ostfail $obd, display the list of affected files"
shutdown_facet $ostfail || return 2