# When SLOW is "no", record test 21b in EXCEPT_SLOW.
[ "$SLOW" = "no" ] && EXCEPT_SLOW="21b"
+[[ $(facet_fstype $SINGLEMDS) == zfs ]] &&
+# bug number for skipped test: LU-2230
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT 21b"
+
# NOTE(review): build_test_filter and check_and_setup_lustre are defined
# outside this chunk; presumably they consume the exception lists set above
# and prepare the filesystem -- confirm against the test framework.
build_test_filter
check_and_setup_lustre
# NOW starts at 0; test_18 overwrites it with the current epoch time, and the
# end-of-script code subtracts it from the clock to compute a sleep interval.
export NOW=0
test_18() { # bug 3822 - evicting client with enqueued lock
- #set -vx
- mkdir -p $MOUNT1/$tdir
- touch $MOUNT1/$tdir/f0
-#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
- statmany -s $MOUNT1/$tdir/f 1 500 &
- OPENPID=$!
- NOW=`date +%s`
- do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b # hold enqueue
- sleep 1
-#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
- do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict
- cancel_lru_locks mdc
- usleep 500 # wait to ensure first client is one that will be evicted
- openfile -f O_RDONLY $MOUNT2/$tdir/f0
- wait $OPENPID
- dmesg | grep "entering recovery in server" && \
- error "client not evicted" || true
- do_facet client "lctl set_param fail_loc=0"
- do_facet $SINGLEMDS "lctl set_param fail_loc=0"
+ #set -vx
+ local DLMTRACE=$(do_facet $SINGLEMDS lctl get_param debug) # NOTE(review): saved but never read again in this function
+ do_facet $SINGLEMDS lctl set_param debug=+dlmtrace # NOTE(review): not undone before return -- confirm a restore from DLMTRACE was intended
+ mkdir -p $MOUNT1/$tdir || error "mkdir $MOUNT1/$tdir failed"
+ touch $MOUNT1/$tdir/$tfile
+ #define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
+ statmany -s $MOUNT1/$tdir/f 1 500 & # runs in the background while the fail_locs below are armed
+ OPENPID=$! # PID of the background statmany, waited on below
+ NOW=$(date +%s) # global (not local): read again by the end-of-script sleep computation
+ do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b # hold enqueue
+ sleep 1
+ #define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
+ do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict
+ cancel_lru_locks mdc
+ usleep 500 # wait to ensure first client is one that will be evicted
+ openfile -f O_RDONLY $MOUNT2/$tdir/$tfile # open the same file through the second mount point
+ wait $OPENPID
+ do_facet $SINGLEMDS lctl debug_kernel |
+ grep "not entering recovery" && error "client not evicted" # error out if the MDS debug log contains this message
+ do_facet client "lctl set_param fail_loc=0" # clear fail_loc on the client
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0" # clear fail_loc on the MDS
}
run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)"
test_21a() {
local param_file=$TMP/$tfile-params
- save_lustre_params $(facet_active_host $SINGLEMDS) "mdt.*.commit_on_sharing" > $param_file
+ save_lustre_params $SINGLEMDS "mdt.*.commit_on_sharing" > $param_file
do_facet $SINGLEMDS lctl set_param mdt.*.commit_on_sharing=1
touch $MOUNT1/$tfile-1
mv $MOUNT2/$tfile-1 $MOUNT2/$tfile-2
local num=$(get_mds_dir $MOUNT1)
- save_lustre_params $(facet_active_host mds$num) "mdt.*.commit_on_sharing" > $param_file
+ save_lustre_params mds$num "mdt.*.commit_on_sharing" > $param_file
# COS enabled
local COS=1
do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
# OBD_FAIL_MDS_REINT_NET_REP 0x119
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
- fail mds${MDTIDX}
+ fail mds$((MDTIDX + 1))
wait $CLIENT_PID || error "lfs mkdir failed"
- replay_barrier mds${MDTIDX}
+ replay_barrier mds$MDTIDX
create_remote_dir_files_22 || error "Remote creation failed $?"
- fail mds${MDTIDX}
+ fail mds$MDTIDX
checkstat_22 || error "check stat failed $?"
rm -rf $MOUNT1/$tdir || error "rmdir remote_dir failed"
return 0
}
-run_test 22a "c1 lfs mkdir -i 1 dir1, M0 drop reply & fail, c2 mkdir dir1/dir"
+run_test 22a "c1 lfs mkdir -i 1 dir1, M1 drop reply & fail, c2 mkdir dir1/dir"
# test_22b: start a remote "lfs mkdir -i $MDTIDX" from CLIENT1 in the
# background while fail_loc 0x119 is set on an MDS, then fail both
# mds$MDTIDX and mds$((MDTIDX + 1)) and require the mkdir to succeed.
# Skipped (returns 0) when MDSCOUNT is less than 2.
# MDTIDX and remote_dir are not set in this function; they come from outside
# this chunk.
test_22b () {
 [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
 # OBD_FAIL_MDS_REINT_NET_REP 0x119
 do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119 # armed on facet mds$((MDTIDX + 1)), not mds$MDTIDX
 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
 CLIENT_PID=$!
- do_facet mds${MDTIDX} lctl set_param fail_loc=0
 fail mds${MDTIDX},mds$((MDTIDX + 1))
 wait $CLIENT_PID || error "lfs mkdir failed"
 rm -rf $MOUNT1/$tdir || error "rmdir remote_dir failed"
 return 0
}
-run_test 22b "c1 lfs mkdir -i 1 d1, M0 drop reply & fail M0/M1, c2 mkdir d1/dir"
+run_test 22b "c1 lfs mkdir -i 1 d1, M1 drop reply & fail M0/M1, c2 mkdir d1/dir"
test_22c () {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1500
+ # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds$MDTIDX lctl set_param fail_loc=0x1701
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
+ do_facet mds$MDTIDX lctl set_param fail_loc=0
- fail mds$((MDTIDX+1))
+ fail mds$MDTIDX
wait $CLIENT_PID || error "lfs mkdir failed"
replay_barrier mds$MDTIDX
do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x1500
+ # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds$MDTIDX lctl set_param fail_loc=0x1701
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
CLIENT_PID=$!
- do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
+ do_facet mds$MDTIDX lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
wait $CLIENT_PID || error "lfs mkdir failed"
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir ||
error "lfs mkdir failed"
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x1500
+ # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
CLIENT_PID=$!
do_facet mds${MDTIDX} lctl set_param fail_loc=0
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir ||
error "lfs mkdir failed"
- # OBD_FAIL_UPDATE_OBJ_NET 0x1500
- do_facet mds${MDTIDX} lctl set_param fail_loc=0x1500
+ # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
CLIENT_PID=$!
do_facet mds${MDTIDX} lctl set_param fail_loc=0
# end commit on sharing tests
+test_24() { # waiting lock from a 2nd client is replayed and resent across an ost1 failover
+ cancel_lru_locks osc
+
+ $SETSTRIPE -i 0 -c 1 $DIR/$tfile # NOTE(review): presumably a single stripe at OST index 0, i.e. on ost1 which is failed below -- confirm
+
+ # get lock for the 1st client
+ dd if=/dev/zero of=$DIR/$tfile count=1 >/dev/null ||
+ error "failed to write data"
+
+ # get waiting locks for the 2nd client
+ drop_ldlm_cancel "multiop $DIR2/$tfile Ow512" & # background job; reaped by the final wait
+ sleep 1
+
+#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213
+ # failover, replay and resend replayed waiting locks
+ do_facet ost1 lctl set_param fail_loc=0x80000213
+ fail ost1
+
+ # multiop does not finish because CP AST is skipped;
+ # it is ok to kill it in the test, because CP AST is already re-sent
+ # and it does not hang forever in real life
+ killall multiop
+ wait # wait for all background jobs started by this function
+}
+run_test 24 "replay|resend"
+
# Pass the shell's SECONDS counter to complete (a helper defined outside this
# chunk).
complete $SECONDS
# Seconds elapsed since NOW. NOW is 0 from startup unless test_18 ran and set
# it to the epoch time; with NOW=0 this is the full epoch time.
SLEEP=$((`date +%s` - $NOW))
# Sleep for that long only when the value is below TIMEOUT.
# NOTE(review): when the test is false this line returns non-zero; that
# matters only if it is the script's last command -- confirm.
[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP