X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Freplay-dual.sh;h=2270e1e995e1e69da6c7b317e8485d1a42397d1c;hp=394309ee5a0013aa4b52e7ca2fdac36842035e13;hb=ce179e97767936ff76282fd06df063b386851fe7;hpb=6c617a3d56e4b84dcc4e76ff0eebcb177b227ac3 diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 394309e..2270e1e 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -2,39 +2,35 @@ set -e -# bug number: LU-2012 LU-8333 -ALWAYS_EXCEPT="14b 21b $REPLAY_DUAL_EXCEPT" -# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! - -SAVE_PWD=$PWD PTLDEBUG=${PTLDEBUG:--1} -LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} -SETUP=${SETUP:-""} -CLEANUP=${CLEANUP:-""} MOUNT_2=${MOUNT_2:-"yes"} -export MULTIOP=${MULTIOP:-multiop} -. $LUSTRE/tests/test-framework.sh +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0 -# 7 (min)" -[ "$SLOW" = "no" ] && EXCEPT_SLOW="21b" +ALWAYS_EXCEPT="$REPLAY_DUAL_EXCEPT " +# bug number for skipped test: LU-2012 LU-8333 LU-7372 +ALWAYS_EXCEPT+=" 14b 21b 26 " +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -[[ $(facet_fstype $SINGLEMDS) == zfs ]] && -# bug number for skipped test: LU-2230 +[[ "$mds1_FSTYPE" == zfs ]] && +# bug number for skipped test: LU-2230 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 21b" -build_test_filter +# 7 (min)" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="21b " +build_test_filter check_and_setup_lustre + MOUNTED=$(mounted_lustre_filesystems) if ! $(echo $MOUNTED' ' | grep -w -q $MOUNT2' '); then - zconf_mount $HOSTNAME $MOUNT2 - MOUNTED2=yes + zconf_mount $HOSTNAME $MOUNT2 + MOUNTED2=yes fi assert_DIR @@ -331,11 +327,11 @@ test_14b() { local beforeused=$(df -P $DIR | tail -1 | awk '{ print $3 }') mkdir -p $MOUNT1/$tdir - $SETSTRIPE -i 0 $MOUNT1/$tdir + $LFS setstripe -i 0 $MOUNT1/$tdir replay_barrier $SINGLEMDS createmany -o $MOUNT1/$tdir/$tfile- 5 - $SETSTRIPE -i 0 $MOUNT2/$tfile-2 + $LFS setstripe -i 0 $MOUNT2/$tfile-2 dd if=/dev/zero of=$MOUNT2/$tfile-2 bs=1M count=5 createmany -o $MOUNT1/$tdir/$tfile-3- 5 umount $MOUNT2 @@ -450,7 +446,7 @@ test_18() { # bug 3822 - evicting client with enqueued lock do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0 do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict cancel_lru_locks mdc - usleep 500 # wait to ensure first client is one that will be evicted + sleep 0.1 # wait to ensure first client is one that will be evicted openfile -f O_RDONLY $MOUNT2/$tdir/$tfile wait $OPENPID do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1 @@ -463,7 +459,7 @@ run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)" test_19() { # Bug 10991 - resend of open request does not fail assertion. replay_barrier $SINGLEMDS - drop_ldlm_reply "createmany -o $DIR/$tfile 1" || return 1 + drop_mdt_ldlm_reply "createmany -o $DIR/$tfile 1" || return 1 fail $SINGLEMDS checkstat $DIR2/${tfile}0 || return 2 rm $DIR/${tfile}0 || return 3 @@ -625,17 +621,19 @@ test_22a () { return 0 local MDTIDX=1 - local remote_dir=${tdir}/remote_dir + local remote_dir=$tdir/remote_dir - do_node $CLIENT1 mkdir -p $MOUNT1/${tdir} + do_node $CLIENT1 $LFS mkdir -i 0 $MOUNT1/$tdir || + error "lfs mkdir -i 0 $MOUNT1/$tdir failed" # OBD_FAIL_MDS_REINT_NET_REP 0x119 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir & CLIENT_PID=$! + sleep 1 fail mds$((MDTIDX + 1)) - wait $CLIENT_PID || error "lfs mkdir failed" + wait $CLIENT_PID || error "lfs mkdir -i $MDTIDX failed" replay_barrier mds$MDTIDX create_remote_dir_files_22 || error "Remote creation failed $?" @@ -653,15 +651,17 @@ test_22b () { local MDTIDX=1 local remote_dir=$tdir/remote_dir - # OBD_FAIL_MDS_REINT_NET_REP 0x119 - do_node $CLIENT1 mkdir -p $MOUNT1/${tdir} + do_node $CLIENT1 $LFS mkdir -i 0 $MOUNT1/$tdir || + error "lfs mkdir -i 0 $MOUNT1/$tdir failed" + # OBD_FAIL_MDS_REINT_NET_REP 0x119 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir & CLIENT_PID=$! + sleep 1 fail mds${MDTIDX},mds$((MDTIDX + 1)) - wait $CLIENT_PID || error "lfs mkdir failed" + wait $CLIENT_PID || error "lfs mkdir -i $MDTIDX failed" replay_barrier mds$MDTIDX create_remote_dir_files_22 || error "Remote creation failed $?" @@ -681,18 +681,20 @@ test_22c () { skip "MDTs needs to be on diff hosts for HARD fail mode" && return 0 local MDTIDX=1 - local remote_dir=${tdir}/remote_dir + local remote_dir=$tdir/remote_dir - do_node $CLIENT1 mkdir -p $MOUNT1/${tdir} + do_node $CLIENT1 $LFS mkdir -i 0 $MOUNT1/$tdir || + error "lfs mkdir -i 0 $MOUNT1/$tdir failed" - # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701 + # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701 do_facet mds$MDTIDX lctl set_param fail_loc=0x1701 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir & CLIENT_PID=$! + sleep 1 do_facet mds$MDTIDX lctl set_param fail_loc=0 fail mds$MDTIDX - wait $CLIENT_PID || error "lfs mkdir failed" + wait $CLIENT_PID || error "lfs mkdir -i $MDTIDX failed" replay_barrier mds$MDTIDX create_remote_dir_files_22 || error "Remote creation failed $?" @@ -708,18 +710,20 @@ run_test 22c "c1 lfs mkdir -i 1 d1, M1 drop update & fail M1, c2 mkdir d1/dir" test_22d () { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 local MDTIDX=1 - local remote_dir=${tdir}/remote_dir + local remote_dir=$tdir/remote_dir - do_node $CLIENT1 mkdir -p $MOUNT1/${tdir} + do_node $CLIENT1 $LFS mkdir -i 0 $MOUNT1/$tdir || + error "lfs mkdir -i 0 $MOUNT1/$tdir failed" - # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701 + # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701 do_facet mds$MDTIDX lctl set_param fail_loc=0x1701 do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir & CLIENT_PID=$! + sleep 1 do_facet mds$MDTIDX lctl set_param fail_loc=0 fail mds${MDTIDX},mds$((MDTIDX + 1)) - wait $CLIENT_PID || error "lfs mkdir failed" + wait $CLIENT_PID || error "lfs mkdir -i $MDTIDX failed" replay_barrier mds$MDTIDX create_remote_dir_files_22 || error "Remote creation failed $?" @@ -753,13 +757,15 @@ test_23a () { local MDTIDX=1 local remote_dir=$tdir/remote_dir - do_node $CLIENT1 mkdir -p $MOUNT1/${tdir} + do_node $CLIENT1 $LFS mkdir -i 0 $MOUNT1/$tdir || + error "lfs mkdir -i 0 $MOUNT1/$tdir failed" do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir || - error "lfs mkdir failed" + error "lfs mkdir -i $MDTIDX failed" # OBD_FAIL_MDS_REINT_NET_REP 0x119 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119 do_node $CLIENT1 rmdir $MOUNT1/$remote_dir & local CLIENT_PID=$! + sleep 1 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0 fail mds$((MDTIDX + 1)) @@ -781,14 +787,16 @@ test_23b () { local MDTIDX=1 local remote_dir=$tdir/remote_dir - do_node $CLIENT1 mkdir -p $MOUNT1/${tdir} + do_node $CLIENT1 $LFS mkdir -i 0 $MOUNT1/$tdir || + error "lfs mkdir -i 0 $MOUNT/$tdir failed" do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir || - error "lfs mkdir failed" + error "lfs mkdir -i $MDTIDX failed" # OBD_FAIL_MDS_REINT_NET_REP 0x119 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119 do_node $CLIENT1 rmdir $MOUNT1/$remote_dir & local CLIENT_PID=$! + sleep 1 do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0 fail mds${MDTIDX},mds$((MDTIDX + 1)) @@ -815,14 +823,16 @@ test_23c () { local MDTIDX=1 local remote_dir=$tdir/remote_dir - do_node $CLIENT1 mkdir -p $MOUNT1/${tdir} + do_node $CLIENT1 $LFS mkdir -i 0 $MOUNT1/$tdir || + error "lfs mkdir -i 0 $MOUNT1/$tdir failed" do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir || - error "lfs mkdir failed" + error "lfs mkdir -i $MDTIDX failed" - # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701 + # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701 do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701 do_node $CLIENT1 rmdir $MOUNT1/$remote_dir & CLIENT_PID=$! + sleep 1 do_facet mds${MDTIDX} lctl set_param fail_loc=0 fail mds${MDTIDX} @@ -844,14 +854,16 @@ test_23d () { local MDTIDX=1 local remote_dir=$tdir/remote_dir - do_node $CLIENT1 mkdir -p $MOUNT1/${tdir} + do_node $CLIENT1 $LFS mkdir -i 0 $MOUNT1/$tdir || + error "lfs mkdir -i 0 $MOUNT1/$tdir failed" do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir || - error "lfs mkdir failed" + error "lfs mkdir -i $MDTIDX failed" # OBD_FAIL_UPDATE_OBJ_NET 0x1701 do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701 do_node $CLIENT1 rmdir $MOUNT1/$remote_dir & CLIENT_PID=$! + sleep 1 do_facet mds${MDTIDX} lctl set_param fail_loc=0 fail mds${MDTIDX},mds$((MDTIDX + 1)) @@ -869,8 +881,8 @@ test_23d () { run_test 23d "c1 rmdir d1, M0 drop update reply and fail M0/M1, c2 mkdir d1" test_24 () { - [[ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.5.2) ]] || - { skip "Need MDS version newer than 2.5.2"; return 0; } + [[ "$MDS1_VERSION" -gt $(version_code 2.5.2) ]] || + skip "Need MDS version newer than 2.5.2" touch $MOUNT/$tfile stat $MOUNT/$tfile >&/dev/null @@ -893,7 +905,7 @@ run_test 24 "reconstruct on non-existing object" test_25() { cancel_lru_locks osc - $SETSTRIPE -i 0 -c 1 $DIR/$tfile + $LFS setstripe -i 0 -c 1 $DIR/$tfile # get lock for the 1st client dd if=/dev/zero of=$DIR/$tfile count=1 >/dev/null || @@ -904,7 +916,7 @@ test_25() { sleep 1 # failover, replay and resend replayed waiting locks - if [ $(lustre_version_code ost1) -ge $(version_code 2.6.90) ]; then + if [ "$OST1_VERSION" -ge $(version_code 2.6.90) ]; then #define OBD_FAIL_LDLM_SRV_CP_AST 0x325 do_facet ost1 lctl set_param fail_loc=0x80000325 else @@ -1003,7 +1015,7 @@ test_26() { run_test 26 "dbench and tar with mds failover" test_28() { - $SETSTRIPE -i 0 -c 1 $DIR2/$tfile + $LFS setstripe -i 0 -c 1 $DIR2/$tfile dd if=/dev/zero of=$DIR2/$tfile bs=4096 count=1 #define OBD_FAIL_LDLM_SRV_BL_AST 0x324 @@ -1024,6 +1036,58 @@ test_28() { } run_test 28 "lock replay should be ordered: waiting after granted" +test_29() { + local dir0=$DIR/$tdir/d0 + local dir1=$DIR/$tdir/d1 + + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 + [ $CLIENTCOUNT -lt 2 ] && skip "needs >= 2 clients" && return 0 + [ "$CLIENT1" == "$CLIENT2" ] && + skip "clients must be on different nodes" && return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -i0 $dir0 + $LFS mkdir -i1 $dir1 + sync + + replay_barrier mds2 + # create a remote dir, drop reply + #define OBD_FAIL_PTLRPC_ROUND_XID 0x530 + $LCTL set_param fail_loc=0x530 fail_val=36 + #define OBD_FAIL_MDS_REINT_MULTI_NET_REP 0x15a + do_facet mds2 $LCTL set_param fail_loc=0x8000015a + echo make remote dir d0 for $dir0 + $LFS mkdir -i1 -c1 $dir0/d3 & + sleep 1 + + echo make local dir d1 for $dir1 + do_node $CLIENT2 $LCTL set_param fail_loc=0x530 fail_val=36 + do_node $CLIENT2 mkdir $dir1/d4 + + fail mds2 +} +run_test 29 "replay vs update with the same xid" + +test_30() { + $LFS setstripe -E 1m -L mdt -E -1 $DIR/$tfile + #first write to have no problems with grants + dd if=/dev/zero of=$DIR/$tfile bs=4k count=10 || + error "dd on client failed" + dd if=/dev/zero of=$DIR/$tfile bs=4k count=10 seek=10 || + error "dd on client failed" + + #define OBD_FAIL_LDLM_REPLAY_PAUSE 0x32e + lctl set_param fail_loc=0x32e fail_val=4 + dd of=/dev/null if=$DIR2/$tfile & + local pid=$! + sleep 1 + + fail $SINGLEMDS + + wait $pid || error "dd on client failed" +} +run_test 30 "layout lock replay is not blocked on IO" + complete $SECONDS SLEEP=$((SECONDS - $NOW)) [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP