set -e
-# bug number: LU-2012 10124 LU-7372
-ALWAYS_EXCEPT="14b 15c 26 $REPLAY_DUAL_EXCEPT"
-
-SAVE_PWD=$PWD
PTLDEBUG=${PTLDEBUG:--1}
-LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
-SETUP=${SETUP:-""}
-CLEANUP=${CLEANUP:-""}
MOUNT_2=${MOUNT_2:-"yes"}
-export MULTIOP=${MULTIOP:-multiop}
-. $LUSTRE/tests/test-framework.sh
+LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+. $LUSTRE/tests/test-framework.sh
init_test_env $@
-. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
init_logging
remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
-# 7 (min)"
-[ "$SLOW" = "no" ] && EXCEPT_SLOW="21b"
+ALWAYS_EXCEPT="$REPLAY_DUAL_EXCEPT "
+# bug number for skipped test: LU-2012 LU-8333 LU-7372
+ALWAYS_EXCEPT+=" 14b 21b 26 "
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-[[ $(facet_fstype $SINGLEMDS) == zfs ]] &&
-# bug number for skipped test: LU-2230
+[[ "$mds1_FSTYPE" == zfs ]] &&
+# bug number for skipped test: LU-2230
ALWAYS_EXCEPT="$ALWAYS_EXCEPT 21b"
-build_test_filter
+if $SHARED_KEY; then
+# bug number for skipped tests: LU-9795 LU-9795
+ ALWAYS_EXCEPT+=" 0a 0b "
+fi
+# Approximate run time, in minutes, of the slow tests listed below (presumably 21b: ~7 min)
+[ "$SLOW" = "no" ] && EXCEPT_SLOW="21b "
+
+build_test_filter
check_and_setup_lustre
+
MOUNTED=$(mounted_lustre_filesystems)
if ! $(echo $MOUNTED' ' | grep -w -q $MOUNT2' '); then
- zconf_mount $HOSTNAME $MOUNT2
- MOUNTED2=yes
+ zconf_mount $HOSTNAME $MOUNT2
+ MOUNTED2=yes
fi
assert_DIR
wait_mds_ost_sync
wait_delete_completed
- local BEFOREUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
+ local beforeused=$(df -P $DIR | tail -1 | awk '{ print $3 }')
mkdir -p $MOUNT1/$tdir
- $SETSTRIPE -i 0 $MOUNT1/$tdir
+ $LFS setstripe -i 0 $MOUNT1/$tdir
replay_barrier $SINGLEMDS
createmany -o $MOUNT1/$tdir/$tfile- 5
- $SETSTRIPE -i 0 $MOUNT2/$tfile-2
+ $LFS setstripe -i 0 $MOUNT2/$tfile-2
dd if=/dev/zero of=$MOUNT2/$tfile-2 bs=1M count=5
createmany -o $MOUNT1/$tdir/$tfile-3- 5
umount $MOUNT2
wait_mds_ost_sync || error "wait_mds_ost_sync failed"
wait_delete_completed || error "wait_delete_complete failed"
- local AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
- log "before $BEFOREUSED, after $AFTERUSED"
+ local afterused=$(df -P $DIR | tail -1 | awk '{ print $3 }')
+ log "before $beforeused, after $afterused"
# leave some margin for some files/dirs to be modified (OI, llog, etc)
- [ $AFTERUSED -gt $((BEFOREUSED + 128)) ] &&
- error "after $AFTERUSED > before $BEFOREUSED" || true
+ [ $afterused -le $((beforeused + $(fs_log_size))) ] ||
+ error "after $afterused > before $beforeused"
}
run_test 14b "delete ost orphans if gap occured in objids due to VBR"
do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0
do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict
cancel_lru_locks mdc
- usleep 500 # wait to ensure first client is one that will be evicted
+ sleep 0.1 # wait to ensure first client is one that will be evicted
openfile -f O_RDONLY $MOUNT2/$tdir/$tfile
wait $OPENPID
do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1
test_19() { # Bug 10991 - resend of open request does not fail assertion.
replay_barrier $SINGLEMDS
- drop_ldlm_reply "createmany -o $DIR/$tfile 1" || return 1
+ drop_mdt_ldlm_reply "createmany -o $DIR/$tfile 1" || return 1
fail $SINGLEMDS
checkstat $DIR2/${tfile}0 || return 2
rm $DIR/${tfile}0 || return 3
local param_file=$TMP/$tfile-params
- local mdtidx=$($LFS getstripe -M $MOUNT1)
+ local mdtidx=$($LFS getstripe -m $MOUNT1)
local facet=mds$((mdtidx + 1))
save_lustre_params $facet "mdt.*.commit_on_sharing" > $param_file
test_21b_sub $facet || error "Not all renames are replayed. COS=$COS"
- # COS disabled (should fail)
- COS=0
- do_facet $facet lctl set_param mdt.*.commit_on_sharing=$COS
-
# there is still a window when transactions may be written to disk
# before the mds device is set R/O. To avoid such a rare test failure,
# the check is repeated several times.
+ COS=0
local n_attempts=1
while true; do
+ # COS disabled (should fail)
+ do_facet $facet lctl set_param mdt.*.commit_on_sharing=$COS
+
test_21b_sub $facet || break
n_attempts=$((n_attempts + 1))
[ $n_attempts -gt 3 ] &&
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
CLIENT_PID=$!
+ sleep 1
fail mds$((MDTIDX + 1))
wait $CLIENT_PID || error "lfs mkdir failed"
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
CLIENT_PID=$!
+ sleep 1
fail mds${MDTIDX},mds$((MDTIDX + 1))
wait $CLIENT_PID || error "lfs mkdir failed"
do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
- # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds$MDTIDX lctl set_param fail_loc=0x1701
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
CLIENT_PID=$!
+ sleep 1
do_facet mds$MDTIDX lctl set_param fail_loc=0
fail mds$MDTIDX
do_node $CLIENT1 mkdir -p $MOUNT1/${tdir}
- # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds$MDTIDX lctl set_param fail_loc=0x1701
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir &
CLIENT_PID=$!
+ sleep 1
do_facet mds$MDTIDX lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
local CLIENT_PID=$!
+ sleep 1
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds$((MDTIDX + 1))
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
local CLIENT_PID=$!
+ sleep 1
do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
do_node $CLIENT1 $LFS mkdir -i $MDTIDX $MOUNT1/$remote_dir ||
error "lfs mkdir failed"
- # OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
CLIENT_PID=$!
+ sleep 1
do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX}
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
do_node $CLIENT1 rmdir $MOUNT1/$remote_dir &
CLIENT_PID=$!
+ sleep 1
do_facet mds${MDTIDX} lctl set_param fail_loc=0
fail mds${MDTIDX},mds$((MDTIDX + 1))
run_test 23d "c1 rmdir d1, M0 drop update reply and fail M0/M1, c2 mkdir d1"
test_24 () {
- [[ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.5.2) ]] ||
- { skip "Need MDS version newer than 2.5.2"; return 0; }
+ [[ "$MDS1_VERSION" -gt $(version_code 2.5.2) ]] ||
+ skip "Need MDS version newer than 2.5.2"
touch $MOUNT/$tfile
stat $MOUNT/$tfile >&/dev/null
test_25() {
cancel_lru_locks osc
- $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+ $LFS setstripe -i 0 -c 1 $DIR/$tfile
# get lock for the 1st client
dd if=/dev/zero of=$DIR/$tfile count=1 >/dev/null ||
sleep 1
# failover, replay and resend replayed waiting locks
- if [ $(lustre_version_code ost1) -ge $(version_code 2.6.90) ]; then
+ if [ "$OST1_VERSION" -ge $(version_code 2.6.90) ]; then
#define OBD_FAIL_LDLM_SRV_CP_AST 0x325
do_facet ost1 lctl set_param fail_loc=0x80000325
else
error "set default dirstripe failed"
fi
cd $dbench_dir || break
- rundbench 1 -D $dbench_dir -t 100 > /dev/null 2&>1 ||
- break
+ rundbench 1 -D $dbench_dir -t 100 &>/dev/null || break
cd $DIR/$tdir || break
rm -rf $dbench_dir || break
done
}
run_test 26 "dbench and tar with mds failover"
+test_28() { # Lock replay ordering across an ost1 failover: two writers, one file.
+ $LFS setstripe -i 0 -c 1 $DIR2/$tfile # one stripe, starting at OST index 0
+ dd if=/dev/zero of=$DIR2/$tfile bs=4096 count=1 # first 4096-byte write, via $DIR2
+ # Arm the first fail_loc on ost1 before the competing write is started.
+ #define OBD_FAIL_LDLM_SRV_BL_AST 0x324
+ do_facet ost1 $LCTL set_param fail_loc=0x80000324 # NOTE(review): 0x324 plus high bit, presumably "fail once" - confirm
+ # Second write to the same file name, this time via $DIR, in the background.
+ dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 &
+ local pid=$! # PID of the background dd; reaped by the final wait
+ sleep 2 # NOTE(review): presumably lets the background dd reach ost1 first - confirm
+ # Replace the first fail_loc with the second one before restarting ost1.
+ #define OBD_FAIL_LDLM_GRANT_CHECK 0x32a
+ do_facet ost1 $LCTL set_param fail_loc=0x32a
+ # fail is a helper defined outside this view; here it is applied to ost1.
+ fail ost1
+ # After the failover: pause, run cancel_lru_locks for OST0000-osc, then reap dd.
+ sleep 2
+ cancel_lru_locks OST0000-osc
+ wait $pid || error "dd failed" # a non-zero dd exit status fails the test
+}
+run_test 28 "lock replay should be ordered: waiting after granted"
+
+test_29() { # Replay vs. update with the same xid, using two MDTs and two client nodes.
+ local dir0=$DIR/$tdir/d0
+ local dir1=$DIR/$tdir/d1
+ # Preconditions: skip unless there are >= 2 MDTs and two clients on distinct nodes.
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ [ $CLIENTCOUNT -lt 2 ] && skip "needs >= 2 clients" && return 0
+ [ "$CLIENT1" == "$CLIENT2" ] &&
+ skip "clients must be on different nodes" && return 0
+ # Create the parent, then d0 with -i0 and d1 with -i1, and sync before the barrier.
+ mkdir -p $DIR/$tdir
+ $LFS mkdir -i0 $dir0
+ $LFS mkdir -i1 $dir1
+ sync
+ # Everything from here to "fail mds2" happens after the replay barrier on mds2.
+ replay_barrier mds2
+ # create a remote dir, drop reply
+ #define OBD_FAIL_PTLRPC_ROUND_XID 0x530
+ $LCTL set_param fail_loc=0x530 fail_val=36 # set on the node running this script
+ #define OBD_FAIL_MDS_REINT_MULTI_NET_REP 0x15a
+ do_facet mds2 $LCTL set_param fail_loc=0x8000015a # NOTE(review): 0x15a plus high bit, presumably "fail once" - confirm
+ echo make remote dir d0 for $dir0
+ $LFS mkdir -i1 -c1 $dir0/d3 & # NOTE(review): backgrounded and never waited for in this function
+ sleep 1
+ # Second client: same 0x530/fail_val=36 setting, then a plain mkdir under d1.
+ echo make local dir d1 for $dir1
+ do_node $CLIENT2 $LCTL set_param fail_loc=0x530 fail_val=36
+ do_node $CLIENT2 mkdir $dir1/d4
+ # fail is a helper defined outside this view; here it is applied to mds2.
+ fail mds2
+}
+run_test 29 "replay vs update with the same xid"
+
# Suite epilogue. complete, NOW and TIMEOUT are defined outside this view.
# complete receives bash's SECONDS counter as its argument.
complete $SECONDS
# SLEEP is the number of seconds elapsed since NOW.
SLEEP=$((SECONDS - $NOW))
# If that is less than TIMEOUT, sleep for the same number of seconds again.
[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP