#!/bin/bash
-# -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
-# vim:shiftwidth=4:softtabstop=4:tabstop=4:
set -e
run_test 10 "resending a replayed unlink"
test_11() {
- replay_barrier $SINGLEMDS
- mcreate $MOUNT1/$tfile-1
- mcreate $MOUNT2/$tfile-2
- mcreate $MOUNT1/$tfile-3
- mcreate $MOUNT2/$tfile-4
- mcreate $MOUNT1/$tfile-5
- # drop all reint replies for a while
- do_facet $SINGLEMDS lctl set_param fail_loc=0x0119
- # note that with this fail_loc set, facet_failover df will fail
- facet_failover $SINGLEMDS
- #sleep for while, let both clients reconnect and timeout
- sleep $((TIMEOUT * 2))
- do_facet $SINGLEMDS lctl set_param fail_loc=0
+ replay_barrier $SINGLEMDS
+ mcreate $DIR1/$tfile-1
+ mcreate $DIR2/$tfile-2
+ mcreate $DIR1/$tfile-3
+ mcreate $DIR2/$tfile-4
+ mcreate $DIR1/$tfile-5
+ # drop all reint replies for a while
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0119
+ # note that with this fail_loc set, facet_failover df will fail
+ facet_failover $SINGLEMDS
- rm $MOUNT1/$tfile-[1-5] || return 1
+ local clients=${CLIENTS:-$HOSTNAME}
+ wait_clients_import_state "$clients" $SINGLEMDS FULL
- return 0
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+ rm $DIR1/$tfile-[1-5] || return 1
+
+ return 0
}
run_test 11 "both clients timeout during replay"
do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b # hold enqueue
sleep 1
#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305
+ do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0
do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict
cancel_lru_locks mdc
usleep 500 # wait to ensure first client is one that will be evicted
openfile -f O_RDONLY $MOUNT2/$tdir/$tfile
wait $OPENPID
+ do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1
do_facet $SINGLEMDS lctl debug_kernel |
grep "not entering recovery" && error "client not evicted"
do_facet client "lctl set_param fail_loc=0"
run_test 19 "resend of open request"
test_20() { #16389
- BEFORE=`date +%s`
- replay_barrier $SINGLEMDS
- touch $MOUNT1/a
- touch $MOUNT2/b
- umount $MOUNT2
- fail $SINGLEMDS
- rm $MOUNT1/a
- zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
- TIER1=$((`date +%s` - BEFORE))
- BEFORE=`date +%s`
- replay_barrier $SINGLEMDS
- touch $MOUNT1/a
- touch $MOUNT2/b
- umount $MOUNT2
- fail $SINGLEMDS
- rm $MOUNT1/a
- zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
- TIER2=$((`date +%s` - BEFORE))
- [ $TIER2 -ge $((TIER1 * 2)) ] && \
- error "recovery time is growing $TIER2 > $TIER1"
- return 0
+ local before=$SECONDS
+ replay_barrier $SINGLEMDS
+ touch $DIR1/$tfile.a
+ touch $DIR2/$tfile.b
+ umount $DIR2
+ fail $SINGLEMDS
+ rm $DIR1/$tfile.a
+ zconf_mount $HOSTNAME $DIR2 || error "mount $DIR2 fail"
+ local tier1=$((SECONDS - before))
+
+ before=$SECONDS
+ replay_barrier $SINGLEMDS
+ touch $DIR1/$tfile.a
+ touch $DIR2/$tfile.b
+ umount $DIR2
+ fail $SINGLEMDS
+ rm $DIR1/$tfile.a
+ zconf_mount $HOSTNAME $DIR2 || error "mount $DIR2 fail"
+ local tier2=$((SECONDS - before))
+
+ # fail if the second recovery took 2.25x or more of the first one
+ ((tier2 < tier1 * 9 / 4)) ||
+ error "recovery time $tier2 >= 2.25x original time $tier1"
}
run_test 20 "recovery time is not increasing"
run_test 21a "commit on sharing"
test_21b_sub () {
- local mds=$1
- do_node $CLIENT1 rm -f $MOUNT1/$tfile-*
+ local mds=$1
+ do_node $CLIENT1 rm -f $MOUNT1/$tfile-*
- do_facet $mds sync
- do_node $CLIENT1 touch $MOUNT1/$tfile-1
- do_node $CLIENT2 mv $MOUNT1/$tfile-1 $MOUNT1/$tfile-2
- do_node $CLIENT1 mv $MOUNT1/$tfile-2 $MOUNT1/$tfile-3
+ do_facet $mds sync
+ do_node $CLIENT1 touch $MOUNT1/$tfile-1
+ do_node $CLIENT2 mv $MOUNT1/$tfile-1 $MOUNT1/$tfile-2
+ do_node $CLIENT1 mv $MOUNT1/$tfile-2 $MOUNT1/$tfile-3
- replay_barrier_nosync $mds
- shutdown_client $CLIENT2 $MOUNT1
+ replay_barrier_nosync $mds
+ shutdown_client $CLIENT2 $MOUNT1
- facet_failover $mds
+ facet_failover $mds
- # were renames replayed?
- local rc=0
- echo UNLINK $MOUNT1/$tfile-3
- do_node $CLIENT1 unlink $MOUNT1/$tfile-3 || \
- { echo "unlink $tfile-3 fail!" && rc=1; }
+ # were renames replayed?
+ local rc=0
+ echo UNLINK $MOUNT1/$tfile-3
+ do_node $CLIENT1 unlink $MOUNT1/$tfile-3 ||
+ { echo "unlink $tfile-3 fail!" && rc=1; }
- boot_node $CLIENT2
- zconf_mount_clients $CLIENT2 $MOUNT1 || error "mount $CLIENT2 $MOUNT1 fail"
+ boot_node $CLIENT2
+ zconf_mount_clients $CLIENT2 $MOUNT1 ||
+ error "mount $CLIENT2 $MOUNT1 fail"
- return $rc
+ return $rc
}
test_21b() {
- [ -z "$CLIENTS" ] && skip "Need two or more clients." && return
- [ $CLIENTCOUNT -lt 2 ] && \
- { skip "Need two or more clients, have $CLIENTCOUNT" && return; }
+ [ -z "$CLIENTS" ] && skip "Need two or more clients" && return
+ [ $CLIENTCOUNT -lt 2 ] &&
+ { skip "Need 2+ clients, have $CLIENTCOUNT" && return; }
- if [ "$FAILURE_MODE" = "HARD" ] && mixed_mdt_devs; then
- skip "Several mdt services on one mds node are used with FAILURE_MODE=$FAILURE_MODE. "
- return 0
- fi
+ if [ "$FAILURE_MODE" = "HARD" ] && mixed_mdt_devs; then
+ skip "Several MDTs on one MDS with FAILURE_MODE=$FAILURE_MODE"
+ return 0
+ fi
+ zconf_umount_clients $CLIENTS $MOUNT2
+ zconf_mount_clients $CLIENTS $MOUNT1
- zconf_umount_clients $CLIENTS $MOUNT2
- zconf_mount_clients $CLIENTS $MOUNT1
+ local param_file=$TMP/$tfile-params
- local param_file=$TMP/$tfile-params
+ local mdtidx=$($LFS getstripe -M $MOUNT1)
+ local facet=mds$((mdtidx + 1))
- local num=$(get_mds_dir $MOUNT1)
+ save_lustre_params $facet "mdt.*.commit_on_sharing" > $param_file
- save_lustre_params mds$num "mdt.*.commit_on_sharing" > $param_file
+ # COS enabled
+ local COS=1
+ do_facet $facet lctl set_param mdt.*.commit_on_sharing=$COS
- # COS enabled
- local COS=1
- do_facet mds$num lctl set_param mdt.*.commit_on_sharing=$COS
+ test_21b_sub $facet || error "Not all renames are replayed. COS=$COS"
- test_21b_sub mds$num || error "Not all renames are replayed. COS=$COS"
+ # COS disabled (should fail)
+ COS=0
+ do_facet $facet lctl set_param mdt.*.commit_on_sharing=$COS
- # COS disabled (should fail)
- COS=0
- do_facet mds$num lctl set_param mdt.*.commit_on_sharing=$COS
-
- # there is still a window when transactions may be written to disk before
- # the mds device is set R/O. To avoid such a rare test failure, the check
- # is repeated several times.
- local n_attempts=1
- while true; do
- test_21b_sub mds$num || break;
- let n_attempts=n_attempts+1
- [ $n_attempts -gt 3 ] &&
- error "The test cannot check whether COS works or not: all renames are replied w/o COS"
- done
- zconf_mount_clients $CLIENTS $MOUNT2
- restore_lustre_params < $param_file
- rm -f $param_file
- return 0
+ # There is still a window in which transactions may be written to disk
+ # before the MDS device is set read-only. To avoid such a rare test
+ # failure, the check is repeated up to three times.
+ local n_attempts=1
+ while true; do
+ test_21b_sub $facet || break
+ n_attempts=$((n_attempts + 1))
+ [ $n_attempts -gt 3 ] &&
+ error "can't check if COS works: rename replied w/o COS"
+ done
+ zconf_mount_clients $CLIENTS $MOUNT2
+ restore_lustre_params < $param_file
+ rm -f $param_file
+ return 0
}
run_test 21b "commit on sharing, two clients"
run_test 23d "c1 rmdir d1, M0 drop update reply and fail M0/M1, c2 mkdir d1"
test_24 () {
+ [[ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.5.2) ]] ||
+ { skip "Need MDS version newer than 2.5.2"; return 0; }
+
touch $MOUNT/$tfile
stat $MOUNT/$tfile >&/dev/null
# OBD_FAIL_MDS_REINT_NET_REP
# end commit on sharing tests
-test_24() {
+test_25() {
cancel_lru_locks osc
$SETSTRIPE -i 0 -c 1 $DIR/$tfile
killall multiop
wait
}
-run_test 24 "replay|resend"
+run_test 25 "replay|resend"
complete $SECONDS
SLEEP=$((`date +%s` - $NOW))