# Skip these tests
# bug number for skipped tests:
-# LU-472
-ALWAYS_EXCEPT="$REPLAY_SINGLE_EXCEPT 61d"
+ALWAYS_EXCEPT="$REPLAY_SINGLE_EXCEPT "
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# 7.5 (min)"
[ "$SLOW" = "no" ] && EXCEPT_SLOW="44b"
[ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
-# bug number for skipped test: LU-1867 LU-3127
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 89 73b"
+# bug number for skipped test:
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT "
build_test_filter
}
run_test 2b "touch"
+# Replay test 2c ("setstripe replay"): create a file striped over
+# $OSTCOUNT OSTs after a replay barrier on $SINGLEMDS, fail that MDS, and
+# check with $CHECKSTAT that the file exists as a regular file afterwards.
+test_2c() {
+	replay_barrier $SINGLEMDS
+	# Check the create itself, so that a setstripe failure is reported
+	# as such rather than surfacing later as a checkstat failure.
+	$LFS setstripe -c $OSTCOUNT $DIR/$tfile ||
+		error "$LFS setstripe $DIR/$tfile failed"
+	fail $SINGLEMDS
+	$CHECKSTAT -t file $DIR/$tfile ||
+		error "$CHECKSTAT $DIR/$tfile check failed"
+}
+run_test 2c "setstripe replay"
+
+# Replay test 2d ("setdirstripe replay"): create a directory striped over
+# $MDSCOUNT MDTs (master on MDT index 0) after a replay barrier on
+# $SINGLEMDS, fail that MDS, and check with $CHECKSTAT that the directory
+# exists afterwards.
+test_2d() {
+	replay_barrier $SINGLEMDS
+	# Check the create itself, so that a setdirstripe failure is reported
+	# as such rather than surfacing later as a checkstat failure.
+	$LFS setdirstripe -i 0 -c $MDSCOUNT $DIR/$tdir ||
+		error "$LFS setdirstripe $DIR/$tdir failed"
+	fail $SINGLEMDS
+	$CHECKSTAT -t dir $DIR/$tdir ||
+		error "$CHECKSTAT $DIR/$tdir check failed"
+}
+run_test 2d "setdirstripe replay"
+
test_3a() {
local file=$DIR/$tfile
replay_barrier $SINGLEMDS
+# Orphan cleanup check: start a background dd, unlink its file while dd is
+# running, evict the client, fail over $SINGLEMDS, then compare the df
+# "Used" figure before and after, allowing fs_log_size of slack.
test_20b() { # bug 10480
local wait_timeout=$((TIMEOUT * 4))
- local BEFOREUSED
- local AFTERUSED
+ local extra=$(fs_log_size)
+ local n_attempts=1
+
+ sync_all_data
+ $LFS setstripe -i 0 -c 1 $DIR
+
+ # baseline: third field ("Used") of the last line of df -P output
+ local beforeused=$(df -P $DIR | tail -1 | awk '{ print $3 }')
- BEFOREUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
dd if=/dev/zero of=$DIR/$tfile bs=4k count=10000 &
while [ ! -e $DIR/$tfile ] ; do
- usleep 60 # give dd a chance to start
+ usleep 60 # give dd a chance to start
done
- $GETSTRIPE $DIR/$tfile || error "$GETSTRIPE $DIR/$tfile failed"
+ $LFS getstripe $DIR/$tfile || error "$LFS getstripe $DIR/$tfile failed"
# make it an orphan
rm -f $DIR/$tfile || error "rm -f $DIR/$tfile failed"
mds_evict_client
- client_up || client_up || true # reconnect
+ client_up || client_up || true # reconnect
- do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync 1"
+ do_facet $SINGLEMDS "lctl set_param -n osd*.*MDT*.force_sync=1"
- fail $SINGLEMDS # start orphan recovery
+ fail $SINGLEMDS # start orphan recovery
wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
- wait_delete_completed_mds $wait_timeout ||
- error "delete did not complete"
+ wait_delete_completed $wait_timeout || error "delete did not finish"
+ sync_all_data
- AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }')
- log "before $BEFOREUSED, after $AFTERUSED"
- (( $AFTERUSED > $BEFOREUSED + $(fs_log_size) )) &&
- error "after $AFTERUSED > before $BEFOREUSED"
- return 0
+ # Re-sample usage until it is within $extra of the baseline; report
+ # an error on the third failed comparison. Between attempts, wait for
+ # a commit on $SINGLEMDS and sync again.
+ while true; do
+ local afterused=$(df -P $DIR | tail -1 | awk '{ print $3 }')
+ log "before $beforeused, after $afterused"
+
+ (( $beforeused + $extra >= $afterused )) && break
+ n_attempts=$((n_attempts + 1))
+ [ $n_attempts -gt 3 ] &&
+ error "after $afterused > before $beforeused + $extra"
+
+ wait_zfs_commit $SINGLEMDS 5
+ sync_all_data
+ done
}
-run_test 20b "write, unlink, eviction, replay, (test mds_cleanup_orphans)"
+
+run_test 20b "write, unlink, eviction, replay (test mds_cleanup_orphans)"
test_20c() { # bug 10480
multiop_bg_pause $DIR/$tfile Ow_c ||
# the page, guarnateeing that the unlock from the RPC completion would
# assert on trying to unlock the unlocked page.
test_41() {
- [ $OSTCOUNT -lt 2 ] &&
- skip_env "skipping test 41: we don't have a second OST to test with" &&
- return
+ [ $OSTCOUNT -lt 2 ] && skip_env "needs >= 2 OSTs" && return
local f=$MOUNT/$tfile
# make sure the start of the file is ost1
test_48() {
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
- [ "$OSTCOUNT" -lt "2" ] &&
- skip_env "$OSTCOUNT < 2 OSTs -- skipping" && return
+ [ "$OSTCOUNT" -lt "2" ] && skip_env "needs >= 2 OSTs" && return
replay_barrier $SINGLEMDS
createmany -o $DIR/$tfile 20 ||
do_facet ost1 "lctl set_param fail_loc=0x80000216"
client_up || error "client_up failed"
+ # let the MDS discover the OST failure, attempt to recover, fail
+ # and recover again.
+ sleep $((3 * TIMEOUT))
+
createmany -o $DIR/$tfile 20 20 ||
error "createmany recraete $DIR/$tfile failed"
unlinkmany $DIR/$tfile 40 || error "unlinkmany $DIR/$tfile failed"
touch $DIR/$tfile || error "touch $DIR/$tfile failed"
replay_barrier $SINGLEMDS
fail $SINGLEMDS
- sleep 1
+ wait_recovery_complete $SINGLEMDS || error "MDS recovery is not done"
+ wait_mds_ost_sync || error "wait_mds_ost_sync failed"
$CHECKSTAT -t file $DIR/$tfile ||
error "$CHECKSTAT $DIR/$tfile attribute check failed"
do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+# AT test 65b: sample the client's "portal 6" timeout figure for OST0000,
+# make ost1 pause (fail_loc 0x224) for a padded version of that value
+# during a bulk write, and require an "Early reply #" line in the debug log.
test_65b() #bug 3055
{
- remote_ost_nodsh && skip "remote OST with nodsh" && return 0
-
- at_start || return 0
- # turn on D_ADAPTTO
- debugsave
- $LCTL set_param debug="other trace"
- $LCTL dk > /dev/null
- # Slow down a request to the current service time, this is critical
- # because previous tests may have caused this value to increase.
- $SETSTRIPE --stripe-index=0 --count=1 $DIR/$tfile
- multiop $DIR/$tfile Ow1yc
- REQ_DELAY=`lctl get_param -n osc.${FSNAME}-OST0000-osc-*.timeouts |
- awk '/portal 6/ {print $5}'`
- REQ_DELAY=$((${REQ_DELAY} + ${REQ_DELAY} / 4 + 5))
-
- do_facet ost1 lctl set_param fail_val=${REQ_DELAY}
-#define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224
- do_facet ost1 $LCTL set_param fail_loc=0x224
+ remote_ost_nodsh && skip "remote OST with nodsh" && return 0
- rm -f $DIR/$tfile
- $SETSTRIPE --stripe-index=0 --count=1 $DIR/$tfile
- # force some real bulk transfer
- multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c
+ at_start || return 0
+ # turn on D_ADAPTTO
+ debugsave
+ $LCTL set_param debug="other trace"
+ $LCTL dk > /dev/null
+ # Slow down a request to the current service time, this is critical
+ # because previous tests may have caused this value to increase.
+ $SETSTRIPE --stripe-index=0 --stripe-count=1 $DIR/$tfile ||
+ error "$SETSTRIPE failed for $DIR/$tfile"
+
+ multiop $DIR/$tfile Ow1yc
+ REQ_DELAY=`lctl get_param -n osc.${FSNAME}-OST0000-osc-*.timeouts |
+ awk '/portal 6/ {print $5}'`
+ # pad the sampled value: add a quarter of it plus 5
+ REQ_DELAY=$((${REQ_DELAY} + ${REQ_DELAY} / 4 + 5))
+
+ do_facet ost1 lctl set_param fail_val=${REQ_DELAY}
+ #define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224
+ do_facet ost1 $LCTL set_param fail_loc=0x224
- do_facet ost1 $LCTL set_param fail_loc=0
- # check for log message
- $LCTL dk | grep "Early reply #" || error "No early reply"
- debugrestore
- # client should show REQ_DELAY estimates
- lctl get_param -n osc.${FSNAME}-OST0000-osc-*.timeouts | grep portal
+ rm -f $DIR/$tfile
+ $SETSTRIPE --stripe-index=0 --stripe-count=1 $DIR/$tfile ||
+ error "$SETSTRIPE failed"
+ # force some real bulk transfer
+ multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c
+
+ do_facet ost1 $LCTL set_param fail_loc=0
+ # check for log message
+ $LCTL dk | grep "Early reply #" || error "No early reply"
+ debugrestore
+ # client should show REQ_DELAY estimates
+ lctl get_param -n osc.${FSNAME}-OST0000-osc-*.timeouts | grep portal
}
run_test 65b "AT: verify early replies on packed reply / bulk"
$LCTL set_param fail_val=$(($ORIG + 5))
#define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c
$LCTL set_param fail_loc=0x50c
- ls $DIR/$tfile > /dev/null 2>&1
+ touch $DIR/$tfile > /dev/null 2>&1
$LCTL set_param fail_loc=0
CUR=$(lctl get_param -n mdc.${FSNAME}-MDT0000*.timeouts |
awk '/network/ {print $4}')
do_facet ost1 "echo $TIMEOUT >> $ldlm_enqueue_min_r"
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
- $SETSTRIPE --stripe-index=0 --count=1 $DIR/$tdir
-#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312
- $LCTL set_param fail_val=$(($TIMEOUT - 1))
- $LCTL set_param fail_loc=0x80000312
- cp /etc/profile $DIR/$tdir/${tfile}_1 || error "1st cp failed $?"
- $LCTL set_param fail_val=$((TIMEOUT * 5 / 4))
- $LCTL set_param fail_loc=0x80000312
- cp /etc/profile $DIR/$tdir/${tfile}_2 || error "2nd cp failed $?"
- $LCTL set_param fail_loc=0
-
- echo $ENQ_MIN >> $ldlm_enqueue_min
- do_facet ost1 "echo $ENQ_MIN_R >> $ldlm_enqueue_min_r"
- rm -rf $DIR/$tdir
- return 0
+ $SETSTRIPE --stripe-index=0 -c 1 $DIR/$tdir ||
+ error "$SETSTRIPE failed for $DIR/$tdir"
+ #define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312
+ $LCTL set_param fail_val=$(($TIMEOUT - 1))
+ $LCTL set_param fail_loc=0x80000312
+ cp /etc/profile $DIR/$tdir/${tfile}_1 || error "1st cp failed $?"
+ $LCTL set_param fail_val=$((TIMEOUT * 5 / 4))
+ $LCTL set_param fail_loc=0x80000312
+ cp /etc/profile $DIR/$tdir/${tfile}_2 || error "2nd cp failed $?"
+ $LCTL set_param fail_loc=0
+
+ echo $ENQ_MIN >> $ldlm_enqueue_min
+ do_facet ost1 "echo $ENQ_MIN_R >> $ldlm_enqueue_min_r"
+ rm -rf $DIR/$tdir
+ return 0
}
run_test 68 "AT: verify slowing locks"
# [ $CLIENTCOUNT -lt 2 ] &&
# { skip "Need 2 or more clients, have $CLIENTCOUNT" && return; }
+ [[ $(lustre_version_code ost1) -lt $(version_code 2.9.53) ]] &&
+ skip "Need server version at least 2.9.53" && return
+
echo "mount clients $CLIENTS ..."
zconf_mount_clients $CLIENTS $MOUNT
wait $CLIENT_PID || error "rm remote dir failed"
- stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+ stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
rm -rf $DIR/$tdir || error "rmdir failed"
wait $CLIENT_PID || error "rm remote dir failed"
- stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+ stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
rm -rf $DIR/$tdir || error "rmdir failed"
wait $CLIENT_PID || error "rm remote dir failed"
- stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+ stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
rm -rf $DIR/$tdir || error "rmdir failed"
wait $CLIENT_PID || error "rm remote dir failed"
- stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+ stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
rm -rf $DIR/$tdir || error "rmdir failed"
wait $CLIENT_PID || error "rm remote dir failed"
- stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+ stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
rm -rf $DIR/$tdir || error "rmdir failed"
wait $CLIENT_PID || error "rm remote dir failed"
- stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+ stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
rm -rf $DIR/$tdir || error "rmdir failed"
wait $CLIENT_PID || error "rm remote dir failed"
- stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+ stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
rm -rf $DIR/$tdir || error "rmdir failed"
wait $CLIENT_PID || error "rm remote dir failed"
- stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+ stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
rm -rf $DIR/$tdir || error "rmdir failed"
test_85b() { #bug 16774
lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
- do_facet mgs $LCTL pool_new $FSNAME.$TESTNAME ||
+ if ! combined_mgs_mds ; then
+ mount_mgs_client
+ fi
+
+ create_pool $FSNAME.$TESTNAME ||
error "unable to create pool $TESTNAME"
do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $FSNAME-OST0000 ||
error "unable to add pool $TESTNAME"
do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
error "unable to destroy the pool $TESTNAME"
+ if ! combined_mgs_mds ; then
+ umount_mgs_client
+ fi
+
if [ $count2 -ge $count ]; then
error "unused locks are not canceled"
fi
cancel_lru_locks osc
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
rm -f $DIR/$tdir/$tfile
- wait_mds_ost_sync
- wait_delete_completed
- BLOCKS1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ wait_mds_ost_sync || error "initial MDS-OST sync timed out"
+ wait_delete_completed || error "initial wait delete timed out"
+ local blocks1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ local write_size=$(fs_log_size)
+
$SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
- dd if=/dev/zero bs=1M count=10 of=$DIR/$tdir/$tfile
+ [ $write_size -lt 1024 ] && write_size=1024
+ dd if=/dev/zero bs=${write_size}k count=10 of=$DIR/$tdir/$tfile
sync
stop ost1
facet_failover $SINGLEMDS
mount_facet ost1
zconf_mount $(hostname) $MOUNT || error "mount fails"
client_up || error "client_up failed"
- wait_mds_ost_sync
- wait_delete_completed
- BLOCKS2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
- [ $((BLOCKS2 - BLOCKS1)) -le 4 ] ||
- error $((BLOCKS2 - BLOCKS1)) blocks leaked
-}
+ wait_mds_ost_sync || error "MDS-OST sync timed out"
+ wait_delete_completed || error "wait delete timed out"
+ local blocks2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ [ $((blocks2 - blocks1)) -le $(fs_log_size) ] ||
+ error $((blocks2 - blocks1)) blocks leaked
+}
run_test 89 "no disk space leak on late ost connection"
+# Cleanup helper for test 90: bring facet $1 back by running reboot_facet,
+# change_active, wait_for_facet and mount_facet on it in that order, then
+# clients_up. Reports an error if the mount fails.
cleanup_90 () {
- local facet=$1
- trap 0
- reboot_facet $facet
- change_active $facet
- wait_for_facet $facet
- mount_facet $facet || error "Restart of $facet failed"
- clients_up
+ local facet=$1
+
+ # reset the EXIT (signal 0) trap to its default so this cleanup is
+ # not triggered again on exit
+ trap 0
+ reboot_facet $facet
+ change_active $facet
+ wait_for_facet $facet
+ mount_facet $facet || error "Restart of $facet failed"
+ clients_up
}
test_90() { # bug 19494
test_103() {
remote_mds_nodsh && skip "remote MDS with nodsh" && return
+ local mds_version=$(lustre_version_code $SINGLEMDS)
+ [[ $mds_version -gt $(version_code 2.8.54) ]] ||
+ { skip "Need MDS version 2.8.54+"; return; }
+
#define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162
do_facet mds1 $LCTL set_param fail_loc=0x80000162
replay_barrier mds1
replay_barrier mds2
$LFS mkdir -i1 -c$MDSCOUNT $DIR/$tdir/striped_dir
- fail mds1,mds2
+ fail mds2,mds1
check_striped_dir_110 || error "check striped_dir failed"
# OBD_FAIL_SPLIT_UPDATE_REC 0x1702
do_facet mds1 "lctl set_param fail_loc=0x80001702"
- $LFS setdirstripe -c$MDSCOUNT $DIR/$tdir/striped_dir
+ $LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/striped_dir
fail mds1
$CHECKSTAT -t dir $DIR/$tdir/striped_dir ||
# OBD_FAIL_SPLIT_UPDATE_REC 0x1702
do_facet mds2 "lctl set_param fail_loc=0x80001702"
- $LFS setdirstripe -c$MDSCOUNT $DIR/$tdir/striped_dir
+ $LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/striped_dir
fail mds2
$CHECKSTAT -t dir $DIR/$tdir/striped_dir ||
replay_barrier mds1
mkdir $DIR/$tdir/dir_1
for ((i = 0; i < 20; i++)); do
- $LFS setdirstripe -c2 $DIR/$tdir/stripe_dir-$i
+ $LFS setdirstripe -i0 -c2 $DIR/$tdir/stripe_dir-$i
done
stop mds1
error "create dir-$i fails"
break
}
- $LFS setdirstripe -c2 $DIR/$tdir/stripe_dir-$i || {
+ $LFS setdirstripe -i0 -c2 $DIR/$tdir/stripe_dir-$i || {
error "create stripe_dir-$i fails"
break
}
}
run_test 120 "DNE fail abort should stop both normal and DNE replay"
+# Test 121 ("lock replay timed out and race"): with a paused background
+# multiop holding $DIR/$tfile, restart mds1 with fail_loc 0x721 set and
+# at_max forced to 0, then check that the clients reconnect and the
+# multiop finishes successfully. Skipped when $SINGLEMDS is older than
+# 2.10.90.
+test_121() {
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] &&
+		skip "Don't support it before 2.11" &&
+		return 0
+
+	local at_max_saved=$(at_max_get mds)
+	# PID of the paused background multiop; kept local so it does not
+	# leak into (or clobber) the global scope shared by other tests
+	local mpid
+
+	touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+	cancel_lru_locks mdc
+
+	multiop_bg_pause $DIR/$tfile s_s || error "multiop $DIR/$tfile failed"
+	mpid=$!
+
+	# NOTE(review): set to "0" here and unconditionally back to "1" at
+	# the end; this assumes "1" was the value before the test - confirm.
+	lctl set_param -n ldlm.cancel_unused_locks_before_replay "0"
+
+	stop mds1
+	change_active mds1
+	wait_for_facet mds1
+
+	#define OBD_FAIL_TGT_RECOVERY_REQ_RACE 0x721
+	do_facet $SINGLEMDS "lctl set_param fail_loc=0x721 fail_val=0"
+	at_max_set 0 mds
+
+	mount_facet mds1
+	# NOTE(review): $clients is not defined in this function; presumably
+	# it is expected from the caller's scope - confirm, or use $CLIENTS.
+	wait_clients_import_state "$clients" mds1 FULL
+	clients_up || clients_up || error "failover df: $?"
+
+	# resume the paused multiop and require that it exits successfully
+	kill -USR1 $mpid
+	wait $mpid || error "multiop_bg_pause pid failed"
+
+	# restore the settings changed above
+	do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+	lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+	at_max_set $at_max_saved mds
+	rm -f $DIR/$tfile
+}
+run_test 121 "lock replay timed out and race"
+
complete $SECONDS
check_and_cleanup_lustre
exit_status