[ "$SLOW" = "no" ] && EXCEPT_SLOW="44b"
[ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
-# bug number for skipped test: LU-5761
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT 89"
+# bug number for skipped test:
+ ALWAYS_EXCEPT="$ALWAYS_EXCEPT "
build_test_filter
(( $beforeused + $extra >= $afterused )) && break
n_attempts=$((n_attempts + 1))
[ $n_attempts -gt 3 ] &&
- error "after $afterused > before $beforeused"
+ error "after $afterused > before $beforeused + $extra"
wait_zfs_commit $SINGLEMDS 5
sync_all_data
do_facet ost1 "lctl set_param fail_loc=0x80000216"
client_up || error "client_up failed"
+ # let the MDS discover the OST failure, attempt to recover, fail
+ # and recover again.
+ sleep $((3 * TIMEOUT))
+
createmany -o $DIR/$tfile 20 20 ||
error "createmany recraete $DIR/$tfile failed"
unlinkmany $DIR/$tfile 40 || error "unlinkmany $DIR/$tfile failed"
$LCTL set_param fail_val=$(($ORIG + 5))
#define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c
$LCTL set_param fail_loc=0x50c
- ls $DIR/$tfile > /dev/null 2>&1
+ touch $DIR/$tfile > /dev/null 2>&1
$LCTL set_param fail_loc=0
CUR=$(lctl get_param -n mdc.${FSNAME}-MDT0000*.timeouts |
awk '/network/ {print $4}')
cancel_lru_locks osc
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
rm -f $DIR/$tdir/$tfile
- wait_mds_ost_sync
- wait_delete_completed
- BLOCKS1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ wait_mds_ost_sync || error "initial MDS-OST sync timed out"
+ wait_delete_completed || error "initial wait delete timed out"
+ local blocks1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ local write_size=$(fs_log_size)
+
$SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
- dd if=/dev/zero bs=1M count=10 of=$DIR/$tdir/$tfile
+ [ $write_size -lt 1024 ] && write_size=1024
+ dd if=/dev/zero bs=${write_size}k count=10 of=$DIR/$tdir/$tfile
sync
stop ost1
facet_failover $SINGLEMDS
mount_facet ost1
zconf_mount $(hostname) $MOUNT || error "mount fails"
client_up || error "client_up failed"
- wait_mds_ost_sync
- wait_delete_completed
- BLOCKS2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
- [ $((BLOCKS2 - BLOCKS1)) -le 4 ] ||
- error $((BLOCKS2 - BLOCKS1)) blocks leaked
+ wait_mds_ost_sync || error "MDS-OST sync timed out"
+ wait_delete_completed || error "wait delete timed out"
+ local blocks2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+
+ [ $((blocks2 - blocks1)) -le $(fs_log_size) ] ||
+ error $((blocks2 - blocks1)) blocks leaked
}
run_test 89 "no disk space leak on late ost connection"
test_103() {
remote_mds_nodsh && skip "remote MDS with nodsh" && return
+ local mds_version=$(lustre_version_code $SINGLEMDS)
+ [[ $mds_version -gt $(version_code 2.8.54) ]] ||
+ { skip "Need MDS version 2.8.54+"; return; }
+
#define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162
do_facet mds1 $LCTL set_param fail_loc=0x80000162
# OBD_FAIL_SPLIT_UPDATE_REC 0x1702
do_facet mds1 "lctl set_param fail_loc=0x80001702"
- $LFS setdirstripe -c$MDSCOUNT $DIR/$tdir/striped_dir
+ $LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/striped_dir
fail mds1
$CHECKSTAT -t dir $DIR/$tdir/striped_dir ||
# OBD_FAIL_SPLIT_UPDATE_REC 0x1702
do_facet mds2 "lctl set_param fail_loc=0x80001702"
- $LFS setdirstripe -c$MDSCOUNT $DIR/$tdir/striped_dir
+ $LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/striped_dir
fail mds2
$CHECKSTAT -t dir $DIR/$tdir/striped_dir ||
replay_barrier mds1
mkdir $DIR/$tdir/dir_1
for ((i = 0; i < 20; i++)); do
- $LFS setdirstripe -c2 $DIR/$tdir/stripe_dir-$i
+ $LFS setdirstripe -i0 -c2 $DIR/$tdir/stripe_dir-$i
done
stop mds1
error "create dir-$i fails"
break
}
- $LFS setdirstripe -c2 $DIR/$tdir/stripe_dir-$i || {
+ $LFS setdirstripe -i0 -c2 $DIR/$tdir/stripe_dir-$i || {
error "create stripe_dir-$i fails"
break
}
}
run_test 120 "DNE fail abort should stop both normal and DNE replay"
+test_121() { # test 121, registered below as "lock replay timed out and race"
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] &&
+ skip "Don't support it before 2.11" &&
+ return 0 # skipped when the $SINGLEMDS version code is below 2.10.90
+
+ local at_max_saved=$(at_max_get mds) # kept so at_max can be put back at the end
+
+ touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+ cancel_lru_locks mdc # presumably drops cached MDC locks -- confirm in test-framework
+
+ multiop_bg_pause $DIR/$tfile s_s || error "multiop $DIR/$tfile failed"
+ mpid=$! # PID of the most recent background job; NOTE(review): mpid is not declared local
+
+ lctl set_param -n ldlm.cancel_unused_locks_before_replay "0" # set back to "1" during cleanup
+
+ stop mds1 # mds1 is mounted again (mount_facet) only after fail_loc is set
+ change_active mds1
+ wait_for_facet mds1
+
+ #define OBD_FAIL_TGT_RECOVERY_REQ_RACE 0x721
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x721 fail_val=0"
+ at_max_set 0 mds # at_max for mds stays 0 while mds1 is remounted; restored during cleanup
+
+ mount_facet mds1
+ wait_clients_import_state "$clients" mds1 FULL # NOTE(review): $clients is not set in this function
+ clients_up || clients_up || error "failover df: $?" # clients_up gets one retry before failing
+
+ kill -USR1 $mpid # signal the background multiop, then require it to exit successfully
+ wait $mpid || error "multiop_bg_pause pid failed"
+
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x0" # cleanup: reset fail_loc and undo the settings changed above
+ lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+ at_max_set $at_max_saved mds
+ rm -f $DIR/$tfile
+}
+run_test 121 "lock replay timed out and race"
+
complete $SECONDS
check_and_cleanup_lustre
exit_status