ALWAYS_EXCEPT="$REPLAY_SINGLE_EXCEPT "
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-# 7.5 (min)"
+# time in minutes: 7.5
[ "$SLOW" = "no" ] && EXCEPT_SLOW="44b"
-[ $(facet_fstype $SINGLEMDS) = "zfs" ] &&
-# bug number for skipped test:
- ALWAYS_EXCEPT="$ALWAYS_EXCEPT "
+# When the MDS backend is ZFS, extend the skip list with tests that have
+# open bugs.  Every appended entry carries its own leading space so the
+# list stays correctly separated no matter how ALWAYS_EXCEPT ended before
+# this point.
+if [ "$(facet_fstype $SINGLEMDS)" = "zfs" ]; then
+# bug number for skipped test: LU-11388
+	ALWAYS_EXCEPT+=" 131b"
+	if [ $MDSCOUNT -gt 1 ]; then
+# bug number for skipped test: LU-10740 LU-11330 LU-9157 LU-11336
+		ALWAYS_EXCEPT+=" 2d 70d 80c 80d"
+	fi
+fi
build_test_filter
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000701"
# lctl below may fail, it is valid case
$LCTL --device $mdcdev recover
- df $MOUNT
+ $LFS df $MOUNT
done
do_facet $SINGLEMDS "lctl set_param fail_loc=0"
[ $at_max_saved -ne 0 ] && at_max_set $at_max_saved mds
}
rm -rf $DIR/$tdir/test || {
echo "rmdir fails"
+ ls -lR $DIR/$tdir
break
}
rm -rf $DIR/$tdir/test1 || {
echo "rmdir fails"
+ ls -lR $DIR/$tdir/test1
break
}
done
}
run_test 70d "mkdir/rmdir striped dir ${MDSCOUNT}mdts recovery"
-cleanup_70e() {
- trap 0
- kill -9 $rename_70e_pid
-}
-
test_70e () {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
local clients=${CLIENTS:-$HOSTNAME}
local rc=0
- echo ha > /proc/sys/lnet/debug
+ lctl set_param debug=+ha
zconf_mount_clients $clients $MOUNT
local duration=300
$LFS mkdir -i0 $DIR/$tdir/test_1
touch $DIR/$tdir/test_0/a
touch $DIR/$tdir/test_1/b
- trap cleanup_70e EXIT
(
- while true; do
- mrename $DIR/$tdir/test_0/a $DIR/$tdir/test_1/b > \
- /dev/null || {
- echo "a->b fails"
- break;
- }
+ while true; do
+ mrename $DIR/$tdir/test_0/a $DIR/$tdir/test_1/b > /dev/null || {
+ echo "a->b fails"
+ break;
+ }
- checkstat $DIR/$tdir/test_0/a && {
- echo "a still exists"
- break
- }
+ checkstat $DIR/$tdir/test_0/a && {
+ echo "a still exists"
+ break
+ }
- checkstat $DIR/$tdir/test_1/b || {
- echo "b still exists"
- break
- }
+ checkstat $DIR/$tdir/test_1/b || {
+ echo "b still exists"
+ break
+ }
- touch $DIR/$tdir/test_0/a || {
- echo "touch a fails"
- break
- }
+ touch $DIR/$tdir/test_0/a || {
+ echo "touch a fails"
+ break
+ }
- mrename $DIR/$tdir/test_1/b $DIR/$tdir/test_0/a > \
- /dev/null || {
- echo "a->a fails"
- break;
- }
- done
+ mrename $DIR/$tdir/test_1/b $DIR/$tdir/test_0/a > /dev/null || {
+ echo "a->a fails"
+ break;
+ }
+ done
)&
rename_70e_pid=$!
- echo "Started $rename_70e_pid"
+ stack_trap "kill -9 $rename_70e_pid" EXIT
+ echo "Started PID=$rename_70e_pid"
random_fail_mdt 2 $duration $rename_70e_pid
kill -0 $rename_70e_pid || error "rename $rename_70e_pid stopped"
-
- cleanup_70e
- true
}
run_test 70e "rename cross-MDT with random fails"
run_test 74 "Ensure applications don't fail waiting for OST recovery"
remote_dir_check_80() {
- local MDTIDX=1
+ local mdtidx=1
local diridx
local fileidx
- diridx=$($GETSTRIPE -M $remote_dir) ||
- error "$GETSTRIPE -M $remote_dir failed"
- [ $diridx -eq $MDTIDX ] || error "$diridx != $MDTIDX"
+ diridx=$($LFS getstripe -m $remote_dir) ||
+ error "$LFS getstripe -m $remote_dir failed"
+ [ $diridx -eq $mdtidx ] || error "$diridx != $mdtidx"
createmany -o $remote_dir/f-%d 20 || error "creation failed"
- fileidx=$($GETSTRIPE -M $remote_dir/f-1) ||
- error "$GETSTRIPE -M $remote_dir/f-1 failed"
- [ $fileidx -eq $MDTIDX ] || error "$fileidx != $MDTIDX"
+ fileidx=$($LFS getstripe -m $remote_dir/f-1) ||
+ error "$LFS getstripe -m $remote_dir/f-1 failed"
+ [ $fileidx -eq $mdtidx ] || error "$fileidx != $mdtidx"
return 0
}
local remote_dir=$DIR/$tdir/remote_dir
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
- #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
local remote_dir=$DIR/$tdir/remote_dir
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
- #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
local remote_dir=$DIR/$tdir/remote_dir
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
- #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
$LFS mkdir -i $MDTIDX $remote_dir &
local CLIENT_PID=$!
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
touch $remote_dir || error "touch $remote_dir failed"
- # OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
- # OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+ # OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
rmdir $remote_dir &
local CLIENT_PID=$!
}
run_test 88 "MDS should not assign same objid to different files "
+# Print the difference between the "kbytestotal" and "kbytesfree" values
+# reported by calc_osc_kbytes, i.e. the amount currently in use.
+calc_osc_kbytes_used() {
+	local total_kb=$(calc_osc_kbytes kbytestotal)
+	local free_kb=$(calc_osc_kbytes kbytesfree)
+
+	echo $((total_kb - free_kb))
+}
+
test_89() {
cancel_lru_locks osc
mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
rm -f $DIR/$tdir/$tfile
wait_mds_ost_sync || error "initial MDS-OST sync timed out"
wait_delete_completed || error "initial wait delete timed out"
- local blocks1=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ local blocks1=$(calc_osc_kbytes_used)
local write_size=$(fs_log_size)
$SETSTRIPE -i 0 -c 1 $DIR/$tdir/$tfile
wait_mds_ost_sync || error "MDS-OST sync timed out"
wait_delete_completed || error "wait delete timed out"
- local blocks2=$(df -P $MOUNT | tail -n 1 | awk '{ print $3 }')
+ local blocks2=$(calc_osc_kbytes_used)
[ $((blocks2 - blocks1)) -le $(fs_log_size) ] ||
error $((blocks2 - blocks1)) blocks leaked
echo "Create the files"
- # file "f${index}" striped over 1 OST
- # file "all" striped over all OSTs
+ # file "f${index}" striped over 1 OST
+ # file "all" striped over all OSTs
- $SETSTRIPE -c $OSTCOUNT $dir/all ||
- error "setstripe failed to create $dir/all"
+ $LFS setstripe -c $OSTCOUNT $dir/all ||
+ error "setstripe failed to create $dir/all"
- for (( i=0; i<$OSTCOUNT; i++ )); do
- local f=$dir/f$i
- $SETSTRIPE -i $i -c 1 $f || error "$SETSTRIPE failed to create $f"
+ for ((i = 0; i < $OSTCOUNT; i++)); do
+ local f=$dir/f$i
- # confirm setstripe actually created the stripe on the requested OST
- local uuid=$(ostuuid_from_index $i)
- for file in f$i all; do
- if [[ $dir/$file != $($LFS find --obd $uuid --name $file $dir) ]]; then
- $GETSTRIPE $dir/$file
- error wrong stripe: $file, uuid: $uuid
- fi
- done
- done
+ $LFS setstripe -i $i -c 1 $f ||
+ error "$LFS setstripe failed to create $f"
+
+ # confirm setstripe actually created stripe on requested OST
+ local uuid=$(ostuuid_from_index $i)
+
+ for file in f$i all; do
+ local found=$($LFS find --obd $uuid --name $file $dir)
+
+ if [[ $dir/$file != $found ]]; then
+ $LFS getstripe $dir/$file
+ error "wrong stripe: $file, uuid: $uuid"
+ fi
+ done
+ done
# Before failing an OST, get its obd name and index
local varsvc=${ostfail}_svc
}
run_test 121 "lock replay timed out and race"
+# Create a Data-on-MDT file with setstripe after a replay barrier, fail
+# the MDS, and check that the file still reports the "mdt" layout, i.e.
+# that the create was replayed.
+test_130a() {
+	# return explicitly after skip, as test_70e does, so the body never
+	# runs on an unsupported server
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] &&
+		skip "Do not support Data-on-MDT before 2.11" && return 0
+
+	replay_barrier $SINGLEMDS
+	$LFS setstripe -E 1M -L mdt -E EOF -c 2 $DIR/$tfile ||
+		error "setstripe $DIR/$tfile failed"
+	fail $SINGLEMDS
+
+	# quoted so that empty getstripe output is compared as an empty
+	# string instead of breaking the test expression
+	[ "$($LFS getstripe -L $DIR/$tfile)" == "mdt" ] ||
+		error "Fail to replay DoM file creation"
+}
+run_test 130a "DoM file create (setstripe) replay"
+
+# Set a Data-on-MDT default layout on a directory, create a file in it
+# after a replay barrier, fail the MDS, and check that the file still
+# reports the inherited "mdt" layout.
+test_130b() {
+	# return explicitly after skip, as test_70e does, so the body never
+	# runs on an unsupported server
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] &&
+		skip "Do not support Data-on-MDT before 2.11" && return 0
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS setstripe -E 1M -L mdt -E EOF -c 2 $DIR/$tdir ||
+		error "setstripe $DIR/$tdir failed"
+	replay_barrier $SINGLEMDS
+	touch $DIR/$tdir/$tfile || error "touch $DIR/$tdir/$tfile failed"
+	fail $SINGLEMDS
+
+	# quoted so that empty getstripe output is compared as an empty
+	# string instead of breaking the test expression
+	[ "$($LFS getstripe -L $DIR/$tdir/$tfile)" == "mdt" ] ||
+		error "Fail to replay DoM file creation"
+}
+run_test 130b "DoM file create (inherited) replay"
+
+# Write to a Data-on-MDT file after a replay barrier, fail the MDS while
+# the client still holds its lock, and check the file content afterwards.
+test_131a() {
+	# return explicitly after skip, as test_70e does, so the body never
+	# runs on an unsupported server
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] &&
+		skip "Do not support Data-on-MDT before 2.11" && return 0
+
+	$LFS setstripe -E 1M -L mdt -E EOF -c 2 $DIR/$tfile ||
+		error "setstripe $DIR/$tfile failed"
+	replay_barrier $SINGLEMDS
+	# bs=8 count=1 stores exactly the 8 bytes "dom_data", dropping the
+	# newline that echo appends
+	echo "dom_data" | dd of=$DIR/$tfile bs=8 count=1 ||
+		error "dd to $DIR/$tfile failed"
+	# lock is not canceled and will be replayed
+	fail $SINGLEMDS
+
+	# quoted so that empty or multi-word content is compared as one
+	# string instead of breaking the test expression
+	[ "$(cat $DIR/$tfile)" == "dom_data" ] ||
+		error "Wrong file content after failover"
+}
+run_test 131a "DoM file write lock replay"
+
+# Write to a Data-on-MDT file after a replay barrier, drop the client's
+# MDC locks, fail the MDS, and check the file content afterwards.
+test_131b() {
+	# return explicitly after skip, as test_70e does, so the body never
+	# runs on an unsupported server
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] &&
+		skip "Do not support Data-on-MDT before 2.11" && return 0
+
+	$LFS setstripe -E 1M -L mdt -E EOF -c 2 $DIR/$tfile ||
+		error "setstripe $DIR/$tfile failed"
+	replay_barrier $SINGLEMDS
+	# bs=8 count=1 stores exactly the 8 bytes "dom_data", dropping the
+	# newline that echo appends
+	echo "dom_data" | dd of=$DIR/$tfile bs=8 count=1 ||
+		error "dd to $DIR/$tfile failed"
+	cancel_lru_locks mdc
+
+	fail $SINGLEMDS
+
+	# quoted so that empty or multi-word content is compared as one
+	# string instead of breaking the test expression
+	[ "$(cat $DIR/$tfile)" == "dom_data" ] ||
+		error "Wrong file content after failover"
+}
+run_test 131b "DoM file write replay"
+
+# Create a two-component PFL file, write beyond the first 1M component
+# after a replay barrier, fail the MDS, and check that component ID 2
+# still has objects and that the file checksum is unchanged.
+test_132a() {
+	# return explicitly after skip, as test_70e does, so the body never
+	# runs on an unsupported server
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.9.90) ] &&
+		skip "Do not support PFL files before 2.10" && return 0
+
+	# keep the checksums local so they do not leak into other tests
+	local cksum
+	local cksum2
+
+	$LFS setstripe -E 1M -c 1 -E EOF -c 2 $DIR/$tfile ||
+		error "setstripe $DIR/$tfile failed"
+	replay_barrier $SINGLEMDS
+	# writing past the first component's extent forces the next
+	# component to be instantiated
+	dd if=/dev/urandom of=$DIR/$tfile bs=1M count=1 seek=1 ||
+		error "dd to $DIR/$tfile failed"
+	$LFS getstripe $DIR/$tfile
+
+	cksum=$(md5sum $DIR/$tfile | awk '{print $1}')
+	$LFS getstripe -I2 $DIR/$tfile | grep -q lmm_objects ||
+		error "Component #1 was not instantiated"
+
+	fail $SINGLEMDS
+
+	$LFS getstripe $DIR/$tfile
+	$LFS getstripe -I2 $DIR/$tfile | grep -q lmm_objects ||
+		error "Component #1 instantiation was not replayed"
+	cksum2=$(md5sum $DIR/$tfile | awk '{print $1}')
+	# quoted so an empty checksum is reported as a mismatch instead of
+	# breaking the test expression
+	if [ "$cksum" != "$cksum2" ]; then
+		error_noexit "New cksum $cksum2 does not match original $cksum"
+	fi
+}
+run_test 132a "PFL new component instantiate replay"
+
complete $SECONDS
check_and_cleanup_lustre
exit_status