set -e
ONLY=${ONLY:-"$*"}
-#Bug number for excepting test 6705
-ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT 1c 5 10"
+ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
[ "$SLOW" = "no" ] && EXCEPT_SLOW=""
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
require_dsh_mds || exit 0
+load_modules
+
+if ! check_versions; then
+ skip "It is NOT necessary to test scrub under interoperation mode"
+ exit 0
+fi
+
+[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
+ skip "test OI scrub only for ldiskfs" && exit 0
+
+[ $(facet_fstype ost1) != "ldiskfs" ] &&
+ skip "test OI scrub only for ldiskfs" && exit 0
+
+[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
+ skip "Need MDS version at least 2.2.90" && exit 0
+
SAVED_MDSSIZE=${MDSSIZE}
SAVED_OSTSIZE=${OSTSIZE}
SAVED_OSTCOUNT=${OSTCOUNT}
MDSSIZE=200000
OSTSIZE=100000
+stopall
# no need too much OSTs, to reduce the format/start/stop overhead
[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
MOUNT_2=""
formatall
setupall
-[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
- skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
- exit 0
-[ $(facet_fstype ost1) != "ldiskfs" ] &&
- skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
- exit 0
-[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
- skip "Need MDS version at least 2.2.90" && check_and_cleanup_lustre &&
- exit 0
-
[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
echo "preparing... $(date)"
for n in $(seq $MDSCOUNT); do
echo "creating $nfiles files on mds$n"
- if [ $n -eq 1 ]; then
- mkdir $DIR/$tdir/mds$n ||
- error "Failed to create directory mds$n"
- else
- $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
- error "Failed to create remote directory mds$n"
- fi
+ test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
+ error "Failed to create directory mds$n"
cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
error "Failed to copy files to mds$n"
mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
done
echo "prepared $(date)."
cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
+
+ # sync local transactions on every MDT
+ do_nodes $(comma_list $(mdts_nodes)) \
+ "$LCTL set_param -n osd*.*MDT*.force_sync=1"
+
+ # wait a while for update logs to be cancelled after transactions commit.
+ sleep 3
+
+ # sync again to guarantee everything is done.
+ do_nodes $(comma_list $(mdts_nodes)) \
+ "$LCTL set_param -n osd*.*MDT*.force_sync=1"
+
for n in $(seq $MDSCOUNT); do
echo "stop mds$n"
stop mds$n > /dev/null || error "Fail to stop MDS$n!"
for n in $(seq $MDSCOUNT); do
mds_backup_restore mds$n $igif ||
- error "(error_id) Backup/restore on mds$n failed"
+ error "($error_id) Backup/restore on mds$n failed"
done
}
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
scrub_check_status 3 init
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
}
#run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
scrub_backup_restore 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
full_scrub_ratio 0
local -a updated0
for n in $(seq $MDSCOUNT); do
updated0[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
+ awk '/^prior_updated/ { print $2 }')
done
scrub_check_data2 sanity-scrub.sh 9
local -a updated1
for n in $(seq $MDSCOUNT); do
updated1[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
+ awk '/^prior_updated/ { print $2 }')
[ ${updated0[$n]} -eq ${updated1[$n]} ] ||
error "(10) NOT auto trigger full scrub as expected"
done
scrub_backup_restore 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
full_scrub_ratio 10
local -a updated0
for n in $(seq $MDSCOUNT); do
updated0[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
+ awk '/^prior_updated/ { print $2 }')
+
+ echo "OI scrub on MDS$n status for the 1st time:"
+ do_facet mds$n $LCTL get_param -n \
+ osd-ldiskfs.$(facet_svc mds$n).oi_scrub
done
scrub_check_data2 sanity-scrub.sh 9
local -a updated1
for n in $(seq $MDSCOUNT); do
updated1[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
+ awk '/^prior_updated/ { print $2 }')
+
+ echo "OI scrub on MDS$n status for the 2nd time:"
+ do_facet mds$n $LCTL get_param -n \
+ osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+
[ ${updated0[$n]} -lt ${updated1[$n]} ] ||
error "(12) Auto trigger full scrub unexpectedly"
done
for n in $(seq $MDSCOUNT); do
updated0[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
+ awk '/^prior_updated/ { print $2 }')
+
+ echo "OI scrub on MDS$n status for the 3rd time:"
+ do_facet mds$n $LCTL get_param -n \
+ osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+
[ ${updated0[$n]} -gt ${updated1[$n]} ] ||
error "(16) Auto trigger full scrub unexpectedly"
done
for n in $(seq $MDSCOUNT); do
updated1[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
- [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
+ awk '/^prior_updated/ { print $2 }')
+ [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
+ echo "OI scrub on MDS$n status for the 4th time:"
+ do_facet mds$n $LCTL get_param -n \
+ osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+
error "(18) NOT auto trigger full scrub as expected"
+ }
done
}
run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
scrub_backup_restore 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
full_scrub_ratio 2
local -a updated0
for n in $(seq $MDSCOUNT); do
updated0[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
+ awk '/^prior_updated/ { print $2 }')
+
+ echo "OI scrub on MDS$n status for the 1st time:"
+ do_facet mds$n $LCTL get_param -n \
+ osd-ldiskfs.$(facet_svc mds$n).oi_scrub
done
scrub_check_data2 sanity-scrub.sh 9
local -a updated1
for n in $(seq $MDSCOUNT); do
updated1[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
+ awk '/^prior_updated/ { print $2 }')
+
+ echo "OI scrub on MDS$n status for the 2nd time:"
+ do_facet mds$n $LCTL get_param -n \
+ osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+
[ ${updated0[$n]} -lt ${updated1[$n]} ] ||
error "(12) Auto trigger full scrub unexpectedly"
done
for n in $(seq $MDSCOUNT); do
updated0[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
+ awk '/^prior_updated/ { print $2 }')
+
+ echo "OI scrub on MDS$n status for the 3rd time:"
+ do_facet mds$n $LCTL get_param -n \
+ osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+
[ ${updated0[$n]} -gt ${updated1[$n]} ] ||
error "(16) Auto trigger full scrub unexpectedly"
done
for n in $(seq $MDSCOUNT); do
updated1[$n]=$(scrub_status $n |
- awk '/^sf_items_updated_prior/ { print $2 }')
- [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
+ awk '/^prior_updated/ { print $2 }')
+ [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
+ echo "OI scrub on MDS$n status for the 4th time:"
+ do_facet mds$n $LCTL get_param -n \
+ osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+
error "(18) NOT auto trigger full scrub as expected"
+ }
done
}
run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
echo "starting MDTs with OI scrub disabled (1)"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
scrub_check_status 3 init
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
+ full_scrub_ratio 0
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_val=3 fail_loc=0x190
- full_scrub_ratio 0
scrub_check_data 6
umount_client $MOUNT || error "(7) Fail to stop client!"
scrub_check_status 8 scanning
$LCTL set_param fail_val=3 fail_loc=0x190
local n
+ declare -a pids
+
for n in $(seq $MDSCOUNT); do
- stat $DIR/$tdir/mds$n/${tfile}800 ||
- error "(17) Failed to stat mds$n/${tfile}800"
+ stat $DIR/$tdir/mds$n/${tfile}800 &
+ pids[$n]=$!
done
- scrub_check_status 18 scanning
+ sleep 3
+
+ scrub_check_status 17 scanning
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_loc=0 fail_val=0
+ for n in $(seq $MDSCOUNT); do
+ wait ${pids[$n]} || error "(18) Fail to stat mds$n/${tfile}800"
+ done
+
scrub_check_status 19 completed
scrub_check_flags 20 ""
}
scrub_backup_restore 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
+ full_scrub_ratio 0
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_val=2 fail_loc=0x190
- full_scrub_ratio 0
scrub_check_data 6
# Sleep 5 sec to guarantee at least one object processed by OI scrub
scrub_backup_restore 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
+ full_scrub_ratio 0
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_val=3 fail_loc=0x190
- full_scrub_ratio 0
scrub_check_data 6
local n
done
scrub_check_status 8 scanning
- scrub_check_flags 9 inconsistent,auto
+ scrub_check_flags 9 recreated,inconsistent,auto
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_loc=0 fail_val=0
scrub_backup_restore 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
do_nodes $(comma_list $(mdts_nodes)) \
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
local BASE_SPEED1=100
local RUN_TIME1=10
scrub_backup_restore 1
echo "starting mds$n with OI scrub disabled (1)"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
mount_client $MOUNT || error "(5) Fail to start client!"
scrub_enable_auto
+ full_scrub_ratio 0
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_val=1 fail_loc=0x190
- full_scrub_ratio 0
scrub_check_data 6
scrub_check_status 7 scanning
umount_client $MOUNT || error "(8) Fail to stop client!"
scrub_backup_restore 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
do_nodes $(comma_list $(mdts_nodes)) \
check_mount_and_prep
for n in $(seq $MDSCOUNT); do
- $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
+ test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
error "(1) Fail to mkdir $DIR/$tdir/mds$n"
createmany -o $DIR/$tdir/mds$n/f $CREATED ||
run_test 14 "OI scrub can repair objects under lost+found"
test_15() {
- # skip test_15 for LU-4182
- [ $MDSCOUNT -ge 2 ] && skip "skip now for >= 2 MDTs" && return
local server_version=$(lustre_version_code $SINGLEMDS)
scrub_prep 20
scrub_backup_restore 1
echo "starting MDTs with OI scrub disabled"
scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
scrub_check_status 3 init
- scrub_check_flags 4 inconsistent
+ scrub_check_flags 4 recreated,inconsistent
# run under dryrun mode
if [ $server_version -lt $(version_code 2.5.58) ]; then
scrub_start 5 --dryrun
fi
scrub_check_status 6 completed
- scrub_check_flags 7 inconsistent
+ scrub_check_flags 7 recreated,inconsistent
scrub_check_params 8 dryrun
scrub_check_repaired 9 20
scrub_start 10 --dryrun
fi
scrub_check_status 11 completed
- scrub_check_flags 12 inconsistent
+ scrub_check_flags 12 recreated,inconsistent
scrub_check_params 13 dryrun
scrub_check_repaired 14 20