X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-scrub.sh;h=5b3e2a7b8324c2aa123f4ecec7a871c69c5331a8;hb=9587d10a7e7e1839572d24bdf3b645df462c10f4;hp=5ca94e26b6a5da39669431236ba9ed0138efe761;hpb=ef82e43c8ff4b060ca4519816dc049a54bb24374;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 5ca94e2..5b3e2a7 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -8,6 +8,7 @@ set -e ONLY=${ONLY:-"$*"} ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT" + [ "$SLOW" = "no" ] && EXCEPT_SLOW="" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! @@ -19,14 +20,32 @@ init_logging require_dsh_mds || exit 0 +load_modules + +if ! check_versions; then + skip "It is NOT necessary to test scrub under interoperation mode" + exit 0 +fi + +[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && + skip "test OI scrub only for ldiskfs" && exit 0 + +[ $(facet_fstype ost1) != "ldiskfs" ] && + skip "test OI scrub only for ldiskfs" && exit 0 + +[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] && + skip "Need MDS version at least 2.2.90" && exit 0 + SAVED_MDSSIZE=${MDSSIZE} SAVED_OSTSIZE=${OSTSIZE} SAVED_OSTCOUNT=${OSTCOUNT} # use small MDS + OST size to speed formatting time # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size -MDSSIZE=100000 +# 200M MDT device can guarantee uninitialized groups during the OI scrub +MDSSIZE=200000 OSTSIZE=100000 # no need too much OSTs, to reduce the format/start/stop overhead +stopall [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4 MOUNT_2="" @@ -35,19 +54,12 @@ MOUNT_2="" formatall setupall -[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && - skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre && - exit 0 -[ $(facet_fstype ost1) != "ldiskfs" ] && - skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre && - exit 0 -[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] && - skip "Need MDS version at least 2.2.90" && check_and_cleanup_lustre && - exit 0 - [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a" +[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] && + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4" + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15" @@ -58,6 +70,9 @@ setupall [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14" +[[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] && + SCRUB_ONLY="-t scrub" + build_test_filter MDT_DEV="${FSNAME}-MDT0000" @@ -68,8 +83,10 @@ scrub_start() { local error_id=$1 local n + # use "lfsck_start -A" when we no longer need testing interop for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) "$@" || + do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \ + $SCRUB_ONLY "$@" || error "($error_id) Failed to start OI scrub on mds$n" done } @@ -78,6 +95,7 @@ scrub_stop() { local error_id=$1 local n + # use "lfsck_stop -A" when we no longer need testing interop for n in $(seq $MDSCOUNT); do do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) || error "($error_id) Failed to stop OI scrub on mds$n" @@ -91,8 +109,8 @@ scrub_status() { osd-ldiskfs.$(facet_svc mds$n).oi_scrub } -START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV}" -START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV}" +START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY" +START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY" STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}" SHOW_SCRUB="do_facet $SINGLEMDS \ $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub" @@ -119,6 +137,10 @@ scrub_prep() { fi cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n || error "Failed to copy files to mds$n" + mkdir -p $DIR/$tdir/mds$n/d_$tfile || + error "mkdir failed on mds$n" + createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \ + /dev/null || error "create failed on mds$n" if [[ $nfiles -gt 0 ]]; then createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \ /dev/null || error "createmany failed on mds$n" @@ -235,6 +257,18 @@ scrub_check_data() { done } +scrub_check_data2() { + local filename=$1 + local error_id=$2 + local n + + for n in $(seq $MDSCOUNT); do + diff -q $LUSTRE/tests/$filename \ + $DIR/$tdir/mds$n/$filename || + error "($error_id) File data check failed" + done +} + scrub_remove_ois() { local error_id=$1 local index=$2 @@ -258,12 +292,28 @@ scrub_backup_restore() { } scrub_enable_auto() { - local n + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \ + osd-ldiskfs.*.auto_scrub=1 +} - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param -n \ - osd-ldiskfs.$(facet_svc mds$n).auto_scrub 1 - done +full_scrub_ratio() { + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] && + return + + local ratio=$1 + + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \ + osd-ldiskfs.*.full_scrub_ratio=$ratio +} + +full_scrub_threshold_rate() { + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] && + return + + local rate=$1 + + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \ + osd-ldiskfs.*.full_scrub_threshold_rate=$rate } test_0() { @@ -280,8 +330,7 @@ run_test 0 "Do not auto trigger OI scrub for non-backup/restore case" test_1a() { scrub_prep 0 echo "start $SINGLEMDS without disabling OI scrub" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + scrub_start_mds 1 "$MOUNT_OPTS_SCRUB" local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'" @@ -359,23 +408,175 @@ test_3() { echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" scrub_check_status 3 init - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent } #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" -test_4() { +test_4a() { scrub_prep 0 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent + mount_client $MOUNT || error "(5) Fail to start client!" + scrub_enable_auto + full_scrub_ratio 0 + scrub_check_data 6 + sleep 3 + + scrub_check_status 7 completed + scrub_check_flags 8 "" + + local -a updated0 + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + done + + scrub_check_data2 sanity-scrub.sh 9 + sleep 3 + + local -a updated1 + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -eq ${updated1[$n]} ] || + error "(10) NOT auto trigger full scrub as expected" + done +} +run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)" + +test_4b() { + scrub_prep 5 + scrub_backup_restore 1 + echo "starting MDTs with OI scrub disabled" + scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" + scrub_check_flags 4 recreated,inconsistent + mount_client $MOUNT || error "(5) Fail to start client!" + scrub_enable_auto + full_scrub_ratio 10 + full_scrub_threshold_rate 10000 + scrub_check_data 6 + sleep 3 + + scrub_check_status 7 completed + scrub_check_flags 8 "" + + local -a updated0 + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + done + + scrub_check_data2 sanity-scrub.sh 9 + sleep 3 + + scrub_check_status 10 completed + scrub_check_flags 11 "" + + local -a updated1 + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -lt ${updated1[$n]} ] || + error "(12) Auto trigger full scrub unexpectedly" + done + + for n in $(seq $MDSCOUNT); do + ls -l $DIR/$tdir/mds$n/*.sh > /dev/null || + error "(13) fail to ls" + done + sleep 3 + + scrub_check_status 14 completed + scrub_check_flags 15 "" + + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -gt ${updated1[$n]} ] || + error "(16) Auto trigger full scrub unexpectedly" + done + + for n in $(seq $MDSCOUNT); do + ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls" + done + sleep 3 + + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -eq ${updated1[$n]} ] || + error "(18) NOT auto trigger full scrub as expected" + done +} +run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)" + +test_4c() { + scrub_prep 500 + scrub_backup_restore 1 + echo "starting MDTs with OI scrub disabled" + scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" + scrub_check_flags 4 recreated,inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto + full_scrub_ratio 2 + full_scrub_threshold_rate 20 scrub_check_data 6 + sleep 3 + scrub_check_status 7 completed scrub_check_flags 8 "" + + local -a updated0 + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + done + + scrub_check_data2 sanity-scrub.sh 9 + sleep 3 + + scrub_check_status 10 completed + scrub_check_flags 11 "" + + local -a updated1 + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -lt ${updated1[$n]} ] || + error "(12) Auto trigger full scrub unexpectedly" + done + + for n in $(seq $MDSCOUNT); do + ls -l $DIR/$tdir/mds$n/*.sh > /dev/null || + error "(13) fail to ls" + done + sleep 3 + + scrub_check_status 14 completed + scrub_check_flags 15 "" + + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -gt ${updated1[$n]} ] || + error "(16) Auto trigger full scrub unexpectedly" + done + + for n in $(seq $MDSCOUNT); do + ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls" + done + sleep 3 + + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -eq ${updated1[$n]} ] || + error "(18) NOT auto trigger full scrub as expected" + done } -run_test 4 "Trigger OI scrub automatically if inconsistent OI mapping was found" +run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)" test_5() { formatall > /dev/null @@ -383,10 +584,10 @@ test_5() { scrub_prep 1000 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" + echo "starting MDTs with OI scrub disabled (1)" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" scrub_check_status 3 init - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto @@ -394,6 +595,7 @@ test_5() { do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_val=3 fail_loc=0x190 + full_scrub_ratio 0 scrub_check_data 6 umount_client $MOUNT || error "(7) Fail to stop client!" scrub_check_status 8 scanning @@ -407,7 +609,7 @@ test_5() { do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_loc=0 fail_val=0 - echo "starting MDTs with OI scrub disabled" + echo "starting MDTs with OI scrub disabled (2)" scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB" scrub_check_status 11 crashed scrub_stop_mds 12 @@ -426,6 +628,7 @@ test_5() { scrub_check_status 15 failed mount_client $MOUNT || error "(16) Fail to start client!" + full_scrub_ratio 0 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_val=3 fail_loc=0x190 @@ -451,14 +654,15 @@ test_6() { scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_nodes $(comma_list $(mdts_nodes)) \ - $LCTL set_param fail_val=3 fail_loc=0x190 + $LCTL set_param fail_val=2 fail_loc=0x190 + full_scrub_ratio 0 scrub_check_data 6 # Sleep 5 sec to guarantee at least one object processed by OI scrub @@ -528,7 +732,7 @@ test_7() { scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto @@ -536,6 +740,7 @@ test_7() { do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_val=3 fail_loc=0x190 + full_scrub_ratio 0 scrub_check_data 6 local n @@ -545,7 +750,7 @@ test_7() { done scrub_check_status 8 scanning - scrub_check_flags 9 inconsistent,auto + scrub_check_flags 9 recreated,inconsistent,auto do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_loc=0 fail_val=0 @@ -560,7 +765,7 @@ test_8() { scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_nodes $(comma_list $(mdts_nodes)) \ @@ -592,7 +797,7 @@ test_9() { echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent local BASE_SPEED1=100 local RUN_TIME1=10 @@ -664,9 +869,9 @@ run_test 9 "OI scrub speed control" test_10a() { scrub_prep 0 scrub_backup_restore 1 - echo "starting mds$n with OI scrub disabled" + echo "starting mds$n with OI scrub disabled (1)" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto @@ -674,11 +879,12 @@ test_10a() { do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_val=1 fail_loc=0x190 + full_scrub_ratio 0 scrub_check_data 6 scrub_check_status 7 scanning umount_client $MOUNT || error "(8) Fail to stop client!" scrub_stop_mds 9 - echo "starting MDTs with OI scrub disabled" + echo "starting MDTs with OI scrub disabled (2)" scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB" scrub_check_status 11 paused scrub_stop_mds 12 @@ -700,7 +906,7 @@ test_10b() { scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_nodes $(comma_list $(mdts_nodes)) \ @@ -748,14 +954,13 @@ test_11() { # OI scrub should skip the new created objects for the first accessing # notice we're creating a new llog for every OST on every startup - # new features can make this even less stable, so we only check - # that the number of skipped files is less than 2x the number of files - local MAXIMUM=$((CREATED * 2)) + # new features can make this even less stable, so we only check that + # the number of skipped files is more than the number or known created local MINIMUM=$((CREATED + 1)) # files + directory for n in $(seq $MDSCOUNT); do local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }') - [ $SKIPPED -ge $MAXIMUM -o $SKIPPED -lt $MINIMUM ] && - error "(5) Expect [ $MINIMUM , $MAXIMUM ) objects" \ + [ $SKIPPED -lt $MINIMUM ] && + error "(5) Expect at least $MINIMUM objects" \ "skipped on mds$n, but got $SKIPPED" checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }') @@ -781,12 +986,11 @@ test_12() { check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir - local count=$(precreated_ost_obj_count 0 0) - #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195 do_facet ost1 $LCTL set_param fail_loc=0x195 - createmany -o $DIR/$tdir/f $((count + 32)) + local count=$(precreated_ost_obj_count 0 0) + createmany -o $DIR/$tdir/f $((count + 32)) umount_client $MOUNT || error "(1) Fail to stop client!" stop ost1 || error "(2) Fail to stop ost1" @@ -820,10 +1024,10 @@ test_13() { check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir - local count=$(precreated_ost_obj_count 0 0) - #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 do_facet ost1 $LCTL set_param fail_loc=0x196 + local count=$(precreated_ost_obj_count 0 0) + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 @@ -853,10 +1057,10 @@ test_14() { check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir - local count=$(precreated_ost_obj_count 0 0) - #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 do_facet ost1 $LCTL set_param fail_loc=0x196 + local count=$(precreated_ost_obj_count 0 0) + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 @@ -874,7 +1078,7 @@ test_14() { mount_client $MOUNT || error "(5) Fail to start client!" local LF_REPAIRED=$($SHOW_SCRUB_ON_OST | - awk '/^lf_reparied/ { print $2 }') + awk '/^lf_repa[ri]*ed/ { print $2 }') [ $LF_REPAIRED -gt 0 ] || error "(6) Some entry under /lost+found should be repaired" @@ -891,7 +1095,7 @@ test_15() { echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" scrub_check_status 3 init - scrub_check_flags 4 inconsistent + scrub_check_flags 4 recreated,inconsistent # run under dryrun mode if [ $server_version -lt $(version_code 2.5.58) ]; then @@ -900,7 +1104,7 @@ test_15() { scrub_start 5 --dryrun fi scrub_check_status 6 completed - scrub_check_flags 7 inconsistent + scrub_check_flags 7 recreated,inconsistent scrub_check_params 8 dryrun scrub_check_repaired 9 20 @@ -911,7 +1115,7 @@ test_15() { scrub_start 10 --dryrun fi scrub_check_status 11 completed - scrub_check_flags 12 inconsistent + scrub_check_flags 12 recreated,inconsistent scrub_check_params 13 dryrun scrub_check_repaired 14 20