X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-scrub.sh;h=80f78b76050887a882e33b422b9c61d63a647726;hp=18280a7ac0e7bb551d5355607988f60535ebaca9;hb=0c1ae1cb9c19f8a4f6c5a7ff6a1fd54807430795;hpb=61b4d4ff8e9a6e7539aa3a7dcc4dd1aea6b4f927

diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh
index 18280a7..80f78b7 100644
--- a/lustre/tests/sanity-scrub.sh
+++ b/lustre/tests/sanity-scrub.sh
@@ -7,72 +7,48 @@ set -e
 ONLY=${ONLY:-"$*"}
-#Bug number for excepting test 6705
-ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT 1c 5 10"
-[ "$SLOW" = "no" ] && EXCEPT_SLOW=""
-# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-
-LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+LUSTRE=${LUSTRE:-$(dirname $0)/..}
 . $LUSTRE/tests/test-framework.sh
 init_test_env $@
-. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 init_logging
-require_dsh_mds || exit 0
-
-SAVED_MDSSIZE=${MDSSIZE}
-SAVED_OSTSIZE=${OSTSIZE}
-SAVED_OSTCOUNT=${OSTCOUNT}
-# use small MDS + OST size to speed formatting time
-# do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
-# 200M MDT device can guarantee uninitialized groups during the OI scrub
-MDSSIZE=200000
-OSTSIZE=100000
-# no need too much OSTs, to reduce the format/start/stop overhead
-stopall
-[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
+ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
-MOUNT_2=""
+[ "$SLOW" = "no" ] && EXCEPT_SLOW=""
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-# build up a clean test environment.
-formatall
-setupall
+build_test_filter
-[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
-	skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
-	exit 0
-[ $(facet_fstype ost1) != "ldiskfs" ] &&
-	skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
-	exit 0
-[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
-	skip "Need MDS version at least 2.2.90" && check_and_cleanup_lustre &&
-	exit 0
+require_dsh_mds || exit 0
-[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
-	ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
+load_modules
-[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
-	ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
+if ! check_versions; then
+	skip "It is NOT necessary to test scrub under interoperation mode"
+	exit 0
+fi
-[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
-	ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
+cleanupall
-[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
-[[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
-	ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
+SAVED_MDSSIZE=${MDSSIZE}
+SAVED_OSTSIZE=${OSTSIZE}
+SAVED_OSTCOUNT=${OSTCOUNT}
-[[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
-	ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
+# use small MDS + OST size to speed formatting time
+# do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
+# 400M MDT device can guarantee uninitialized groups during the OI scrub
+[[ $MDSSIZE < 400000 || "$mds1_FSTYPE" == ldiskfs ]] && MDSSIZE=400000
+[[ $OSTSIZE < 400000 || "$ost1_FSTYPE" == ldiskfs ]] && OSTSIZE=400000
-[[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
-	SCRUB_ONLY="-t scrub"
+# no need too many OSTs, to reduce the format/start/stop overhead
+[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
-build_test_filter
+# build up a clean test environment.
+REFORMAT="yes" check_and_setup_lustre
 MDT_DEV="${FSNAME}-MDT0000"
 OST_DEV="${FSNAME}-OST0000"
-MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
 scrub_start() {
 	local error_id=$1
@@ -81,7 +57,7 @@ scrub_start() {
 	# use "lfsck_start -A" when we no longer need testing interop
 	for n in $(seq $MDSCOUNT); do
 		do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
-			$SCRUB_ONLY "$@" ||
+			-t scrub "$@" ||
 			error "($error_id) Failed to start OI scrub on mds$n"
 	done
 }
@@ -100,22 +76,22 @@ scrub_stop() {
 scrub_status() {
 	local n=$1
-	do_facet mds$n $LCTL get_param -n \
-		osd-ldiskfs.$(facet_svc mds$n).oi_scrub
+	do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
 }
-START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
-START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
+START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
+START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
 SHOW_SCRUB="do_facet $SINGLEMDS \
-		$LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
+		$LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
 SHOW_SCRUB_ON_OST="do_facet ost1 \
-		$LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
-MOUNT_OPTS_SCRUB="-o user_xattr"
-MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
+		$LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
+MOUNT_OPTS_SCRUB="$MDS_MOUNT_OPTS -o user_xattr"
+MOUNT_OPTS_NOSCRUB="$MDS_MOUNT_OPTS -o user_xattr,noscrub"
 scrub_prep() {
 	local nfiles=$1
+	local inject=$2
 	local n
 	check_mount_and_prep
@@ -123,30 +99,78 @@ scrub_prep() {
 	echo "preparing... $(date)"
 	for n in $(seq $MDSCOUNT); do
 		echo "creating $nfiles files on mds$n"
-		if [ $n -eq 1 ]; then
-			mkdir $DIR/$tdir/mds$n ||
-				error "Failed to create directory mds$n"
-		else
-			$LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
-				error "Failed to create remote directory mds$n"
-		fi
+		test_mkdir -i $((n - 1)) -c1 $DIR/$tdir/mds$n ||
+			error "Failed to create directory mds$n"
 		cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
 			error "Failed to copy files to mds$n"
 		mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
 			error "mkdir failed on mds$n"
-		createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
+		touch $DIR/$tdir/mds$n/d_$tfile/f1 > \
 			/dev/null || error "create failed on mds$n"
+		dd if=/dev/zero of=$DIR/$tdir/mds$n/d_$tfile/f2 bs=1M count=1 ||
+			error "write failed on mds$n"
 		if [[ $nfiles -gt 0 ]]; then
 			createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
 				/dev/null || error "createmany failed on mds$n"
 		fi
 	done
 	echo "prepared $(date)."
+
+	[ ! -z $inject ] && [ $inject -eq 2 ] && {
+		#define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
+		do_nodes $(comma_list $(mdts_nodes)) \
+			$LCTL set_param fail_loc=0x198
+
+		for n in $(seq $MDSCOUNT); do
+			cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
+				error "Fail to copy runas to MDS$n"
+		done
+
+		do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
+	}
+
+	[ ! -z $inject ] && [ $inject -eq 1 ] &&
+		[ "$mds1_FSTYPE" = "zfs" ] && {
+		#define OBD_FAIL_OSD_FID_MAPPING 0x193
+		do_nodes $(comma_list $(mdts_nodes)) \
+			$LCTL set_param fail_loc=0x193
+
+		for n in $(seq $MDSCOUNT); do
+			chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
+			chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
+		done
+
+		do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
+	}
+
 	cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
+
+	# sync local transactions on every MDT
+	do_nodes $(comma_list $(mdts_nodes)) \
+		"$LCTL set_param -n osd*.*MDT*.force_sync=1"
+
+	# wait for a while to cancel update logs after transactions committed.
+	sleep 3
+
+	# sync again to guarantee all things done.
+	do_nodes $(comma_list $(mdts_nodes)) \
+		"$LCTL set_param -n osd*.*MDT*.force_sync=1"
+
 	for n in $(seq $MDSCOUNT); do
 		echo "stop mds$n"
 		stop mds$n > /dev/null || error "Fail to stop MDS$n!"
 	done
+
+	[ ! -z $inject ] && [ "$mds1_FSTYPE" = "ldiskfs" ] && {
+		if [ $inject -eq 1 ]; then
+			for n in $(seq $MDSCOUNT); do
+				mds_backup_restore mds$n ||
					error "Backup/restore on mds$n failed"
+			done
+		elif [ $inject -eq 2 ]; then
+			scrub_remove_ois 1
+		fi
+	}
 }
 scrub_start_mds() {
@@ -178,7 +202,7 @@ scrub_check_status() {
 	for n in $(seq $MDSCOUNT); do
 		wait_update_facet mds$n "$LCTL get_param -n \
-			osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
+			osd-*.$(facet_svc mds$n).oi_scrub |
 			awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
 			error "($error_id) Expected '$expected' on mds$n"
 	done
@@ -192,7 +216,7 @@ scrub_check_flags() {
 	for n in $(seq $MDSCOUNT); do
 		actual=$(do_facet mds$n $LCTL get_param -n \
-			osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
+			osd-*.$(facet_svc mds$n).oi_scrub |
 			awk '/^flags/ { print $2 }')
 		if [ "$actual" != "$expected" ]; then
 			error "($error_id) Expected '$expected' on mds$n, but" \
@@ -209,7 +233,7 @@ scrub_check_params() {
 	for n in $(seq $MDSCOUNT); do
 		actual=$(do_facet mds$n $LCTL get_param -n \
-			osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
+			osd-*.$(facet_svc mds$n).oi_scrub |
 			awk '/^param/ { print $2 }')
 		if [ "$actual" != "$expected" ]; then
 			error "($error_id) Expected '$expected' on mds$n, but" \
@@ -221,13 +245,20 @@ scrub_check_repaired() {
 	local error_id=$1
 	local expected=$2
+	local dryrun=$3
 	local actual
 	local n
 	for n in $(seq $MDSCOUNT); do
-		actual=$(do_facet mds$n $LCTL get_param -n \
-			osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
-			awk '/^updated/ { print $2 }')
+		if [ $dryrun -eq 1 ]; then
+			actual=$(do_facet mds$n $LCTL get_param -n \
+				osd-*.$(facet_svc mds$n).oi_scrub |
+				awk '/^inconsistent:/ { print $2 }')
+		else
+			actual=$(do_facet mds$n $LCTL get_param -n \
+				osd-*.$(facet_svc mds$n).oi_scrub |
+				awk '/^updated:/ { print $2 }')
+		fi
 		if [ $expected -eq 0 -a $actual -ne 0 ]; then
 			error "($error_id) Expected no repaired on mds$n, but" \
@@ -265,6 +296,8 @@ scrub_check_data2() {
 }
 scrub_remove_ois() {
+	[ "$mds1_FSTYPE" != "ldiskfs" ] && return
+
 	local error_id=$1
 	local index=$2
 	local n
@@ -275,40 +308,37 @@ scrub_remove_ois() {
 	done
 }
-scrub_backup_restore() {
-	local error_id=$1
-	local igif=$2
-	local n
-
-	for n in $(seq $MDSCOUNT); do
-		mds_backup_restore mds$n $igif ||
-			error "(error_id) Backup/restore on mds$n failed"
-	done
-}
-
 scrub_enable_auto() {
 	do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
-		osd-ldiskfs.*.auto_scrub=1
+		osd-*.*.auto_scrub=1
 }
 full_scrub_ratio() {
-	[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
-		return
+	[ "$mds1_FSTYPE" != "ldiskfs" ] && return
 	local ratio=$1
 	do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
-		osd-ldiskfs.*.full_scrub_ratio=$ratio
+		osd-*.*.full_scrub_ratio=$ratio
 }
 full_scrub_threshold_rate() {
-	[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
-		return
+	[ "$mds1_FSTYPE" != "ldiskfs" ] && return
 	local rate=$1
 	do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
-		osd-ldiskfs.*.full_scrub_threshold_rate=$rate
+		osd-*.*.full_scrub_threshold_rate=$rate
+}
+
+scrub_enable_index_backup() {
+	do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
+		osd-*.*.index_backup=1
+}
+
+scrub_disable_index_backup() {
+	do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
+		osd-*.*.index_backup=0
 }
 test_0() {
@@ -323,8 +353,10 @@ test_0() {
 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
 test_1a() {
+	[ -n "$FILESET" ] && skip "Not functional for FILESET set"
+
 	scrub_prep 0
-	echo "start $SINGLEMDS without disabling OI scrub"
+	echo "start $SINGLEMDS without disabling OI scrub: $MOUNT_OPTS_SCRUB"
 	scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
 	local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
@@ -341,8 +373,8 @@ test_1a() {
 	echo "stop $SINGLEMDS"
 	stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
-	echo "start $SINGLEMDS with disabling OI scrub"
-	start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
+	echo "start $SINGLEMDS with disabling OI scrub: $MOUNT_OPTS_NOSCRUB"
+	start $SINGLEMDS $(mdsdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
 		error "(7) Fail to start MDS!"
 	local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
@@ -352,17 +384,21 @@ run_test 1a "Auto trigger initial OI scrub when server mounts"
 test_1b() {
-	scrub_prep 0
-	scrub_remove_ois 1
+	scrub_prep 0 2
 	echo "start MDTs without disabling OI scrub"
 	scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
-	scrub_check_status 3 completed
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_status 3 completed
 	mount_client $MOUNT || error "(4) Fail to start client!"
-	scrub_check_data 5
+	scrub_check_data2 runas 5
+	scrub_check_status 6 completed
 }
 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
 test_1c() {
+	[ "$mds1_FSTYPE" != "ldiskfs" ] &&
+		skip "ldiskfs special test"
+
 	local index
 	# OI files to be removed:
@@ -383,8 +419,10 @@ run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
 test_2() {
-	scrub_prep 0
-	scrub_backup_restore 1
+	[ "$mds1_FSTYPE" != "ldiskfs" ] &&
+		skip "ldiskfs special test"
+
+	scrub_prep 0 1
 	echo "starting MDTs without disabling OI scrub"
 	scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
 	scrub_check_status 3 completed
@@ -398,21 +436,21 @@ test_3() {
 	formatall > /dev/null
 	setupall > /dev/null
-	scrub_prep 0
-	scrub_backup_restore 1
+	scrub_prep 0 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
 	scrub_check_status 3 init
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 }
 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
 test_4a() {
-	scrub_prep 0
-	scrub_backup_restore 1
+	scrub_prep 0 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 	mount_client $MOUNT || error "(5) Fail to start client!"
 	scrub_enable_auto
 	full_scrub_ratio 0
@@ -425,7 +463,7 @@ test_4a() {
 	local -a updated0
 	for n in $(seq $MDSCOUNT); do
 		updated0[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
+			awk '/^prior_updated/ { print $2 }')
 	done
 	scrub_check_data2 sanity-scrub.sh 9
@@ -434,7 +472,7 @@ test_4a() {
 	local -a updated1
 	for n in $(seq $MDSCOUNT); do
 		updated1[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
+			awk '/^prior_updated/ { print $2 }')
 		[ ${updated0[$n]} -eq ${updated1[$n]} ] ||
 			error "(10) NOT auto trigger full scrub as expected"
 	done
 }
 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
 test_4b() {
-	scrub_prep 5
-	scrub_backup_restore 1
+	[ "$mds1_FSTYPE" != "ldiskfs" ] &&
+		skip "ldiskfs special test"
+
+	scrub_prep 5 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	scrub_check_flags 4 recreated,inconsistent
 	mount_client $MOUNT || error "(5) Fail to start client!"
 	scrub_enable_auto
 	full_scrub_ratio 10
@@ -460,7 +500,11 @@ test_4b() {
 	local -a updated0
 	for n in $(seq $MDSCOUNT); do
 		updated0[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
+			awk '/^prior_updated/ { print $2 }')
+
+		echo "OI scrub on MDS$n status for the 1st time:"
+		do_facet mds$n $LCTL get_param -n \
+			osd-*.$(facet_svc mds$n).oi_scrub
 	done
 	scrub_check_data2 sanity-scrub.sh 9
@@ -472,7 +516,12 @@ test_4b() {
 	local -a updated1
 	for n in $(seq $MDSCOUNT); do
 		updated1[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
+			awk '/^prior_updated/ { print $2 }')
+
+		echo "OI scrub on MDS$n status for the 2nd time:"
+		do_facet mds$n $LCTL get_param -n \
+			osd-*.$(facet_svc mds$n).oi_scrub
+
 		[ ${updated0[$n]} -lt ${updated1[$n]} ] ||
 			error "(12) Auto trigger full scrub unexpectedly"
 	done
@@ -488,7 +537,12 @@ test_4b() {
 	for n in $(seq $MDSCOUNT); do
 		updated0[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
+			awk '/^prior_updated/ { print $2 }')
+
+		echo "OI scrub on MDS$n status for the 3rd time:"
+		do_facet mds$n $LCTL get_param -n \
+			osd-*.$(facet_svc mds$n).oi_scrub
+
 		[ ${updated0[$n]} -gt ${updated1[$n]} ] ||
 			error "(16) Auto trigger full scrub unexpectedly"
 	done
@@ -500,19 +554,26 @@ test_4b() {
 	for n in $(seq $MDSCOUNT); do
 		updated1[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
-		[ ${updated0[$n]} -eq ${updated1[$n]} ] ||
+			awk '/^prior_updated/ { print $2 }')
+		[ ${updated0[$n]} -eq ${updated1[$n]} ] || {
+			echo "OI scrub on MDS$n status for the 4th time:"
+			do_facet mds$n $LCTL get_param -n \
+				osd-*.$(facet_svc mds$n).oi_scrub
+
			error "(18) NOT auto trigger full scrub as expected"
+		}
 	done
 }
 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
 test_4c() {
-	scrub_prep 500
-	scrub_backup_restore 1
+	[ "$mds1_FSTYPE" != "ldiskfs" ] &&
+		skip "ldiskfs special test"
+
+	scrub_prep 500 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	scrub_check_flags 4 recreated,inconsistent
 	mount_client $MOUNT || error "(5) Fail to start client!"
 	scrub_enable_auto
 	full_scrub_ratio 2
@@ -526,7 +587,11 @@ test_4c() {
 	local -a updated0
 	for n in $(seq $MDSCOUNT); do
 		updated0[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
+			awk '/^prior_updated/ { print $2 }')
+
+		echo "OI scrub on MDS$n status for the 1st time:"
+		do_facet mds$n $LCTL get_param -n \
+			osd-*.$(facet_svc mds$n).oi_scrub
 	done
 	scrub_check_data2 sanity-scrub.sh 9
@@ -538,7 +603,12 @@ test_4c() {
 	local -a updated1
 	for n in $(seq $MDSCOUNT); do
 		updated1[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
+			awk '/^prior_updated/ { print $2 }')
+
+		echo "OI scrub on MDS$n status for the 2nd time:"
+		do_facet mds$n $LCTL get_param -n \
+			osd-*.$(facet_svc mds$n).oi_scrub
+
 		[ ${updated0[$n]} -lt ${updated1[$n]} ] ||
 			error "(12) Auto trigger full scrub unexpectedly"
 	done
@@ -554,7 +624,12 @@ test_4c() {
 	for n in $(seq $MDSCOUNT); do
 		updated0[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
+			awk '/^prior_updated/ { print $2 }')
+
+		echo "OI scrub on MDS$n status for the 3rd time:"
+		do_facet mds$n $LCTL get_param -n \
+			osd-*.$(facet_svc mds$n).oi_scrub
+
 		[ ${updated0[$n]} -gt ${updated1[$n]} ] ||
 			error "(16) Auto trigger full scrub unexpectedly"
 	done
@@ -566,31 +641,58 @@ test_4c() {
 	for n in $(seq $MDSCOUNT); do
 		updated1[$n]=$(scrub_status $n |
-			awk '/^sf_items_updated_prior/ { print $2 }')
-		[ ${updated0[$n]} -eq ${updated1[$n]} ] ||
+			awk '/^prior_updated/ { print $2 }')
+		[ ${updated0[$n]} -eq ${updated1[$n]} ] || {
+			echo "OI scrub on MDS$n status for the 4th time:"
+			do_facet mds$n $LCTL get_param -n \
+				osd-*.$(facet_svc mds$n).oi_scrub
+
			error "(18) NOT auto trigger full scrub as expected"
+		}
 	done
 }
 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
+test_4d() {
+	[ "$mds1_FSTYPE" != "ldiskfs" ] && skip "ldiskfs only test"
+
+	check_mount_and_prep
+
+	#define OBD_FAIL_OSD_DUPLICATE_MAP 0x19b
+	do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x19b
+	for i in {1..100}; do
+		echo $i > $DIR/$tdir/f_$i || error "write f_$i failed"
+	done
+	do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
+
+	for i in {101..200}; do
+		echo $i > $DIR/$tdir/f_$i || error "write f_$i failed"
+	done
+
+	for i in {1..200}; do
+		echo $i | cmp $DIR/$tdir/f_$i - || error "f_$i data corrupt"
+	done
+}
+run_test 4d "FID in LMA mismatch with object FID won't block create"
+
 test_5() {
 	formatall > /dev/null
 	setupall > /dev/null
-	scrub_prep 1000
-	scrub_backup_restore 1
+	scrub_prep 100 1
 	echo "starting MDTs with OI scrub disabled (1)"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
 	scrub_check_status 3 init
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 	mount_client $MOUNT || error "(5) Fail to start client!"
 	scrub_enable_auto
+	full_scrub_ratio 0
 	#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
 	do_nodes $(comma_list $(mdts_nodes)) \
 		$LCTL set_param fail_val=3 fail_loc=0x190
-	full_scrub_ratio 0
 	scrub_check_data 6
 	umount_client $MOUNT || error "(7) Fail to stop client!"
 	scrub_check_status 8 scanning
@@ -624,20 +726,21 @@ test_5() {
 	mount_client $MOUNT || error "(16) Fail to start client!"
 	full_scrub_ratio 0
-	#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
 	do_nodes $(comma_list $(mdts_nodes)) \
-		$LCTL set_param fail_val=3 fail_loc=0x190
+		$LCTL set_param fail_loc=0 fail_val=0
 	local n
+	declare -a pids
+
 	for n in $(seq $MDSCOUNT); do
-		stat $DIR/$tdir/mds$n/${tfile}800 ||
-			error "(17) Failed to stat mds$n/${tfile}800"
+		stat $DIR/$tdir/mds$n/sanity-scrub.sh &
+		pids[$n]=$!
 	done
-	scrub_check_status 18 scanning
-
-	do_nodes $(comma_list $(mdts_nodes)) \
-		$LCTL set_param fail_loc=0 fail_val=0
+	for n in $(seq $MDSCOUNT); do
+		wait ${pids[$n]} ||
+			error "(18) Fail to stat mds$n/sanity-scrub.sh"
+	done
 	scrub_check_status 19 completed
 	scrub_check_flags 20 ""
@@ -645,19 +748,19 @@ run_test 5 "OI scrub state machine"
 test_6() {
-	scrub_prep 1000
-	scrub_backup_restore 1
+	scrub_prep 100 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 	mount_client $MOUNT || error "(5) Fail to start client!"
 	scrub_enable_auto
+	full_scrub_ratio 0
 	#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
 	do_nodes $(comma_list $(mdts_nodes)) \
 		$LCTL set_param fail_val=2 fail_loc=0x190
-	full_scrub_ratio 0
 	scrub_check_data 6
 	# Sleep 5 sec to guarantee at least one object processed by OI scrub
@@ -675,8 +778,8 @@ test_6() {
 	local n
 	for n in $(seq $MDSCOUNT); do
 		# stat will re-trigger OI scrub
-		stat $DIR/$tdir/mds$n/${tfile}800 ||
-			error "(8) Failed to stat mds$n/${tfile}800"
+		stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
+			error "(8) Failed to stat mds$n/sanity-scrub.sh"
 	done
 	umount_client $MOUNT || error "(9) Fail to stop client!"
@@ -723,19 +826,19 @@ test_6() {
 run_test 6 "OI scrub resumes from last checkpoint"
 test_7() {
-	scrub_prep 500
-	scrub_backup_restore 1
+	scrub_prep 500 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 	mount_client $MOUNT || error "(5) Fail to start client!"
 	scrub_enable_auto
+	full_scrub_ratio 0
 	#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
 	do_nodes $(comma_list $(mdts_nodes)) \
 		$LCTL set_param fail_val=3 fail_loc=0x190
-	full_scrub_ratio 0
 	scrub_check_data 6
 	local n
@@ -745,7 +848,11 @@ test_7() {
 	done
 	scrub_check_status 8 scanning
-	scrub_check_flags 9 inconsistent,auto
+	if [ "$mds1_FSTYPE" != "ldiskfs" ]; then
+		scrub_check_flags 9 inconsistent,auto
+	else
+		scrub_check_flags 9 recreated,inconsistent,auto
+	fi
 	do_nodes $(comma_list $(mdts_nodes)) \
 		$LCTL set_param fail_loc=0 fail_val=0
@@ -756,11 +863,11 @@ run_test 7 "System is available during OI scrub scanning"
 test_8() {
-	scrub_prep 128
-	scrub_backup_restore 1
+	scrub_prep 128 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 	#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
 	do_nodes $(comma_list $(mdts_nodes)) \
@@ -782,17 +889,19 @@ test_8() {
 run_test 8 "Control OI scrub manually"
 test_9() {
+	# Skip scrub speed test for ZFS because of performance unstable
+	[ "$mds1_FSTYPE" != "ldiskfs" ] &&
+		skip "test scrub speed only on ldiskfs"
+
 	if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
 		skip "Testing on UP system, the speed may be inaccurate."
-		return 0
 	fi
-	scrub_prep 6000
-	scrub_backup_restore 1
+	scrub_prep 6000 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	scrub_check_flags 4 recreated,inconsistent
 	local BASE_SPEED1=100
 	local RUN_TIME1=10
@@ -802,6 +911,7 @@ test_9() {
 	sleep $RUN_TIME1
 	scrub_check_status 6 completed
 	scrub_check_flags 7 ""
+
 	# OI scrub should run with limited speed under non-inconsistent case
 	scrub_start 8 -s $BASE_SPEED1 -r
@@ -834,16 +944,17 @@ test_9() {
 	done
 	sleep $RUN_TIME2
-	# MIN_MARGIN = 0.8 = 8 / 10
+	# 30% margin
+	local MARGIN=3
 	local MIN_SPEED=$(((PRE_FETCHED + \
 		BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
 		BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
-		(RUN_TIME1 + RUN_TIME2) * 8 / 10))
+		(RUN_TIME1 + RUN_TIME2) * (10 - MARGIN) / 10))
 	# MAX_MARGIN = 1.2 = 12 / 10
 	MAX_SPEED=$(((PRE_FETCHED + \
 		BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
 		BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
-		(RUN_TIME1 + RUN_TIME2) * 12 / 10))
+		(RUN_TIME1 + RUN_TIME2) * (10 + MARGIN) / 10))
 	for n in $(seq $MDSCOUNT); do
 		SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
 		[ $SPEED -gt $MIN_SPEED ] ||
@@ -862,19 +973,19 @@ test_9() {
 run_test 9 "OI scrub speed control"
 test_10a() {
-	scrub_prep 0
-	scrub_backup_restore 1
+	scrub_prep 0 1
 	echo "starting mds$n with OI scrub disabled (1)"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 	mount_client $MOUNT || error "(5) Fail to start client!"
 	scrub_enable_auto
+	full_scrub_ratio 0
 	#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
 	do_nodes $(comma_list $(mdts_nodes)) \
 		$LCTL set_param fail_val=1 fail_loc=0x190
-	full_scrub_ratio 0
 	scrub_check_data 6
 	scrub_check_status 7 scanning
 	umount_client $MOUNT || error "(8) Fail to stop client!"
@@ -897,11 +1008,11 @@ run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
 # test_10b is obsolete, it will be coverded by related sanity-lfsck tests.
 test_10b() {
-	scrub_prep 0
-	scrub_backup_restore 1
+	scrub_prep 0 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 	#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
 	do_nodes $(comma_list $(mdts_nodes)) \
@@ -927,13 +1038,16 @@ test_10b() {
 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
 test_11() {
+	[ "$mds1_FSTYPE" != "ldiskfs" ] &&
+		skip "ldiskfs special test"
+
 	local CREATED=100
 	local n
 	check_mount_and_prep
 	for n in $(seq $MDSCOUNT); do
-		$LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
+		test_mkdir -i $((n - 1)) -c1 $DIR/$tdir/mds$n ||
 			error "(1) Fail to mkdir $DIR/$tdir/mds$n"
 		createmany -o $DIR/$tdir/mds$n/f $CREATED ||
@@ -979,7 +1093,7 @@ run_test 11 "OI scrub skips the new created objects only once"
 test_12() {
 	check_mount_and_prep
-	$SETSTRIPE -c 1 -i 0 $DIR/$tdir
+	$LFS setstripe -c 1 -i 0 $DIR/$tdir
 	#define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
 	do_facet ost1 $LCTL set_param fail_loc=0x195
@@ -1004,7 +1118,7 @@ test_12() {
 	do_facet ost1 $LCTL set_param fail_loc=0
 	wait_update_facet ost1 "$LCTL get_param -n \
-		osd-ldiskfs.$(facet_svc ost1).oi_scrub |
+		osd-*.$(facet_svc ost1).oi_scrub |
 		awk '/^status/ { print \\\$2 }'" "completed" 6 ||
 		error "(7) Expected '$expected' on ost1"
@@ -1017,7 +1131,7 @@ run_test 12 "OI scrub can rebuild invalid /O entries"
 test_13() {
 	check_mount_and_prep
-	$SETSTRIPE -c 1 -i 0 $DIR/$tdir
+	$LFS setstripe -c 1 -i 0 $DIR/$tdir
 	#define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
 	do_facet ost1 $LCTL set_param fail_loc=0x196
@@ -1040,7 +1154,7 @@ test_13() {
 	$START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
 	wait_update_facet ost1 "$LCTL get_param -n \
-		osd-ldiskfs.$(facet_svc ost1).oi_scrub |
+		osd-*.$(facet_svc ost1).oi_scrub |
 		awk '/^status/ { print \\\$2 }'" "completed" 6 ||
 		error "(7) Expected '$expected' on ost1"
@@ -1049,14 +1163,17 @@ run_test 13 "OI scrub can rebuild missed /O entries"
 test_14() {
+	[ "$mds1_FSTYPE" != "ldiskfs" ] &&
+		skip "ldiskfs special test"
+
 	check_mount_and_prep
-	$SETSTRIPE -c 1 -i 0 $DIR/$tdir
+	$LFS setstripe -c 1 -i 0 $DIR/$tdir
 	#define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
 	do_facet ost1 $LCTL set_param fail_loc=0x196
 	local count=$(precreated_ost_obj_count 0 0)
-	createmany -o $DIR/$tdir/f $((count + 32))
+	createmany -o $DIR/$tdir/f $((count + 1000))
 	do_facet ost1 $LCTL set_param fail_loc=0
 	umount_client $MOUNT || error "(1) Fail to stop client!"
@@ -1073,82 +1190,206 @@ test_14() {
 	mount_client $MOUNT || error "(5) Fail to start client!"
 	local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
-		awk '/^lf_repa[ri]*ed/ { print $2 }')
+		awk '/^lf_repa[ir]*ed/ { print $2 }')
-	[ $LF_REPAIRED -gt 0 ] ||
+	[ $LF_REPAIRED -ge 1000 ] ||
 		error "(6) Some entry under /lost+found should be repaired"
 	ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
+
+	stopall
+
+	echo "run e2fsck again after LFSCK"
+	run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
+		error "(8) Fail to run e2fsck error"
 }
-run_test 14 "OI scrub can repair objects under lost+found"
+run_test 14 "OI scrub can repair OST objects under lost+found"
 test_15() {
-	# skip test_15 for LU-4182
-	[ $MDSCOUNT -ge 2 ] && skip "skip now for >= 2 MDTs" && return
-	local server_version=$(lustre_version_code $SINGLEMDS)
-	scrub_prep 20
-	scrub_backup_restore 1
+	local repaired
+
+	formatall > /dev/null
+	setupall > /dev/null
+
+	scrub_prep 20 1
 	echo "starting MDTs with OI scrub disabled"
 	scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
 	scrub_check_status 3 init
-	scrub_check_flags 4 inconsistent
+	[ "$mds1_FSTYPE" != "ldiskfs" ] ||
+		scrub_check_flags 4 recreated,inconsistent
 	# run under dryrun mode
-	if [ $server_version -lt $(version_code 2.5.58) ]; then
-		scrub_start 5 --dryrun on
+	scrub_start 5 --dryrun
+	scrub_check_status 6 completed
+	if [ "$mds1_FSTYPE" != "ldiskfs" ]; then
+		scrub_check_flags 7 inconsistent
+		repaired=2
 	else
-		scrub_start 5 --dryrun
+		scrub_check_flags 7 recreated,inconsistent
+		repaired=20
 	fi
-	scrub_check_status 6 completed
-	scrub_check_flags 7 inconsistent
 	scrub_check_params 8 dryrun
-	scrub_check_repaired 9 20
+	scrub_check_repaired 9 $repaired 1
 	# run under dryrun mode again
-	if [ $server_version -lt $(version_code 2.5.58) ]; then
-		scrub_start 10 --dryrun on
+	scrub_start 10 --dryrun
+	scrub_check_status 11 completed
+	if [ "$mds1_FSTYPE" != "ldiskfs" ]; then
+		scrub_check_flags 12 inconsistent
 	else
-		scrub_start 10 --dryrun
+		scrub_check_flags 12 recreated,inconsistent
 	fi
-	scrub_check_status 11 completed
-	scrub_check_flags 12 inconsistent
 	scrub_check_params 13 dryrun
-	scrub_check_repaired 14 20
+	scrub_check_repaired 14 $repaired 1
 	# run under normal mode
-	#
-	# Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
-	# work under Lustre-2.y (y >= 6), the test script should be fixed as
-	# "-noff" or "--dryrun=off" or nothing by default.
-	if [ $server_version -lt $(version_code 2.5.58) ]; then
-		scrub_start 15 --dryrun off
-	else
-		scrub_start 15
-	fi
+	scrub_start 15
 	scrub_check_status 16 completed
 	scrub_check_flags 17 ""
 	scrub_check_params 18 ""
-	scrub_check_repaired 19 20
+	scrub_check_repaired 19 $repaired 0
 	# run under normal mode again
-	if [ $server_version -lt $(version_code 2.5.58) ]; then
-		scrub_start 20 --dryrun off
-	else
-		scrub_start 20
-	fi
+	scrub_start 20
 	scrub_check_status 21 completed
 	scrub_check_flags 22 ""
 	scrub_check_params 23 ""
-	scrub_check_repaired 24 0
+	scrub_check_repaired 24 0 0
 }
 run_test 15 "Dryrun mode OI scrub"
+test_16() {
+	check_mount_and_prep
+	scrub_enable_index_backup
+
+	#define OBD_FAIL_OSD_INDEX_CRASH 0x199
+	do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x199
+	scrub_prep 0
+	do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
+
+	echo "starting MDTs without disabling OI scrub"
+	scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
+	mount_client $MOUNT || error "(2) Fail to start client!"
+	scrub_check_data 3
+	scrub_disable_index_backup
+}
+run_test 16 "Initial OI scrub can rebuild crashed index objects"
+
+test_17a() {
+	[ "$mds1_FSTYPE" != "ldiskfs" ] && skip_env "ldiskfs only test"
+
+#define OBD_FAIL_OSD_OI_ENOSPC 0x19d
+	do_facet mds1 $LCTL set_param fail_loc=0x8000019d
+	mkdir $DIR/$tdir && error "mkdir should fail"
+	stop mds1
+	local devname=$(mdsdevname 1)
+
+	stack_trap "start mds1 $devname $MDS_MOUNT_OPTS" EXIT
+	FSCK_MAX_ERR=0 run_e2fsck $(facet_active_host mds1) $devname -n ||
+		error "e2fsck returned $?"
+}
+run_test 17a "ENOSPC on OI insert shouldn't leak inodes"
+
+test_17b() {
+	[ "$mds1_FSTYPE" != "ldiskfs" ] && skip_env "ldiskfs only test"
+
+#define OBD_FAIL_OSD_DOTDOT_ENOSPC 0x19e
+	do_facet mds1 $LCTL set_param fail_loc=0x8000019e
+	mkdir $DIR/$tdir && error "mkdir should fail"
+	stop mds1
+	local devname=$(mdsdevname 1)
+
+	stack_trap "start mds1 $devname $MDS_MOUNT_OPTS" EXIT
+	FSCK_MAX_ERR=0 run_e2fsck $(facet_active_host mds1) $devname -n ||
+		error "e2fsck returned $?"
+}
+run_test 17b "ENOSPC on .. insertion shouldn't leak inodes"
+
+test_18() {
+	local n
+	local fids=()
+	local opts=$(csa_add "$MOUNT_OPTS_SCRUB" -o resetoi)
+
+	scrub_prep 10
+	scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
+	mount_client $MOUNT || error "(2) Fail to start client!"
+	for n in $(seq $MDSCOUNT); do
+		fids+=($($LFS path2fid $DIR/$tdir/mds$n/test-framework.sh))
+	done
+	cleanup_mount $MOUNT > /dev/null || error "(3) Fail to stop client!"
+	for n in $(seq $MDSCOUNT); do
+		stop mds$n > /dev/null || error "(4) Fail to stop MDS$n!"
+	done
+	scrub_start_mds 5 "$opts"
+	do_facet mds1 dmesg | grep "reset Object Index" ||
+		error "(6) reset log not found"
+	mount_client $MOUNT || error "(7) Fail to start client!"
+	scrub_check_data 7
+
+	local fid
+	local path
+	for n in $(seq $MDSCOUNT); do
+		path=$($LFS fid2path $DIR ${fids[$((n - 1))]})
+		[ "$path" == "$DIR/$tdir/mds$n/test-framework.sh" ] ||
+			error "path mismatch $path != $DIR/$tdir/mds$n/test-framework.sh"
+		fid=$($LFS path2fid $DIR/$tdir/mds$n/test-framework.sh)
+		[ "${fids[$((n - 1))]}" == "$fid" ] ||
+			error "$DIR/$tdir/mds$n/test-framework.sh FID mismatch ${fids[$((n - 1))]} != $fid"
+	done
+}
+run_test 18 "test mount -o resetoi to recreate OI files"
+
+test_19() {
+	local rcmd="do_facet ost${ost}"
+
+	check_mount_and_prep
+	$LFS setstripe -c 1 -i 0 $DIR/$tdir
+	createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files."
+
+	echo "stopall"
+	stopall > /dev/null
+
+	# create multiple link file
+	mount_fstype ost1 || error "(1) Fail to mount ost1"
+	mntpt=$(facet_mntpt ost1)
+
+	local path=$mntpt/O/0/d2
+	local file=$(${rcmd} ls $path | awk '{print $0; exit}')
+
+	# create link to the first file
+	echo "link $path/1 to $path/$file"
+	${rcmd} ln $path/$file $path/1
+	unmount_fstype ost1 || error "(2) Fail to umount ost1"
+
+	start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
+		error "(2) Fail to start ost1"
+
+	$START_SCRUB_ON_OST -r || error "(3) Fail to start OI scrub on OST!"
+
+	wait_update_facet ost1 "$LCTL get_param -n \
+		osd-*.$(facet_svc ost1).oi_scrub |
+		awk '/^status/ { print \\\$2 }'" "completed" 6 ||
+		error "(4) Expected '$expected' on ost1"
+
+	stop ost1
+	mount_fstype ost1 || error "(5) Fail to mount ost1"
+	links=$(do_facet ost1 "stat $path/$file" | awk '/Links:/ { print $6 }')
+	unmount_fstype ost1 || error "(6) Fail to umount ost1"
+
+	start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
+		error "(7) Fail to start ost1"
+
+	(( links == 1)) || error "(8) object links $links != 1 after scrub"
+}
+run_test 19 "LFSCK can fix multiple linked files on OST"
+
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}
 OSTCOUNT=${SAVED_OSTCOUNT}
 # cleanup the system at last
-formatall
+REFORMAT="yes" cleanup_and_setup_lustre
 complete $SECONDS
+check_and_cleanup_lustre
 exit_status
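For context, the patched script is still driven through the standard Lustre test framework, so individual cases from the diff above can be exercised directly with the ONLY and SLOW variables it already honours. A minimal invocation sketch follows; it is a hypothetical example, not part of the patch, and assumes a node with a working test configuration (for example the default cfg/local.sh supplying MDSCOUNT, OSTCOUNT and the server devices):

    # run only the OI scrub state-machine and manual-control cases,
    # without skipping the tests marked as slow
    cd lustre/tests
    ONLY="5 8" SLOW=yes bash sanity-scrub.sh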