X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-scrub.sh;h=5ca94e26b6a5da39669431236ba9ed0138efe761;hb=25374e3b35f5776a9fd6e5b4cac273f7000511e9;hp=4db0e258b7736478a392aa1b94554d672eedb940;hpb=dc2e5bc3d9277ecf10e645350797a742c4342a44;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 4db0e25..5ca94e2 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -21,13 +21,19 @@ require_dsh_mds || exit 0 SAVED_MDSSIZE=${MDSSIZE} SAVED_OSTSIZE=${OSTSIZE} +SAVED_OSTCOUNT=${OSTCOUNT} # use small MDS + OST size to speed formatting time # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size MDSSIZE=100000 OSTSIZE=100000 +# no need too much OSTs, to reduce the format/start/stop overhead +[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4 MOUNT_2="" -check_and_setup_lustre + +# build up a clean test environment. +formatall +setupall [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre && @@ -42,6 +48,13 @@ check_and_setup_lustre [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a" +[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] && + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15" + +[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] && +[[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] && + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15" + [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14" @@ -92,16 +105,13 @@ scrub_prep() { local nfiles=$1 local n - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null + check_mount_and_prep - echo "preparing..." + echo "preparing... $(date)" for n in $(seq $MDSCOUNT); do echo "creating $nfiles files on mds$n" if [ $n -eq 1 ]; then - mkdir -p $DIR/$tdir/mds$n || + mkdir $DIR/$tdir/mds$n || error "Failed to create directory mds$n" else $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n || @@ -110,11 +120,11 @@ scrub_prep() { cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n || error "Failed to copy files to mds$n" if [[ $nfiles -gt 0 ]]; then - createmany -o $DIR/$tdir/mds$n/$tfile $nfiles || - error "createmany failed on mds$n" + createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \ + /dev/null || error "createmany failed on mds$n" fi done - echo "prepared." + echo "prepared $(date)." cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!" for n in $(seq $MDSCOUNT); do echo "stop mds$n" @@ -147,13 +157,26 @@ scrub_stop_mds() { scrub_check_status() { local error_id=$1 local expected=$2 + local n + + for n in $(seq $MDSCOUNT); do + wait_update_facet mds$n "$LCTL get_param -n \ + osd-ldiskfs.$(facet_svc mds$n).oi_scrub | + awk '/^status/ { print \\\$2 }'" "$expected" 6 || + error "($error_id) Expected '$expected' on mds$n" + done +} + +scrub_check_flags() { + local error_id=$1 + local expected=$2 local actual local n for n in $(seq $MDSCOUNT); do actual=$(do_facet mds$n $LCTL get_param -n \ osd-ldiskfs.$(facet_svc mds$n).oi_scrub | - awk '/^status/ { print $2 }') + awk '/^flags/ { print $2 }') if [ "$actual" != "$expected" ]; then error "($error_id) Expected '$expected' on mds$n, but" \ "got '$actual'" @@ -161,7 +184,7 @@ scrub_check_status() { done } -scrub_check_flags() { +scrub_check_params() { local error_id=$1 local expected=$2 local actual @@ -170,7 +193,7 @@ scrub_check_flags() { for n in $(seq $MDSCOUNT); do actual=$(do_facet mds$n $LCTL get_param -n \ osd-ldiskfs.$(facet_svc mds$n).oi_scrub | - awk '/^flags/ { print $2 }') + awk '/^param/ { print $2 }') if [ "$actual" != "$expected" ]; then error "($error_id) Expected '$expected' on mds$n, but" \ "got '$actual'" @@ -178,6 +201,29 @@ scrub_check_flags() { done } +scrub_check_repaired() { + local error_id=$1 + local expected=$2 + local actual + local n + + for n in $(seq $MDSCOUNT); do + actual=$(do_facet mds$n $LCTL get_param -n \ + osd-ldiskfs.$(facet_svc mds$n).oi_scrub | + awk '/^updated/ { print $2 }') + + if [ $expected -eq 0 -a $actual -ne 0 ]; then + error "($error_id) Expected no repaired on mds$n, but" \ + "got '$actual'" + fi + + if [ $expected -ne 0 -a $actual -lt $expected ]; then + error "($error_id) Expected '$expected' on mds$n, but" \ + "got '$actual'" + fi + done +} + scrub_check_data() { local error_id=$1 local n @@ -237,21 +283,15 @@ test_1a() { start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(1) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(2) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'" mount_client $MOUNT || error "(4) Fail to start client!" - #define OBD_FAIL_OSD_FID_MAPPING 0x193 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193 # update .lustre OI mapping touch $MOUNT/.lustre do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - umount_client $MOUNT || error "(5) Fail to stop client!" echo "stop $SINGLEMDS" @@ -261,10 +301,6 @@ test_1a() { start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(7) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(8) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent" ] || error "(9) Expect 'inconsistent', but got '$FLAGS'" @@ -276,7 +312,6 @@ test_1b() { scrub_remove_ois 1 echo "start MDTs without disabling OI scrub" scrub_start_mds 2 "$MOUNT_OPTS_SCRUB" - sleep 3 scrub_check_status 3 completed mount_client $MOUNT || error "(4) Fail to start client!" scrub_check_data 5 @@ -288,20 +323,15 @@ test_1c() { # OI files to be removed: # idx 0: oi.16.0 - # idx 1: oi.16.1 # idx 2: oi.16.{2,4,8,16,32} # idx 3: oi.16.{3,9,27} - # idx 5: oi.16.{5,25} - # idx 7: oi.16.{7,49} - for index in 0 1 2 3 5 7; do + for index in 0 2 3; do scrub_prep 0 scrub_remove_ois 1 $index - echo "start MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" scrub_check_flags 3 recreated scrub_start 4 - sleep 3 scrub_check_status 5 completed scrub_check_flags 6 "" done @@ -313,43 +343,45 @@ test_2() { scrub_backup_restore 1 echo "starting MDTs without disabling OI scrub" scrub_start_mds 2 "$MOUNT_OPTS_SCRUB" - sleep 3 scrub_check_status 3 completed mount_client $MOUNT || error "(4) Fail to start client!" scrub_check_data 5 } run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case" +# test_3 is obsolete, it will be covered by test_5. test_3() { + formatall > /dev/null + setupall > /dev/null + scrub_prep 0 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - sleep 3 scrub_check_status 3 init scrub_check_flags 4 inconsistent - echo "stopall" - stopall > /dev/null } -run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" +#run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" test_4() { scrub_prep 0 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto scrub_check_data 6 - sleep 3 scrub_check_status 7 completed + scrub_check_flags 8 "" } run_test 4 "Trigger OI scrub automatically if inconsistent OI mapping was found" test_5() { - scrub_prep 1500 + formatall > /dev/null + setupall > /dev/null + + scrub_prep 1000 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" @@ -358,73 +390,58 @@ test_5() { mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_check_data 6 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 umount_client $MOUNT || error "(7) Fail to stop client!" - scrub_check_status 8 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 - do_facet mds$n $LCTL set_param fail_loc=0x191 - done + #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191 + sleep 4 scrub_stop_mds 9 - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 echo "starting MDTs with OI scrub disabled" scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 11 crashed - scrub_stop_mds 12 - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + echo "starting MDTs without disabling OI scrub" scrub_start_mds 13 "$MOUNT_OPTS_SCRUB" - scrub_check_status 14 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 - do_facet mds$n $LCTL set_param fail_loc=0x192 - done - sleep 4 - scrub_check_status 15 failed + #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192 + scrub_check_status 15 failed mount_client $MOUNT || error "(16) Fail to start client!" + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + + local n for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - stat $DIR/$tdir/mds$n/${tfile}1000 || - error "(17) Failed to stat mds$n/${tfile}1000" + stat $DIR/$tdir/mds$n/${tfile}800 || + error "(17) Failed to stat mds$n/${tfile}800" done scrub_check_status 18 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 19 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 19 completed scrub_check_flags 20 "" } run_test 5 "OI scrub state machine" @@ -434,45 +451,41 @@ test_6() { scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 # Sleep 5 sec to guarantee at least one object processed by OI scrub sleep 5 # Fail the OI scrub to guarantee there is at least one checkpoint - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 - do_facet mds$n $LCTL set_param fail_loc=0x192 - done - sleep 4 + #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192 + scrub_check_status 7 failed + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + + local n for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 # stat will re-trigger OI scrub stat $DIR/$tdir/mds$n/${tfile}800 || error "(8) Failed to stat mds$n/${tfile}800" done umount_client $MOUNT || error "(9) Fail to stop client!" - scrub_check_status 10 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 - do_facet mds$n $LCTL set_param fail_loc=0x191 - done + #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191 + sleep 4 local -a position0 for n in $(seq $MDSCOUNT); do @@ -483,11 +496,10 @@ test_6() { scrub_stop_mds 11 - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + echo "starting MDTs without disabling OI scrub" scrub_start_mds 12 "$MOUNT_OPTS_SCRUB" @@ -495,7 +507,7 @@ test_6() { local -a position1 for n in $(seq $MDSCOUNT); do - positions1[$n]=$(scrub_status $n | + position1[$n]=$(scrub_status $n | awk '/^latest_start_position/ {print $2}') if [ ${position0[$n]} -ne ${position1[$n]} ]; then error "(14) Expected position ${position0[$n]}, but" \ @@ -503,13 +515,10 @@ test_6() { fi done - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 15 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 15 completed scrub_check_flags 16 "" } run_test 6 "OI scrub resumes from last checkpoint" @@ -517,39 +526,31 @@ run_test 6 "OI scrub resumes from last checkpoint" test_7() { scrub_prep 500 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent - mount_client $MOUNT || error "(5) Fail to start client!" - scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 + local n for n in $(seq $MDSCOUNT); do stat $DIR/$tdir/mds$n/${tfile}300 || error "(7) Failed to stat mds$n/${tfile}300!" done scrub_check_status 8 scanning - scrub_check_flags 9 inconsistent,auto - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 10 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 10 completed scrub_check_flags "" } run_test 7 "System is available during OI scrub scanning" @@ -557,39 +558,25 @@ run_test 7 "System is available during OI scrub scanning" test_8() { scrub_prep 128 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=1 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_start 5 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=1 fail_loc=0x190 + scrub_start 5 scrub_check_status 6 scanning - scrub_stop 7 - scrub_check_status 8 stopped - scrub_start 9 - scrub_check_status 10 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 11 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 11 completed scrub_check_flags 12 "" } run_test 8 "Control OI scrub manually" @@ -600,14 +587,11 @@ test_9() { return 0 fi - scrub_prep 8000 + scrub_prep 6000 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent local BASE_SPEED1=100 @@ -617,9 +601,7 @@ test_9() { sleep $RUN_TIME1 scrub_check_status 6 completed - scrub_check_flags 7 "" - # OI scrub should run with limited speed under non-inconsistent case scrub_start 8 -s $BASE_SPEED1 -r @@ -674,7 +656,7 @@ test_9() { do_facet mds$n $LCTL set_param -n \ mdd.$(facet_svc mds$n).lfsck_speed_limit 0 done - sleep 6 + scrub_check_status 13 completed } run_test 9 "OI scrub speed control" @@ -682,50 +664,32 @@ run_test 9 "OI scrub speed control" test_10a() { scrub_prep 0 scrub_backup_restore 1 - echo "starting mds$n with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - mount_client $MOUNT || error "(5) Fail to start client!" - scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=1 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_check_data 6 - scrub_check_status 7 scanning + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=1 fail_loc=0x190 + scrub_check_data 6 + scrub_check_status 7 scanning umount_client $MOUNT || error "(8) Fail to stop client!" - scrub_stop_mds 9 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 11 paused - scrub_stop_mds 12 - echo "starting MDTs without disabling OI scrub" scrub_start_mds 13 "$MOUNT_OPTS_SCRUB" - scrub_check_status 14 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 15 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 15 completed scrub_check_flags 16 "" } run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)" @@ -734,79 +698,54 @@ run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)" test_10b() { scrub_prep 0 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 scrub_start 5 - scrub_check_status 6 scanning - scrub_stop_mds 7 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 9 paused - scrub_stop_mds 10 - echo "starting MDTs without disabling OI scrub" scrub_start_mds 11 "$MOUNT_OPTS_SCRUB" - scrub_check_status 12 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 13 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 13 completed scrub_check_flags 14 "" } #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)" test_11() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - local CREATED=100 - local tname=`date +%s` - rm -rf $MOUNT/$tname > /dev/null - mkdir -p $MOUNT/$tname || error "(0) Failed to create $MOUNT/$tname" local n + + check_mount_and_prep + for n in $(seq $MDSCOUNT); do - $LFS mkdir -i $((n - 1)) $MOUNT/$tname/mds$n || - error "(1) Fail to mkdir $MOUNT/$tname/mds$n" + $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n || + error "(1) Fail to mkdir $DIR/$tdir/mds$n" - createmany -o $MOUNT/$tname/mds$n/f $CREATED || - error "(2) Fail to create in $tname/mds$n" + createmany -o $DIR/$tdir/mds$n/f $CREATED || + error "(2) Fail to create under $tdir/mds$n" done - cleanup_mount $MOUNT - do_facet $SINGLEMDS $LCTL clear - start_full_debug_logging # reset OI scrub start point by force scrub_start 3 -r - sleep 3 scrub_check_status 4 completed + declare -a checked0 + declare -a checked1 + # OI scrub should skip the new created objects for the first accessing # notice we're creating a new llog for every OST on every startup # new features can make this even less stable, so we only check @@ -818,149 +757,196 @@ test_11() { [ $SKIPPED -ge $MAXIMUM -o $SKIPPED -lt $MINIMUM ] && error "(5) Expect [ $MINIMUM , $MAXIMUM ) objects" \ "skipped on mds$n, but got $SKIPPED" + + checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }') done # reset OI scrub start point by force - scrub_start -r - sleep 3 + scrub_start 6 -r scrub_check_status 7 completed # OI scrub should skip the new created object only once for n in $(seq $MDSCOUNT); do SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }') - [ $SKIPPED -eq 0 ] || + checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }') + + [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] || error "(8) Expect 0 objects skipped on mds$n, but" \ "got $SKIPPED" done - - stop_full_debug_logging - restore_mount $MOUNT || error "(9) Fail to start client!" - rm -rf $MOUNT/$tname > /dev/null } run_test 11 "OI scrub skips the new created objects only once" test_12() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195 do_facet ost1 $LCTL set_param fail_loc=0x195 - createmany -o $DIR/$tdir/f 1000 + createmany -o $DIR/$tdir/f $((count + 32)) - echo "stopall" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + umount_client $MOUNT || error "(1) Fail to stop client!" - do_facet ost1 $LCTL set_param fail_loc=0 - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" + stop ost1 || error "(2) Fail to stop ost1" - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + #define OBD_FAIL_OST_NODESTROY 0x233 + do_facet ost1 $LCTL set_param fail_loc=0x233 - sleep 3 - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(3) Expect 'completed', but got '$STATUS'" + start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB || + error "(3) Fail to start ost1" - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(4) ls should succeed" + mount_client $MOUNT || error "(4) Fail to start client!" + + ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail" + + $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!" + + do_facet ost1 $LCTL set_param fail_loc=0 + wait_update_facet ost1 "$LCTL get_param -n \ + osd-ldiskfs.$(facet_svc ost1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 6 || + error "(7) Expected '$expected' on ost1" + + ls -ail $DIR/$tdir > /dev/null || { + $SHOW_SCRUB_ON_OST + error "(8) ls should succeed" + } } run_test 12 "OI scrub can rebuild invalid /O entries" test_13() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 do_facet ost1 $LCTL set_param fail_loc=0x196 - createmany -o $DIR/$tdir/f 1000 + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + umount_client $MOUNT || error "(1) Fail to stop client!" + + stop ost1 || error "(2) Fail to stop ost1" + + start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB || + error "(3) Fail to start ost1" - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" + mount_client $MOUNT || error "(4) Fail to start client!" + + ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail" - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!" - $START_SCRUB_ON_OST || error "(3) Fail to start OI scrub on OST!" - sleep 3 - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + wait_update_facet ost1 "$LCTL get_param -n \ + osd-ldiskfs.$(facet_svc ost1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 6 || + error "(7) Expected '$expected' on ost1" - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed" + ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed" } run_test 13 "OI scrub can rebuild missed /O entries" test_14() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 do_facet ost1 $LCTL set_param fail_loc=0x196 - createmany -o $DIR/$tdir/f 64 + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall" - stopall > /dev/null - echo "setupall" - setupall > /dev/null - - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" + umount_client $MOUNT || error "(1) Fail to stop client!" - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" - - echo "stopall" - stopall > /dev/null + stop ost1 || error "(2) Fail to stop ost1" echo "run e2fsck" run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" || error "(3) Fail to run e2fsck error" - echo "setupall" - setupall > /dev/null + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || + error "(4) Fail to start ost1" + + mount_client $MOUNT || error "(5) Fail to start client!" local LF_REPAIRED=$($SHOW_SCRUB_ON_OST | awk '/^lf_reparied/ { print $2 }') [ $LF_REPAIRED -gt 0 ] || - error "(4) Some entry under /lost+found should be repaired" + error "(6) Some entry under /lost+found should be repaired" - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed" + ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed" } run_test 14 "OI scrub can repair objects under lost+found" +test_15() { + # skip test_15 for LU-4182 + [ $MDSCOUNT -ge 2 ] && skip "skip now for >= 2 MDTs" && return + local server_version=$(lustre_version_code $SINGLEMDS) + scrub_prep 20 + scrub_backup_restore 1 + echo "starting MDTs with OI scrub disabled" + scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" + scrub_check_status 3 init + scrub_check_flags 4 inconsistent + + # run under dryrun mode + if [ $server_version -lt $(version_code 2.5.58) ]; then + scrub_start 5 --dryrun on + else + scrub_start 5 --dryrun + fi + scrub_check_status 6 completed + scrub_check_flags 7 inconsistent + scrub_check_params 8 dryrun + scrub_check_repaired 9 20 + + # run under dryrun mode again + if [ $server_version -lt $(version_code 2.5.58) ]; then + scrub_start 10 --dryrun on + else + scrub_start 10 --dryrun + fi + scrub_check_status 11 completed + scrub_check_flags 12 inconsistent + scrub_check_params 13 dryrun + scrub_check_repaired 14 20 + + # run under normal mode + # + # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not + # work under Lustre-2.y (y >= 6), the test script should be fixed as + # "-noff" or "--dryrun=off" or nothing by default. + if [ $server_version -lt $(version_code 2.5.58) ]; then + scrub_start 15 --dryrun off + else + scrub_start 15 + fi + scrub_check_status 16 completed + scrub_check_flags 17 "" + scrub_check_params 18 "" + scrub_check_repaired 19 20 + + # run under normal mode again + if [ $server_version -lt $(version_code 2.5.58) ]; then + scrub_start 20 --dryrun off + else + scrub_start 20 + fi + scrub_check_status 21 completed + scrub_check_flags 22 "" + scrub_check_params 23 "" + scrub_check_repaired 24 0 +} +run_test 15 "Dryrun mode OI scrub" + # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} +OSTCOUNT=${SAVED_OSTCOUNT} # cleanup the system at last formatall