3 # Run selected tests by setting ONLY, or by passing them as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Locate the Lustre tree: keep a caller-supplied LUSTRE, otherwise use the
# parent of the directory that contains this script.
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Source the shared test framework from that tree.
16 . $LUSTRE/tests/test-framework.sh
# Source the configuration file. ${CONFIG:=...} also assigns the default path
# to CONFIG when CONFIG is unset or empty.
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with exit status 0 when require_dsh_mds does not succeed.
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
# Remember the incoming MDSSIZE/OSTSIZE/OSTCOUNT values; the end of this
# script assigns these saved values back to the original variables.
32 SAVED_MDSSIZE=${MDSSIZE}
33 SAVED_OSTSIZE=${OSTSIZE}
34 SAVED_OSTCOUNT=${OSTCOUNT}
36 # use small MDS + OST size to speed formatting time
37 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
38 # 400M MDT device can guarantee uninitialized groups during the OI scrub
42 # there is no need for many OSTs; capping the count reduces the format/start/stop overhead
43 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
45 # build up a clean test environment.
46 REFORMAT="yes" check_and_setup_lustre
# Target names built from FSNAME for index 0000 of the MDT and of the OST.
50 MDT_DEV="${FSNAME}-MDT0000"
51 OST_DEV="${FSNAME}-OST0000"
# ${SINGLEMDS//mds/} removes every "mds" substring from SINGLEMDS before the
# value is handed to mdsdevname (presumably leaving the numeric MDS index --
# TODO confirm against the framework's definition of SINGLEMDS).
52 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
58 # use "lfsck_start -A" once we no longer need to test interop
59 for n in $(seq $MDSCOUNT); do
60 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
62 error "($error_id) Failed to start OI scrub on mds$n"
70 # use "lfsck_stop -A" once we no longer need to test interop
71 for n in $(seq $MDSCOUNT); do
72 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
73 error "($error_id) Failed to stop OI scrub on mds$n"
80 do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
# Command strings and mount options shared by the tests below.
# The values are double-quoted, so $SINGLEMDS, $LCTL, ${MDT_DEV} and
# ${OST_DEV} are expanded once, here, at assignment time.
# Visible callers run the command strings by plain (unquoted) expansion,
# e.g. `$SHOW_SCRUB | awk ...` and `$START_SCRUB_ON_OST -r`.
#
# Start / stop an lfsck of type "scrub" on the first MDT, or start one on
# the first OST.
83 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
84 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
85 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the oi_scrub parameter of the first MDT / first OST.
86 SHOW_SCRUB="do_facet $SINGLEMDS \
87 $LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
88 SHOW_SCRUB_ON_OST="do_facet ost1 \
89 $LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
# Mount option strings; the second one differs only by the extra "noscrub".
90 MOUNT_OPTS_SCRUB="-o user_xattr"
91 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
100 echo "preparing... $(date)"
101 for n in $(seq $MDSCOUNT); do
102 echo "creating $nfiles files on mds$n"
103 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
104 error "Failed to create directory mds$n"
105 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
106 error "Failed to copy files to mds$n"
107 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
108 error "mkdir failed on mds$n"
109 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
110 /dev/null || error "create failed on mds$n"
111 if [[ $nfiles -gt 0 ]]; then
112 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
113 /dev/null || error "createmany failed on mds$n"
116 echo "prepared $(date)."
118 [ ! -z $inject ] && [ $inject -eq 2 ] && {
119 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
120 do_nodes $(comma_list $(mdts_nodes)) \
121 $LCTL set_param fail_loc=0x198
123 for n in $(seq $MDSCOUNT); do
124 cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
125 error "Fail to copy runas to MDS$n"
128 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
131 [ ! -z $inject ] && [ $inject -eq 1 ] &&
132 [ $(facet_fstype $SINGLEMDS) = "zfs" ] && {
133 #define OBD_FAIL_OSD_FID_MAPPING 0x193
134 do_nodes $(comma_list $(mdts_nodes)) \
135 $LCTL set_param fail_loc=0x193
137 for n in $(seq $MDSCOUNT); do
138 chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
139 chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
142 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
145 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
147 # sync local transactions on every MDT
148 do_nodes $(comma_list $(mdts_nodes)) \
149 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
151 # wait for a while to cancel update logs after transactions committed.
154 # sync again to guarantee that everything is done.
155 do_nodes $(comma_list $(mdts_nodes)) \
156 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
158 for n in $(seq $MDSCOUNT); do
160 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
163 [ ! -z $inject ] && [ $(facet_fstype $SINGLEMDS) = "ldiskfs" ] && {
164 if [ $inject -eq 1 ]; then
165 for n in $(seq $MDSCOUNT); do
166 mds_backup_restore mds$n ||
167 error "Backup/restore on mds$n failed"
169 elif [ $inject -eq 2 ]; then
180 for n in $(seq $MDSCOUNT); do
181 start mds$n $(mdsdevname $n) $opts >/dev/null ||
182 error "($error_id) Failed to start mds$n"
190 for n in $(seq $MDSCOUNT); do
191 echo "stopping mds$n"
192 stop mds$n >/dev/null ||
193 error "($error_id) Failed to stop mds$n"
# Wait until the OI scrub "status" on every MDS equals the expected value.
#
# For each n in 1..MDSCOUNT, wait_update_facet is given a command that reads
# the oi_scrub parameter of mds$n and prints the second field of the line
# starting with "status", together with "$expected" and the number 6
# (presumably a timeout in seconds -- TODO confirm in test-framework.sh).
# If wait_update_facet fails, error is called with the "($error_id)" tag.
#
# $error_id and $expected are assigned in lines not shown here; callers such
# as `scrub_check_status 2 init` suggest they come from $1 and $2 -- confirm.
#
# The awk field is written \\\$2 so that, after this shell's double-quote
# processing, the string passed on still contains \$2.
197 scrub_check_status() {
202 for n in $(seq $MDSCOUNT); do
203 wait_update_facet mds$n "$LCTL get_param -n \
204 osd-*.$(facet_svc mds$n).oi_scrub |
205 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
206 error "($error_id) Expected '$expected' on mds$n"
# Check that the OI scrub "flags" value on every MDS equals the expected one.
#
# For each n in 1..MDSCOUNT, the oi_scrub parameter of mds$n is read once and
# the second field of the line starting with "flags" is compared, as a
# string, with "$expected"; on mismatch error is called with "($error_id)".
# Unlike scrub_check_status, the visible code does a single read and no wait.
#
# $error_id and $expected are assigned in lines not shown here; callers such
# as `scrub_check_flags 3 ""` suggest they come from $1 and $2 -- confirm.
210 scrub_check_flags() {
216 for n in $(seq $MDSCOUNT); do
217 actual=$(do_facet mds$n $LCTL get_param -n \
218 osd-*.$(facet_svc mds$n).oi_scrub |
219 awk '/^flags/ { print $2 }')
220 if [ "$actual" != "$expected" ]; then
221 error "($error_id) Expected '$expected' on mds$n, but" \
# Check that the OI scrub "param" value on every MDS equals the expected one.
#
# For each n in 1..MDSCOUNT, the oi_scrub parameter of mds$n is read once and
# the second field of the line starting with "param" is compared, as a
# string, with "$expected"; on mismatch error is called with "($error_id)".
#
# $error_id and $expected are assigned in lines not shown here; callers such
# as `scrub_check_params 8 dryrun` suggest they come from $1 and $2 -- confirm.
227 scrub_check_params() {
233 for n in $(seq $MDSCOUNT); do
234 actual=$(do_facet mds$n $LCTL get_param -n \
235 osd-*.$(facet_svc mds$n).oi_scrub |
236 awk '/^param/ { print $2 }')
237 if [ "$actual" != "$expected" ]; then
238 error "($error_id) Expected '$expected' on mds$n, but" \
# Check the number of items OI scrub reports as repaired on every MDS.
#
# For each n in 1..MDSCOUNT a counter is read from the oi_scrub parameter of
# mds$n: the "inconsistent:" line when $dryrun equals 1, otherwise the
# "updated:" line (the separator between the two reads is on a line not
# shown here -- presumably `else`; confirm).
# The visible checks are numeric:
#   - expected == 0 and actual != 0          -> error
#   - expected != 0 and actual <  expected   -> error
# so a non-zero expectation is a lower bound rather than an exact match.
#
# $error_id, $expected and $dryrun are assigned in lines not shown here;
# callers such as `scrub_check_repaired 9 $repaired 1` suggest they come from
# $1, $2 and $3 -- confirm.
244 scrub_check_repaired() {
251 for n in $(seq $MDSCOUNT); do
252 if [ $dryrun -eq 1 ]; then
253 actual=$(do_facet mds$n $LCTL get_param -n \
254 osd-*.$(facet_svc mds$n).oi_scrub |
255 awk '/^inconsistent:/ { print $2 }')
257 actual=$(do_facet mds$n $LCTL get_param -n \
258 osd-*.$(facet_svc mds$n).oi_scrub |
259 awk '/^updated:/ { print $2 }')
262 if [ $expected -eq 0 -a $actual -ne 0 ]; then
263 error "($error_id) Expected no repaired on mds$n, but" \
267 if [ $expected -ne 0 -a $actual -lt $expected ]; then
268 error "($error_id) Expected '$expected' on mds$n, but" \
278 for n in $(seq $MDSCOUNT); do
279 diff -q $LUSTRE/tests/test-framework.sh \
280 $DIR/$tdir/mds$n/test-framework.sh ||
281 error "($error_id) File data check failed"
# Verify that a named file under each per-MDS test directory still matches
# its source copy.
#
# For each n in 1..MDSCOUNT, `diff -q` compares $LUSTRE/tests/$filename with
# $DIR/$tdir/mds$n/$filename; any difference (or diff failure) calls error
# with the "($error_id)" tag.
#
# $filename and $error_id are assigned in lines not shown here; callers such
# as `scrub_check_data2 runas 5` suggest they come from $1 and $2 -- confirm.
285 scrub_check_data2() {
290 for n in $(seq $MDSCOUNT); do
291 diff -q $LUSTRE/tests/$filename \
292 $DIR/$tdir/mds$n/$filename ||
293 error "($error_id) File data check failed"
298 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
304 for n in $(seq $MDSCOUNT); do
305 mds_remove_ois mds$n $index ||
306 error "($error_id) Failed to remove OI .$index on mds$n"
310 scrub_enable_auto() {
311 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
316 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
320 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
321 osd-*.*.full_scrub_ratio=$ratio
# Set osd-*.*.full_scrub_threshold_rate to $rate on all MDT nodes.
#
# Returns immediately, changing nothing, when the fstype of the SINGLEMDS
# facet is not "ldiskfs".
# $rate is assigned in lines not shown here; callers such as
# `full_scrub_threshold_rate 10000` suggest it comes from $1 -- confirm.
324 full_scrub_threshold_rate() {
325 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
329 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
330 osd-*.*.full_scrub_threshold_rate=$rate
# Set osd-*.*.index_backup=1 on every server node (the node list comes from
# all_server_nodes, not only the MDT nodes).
333 scrub_enable_index_backup() {
334 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
335 osd-*.*.index_backup=1
# Set osd-*.*.index_backup=0 on every server node (the node list comes from
# all_server_nodes, not only the MDT nodes).
338 scrub_disable_index_backup() {
339 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
340 osd-*.*.index_backup=0
345 echo "starting MDTs without disabling OI scrub"
346 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
347 scrub_check_status 2 init
348 scrub_check_flags 3 ""
349 mount_client $MOUNT || error "(4) Fail to start client!"
352 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
356 echo "start $SINGLEMDS without disabling OI scrub"
357 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
359 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
360 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
362 mount_client $MOUNT || error "(4) Fail to start client!"
363 #define OBD_FAIL_OSD_FID_MAPPING 0x193
364 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
365 # update .lustre OI mapping
367 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
368 umount_client $MOUNT || error "(5) Fail to stop client!"
370 echo "stop $SINGLEMDS"
371 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
373 echo "start $SINGLEMDS with disabling OI scrub"
374 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
375 error "(7) Fail to start MDS!"
377 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
378 [ "$FLAGS" == "inconsistent" ] ||
379 error "(9) Expect 'inconsistent', but got '$FLAGS'"
381 run_test 1a "Auto trigger initial OI scrub when server mounts"
385 echo "start MDTs without disabling OI scrub"
386 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
387 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
388 scrub_check_status 3 completed
389 mount_client $MOUNT || error "(4) Fail to start client!"
390 scrub_check_data2 runas 5
391 scrub_check_status 6 completed
393 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
396 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
397 skip "ldiskfs special test" && return
401 # OI files to be removed:
403 # idx 2: oi.16.{2,4,8,16,32}
404 # idx 3: oi.16.{3,9,27}
405 for index in 0 2 3; do
407 scrub_remove_ois 1 $index
408 echo "start MDTs with OI scrub disabled"
409 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
410 scrub_check_flags 3 recreated
412 scrub_check_status 5 completed
413 scrub_check_flags 6 ""
416 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
419 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
420 skip "ldiskfs special test" && return
423 echo "starting MDTs without disabling OI scrub"
424 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
425 scrub_check_status 3 completed
426 mount_client $MOUNT || error "(4) Fail to start client!"
429 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
431 # test_3 is obsolete; it will be covered by test_5.
433 formatall > /dev/null
437 echo "starting MDTs with OI scrub disabled"
438 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
439 scrub_check_status 3 init
440 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
441 scrub_check_flags 4 recreated,inconsistent
443 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
447 echo "starting MDTs with OI scrub disabled"
448 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
449 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
450 scrub_check_flags 4 recreated,inconsistent
451 mount_client $MOUNT || error "(5) Fail to start client!"
457 scrub_check_status 7 completed
458 scrub_check_flags 8 ""
461 for n in $(seq $MDSCOUNT); do
462 updated0[$n]=$(scrub_status $n |
463 awk '/^prior_updated/ { print $2 }')
466 scrub_check_data2 sanity-scrub.sh 9
470 for n in $(seq $MDSCOUNT); do
471 updated1[$n]=$(scrub_status $n |
472 awk '/^prior_updated/ { print $2 }')
473 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
474 error "(10) NOT auto trigger full scrub as expected"
477 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
480 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
481 skip "ldiskfs special test" && return
484 echo "starting MDTs with OI scrub disabled"
485 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
486 scrub_check_flags 4 recreated,inconsistent
487 mount_client $MOUNT || error "(5) Fail to start client!"
490 full_scrub_threshold_rate 10000
494 scrub_check_status 7 completed
495 scrub_check_flags 8 ""
498 for n in $(seq $MDSCOUNT); do
499 updated0[$n]=$(scrub_status $n |
500 awk '/^prior_updated/ { print $2 }')
502 echo "OI scrub on MDS$n status for the 1st time:"
503 do_facet mds$n $LCTL get_param -n \
504 osd-*.$(facet_svc mds$n).oi_scrub
507 scrub_check_data2 sanity-scrub.sh 9
510 scrub_check_status 10 completed
511 scrub_check_flags 11 ""
514 for n in $(seq $MDSCOUNT); do
515 updated1[$n]=$(scrub_status $n |
516 awk '/^prior_updated/ { print $2 }')
518 echo "OI scrub on MDS$n status for the 2nd time:"
519 do_facet mds$n $LCTL get_param -n \
520 osd-*.$(facet_svc mds$n).oi_scrub
522 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
523 error "(12) Auto trigger full scrub unexpectedly"
526 for n in $(seq $MDSCOUNT); do
527 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
528 error "(13) fail to ls"
532 scrub_check_status 14 completed
533 scrub_check_flags 15 ""
535 for n in $(seq $MDSCOUNT); do
536 updated0[$n]=$(scrub_status $n |
537 awk '/^prior_updated/ { print $2 }')
539 echo "OI scrub on MDS$n status for the 3rd time:"
540 do_facet mds$n $LCTL get_param -n \
541 osd-*.$(facet_svc mds$n).oi_scrub
543 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
544 error "(16) Auto trigger full scrub unexpectedly"
547 for n in $(seq $MDSCOUNT); do
548 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
552 for n in $(seq $MDSCOUNT); do
553 updated1[$n]=$(scrub_status $n |
554 awk '/^prior_updated/ { print $2 }')
555 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
556 echo "OI scrub on MDS$n status for the 4th time:"
557 do_facet mds$n $LCTL get_param -n \
558 osd-*.$(facet_svc mds$n).oi_scrub
560 error "(18) NOT auto trigger full scrub as expected"
564 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
567 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
568 skip "ldiskfs special test" && return
571 echo "starting MDTs with OI scrub disabled"
572 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
573 scrub_check_flags 4 recreated,inconsistent
574 mount_client $MOUNT || error "(5) Fail to start client!"
577 full_scrub_threshold_rate 20
581 scrub_check_status 7 completed
582 scrub_check_flags 8 ""
585 for n in $(seq $MDSCOUNT); do
586 updated0[$n]=$(scrub_status $n |
587 awk '/^prior_updated/ { print $2 }')
589 echo "OI scrub on MDS$n status for the 1st time:"
590 do_facet mds$n $LCTL get_param -n \
591 osd-*.$(facet_svc mds$n).oi_scrub
594 scrub_check_data2 sanity-scrub.sh 9
597 scrub_check_status 10 completed
598 scrub_check_flags 11 ""
601 for n in $(seq $MDSCOUNT); do
602 updated1[$n]=$(scrub_status $n |
603 awk '/^prior_updated/ { print $2 }')
605 echo "OI scrub on MDS$n status for the 2nd time:"
606 do_facet mds$n $LCTL get_param -n \
607 osd-*.$(facet_svc mds$n).oi_scrub
609 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
610 error "(12) Auto trigger full scrub unexpectedly"
613 for n in $(seq $MDSCOUNT); do
614 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
615 error "(13) fail to ls"
619 scrub_check_status 14 completed
620 scrub_check_flags 15 ""
622 for n in $(seq $MDSCOUNT); do
623 updated0[$n]=$(scrub_status $n |
624 awk '/^prior_updated/ { print $2 }')
626 echo "OI scrub on MDS$n status for the 3rd time:"
627 do_facet mds$n $LCTL get_param -n \
628 osd-*.$(facet_svc mds$n).oi_scrub
630 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
631 error "(16) Auto trigger full scrub unexpectedly"
634 for n in $(seq $MDSCOUNT); do
635 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
639 for n in $(seq $MDSCOUNT); do
640 updated1[$n]=$(scrub_status $n |
641 awk '/^prior_updated/ { print $2 }')
642 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
643 echo "OI scrub on MDS$n status for the 4th time:"
644 do_facet mds$n $LCTL get_param -n \
645 osd-*.$(facet_svc mds$n).oi_scrub
647 error "(18) NOT auto trigger full scrub as expected"
651 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
654 formatall > /dev/null
658 echo "starting MDTs with OI scrub disabled (1)"
659 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
660 scrub_check_status 3 init
661 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
662 scrub_check_flags 4 recreated,inconsistent
663 mount_client $MOUNT || error "(5) Fail to start client!"
667 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
668 do_nodes $(comma_list $(mdts_nodes)) \
669 $LCTL set_param fail_val=3 fail_loc=0x190
672 umount_client $MOUNT || error "(7) Fail to stop client!"
673 scrub_check_status 8 scanning
675 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
676 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
681 do_nodes $(comma_list $(mdts_nodes)) \
682 $LCTL set_param fail_loc=0 fail_val=0
684 echo "starting MDTs with OI scrub disabled (2)"
685 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
686 scrub_check_status 11 crashed
689 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
690 do_nodes $(comma_list $(mdts_nodes)) \
691 $LCTL set_param fail_val=3 fail_loc=0x190
693 echo "starting MDTs without disabling OI scrub"
694 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
695 scrub_check_status 14 scanning
697 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
698 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
700 scrub_check_status 15 failed
701 mount_client $MOUNT || error "(16) Fail to start client!"
704 do_nodes $(comma_list $(mdts_nodes)) \
705 $LCTL set_param fail_loc=0 fail_val=0
710 for n in $(seq $MDSCOUNT); do
711 stat $DIR/$tdir/mds$n/sanity-scrub.sh &
715 for n in $(seq $MDSCOUNT); do
717 error "(18) Fail to stat mds$n/sanity-scrub.sh"
720 scrub_check_status 19 completed
721 scrub_check_flags 20 ""
723 run_test 5 "OI scrub state machine"
727 echo "starting MDTs with OI scrub disabled"
728 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
729 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
730 scrub_check_flags 4 recreated,inconsistent
731 mount_client $MOUNT || error "(5) Fail to start client!"
735 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
736 do_nodes $(comma_list $(mdts_nodes)) \
737 $LCTL set_param fail_val=2 fail_loc=0x190
741 # Sleep 5 sec to guarantee at least one object processed by OI scrub
743 # Fail the OI scrub to guarantee there is at least one checkpoint
744 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
745 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
747 scrub_check_status 7 failed
749 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
750 do_nodes $(comma_list $(mdts_nodes)) \
751 $LCTL set_param fail_val=3 fail_loc=0x190
754 for n in $(seq $MDSCOUNT); do
755 # stat will re-trigger OI scrub
756 stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
757 error "(8) Failed to stat mds$n/sanity-scrub.sh"
760 umount_client $MOUNT || error "(9) Fail to stop client!"
761 scrub_check_status 10 scanning
763 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
764 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
768 for n in $(seq $MDSCOUNT); do
769 position0[$n]=$(scrub_status $n |
770 awk '/^last_checkpoint_position/ {print $2}')
771 position0[$n]=$((${position0[$n]} + 1))
776 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
777 do_nodes $(comma_list $(mdts_nodes)) \
778 $LCTL set_param fail_val=3 fail_loc=0x190
780 echo "starting MDTs without disabling OI scrub"
781 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
783 scrub_check_status 13 scanning
786 for n in $(seq $MDSCOUNT); do
787 position1[$n]=$(scrub_status $n |
788 awk '/^latest_start_position/ {print $2}')
789 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
790 error "(14) Expected position ${position0[$n]}, but" \
791 "got ${position1[$n]}"
795 do_nodes $(comma_list $(mdts_nodes)) \
796 $LCTL set_param fail_loc=0 fail_val=0
798 scrub_check_status 15 completed
799 scrub_check_flags 16 ""
801 run_test 6 "OI scrub resumes from last checkpoint"
805 echo "starting MDTs with OI scrub disabled"
806 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
807 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
808 scrub_check_flags 4 recreated,inconsistent
809 mount_client $MOUNT || error "(5) Fail to start client!"
813 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
814 do_nodes $(comma_list $(mdts_nodes)) \
815 $LCTL set_param fail_val=3 fail_loc=0x190
820 for n in $(seq $MDSCOUNT); do
821 stat $DIR/$tdir/mds$n/${tfile}300 ||
822 error "(7) Failed to stat mds$n/${tfile}300!"
825 scrub_check_status 8 scanning
826 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
827 scrub_check_flags 9 inconsistent,auto
829 scrub_check_flags 9 recreated,inconsistent,auto
832 do_nodes $(comma_list $(mdts_nodes)) \
833 $LCTL set_param fail_loc=0 fail_val=0
835 scrub_check_status 10 completed
838 run_test 7 "System is available during OI scrub scanning"
842 echo "starting MDTs with OI scrub disabled"
843 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
844 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
845 scrub_check_flags 4 recreated,inconsistent
847 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
848 do_nodes $(comma_list $(mdts_nodes)) \
849 $LCTL set_param fail_val=1 fail_loc=0x190
852 scrub_check_status 6 scanning
854 scrub_check_status 8 stopped
856 scrub_check_status 10 scanning
858 do_nodes $(comma_list $(mdts_nodes)) \
859 $LCTL set_param fail_loc=0 fail_val=0
861 scrub_check_status 11 completed
862 scrub_check_flags 12 ""
864 run_test 8 "Control OI scrub manually"
867 # Skip the scrub speed test on ZFS because its performance is unstable
868 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
869 skip "test scrub speed only on ldiskfs" && return
871 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
872 skip "Testing on UP system, the speed may be inaccurate."
878 echo "starting MDTs with OI scrub disabled"
879 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
880 scrub_check_flags 4 recreated,inconsistent
882 local BASE_SPEED1=100
884 # OI scrub should run with full speed under inconsistent case
885 scrub_start 5 -s $BASE_SPEED1
888 scrub_check_status 6 completed
889 scrub_check_flags 7 ""
890 # OI scrub should run with limited speed under non-inconsistent case
891 scrub_start 8 -s $BASE_SPEED1 -r
894 scrub_check_status 9 scanning
896 # Do NOT ignore the 1024 pre-fetched items. There may also be a
897 # timing error, which should normally be less than 2 seconds.
898 # We allow another 20% for scheduling error.
899 local PRE_FETCHED=1024
901 # MAX_MARGIN = 1.2 = 12 / 10
902 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
903 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
905 for n in $(seq $MDSCOUNT); do
906 local SPEED=$(scrub_status $n | \
907 awk '/^average_speed/ { print $2 }')
908 [ $SPEED -lt $MAX_SPEED ] ||
909 error "(10) Got speed $SPEED, expected less than" \
914 local BASE_SPEED2=300
916 for n in $(seq $MDSCOUNT); do
917 do_facet mds$n $LCTL set_param -n \
918 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
922 # MIN_MARGIN = 0.8 = 8 / 10
923 local MIN_SPEED=$(((PRE_FETCHED + \
924 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
925 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
926 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
927 # MAX_MARGIN = 1.2 = 12 / 10
928 MAX_SPEED=$(((PRE_FETCHED + \
929 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
930 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
931 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
932 for n in $(seq $MDSCOUNT); do
933 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
934 [ $SPEED -gt $MIN_SPEED ] ||
935 error "(11) Got speed $SPEED, expected more than" \
937 [ $SPEED -lt $MAX_SPEED ] ||
938 error "(12) Got speed $SPEED, expected less than" \
941 do_facet mds$n $LCTL set_param -n \
942 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
945 scrub_check_status 13 completed
947 run_test 9 "OI scrub speed control"
# NOTE(review): this message interpolates $n, but no enclosing `for n` loop
# is visible at this point, and the matching messages later in this test say
# "starting MDTs ...". Confirm whether $n is intended here or whether it
# only carries a value left over from an earlier loop.
951 echo "starting mds$n with OI scrub disabled (1)"
# Start the MDTs with the "noscrub" mount option string.
952 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
953 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
954 scrub_check_flags 4 recreated,inconsistent
955 mount_client $MOUNT || error "(5) Fail to start client!"
959 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
960 do_nodes $(comma_list $(mdts_nodes)) \
961 $LCTL set_param fail_val=1 fail_loc=0x190
964 scrub_check_status 7 scanning
965 umount_client $MOUNT || error "(8) Fail to stop client!"
967 echo "starting MDTs with OI scrub disabled (2)"
968 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
969 scrub_check_status 11 paused
971 echo "starting MDTs without disabling OI scrub"
972 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
973 scrub_check_status 14 scanning
975 do_nodes $(comma_list $(mdts_nodes)) \
976 $LCTL set_param fail_loc=0 fail_val=0
978 scrub_check_status 15 completed
979 scrub_check_flags 16 ""
981 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
983 # test_10b is obsolete; it will be covered by related sanity-lfsck tests.
986 echo "starting MDTs with OI scrub disabled"
987 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
988 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
989 scrub_check_flags 4 recreated,inconsistent
991 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
992 do_nodes $(comma_list $(mdts_nodes)) \
993 $LCTL set_param fail_val=3 fail_loc=0x190
996 scrub_check_status 6 scanning
998 echo "starting MDTs with OI scrub disabled"
999 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
1000 scrub_check_status 9 paused
1002 echo "starting MDTs without disabling OI scrub"
1003 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
1004 scrub_check_status 12 scanning
1006 do_nodes $(comma_list $(mdts_nodes)) \
1007 $LCTL set_param fail_loc=0 fail_val=0
1009 scrub_check_status 13 completed
1010 scrub_check_flags 14 ""
1012 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
1015 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1016 skip "ldiskfs special test" && return
1021 check_mount_and_prep
1023 for n in $(seq $MDSCOUNT); do
1024 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
1025 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
1027 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
1028 error "(2) Fail to create under $tdir/mds$n"
1031 # reset OI scrub start point by force
1033 scrub_check_status 4 completed
1038 # OI scrub should skip the newly created objects on the first access.
1039 # Note that we create a new llog for every OST on every startup, and
1040 # new features can make this even less stable, so we only check that
1041 # the number of skipped files is more than the number of known created
1042 local MINIMUM=$((CREATED + 1)) # files + directory
1043 for n in $(seq $MDSCOUNT); do
1044 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1045 [ $SKIPPED -lt $MINIMUM ] &&
1046 error "(5) Expect at least $MINIMUM objects" \
1047 "skipped on mds$n, but got $SKIPPED"
1049 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1052 # reset OI scrub start point by force
1054 scrub_check_status 7 completed
1056 # OI scrub should skip the newly created objects only once
1057 for n in $(seq $MDSCOUNT); do
1058 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1059 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1061 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1062 error "(8) Expect 0 objects skipped on mds$n, but" \
1066 run_test 11 "OI scrub skips the new created objects only once"
1069 check_mount_and_prep
1070 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1072 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1073 do_facet ost1 $LCTL set_param fail_loc=0x195
1074 local count=$(precreated_ost_obj_count 0 0)
1076 createmany -o $DIR/$tdir/f $((count + 32))
1077 umount_client $MOUNT || error "(1) Fail to stop client!"
1079 stop ost1 || error "(2) Fail to stop ost1"
1081 #define OBD_FAIL_OST_NODESTROY 0x233
1082 do_facet ost1 $LCTL set_param fail_loc=0x233
1084 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1085 error "(3) Fail to start ost1"
1087 mount_client $MOUNT || error "(4) Fail to start client!"
1089 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1091 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1093 do_facet ost1 $LCTL set_param fail_loc=0
# Wait for the oi_scrub "status" field on ost1 to read "completed"; 6 is the
# final argument to wait_update_facet (presumably a timeout in seconds --
# TODO confirm in test-framework.sh).
# NOTE(review): the error text below interpolates $expected, but no visible
# line of this test assigns it, while the awaited value is the literal
# "completed". The message likely prints an empty or stale value -- confirm,
# and consider reporting 'completed' instead.
1094 wait_update_facet ost1 "$LCTL get_param -n \
1095 osd-*.$(facet_svc ost1).oi_scrub |
1096 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1097 error "(7) Expected '$expected' on ost1"
1099 ls -ail $DIR/$tdir > /dev/null || {
1101 error "(8) ls should succeed"
1104 run_test 12 "OI scrub can rebuild invalid /O entries"
1107 check_mount_and_prep
1108 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1110 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1111 do_facet ost1 $LCTL set_param fail_loc=0x196
1112 local count=$(precreated_ost_obj_count 0 0)
1114 createmany -o $DIR/$tdir/f $((count + 32))
1115 do_facet ost1 $LCTL set_param fail_loc=0
1117 umount_client $MOUNT || error "(1) Fail to stop client!"
1119 stop ost1 || error "(2) Fail to stop ost1"
1121 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1122 error "(3) Fail to start ost1"
1124 mount_client $MOUNT || error "(4) Fail to start client!"
1126 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1128 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Wait for the oi_scrub "status" field on ost1 to read "completed"; 6 is the
# final argument to wait_update_facet (presumably a timeout in seconds --
# TODO confirm in test-framework.sh).
# NOTE(review): the error text below interpolates $expected, but no visible
# line of this test assigns it, while the awaited value is the literal
# "completed". The message likely prints an empty or stale value -- confirm,
# and consider reporting 'completed' instead.
1130 wait_update_facet ost1 "$LCTL get_param -n \
1131 osd-*.$(facet_svc ost1).oi_scrub |
1132 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1133 error "(7) Expected '$expected' on ost1"
1135 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1137 run_test 13 "OI scrub can rebuild missed /O entries"
1140 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1141 skip "ldiskfs special test" && return
1143 check_mount_and_prep
1144 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1146 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1147 do_facet ost1 $LCTL set_param fail_loc=0x196
1148 local count=$(precreated_ost_obj_count 0 0)
1150 createmany -o $DIR/$tdir/f $((count + 1000))
1151 do_facet ost1 $LCTL set_param fail_loc=0
1153 umount_client $MOUNT || error "(1) Fail to stop client!"
1155 stop ost1 || error "(2) Fail to stop ost1"
1158 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1159 error "(3) Fail to run e2fsck error"
1161 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1162 error "(4) Fail to start ost1"
1164 mount_client $MOUNT || error "(5) Fail to start client!"
1166 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1167 awk '/^lf_repa[ri]*ed/ { print $2 }')
1168 [ $LF_REPAIRED -ge 1000 ] ||
1169 error "(6) Some entry under /lost+found should be repaired"
1171 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1173 run_test 14 "OI scrub can repair objects under lost+found"
1178 formatall > /dev/null
1179 setupall > /dev/null
1182 echo "starting MDTs with OI scrub disabled"
1183 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1184 scrub_check_status 3 init
1185 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1186 scrub_check_flags 4 recreated,inconsistent
1188 # run under dryrun mode
1189 scrub_start 5 --dryrun
1190 scrub_check_status 6 completed
1191 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1192 scrub_check_flags 7 inconsistent
1195 scrub_check_flags 7 recreated,inconsistent
1198 scrub_check_params 8 dryrun
1199 scrub_check_repaired 9 $repaired 1
1201 # run under dryrun mode again
1202 scrub_start 10 --dryrun
1203 scrub_check_status 11 completed
1204 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1205 scrub_check_flags 12 inconsistent
1207 scrub_check_flags 12 recreated,inconsistent
1209 scrub_check_params 13 dryrun
1210 scrub_check_repaired 14 $repaired 1
1212 # run under normal mode
1214 scrub_check_status 16 completed
1215 scrub_check_flags 17 ""
1216 scrub_check_params 18 ""
1217 scrub_check_repaired 19 $repaired 0
1219 # run under normal mode again
1221 scrub_check_status 21 completed
1222 scrub_check_flags 22 ""
1223 scrub_check_params 23 ""
1224 scrub_check_repaired 24 0 0
1226 run_test 15 "Dryrun mode OI scrub"
1229 check_mount_and_prep
1230 scrub_enable_index_backup
1232 #define OBD_FAIL_OSD_INDEX_CRASH 0x199
1233 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x199
1235 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
1237 echo "starting MDTs without disabling OI scrub"
1238 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
1239 mount_client $MOUNT || error "(2) Fail to start client!"
1241 scrub_disable_index_backup
1243 run_test 16 "Initial OI scrub can rebuild crashed index objects"
1245 # restore MDS/OST size
1246 MDSSIZE=${SAVED_MDSSIZE}
1247 OSTSIZE=${SAVED_OSTSIZE}
1248 OSTCOUNT=${SAVED_OSTCOUNT}
1250 # finally, clean up the system
1251 REFORMAT="yes" cleanup_and_setup_lustre
1254 check_and_cleanup_lustre