3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16 . $LUSTRE/tests/test-framework.sh
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
32 SAVED_MDSSIZE=${MDSSIZE}
33 SAVED_OSTSIZE=${OSTSIZE}
34 SAVED_OSTCOUNT=${OSTCOUNT}
36 # use small MDS and OST sizes to speed up formatting
37 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
38 # a 400M MDT device can guarantee uninitialized groups during the OI scrub
42 # there is no need for many OSTs; limit them to reduce the format/start/stop overhead
43 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
45 # build up a clean test environment.
46 REFORMAT="yes" check_and_setup_lustre
50 MDT_DEV="${FSNAME}-MDT0000"
51 OST_DEV="${FSNAME}-OST0000"
52 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
58 # use "lfsck_start -A" when we no longer need to test interop
59 for n in $(seq $MDSCOUNT); do
60 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
62 error "($error_id) Failed to start OI scrub on mds$n"
70 # use "lfsck_stop -A" when we no longer need to test interop
71 for n in $(seq $MDSCOUNT); do
72 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
73 error "($error_id) Failed to stop OI scrub on mds$n"
80 do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
83 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
84 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
85 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
86 SHOW_SCRUB="do_facet $SINGLEMDS \
87 $LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
88 SHOW_SCRUB_ON_OST="do_facet ost1 \
89 $LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
90 MOUNT_OPTS_SCRUB="-o user_xattr"
91 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
100 echo "preparing... $(date)"
101 for n in $(seq $MDSCOUNT); do
102 echo "creating $nfiles files on mds$n"
103 test_mkdir -i $((n - 1)) -c1 $DIR/$tdir/mds$n ||
104 error "Failed to create directory mds$n"
105 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
106 error "Failed to copy files to mds$n"
107 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
108 error "mkdir failed on mds$n"
109 touch $DIR/$tdir/mds$n/d_$tfile/f1 > \
110 /dev/null || error "create failed on mds$n"
111 dd if=/dev/zero of=$DIR/$tdir/mds$n/d_$tfile/f2 bs=1M count=1 ||
112 error "write failed on mds$n"
113 if [[ $nfiles -gt 0 ]]; then
114 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
115 /dev/null || error "createmany failed on mds$n"
118 echo "prepared $(date)."
120 [ ! -z $inject ] && [ $inject -eq 2 ] && {
121 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
122 do_nodes $(comma_list $(mdts_nodes)) \
123 $LCTL set_param fail_loc=0x198
125 for n in $(seq $MDSCOUNT); do
126 cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
127 error "Fail to copy runas to MDS$n"
130 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
133 [ ! -z $inject ] && [ $inject -eq 1 ] &&
134 [ $(facet_fstype $SINGLEMDS) = "zfs" ] && {
135 #define OBD_FAIL_OSD_FID_MAPPING 0x193
136 do_nodes $(comma_list $(mdts_nodes)) \
137 $LCTL set_param fail_loc=0x193
139 for n in $(seq $MDSCOUNT); do
140 chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
141 chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
144 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
147 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
149 # sync local transactions on every MDT
150 do_nodes $(comma_list $(mdts_nodes)) \
151 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
153 # wait for a while to cancel update logs after the transactions are committed.
156 # sync again to guarantee all things done.
157 do_nodes $(comma_list $(mdts_nodes)) \
158 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
160 for n in $(seq $MDSCOUNT); do
162 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
165 [ ! -z $inject ] && [ $(facet_fstype $SINGLEMDS) = "ldiskfs" ] && {
166 if [ $inject -eq 1 ]; then
167 for n in $(seq $MDSCOUNT); do
168 mds_backup_restore mds$n ||
169 error "Backup/restore on mds$n failed"
171 elif [ $inject -eq 2 ]; then
182 for n in $(seq $MDSCOUNT); do
183 start mds$n $(mdsdevname $n) $opts >/dev/null ||
184 error "($error_id) Failed to start mds$n"
192 for n in $(seq $MDSCOUNT); do
193 echo "stopping mds$n"
194 stop mds$n >/dev/null ||
195 error "($error_id) Failed to stop mds$n"
199 scrub_check_status() {
204 for n in $(seq $MDSCOUNT); do
205 wait_update_facet mds$n "$LCTL get_param -n \
206 osd-*.$(facet_svc mds$n).oi_scrub |
207 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
208 error "($error_id) Expected '$expected' on mds$n"
212 scrub_check_flags() {
218 for n in $(seq $MDSCOUNT); do
219 actual=$(do_facet mds$n $LCTL get_param -n \
220 osd-*.$(facet_svc mds$n).oi_scrub |
221 awk '/^flags/ { print $2 }')
222 if [ "$actual" != "$expected" ]; then
223 error "($error_id) Expected '$expected' on mds$n, but" \
229 scrub_check_params() {
235 for n in $(seq $MDSCOUNT); do
236 actual=$(do_facet mds$n $LCTL get_param -n \
237 osd-*.$(facet_svc mds$n).oi_scrub |
238 awk '/^param/ { print $2 }')
239 if [ "$actual" != "$expected" ]; then
240 error "($error_id) Expected '$expected' on mds$n, but" \
246 scrub_check_repaired() {
253 for n in $(seq $MDSCOUNT); do
254 if [ $dryrun -eq 1 ]; then
255 actual=$(do_facet mds$n $LCTL get_param -n \
256 osd-*.$(facet_svc mds$n).oi_scrub |
257 awk '/^inconsistent:/ { print $2 }')
259 actual=$(do_facet mds$n $LCTL get_param -n \
260 osd-*.$(facet_svc mds$n).oi_scrub |
261 awk '/^updated:/ { print $2 }')
264 if [ $expected -eq 0 -a $actual -ne 0 ]; then
265 error "($error_id) Expected no repaired on mds$n, but" \
269 if [ $expected -ne 0 -a $actual -lt $expected ]; then
270 error "($error_id) Expected '$expected' on mds$n, but" \
280 for n in $(seq $MDSCOUNT); do
281 diff -q $LUSTRE/tests/test-framework.sh \
282 $DIR/$tdir/mds$n/test-framework.sh ||
283 error "($error_id) File data check failed"
287 scrub_check_data2() {
292 for n in $(seq $MDSCOUNT); do
293 diff -q $LUSTRE/tests/$filename \
294 $DIR/$tdir/mds$n/$filename ||
295 error "($error_id) File data check failed"
300 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
306 for n in $(seq $MDSCOUNT); do
307 mds_remove_ois mds$n $index ||
308 error "($error_id) Failed to remove OI .$index on mds$n"
312 scrub_enable_auto() {
313 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
318 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
322 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
323 osd-*.*.full_scrub_ratio=$ratio
326 full_scrub_threshold_rate() {
327 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
331 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
332 osd-*.*.full_scrub_threshold_rate=$rate
335 scrub_enable_index_backup() {
336 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
337 osd-*.*.index_backup=1
340 scrub_disable_index_backup() {
341 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
342 osd-*.*.index_backup=0
347 echo "starting MDTs without disabling OI scrub"
348 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
349 scrub_check_status 2 init
350 scrub_check_flags 3 ""
351 mount_client $MOUNT || error "(4) Fail to start client!"
354 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
357 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
360 echo "start $SINGLEMDS without disabling OI scrub"
361 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
363 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
364 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
366 mount_client $MOUNT || error "(4) Fail to start client!"
367 #define OBD_FAIL_OSD_FID_MAPPING 0x193
368 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
369 # update .lustre OI mapping
371 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
372 umount_client $MOUNT || error "(5) Fail to stop client!"
374 echo "stop $SINGLEMDS"
375 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
377 echo "start $SINGLEMDS with disabling OI scrub"
378 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
379 error "(7) Fail to start MDS!"
381 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
382 [ "$FLAGS" == "inconsistent" ] ||
383 error "(9) Expect 'inconsistent', but got '$FLAGS'"
385 run_test 1a "Auto trigger initial OI scrub when server mounts"
389 echo "start MDTs without disabling OI scrub"
390 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
391 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
392 scrub_check_status 3 completed
393 mount_client $MOUNT || error "(4) Fail to start client!"
394 scrub_check_data2 runas 5
395 scrub_check_status 6 completed
397 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
400 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
401 skip "ldiskfs special test" && return
405 # OI files to be removed:
407 # idx 2: oi.16.{2,4,8,16,32}
408 # idx 3: oi.16.{3,9,27}
409 for index in 0 2 3; do
411 scrub_remove_ois 1 $index
412 echo "start MDTs with OI scrub disabled"
413 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
414 scrub_check_flags 3 recreated
416 scrub_check_status 5 completed
417 scrub_check_flags 6 ""
420 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
423 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
424 skip "ldiskfs special test" && return
427 echo "starting MDTs without disabling OI scrub"
428 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
429 scrub_check_status 3 completed
430 mount_client $MOUNT || error "(4) Fail to start client!"
433 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
435 # test_3 is obsolete, it will be covered by test_5.
437 formatall > /dev/null
441 echo "starting MDTs with OI scrub disabled"
442 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
443 scrub_check_status 3 init
444 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
445 scrub_check_flags 4 recreated,inconsistent
447 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
451 echo "starting MDTs with OI scrub disabled"
452 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
453 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
454 scrub_check_flags 4 recreated,inconsistent
455 mount_client $MOUNT || error "(5) Fail to start client!"
461 scrub_check_status 7 completed
462 scrub_check_flags 8 ""
465 for n in $(seq $MDSCOUNT); do
466 updated0[$n]=$(scrub_status $n |
467 awk '/^prior_updated/ { print $2 }')
470 scrub_check_data2 sanity-scrub.sh 9
474 for n in $(seq $MDSCOUNT); do
475 updated1[$n]=$(scrub_status $n |
476 awk '/^prior_updated/ { print $2 }')
477 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
478 error "(10) NOT auto trigger full scrub as expected"
481 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
484 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
485 skip "ldiskfs special test" && return
488 echo "starting MDTs with OI scrub disabled"
489 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
490 scrub_check_flags 4 recreated,inconsistent
491 mount_client $MOUNT || error "(5) Fail to start client!"
494 full_scrub_threshold_rate 10000
498 scrub_check_status 7 completed
499 scrub_check_flags 8 ""
502 for n in $(seq $MDSCOUNT); do
503 updated0[$n]=$(scrub_status $n |
504 awk '/^prior_updated/ { print $2 }')
506 echo "OI scrub on MDS$n status for the 1st time:"
507 do_facet mds$n $LCTL get_param -n \
508 osd-*.$(facet_svc mds$n).oi_scrub
511 scrub_check_data2 sanity-scrub.sh 9
514 scrub_check_status 10 completed
515 scrub_check_flags 11 ""
518 for n in $(seq $MDSCOUNT); do
519 updated1[$n]=$(scrub_status $n |
520 awk '/^prior_updated/ { print $2 }')
522 echo "OI scrub on MDS$n status for the 2nd time:"
523 do_facet mds$n $LCTL get_param -n \
524 osd-*.$(facet_svc mds$n).oi_scrub
526 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
527 error "(12) Auto trigger full scrub unexpectedly"
530 for n in $(seq $MDSCOUNT); do
531 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
532 error "(13) fail to ls"
536 scrub_check_status 14 completed
537 scrub_check_flags 15 ""
539 for n in $(seq $MDSCOUNT); do
540 updated0[$n]=$(scrub_status $n |
541 awk '/^prior_updated/ { print $2 }')
543 echo "OI scrub on MDS$n status for the 3rd time:"
544 do_facet mds$n $LCTL get_param -n \
545 osd-*.$(facet_svc mds$n).oi_scrub
547 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
548 error "(16) Auto trigger full scrub unexpectedly"
551 for n in $(seq $MDSCOUNT); do
552 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
556 for n in $(seq $MDSCOUNT); do
557 updated1[$n]=$(scrub_status $n |
558 awk '/^prior_updated/ { print $2 }')
559 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
560 echo "OI scrub on MDS$n status for the 4th time:"
561 do_facet mds$n $LCTL get_param -n \
562 osd-*.$(facet_svc mds$n).oi_scrub
564 error "(18) NOT auto trigger full scrub as expected"
568 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
571 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
572 skip "ldiskfs special test" && return
575 echo "starting MDTs with OI scrub disabled"
576 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
577 scrub_check_flags 4 recreated,inconsistent
578 mount_client $MOUNT || error "(5) Fail to start client!"
581 full_scrub_threshold_rate 20
585 scrub_check_status 7 completed
586 scrub_check_flags 8 ""
589 for n in $(seq $MDSCOUNT); do
590 updated0[$n]=$(scrub_status $n |
591 awk '/^prior_updated/ { print $2 }')
593 echo "OI scrub on MDS$n status for the 1st time:"
594 do_facet mds$n $LCTL get_param -n \
595 osd-*.$(facet_svc mds$n).oi_scrub
598 scrub_check_data2 sanity-scrub.sh 9
601 scrub_check_status 10 completed
602 scrub_check_flags 11 ""
605 for n in $(seq $MDSCOUNT); do
606 updated1[$n]=$(scrub_status $n |
607 awk '/^prior_updated/ { print $2 }')
609 echo "OI scrub on MDS$n status for the 2nd time:"
610 do_facet mds$n $LCTL get_param -n \
611 osd-*.$(facet_svc mds$n).oi_scrub
613 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
614 error "(12) Auto trigger full scrub unexpectedly"
617 for n in $(seq $MDSCOUNT); do
618 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
619 error "(13) fail to ls"
623 scrub_check_status 14 completed
624 scrub_check_flags 15 ""
626 for n in $(seq $MDSCOUNT); do
627 updated0[$n]=$(scrub_status $n |
628 awk '/^prior_updated/ { print $2 }')
630 echo "OI scrub on MDS$n status for the 3rd time:"
631 do_facet mds$n $LCTL get_param -n \
632 osd-*.$(facet_svc mds$n).oi_scrub
634 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
635 error "(16) Auto trigger full scrub unexpectedly"
638 for n in $(seq $MDSCOUNT); do
639 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
643 for n in $(seq $MDSCOUNT); do
644 updated1[$n]=$(scrub_status $n |
645 awk '/^prior_updated/ { print $2 }')
646 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
647 echo "OI scrub on MDS$n status for the 4th time:"
648 do_facet mds$n $LCTL get_param -n \
649 osd-*.$(facet_svc mds$n).oi_scrub
651 error "(18) NOT auto trigger full scrub as expected"
655 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
658 formatall > /dev/null
662 echo "starting MDTs with OI scrub disabled (1)"
663 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
664 scrub_check_status 3 init
665 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
666 scrub_check_flags 4 recreated,inconsistent
667 mount_client $MOUNT || error "(5) Fail to start client!"
671 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
672 do_nodes $(comma_list $(mdts_nodes)) \
673 $LCTL set_param fail_val=3 fail_loc=0x190
676 umount_client $MOUNT || error "(7) Fail to stop client!"
677 scrub_check_status 8 scanning
679 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
680 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
685 do_nodes $(comma_list $(mdts_nodes)) \
686 $LCTL set_param fail_loc=0 fail_val=0
688 echo "starting MDTs with OI scrub disabled (2)"
689 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
690 scrub_check_status 11 crashed
693 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
694 do_nodes $(comma_list $(mdts_nodes)) \
695 $LCTL set_param fail_val=3 fail_loc=0x190
697 echo "starting MDTs without disabling OI scrub"
698 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
699 scrub_check_status 14 scanning
701 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
702 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
704 scrub_check_status 15 failed
705 mount_client $MOUNT || error "(16) Fail to start client!"
708 do_nodes $(comma_list $(mdts_nodes)) \
709 $LCTL set_param fail_loc=0 fail_val=0
714 for n in $(seq $MDSCOUNT); do
715 stat $DIR/$tdir/mds$n/sanity-scrub.sh &
719 for n in $(seq $MDSCOUNT); do
721 error "(18) Fail to stat mds$n/sanity-scrub.sh"
724 scrub_check_status 19 completed
725 scrub_check_flags 20 ""
727 run_test 5 "OI scrub state machine"
731 echo "starting MDTs with OI scrub disabled"
732 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
733 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
734 scrub_check_flags 4 recreated,inconsistent
735 mount_client $MOUNT || error "(5) Fail to start client!"
739 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
740 do_nodes $(comma_list $(mdts_nodes)) \
741 $LCTL set_param fail_val=2 fail_loc=0x190
745 # Sleep 5 sec to guarantee at least one object is processed by OI scrub
747 # Fail the OI scrub to guarantee there is at least one checkpoint
748 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
749 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
751 scrub_check_status 7 failed
753 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
754 do_nodes $(comma_list $(mdts_nodes)) \
755 $LCTL set_param fail_val=3 fail_loc=0x190
758 for n in $(seq $MDSCOUNT); do
759 # stat will re-trigger OI scrub
760 stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
761 error "(8) Failed to stat mds$n/sanity-scrub.sh"
764 umount_client $MOUNT || error "(9) Fail to stop client!"
765 scrub_check_status 10 scanning
767 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
768 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
772 for n in $(seq $MDSCOUNT); do
773 position0[$n]=$(scrub_status $n |
774 awk '/^last_checkpoint_position/ {print $2}')
775 position0[$n]=$((${position0[$n]} + 1))
780 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
781 do_nodes $(comma_list $(mdts_nodes)) \
782 $LCTL set_param fail_val=3 fail_loc=0x190
784 echo "starting MDTs without disabling OI scrub"
785 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
787 scrub_check_status 13 scanning
790 for n in $(seq $MDSCOUNT); do
791 position1[$n]=$(scrub_status $n |
792 awk '/^latest_start_position/ {print $2}')
793 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
794 error "(14) Expected position ${position0[$n]}, but" \
795 "got ${position1[$n]}"
799 do_nodes $(comma_list $(mdts_nodes)) \
800 $LCTL set_param fail_loc=0 fail_val=0
802 scrub_check_status 15 completed
803 scrub_check_flags 16 ""
805 run_test 6 "OI scrub resumes from last checkpoint"
809 echo "starting MDTs with OI scrub disabled"
810 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
811 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
812 scrub_check_flags 4 recreated,inconsistent
813 mount_client $MOUNT || error "(5) Fail to start client!"
817 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
818 do_nodes $(comma_list $(mdts_nodes)) \
819 $LCTL set_param fail_val=3 fail_loc=0x190
824 for n in $(seq $MDSCOUNT); do
825 stat $DIR/$tdir/mds$n/${tfile}300 ||
826 error "(7) Failed to stat mds$n/${tfile}300!"
829 scrub_check_status 8 scanning
830 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
831 scrub_check_flags 9 inconsistent,auto
833 scrub_check_flags 9 recreated,inconsistent,auto
836 do_nodes $(comma_list $(mdts_nodes)) \
837 $LCTL set_param fail_loc=0 fail_val=0
839 scrub_check_status 10 completed
842 run_test 7 "System is available during OI scrub scanning"
846 echo "starting MDTs with OI scrub disabled"
847 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
848 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
849 scrub_check_flags 4 recreated,inconsistent
851 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
852 do_nodes $(comma_list $(mdts_nodes)) \
853 $LCTL set_param fail_val=1 fail_loc=0x190
856 scrub_check_status 6 scanning
858 scrub_check_status 8 stopped
860 scrub_check_status 10 scanning
862 do_nodes $(comma_list $(mdts_nodes)) \
863 $LCTL set_param fail_loc=0 fail_val=0
865 scrub_check_status 11 completed
866 scrub_check_flags 12 ""
868 run_test 8 "Control OI scrub manually"
871 # Skip scrub speed test for ZFS because its performance is unstable
872 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
873 skip "test scrub speed only on ldiskfs" && return
875 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
876 skip "Testing on UP system, the speed may be inaccurate."
882 echo "starting MDTs with OI scrub disabled"
883 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
884 scrub_check_flags 4 recreated,inconsistent
886 local BASE_SPEED1=100
888 # OI scrub should run with full speed under inconsistent case
889 scrub_start 5 -s $BASE_SPEED1
892 scrub_check_status 6 completed
893 scrub_check_flags 7 ""
894 # OI scrub should run with limited speed under non-inconsistent case
895 scrub_start 8 -s $BASE_SPEED1 -r
898 scrub_check_status 9 scanning
900 # Do NOT ignore the 1024 pre-fetched items. There may also be
901 # a timing error; normally it should be less than 2 seconds.
902 # We allow another 20% of scheduling error.
903 local PRE_FETCHED=1024
905 # MAX_MARGIN = 1.2 = 12 / 10
906 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
907 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
909 for n in $(seq $MDSCOUNT); do
910 local SPEED=$(scrub_status $n | \
911 awk '/^average_speed/ { print $2 }')
912 [ $SPEED -lt $MAX_SPEED ] ||
913 error "(10) Got speed $SPEED, expected less than" \
918 local BASE_SPEED2=300
920 for n in $(seq $MDSCOUNT); do
921 do_facet mds$n $LCTL set_param -n \
922 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
926 # MIN_MARGIN = 0.8 = 8 / 10
927 local MIN_SPEED=$(((PRE_FETCHED + \
928 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
929 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
930 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
931 # MAX_MARGIN = 1.2 = 12 / 10
932 MAX_SPEED=$(((PRE_FETCHED + \
933 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
934 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
935 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
936 for n in $(seq $MDSCOUNT); do
937 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
938 [ $SPEED -gt $MIN_SPEED ] ||
939 error "(11) Got speed $SPEED, expected more than" \
941 [ $SPEED -lt $MAX_SPEED ] ||
942 error "(12) Got speed $SPEED, expected less than" \
945 do_facet mds$n $LCTL set_param -n \
946 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
949 scrub_check_status 13 completed
951 run_test 9 "OI scrub speed control"
955 echo "starting mds$n with OI scrub disabled (1)"
956 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
957 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
958 scrub_check_flags 4 recreated,inconsistent
959 mount_client $MOUNT || error "(5) Fail to start client!"
963 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
964 do_nodes $(comma_list $(mdts_nodes)) \
965 $LCTL set_param fail_val=1 fail_loc=0x190
968 scrub_check_status 7 scanning
969 umount_client $MOUNT || error "(8) Fail to stop client!"
971 echo "starting MDTs with OI scrub disabled (2)"
972 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
973 scrub_check_status 11 paused
975 echo "starting MDTs without disabling OI scrub"
976 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
977 scrub_check_status 14 scanning
979 do_nodes $(comma_list $(mdts_nodes)) \
980 $LCTL set_param fail_loc=0 fail_val=0
982 scrub_check_status 15 completed
983 scrub_check_flags 16 ""
985 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
987 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
990 echo "starting MDTs with OI scrub disabled"
991 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
992 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
993 scrub_check_flags 4 recreated,inconsistent
995 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
996 do_nodes $(comma_list $(mdts_nodes)) \
997 $LCTL set_param fail_val=3 fail_loc=0x190
1000 scrub_check_status 6 scanning
1002 echo "starting MDTs with OI scrub disabled"
1003 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
1004 scrub_check_status 9 paused
1006 echo "starting MDTs without disabling OI scrub"
1007 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
1008 scrub_check_status 12 scanning
1010 do_nodes $(comma_list $(mdts_nodes)) \
1011 $LCTL set_param fail_loc=0 fail_val=0
1013 scrub_check_status 13 completed
1014 scrub_check_flags 14 ""
1016 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
1019 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1020 skip "ldiskfs special test" && return
1025 check_mount_and_prep
1027 for n in $(seq $MDSCOUNT); do
1028 test_mkdir -i $((n - 1)) -c1 $DIR/$tdir/mds$n ||
1029 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
1031 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
1032 error "(2) Fail to create under $tdir/mds$n"
1035 # reset OI scrub start point by force
1037 scrub_check_status 4 completed
1042 # OI scrub should skip the newly created objects on the first access
1043 # notice we're creating a new llog for every OST on every startup
1044 # new features can make this even less stable, so we only check that
1045 # the number of skipped files is at least the number of known created
1046 local MINIMUM=$((CREATED + 1)) # files + directory
1047 for n in $(seq $MDSCOUNT); do
1048 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1049 [ $SKIPPED -lt $MINIMUM ] &&
1050 error "(5) Expect at least $MINIMUM objects" \
1051 "skipped on mds$n, but got $SKIPPED"
1053 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1056 # reset OI scrub start point by force
1058 scrub_check_status 7 completed
1060 # OI scrub should skip the newly created objects only once
1061 for n in $(seq $MDSCOUNT); do
1062 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1063 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1065 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1066 error "(8) Expect 0 objects skipped on mds$n, but" \
1070 run_test 11 "OI scrub skips the new created objects only once"
1073 check_mount_and_prep
1074 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1076 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1077 do_facet ost1 $LCTL set_param fail_loc=0x195
1078 local count=$(precreated_ost_obj_count 0 0)
1080 createmany -o $DIR/$tdir/f $((count + 32))
1081 umount_client $MOUNT || error "(1) Fail to stop client!"
1083 stop ost1 || error "(2) Fail to stop ost1"
1085 #define OBD_FAIL_OST_NODESTROY 0x233
1086 do_facet ost1 $LCTL set_param fail_loc=0x233
1088 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1089 error "(3) Fail to start ost1"
1091 mount_client $MOUNT || error "(4) Fail to start client!"
1093 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1095 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1097 do_facet ost1 $LCTL set_param fail_loc=0
1098 wait_update_facet ost1 "$LCTL get_param -n \
1099 osd-*.$(facet_svc ost1).oi_scrub |
1100 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1101 error "(7) Expected '$expected' on ost1"
1103 ls -ail $DIR/$tdir > /dev/null || {
1105 error "(8) ls should succeed"
1108 run_test 12 "OI scrub can rebuild invalid /O entries"
1111 check_mount_and_prep
1112 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1114 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1115 do_facet ost1 $LCTL set_param fail_loc=0x196
1116 local count=$(precreated_ost_obj_count 0 0)
1118 createmany -o $DIR/$tdir/f $((count + 32))
1119 do_facet ost1 $LCTL set_param fail_loc=0
1121 umount_client $MOUNT || error "(1) Fail to stop client!"
1123 stop ost1 || error "(2) Fail to stop ost1"
1125 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1126 error "(3) Fail to start ost1"
1128 mount_client $MOUNT || error "(4) Fail to start client!"
1130 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1132 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1134 wait_update_facet ost1 "$LCTL get_param -n \
1135 osd-*.$(facet_svc ost1).oi_scrub |
1136 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1137 error "(7) Expected '$expected' on ost1"
1139 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1141 run_test 13 "OI scrub can rebuild missed /O entries"
1144 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1145 skip "ldiskfs special test"
1147 check_mount_and_prep
1148 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1150 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1151 do_facet ost1 $LCTL set_param fail_loc=0x196
1152 local count=$(precreated_ost_obj_count 0 0)
1154 createmany -o $DIR/$tdir/f $((count + 1000))
1155 do_facet ost1 $LCTL set_param fail_loc=0
1157 umount_client $MOUNT || error "(1) Fail to stop client!"
1159 stop ost1 || error "(2) Fail to stop ost1"
1162 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1163 error "(3) Fail to run e2fsck error"
1165 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1166 error "(4) Fail to start ost1"
1168 mount_client $MOUNT || error "(5) Fail to start client!"
1170 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1171 awk '/^lf_repa[ir]*ed/ { print $2 }')
1172 [ $LF_REPAIRED -ge 1000 ] ||
1173 error "(6) Some entry under /lost+found should be repaired"
1175 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1179 echo "run e2fsck again after LFSCK"
1180 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1181 error "(8) Fail to run e2fsck error"
1183 run_test 14 "OI scrub can repair OST objects under lost+found"
1188 formatall > /dev/null
1189 setupall > /dev/null
1192 echo "starting MDTs with OI scrub disabled"
1193 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1194 scrub_check_status 3 init
1195 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1196 scrub_check_flags 4 recreated,inconsistent
1198 # run under dryrun mode
1199 scrub_start 5 --dryrun
1200 scrub_check_status 6 completed
1201 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1202 scrub_check_flags 7 inconsistent
1205 scrub_check_flags 7 recreated,inconsistent
1208 scrub_check_params 8 dryrun
1209 scrub_check_repaired 9 $repaired 1
1211 # run under dryrun mode again
1212 scrub_start 10 --dryrun
1213 scrub_check_status 11 completed
1214 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1215 scrub_check_flags 12 inconsistent
1217 scrub_check_flags 12 recreated,inconsistent
1219 scrub_check_params 13 dryrun
1220 scrub_check_repaired 14 $repaired 1
1222 # run under normal mode
1224 scrub_check_status 16 completed
1225 scrub_check_flags 17 ""
1226 scrub_check_params 18 ""
1227 scrub_check_repaired 19 $repaired 0
1229 # run under normal mode again
1231 scrub_check_status 21 completed
1232 scrub_check_flags 22 ""
1233 scrub_check_params 23 ""
1234 scrub_check_repaired 24 0 0
1236 run_test 15 "Dryrun mode OI scrub"
1239 check_mount_and_prep
1240 scrub_enable_index_backup
1242 #define OBD_FAIL_OSD_INDEX_CRASH 0x199
1243 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x199
1245 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
1247 echo "starting MDTs without disabling OI scrub"
1248 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
1249 mount_client $MOUNT || error "(2) Fail to start client!"
1251 scrub_disable_index_backup
1253 run_test 16 "Initial OI scrub can rebuild crashed index objects"
1255 # restore MDS/OST size
1256 MDSSIZE=${SAVED_MDSSIZE}
1257 OSTSIZE=${SAVED_OSTSIZE}
1258 OSTCOUNT=${SAVED_OSTCOUNT}
1260 # finally, clean up the system
1261 REFORMAT="yes" cleanup_and_setup_lustre
1264 check_and_cleanup_lustre