3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16 . $LUSTRE/tests/test-framework.sh
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
32 SAVED_MDSSIZE=${MDSSIZE}
33 SAVED_OSTSIZE=${OSTSIZE}
34 SAVED_OSTCOUNT=${OSTCOUNT}
36 # use small MDS + OST size to speed formatting time
37 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
38 # 400M MDT device can guarantee uninitialized groups during the OI scrub
42 # there is no need for many OSTs; limit them to reduce the format/start/stop overhead
43 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
45 # build up a clean test environment.
51 MDT_DEV="${FSNAME}-MDT0000"
52 OST_DEV="${FSNAME}-OST0000"
53 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
59 # use "lfsck_start -A" when we no longer need to test interop
60 for n in $(seq $MDSCOUNT); do
61 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
63 error "($error_id) Failed to start OI scrub on mds$n"
71 # use "lfsck_stop -A" when we no longer need to test interop
72 for n in $(seq $MDSCOUNT); do
73 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
74 error "($error_id) Failed to stop OI scrub on mds$n"
81 do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
84 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
85 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
86 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
87 SHOW_SCRUB="do_facet $SINGLEMDS \
88 $LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
89 SHOW_SCRUB_ON_OST="do_facet ost1 \
90 $LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
91 MOUNT_OPTS_SCRUB="-o user_xattr"
92 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
101 echo "preparing... $(date)"
102 for n in $(seq $MDSCOUNT); do
103 echo "creating $nfiles files on mds$n"
104 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
105 error "Failed to create directory mds$n"
106 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
107 error "Failed to copy files to mds$n"
108 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
109 error "mkdir failed on mds$n"
110 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
111 /dev/null || error "create failed on mds$n"
112 if [[ $nfiles -gt 0 ]]; then
113 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
114 /dev/null || error "createmany failed on mds$n"
117 echo "prepared $(date)."
119 [ ! -z $inject ] && [ $inject -eq 2 ] && {
120 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
121 do_nodes $(comma_list $(mdts_nodes)) \
122 $LCTL set_param fail_loc=0x198
124 for n in $(seq $MDSCOUNT); do
125 cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
126 error "Fail to copy runas to MDS$n"
129 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
132 [ ! -z $inject ] && [ $inject -eq 1 ] &&
133 [ $(facet_fstype $SINGLEMDS) = "zfs" ] && {
134 #define OBD_FAIL_OSD_FID_MAPPING 0x193
135 do_nodes $(comma_list $(mdts_nodes)) \
136 $LCTL set_param fail_loc=0x193
138 for n in $(seq $MDSCOUNT); do
139 chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
140 chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
143 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
146 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
148 # sync local transactions on every MDT
149 do_nodes $(comma_list $(mdts_nodes)) \
150 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
152 # wait for a while to cancel update logs after transactions committed.
155 # sync again to guarantee all things done.
156 do_nodes $(comma_list $(mdts_nodes)) \
157 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
159 for n in $(seq $MDSCOUNT); do
161 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
164 [ ! -z $inject ] && [ $(facet_fstype $SINGLEMDS) = "ldiskfs" ] && {
165 if [ $inject -eq 1 ]; then
166 for n in $(seq $MDSCOUNT); do
167 mds_backup_restore mds$n ||
168 error "Backup/restore on mds$n failed"
170 elif [ $inject -eq 2 ]; then
181 for n in $(seq $MDSCOUNT); do
182 start mds$n $(mdsdevname $n) $opts >/dev/null ||
183 error "($error_id) Failed to start mds$n"
191 for n in $(seq $MDSCOUNT); do
192 echo "stopping mds$n"
193 stop mds$n >/dev/null ||
194 error "($error_id) Failed to stop mds$n"
198 scrub_check_status() {
203 for n in $(seq $MDSCOUNT); do
204 wait_update_facet mds$n "$LCTL get_param -n \
205 osd-*.$(facet_svc mds$n).oi_scrub |
206 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
207 error "($error_id) Expected '$expected' on mds$n"
211 scrub_check_flags() {
217 for n in $(seq $MDSCOUNT); do
218 actual=$(do_facet mds$n $LCTL get_param -n \
219 osd-*.$(facet_svc mds$n).oi_scrub |
220 awk '/^flags/ { print $2 }')
221 if [ "$actual" != "$expected" ]; then
222 error "($error_id) Expected '$expected' on mds$n, but" \
228 scrub_check_params() {
234 for n in $(seq $MDSCOUNT); do
235 actual=$(do_facet mds$n $LCTL get_param -n \
236 osd-*.$(facet_svc mds$n).oi_scrub |
237 awk '/^param/ { print $2 }')
238 if [ "$actual" != "$expected" ]; then
239 error "($error_id) Expected '$expected' on mds$n, but" \
245 scrub_check_repaired() {
252 for n in $(seq $MDSCOUNT); do
253 if [ $dryrun -eq 1 ]; then
254 actual=$(do_facet mds$n $LCTL get_param -n \
255 osd-*.$(facet_svc mds$n).oi_scrub |
256 awk '/^inconsistent:/ { print $2 }')
258 actual=$(do_facet mds$n $LCTL get_param -n \
259 osd-*.$(facet_svc mds$n).oi_scrub |
260 awk '/^updated:/ { print $2 }')
263 if [ $expected -eq 0 -a $actual -ne 0 ]; then
264 error "($error_id) Expected no repaired on mds$n, but" \
268 if [ $expected -ne 0 -a $actual -lt $expected ]; then
269 error "($error_id) Expected '$expected' on mds$n, but" \
279 for n in $(seq $MDSCOUNT); do
280 diff -q $LUSTRE/tests/test-framework.sh \
281 $DIR/$tdir/mds$n/test-framework.sh ||
282 error "($error_id) File data check failed"
286 scrub_check_data2() {
291 for n in $(seq $MDSCOUNT); do
292 diff -q $LUSTRE/tests/$filename \
293 $DIR/$tdir/mds$n/$filename ||
294 error "($error_id) File data check failed"
299 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
305 for n in $(seq $MDSCOUNT); do
306 mds_remove_ois mds$n $index ||
307 error "($error_id) Failed to remove OI .$index on mds$n"
311 scrub_enable_auto() {
312 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
317 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
321 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
322 osd-*.*.full_scrub_ratio=$ratio
325 full_scrub_threshold_rate() {
326 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
330 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
331 osd-*.*.full_scrub_threshold_rate=$rate
334 scrub_enable_index_backup() {
335 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
336 osd-*.*.index_backup=1
339 scrub_disable_index_backup() {
340 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
341 osd-*.*.index_backup=0
346 echo "starting MDTs without disabling OI scrub"
347 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
348 scrub_check_status 2 init
349 scrub_check_flags 3 ""
350 mount_client $MOUNT || error "(4) Fail to start client!"
353 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
357 echo "start $SINGLEMDS without disabling OI scrub"
358 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
360 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
361 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
363 mount_client $MOUNT || error "(4) Fail to start client!"
364 #define OBD_FAIL_OSD_FID_MAPPING 0x193
365 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
366 # update .lustre OI mapping
368 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
369 umount_client $MOUNT || error "(5) Fail to stop client!"
371 echo "stop $SINGLEMDS"
372 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
374 echo "start $SINGLEMDS with disabling OI scrub"
375 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
376 error "(7) Fail to start MDS!"
378 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
379 [ "$FLAGS" == "inconsistent" ] ||
380 error "(9) Expect 'inconsistent', but got '$FLAGS'"
382 run_test 1a "Auto trigger initial OI scrub when server mounts"
386 echo "start MDTs without disabling OI scrub"
387 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
388 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
389 scrub_check_status 3 completed
390 mount_client $MOUNT || error "(4) Fail to start client!"
391 scrub_check_data2 runas 5
392 scrub_check_status 6 completed
394 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
397 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
398 skip "ldiskfs special test" && return
402 # OI files to be removed:
404 # idx 2: oi.16.{2,4,8,16,32}
405 # idx 3: oi.16.{3,9,27}
406 for index in 0 2 3; do
408 scrub_remove_ois 1 $index
409 echo "start MDTs with OI scrub disabled"
410 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
411 scrub_check_flags 3 recreated
413 scrub_check_status 5 completed
414 scrub_check_flags 6 ""
417 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
420 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
421 skip "ldiskfs special test" && return
424 echo "starting MDTs without disabling OI scrub"
425 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
426 scrub_check_status 3 completed
427 mount_client $MOUNT || error "(4) Fail to start client!"
430 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
432 # test_3 is obsolete, it will be covered by test_5.
434 formatall > /dev/null
438 echo "starting MDTs with OI scrub disabled"
439 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
440 scrub_check_status 3 init
441 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
442 scrub_check_flags 4 recreated,inconsistent
444 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
448 echo "starting MDTs with OI scrub disabled"
449 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
450 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
451 scrub_check_flags 4 recreated,inconsistent
452 mount_client $MOUNT || error "(5) Fail to start client!"
458 scrub_check_status 7 completed
459 scrub_check_flags 8 ""
462 for n in $(seq $MDSCOUNT); do
463 updated0[$n]=$(scrub_status $n |
464 awk '/^prior_updated/ { print $2 }')
467 scrub_check_data2 sanity-scrub.sh 9
471 for n in $(seq $MDSCOUNT); do
472 updated1[$n]=$(scrub_status $n |
473 awk '/^prior_updated/ { print $2 }')
474 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
475 error "(10) NOT auto trigger full scrub as expected"
478 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
481 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
482 skip "ldiskfs special test" && return
485 echo "starting MDTs with OI scrub disabled"
486 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
487 scrub_check_flags 4 recreated,inconsistent
488 mount_client $MOUNT || error "(5) Fail to start client!"
491 full_scrub_threshold_rate 10000
495 scrub_check_status 7 completed
496 scrub_check_flags 8 ""
499 for n in $(seq $MDSCOUNT); do
500 updated0[$n]=$(scrub_status $n |
501 awk '/^prior_updated/ { print $2 }')
503 echo "OI scrub on MDS$n status for the 1st time:"
504 do_facet mds$n $LCTL get_param -n \
505 osd-*.$(facet_svc mds$n).oi_scrub
508 scrub_check_data2 sanity-scrub.sh 9
511 scrub_check_status 10 completed
512 scrub_check_flags 11 ""
515 for n in $(seq $MDSCOUNT); do
516 updated1[$n]=$(scrub_status $n |
517 awk '/^prior_updated/ { print $2 }')
519 echo "OI scrub on MDS$n status for the 2nd time:"
520 do_facet mds$n $LCTL get_param -n \
521 osd-*.$(facet_svc mds$n).oi_scrub
523 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
524 error "(12) Auto trigger full scrub unexpectedly"
527 for n in $(seq $MDSCOUNT); do
528 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
529 error "(13) fail to ls"
533 scrub_check_status 14 completed
534 scrub_check_flags 15 ""
536 for n in $(seq $MDSCOUNT); do
537 updated0[$n]=$(scrub_status $n |
538 awk '/^prior_updated/ { print $2 }')
540 echo "OI scrub on MDS$n status for the 3rd time:"
541 do_facet mds$n $LCTL get_param -n \
542 osd-*.$(facet_svc mds$n).oi_scrub
544 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
545 error "(16) Auto trigger full scrub unexpectedly"
548 for n in $(seq $MDSCOUNT); do
549 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
553 for n in $(seq $MDSCOUNT); do
554 updated1[$n]=$(scrub_status $n |
555 awk '/^prior_updated/ { print $2 }')
556 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
557 echo "OI scrub on MDS$n status for the 4th time:"
558 do_facet mds$n $LCTL get_param -n \
559 osd-*.$(facet_svc mds$n).oi_scrub
561 error "(18) NOT auto trigger full scrub as expected"
565 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
568 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
569 skip "ldiskfs special test" && return
572 echo "starting MDTs with OI scrub disabled"
573 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
574 scrub_check_flags 4 recreated,inconsistent
575 mount_client $MOUNT || error "(5) Fail to start client!"
578 full_scrub_threshold_rate 20
582 scrub_check_status 7 completed
583 scrub_check_flags 8 ""
586 for n in $(seq $MDSCOUNT); do
587 updated0[$n]=$(scrub_status $n |
588 awk '/^prior_updated/ { print $2 }')
590 echo "OI scrub on MDS$n status for the 1st time:"
591 do_facet mds$n $LCTL get_param -n \
592 osd-*.$(facet_svc mds$n).oi_scrub
595 scrub_check_data2 sanity-scrub.sh 9
598 scrub_check_status 10 completed
599 scrub_check_flags 11 ""
602 for n in $(seq $MDSCOUNT); do
603 updated1[$n]=$(scrub_status $n |
604 awk '/^prior_updated/ { print $2 }')
606 echo "OI scrub on MDS$n status for the 2nd time:"
607 do_facet mds$n $LCTL get_param -n \
608 osd-*.$(facet_svc mds$n).oi_scrub
610 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
611 error "(12) Auto trigger full scrub unexpectedly"
614 for n in $(seq $MDSCOUNT); do
615 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
616 error "(13) fail to ls"
620 scrub_check_status 14 completed
621 scrub_check_flags 15 ""
623 for n in $(seq $MDSCOUNT); do
624 updated0[$n]=$(scrub_status $n |
625 awk '/^prior_updated/ { print $2 }')
627 echo "OI scrub on MDS$n status for the 3rd time:"
628 do_facet mds$n $LCTL get_param -n \
629 osd-*.$(facet_svc mds$n).oi_scrub
631 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
632 error "(16) Auto trigger full scrub unexpectedly"
635 for n in $(seq $MDSCOUNT); do
636 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
640 for n in $(seq $MDSCOUNT); do
641 updated1[$n]=$(scrub_status $n |
642 awk '/^prior_updated/ { print $2 }')
643 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
644 echo "OI scrub on MDS$n status for the 4th time:"
645 do_facet mds$n $LCTL get_param -n \
646 osd-*.$(facet_svc mds$n).oi_scrub
648 error "(18) NOT auto trigger full scrub as expected"
652 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
655 formatall > /dev/null
659 echo "starting MDTs with OI scrub disabled (1)"
660 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
661 scrub_check_status 3 init
662 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
663 scrub_check_flags 4 recreated,inconsistent
664 mount_client $MOUNT || error "(5) Fail to start client!"
668 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
669 do_nodes $(comma_list $(mdts_nodes)) \
670 $LCTL set_param fail_val=3 fail_loc=0x190
673 umount_client $MOUNT || error "(7) Fail to stop client!"
674 scrub_check_status 8 scanning
676 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
677 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
682 do_nodes $(comma_list $(mdts_nodes)) \
683 $LCTL set_param fail_loc=0 fail_val=0
685 echo "starting MDTs with OI scrub disabled (2)"
686 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
687 scrub_check_status 11 crashed
690 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
691 do_nodes $(comma_list $(mdts_nodes)) \
692 $LCTL set_param fail_val=3 fail_loc=0x190
694 echo "starting MDTs without disabling OI scrub"
695 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
696 scrub_check_status 14 scanning
698 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
699 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
701 scrub_check_status 15 failed
702 mount_client $MOUNT || error "(16) Fail to start client!"
705 do_nodes $(comma_list $(mdts_nodes)) \
706 $LCTL set_param fail_loc=0 fail_val=0
711 for n in $(seq $MDSCOUNT); do
712 stat $DIR/$tdir/mds$n/sanity-scrub.sh &
716 for n in $(seq $MDSCOUNT); do
718 error "(18) Fail to stat mds$n/sanity-scrub.sh"
721 scrub_check_status 19 completed
722 scrub_check_flags 20 ""
724 run_test 5 "OI scrub state machine"
728 echo "starting MDTs with OI scrub disabled"
729 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
730 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
731 scrub_check_flags 4 recreated,inconsistent
732 mount_client $MOUNT || error "(5) Fail to start client!"
736 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
737 do_nodes $(comma_list $(mdts_nodes)) \
738 $LCTL set_param fail_val=2 fail_loc=0x190
742 # Sleep 5 sec to guarantee at least one object is processed by OI scrub
744 # Fail the OI scrub to guarantee there is at least one checkpoint
745 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
746 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
748 scrub_check_status 7 failed
750 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
751 do_nodes $(comma_list $(mdts_nodes)) \
752 $LCTL set_param fail_val=3 fail_loc=0x190
755 for n in $(seq $MDSCOUNT); do
756 # stat will re-trigger OI scrub
757 stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
758 error "(8) Failed to stat mds$n/sanity-scrub.sh"
761 umount_client $MOUNT || error "(9) Fail to stop client!"
762 scrub_check_status 10 scanning
764 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
765 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
769 for n in $(seq $MDSCOUNT); do
770 position0[$n]=$(scrub_status $n |
771 awk '/^last_checkpoint_position/ {print $2}')
772 position0[$n]=$((${position0[$n]} + 1))
777 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
778 do_nodes $(comma_list $(mdts_nodes)) \
779 $LCTL set_param fail_val=3 fail_loc=0x190
781 echo "starting MDTs without disabling OI scrub"
782 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
784 scrub_check_status 13 scanning
787 for n in $(seq $MDSCOUNT); do
788 position1[$n]=$(scrub_status $n |
789 awk '/^latest_start_position/ {print $2}')
790 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
791 error "(14) Expected position ${position0[$n]}, but" \
792 "got ${position1[$n]}"
796 do_nodes $(comma_list $(mdts_nodes)) \
797 $LCTL set_param fail_loc=0 fail_val=0
799 scrub_check_status 15 completed
800 scrub_check_flags 16 ""
802 run_test 6 "OI scrub resumes from last checkpoint"
806 echo "starting MDTs with OI scrub disabled"
807 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
808 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
809 scrub_check_flags 4 recreated,inconsistent
810 mount_client $MOUNT || error "(5) Fail to start client!"
814 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
815 do_nodes $(comma_list $(mdts_nodes)) \
816 $LCTL set_param fail_val=3 fail_loc=0x190
821 for n in $(seq $MDSCOUNT); do
822 stat $DIR/$tdir/mds$n/${tfile}300 ||
823 error "(7) Failed to stat mds$n/${tfile}300!"
826 scrub_check_status 8 scanning
827 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
828 scrub_check_flags 9 inconsistent,auto
830 scrub_check_flags 9 recreated,inconsistent,auto
833 do_nodes $(comma_list $(mdts_nodes)) \
834 $LCTL set_param fail_loc=0 fail_val=0
836 scrub_check_status 10 completed
839 run_test 7 "System is available during OI scrub scanning"
843 echo "starting MDTs with OI scrub disabled"
844 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
845 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
846 scrub_check_flags 4 recreated,inconsistent
848 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
849 do_nodes $(comma_list $(mdts_nodes)) \
850 $LCTL set_param fail_val=1 fail_loc=0x190
853 scrub_check_status 6 scanning
855 scrub_check_status 8 stopped
857 scrub_check_status 10 scanning
859 do_nodes $(comma_list $(mdts_nodes)) \
860 $LCTL set_param fail_loc=0 fail_val=0
862 scrub_check_status 11 completed
863 scrub_check_flags 12 ""
865 run_test 8 "Control OI scrub manually"
868 # Skip scrub speed test for ZFS because its performance is unstable
869 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
870 skip "test scrub speed only on ldiskfs" && return
872 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
873 skip "Testing on UP system, the speed may be inaccurate."
879 echo "starting MDTs with OI scrub disabled"
880 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
881 scrub_check_flags 4 recreated,inconsistent
883 local BASE_SPEED1=100
885 # OI scrub should run with full speed under inconsistent case
886 scrub_start 5 -s $BASE_SPEED1
889 scrub_check_status 6 completed
890 scrub_check_flags 7 ""
891 # OI scrub should run with limited speed under non-inconsistent case
892 scrub_start 8 -s $BASE_SPEED1 -r
895 scrub_check_status 9 scanning
897 # Account for the 1024 pre-fetched items. There may also be
898 # a timing error; normally it should be less than 2 seconds.
899 # We allow another 20% scheduling error.
900 local PRE_FETCHED=1024
902 # MAX_MARGIN = 1.2 = 12 / 10
903 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
904 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
906 for n in $(seq $MDSCOUNT); do
907 local SPEED=$(scrub_status $n | \
908 awk '/^average_speed/ { print $2 }')
909 [ $SPEED -lt $MAX_SPEED ] ||
910 error "(10) Got speed $SPEED, expected less than" \
915 local BASE_SPEED2=300
917 for n in $(seq $MDSCOUNT); do
918 do_facet mds$n $LCTL set_param -n \
919 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
923 # MIN_MARGIN = 0.8 = 8 / 10
924 local MIN_SPEED=$(((PRE_FETCHED + \
925 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
926 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
927 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
928 # MAX_MARGIN = 1.2 = 12 / 10
929 MAX_SPEED=$(((PRE_FETCHED + \
930 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
931 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
932 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
933 for n in $(seq $MDSCOUNT); do
934 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
935 [ $SPEED -gt $MIN_SPEED ] ||
936 error "(11) Got speed $SPEED, expected more than" \
938 [ $SPEED -lt $MAX_SPEED ] ||
939 error "(12) Got speed $SPEED, expected less than" \
942 do_facet mds$n $LCTL set_param -n \
943 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
946 scrub_check_status 13 completed
948 run_test 9 "OI scrub speed control"
952 echo "starting mds$n with OI scrub disabled (1)"
953 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
954 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
955 scrub_check_flags 4 recreated,inconsistent
956 mount_client $MOUNT || error "(5) Fail to start client!"
960 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
961 do_nodes $(comma_list $(mdts_nodes)) \
962 $LCTL set_param fail_val=1 fail_loc=0x190
965 scrub_check_status 7 scanning
966 umount_client $MOUNT || error "(8) Fail to stop client!"
968 echo "starting MDTs with OI scrub disabled (2)"
969 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
970 scrub_check_status 11 paused
972 echo "starting MDTs without disabling OI scrub"
973 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
974 scrub_check_status 14 scanning
976 do_nodes $(comma_list $(mdts_nodes)) \
977 $LCTL set_param fail_loc=0 fail_val=0
979 scrub_check_status 15 completed
980 scrub_check_flags 16 ""
982 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
984 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
987 echo "starting MDTs with OI scrub disabled"
988 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
989 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
990 scrub_check_flags 4 recreated,inconsistent
992 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
993 do_nodes $(comma_list $(mdts_nodes)) \
994 $LCTL set_param fail_val=3 fail_loc=0x190
997 scrub_check_status 6 scanning
999 echo "starting MDTs with OI scrub disabled"
1000 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
1001 scrub_check_status 9 paused
1003 echo "starting MDTs without disabling OI scrub"
1004 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
1005 scrub_check_status 12 scanning
1007 do_nodes $(comma_list $(mdts_nodes)) \
1008 $LCTL set_param fail_loc=0 fail_val=0
1010 scrub_check_status 13 completed
1011 scrub_check_flags 14 ""
1013 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
1016 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1017 skip "ldiskfs special test" && return
1022 check_mount_and_prep
1024 for n in $(seq $MDSCOUNT); do
1025 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
1026 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
1028 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
1029 error "(2) Fail to create under $tdir/mds$n"
1032 # reset OI scrub start point by force
1034 scrub_check_status 4 completed
1039 # OI scrub should skip the newly created objects on the first access.
1040 # Note that we create a new llog for every OST on every startup, and
1041 # new features can make this even less stable, so we only check that
1042 # the number of skipped objects is at least the number of known created ones
1043 local MINIMUM=$((CREATED + 1)) # files + directory
1044 for n in $(seq $MDSCOUNT); do
1045 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1046 [ $SKIPPED -lt $MINIMUM ] &&
1047 error "(5) Expect at least $MINIMUM objects" \
1048 "skipped on mds$n, but got $SKIPPED"
1050 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1053 # reset OI scrub start point by force
1055 scrub_check_status 7 completed
1057 # OI scrub should skip the newly created objects only once
1058 for n in $(seq $MDSCOUNT); do
1059 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1060 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1062 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1063 error "(8) Expect 0 objects skipped on mds$n, but" \
1067 run_test 11 "OI scrub skips the new created objects only once"
1070 check_mount_and_prep
1071 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1073 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1074 do_facet ost1 $LCTL set_param fail_loc=0x195
1075 local count=$(precreated_ost_obj_count 0 0)
1077 createmany -o $DIR/$tdir/f $((count + 32))
1078 umount_client $MOUNT || error "(1) Fail to stop client!"
1080 stop ost1 || error "(2) Fail to stop ost1"
1082 #define OBD_FAIL_OST_NODESTROY 0x233
1083 do_facet ost1 $LCTL set_param fail_loc=0x233
1085 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1086 error "(3) Fail to start ost1"
1088 mount_client $MOUNT || error "(4) Fail to start client!"
1090 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1092 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1094 do_facet ost1 $LCTL set_param fail_loc=0
1095 wait_update_facet ost1 "$LCTL get_param -n \
1096 osd-*.$(facet_svc ost1).oi_scrub |
1097 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1098 error "(7) Expected '$expected' on ost1"
1100 ls -ail $DIR/$tdir > /dev/null || {
1102 error "(8) ls should succeed"
1105 run_test 12 "OI scrub can rebuild invalid /O entries"
1108 check_mount_and_prep
1109 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1111 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1112 do_facet ost1 $LCTL set_param fail_loc=0x196
1113 local count=$(precreated_ost_obj_count 0 0)
1115 createmany -o $DIR/$tdir/f $((count + 32))
1116 do_facet ost1 $LCTL set_param fail_loc=0
1118 umount_client $MOUNT || error "(1) Fail to stop client!"
1120 stop ost1 || error "(2) Fail to stop ost1"
1122 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1123 error "(3) Fail to start ost1"
1125 mount_client $MOUNT || error "(4) Fail to start client!"
1127 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1129 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1131 wait_update_facet ost1 "$LCTL get_param -n \
1132 osd-*.$(facet_svc ost1).oi_scrub |
1133 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1134 error "(7) Expected '$expected' on ost1"
1136 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1138 run_test 13 "OI scrub can rebuild missed /O entries"
1141 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1142 skip "ldiskfs special test" && return
1144 check_mount_and_prep
1145 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1147 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1148 do_facet ost1 $LCTL set_param fail_loc=0x196
1149 local count=$(precreated_ost_obj_count 0 0)
1151 createmany -o $DIR/$tdir/f $((count + 1000))
1152 do_facet ost1 $LCTL set_param fail_loc=0
1154 umount_client $MOUNT || error "(1) Fail to stop client!"
1156 stop ost1 || error "(2) Fail to stop ost1"
1159 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1160 error "(3) Fail to run e2fsck error"
1162 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1163 error "(4) Fail to start ost1"
1165 mount_client $MOUNT || error "(5) Fail to start client!"
1167 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1168 awk '/^lf_repa[ri]*ed/ { print $2 }')
1169 [ $LF_REPAIRED -ge 1000 ] ||
1170 error "(6) Some entry under /lost+found should be repaired"
1172 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1174 run_test 14 "OI scrub can repair objects under lost+found"
1179 formatall > /dev/null
1180 setupall > /dev/null
1183 echo "starting MDTs with OI scrub disabled"
1184 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1185 scrub_check_status 3 init
1186 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1187 scrub_check_flags 4 recreated,inconsistent
1189 # run under dryrun mode
1190 scrub_start 5 --dryrun
1191 scrub_check_status 6 completed
1192 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1193 scrub_check_flags 7 inconsistent
1196 scrub_check_flags 7 recreated,inconsistent
1199 scrub_check_params 8 dryrun
1200 scrub_check_repaired 9 $repaired 1
1202 # run under dryrun mode again
1203 scrub_start 10 --dryrun
1204 scrub_check_status 11 completed
1205 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1206 scrub_check_flags 12 inconsistent
1208 scrub_check_flags 12 recreated,inconsistent
1210 scrub_check_params 13 dryrun
1211 scrub_check_repaired 14 $repaired 1
1213 # run under normal mode
1215 scrub_check_status 16 completed
1216 scrub_check_flags 17 ""
1217 scrub_check_params 18 ""
1218 scrub_check_repaired 19 $repaired 0
1220 # run under normal mode again
1222 scrub_check_status 21 completed
1223 scrub_check_flags 22 ""
1224 scrub_check_params 23 ""
1225 scrub_check_repaired 24 0 0
1227 run_test 15 "Dryrun mode OI scrub"
1230 check_mount_and_prep
1231 scrub_enable_index_backup
1233 #define OBD_FAIL_OSD_INDEX_CRASH 0x199
1234 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x199
1236 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
1238 echo "starting MDTs without disabling OI scrub"
1239 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
1240 mount_client $MOUNT || error "(2) Fail to start client!"
1242 scrub_disable_index_backup
1244 run_test 16 "Initial OI scrub can rebuild crashed index objects"
1246 # restore MDS/OST size
1247 MDSSIZE=${SAVED_MDSSIZE}
1248 OSTSIZE=${SAVED_OSTSIZE}
1249 OSTCOUNT=${SAVED_OSTCOUNT}
1251 # cleanup the system at last