3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16 . $LUSTRE/tests/test-framework.sh
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
32 SAVED_MDSSIZE=${MDSSIZE}
33 SAVED_OSTSIZE=${OSTSIZE}
34 SAVED_OSTCOUNT=${OSTCOUNT}
36 # use small MDS + OST size to speed formatting time
37 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
38 # 400M MDT device can guarantee uninitialized groups during the OI scrub
42 # no need for too many OSTs; cap the count to reduce the format/start/stop overhead
43 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
45 # build up a clean test environment.
51 MDT_DEV="${FSNAME}-MDT0000"
52 OST_DEV="${FSNAME}-OST0000"
53 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
59 # use "lfsck_start -A" when we no longer need to test interop
60 for n in $(seq $MDSCOUNT); do
61 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
63 error "($error_id) Failed to start OI scrub on mds$n"
71 # use "lfsck_stop -A" when we no longer need to test interop
72 for n in $(seq $MDSCOUNT); do
73 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
74 error "($error_id) Failed to stop OI scrub on mds$n"
81 do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
84 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
85 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
86 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
87 SHOW_SCRUB="do_facet $SINGLEMDS \
88 $LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
89 SHOW_SCRUB_ON_OST="do_facet ost1 \
90 $LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
91 MOUNT_OPTS_SCRUB="-o user_xattr"
92 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
101 echo "preparing... $(date)"
102 for n in $(seq $MDSCOUNT); do
103 echo "creating $nfiles files on mds$n"
104 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
105 error "Failed to create directory mds$n"
106 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
107 error "Failed to copy files to mds$n"
108 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
109 error "mkdir failed on mds$n"
110 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
111 /dev/null || error "create failed on mds$n"
112 if [[ $nfiles -gt 0 ]]; then
113 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
114 /dev/null || error "createmany failed on mds$n"
117 echo "prepared $(date)."
119 [ ! -z $inject ] && [ $inject -eq 2 ] && {
120 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
121 do_nodes $(comma_list $(mdts_nodes)) \
122 $LCTL set_param fail_loc=0x198
124 for n in $(seq $MDSCOUNT); do
125 cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
126 error "Fail to copy runas to MDS$n"
129 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
132 [ ! -z $inject ] && [ $inject -eq 1 ] &&
133 [ $(facet_fstype $SINGLEMDS) = "zfs" ] && {
134 #define OBD_FAIL_OSD_FID_MAPPING 0x193
135 do_nodes $(comma_list $(mdts_nodes)) \
136 $LCTL set_param fail_loc=0x193
138 for n in $(seq $MDSCOUNT); do
139 chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
140 chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
143 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
146 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
148 # sync local transactions on every MDT
149 do_nodes $(comma_list $(mdts_nodes)) \
150 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
152 # wait for a while to cancel update logs after the transactions are committed.
155 # sync again to guarantee that everything is done.
156 do_nodes $(comma_list $(mdts_nodes)) \
157 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
159 for n in $(seq $MDSCOUNT); do
161 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
164 [ ! -z $inject ] && [ $(facet_fstype $SINGLEMDS) = "ldiskfs" ] && {
165 if [ $inject -eq 1 ]; then
166 for n in $(seq $MDSCOUNT); do
167 mds_backup_restore mds$n ||
168 error "Backup/restore on mds$n failed"
170 elif [ $inject -eq 2 ]; then
181 for n in $(seq $MDSCOUNT); do
182 start mds$n $(mdsdevname $n) $opts >/dev/null ||
183 error "($error_id) Failed to start mds$n"
191 for n in $(seq $MDSCOUNT); do
192 echo "stopping mds$n"
193 stop mds$n >/dev/null ||
194 error "($error_id) Failed to stop mds$n"
198 scrub_check_status() {
203 for n in $(seq $MDSCOUNT); do
204 wait_update_facet mds$n "$LCTL get_param -n \
205 osd-*.$(facet_svc mds$n).oi_scrub |
206 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
207 error "($error_id) Expected '$expected' on mds$n"
211 scrub_check_flags() {
217 for n in $(seq $MDSCOUNT); do
218 actual=$(do_facet mds$n $LCTL get_param -n \
219 osd-*.$(facet_svc mds$n).oi_scrub |
220 awk '/^flags/ { print $2 }')
221 if [ "$actual" != "$expected" ]; then
222 error "($error_id) Expected '$expected' on mds$n, but" \
228 scrub_check_params() {
234 for n in $(seq $MDSCOUNT); do
235 actual=$(do_facet mds$n $LCTL get_param -n \
236 osd-*.$(facet_svc mds$n).oi_scrub |
237 awk '/^param/ { print $2 }')
238 if [ "$actual" != "$expected" ]; then
239 error "($error_id) Expected '$expected' on mds$n, but" \
245 scrub_check_repaired() {
252 for n in $(seq $MDSCOUNT); do
253 if [ $dryrun -eq 1 ]; then
254 actual=$(do_facet mds$n $LCTL get_param -n \
255 osd-*.$(facet_svc mds$n).oi_scrub |
256 awk '/^inconsistent:/ { print $2 }')
258 actual=$(do_facet mds$n $LCTL get_param -n \
259 osd-*.$(facet_svc mds$n).oi_scrub |
260 awk '/^updated:/ { print $2 }')
263 if [ $expected -eq 0 -a $actual -ne 0 ]; then
264 error "($error_id) Expected no repaired on mds$n, but" \
268 if [ $expected -ne 0 -a $actual -lt $expected ]; then
269 error "($error_id) Expected '$expected' on mds$n, but" \
279 for n in $(seq $MDSCOUNT); do
280 diff -q $LUSTRE/tests/test-framework.sh \
281 $DIR/$tdir/mds$n/test-framework.sh ||
282 error "($error_id) File data check failed"
286 scrub_check_data2() {
291 for n in $(seq $MDSCOUNT); do
292 diff -q $LUSTRE/tests/$filename \
293 $DIR/$tdir/mds$n/$filename ||
294 error "($error_id) File data check failed"
299 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
305 for n in $(seq $MDSCOUNT); do
306 mds_remove_ois mds$n $index ||
307 error "($error_id) Failed to remove OI .$index on mds$n"
311 scrub_enable_auto() {
312 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
317 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
321 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
322 osd-*.*.full_scrub_ratio=$ratio
325 full_scrub_threshold_rate() {
326 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
330 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
331 osd-*.*.full_scrub_threshold_rate=$rate
336 echo "starting MDTs without disabling OI scrub"
337 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
338 scrub_check_status 2 init
339 scrub_check_flags 3 ""
340 mount_client $MOUNT || error "(4) Fail to start client!"
343 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
347 echo "start $SINGLEMDS without disabling OI scrub"
348 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
350 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
351 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
353 mount_client $MOUNT || error "(4) Fail to start client!"
354 #define OBD_FAIL_OSD_FID_MAPPING 0x193
355 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
356 # update .lustre OI mapping
358 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
359 umount_client $MOUNT || error "(5) Fail to stop client!"
361 echo "stop $SINGLEMDS"
362 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
364 echo "start $SINGLEMDS with disabling OI scrub"
365 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
366 error "(7) Fail to start MDS!"
368 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
369 [ "$FLAGS" == "inconsistent" ] ||
370 error "(9) Expect 'inconsistent', but got '$FLAGS'"
372 run_test 1a "Auto trigger initial OI scrub when server mounts"
376 echo "start MDTs without disabling OI scrub"
377 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
378 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
379 scrub_check_status 3 completed
380 mount_client $MOUNT || error "(4) Fail to start client!"
381 scrub_check_data2 runas 5
382 scrub_check_status 6 completed
384 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
387 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
388 skip "ldiskfs special test" && return
392 # OI files to be removed:
394 # idx 2: oi.16.{2,4,8,16,32}
395 # idx 3: oi.16.{3,9,27}
396 for index in 0 2 3; do
398 scrub_remove_ois 1 $index
399 echo "start MDTs with OI scrub disabled"
400 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
401 scrub_check_flags 3 recreated
403 scrub_check_status 5 completed
404 scrub_check_flags 6 ""
407 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
410 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
411 skip "ldiskfs special test" && return
414 echo "starting MDTs without disabling OI scrub"
415 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
416 scrub_check_status 3 completed
417 mount_client $MOUNT || error "(4) Fail to start client!"
420 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
422 # test_3 is obsolete, it will be covered by test_5.
424 formatall > /dev/null
428 echo "starting MDTs with OI scrub disabled"
429 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
430 scrub_check_status 3 init
431 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
432 scrub_check_flags 4 recreated,inconsistent
434 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
438 echo "starting MDTs with OI scrub disabled"
439 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
440 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
441 scrub_check_flags 4 recreated,inconsistent
442 mount_client $MOUNT || error "(5) Fail to start client!"
448 scrub_check_status 7 completed
449 scrub_check_flags 8 ""
452 for n in $(seq $MDSCOUNT); do
453 updated0[$n]=$(scrub_status $n |
454 awk '/^prior_updated/ { print $2 }')
457 scrub_check_data2 sanity-scrub.sh 9
461 for n in $(seq $MDSCOUNT); do
462 updated1[$n]=$(scrub_status $n |
463 awk '/^prior_updated/ { print $2 }')
464 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
465 error "(10) NOT auto trigger full scrub as expected"
468 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
471 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
472 skip "ldiskfs special test" && return
475 echo "starting MDTs with OI scrub disabled"
476 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
477 scrub_check_flags 4 recreated,inconsistent
478 mount_client $MOUNT || error "(5) Fail to start client!"
481 full_scrub_threshold_rate 10000
485 scrub_check_status 7 completed
486 scrub_check_flags 8 ""
489 for n in $(seq $MDSCOUNT); do
490 updated0[$n]=$(scrub_status $n |
491 awk '/^prior_updated/ { print $2 }')
493 echo "OI scrub on MDS$n status for the 1st time:"
494 do_facet mds$n $LCTL get_param -n \
495 osd-*.$(facet_svc mds$n).oi_scrub
498 scrub_check_data2 sanity-scrub.sh 9
501 scrub_check_status 10 completed
502 scrub_check_flags 11 ""
505 for n in $(seq $MDSCOUNT); do
506 updated1[$n]=$(scrub_status $n |
507 awk '/^prior_updated/ { print $2 }')
509 echo "OI scrub on MDS$n status for the 2nd time:"
510 do_facet mds$n $LCTL get_param -n \
511 osd-*.$(facet_svc mds$n).oi_scrub
513 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
514 error "(12) Auto trigger full scrub unexpectedly"
517 for n in $(seq $MDSCOUNT); do
518 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
519 error "(13) fail to ls"
523 scrub_check_status 14 completed
524 scrub_check_flags 15 ""
526 for n in $(seq $MDSCOUNT); do
527 updated0[$n]=$(scrub_status $n |
528 awk '/^prior_updated/ { print $2 }')
530 echo "OI scrub on MDS$n status for the 3rd time:"
531 do_facet mds$n $LCTL get_param -n \
532 osd-*.$(facet_svc mds$n).oi_scrub
534 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
535 error "(16) Auto trigger full scrub unexpectedly"
538 for n in $(seq $MDSCOUNT); do
539 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
543 for n in $(seq $MDSCOUNT); do
544 updated1[$n]=$(scrub_status $n |
545 awk '/^prior_updated/ { print $2 }')
546 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
547 echo "OI scrub on MDS$n status for the 4th time:"
548 do_facet mds$n $LCTL get_param -n \
549 osd-*.$(facet_svc mds$n).oi_scrub
551 error "(18) NOT auto trigger full scrub as expected"
555 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
558 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
559 skip "ldiskfs special test" && return
562 echo "starting MDTs with OI scrub disabled"
563 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
564 scrub_check_flags 4 recreated,inconsistent
565 mount_client $MOUNT || error "(5) Fail to start client!"
568 full_scrub_threshold_rate 20
572 scrub_check_status 7 completed
573 scrub_check_flags 8 ""
576 for n in $(seq $MDSCOUNT); do
577 updated0[$n]=$(scrub_status $n |
578 awk '/^prior_updated/ { print $2 }')
580 echo "OI scrub on MDS$n status for the 1st time:"
581 do_facet mds$n $LCTL get_param -n \
582 osd-*.$(facet_svc mds$n).oi_scrub
585 scrub_check_data2 sanity-scrub.sh 9
588 scrub_check_status 10 completed
589 scrub_check_flags 11 ""
592 for n in $(seq $MDSCOUNT); do
593 updated1[$n]=$(scrub_status $n |
594 awk '/^prior_updated/ { print $2 }')
596 echo "OI scrub on MDS$n status for the 2nd time:"
597 do_facet mds$n $LCTL get_param -n \
598 osd-*.$(facet_svc mds$n).oi_scrub
600 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
601 error "(12) Auto trigger full scrub unexpectedly"
604 for n in $(seq $MDSCOUNT); do
605 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
606 error "(13) fail to ls"
610 scrub_check_status 14 completed
611 scrub_check_flags 15 ""
613 for n in $(seq $MDSCOUNT); do
614 updated0[$n]=$(scrub_status $n |
615 awk '/^prior_updated/ { print $2 }')
617 echo "OI scrub on MDS$n status for the 3rd time:"
618 do_facet mds$n $LCTL get_param -n \
619 osd-*.$(facet_svc mds$n).oi_scrub
621 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
622 error "(16) Auto trigger full scrub unexpectedly"
625 for n in $(seq $MDSCOUNT); do
626 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
630 for n in $(seq $MDSCOUNT); do
631 updated1[$n]=$(scrub_status $n |
632 awk '/^prior_updated/ { print $2 }')
633 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
634 echo "OI scrub on MDS$n status for the 4th time:"
635 do_facet mds$n $LCTL get_param -n \
636 osd-*.$(facet_svc mds$n).oi_scrub
638 error "(18) NOT auto trigger full scrub as expected"
642 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
645 formatall > /dev/null
649 echo "starting MDTs with OI scrub disabled (1)"
650 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
651 scrub_check_status 3 init
652 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
653 scrub_check_flags 4 recreated,inconsistent
654 mount_client $MOUNT || error "(5) Fail to start client!"
658 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
659 do_nodes $(comma_list $(mdts_nodes)) \
660 $LCTL set_param fail_val=3 fail_loc=0x190
663 umount_client $MOUNT || error "(7) Fail to stop client!"
664 scrub_check_status 8 scanning
666 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
667 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
672 do_nodes $(comma_list $(mdts_nodes)) \
673 $LCTL set_param fail_loc=0 fail_val=0
675 echo "starting MDTs with OI scrub disabled (2)"
676 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
677 scrub_check_status 11 crashed
680 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
681 do_nodes $(comma_list $(mdts_nodes)) \
682 $LCTL set_param fail_val=3 fail_loc=0x190
684 echo "starting MDTs without disabling OI scrub"
685 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
686 scrub_check_status 14 scanning
688 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
689 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
691 scrub_check_status 15 failed
692 mount_client $MOUNT || error "(16) Fail to start client!"
695 do_nodes $(comma_list $(mdts_nodes)) \
696 $LCTL set_param fail_loc=0 fail_val=0
701 for n in $(seq $MDSCOUNT); do
702 stat $DIR/$tdir/mds$n/sanity-scrub.sh &
706 for n in $(seq $MDSCOUNT); do
708 error "(18) Fail to stat mds$n/sanity-scrub.sh"
711 scrub_check_status 19 completed
712 scrub_check_flags 20 ""
714 run_test 5 "OI scrub state machine"
718 echo "starting MDTs with OI scrub disabled"
719 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
720 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
721 scrub_check_flags 4 recreated,inconsistent
722 mount_client $MOUNT || error "(5) Fail to start client!"
726 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
727 do_nodes $(comma_list $(mdts_nodes)) \
728 $LCTL set_param fail_val=2 fail_loc=0x190
732 # Sleep 5 sec to guarantee at least one object is processed by OI scrub
734 # Fail the OI scrub to guarantee there is at least one checkpoint
735 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
736 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
738 scrub_check_status 7 failed
740 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
741 do_nodes $(comma_list $(mdts_nodes)) \
742 $LCTL set_param fail_val=3 fail_loc=0x190
745 for n in $(seq $MDSCOUNT); do
746 # stat will re-trigger OI scrub
747 stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
748 error "(8) Failed to stat mds$n/sanity-scrub.sh"
751 umount_client $MOUNT || error "(9) Fail to stop client!"
752 scrub_check_status 10 scanning
754 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
755 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
759 for n in $(seq $MDSCOUNT); do
760 position0[$n]=$(scrub_status $n |
761 awk '/^last_checkpoint_position/ {print $2}')
762 position0[$n]=$((${position0[$n]} + 1))
767 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
768 do_nodes $(comma_list $(mdts_nodes)) \
769 $LCTL set_param fail_val=3 fail_loc=0x190
771 echo "starting MDTs without disabling OI scrub"
772 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
774 scrub_check_status 13 scanning
777 for n in $(seq $MDSCOUNT); do
778 position1[$n]=$(scrub_status $n |
779 awk '/^latest_start_position/ {print $2}')
780 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
781 error "(14) Expected position ${position0[$n]}, but" \
782 "got ${position1[$n]}"
786 do_nodes $(comma_list $(mdts_nodes)) \
787 $LCTL set_param fail_loc=0 fail_val=0
789 scrub_check_status 15 completed
790 scrub_check_flags 16 ""
792 run_test 6 "OI scrub resumes from last checkpoint"
796 echo "starting MDTs with OI scrub disabled"
797 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
798 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
799 scrub_check_flags 4 recreated,inconsistent
800 mount_client $MOUNT || error "(5) Fail to start client!"
804 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
805 do_nodes $(comma_list $(mdts_nodes)) \
806 $LCTL set_param fail_val=3 fail_loc=0x190
811 for n in $(seq $MDSCOUNT); do
812 stat $DIR/$tdir/mds$n/${tfile}300 ||
813 error "(7) Failed to stat mds$n/${tfile}300!"
816 scrub_check_status 8 scanning
817 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
818 scrub_check_flags 9 inconsistent,auto
820 scrub_check_flags 9 recreated,inconsistent,auto
823 do_nodes $(comma_list $(mdts_nodes)) \
824 $LCTL set_param fail_loc=0 fail_val=0
826 scrub_check_status 10 completed
829 run_test 7 "System is available during OI scrub scanning"
833 echo "starting MDTs with OI scrub disabled"
834 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
835 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
836 scrub_check_flags 4 recreated,inconsistent
838 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
839 do_nodes $(comma_list $(mdts_nodes)) \
840 $LCTL set_param fail_val=1 fail_loc=0x190
843 scrub_check_status 6 scanning
845 scrub_check_status 8 stopped
847 scrub_check_status 10 scanning
849 do_nodes $(comma_list $(mdts_nodes)) \
850 $LCTL set_param fail_loc=0 fail_val=0
852 scrub_check_status 11 completed
853 scrub_check_flags 12 ""
855 run_test 8 "Control OI scrub manually"
858 # Skip the scrub speed test on ZFS because its performance is unstable
859 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
860 skip "test scrub speed only on ldiskfs" && return
862 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
863 skip "Testing on UP system, the speed may be inaccurate."
869 echo "starting MDTs with OI scrub disabled"
870 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
871 scrub_check_flags 4 recreated,inconsistent
873 local BASE_SPEED1=100
875 # OI scrub should run with full speed under inconsistent case
876 scrub_start 5 -s $BASE_SPEED1
879 scrub_check_status 6 completed
880 scrub_check_flags 7 ""
881 # OI scrub should run with limited speed under non-inconsistent case
882 scrub_start 8 -s $BASE_SPEED1 -r
885 scrub_check_status 9 scanning
887 # Take into account that there are 1024 pre-fetched items. There may
888 # also be a timing error, which should normally be less than 2 seconds.
889 # We allow another 20% for scheduling error.
890 local PRE_FETCHED=1024
892 # MAX_MARGIN = 1.2 = 12 / 10
893 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
894 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
896 for n in $(seq $MDSCOUNT); do
897 local SPEED=$(scrub_status $n | \
898 awk '/^average_speed/ { print $2 }')
899 [ $SPEED -lt $MAX_SPEED ] ||
900 error "(10) Got speed $SPEED, expected less than" \
905 local BASE_SPEED2=300
907 for n in $(seq $MDSCOUNT); do
908 do_facet mds$n $LCTL set_param -n \
909 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
913 # MIN_MARGIN = 0.8 = 8 / 10
914 local MIN_SPEED=$(((PRE_FETCHED + \
915 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
916 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
917 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
918 # MAX_MARGIN = 1.2 = 12 / 10
919 MAX_SPEED=$(((PRE_FETCHED + \
920 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
921 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
922 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
923 for n in $(seq $MDSCOUNT); do
924 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
925 [ $SPEED -gt $MIN_SPEED ] ||
926 error "(11) Got speed $SPEED, expected more than" \
928 [ $SPEED -lt $MAX_SPEED ] ||
929 error "(12) Got speed $SPEED, expected less than" \
932 do_facet mds$n $LCTL set_param -n \
933 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
936 scrub_check_status 13 completed
938 run_test 9 "OI scrub speed control"
942 echo "starting mds$n with OI scrub disabled (1)"
943 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
944 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
945 scrub_check_flags 4 recreated,inconsistent
946 mount_client $MOUNT || error "(5) Fail to start client!"
950 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
951 do_nodes $(comma_list $(mdts_nodes)) \
952 $LCTL set_param fail_val=1 fail_loc=0x190
955 scrub_check_status 7 scanning
956 umount_client $MOUNT || error "(8) Fail to stop client!"
958 echo "starting MDTs with OI scrub disabled (2)"
959 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
960 scrub_check_status 11 paused
962 echo "starting MDTs without disabling OI scrub"
963 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
964 scrub_check_status 14 scanning
966 do_nodes $(comma_list $(mdts_nodes)) \
967 $LCTL set_param fail_loc=0 fail_val=0
969 scrub_check_status 15 completed
970 scrub_check_flags 16 ""
972 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
974 # test_10b is obsolete; it will be covered by related sanity-lfsck tests.
977 echo "starting MDTs with OI scrub disabled"
978 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
979 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
980 scrub_check_flags 4 recreated,inconsistent
982 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
983 do_nodes $(comma_list $(mdts_nodes)) \
984 $LCTL set_param fail_val=3 fail_loc=0x190
987 scrub_check_status 6 scanning
989 echo "starting MDTs with OI scrub disabled"
990 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
991 scrub_check_status 9 paused
993 echo "starting MDTs without disabling OI scrub"
994 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
995 scrub_check_status 12 scanning
997 do_nodes $(comma_list $(mdts_nodes)) \
998 $LCTL set_param fail_loc=0 fail_val=0
1000 scrub_check_status 13 completed
1001 scrub_check_flags 14 ""
1003 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
1006 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1007 skip "ldiskfs special test" && return
1012 check_mount_and_prep
1014 for n in $(seq $MDSCOUNT); do
1015 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
1016 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
1018 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
1019 error "(2) Fail to create under $tdir/mds$n"
1022 # reset OI scrub start point by force
1024 scrub_check_status 4 completed
1029 # OI scrub should skip the newly created objects on the first access.
1030 # Note that we're creating a new llog for every OST on every startup, and
1031 # new features can make this even less stable, so we only check that
1032 # the number of skipped files is more than the number of known created files.
1033 local MINIMUM=$((CREATED + 1)) # files + directory
1034 for n in $(seq $MDSCOUNT); do
1035 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1036 [ $SKIPPED -lt $MINIMUM ] &&
1037 error "(5) Expect at least $MINIMUM objects" \
1038 "skipped on mds$n, but got $SKIPPED"
1040 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1043 # reset OI scrub start point by force
1045 scrub_check_status 7 completed
1047 # OI scrub should skip each newly created object only once
1048 for n in $(seq $MDSCOUNT); do
1049 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1050 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1052 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1053 error "(8) Expect 0 objects skipped on mds$n, but" \
1057 run_test 11 "OI scrub skips the new created objects only once"
1060 check_mount_and_prep
1061 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1063 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1064 do_facet ost1 $LCTL set_param fail_loc=0x195
1065 local count=$(precreated_ost_obj_count 0 0)
1067 createmany -o $DIR/$tdir/f $((count + 32))
1068 umount_client $MOUNT || error "(1) Fail to stop client!"
1070 stop ost1 || error "(2) Fail to stop ost1"
1072 #define OBD_FAIL_OST_NODESTROY 0x233
1073 do_facet ost1 $LCTL set_param fail_loc=0x233
1075 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1076 error "(3) Fail to start ost1"
1078 mount_client $MOUNT || error "(4) Fail to start client!"
1080 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1082 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1084 do_facet ost1 $LCTL set_param fail_loc=0
1085 wait_update_facet ost1 "$LCTL get_param -n \
1086 osd-*.$(facet_svc ost1).oi_scrub |
1087 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1088 error "(7) Expected '$expected' on ost1"
1090 ls -ail $DIR/$tdir > /dev/null || {
1092 error "(8) ls should succeed"
1095 run_test 12 "OI scrub can rebuild invalid /O entries"
1098 check_mount_and_prep
1099 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1101 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1102 do_facet ost1 $LCTL set_param fail_loc=0x196
1103 local count=$(precreated_ost_obj_count 0 0)
1105 createmany -o $DIR/$tdir/f $((count + 32))
1106 do_facet ost1 $LCTL set_param fail_loc=0
1108 umount_client $MOUNT || error "(1) Fail to stop client!"
1110 stop ost1 || error "(2) Fail to stop ost1"
1112 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1113 error "(3) Fail to start ost1"
1115 mount_client $MOUNT || error "(4) Fail to start client!"
1117 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1119 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1121 wait_update_facet ost1 "$LCTL get_param -n \
1122 osd-*.$(facet_svc ost1).oi_scrub |
1123 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1124 error "(7) Expected '$expected' on ost1"
1126 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1128 run_test 13 "OI scrub can rebuild missed /O entries"
1131 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1132 skip "ldiskfs special test" && return
1134 check_mount_and_prep
1135 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1137 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1138 do_facet ost1 $LCTL set_param fail_loc=0x196
1139 local count=$(precreated_ost_obj_count 0 0)
1141 createmany -o $DIR/$tdir/f $((count + 1000))
1142 do_facet ost1 $LCTL set_param fail_loc=0
1144 umount_client $MOUNT || error "(1) Fail to stop client!"
1146 stop ost1 || error "(2) Fail to stop ost1"
1149 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1150 error "(3) Fail to run e2fsck error"
1152 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1153 error "(4) Fail to start ost1"
1155 mount_client $MOUNT || error "(5) Fail to start client!"
1157 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1158 awk '/^lf_repa[ri]*ed/ { print $2 }')
1159 [ $LF_REPAIRED -ge 1000 ] ||
1160 error "(6) Some entry under /lost+found should be repaired"
1162 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1164 run_test 14 "OI scrub can repair objects under lost+found"
1169 formatall > /dev/null
1170 setupall > /dev/null
1173 echo "starting MDTs with OI scrub disabled"
1174 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1175 scrub_check_status 3 init
1176 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1177 scrub_check_flags 4 recreated,inconsistent
1179 # run under dryrun mode
1180 scrub_start 5 --dryrun
1181 scrub_check_status 6 completed
1182 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1183 scrub_check_flags 7 inconsistent
1186 scrub_check_flags 7 recreated,inconsistent
1189 scrub_check_params 8 dryrun
1190 scrub_check_repaired 9 $repaired 1
1192 # run under dryrun mode again
1193 scrub_start 10 --dryrun
1194 scrub_check_status 11 completed
1195 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1196 scrub_check_flags 12 inconsistent
1198 scrub_check_flags 12 recreated,inconsistent
1200 scrub_check_params 13 dryrun
1201 scrub_check_repaired 14 $repaired 1
1203 # run under normal mode
1205 scrub_check_status 16 completed
1206 scrub_check_flags 17 ""
1207 scrub_check_params 18 ""
1208 scrub_check_repaired 19 $repaired 0
1210 # run under normal mode again
1212 scrub_check_status 21 completed
1213 scrub_check_flags 22 ""
1214 scrub_check_params 23 ""
1215 scrub_check_repaired 24 0 0
1217 run_test 15 "Dryrun mode OI scrub"
1219 # restore MDS/OST size
1220 MDSSIZE=${SAVED_MDSSIZE}
1221 OSTSIZE=${SAVED_OSTSIZE}
1222 OSTCOUNT=${SAVED_OSTCOUNT}
1224 # cleanup the system at last