3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16 . $LUSTRE/tests/test-framework.sh
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
# Preconditions for the whole suite: the backing filesystem reported by
# facet_fstype must be "ldiskfs" for both $SINGLEMDS and ost1, and the MDS
# version code must not be lower than 2.2.90. When a check fails, skip is
# called with the reason and the script exits with status 0.
30 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
31 skip "ldiskfs only test" && exit 0
33 [ $(facet_fstype ost1) != "ldiskfs" ] &&
34 skip "ldiskfs only test" && exit 0
36 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
37 skip "Need MDS version at least 2.2.90" && exit 0
39 SAVED_MDSSIZE=${MDSSIZE}
40 SAVED_OSTSIZE=${OSTSIZE}
41 SAVED_OSTCOUNT=${OSTCOUNT}
42 # use small MDS + OST size to speed formatting time
43 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
44 # 200M MDT device can guarantee uninitialized groups during the OI scrub
47 # there is no need for many OSTs; cap the count to reduce format/start/stop overhead
49 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
53 # build up a clean test environment.
# Extend ALWAYS_EXCEPT according to the server versions in use.
# MDS older than 2.3.90: exclude 1a.
57 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
58 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
# MDS at or below 2.6.50: exclude 4.
60 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
61 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
# MDS at or below 2.4.1: exclude 15.
63 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
64 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
# MDS in the range [2.4.50, 2.4.90): exclude 15 as well.
66 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
67 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
68 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
# ost1 older than 2.4.50: exclude 11, 12, 13 and 14.
70 [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
71 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
73 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
78 MDT_DEV="${FSNAME}-MDT0000"
79 OST_DEV="${FSNAME}-OST0000"
80 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
86 # use "lfsck_start -A" when we no longer need testing interop
87 for n in $(seq $MDSCOUNT); do
88 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
90 error "($error_id) Failed to start OI scrub on mds$n"
98 # use "lfsck_stop -A" when we no longer need testing interop
99 for n in $(seq $MDSCOUNT); do
100 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
101 error "($error_id) Failed to stop OI scrub on mds$n"
108 do_facet mds$n $LCTL get_param -n \
109 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
# Pre-built command strings that the tests expand unquoted to run them
# (e.g. "$START_SCRUB_ON_OST -r", "$SHOW_SCRUB | awk ..."):
#   START_SCRUB / STOP_SCRUB / SHOW_SCRUB operate on ${MDT_DEV} via $SINGLEMDS,
#   START_SCRUB_ON_OST / SHOW_SCRUB_ON_OST operate on ${OST_DEV} via ost1.
# $LCTL, $SINGLEMDS, the device names and $SCRUB_ONLY are expanded here, at
# assignment time, not when the command is later run.
112 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
113 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
114 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
115 SHOW_SCRUB="do_facet $SINGLEMDS \
116 $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
117 SHOW_SCRUB_ON_OST="do_facet ost1 \
118 $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
# Mount option sets passed when starting servers; the second one adds
# "noscrub" to the options.
119 MOUNT_OPTS_SCRUB="-o user_xattr"
120 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
128 echo "preparing... $(date)"
129 for n in $(seq $MDSCOUNT); do
130 echo "creating $nfiles files on mds$n"
131 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
132 error "Failed to create directory mds$n"
133 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
134 error "Failed to copy files to mds$n"
135 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
136 error "mkdir failed on mds$n"
137 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
138 /dev/null || error "create failed on mds$n"
139 if [[ $nfiles -gt 0 ]]; then
140 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
141 /dev/null || error "createmany failed on mds$n"
144 echo "prepared $(date)."
145 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
147 # sync local transactions on every MDT
148 do_nodes $(comma_list $(mdts_nodes)) \
149 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
151 # wait for a while to cancel update logs after transactions committed.
154 # sync again to guarantee all things done.
155 do_nodes $(comma_list $(mdts_nodes)) \
156 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
158 for n in $(seq $MDSCOUNT); do
160 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
169 for n in $(seq $MDSCOUNT); do
170 start mds$n $(mdsdevname $n) $opts >/dev/null ||
171 error "($error_id) Failed to start mds$n"
179 for n in $(seq $MDSCOUNT); do
180 echo "stopping mds$n"
181 stop mds$n >/dev/null ||
182 error "($error_id) Failed to stop mds$n"
186 scrub_check_status() {
191 for n in $(seq $MDSCOUNT); do
192 wait_update_facet mds$n "$LCTL get_param -n \
193 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
194 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
195 error "($error_id) Expected '$expected' on mds$n"
199 scrub_check_flags() {
205 for n in $(seq $MDSCOUNT); do
206 actual=$(do_facet mds$n $LCTL get_param -n \
207 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
208 awk '/^flags/ { print $2 }')
209 if [ "$actual" != "$expected" ]; then
210 error "($error_id) Expected '$expected' on mds$n, but" \
216 scrub_check_params() {
222 for n in $(seq $MDSCOUNT); do
223 actual=$(do_facet mds$n $LCTL get_param -n \
224 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
225 awk '/^param/ { print $2 }')
226 if [ "$actual" != "$expected" ]; then
227 error "($error_id) Expected '$expected' on mds$n, but" \
233 scrub_check_repaired() {
240 for n in $(seq $MDSCOUNT); do
241 if [ $dryrun -eq 1 ]; then
242 actual=$(do_facet mds$n $LCTL get_param -n \
243 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
244 awk '/^inconsistent:/ { print $2 }')
246 actual=$(do_facet mds$n $LCTL get_param -n \
247 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
248 awk '/^updated:/ { print $2 }')
251 if [ $expected -eq 0 -a $actual -ne 0 ]; then
252 error "($error_id) Expected no repaired on mds$n, but" \
256 if [ $expected -ne 0 -a $actual -lt $expected ]; then
257 error "($error_id) Expected '$expected' on mds$n, but" \
267 for n in $(seq $MDSCOUNT); do
268 diff -q $LUSTRE/tests/test-framework.sh \
269 $DIR/$tdir/mds$n/test-framework.sh ||
270 error "($error_id) File data check failed"
274 scrub_check_data2() {
279 for n in $(seq $MDSCOUNT); do
280 diff -q $LUSTRE/tests/$filename \
281 $DIR/$tdir/mds$n/$filename ||
282 error "($error_id) File data check failed"
291 for n in $(seq $MDSCOUNT); do
292 mds_remove_ois mds$n $index ||
293 error "($error_id) Failed to remove OI .$index on mds$n"
297 scrub_backup_restore() {
302 for n in $(seq $MDSCOUNT); do
303 mds_backup_restore mds$n $igif ||
304 error "($error_id) Backup/restore on mds$n failed"
308 scrub_enable_auto() {
309 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
310 osd-ldiskfs.*.auto_scrub=1
314 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
319 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
320 osd-ldiskfs.*.full_scrub_ratio=$ratio
323 full_scrub_threshold_rate() {
324 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
329 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
330 osd-ldiskfs.*.full_scrub_threshold_rate=$rate
335 echo "starting MDTs without disabling OI scrub"
336 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
337 scrub_check_status 2 init
338 scrub_check_flags 3 ""
339 mount_client $MOUNT || error "(4) Fail to start client!"
342 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
346 echo "start $SINGLEMDS without disabling OI scrub"
347 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
349 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
350 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
352 mount_client $MOUNT || error "(4) Fail to start client!"
353 #define OBD_FAIL_OSD_FID_MAPPING 0x193
354 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
355 # update .lustre OI mapping
357 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
358 umount_client $MOUNT || error "(5) Fail to stop client!"
360 echo "stop $SINGLEMDS"
361 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
363 echo "start $SINGLEMDS with disabling OI scrub"
364 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
365 error "(7) Fail to start MDS!"
367 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
368 [ "$FLAGS" == "inconsistent" ] ||
369 error "(9) Expect 'inconsistent', but got '$FLAGS'"
371 run_test 1a "Auto trigger initial OI scrub when server mounts"
376 echo "start MDTs without disabling OI scrub"
377 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
378 scrub_check_status 3 completed
379 mount_client $MOUNT || error "(4) Fail to start client!"
382 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
387 # OI files to be removed:
389 # idx 2: oi.16.{2,4,8,16,32}
390 # idx 3: oi.16.{3,9,27}
391 for index in 0 2 3; do
393 scrub_remove_ois 1 $index
394 echo "start MDTs with OI scrub disabled"
395 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
396 scrub_check_flags 3 recreated
398 scrub_check_status 5 completed
399 scrub_check_flags 6 ""
402 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
406 scrub_backup_restore 1
407 echo "starting MDTs without disabling OI scrub"
408 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
409 scrub_check_status 3 completed
410 mount_client $MOUNT || error "(4) Fail to start client!"
413 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
415 # test_3 is obsolete, it will be covered by test_5.
417 formatall > /dev/null
421 scrub_backup_restore 1
422 echo "starting MDTs with OI scrub disabled"
423 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
424 scrub_check_status 3 init
425 scrub_check_flags 4 recreated,inconsistent
427 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
431 scrub_backup_restore 1
432 echo "starting MDTs with OI scrub disabled"
433 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
434 scrub_check_flags 4 recreated,inconsistent
435 mount_client $MOUNT || error "(5) Fail to start client!"
441 scrub_check_status 7 completed
442 scrub_check_flags 8 ""
445 for n in $(seq $MDSCOUNT); do
446 updated0[$n]=$(scrub_status $n |
447 awk '/^prior_updated/ { print $2 }')
450 scrub_check_data2 sanity-scrub.sh 9
454 for n in $(seq $MDSCOUNT); do
455 updated1[$n]=$(scrub_status $n |
456 awk '/^prior_updated/ { print $2 }')
457 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
458 error "(10) NOT auto trigger full scrub as expected"
461 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
465 scrub_backup_restore 1
466 echo "starting MDTs with OI scrub disabled"
467 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
468 scrub_check_flags 4 recreated,inconsistent
469 mount_client $MOUNT || error "(5) Fail to start client!"
472 full_scrub_threshold_rate 10000
476 scrub_check_status 7 completed
477 scrub_check_flags 8 ""
480 for n in $(seq $MDSCOUNT); do
481 updated0[$n]=$(scrub_status $n |
482 awk '/^prior_updated/ { print $2 }')
484 echo "OI scrub on MDS$n status for the 1st time:"
485 do_facet mds$n $LCTL get_param -n \
486 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
489 scrub_check_data2 sanity-scrub.sh 9
492 scrub_check_status 10 completed
493 scrub_check_flags 11 ""
496 for n in $(seq $MDSCOUNT); do
497 updated1[$n]=$(scrub_status $n |
498 awk '/^prior_updated/ { print $2 }')
500 echo "OI scrub on MDS$n status for the 2nd time:"
501 do_facet mds$n $LCTL get_param -n \
502 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
504 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
505 error "(12) Auto trigger full scrub unexpectedly"
508 for n in $(seq $MDSCOUNT); do
509 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
510 error "(13) fail to ls"
514 scrub_check_status 14 completed
515 scrub_check_flags 15 ""
517 for n in $(seq $MDSCOUNT); do
518 updated0[$n]=$(scrub_status $n |
519 awk '/^prior_updated/ { print $2 }')
521 echo "OI scrub on MDS$n status for the 3rd time:"
522 do_facet mds$n $LCTL get_param -n \
523 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
525 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
526 error "(16) Auto trigger full scrub unexpectedly"
529 for n in $(seq $MDSCOUNT); do
530 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
534 for n in $(seq $MDSCOUNT); do
535 updated1[$n]=$(scrub_status $n |
536 awk '/^prior_updated/ { print $2 }')
537 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
538 echo "OI scrub on MDS$n status for the 4th time:"
539 do_facet mds$n $LCTL get_param -n \
540 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
542 error "(18) NOT auto trigger full scrub as expected"
546 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
550 scrub_backup_restore 1
551 echo "starting MDTs with OI scrub disabled"
552 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
553 scrub_check_flags 4 recreated,inconsistent
554 mount_client $MOUNT || error "(5) Fail to start client!"
557 full_scrub_threshold_rate 20
561 scrub_check_status 7 completed
562 scrub_check_flags 8 ""
565 for n in $(seq $MDSCOUNT); do
566 updated0[$n]=$(scrub_status $n |
567 awk '/^prior_updated/ { print $2 }')
569 echo "OI scrub on MDS$n status for the 1st time:"
570 do_facet mds$n $LCTL get_param -n \
571 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
574 scrub_check_data2 sanity-scrub.sh 9
577 scrub_check_status 10 completed
578 scrub_check_flags 11 ""
581 for n in $(seq $MDSCOUNT); do
582 updated1[$n]=$(scrub_status $n |
583 awk '/^prior_updated/ { print $2 }')
585 echo "OI scrub on MDS$n status for the 2nd time:"
586 do_facet mds$n $LCTL get_param -n \
587 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
589 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
590 error "(12) Auto trigger full scrub unexpectedly"
593 for n in $(seq $MDSCOUNT); do
594 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
595 error "(13) fail to ls"
599 scrub_check_status 14 completed
600 scrub_check_flags 15 ""
602 for n in $(seq $MDSCOUNT); do
603 updated0[$n]=$(scrub_status $n |
604 awk '/^prior_updated/ { print $2 }')
606 echo "OI scrub on MDS$n status for the 3rd time:"
607 do_facet mds$n $LCTL get_param -n \
608 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
610 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
611 error "(16) Auto trigger full scrub unexpectedly"
614 for n in $(seq $MDSCOUNT); do
615 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
619 for n in $(seq $MDSCOUNT); do
620 updated1[$n]=$(scrub_status $n |
621 awk '/^prior_updated/ { print $2 }')
622 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
623 echo "OI scrub on MDS$n status for the 4th time:"
624 do_facet mds$n $LCTL get_param -n \
625 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
627 error "(18) NOT auto trigger full scrub as expected"
631 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
634 formatall > /dev/null
638 scrub_backup_restore 1
639 echo "starting MDTs with OI scrub disabled (1)"
640 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
641 scrub_check_status 3 init
642 scrub_check_flags 4 recreated,inconsistent
643 mount_client $MOUNT || error "(5) Fail to start client!"
647 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
648 do_nodes $(comma_list $(mdts_nodes)) \
649 $LCTL set_param fail_val=3 fail_loc=0x190
652 umount_client $MOUNT || error "(7) Fail to stop client!"
653 scrub_check_status 8 scanning
655 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
656 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
661 do_nodes $(comma_list $(mdts_nodes)) \
662 $LCTL set_param fail_loc=0 fail_val=0
664 echo "starting MDTs with OI scrub disabled (2)"
665 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
666 scrub_check_status 11 crashed
669 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
670 do_nodes $(comma_list $(mdts_nodes)) \
671 $LCTL set_param fail_val=3 fail_loc=0x190
673 echo "starting MDTs without disabling OI scrub"
674 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
675 scrub_check_status 14 scanning
677 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
678 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
680 scrub_check_status 15 failed
681 mount_client $MOUNT || error "(16) Fail to start client!"
684 do_nodes $(comma_list $(mdts_nodes)) \
685 $LCTL set_param fail_loc=0 fail_val=0
690 for n in $(seq $MDSCOUNT); do
691 stat $DIR/$tdir/mds$n/${tfile}800 &
695 for n in $(seq $MDSCOUNT); do
696 wait ${pids[$n]} || error "(18) Fail to stat mds$n/${tfile}800"
699 scrub_check_status 19 completed
700 scrub_check_flags 20 ""
702 run_test 5 "OI scrub state machine"
706 scrub_backup_restore 1
707 echo "starting MDTs with OI scrub disabled"
708 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
709 scrub_check_flags 4 recreated,inconsistent
710 mount_client $MOUNT || error "(5) Fail to start client!"
714 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
715 do_nodes $(comma_list $(mdts_nodes)) \
716 $LCTL set_param fail_val=2 fail_loc=0x190
720 # Sleep 5 sec to guarantee at least one object processed by OI scrub
722 # Fail the OI scrub to guarantee there is at least one checkpoint
723 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
724 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
726 scrub_check_status 7 failed
728 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
729 do_nodes $(comma_list $(mdts_nodes)) \
730 $LCTL set_param fail_val=3 fail_loc=0x190
733 for n in $(seq $MDSCOUNT); do
734 # stat will re-trigger OI scrub
735 stat $DIR/$tdir/mds$n/${tfile}800 ||
736 error "(8) Failed to stat mds$n/${tfile}800"
739 umount_client $MOUNT || error "(9) Fail to stop client!"
740 scrub_check_status 10 scanning
742 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
743 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
747 for n in $(seq $MDSCOUNT); do
748 position0[$n]=$(scrub_status $n |
749 awk '/^last_checkpoint_position/ {print $2}')
750 position0[$n]=$((${position0[$n]} + 1))
755 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
756 do_nodes $(comma_list $(mdts_nodes)) \
757 $LCTL set_param fail_val=3 fail_loc=0x190
759 echo "starting MDTs without disabling OI scrub"
760 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
762 scrub_check_status 13 scanning
765 for n in $(seq $MDSCOUNT); do
766 position1[$n]=$(scrub_status $n |
767 awk '/^latest_start_position/ {print $2}')
768 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
769 error "(14) Expected position ${position0[$n]}, but" \
770 "got ${position1[$n]}"
774 do_nodes $(comma_list $(mdts_nodes)) \
775 $LCTL set_param fail_loc=0 fail_val=0
777 scrub_check_status 15 completed
778 scrub_check_flags 16 ""
780 run_test 6 "OI scrub resumes from last checkpoint"
784 scrub_backup_restore 1
785 echo "starting MDTs with OI scrub disabled"
786 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
787 scrub_check_flags 4 recreated,inconsistent
788 mount_client $MOUNT || error "(5) Fail to start client!"
792 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
793 do_nodes $(comma_list $(mdts_nodes)) \
794 $LCTL set_param fail_val=3 fail_loc=0x190
799 for n in $(seq $MDSCOUNT); do
800 stat $DIR/$tdir/mds$n/${tfile}300 ||
801 error "(7) Failed to stat mds$n/${tfile}300!"
804 scrub_check_status 8 scanning
805 scrub_check_flags 9 recreated,inconsistent,auto
807 do_nodes $(comma_list $(mdts_nodes)) \
808 $LCTL set_param fail_loc=0 fail_val=0
810 scrub_check_status 10 completed
813 run_test 7 "System is available during OI scrub scanning"
817 scrub_backup_restore 1
818 echo "starting MDTs with OI scrub disabled"
819 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
820 scrub_check_flags 4 recreated,inconsistent
822 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
823 do_nodes $(comma_list $(mdts_nodes)) \
824 $LCTL set_param fail_val=1 fail_loc=0x190
827 scrub_check_status 6 scanning
829 scrub_check_status 8 stopped
831 scrub_check_status 10 scanning
833 do_nodes $(comma_list $(mdts_nodes)) \
834 $LCTL set_param fail_loc=0 fail_val=0
836 scrub_check_status 11 completed
837 scrub_check_flags 12 ""
839 run_test 8 "Control OI scrub manually"
842 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
843 skip "Testing on UP system, the speed may be inaccurate."
848 scrub_backup_restore 1
850 echo "starting MDTs with OI scrub disabled"
851 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
852 scrub_check_flags 4 recreated,inconsistent
854 local BASE_SPEED1=100
856 # OI scrub should run with full speed under inconsistent case
857 scrub_start 5 -s $BASE_SPEED1
860 scrub_check_status 6 completed
861 scrub_check_flags 7 ""
862 # OI scrub should run with limited speed under non-inconsistent case
863 scrub_start 8 -s $BASE_SPEED1 -r
866 scrub_check_status 9 scanning
868 # Do NOT ignore that there are 1024 pre-fetched items. And there
869 # may be time error, normally it should be less than 2 seconds.
870 # We allow another 20% schedule error.
871 local PRE_FETCHED=1024
873 # MAX_MARGIN = 1.2 = 12 / 10
874 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
875 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
877 for n in $(seq $MDSCOUNT); do
878 local SPEED=$(scrub_status $n | \
879 awk '/^average_speed/ { print $2 }')
880 [ $SPEED -lt $MAX_SPEED ] ||
881 error "(10) Got speed $SPEED, expected less than" \
886 local BASE_SPEED2=300
888 for n in $(seq $MDSCOUNT); do
889 do_facet mds$n $LCTL set_param -n \
890 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
894 # MIN_MARGIN = 0.8 = 8 / 10
895 local MIN_SPEED=$(((PRE_FETCHED + \
896 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
897 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
898 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
899 # MAX_MARGIN = 1.2 = 12 / 10
900 MAX_SPEED=$(((PRE_FETCHED + \
901 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
902 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
903 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
904 for n in $(seq $MDSCOUNT); do
905 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
906 [ $SPEED -gt $MIN_SPEED ] ||
907 error "(11) Got speed $SPEED, expected more than" \
909 [ $SPEED -lt $MAX_SPEED ] ||
910 error "(12) Got speed $SPEED, expected less than" \
913 do_facet mds$n $LCTL set_param -n \
914 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
917 scrub_check_status 13 completed
919 run_test 9 "OI scrub speed control"
923 scrub_backup_restore 1
# The following scrub_start_mds call is not tied to a single MDS index, so
# name the MDTs collectively, as the later progress messages in this test do.
# NOTE(review): no loop variable n is visible in scope at this point - confirm.
924 echo "starting MDTs with OI scrub disabled (1)"
925 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
926 scrub_check_flags 4 recreated,inconsistent
927 mount_client $MOUNT || error "(5) Fail to start client!"
931 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
932 do_nodes $(comma_list $(mdts_nodes)) \
933 $LCTL set_param fail_val=1 fail_loc=0x190
936 scrub_check_status 7 scanning
937 umount_client $MOUNT || error "(8) Fail to stop client!"
939 echo "starting MDTs with OI scrub disabled (2)"
940 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
941 scrub_check_status 11 paused
943 echo "starting MDTs without disabling OI scrub"
944 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
945 scrub_check_status 14 scanning
947 do_nodes $(comma_list $(mdts_nodes)) \
948 $LCTL set_param fail_loc=0 fail_val=0
950 scrub_check_status 15 completed
951 scrub_check_flags 16 ""
953 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
955 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
958 scrub_backup_restore 1
959 echo "starting MDTs with OI scrub disabled"
960 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
961 scrub_check_flags 4 recreated,inconsistent
963 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
964 do_nodes $(comma_list $(mdts_nodes)) \
965 $LCTL set_param fail_val=3 fail_loc=0x190
968 scrub_check_status 6 scanning
970 echo "starting MDTs with OI scrub disabled"
971 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
972 scrub_check_status 9 paused
974 echo "starting MDTs without disabling OI scrub"
975 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
976 scrub_check_status 12 scanning
978 do_nodes $(comma_list $(mdts_nodes)) \
979 $LCTL set_param fail_loc=0 fail_val=0
981 scrub_check_status 13 completed
982 scrub_check_flags 14 ""
984 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
992 for n in $(seq $MDSCOUNT); do
993 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
994 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
996 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
997 error "(2) Fail to create under $tdir/mds$n"
1000 # reset OI scrub start point by force
1002 scrub_check_status 4 completed
1007 # OI scrub should skip the newly created objects on first access.
1008 # Note that we create a new llog for every OST on every startup, and
1009 # new features can make this even less stable, so we only check that
1010 # the number of skipped files is more than the number of known created files.
1011 local MINIMUM=$((CREATED + 1)) # files + directory
1012 for n in $(seq $MDSCOUNT); do
1013 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1014 [ $SKIPPED -lt $MINIMUM ] &&
1015 error "(5) Expect at least $MINIMUM objects" \
1016 "skipped on mds$n, but got $SKIPPED"
1018 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1021 # reset OI scrub start point by force
1023 scrub_check_status 7 completed
1025 # OI scrub should skip the newly created objects only once
1026 for n in $(seq $MDSCOUNT); do
1027 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1028 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1030 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1031 error "(8) Expect 0 objects skipped on mds$n, but" \
1035 run_test 11 "OI scrub skips the new created objects only once"
1038 check_mount_and_prep
1039 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1041 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1042 do_facet ost1 $LCTL set_param fail_loc=0x195
1043 local count=$(precreated_ost_obj_count 0 0)
1045 createmany -o $DIR/$tdir/f $((count + 32))
1046 umount_client $MOUNT || error "(1) Fail to stop client!"
1048 stop ost1 || error "(2) Fail to stop ost1"
1050 #define OBD_FAIL_OST_NODESTROY 0x233
1051 do_facet ost1 $LCTL set_param fail_loc=0x233
1053 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1054 error "(3) Fail to start ost1"
1056 mount_client $MOUNT || error "(4) Fail to start client!"
1058 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1060 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1062 do_facet ost1 $LCTL set_param fail_loc=0
# Check (via wait_update_facet) that the OI scrub status reported on ost1
# becomes "completed". The failure message spells the expected status
# literally: no 'expected' variable is assigned in the visible part of this
# test, so interpolating it would print an empty value.
1063 wait_update_facet ost1 "$LCTL get_param -n \
1064 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1065 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1066 error "(7) Expected 'completed' on ost1"
1068 ls -ail $DIR/$tdir > /dev/null || {
1070 error "(8) ls should succeed"
1073 run_test 12 "OI scrub can rebuild invalid /O entries"
1076 check_mount_and_prep
1077 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1079 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1080 do_facet ost1 $LCTL set_param fail_loc=0x196
1081 local count=$(precreated_ost_obj_count 0 0)
1083 createmany -o $DIR/$tdir/f $((count + 32))
1084 do_facet ost1 $LCTL set_param fail_loc=0
1086 umount_client $MOUNT || error "(1) Fail to stop client!"
1088 stop ost1 || error "(2) Fail to stop ost1"
1090 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1091 error "(3) Fail to start ost1"
1093 mount_client $MOUNT || error "(4) Fail to start client!"
1095 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1097 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Check (via wait_update_facet) that the OI scrub status reported on ost1
# becomes "completed". The failure message spells the expected status
# literally: no 'expected' variable is assigned in the visible part of this
# test, so interpolating it would print an empty value.
1099 wait_update_facet ost1 "$LCTL get_param -n \
1100 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1101 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1102 error "(7) Expected 'completed' on ost1"
1104 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1106 run_test 13 "OI scrub can rebuild missed /O entries"
1109 check_mount_and_prep
1110 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1112 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1113 do_facet ost1 $LCTL set_param fail_loc=0x196
1114 local count=$(precreated_ost_obj_count 0 0)
1116 createmany -o $DIR/$tdir/f $((count + 32))
1117 do_facet ost1 $LCTL set_param fail_loc=0
1119 umount_client $MOUNT || error "(1) Fail to stop client!"
1121 stop ost1 || error "(2) Fail to stop ost1"
1124 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1125 error "(3) Fail to run e2fsck error"
1127 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1128 error "(4) Fail to start ost1"
1130 mount_client $MOUNT || error "(5) Fail to start client!"
1132 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1133 awk '/^lf_repa[ri]*ed/ { print $2 }')
1134 [ $LF_REPAIRED -gt 0 ] ||
1135 error "(6) Some entry under /lost+found should be repaired"
1137 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1139 run_test 14 "OI scrub can repair objects under lost+found"
1142 local server_version=$(lustre_version_code $SINGLEMDS)
1144 scrub_backup_restore 1
1145 echo "starting MDTs with OI scrub disabled"
1146 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1147 scrub_check_status 3 init
1148 scrub_check_flags 4 recreated,inconsistent
1150 # run under dryrun mode
1151 if [ $server_version -lt $(version_code 2.5.58) ]; then
1152 scrub_start 5 --dryrun on
1154 scrub_start 5 --dryrun
1156 scrub_check_status 6 completed
1157 scrub_check_flags 7 recreated,inconsistent
1158 scrub_check_params 8 dryrun
1159 scrub_check_repaired 9 20 1
1161 # run under dryrun mode again
1162 if [ $server_version -lt $(version_code 2.5.58) ]; then
1163 scrub_start 10 --dryrun on
1165 scrub_start 10 --dryrun
1167 scrub_check_status 11 completed
1168 scrub_check_flags 12 recreated,inconsistent
1169 scrub_check_params 13 dryrun
1170 scrub_check_repaired 14 20 1
1172 # run under normal mode
1174 # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
1175 # work under Lustre-2.y (y >= 6), the test script should be fixed as
1176 # "-noff" or "--dryrun=off" or nothing by default.
1177 if [ $server_version -lt $(version_code 2.5.58) ]; then
1178 scrub_start 15 --dryrun off
1182 scrub_check_status 16 completed
1183 scrub_check_flags 17 ""
1184 scrub_check_params 18 ""
1185 scrub_check_repaired 19 20 0
1187 # run under normal mode again
1188 if [ $server_version -lt $(version_code 2.5.58) ]; then
1189 scrub_start 20 --dryrun off
1193 scrub_check_status 21 completed
1194 scrub_check_flags 22 ""
1195 scrub_check_params 23 ""
1196 scrub_check_repaired 24 0 0
1198 run_test 15 "Dryrun mode OI scrub"
1200 # restore MDS/OST size
1201 MDSSIZE=${SAVED_MDSSIZE}
1202 OSTSIZE=${SAVED_OSTSIZE}
1203 OSTCOUNT=${SAVED_OSTCOUNT}
1205 # cleanup the system at last