3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16 . $LUSTRE/tests/test-framework.sh
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
30 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
31 skip "test OI scrub only for ldiskfs" && exit 0
33 [ $(facet_fstype ost1) != "ldiskfs" ] &&
34 skip "test OI scrub only for ldiskfs" && exit 0
36 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
37 skip "Need MDS version at least 2.2.90" && exit 0
39 SAVED_MDSSIZE=${MDSSIZE}
40 SAVED_OSTSIZE=${OSTSIZE}
41 SAVED_OSTCOUNT=${OSTCOUNT}
42 # use small MDS + OST size to speed formatting time
43 # do not make MDSSIZE/OSTSIZE too small, as that affects the default journal size
44 # 200M MDT device can guarantee uninitialized groups during the OI scrub
47 # there is no need for many OSTs; limit them to reduce the format/start/stop overhead
49 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
53 # build up a clean test environment.
57 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
58 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
60 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
61 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
63 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
64 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
66 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
67 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
68 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
70 [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
71 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
73 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
78 MDT_DEV="${FSNAME}-MDT0000"
79 OST_DEV="${FSNAME}-OST0000"
80 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
86 # use "lfsck_start -A" when we no longer need to test interop
87 for n in $(seq $MDSCOUNT); do
88 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
90 error "($error_id) Failed to start OI scrub on mds$n"
98 # use "lfsck_stop -A" when we no longer need to test interop
99 for n in $(seq $MDSCOUNT); do
100 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
101 error "($error_id) Failed to stop OI scrub on mds$n"
108 do_facet mds$n $LCTL get_param -n \
109 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
112 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
113 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
114 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
115 SHOW_SCRUB="do_facet $SINGLEMDS \
116 $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
117 SHOW_SCRUB_ON_OST="do_facet ost1 \
118 $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
119 MOUNT_OPTS_SCRUB="-o user_xattr"
120 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
128 echo "preparing... $(date)"
129 for n in $(seq $MDSCOUNT); do
130 echo "creating $nfiles files on mds$n"
131 if [ $n -eq 1 ]; then
132 mkdir $DIR/$tdir/mds$n ||
133 error "Failed to create directory mds$n"
135 $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
136 error "Failed to create remote directory mds$n"
138 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
139 error "Failed to copy files to mds$n"
140 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
141 error "mkdir failed on mds$n"
142 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
143 /dev/null || error "create failed on mds$n"
144 if [[ $nfiles -gt 0 ]]; then
145 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
146 /dev/null || error "createmany failed on mds$n"
149 echo "prepared $(date)."
150 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
151 for n in $(seq $MDSCOUNT); do
153 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
162 for n in $(seq $MDSCOUNT); do
163 start mds$n $(mdsdevname $n) $opts >/dev/null ||
164 error "($error_id) Failed to start mds$n"
172 for n in $(seq $MDSCOUNT); do
173 echo "stopping mds$n"
174 stop mds$n >/dev/null ||
175 error "($error_id) Failed to stop mds$n"
179 scrub_check_status() {
184 for n in $(seq $MDSCOUNT); do
185 wait_update_facet mds$n "$LCTL get_param -n \
186 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
187 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
188 error "($error_id) Expected '$expected' on mds$n"
192 scrub_check_flags() {
198 for n in $(seq $MDSCOUNT); do
199 actual=$(do_facet mds$n $LCTL get_param -n \
200 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
201 awk '/^flags/ { print $2 }')
202 if [ "$actual" != "$expected" ]; then
203 error "($error_id) Expected '$expected' on mds$n, but" \
209 scrub_check_params() {
215 for n in $(seq $MDSCOUNT); do
216 actual=$(do_facet mds$n $LCTL get_param -n \
217 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
218 awk '/^param/ { print $2 }')
219 if [ "$actual" != "$expected" ]; then
220 error "($error_id) Expected '$expected' on mds$n, but" \
226 scrub_check_repaired() {
232 for n in $(seq $MDSCOUNT); do
233 actual=$(do_facet mds$n $LCTL get_param -n \
234 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
235 awk '/^updated/ { print $2 }')
237 if [ $expected -eq 0 -a $actual -ne 0 ]; then
238 error "($error_id) Expected no repaired on mds$n, but" \
242 if [ $expected -ne 0 -a $actual -lt $expected ]; then
243 error "($error_id) Expected '$expected' on mds$n, but" \
253 for n in $(seq $MDSCOUNT); do
254 diff -q $LUSTRE/tests/test-framework.sh \
255 $DIR/$tdir/mds$n/test-framework.sh ||
256 error "($error_id) File data check failed"
260 scrub_check_data2() {
265 for n in $(seq $MDSCOUNT); do
266 diff -q $LUSTRE/tests/$filename \
267 $DIR/$tdir/mds$n/$filename ||
268 error "($error_id) File data check failed"
277 for n in $(seq $MDSCOUNT); do
278 mds_remove_ois mds$n $index ||
279 error "($error_id) Failed to remove OI .$index on mds$n"
283 scrub_backup_restore() {
288 for n in $(seq $MDSCOUNT); do
289 mds_backup_restore mds$n $igif ||
290 error "($error_id) Backup/restore on mds$n failed" # '$' was missing, so the literal text "(error_id)" was printed
294 scrub_enable_auto() {
295 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
296 osd-ldiskfs.*.auto_scrub=1
300 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
305 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
306 osd-ldiskfs.*.full_scrub_ratio=$ratio
309 full_scrub_threshold_rate() {
310 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
315 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
316 osd-ldiskfs.*.full_scrub_threshold_rate=$rate
321 echo "starting MDTs without disabling OI scrub"
322 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
323 scrub_check_status 2 init
324 scrub_check_flags 3 ""
325 mount_client $MOUNT || error "(4) Fail to start client!"
328 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
332 echo "start $SINGLEMDS without disabling OI scrub"
333 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
335 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
336 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
338 mount_client $MOUNT || error "(4) Fail to start client!"
339 #define OBD_FAIL_OSD_FID_MAPPING 0x193
340 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
341 # update .lustre OI mapping
343 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
344 umount_client $MOUNT || error "(5) Fail to stop client!"
346 echo "stop $SINGLEMDS"
347 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
349 echo "start $SINGLEMDS with disabling OI scrub"
350 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
351 error "(7) Fail to start MDS!"
353 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
354 [ "$FLAGS" == "inconsistent" ] ||
355 error "(9) Expect 'inconsistent', but got '$FLAGS'"
357 run_test 1a "Auto trigger initial OI scrub when server mounts"
362 echo "start MDTs without disabling OI scrub"
363 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
364 scrub_check_status 3 completed
365 mount_client $MOUNT || error "(4) Fail to start client!"
368 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
373 # OI files to be removed:
375 # idx 2: oi.16.{2,4,8,16,32}
376 # idx 3: oi.16.{3,9,27}
377 for index in 0 2 3; do
379 scrub_remove_ois 1 $index
380 echo "start MDTs with OI scrub disabled"
381 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
382 scrub_check_flags 3 recreated
384 scrub_check_status 5 completed
385 scrub_check_flags 6 ""
388 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
392 scrub_backup_restore 1
393 echo "starting MDTs without disabling OI scrub"
394 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
395 scrub_check_status 3 completed
396 mount_client $MOUNT || error "(4) Fail to start client!"
399 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
401 # test_3 is obsolete, it will be covered by test_5.
403 formatall > /dev/null
407 scrub_backup_restore 1
408 echo "starting MDTs with OI scrub disabled"
409 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
410 scrub_check_status 3 init
411 scrub_check_flags 4 recreated,inconsistent
413 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
417 scrub_backup_restore 1
418 echo "starting MDTs with OI scrub disabled"
419 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
420 scrub_check_flags 4 recreated,inconsistent
421 mount_client $MOUNT || error "(5) Fail to start client!"
427 scrub_check_status 7 completed
428 scrub_check_flags 8 ""
431 for n in $(seq $MDSCOUNT); do
432 updated0[$n]=$(scrub_status $n |
433 awk '/^sf_items_updated_prior/ { print $2 }')
436 scrub_check_data2 sanity-scrub.sh 9
440 for n in $(seq $MDSCOUNT); do
441 updated1[$n]=$(scrub_status $n |
442 awk '/^sf_items_updated_prior/ { print $2 }')
443 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
444 error "(10) NOT auto trigger full scrub as expected"
447 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
451 scrub_backup_restore 1
452 echo "starting MDTs with OI scrub disabled"
453 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
454 scrub_check_flags 4 recreated,inconsistent
455 mount_client $MOUNT || error "(5) Fail to start client!"
458 full_scrub_threshold_rate 10000
462 scrub_check_status 7 completed
463 scrub_check_flags 8 ""
466 for n in $(seq $MDSCOUNT); do
467 updated0[$n]=$(scrub_status $n |
468 awk '/^sf_items_updated_prior/ { print $2 }')
471 scrub_check_data2 sanity-scrub.sh 9
474 scrub_check_status 10 completed
475 scrub_check_flags 11 ""
478 for n in $(seq $MDSCOUNT); do
479 updated1[$n]=$(scrub_status $n |
480 awk '/^sf_items_updated_prior/ { print $2 }')
481 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
482 error "(12) Auto trigger full scrub unexpectedly"
485 for n in $(seq $MDSCOUNT); do
486 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
487 error "(13) fail to ls"
491 scrub_check_status 14 completed
492 scrub_check_flags 15 ""
494 for n in $(seq $MDSCOUNT); do
495 updated0[$n]=$(scrub_status $n |
496 awk '/^sf_items_updated_prior/ { print $2 }')
497 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
498 error "(16) Auto trigger full scrub unexpectedly"
501 for n in $(seq $MDSCOUNT); do
502 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
506 for n in $(seq $MDSCOUNT); do
507 updated1[$n]=$(scrub_status $n |
508 awk '/^sf_items_updated_prior/ { print $2 }')
509 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
510 error "(18) NOT auto trigger full scrub as expected"
513 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
517 scrub_backup_restore 1
518 echo "starting MDTs with OI scrub disabled"
519 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
520 scrub_check_flags 4 recreated,inconsistent
521 mount_client $MOUNT || error "(5) Fail to start client!"
524 full_scrub_threshold_rate 20
528 scrub_check_status 7 completed
529 scrub_check_flags 8 ""
532 for n in $(seq $MDSCOUNT); do
533 updated0[$n]=$(scrub_status $n |
534 awk '/^sf_items_updated_prior/ { print $2 }')
537 scrub_check_data2 sanity-scrub.sh 9
540 scrub_check_status 10 completed
541 scrub_check_flags 11 ""
544 for n in $(seq $MDSCOUNT); do
545 updated1[$n]=$(scrub_status $n |
546 awk '/^sf_items_updated_prior/ { print $2 }')
547 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
548 error "(12) Auto trigger full scrub unexpectedly"
551 for n in $(seq $MDSCOUNT); do
552 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
553 error "(13) fail to ls"
557 scrub_check_status 14 completed
558 scrub_check_flags 15 ""
560 for n in $(seq $MDSCOUNT); do
561 updated0[$n]=$(scrub_status $n |
562 awk '/^sf_items_updated_prior/ { print $2 }')
563 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
564 error "(16) Auto trigger full scrub unexpectedly"
567 for n in $(seq $MDSCOUNT); do
568 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
572 for n in $(seq $MDSCOUNT); do
573 updated1[$n]=$(scrub_status $n |
574 awk '/^sf_items_updated_prior/ { print $2 }')
575 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
576 error "(18) NOT auto trigger full scrub as expected"
579 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
582 formatall > /dev/null
586 scrub_backup_restore 1
587 echo "starting MDTs with OI scrub disabled (1)"
588 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
589 scrub_check_status 3 init
590 scrub_check_flags 4 recreated,inconsistent
591 mount_client $MOUNT || error "(5) Fail to start client!"
594 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
595 do_nodes $(comma_list $(mdts_nodes)) \
596 $LCTL set_param fail_val=3 fail_loc=0x190
600 umount_client $MOUNT || error "(7) Fail to stop client!"
601 scrub_check_status 8 scanning
603 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
604 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
609 do_nodes $(comma_list $(mdts_nodes)) \
610 $LCTL set_param fail_loc=0 fail_val=0
612 echo "starting MDTs with OI scrub disabled (2)"
613 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
614 scrub_check_status 11 crashed
617 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
618 do_nodes $(comma_list $(mdts_nodes)) \
619 $LCTL set_param fail_val=3 fail_loc=0x190
621 echo "starting MDTs without disabling OI scrub"
622 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
623 scrub_check_status 14 scanning
625 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
626 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
628 scrub_check_status 15 failed
629 mount_client $MOUNT || error "(16) Fail to start client!"
632 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
633 do_nodes $(comma_list $(mdts_nodes)) \
634 $LCTL set_param fail_val=3 fail_loc=0x190
637 for n in $(seq $MDSCOUNT); do
638 stat $DIR/$tdir/mds$n/${tfile}800 ||
639 error "(17) Failed to stat mds$n/${tfile}800"
642 scrub_check_status 18 scanning
644 do_nodes $(comma_list $(mdts_nodes)) \
645 $LCTL set_param fail_loc=0 fail_val=0
647 scrub_check_status 19 completed
648 scrub_check_flags 20 ""
650 run_test 5 "OI scrub state machine"
654 scrub_backup_restore 1
655 echo "starting MDTs with OI scrub disabled"
656 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
657 scrub_check_flags 4 recreated,inconsistent
658 mount_client $MOUNT || error "(5) Fail to start client!"
661 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
662 do_nodes $(comma_list $(mdts_nodes)) \
663 $LCTL set_param fail_val=2 fail_loc=0x190
668 # Sleep 5 sec to guarantee at least one object processed by OI scrub
670 # Fail the OI scrub to guarantee there is at least one checkpoint
671 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
672 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
674 scrub_check_status 7 failed
676 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
677 do_nodes $(comma_list $(mdts_nodes)) \
678 $LCTL set_param fail_val=3 fail_loc=0x190
681 for n in $(seq $MDSCOUNT); do
682 # stat will re-trigger OI scrub
683 stat $DIR/$tdir/mds$n/${tfile}800 ||
684 error "(8) Failed to stat mds$n/${tfile}800"
687 umount_client $MOUNT || error "(9) Fail to stop client!"
688 scrub_check_status 10 scanning
690 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
691 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
695 for n in $(seq $MDSCOUNT); do
696 position0[$n]=$(scrub_status $n |
697 awk '/^last_checkpoint_position/ {print $2}')
698 position0[$n]=$((${position0[$n]} + 1))
703 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
704 do_nodes $(comma_list $(mdts_nodes)) \
705 $LCTL set_param fail_val=3 fail_loc=0x190
707 echo "starting MDTs without disabling OI scrub"
708 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
710 scrub_check_status 13 scanning
713 for n in $(seq $MDSCOUNT); do
714 position1[$n]=$(scrub_status $n |
715 awk '/^latest_start_position/ {print $2}')
716 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
717 error "(14) Expected position ${position0[$n]}, but" \
718 "got ${position1[$n]}"
722 do_nodes $(comma_list $(mdts_nodes)) \
723 $LCTL set_param fail_loc=0 fail_val=0
725 scrub_check_status 15 completed
726 scrub_check_flags 16 ""
728 run_test 6 "OI scrub resumes from last checkpoint"
732 scrub_backup_restore 1
733 echo "starting MDTs with OI scrub disabled"
734 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
735 scrub_check_flags 4 recreated,inconsistent
736 mount_client $MOUNT || error "(5) Fail to start client!"
739 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
740 do_nodes $(comma_list $(mdts_nodes)) \
741 $LCTL set_param fail_val=3 fail_loc=0x190
747 for n in $(seq $MDSCOUNT); do
748 stat $DIR/$tdir/mds$n/${tfile}300 ||
749 error "(7) Failed to stat mds$n/${tfile}300!"
752 scrub_check_status 8 scanning
753 scrub_check_flags 9 recreated,inconsistent,auto
755 do_nodes $(comma_list $(mdts_nodes)) \
756 $LCTL set_param fail_loc=0 fail_val=0
758 scrub_check_status 10 completed
761 run_test 7 "System is available during OI scrub scanning"
765 scrub_backup_restore 1
766 echo "starting MDTs with OI scrub disabled"
767 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
768 scrub_check_flags 4 recreated,inconsistent
770 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
771 do_nodes $(comma_list $(mdts_nodes)) \
772 $LCTL set_param fail_val=1 fail_loc=0x190
775 scrub_check_status 6 scanning
777 scrub_check_status 8 stopped
779 scrub_check_status 10 scanning
781 do_nodes $(comma_list $(mdts_nodes)) \
782 $LCTL set_param fail_loc=0 fail_val=0
784 scrub_check_status 11 completed
785 scrub_check_flags 12 ""
787 run_test 8 "Control OI scrub manually"
790 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
791 skip "Testing on UP system, the speed may be inaccurate."
796 scrub_backup_restore 1
798 echo "starting MDTs with OI scrub disabled"
799 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
800 scrub_check_flags 4 recreated,inconsistent
802 local BASE_SPEED1=100
804 # OI scrub should run with full speed under inconsistent case
805 scrub_start 5 -s $BASE_SPEED1
808 scrub_check_status 6 completed
809 scrub_check_flags 7 ""
810 # OI scrub should run with limited speed under non-inconsistent case
811 scrub_start 8 -s $BASE_SPEED1 -r
814 scrub_check_status 9 scanning
816 # Account for the 1024 pre-fetched items. There may also be a
817 # timing error, which should normally be less than 2 seconds.
818 # We allow another 20% for scheduling error.
819 local PRE_FETCHED=1024
821 # MAX_MARGIN = 1.2 = 12 / 10
822 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
823 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
825 for n in $(seq $MDSCOUNT); do
826 local SPEED=$(scrub_status $n | \
827 awk '/^average_speed/ { print $2 }')
828 [ $SPEED -lt $MAX_SPEED ] ||
829 error "(10) Got speed $SPEED, expected less than" \
834 local BASE_SPEED2=300
836 for n in $(seq $MDSCOUNT); do
837 do_facet mds$n $LCTL set_param -n \
838 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
842 # MIN_MARGIN = 0.8 = 8 / 10
843 local MIN_SPEED=$(((PRE_FETCHED + \
844 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
845 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
846 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
847 # MAX_MARGIN = 1.2 = 12 / 10
848 MAX_SPEED=$(((PRE_FETCHED + \
849 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
850 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
851 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
852 for n in $(seq $MDSCOUNT); do
853 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
854 [ $SPEED -gt $MIN_SPEED ] ||
855 error "(11) Got speed $SPEED, expected more than" \
857 [ $SPEED -lt $MAX_SPEED ] ||
858 error "(12) Got speed $SPEED, expected less than" \
861 do_facet mds$n $LCTL set_param -n \
862 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
865 scrub_check_status 13 completed
867 run_test 9 "OI scrub speed control"
871 scrub_backup_restore 1
872 echo "starting MDTs with OI scrub disabled (1)" # NOTE(review): no loop variable n is visible here, so "mds$n" printed "mds"; wording now matches the "(2)" message
873 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
874 scrub_check_flags 4 recreated,inconsistent
875 mount_client $MOUNT || error "(5) Fail to start client!"
878 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
879 do_nodes $(comma_list $(mdts_nodes)) \
880 $LCTL set_param fail_val=1 fail_loc=0x190
884 scrub_check_status 7 scanning
885 umount_client $MOUNT || error "(8) Fail to stop client!"
887 echo "starting MDTs with OI scrub disabled (2)"
888 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
889 scrub_check_status 11 paused
891 echo "starting MDTs without disabling OI scrub"
892 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
893 scrub_check_status 14 scanning
895 do_nodes $(comma_list $(mdts_nodes)) \
896 $LCTL set_param fail_loc=0 fail_val=0
898 scrub_check_status 15 completed
899 scrub_check_flags 16 ""
901 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
903 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
906 scrub_backup_restore 1
907 echo "starting MDTs with OI scrub disabled"
908 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
909 scrub_check_flags 4 recreated,inconsistent
911 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
912 do_nodes $(comma_list $(mdts_nodes)) \
913 $LCTL set_param fail_val=3 fail_loc=0x190
916 scrub_check_status 6 scanning
918 echo "starting MDTs with OI scrub disabled"
919 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
920 scrub_check_status 9 paused
922 echo "starting MDTs without disabling OI scrub"
923 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
924 scrub_check_status 12 scanning
926 do_nodes $(comma_list $(mdts_nodes)) \
927 $LCTL set_param fail_loc=0 fail_val=0
929 scrub_check_status 13 completed
930 scrub_check_flags 14 ""
932 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
940 for n in $(seq $MDSCOUNT); do
941 $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
942 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
944 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
945 error "(2) Fail to create under $tdir/mds$n"
948 # reset OI scrub start point by force
950 scrub_check_status 4 completed
955 # OI scrub should skip the newly created objects on first access.
956 # Note that we are creating a new llog for every OST on every startup;
957 # new features can make this even less stable, so we only check that
958 # the number of skipped files is more than the number of known created
959 local MINIMUM=$((CREATED + 1)) # files + directory
960 for n in $(seq $MDSCOUNT); do
961 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
962 [ $SKIPPED -lt $MINIMUM ] &&
963 error "(5) Expect at least $MINIMUM objects" \
964 "skipped on mds$n, but got $SKIPPED"
966 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
969 # reset OI scrub start point by force
971 scrub_check_status 7 completed
973 # OI scrub should skip the newly created objects only once
974 for n in $(seq $MDSCOUNT); do
975 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
976 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
978 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
979 error "(8) Expect 0 objects skipped on mds$n, but" \
983 run_test 11 "OI scrub skips the new created objects only once"
987 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
989 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
990 do_facet ost1 $LCTL set_param fail_loc=0x195
991 local count=$(precreated_ost_obj_count 0 0)
993 createmany -o $DIR/$tdir/f $((count + 32))
994 umount_client $MOUNT || error "(1) Fail to stop client!"
996 stop ost1 || error "(2) Fail to stop ost1"
998 #define OBD_FAIL_OST_NODESTROY 0x233
999 do_facet ost1 $LCTL set_param fail_loc=0x233
1001 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1002 error "(3) Fail to start ost1"
1004 mount_client $MOUNT || error "(4) Fail to start client!"
1006 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1008 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1010 do_facet ost1 $LCTL set_param fail_loc=0
1011 wait_update_facet ost1 "$LCTL get_param -n \
1012 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1013 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1014 error "(7) Expected 'completed' on ost1" # name the status awaited above; no $expected is assigned in the visible statements of this test
1016 ls -ail $DIR/$tdir > /dev/null || {
1018 error "(8) ls should succeed"
1021 run_test 12 "OI scrub can rebuild invalid /O entries"
1024 check_mount_and_prep
1025 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1027 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1028 do_facet ost1 $LCTL set_param fail_loc=0x196
1029 local count=$(precreated_ost_obj_count 0 0)
1031 createmany -o $DIR/$tdir/f $((count + 32))
1032 do_facet ost1 $LCTL set_param fail_loc=0
1034 umount_client $MOUNT || error "(1) Fail to stop client!"
1036 stop ost1 || error "(2) Fail to stop ost1"
1038 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1039 error "(3) Fail to start ost1"
1041 mount_client $MOUNT || error "(4) Fail to start client!"
1043 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1045 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1047 wait_update_facet ost1 "$LCTL get_param -n \
1048 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1049 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1050 error "(7) Expected 'completed' on ost1" # name the status awaited above; no $expected is assigned in the visible statements of this test
1052 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1054 run_test 13 "OI scrub can rebuild missed /O entries"
1057 check_mount_and_prep
1058 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1060 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1061 do_facet ost1 $LCTL set_param fail_loc=0x196
1062 local count=$(precreated_ost_obj_count 0 0)
1064 createmany -o $DIR/$tdir/f $((count + 32))
1065 do_facet ost1 $LCTL set_param fail_loc=0
1067 umount_client $MOUNT || error "(1) Fail to stop client!"
1069 stop ost1 || error "(2) Fail to stop ost1"
1072 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1073 error "(3) Fail to run e2fsck error"
1075 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1076 error "(4) Fail to start ost1"
1078 mount_client $MOUNT || error "(5) Fail to start client!"
1080 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1081 awk '/^lf_repa[ri]*ed/ { print $2 }')
1082 [ $LF_REPAIRED -gt 0 ] ||
1083 error "(6) Some entry under /lost+found should be repaired"
1085 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1087 run_test 14 "OI scrub can repair objects under lost+found"
1090 local server_version=$(lustre_version_code $SINGLEMDS)
1092 scrub_backup_restore 1
1093 echo "starting MDTs with OI scrub disabled"
1094 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1095 scrub_check_status 3 init
1096 scrub_check_flags 4 recreated,inconsistent
1098 # run under dryrun mode
1099 if [ $server_version -lt $(version_code 2.5.58) ]; then
1100 scrub_start 5 --dryrun on
1102 scrub_start 5 --dryrun
1104 scrub_check_status 6 completed
1105 scrub_check_flags 7 recreated,inconsistent
1106 scrub_check_params 8 dryrun
1107 scrub_check_repaired 9 20
1109 # run under dryrun mode again
1110 if [ $server_version -lt $(version_code 2.5.58) ]; then
1111 scrub_start 10 --dryrun on
1113 scrub_start 10 --dryrun
1115 scrub_check_status 11 completed
1116 scrub_check_flags 12 recreated,inconsistent
1117 scrub_check_params 13 dryrun
1118 scrub_check_repaired 14 20
1120 # run under normal mode
1122 # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
1123 # work under Lustre-2.y (y >= 6), the test script should be fixed as
1124 # "-noff" or "--dryrun=off" or nothing by default.
1125 if [ $server_version -lt $(version_code 2.5.58) ]; then
1126 scrub_start 15 --dryrun off
1130 scrub_check_status 16 completed
1131 scrub_check_flags 17 ""
1132 scrub_check_params 18 ""
1133 scrub_check_repaired 19 20
1135 # run under normal mode again
1136 if [ $server_version -lt $(version_code 2.5.58) ]; then
1137 scrub_start 20 --dryrun off
1141 scrub_check_status 21 completed
1142 scrub_check_flags 22 ""
1143 scrub_check_params 23 ""
1144 scrub_check_repaired 24 0
1146 run_test 15 "Dryrun mode OI scrub"
1148 # restore MDS/OST size
1149 MDSSIZE=${SAVED_MDSSIZE}
1150 OSTSIZE=${SAVED_OSTSIZE}
1151 OSTCOUNT=${SAVED_OSTCOUNT}
1153 # cleanup the system at last