3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Locate the Lustre tree: keep a caller-supplied LUSTRE, otherwise use the
# parent of the directory that contains this script.
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Source the shared test framework script.
16 . $LUSTRE/tests/test-framework.sh
# Source the test configuration; when CONFIG is unset or empty it is assigned
# $LUSTRE/tests/cfg/$NAME.sh first (":=" expansion).
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave with status 0 when require_dsh_mds fails.
21 require_dsh_mds || exit 0
# Scrub is not exercised in interoperation mode: skip when check_versions fails.
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
# OI scrub is only tested on ldiskfs: skip and exit 0 when the $SINGLEMDS facet
# reports another backend fstype.
30 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
31 skip "test OI scrub only for ldiskfs" && exit 0
# Same backend requirement for ost1.
33 [ $(facet_fstype ost1) != "ldiskfs" ] &&
34 skip "test OI scrub only for ldiskfs" && exit 0
# The MDS must run at least version 2.2.90.
36 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
37 skip "Need MDS version at least 2.2.90" && exit 0
# Remember the configured MDS/OST sizes and the OST count; the script assigns
# them back to MDSSIZE/OSTSIZE/OSTCOUNT at its end.
39 SAVED_MDSSIZE=${MDSSIZE}
40 SAVED_OSTSIZE=${OSTSIZE}
41 SAVED_OSTCOUNT=${OSTCOUNT}
42 # use small MDS + OST size to speed formatting time
43 # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
44 # 200M MDT device can guarantee uninitialized groups during the OI scrub
47 # no need for too many OSTs; fewer OSTs reduce the format/start/stop overhead
49 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
53 # build up a clean test environment.
# Extend ALWAYS_EXCEPT according to the server versions under test.
# MDS older than 2.3.90: add "1a".
57 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
58 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
# MDS at or below 2.6.50: add "4".
60 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
61 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
# MDS at or below 2.4.1: add "15".
63 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
64 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
# MDS in the range [2.4.50, 2.4.90): add "15" as well.
66 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
67 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
68 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
# ost1 older than 2.4.50: add the OST-side tests 11, 12, 13 and 14.
70 [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
71 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
73 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
# Names of the MDT0000 and OST0000 targets of filesystem $FSNAME.
78 MDT_DEV="${FSNAME}-MDT0000"
79 OST_DEV="${FSNAME}-OST0000"
# Device of $SINGLEMDS: every "mds" substring is removed from the facet name
# and the remainder is handed to mdsdevname.
80 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
86 # use "lfsck_start -A" when we no longer need testing interop
87 for n in $(seq $MDSCOUNT); do
88 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
90 error "($error_id) Failed to start OI scrub on mds$n"
98 # use "lfsck_stop -A" when we no longer need testing interop
99 for n in $(seq $MDSCOUNT); do
100 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
101 error "($error_id) Failed to stop OI scrub on mds$n"
108 do_facet mds$n $LCTL get_param -n \
109 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
# Pre-built command strings. They are double-quoted, so $SINGLEMDS, $LCTL,
# $SCRUB_ONLY and the device names are expanded here, at assignment time.
# Callers expand them unquoted and may append options (e.g. "-r").
112 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
113 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
114 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the oi_scrub parameter of the MDT device (on $SINGLEMDS).
115 SHOW_SCRUB="do_facet $SINGLEMDS \
116 $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
# Print the oi_scrub parameter of the OST device (on ost1).
117 SHOW_SCRUB_ON_OST="do_facet ost1 \
118 $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
# Server mount options without and with the "noscrub" option.
119 MOUNT_OPTS_SCRUB="-o user_xattr"
120 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
128 echo "preparing... $(date)"
# Populate one directory per MDT, mds1 .. mds$MDSCOUNT, under $DIR/$tdir.
129 for n in $(seq $MDSCOUNT); do
130 echo "creating $nfiles files on mds$n"
# presumably "-i $((n - 1))" places the directory on MDT index n-1 -- TODO confirm
131 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
132 error "Failed to create directory mds$n"
# Copy the test scripts in; the data checks later diff these copies against
# the originals under $LUSTRE/tests.
133 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
134 error "Failed to copy files to mds$n"
# A sub-directory d_$tfile that receives two createmany entries with prefix "f".
135 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
136 error "mkdir failed on mds$n"
137 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
138 /dev/null || error "create failed on mds$n"
# Only when a positive file count was requested: create $nfiles more entries
# with prefix $tfile directly in the per-MDT directory.
139 if [[ $nfiles -gt 0 ]]; then
140 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
141 /dev/null || error "createmany failed on mds$n"
144 echo "prepared $(date)."
145 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
147 # sync local transactions on every MDT
148 do_nodes $(comma_list $(mdts_nodes)) \
149 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
151 # wait for a while to cancel update logs after transactions committed.
154 # sync again to guarantee all things done.
155 do_nodes $(comma_list $(mdts_nodes)) \
156 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
158 for n in $(seq $MDSCOUNT); do
160 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
169 for n in $(seq $MDSCOUNT); do
170 start mds$n $(mdsdevname $n) $opts >/dev/null ||
171 error "($error_id) Failed to start mds$n"
179 for n in $(seq $MDSCOUNT); do
180 echo "stopping mds$n"
181 stop mds$n >/dev/null ||
182 error "($error_id) Failed to stop mds$n"
# For every MDT (mds1 .. mds$MDSCOUNT), poll the "status" field of that
# target's osd-ldiskfs oi_scrub parameter through wait_update_facet until it
# equals $expected; on failure call error with the ($error_id) tag.
# Callers invoke it as "scrub_check_status <error_id> <status>".
# NOTE(review): the trailing 6 handed to wait_update_facet is presumably a
# timeout in seconds -- confirm against test-framework.sh.
186 scrub_check_status() {
191 for n in $(seq $MDSCOUNT); do
192 wait_update_facet mds$n "$LCTL get_param -n \
193 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
194 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
195 error "($error_id) Expected '$expected' on mds$n"
# For every MDT, read the oi_scrub parameter once, take the second field of
# the line starting with "flags", and call error when it is not string-equal
# to $expected. An empty $expected matches an empty flags value.
# Callers invoke it as "scrub_check_flags <error_id> <flags>".
199 scrub_check_flags() {
205 for n in $(seq $MDSCOUNT); do
206 actual=$(do_facet mds$n $LCTL get_param -n \
207 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
208 awk '/^flags/ { print $2 }')
209 if [ "$actual" != "$expected" ]; then
210 error "($error_id) Expected '$expected' on mds$n, but" \
# For every MDT, read the oi_scrub parameter once, take the second field of
# the line starting with "param", and call error when it is not string-equal
# to $expected.
# Callers invoke it as "scrub_check_params <error_id> <param>".
216 scrub_check_params() {
222 for n in $(seq $MDSCOUNT); do
223 actual=$(do_facet mds$n $LCTL get_param -n \
224 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
225 awk '/^param/ { print $2 }')
226 if [ "$actual" != "$expected" ]; then
227 error "($error_id) Expected '$expected' on mds$n, but" \
# For every MDT, read the "updated" counter from the oi_scrub parameter and
# compare it numerically with $expected:
#   - $expected == 0: the counter must be exactly 0;
#   - $expected != 0: the counter must be at least $expected.
# Callers invoke it as "scrub_check_repaired <error_id> <count>".
233 scrub_check_repaired() {
239 for n in $(seq $MDSCOUNT); do
240 actual=$(do_facet mds$n $LCTL get_param -n \
241 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
242 awk '/^updated/ { print $2 }')
244 if [ $expected -eq 0 -a $actual -ne 0 ]; then
245 error "($error_id) Expected no repaired on mds$n, but" \
249 if [ $expected -ne 0 -a $actual -lt $expected ]; then
250 error "($error_id) Expected '$expected' on mds$n, but" \
260 for n in $(seq $MDSCOUNT); do
261 diff -q $LUSTRE/tests/test-framework.sh \
262 $DIR/$tdir/mds$n/test-framework.sh ||
263 error "($error_id) File data check failed"
# For every MDT, compare $LUSTRE/tests/$filename with the copy stored under
# $DIR/$tdir/mds$n (diff -q) and call error with the ($error_id) tag when
# they differ.
# Callers invoke it as "scrub_check_data2 <filename> <error_id>".
267 scrub_check_data2() {
272 for n in $(seq $MDSCOUNT); do
273 diff -q $LUSTRE/tests/$filename \
274 $DIR/$tdir/mds$n/$filename ||
275 error "($error_id) File data check failed"
284 for n in $(seq $MDSCOUNT); do
285 mds_remove_ois mds$n $index ||
286 error "($error_id) Failed to remove OI .$index on mds$n"
# Run mds_backup_restore on every MDT facet (mds1 .. mds$MDSCOUNT), passing
# $igif through; call error with the ($error_id) tag on the first failure.
290 scrub_backup_restore() {
295 for n in $(seq $MDSCOUNT); do
296 mds_backup_restore mds$n $igif ||
297 error "($error_id) Backup/restore on mds$n failed"
# Set auto_scrub=1 for every osd-ldiskfs device on all MDT nodes.
301 scrub_enable_auto() {
302 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
303 osd-ldiskfs.*.auto_scrub=1
307 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
312 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
313 osd-ldiskfs.*.full_scrub_ratio=$ratio
316 full_scrub_threshold_rate() {
317 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
322 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
323 osd-ldiskfs.*.full_scrub_threshold_rate=$rate
328 echo "starting MDTs without disabling OI scrub"
329 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
330 scrub_check_status 2 init
331 scrub_check_flags 3 ""
332 mount_client $MOUNT || error "(4) Fail to start client!"
335 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
339 echo "start $SINGLEMDS without disabling OI scrub"
340 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
342 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
343 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
345 mount_client $MOUNT || error "(4) Fail to start client!"
346 #define OBD_FAIL_OSD_FID_MAPPING 0x193
347 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
348 # update .lustre OI mapping
350 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
351 umount_client $MOUNT || error "(5) Fail to stop client!"
353 echo "stop $SINGLEMDS"
354 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
356 echo "start $SINGLEMDS with disabling OI scrub"
357 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
358 error "(7) Fail to start MDS!"
360 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
361 [ "$FLAGS" == "inconsistent" ] ||
362 error "(9) Expect 'inconsistent', but got '$FLAGS'"
364 run_test 1a "Auto trigger initial OI scrub when server mounts"
369 echo "start MDTs without disabling OI scrub"
370 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
371 scrub_check_status 3 completed
372 mount_client $MOUNT || error "(4) Fail to start client!"
375 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
380 # OI files to be removed:
382 # idx 2: oi.16.{2,4,8,16,32}
383 # idx 3: oi.16.{3,9,27}
384 for index in 0 2 3; do
386 scrub_remove_ois 1 $index
387 echo "start MDTs with OI scrub disabled"
388 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
389 scrub_check_flags 3 recreated
391 scrub_check_status 5 completed
392 scrub_check_flags 6 ""
395 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
399 scrub_backup_restore 1
400 echo "starting MDTs without disabling OI scrub"
401 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
402 scrub_check_status 3 completed
403 mount_client $MOUNT || error "(4) Fail to start client!"
406 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
408 # test_3 is obsolete, it will be covered by test_5.
410 formatall > /dev/null
414 scrub_backup_restore 1
415 echo "starting MDTs with OI scrub disabled"
416 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
417 scrub_check_status 3 init
418 scrub_check_flags 4 recreated,inconsistent
420 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
424 scrub_backup_restore 1
425 echo "starting MDTs with OI scrub disabled"
426 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
427 scrub_check_flags 4 recreated,inconsistent
428 mount_client $MOUNT || error "(5) Fail to start client!"
434 scrub_check_status 7 completed
435 scrub_check_flags 8 ""
438 for n in $(seq $MDSCOUNT); do
439 updated0[$n]=$(scrub_status $n |
440 awk '/^prior_updated/ { print $2 }')
443 scrub_check_data2 sanity-scrub.sh 9
447 for n in $(seq $MDSCOUNT); do
448 updated1[$n]=$(scrub_status $n |
449 awk '/^prior_updated/ { print $2 }')
450 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
451 error "(10) NOT auto trigger full scrub as expected"
454 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
458 scrub_backup_restore 1
459 echo "starting MDTs with OI scrub disabled"
460 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
461 scrub_check_flags 4 recreated,inconsistent
462 mount_client $MOUNT || error "(5) Fail to start client!"
465 full_scrub_threshold_rate 10000
469 scrub_check_status 7 completed
470 scrub_check_flags 8 ""
473 for n in $(seq $MDSCOUNT); do
474 updated0[$n]=$(scrub_status $n |
475 awk '/^prior_updated/ { print $2 }')
477 echo "OI scrub on MDS$n status for the 1st time:"
478 do_facet mds$n $LCTL get_param -n \
479 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
482 scrub_check_data2 sanity-scrub.sh 9
485 scrub_check_status 10 completed
486 scrub_check_flags 11 ""
489 for n in $(seq $MDSCOUNT); do
490 updated1[$n]=$(scrub_status $n |
491 awk '/^prior_updated/ { print $2 }')
493 echo "OI scrub on MDS$n status for the 2nd time:"
494 do_facet mds$n $LCTL get_param -n \
495 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
497 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
498 error "(12) Auto trigger full scrub unexpectedly"
501 for n in $(seq $MDSCOUNT); do
502 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
503 error "(13) fail to ls"
507 scrub_check_status 14 completed
508 scrub_check_flags 15 ""
510 for n in $(seq $MDSCOUNT); do
511 updated0[$n]=$(scrub_status $n |
512 awk '/^prior_updated/ { print $2 }')
514 echo "OI scrub on MDS$n status for the 3rd time:"
515 do_facet mds$n $LCTL get_param -n \
516 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
518 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
519 error "(16) Auto trigger full scrub unexpectedly"
522 for n in $(seq $MDSCOUNT); do
523 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
527 for n in $(seq $MDSCOUNT); do
528 updated1[$n]=$(scrub_status $n |
529 awk '/^prior_updated/ { print $2 }')
530 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
531 echo "OI scrub on MDS$n status for the 4th time:"
532 do_facet mds$n $LCTL get_param -n \
533 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
535 error "(18) NOT auto trigger full scrub as expected"
539 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
543 scrub_backup_restore 1
544 echo "starting MDTs with OI scrub disabled"
545 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
546 scrub_check_flags 4 recreated,inconsistent
547 mount_client $MOUNT || error "(5) Fail to start client!"
550 full_scrub_threshold_rate 20
554 scrub_check_status 7 completed
555 scrub_check_flags 8 ""
558 for n in $(seq $MDSCOUNT); do
559 updated0[$n]=$(scrub_status $n |
560 awk '/^prior_updated/ { print $2 }')
562 echo "OI scrub on MDS$n status for the 1st time:"
563 do_facet mds$n $LCTL get_param -n \
564 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
567 scrub_check_data2 sanity-scrub.sh 9
570 scrub_check_status 10 completed
571 scrub_check_flags 11 ""
574 for n in $(seq $MDSCOUNT); do
575 updated1[$n]=$(scrub_status $n |
576 awk '/^prior_updated/ { print $2 }')
578 echo "OI scrub on MDS$n status for the 2nd time:"
579 do_facet mds$n $LCTL get_param -n \
580 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
582 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
583 error "(12) Auto trigger full scrub unexpectedly"
586 for n in $(seq $MDSCOUNT); do
587 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
588 error "(13) fail to ls"
592 scrub_check_status 14 completed
593 scrub_check_flags 15 ""
595 for n in $(seq $MDSCOUNT); do
596 updated0[$n]=$(scrub_status $n |
597 awk '/^prior_updated/ { print $2 }')
599 echo "OI scrub on MDS$n status for the 3rd time:"
600 do_facet mds$n $LCTL get_param -n \
601 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
603 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
604 error "(16) Auto trigger full scrub unexpectedly"
607 for n in $(seq $MDSCOUNT); do
608 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
612 for n in $(seq $MDSCOUNT); do
613 updated1[$n]=$(scrub_status $n |
614 awk '/^prior_updated/ { print $2 }')
615 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
616 echo "OI scrub on MDS$n status for the 4th time:"
617 do_facet mds$n $LCTL get_param -n \
618 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
620 error "(18) NOT auto trigger full scrub as expected"
624 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
627 formatall > /dev/null
631 scrub_backup_restore 1
632 echo "starting MDTs with OI scrub disabled (1)"
633 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
634 scrub_check_status 3 init
635 scrub_check_flags 4 recreated,inconsistent
636 mount_client $MOUNT || error "(5) Fail to start client!"
640 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
641 do_nodes $(comma_list $(mdts_nodes)) \
642 $LCTL set_param fail_val=3 fail_loc=0x190
645 umount_client $MOUNT || error "(7) Fail to stop client!"
646 scrub_check_status 8 scanning
648 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
649 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
654 do_nodes $(comma_list $(mdts_nodes)) \
655 $LCTL set_param fail_loc=0 fail_val=0
657 echo "starting MDTs with OI scrub disabled (2)"
658 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
659 scrub_check_status 11 crashed
662 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
663 do_nodes $(comma_list $(mdts_nodes)) \
664 $LCTL set_param fail_val=3 fail_loc=0x190
666 echo "starting MDTs without disabling OI scrub"
667 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
668 scrub_check_status 14 scanning
670 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
671 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
673 scrub_check_status 15 failed
674 mount_client $MOUNT || error "(16) Fail to start client!"
677 do_nodes $(comma_list $(mdts_nodes)) \
678 $LCTL set_param fail_loc=0 fail_val=0
683 for n in $(seq $MDSCOUNT); do
684 stat $DIR/$tdir/mds$n/${tfile}800 &
688 for n in $(seq $MDSCOUNT); do
689 wait ${pids[$n]} || error "(18) Fail to stat mds$n/${tfile}800"
692 scrub_check_status 19 completed
693 scrub_check_flags 20 ""
695 run_test 5 "OI scrub state machine"
699 scrub_backup_restore 1
700 echo "starting MDTs with OI scrub disabled"
701 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
702 scrub_check_flags 4 recreated,inconsistent
703 mount_client $MOUNT || error "(5) Fail to start client!"
707 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
708 do_nodes $(comma_list $(mdts_nodes)) \
709 $LCTL set_param fail_val=2 fail_loc=0x190
713 # Sleep 5 sec to guarantee at least one object processed by OI scrub
715 # Fail the OI scrub to guarantee there is at least one checkpoint
716 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
717 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
719 scrub_check_status 7 failed
721 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
722 do_nodes $(comma_list $(mdts_nodes)) \
723 $LCTL set_param fail_val=3 fail_loc=0x190
726 for n in $(seq $MDSCOUNT); do
727 # stat will re-trigger OI scrub
728 stat $DIR/$tdir/mds$n/${tfile}800 ||
729 error "(8) Failed to stat mds$n/${tfile}800"
732 umount_client $MOUNT || error "(9) Fail to stop client!"
733 scrub_check_status 10 scanning
735 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
736 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
740 for n in $(seq $MDSCOUNT); do
741 position0[$n]=$(scrub_status $n |
742 awk '/^last_checkpoint_position/ {print $2}')
743 position0[$n]=$((${position0[$n]} + 1))
748 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
749 do_nodes $(comma_list $(mdts_nodes)) \
750 $LCTL set_param fail_val=3 fail_loc=0x190
752 echo "starting MDTs without disabling OI scrub"
753 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
755 scrub_check_status 13 scanning
758 for n in $(seq $MDSCOUNT); do
759 position1[$n]=$(scrub_status $n |
760 awk '/^latest_start_position/ {print $2}')
761 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
762 error "(14) Expected position ${position0[$n]}, but" \
763 "got ${position1[$n]}"
767 do_nodes $(comma_list $(mdts_nodes)) \
768 $LCTL set_param fail_loc=0 fail_val=0
770 scrub_check_status 15 completed
771 scrub_check_flags 16 ""
773 run_test 6 "OI scrub resumes from last checkpoint"
777 scrub_backup_restore 1
778 echo "starting MDTs with OI scrub disabled"
779 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
780 scrub_check_flags 4 recreated,inconsistent
781 mount_client $MOUNT || error "(5) Fail to start client!"
785 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
786 do_nodes $(comma_list $(mdts_nodes)) \
787 $LCTL set_param fail_val=3 fail_loc=0x190
792 for n in $(seq $MDSCOUNT); do
793 stat $DIR/$tdir/mds$n/${tfile}300 ||
794 error "(7) Failed to stat mds$n/${tfile}300!"
797 scrub_check_status 8 scanning
798 scrub_check_flags 9 recreated,inconsistent,auto
800 do_nodes $(comma_list $(mdts_nodes)) \
801 $LCTL set_param fail_loc=0 fail_val=0
803 scrub_check_status 10 completed
806 run_test 7 "System is available during OI scrub scanning"
810 scrub_backup_restore 1
811 echo "starting MDTs with OI scrub disabled"
812 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
813 scrub_check_flags 4 recreated,inconsistent
815 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
816 do_nodes $(comma_list $(mdts_nodes)) \
817 $LCTL set_param fail_val=1 fail_loc=0x190
820 scrub_check_status 6 scanning
822 scrub_check_status 8 stopped
824 scrub_check_status 10 scanning
826 do_nodes $(comma_list $(mdts_nodes)) \
827 $LCTL set_param fail_loc=0 fail_val=0
829 scrub_check_status 11 completed
830 scrub_check_flags 12 ""
832 run_test 8 "Control OI scrub manually"
835 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
836 skip "Testing on UP system, the speed may be inaccurate."
841 scrub_backup_restore 1
843 echo "starting MDTs with OI scrub disabled"
844 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
845 scrub_check_flags 4 recreated,inconsistent
847 local BASE_SPEED1=100
849 # OI scrub should run with full speed under inconsistent case
850 scrub_start 5 -s $BASE_SPEED1
853 scrub_check_status 6 completed
854 scrub_check_flags 7 ""
855 # OI scrub should run with limited speed under non-inconsistent case
856 scrub_start 8 -s $BASE_SPEED1 -r
859 scrub_check_status 9 scanning
861 # Do NOT ignore that there are 1024 pre-fetched items. And there
862 # may be time error, normally it should be less than 2 seconds.
863 # We allow another 20% schedule error.
864 local PRE_FETCHED=1024
866 # MAX_MARGIN = 1.2 = 12 / 10
867 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
868 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
870 for n in $(seq $MDSCOUNT); do
871 local SPEED=$(scrub_status $n | \
872 awk '/^average_speed/ { print $2 }')
873 [ $SPEED -lt $MAX_SPEED ] ||
874 error "(10) Got speed $SPEED, expected less than" \
879 local BASE_SPEED2=300
881 for n in $(seq $MDSCOUNT); do
882 do_facet mds$n $LCTL set_param -n \
883 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
887 # MIN_MARGIN = 0.8 = 8 / 10
888 local MIN_SPEED=$(((PRE_FETCHED + \
889 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
890 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
891 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
892 # MAX_MARGIN = 1.2 = 12 / 10
893 MAX_SPEED=$(((PRE_FETCHED + \
894 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
895 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
896 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
897 for n in $(seq $MDSCOUNT); do
898 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
899 [ $SPEED -gt $MIN_SPEED ] ||
900 error "(11) Got speed $SPEED, expected more than" \
902 [ $SPEED -lt $MAX_SPEED ] ||
903 error "(12) Got speed $SPEED, expected less than" \
906 do_facet mds$n $LCTL set_param -n \
907 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
910 scrub_check_status 13 completed
912 run_test 9 "OI scrub speed control"
# Backup/restore every MDT, then start all MDTs with the noscrub mount option
# and check that the OI files are flagged as recreated and inconsistent.
916 scrub_backup_restore 1
# scrub_start_mds starts every MDT, so the message names "MDTs" (as the "(2)"
# message further down does) instead of interpolating a loop index.
917 echo "starting MDTs with OI scrub disabled (1)"
918 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
919 scrub_check_flags 4 recreated,inconsistent
920 mount_client $MOUNT || error "(5) Fail to start client!"
924 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
925 do_nodes $(comma_list $(mdts_nodes)) \
926 $LCTL set_param fail_val=1 fail_loc=0x190
929 scrub_check_status 7 scanning
930 umount_client $MOUNT || error "(8) Fail to stop client!"
932 echo "starting MDTs with OI scrub disabled (2)"
933 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
934 scrub_check_status 11 paused
936 echo "starting MDTs without disabling OI scrub"
937 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
938 scrub_check_status 14 scanning
940 do_nodes $(comma_list $(mdts_nodes)) \
941 $LCTL set_param fail_loc=0 fail_val=0
943 scrub_check_status 15 completed
944 scrub_check_flags 16 ""
946 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
948 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
951 scrub_backup_restore 1
952 echo "starting MDTs with OI scrub disabled"
953 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
954 scrub_check_flags 4 recreated,inconsistent
956 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
957 do_nodes $(comma_list $(mdts_nodes)) \
958 $LCTL set_param fail_val=3 fail_loc=0x190
961 scrub_check_status 6 scanning
963 echo "starting MDTs with OI scrub disabled"
964 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
965 scrub_check_status 9 paused
967 echo "starting MDTs without disabling OI scrub"
968 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
969 scrub_check_status 12 scanning
971 do_nodes $(comma_list $(mdts_nodes)) \
972 $LCTL set_param fail_loc=0 fail_val=0
974 scrub_check_status 13 completed
975 scrub_check_flags 14 ""
977 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
985 for n in $(seq $MDSCOUNT); do
986 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
987 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
989 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
990 error "(2) Fail to create under $tdir/mds$n"
993 # reset OI scrub start point by force
995 scrub_check_status 4 completed
1000 # OI scrub should skip the new created objects for the first accessing
1001 # notice we're creating a new llog for every OST on every startup
1002 # new features can make this even less stable, so we only check that
1003 # the number of skipped files is more than the number of known created
1004 local MINIMUM=$((CREATED + 1)) # files + directory
1005 for n in $(seq $MDSCOUNT); do
1006 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1007 [ $SKIPPED -lt $MINIMUM ] &&
1008 error "(5) Expect at least $MINIMUM objects" \
1009 "skipped on mds$n, but got $SKIPPED"
1011 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1014 # reset OI scrub start point by force
1016 scrub_check_status 7 completed
1018 # OI scrub should skip the new created object only once
1019 for n in $(seq $MDSCOUNT); do
1020 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1021 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1023 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1024 error "(8) Expect 0 objects skipped on mds$n, but" \
1028 run_test 11 "OI scrub skips the new created objects only once"
1031 check_mount_and_prep
1032 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1034 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1035 do_facet ost1 $LCTL set_param fail_loc=0x195
1036 local count=$(precreated_ost_obj_count 0 0)
1038 createmany -o $DIR/$tdir/f $((count + 32))
1039 umount_client $MOUNT || error "(1) Fail to stop client!"
1041 stop ost1 || error "(2) Fail to stop ost1"
1043 #define OBD_FAIL_OST_NODESTROY 0x233
1044 do_facet ost1 $LCTL set_param fail_loc=0x233
1046 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1047 error "(3) Fail to start ost1"
1049 mount_client $MOUNT || error "(4) Fail to start client!"
1051 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1053 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1055 do_facet ost1 $LCTL set_param fail_loc=0
# Poll ost1's oi_scrub "status" field until it reads "completed". The failure
# message names the literal status being waited for.
1056 wait_update_facet ost1 "$LCTL get_param -n \
1057 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1058 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1059 error "(7) Expected 'completed' on ost1"
1061 ls -ail $DIR/$tdir > /dev/null || {
1063 error "(8) ls should succeed"
1066 run_test 12 "OI scrub can rebuild invalid /O entries"
1069 check_mount_and_prep
1070 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1072 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1073 do_facet ost1 $LCTL set_param fail_loc=0x196
1074 local count=$(precreated_ost_obj_count 0 0)
1076 createmany -o $DIR/$tdir/f $((count + 32))
1077 do_facet ost1 $LCTL set_param fail_loc=0
1079 umount_client $MOUNT || error "(1) Fail to stop client!"
1081 stop ost1 || error "(2) Fail to stop ost1"
1083 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1084 error "(3) Fail to start ost1"
1086 mount_client $MOUNT || error "(4) Fail to start client!"
1088 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1090 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Poll ost1's oi_scrub "status" field until it reads "completed". The failure
# message names the literal status being waited for.
1092 wait_update_facet ost1 "$LCTL get_param -n \
1093 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1094 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1095 error "(7) Expected 'completed' on ost1"
1097 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1099 run_test 13 "OI scrub can rebuild missed /O entries"
1102 check_mount_and_prep
1103 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1105 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1106 do_facet ost1 $LCTL set_param fail_loc=0x196
1107 local count=$(precreated_ost_obj_count 0 0)
1109 createmany -o $DIR/$tdir/f $((count + 32))
1110 do_facet ost1 $LCTL set_param fail_loc=0
1112 umount_client $MOUNT || error "(1) Fail to stop client!"
1114 stop ost1 || error "(2) Fail to stop ost1"
1117 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1118 error "(3) Fail to run e2fsck error"
1120 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1121 error "(4) Fail to start ost1"
1123 mount_client $MOUNT || error "(5) Fail to start client!"
1125 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1126 awk '/^lf_repa[ri]*ed/ { print $2 }')
1127 [ $LF_REPAIRED -gt 0 ] ||
1128 error "(6) Some entry under /lost+found should be repaired"
1130 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1132 run_test 14 "OI scrub can repair objects under lost+found"
1135 local server_version=$(lustre_version_code $SINGLEMDS)
1137 scrub_backup_restore 1
1138 echo "starting MDTs with OI scrub disabled"
1139 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1140 scrub_check_status 3 init
1141 scrub_check_flags 4 recreated,inconsistent
1143 # run under dryrun mode
1144 if [ $server_version -lt $(version_code 2.5.58) ]; then
1145 scrub_start 5 --dryrun on
1147 scrub_start 5 --dryrun
1149 scrub_check_status 6 completed
1150 scrub_check_flags 7 recreated,inconsistent
1151 scrub_check_params 8 dryrun
1152 scrub_check_repaired 9 20
1154 # run under dryrun mode again
1155 if [ $server_version -lt $(version_code 2.5.58) ]; then
1156 scrub_start 10 --dryrun on
1158 scrub_start 10 --dryrun
1160 scrub_check_status 11 completed
1161 scrub_check_flags 12 recreated,inconsistent
1162 scrub_check_params 13 dryrun
1163 scrub_check_repaired 14 20
1165 # run under normal mode
1167 # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
1168 # work under Lustre-2.y (y >= 6), the test script should be fixed as
1169 # "-noff" or "--dryrun=off" or nothing by default.
1170 if [ $server_version -lt $(version_code 2.5.58) ]; then
1171 scrub_start 15 --dryrun off
1175 scrub_check_status 16 completed
1176 scrub_check_flags 17 ""
1177 scrub_check_params 18 ""
1178 scrub_check_repaired 19 20
1180 # run under normal mode again
1181 if [ $server_version -lt $(version_code 2.5.58) ]; then
1182 scrub_start 20 --dryrun off
1186 scrub_check_status 21 completed
1187 scrub_check_flags 22 ""
1188 scrub_check_params 23 ""
1189 scrub_check_repaired 24 0
1191 run_test 15 "Dryrun mode OI scrub"
1193 # restore MDS/OST size
1194 MDSSIZE=${SAVED_MDSSIZE}
1195 OSTSIZE=${SAVED_OSTSIZE}
1196 OSTCOUNT=${SAVED_OSTCOUNT}
1198 # cleanup the system at last