3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16 . $LUSTRE/tests/test-framework.sh
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
# Pre-flight guards: each one calls skip with a reason and then exits 0.
# Because the parts are chained with &&, "exit 0" only runs when skip itself
# returns success.
# Guard 1: the backend fstype reported for $SINGLEMDS must be ldiskfs.
30 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
31 skip "test OI scrub only for ldiskfs" && exit 0
# Guard 2: the backend fstype reported for ost1 must be ldiskfs as well.
33 [ $(facet_fstype ost1) != "ldiskfs" ] &&
34 skip "test OI scrub only for ldiskfs" && exit 0
# Guard 3: the MDS version code must be at least that of 2.2.90.
36 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
37 skip "Need MDS version at least 2.2.90" && exit 0
39 SAVED_MDSSIZE=${MDSSIZE}
40 SAVED_OSTSIZE=${OSTSIZE}
41 SAVED_OSTCOUNT=${OSTCOUNT}
42 # use small MDS + OST size to speed formatting time
43 # do not make MDSSIZE/OSTSIZE too small: that affects the default journal size
44 # 200M MDT device can guarantee uninitialized groups during the OI scrub
47 # there is no need for many OSTs; fewer OSTs reduce the format/start/stop overhead
49 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
53 # build up a clean test environment.
# Extend ALWAYS_EXCEPT according to the version codes reported by the servers.
# Each statement appends test ids only when its version comparison holds.
# MDS older than 2.3.90: append "1a".
57 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
58 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
# MDS at or below 2.6.50: append "4".
60 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
61 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
# MDS at or below 2.4.1: append "15".
63 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
64 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
# MDS in the range [2.4.50, 2.4.90): append "15" too. This range does not
# overlap the "<= 2.4.1" case above, so "15" is added at most once.
66 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
67 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
68 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
# ost1 older than 2.4.50: append "11 12 13 14".
70 [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
71 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
73 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
78 MDT_DEV="${FSNAME}-MDT0000"
79 OST_DEV="${FSNAME}-OST0000"
80 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
86 # use "lfsck_start -A" when we no longer need to test interop
87 for n in $(seq $MDSCOUNT); do
88 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
90 error "($error_id) Failed to start OI scrub on mds$n"
98 # use "lfsck_stop -A" when we no longer need to test interop
99 for n in $(seq $MDSCOUNT); do
100 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
101 error "($error_id) Failed to stop OI scrub on mds$n"
108 do_facet mds$n $LCTL get_param -n \
109 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
# Command strings used by the tests. Every variable inside the double quotes
# ($SINGLEMDS, $LCTL, $MDT_DEV, $OST_DEV, $SCRUB_ONLY) is expanded here, at
# assignment time, so those values must already be set.
# NOTE(review): $SCRUB_ONLY is not assigned in the visible lines -- confirm it
# is defined before this point.
# The strings are later run unquoted, optionally with extra arguments or in a
# pipeline (e.g. `$START_SCRUB_ON_OST -r`, `$SHOW_SCRUB | awk ...`).
# Start/stop scrub on ${MDT_DEV} via $SINGLEMDS, or on ${OST_DEV} via ost1.
112 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
113 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
114 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Dump the osd-ldiskfs oi_scrub parameter for the MDT / the OST. The
# backslash-newline inside the double quotes is a line continuation, so each
# command string ends up on a single line.
115 SHOW_SCRUB="do_facet $SINGLEMDS \
116 $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
117 SHOW_SCRUB_ON_OST="do_facet ost1 \
118 $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
# Server mount options passed to start / scrub_start_mds: the second set
# differs from the first only by the extra "noscrub" option.
119 MOUNT_OPTS_SCRUB="-o user_xattr"
120 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
128 echo "preparing... $(date)"
129 for n in $(seq $MDSCOUNT); do
130 echo "creating $nfiles files on mds$n"
131 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
132 error "Failed to create directory mds$n"
133 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
134 error "Failed to copy files to mds$n"
135 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
136 error "mkdir failed on mds$n"
137 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
138 /dev/null || error "create failed on mds$n"
139 if [[ $nfiles -gt 0 ]]; then
140 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
141 /dev/null || error "createmany failed on mds$n"
144 echo "prepared $(date)."
145 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
147 # sync local transactions on every MDT
148 do_nodes $(comma_list $(mdts_nodes)) \
149 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
151 # wait for a while to cancel update logs after transactions committed.
154 # sync again to guarantee all things done.
155 do_nodes $(comma_list $(mdts_nodes)) \
156 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
158 for n in $(seq $MDSCOUNT); do
160 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
169 for n in $(seq $MDSCOUNT); do
170 start mds$n $(mdsdevname $n) $opts >/dev/null ||
171 error "($error_id) Failed to start mds$n"
179 for n in $(seq $MDSCOUNT); do
180 echo "stopping mds$n"
181 stop mds$n >/dev/null ||
182 error "($error_id) Failed to stop mds$n"
186 scrub_check_status() {
191 for n in $(seq $MDSCOUNT); do
192 wait_update_facet mds$n "$LCTL get_param -n \
193 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
194 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
195 error "($error_id) Expected '$expected' on mds$n"
199 scrub_check_flags() {
205 for n in $(seq $MDSCOUNT); do
206 actual=$(do_facet mds$n $LCTL get_param -n \
207 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
208 awk '/^flags/ { print $2 }')
209 if [ "$actual" != "$expected" ]; then
210 error "($error_id) Expected '$expected' on mds$n, but" \
216 scrub_check_params() {
222 for n in $(seq $MDSCOUNT); do
223 actual=$(do_facet mds$n $LCTL get_param -n \
224 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
225 awk '/^param/ { print $2 }')
226 if [ "$actual" != "$expected" ]; then
227 error "($error_id) Expected '$expected' on mds$n, but" \
233 scrub_check_repaired() {
239 for n in $(seq $MDSCOUNT); do
240 actual=$(do_facet mds$n $LCTL get_param -n \
241 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
242 awk '/^updated/ { print $2 }')
244 if [ $expected -eq 0 -a $actual -ne 0 ]; then
245 error "($error_id) Expected no repaired on mds$n, but" \
249 if [ $expected -ne 0 -a $actual -lt $expected ]; then
250 error "($error_id) Expected '$expected' on mds$n, but" \
260 for n in $(seq $MDSCOUNT); do
261 diff -q $LUSTRE/tests/test-framework.sh \
262 $DIR/$tdir/mds$n/test-framework.sh ||
263 error "($error_id) File data check failed"
267 scrub_check_data2() {
272 for n in $(seq $MDSCOUNT); do
273 diff -q $LUSTRE/tests/$filename \
274 $DIR/$tdir/mds$n/$filename ||
275 error "($error_id) File data check failed"
284 for n in $(seq $MDSCOUNT); do
285 mds_remove_ois mds$n $index ||
286 error "($error_id) Failed to remove OI .$index on mds$n"
290 scrub_backup_restore() {
295 for n in $(seq $MDSCOUNT); do
296 mds_backup_restore mds$n $igif ||
297 error "($error_id) Backup/restore on mds$n failed"
301 scrub_enable_auto() {
302 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
303 osd-ldiskfs.*.auto_scrub=1
307 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
312 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
313 osd-ldiskfs.*.full_scrub_ratio=$ratio
316 full_scrub_threshold_rate() {
317 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
322 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
323 osd-ldiskfs.*.full_scrub_threshold_rate=$rate
328 echo "starting MDTs without disabling OI scrub"
329 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
330 scrub_check_status 2 init
331 scrub_check_flags 3 ""
332 mount_client $MOUNT || error "(4) Fail to start client!"
335 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
339 echo "start $SINGLEMDS without disabling OI scrub"
340 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
342 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
343 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
345 mount_client $MOUNT || error "(4) Fail to start client!"
346 #define OBD_FAIL_OSD_FID_MAPPING 0x193
347 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
348 # update .lustre OI mapping
350 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
351 umount_client $MOUNT || error "(5) Fail to stop client!"
353 echo "stop $SINGLEMDS"
354 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
356 echo "start $SINGLEMDS with disabling OI scrub"
357 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
358 error "(7) Fail to start MDS!"
360 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
361 [ "$FLAGS" == "inconsistent" ] ||
362 error "(9) Expect 'inconsistent', but got '$FLAGS'"
364 run_test 1a "Auto trigger initial OI scrub when server mounts"
369 echo "start MDTs without disabling OI scrub"
370 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
371 scrub_check_status 3 completed
372 mount_client $MOUNT || error "(4) Fail to start client!"
375 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
380 # OI files to be removed:
382 # idx 2: oi.16.{2,4,8,16,32}
383 # idx 3: oi.16.{3,9,27}
384 for index in 0 2 3; do
386 scrub_remove_ois 1 $index
387 echo "start MDTs with OI scrub disabled"
388 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
389 scrub_check_flags 3 recreated
391 scrub_check_status 5 completed
392 scrub_check_flags 6 ""
395 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
399 scrub_backup_restore 1
400 echo "starting MDTs without disabling OI scrub"
401 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
402 scrub_check_status 3 completed
403 mount_client $MOUNT || error "(4) Fail to start client!"
406 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
408 # test_3 is obsolete, it will be covered by test_5.
410 formatall > /dev/null
414 scrub_backup_restore 1
415 echo "starting MDTs with OI scrub disabled"
416 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
417 scrub_check_status 3 init
418 scrub_check_flags 4 recreated,inconsistent
420 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
424 scrub_backup_restore 1
425 echo "starting MDTs with OI scrub disabled"
426 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
427 scrub_check_flags 4 recreated,inconsistent
428 mount_client $MOUNT || error "(5) Fail to start client!"
434 scrub_check_status 7 completed
435 scrub_check_flags 8 ""
438 for n in $(seq $MDSCOUNT); do
439 updated0[$n]=$(scrub_status $n |
440 awk '/^prior_updated/ { print $2 }')
443 scrub_check_data2 sanity-scrub.sh 9
447 for n in $(seq $MDSCOUNT); do
448 updated1[$n]=$(scrub_status $n |
449 awk '/^prior_updated/ { print $2 }')
450 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
451 error "(10) NOT auto trigger full scrub as expected"
454 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
458 scrub_backup_restore 1
459 echo "starting MDTs with OI scrub disabled"
460 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
461 scrub_check_flags 4 recreated,inconsistent
462 mount_client $MOUNT || error "(5) Fail to start client!"
465 full_scrub_threshold_rate 10000
469 scrub_check_status 7 completed
470 scrub_check_flags 8 ""
473 for n in $(seq $MDSCOUNT); do
474 updated0[$n]=$(scrub_status $n |
475 awk '/^prior_updated/ { print $2 }')
477 echo "OI scrub on MDS$n status for the 1st time:"
478 do_facet mds$n $LCTL get_param -n \
479 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
482 scrub_check_data2 sanity-scrub.sh 9
485 scrub_check_status 10 completed
486 scrub_check_flags 11 ""
489 for n in $(seq $MDSCOUNT); do
490 updated1[$n]=$(scrub_status $n |
491 awk '/^prior_updated/ { print $2 }')
493 echo "OI scrub on MDS$n status for the 2nd time:"
494 do_facet mds$n $LCTL get_param -n \
495 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
497 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
498 error "(12) Auto trigger full scrub unexpectedly"
501 for n in $(seq $MDSCOUNT); do
502 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
503 error "(13) fail to ls"
507 scrub_check_status 14 completed
508 scrub_check_flags 15 ""
510 for n in $(seq $MDSCOUNT); do
511 updated0[$n]=$(scrub_status $n |
512 awk '/^prior_updated/ { print $2 }')
514 echo "OI scrub on MDS$n status for the 3rd time:"
515 do_facet mds$n $LCTL get_param -n \
516 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
518 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
519 error "(16) Auto trigger full scrub unexpectedly"
522 for n in $(seq $MDSCOUNT); do
523 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
527 for n in $(seq $MDSCOUNT); do
528 updated1[$n]=$(scrub_status $n |
529 awk '/^prior_updated/ { print $2 }')
530 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
531 echo "OI scrub on MDS$n status for the 4th time:"
532 do_facet mds$n $LCTL get_param -n \
533 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
535 error "(18) NOT auto trigger full scrub as expected"
539 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
543 scrub_backup_restore 1
544 echo "starting MDTs with OI scrub disabled"
545 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
546 scrub_check_flags 4 recreated,inconsistent
547 mount_client $MOUNT || error "(5) Fail to start client!"
550 full_scrub_threshold_rate 20
554 scrub_check_status 7 completed
555 scrub_check_flags 8 ""
558 for n in $(seq $MDSCOUNT); do
559 updated0[$n]=$(scrub_status $n |
560 awk '/^prior_updated/ { print $2 }')
562 echo "OI scrub on MDS$n status for the 1st time:"
563 do_facet mds$n $LCTL get_param -n \
564 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
567 scrub_check_data2 sanity-scrub.sh 9
570 scrub_check_status 10 completed
571 scrub_check_flags 11 ""
574 for n in $(seq $MDSCOUNT); do
575 updated1[$n]=$(scrub_status $n |
576 awk '/^prior_updated/ { print $2 }')
578 echo "OI scrub on MDS$n status for the 2nd time:"
579 do_facet mds$n $LCTL get_param -n \
580 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
582 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
583 error "(12) Auto trigger full scrub unexpectedly"
586 for n in $(seq $MDSCOUNT); do
587 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
588 error "(13) fail to ls"
592 scrub_check_status 14 completed
593 scrub_check_flags 15 ""
595 for n in $(seq $MDSCOUNT); do
596 updated0[$n]=$(scrub_status $n |
597 awk '/^prior_updated/ { print $2 }')
599 echo "OI scrub on MDS$n status for the 3rd time:"
600 do_facet mds$n $LCTL get_param -n \
601 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
603 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
604 error "(16) Auto trigger full scrub unexpectedly"
607 for n in $(seq $MDSCOUNT); do
608 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
612 for n in $(seq $MDSCOUNT); do
613 updated1[$n]=$(scrub_status $n |
614 awk '/^prior_updated/ { print $2 }')
615 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
616 echo "OI scrub on MDS$n status for the 4th time:"
617 do_facet mds$n $LCTL get_param -n \
618 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
620 error "(18) NOT auto trigger full scrub as expected"
624 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
627 formatall > /dev/null
631 scrub_backup_restore 1
632 echo "starting MDTs with OI scrub disabled (1)"
633 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
634 scrub_check_status 3 init
635 scrub_check_flags 4 recreated,inconsistent
636 mount_client $MOUNT || error "(5) Fail to start client!"
640 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
641 do_nodes $(comma_list $(mdts_nodes)) \
642 $LCTL set_param fail_val=3 fail_loc=0x190
645 umount_client $MOUNT || error "(7) Fail to stop client!"
646 scrub_check_status 8 scanning
648 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
649 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
654 do_nodes $(comma_list $(mdts_nodes)) \
655 $LCTL set_param fail_loc=0 fail_val=0
657 echo "starting MDTs with OI scrub disabled (2)"
658 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
659 scrub_check_status 11 crashed
662 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
663 do_nodes $(comma_list $(mdts_nodes)) \
664 $LCTL set_param fail_val=3 fail_loc=0x190
666 echo "starting MDTs without disabling OI scrub"
667 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
668 scrub_check_status 14 scanning
670 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
671 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
673 scrub_check_status 15 failed
674 mount_client $MOUNT || error "(16) Fail to start client!"
677 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
678 do_nodes $(comma_list $(mdts_nodes)) \
679 $LCTL set_param fail_val=3 fail_loc=0x190
684 for n in $(seq $MDSCOUNT); do
685 stat $DIR/$tdir/mds$n/${tfile}800 &
691 scrub_check_status 17 scanning
693 do_nodes $(comma_list $(mdts_nodes)) \
694 $LCTL set_param fail_loc=0 fail_val=0
696 for n in $(seq $MDSCOUNT); do
697 wait ${pids[$n]} || error "(18) Fail to stat mds$n/${tfile}800"
700 scrub_check_status 19 completed
701 scrub_check_flags 20 ""
703 run_test 5 "OI scrub state machine"
707 scrub_backup_restore 1
708 echo "starting MDTs with OI scrub disabled"
709 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
710 scrub_check_flags 4 recreated,inconsistent
711 mount_client $MOUNT || error "(5) Fail to start client!"
715 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
716 do_nodes $(comma_list $(mdts_nodes)) \
717 $LCTL set_param fail_val=2 fail_loc=0x190
721 # Sleep 5 sec to guarantee at least one object processed by OI scrub
723 # Fail the OI scrub to guarantee there is at least one checkpoint
724 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
725 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
727 scrub_check_status 7 failed
729 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
730 do_nodes $(comma_list $(mdts_nodes)) \
731 $LCTL set_param fail_val=3 fail_loc=0x190
734 for n in $(seq $MDSCOUNT); do
735 # stat will re-trigger OI scrub
736 stat $DIR/$tdir/mds$n/${tfile}800 ||
737 error "(8) Failed to stat mds$n/${tfile}800"
740 umount_client $MOUNT || error "(9) Fail to stop client!"
741 scrub_check_status 10 scanning
743 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
744 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
748 for n in $(seq $MDSCOUNT); do
749 position0[$n]=$(scrub_status $n |
750 awk '/^last_checkpoint_position/ {print $2}')
751 position0[$n]=$((${position0[$n]} + 1))
756 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
757 do_nodes $(comma_list $(mdts_nodes)) \
758 $LCTL set_param fail_val=3 fail_loc=0x190
760 echo "starting MDTs without disabling OI scrub"
761 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
763 scrub_check_status 13 scanning
766 for n in $(seq $MDSCOUNT); do
767 position1[$n]=$(scrub_status $n |
768 awk '/^latest_start_position/ {print $2}')
769 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
770 error "(14) Expected position ${position0[$n]}, but" \
771 "got ${position1[$n]}"
775 do_nodes $(comma_list $(mdts_nodes)) \
776 $LCTL set_param fail_loc=0 fail_val=0
778 scrub_check_status 15 completed
779 scrub_check_flags 16 ""
781 run_test 6 "OI scrub resumes from last checkpoint"
785 scrub_backup_restore 1
786 echo "starting MDTs with OI scrub disabled"
787 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
788 scrub_check_flags 4 recreated,inconsistent
789 mount_client $MOUNT || error "(5) Fail to start client!"
793 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
794 do_nodes $(comma_list $(mdts_nodes)) \
795 $LCTL set_param fail_val=3 fail_loc=0x190
800 for n in $(seq $MDSCOUNT); do
801 stat $DIR/$tdir/mds$n/${tfile}300 ||
802 error "(7) Failed to stat mds$n/${tfile}300!"
805 scrub_check_status 8 scanning
806 scrub_check_flags 9 recreated,inconsistent,auto
808 do_nodes $(comma_list $(mdts_nodes)) \
809 $LCTL set_param fail_loc=0 fail_val=0
811 scrub_check_status 10 completed
814 run_test 7 "System is available during OI scrub scanning"
818 scrub_backup_restore 1
819 echo "starting MDTs with OI scrub disabled"
820 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
821 scrub_check_flags 4 recreated,inconsistent
823 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
824 do_nodes $(comma_list $(mdts_nodes)) \
825 $LCTL set_param fail_val=1 fail_loc=0x190
828 scrub_check_status 6 scanning
830 scrub_check_status 8 stopped
832 scrub_check_status 10 scanning
834 do_nodes $(comma_list $(mdts_nodes)) \
835 $LCTL set_param fail_loc=0 fail_val=0
837 scrub_check_status 11 completed
838 scrub_check_flags 12 ""
840 run_test 8 "Control OI scrub manually"
843 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
844 skip "Testing on UP system, the speed may be inaccurate."
849 scrub_backup_restore 1
851 echo "starting MDTs with OI scrub disabled"
852 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
853 scrub_check_flags 4 recreated,inconsistent
855 local BASE_SPEED1=100
857 # OI scrub should run with full speed under inconsistent case
858 scrub_start 5 -s $BASE_SPEED1
861 scrub_check_status 6 completed
862 scrub_check_flags 7 ""
863 # OI scrub should run with limited speed under non-inconsistent case
864 scrub_start 8 -s $BASE_SPEED1 -r
867 scrub_check_status 9 scanning
869 # Do NOT ignore the 1024 pre-fetched items. There may also be a
870 # timing error, which should normally be less than 2 seconds.
871 # We allow a further 20% scheduling error.
872 local PRE_FETCHED=1024
874 # MAX_MARGIN = 1.2 = 12 / 10
875 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
876 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
878 for n in $(seq $MDSCOUNT); do
879 local SPEED=$(scrub_status $n | \
880 awk '/^average_speed/ { print $2 }')
881 [ $SPEED -lt $MAX_SPEED ] ||
882 error "(10) Got speed $SPEED, expected less than" \
887 local BASE_SPEED2=300
889 for n in $(seq $MDSCOUNT); do
890 do_facet mds$n $LCTL set_param -n \
891 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
895 # MIN_MARGIN = 0.8 = 8 / 10
896 local MIN_SPEED=$(((PRE_FETCHED + \
897 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
898 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
899 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
900 # MAX_MARGIN = 1.2 = 12 / 10
901 MAX_SPEED=$(((PRE_FETCHED + \
902 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
903 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
904 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
905 for n in $(seq $MDSCOUNT); do
906 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
907 [ $SPEED -gt $MIN_SPEED ] ||
908 error "(11) Got speed $SPEED, expected more than" \
910 [ $SPEED -lt $MAX_SPEED ] ||
911 error "(12) Got speed $SPEED, expected less than" \
914 do_facet mds$n $LCTL set_param -n \
915 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
918 scrub_check_status 13 completed
920 run_test 9 "OI scrub speed control"
924 scrub_backup_restore 1
# Banner for the first no-scrub start. The scrub_start_mds call that follows
# takes no MDS index, and no visible line before this echo sets $n, so the
# message names the MDTs collectively, like the later banners in this test.
925 echo "starting MDTs with OI scrub disabled (1)"
926 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
927 scrub_check_flags 4 recreated,inconsistent
928 mount_client $MOUNT || error "(5) Fail to start client!"
932 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
933 do_nodes $(comma_list $(mdts_nodes)) \
934 $LCTL set_param fail_val=1 fail_loc=0x190
937 scrub_check_status 7 scanning
938 umount_client $MOUNT || error "(8) Fail to stop client!"
940 echo "starting MDTs with OI scrub disabled (2)"
941 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
942 scrub_check_status 11 paused
944 echo "starting MDTs without disabling OI scrub"
945 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
946 scrub_check_status 14 scanning
948 do_nodes $(comma_list $(mdts_nodes)) \
949 $LCTL set_param fail_loc=0 fail_val=0
951 scrub_check_status 15 completed
952 scrub_check_flags 16 ""
954 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
956 # test_10b is obsolete; it will be covered by related sanity-lfsck tests.
959 scrub_backup_restore 1
960 echo "starting MDTs with OI scrub disabled"
961 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
962 scrub_check_flags 4 recreated,inconsistent
964 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
965 do_nodes $(comma_list $(mdts_nodes)) \
966 $LCTL set_param fail_val=3 fail_loc=0x190
969 scrub_check_status 6 scanning
971 echo "starting MDTs with OI scrub disabled"
972 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
973 scrub_check_status 9 paused
975 echo "starting MDTs without disabling OI scrub"
976 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
977 scrub_check_status 12 scanning
979 do_nodes $(comma_list $(mdts_nodes)) \
980 $LCTL set_param fail_loc=0 fail_val=0
982 scrub_check_status 13 completed
983 scrub_check_flags 14 ""
985 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
993 for n in $(seq $MDSCOUNT); do
994 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
995 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
997 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
998 error "(2) Fail to create under $tdir/mds$n"
1001 # reset OI scrub start point by force
1003 scrub_check_status 4 completed
1008 # OI scrub should skip the newly created objects on the first access.
1009 # Note that we create a new llog for every OST on every startup, and
1010 # new features can make this even less stable, so we only check that
1011 # the number of skipped objects is more than the number of known created files
1012 local MINIMUM=$((CREATED + 1)) # files + directory
1013 for n in $(seq $MDSCOUNT); do
1014 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1015 [ $SKIPPED -lt $MINIMUM ] &&
1016 error "(5) Expect at least $MINIMUM objects" \
1017 "skipped on mds$n, but got $SKIPPED"
1019 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1022 # reset OI scrub start point by force
1024 scrub_check_status 7 completed
1026 # OI scrub should skip the newly created objects only once
1027 for n in $(seq $MDSCOUNT); do
1028 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1029 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1031 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1032 error "(8) Expect 0 objects skipped on mds$n, but" \
1036 run_test 11 "OI scrub skips the new created objects only once"
1039 check_mount_and_prep
1040 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1042 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1043 do_facet ost1 $LCTL set_param fail_loc=0x195
1044 local count=$(precreated_ost_obj_count 0 0)
1046 createmany -o $DIR/$tdir/f $((count + 32))
1047 umount_client $MOUNT || error "(1) Fail to stop client!"
1049 stop ost1 || error "(2) Fail to stop ost1"
1051 #define OBD_FAIL_OST_NODESTROY 0x233
1052 do_facet ost1 $LCTL set_param fail_loc=0x233
1054 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1055 error "(3) Fail to start ost1"
1057 mount_client $MOUNT || error "(4) Fail to start client!"
1059 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1061 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1063 do_facet ost1 $LCTL set_param fail_loc=0
# Poll the oi_scrub status of ost1 until it reads "completed". The trailing 6
# is passed through to wait_update_facet -- presumably a timeout in seconds;
# confirm against the helper.
# The failure message names the literal status being waited for: no $expected
# variable is assigned in the visible lines of this test, so interpolating it
# printed an empty string.
1064 wait_update_facet ost1 "$LCTL get_param -n \
1065 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1066 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1067 error "(7) Expected 'completed' on ost1"
1069 ls -ail $DIR/$tdir > /dev/null || {
1071 error "(8) ls should succeed"
1074 run_test 12 "OI scrub can rebuild invalid /O entries"
1077 check_mount_and_prep
1078 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1080 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1081 do_facet ost1 $LCTL set_param fail_loc=0x196
1082 local count=$(precreated_ost_obj_count 0 0)
1084 createmany -o $DIR/$tdir/f $((count + 32))
1085 do_facet ost1 $LCTL set_param fail_loc=0
1087 umount_client $MOUNT || error "(1) Fail to stop client!"
1089 stop ost1 || error "(2) Fail to stop ost1"
1091 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1092 error "(3) Fail to start ost1"
1094 mount_client $MOUNT || error "(4) Fail to start client!"
1096 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1098 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Poll the oi_scrub status of ost1 until it reads "completed". The trailing 6
# is passed through to wait_update_facet -- presumably a timeout in seconds;
# confirm against the helper.
# The failure message names the literal status being waited for: no $expected
# variable is assigned in the visible lines of this test, so interpolating it
# printed an empty string.
1100 wait_update_facet ost1 "$LCTL get_param -n \
1101 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1102 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1103 error "(7) Expected 'completed' on ost1"
1105 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1107 run_test 13 "OI scrub can rebuild missed /O entries"
1110 check_mount_and_prep
1111 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1113 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1114 do_facet ost1 $LCTL set_param fail_loc=0x196
1115 local count=$(precreated_ost_obj_count 0 0)
1117 createmany -o $DIR/$tdir/f $((count + 32))
1118 do_facet ost1 $LCTL set_param fail_loc=0
1120 umount_client $MOUNT || error "(1) Fail to stop client!"
1122 stop ost1 || error "(2) Fail to stop ost1"
1125 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1126 error "(3) Fail to run e2fsck error"
1128 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1129 error "(4) Fail to start ost1"
1131 mount_client $MOUNT || error "(5) Fail to start client!"
1133 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1134 awk '/^lf_repa[ri]*ed/ { print $2 }')
1135 [ $LF_REPAIRED -gt 0 ] ||
1136 error "(6) Some entry under /lost+found should be repaired"
1138 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1140 run_test 14 "OI scrub can repair objects under lost+found"
1143 local server_version=$(lustre_version_code $SINGLEMDS)
1145 scrub_backup_restore 1
1146 echo "starting MDTs with OI scrub disabled"
1147 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1148 scrub_check_status 3 init
1149 scrub_check_flags 4 recreated,inconsistent
1151 # run under dryrun mode
1152 if [ $server_version -lt $(version_code 2.5.58) ]; then
1153 scrub_start 5 --dryrun on
1155 scrub_start 5 --dryrun
1157 scrub_check_status 6 completed
1158 scrub_check_flags 7 recreated,inconsistent
1159 scrub_check_params 8 dryrun
1160 scrub_check_repaired 9 20
1162 # run under dryrun mode again
1163 if [ $server_version -lt $(version_code 2.5.58) ]; then
1164 scrub_start 10 --dryrun on
1166 scrub_start 10 --dryrun
1168 scrub_check_status 11 completed
1169 scrub_check_flags 12 recreated,inconsistent
1170 scrub_check_params 13 dryrun
1171 scrub_check_repaired 14 20
1173 # run under normal mode
1175 # Lustre-2.x (x <= 5) used "-n off" to disable dryrun, which does not
1176 # work under Lustre-2.y (y >= 6); there the test script must use
1177 # "-noff", "--dryrun=off", or nothing at all (the default).
1178 if [ $server_version -lt $(version_code 2.5.58) ]; then
1179 scrub_start 15 --dryrun off
1183 scrub_check_status 16 completed
1184 scrub_check_flags 17 ""
1185 scrub_check_params 18 ""
1186 scrub_check_repaired 19 20
1188 # run under normal mode again
1189 if [ $server_version -lt $(version_code 2.5.58) ]; then
1190 scrub_start 20 --dryrun off
1194 scrub_check_status 21 completed
1195 scrub_check_flags 22 ""
1196 scrub_check_params 23 ""
1197 scrub_check_repaired 24 0
1199 run_test 15 "Dryrun mode OI scrub"
1201 # restore MDS/OST size
1202 MDSSIZE=${SAVED_MDSSIZE}
1203 OSTSIZE=${SAVED_OSTSIZE}
1204 OSTCOUNT=${SAVED_OSTCOUNT}
1206 # cleanup the system at last