3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16 . $LUSTRE/tests/test-framework.sh
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
30 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
31 skip "test OI scrub only for ldiskfs" && exit 0
33 [ $(facet_fstype ost1) != "ldiskfs" ] &&
34 skip "test OI scrub only for ldiskfs" && exit 0
36 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
37 skip "Need MDS version at least 2.2.90" && exit 0
39 SAVED_MDSSIZE=${MDSSIZE}
40 SAVED_OSTSIZE=${OSTSIZE}
41 SAVED_OSTCOUNT=${OSTCOUNT}
42 # use small MDS + OST size to speed formatting time
43 # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
44 # 200M MDT device can guarantee uninitialized groups during the OI scrub
47 # there is no need for many OSTs; cap the count to reduce the format/start/stop overhead
49 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
53 # build up a clean test environment.
57 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
58 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
60 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
61 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
63 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
64 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
66 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
67 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
68 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
70 [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
71 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
73 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
78 MDT_DEV="${FSNAME}-MDT0000"
79 OST_DEV="${FSNAME}-OST0000"
80 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
86 # use "lfsck_start -A" when we no longer need to test interop
87 for n in $(seq $MDSCOUNT); do
88 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
90 error "($error_id) Failed to start OI scrub on mds$n"
98 # use "lfsck_stop -A" when we no longer need to test interop
99 for n in $(seq $MDSCOUNT); do
100 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
101 error "($error_id) Failed to stop OI scrub on mds$n"
108 do_facet mds$n $LCTL get_param -n \
109 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
112 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
113 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
114 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
115 SHOW_SCRUB="do_facet $SINGLEMDS \
116 $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
117 SHOW_SCRUB_ON_OST="do_facet ost1 \
118 $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
119 MOUNT_OPTS_SCRUB="-o user_xattr"
120 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
128 echo "preparing... $(date)"
129 for n in $(seq $MDSCOUNT); do
130 echo "creating $nfiles files on mds$n"
131 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
132 error "Failed to create directory mds$n"
133 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
134 error "Failed to copy files to mds$n"
135 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
136 error "mkdir failed on mds$n"
137 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
138 /dev/null || error "create failed on mds$n"
139 if [[ $nfiles -gt 0 ]]; then
140 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
141 /dev/null || error "createmany failed on mds$n"
144 echo "prepared $(date)."
145 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
146 for n in $(seq $MDSCOUNT); do
148 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
157 for n in $(seq $MDSCOUNT); do
158 start mds$n $(mdsdevname $n) $opts >/dev/null ||
159 error "($error_id) Failed to start mds$n"
167 for n in $(seq $MDSCOUNT); do
168 echo "stopping mds$n"
169 stop mds$n >/dev/null ||
170 error "($error_id) Failed to stop mds$n"
# Wait until the OI scrub "status" field reads $expected on every MDS.
# For each mds1..mds$MDSCOUNT, wait_update_facet is handed a command that
# reads osd-ldiskfs.<svc>.oi_scrub and prints the second field of the line
# starting with "status"; if wait_update_facet fails, error() is called
# with a message tagged by $error_id.
# NOTE(review): $error_id and $expected are assigned on lines not visible
# here; call sites pass them as "<id> <status>", e.g. scrub_check_status 2 init.
# NOTE(review): the trailing 6 is presumably the wait limit in seconds --
# confirm against wait_update_facet.
# NOTE(review): the \\\$2 inside the double-quoted command reaches the next
# shell level as \$2, presumably so that awk finally sees a literal $2.
174 scrub_check_status() {
179 for n in $(seq $MDSCOUNT); do
180 wait_update_facet mds$n "$LCTL get_param -n \
181 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
182 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
183 error "($error_id) Expected '$expected' on mds$n"
# Check that the OI scrub "flags" field equals $expected on every MDS.
# For each mds1..mds$MDSCOUNT the osd-ldiskfs.<svc>.oi_scrub parameter is
# read through do_facet and awk prints the second field of the line that
# starts with "flags". The comparison is a quoted string test, so an empty
# $expected matches an empty/absent flags value. A mismatch is reported via
# error() tagged with $error_id.
# NOTE(review): $error_id and $expected are assigned on lines not visible
# here; call sites pass them as "<id> <flags>", e.g. scrub_check_flags 3 "".
187 scrub_check_flags() {
193 for n in $(seq $MDSCOUNT); do
194 actual=$(do_facet mds$n $LCTL get_param -n \
195 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
196 awk '/^flags/ { print $2 }')
197 if [ "$actual" != "$expected" ]; then
198 error "($error_id) Expected '$expected' on mds$n, but" \
# Check that the OI scrub "param" field equals $expected on every MDS.
# For each mds1..mds$MDSCOUNT the osd-ldiskfs.<svc>.oi_scrub parameter is
# read through do_facet and awk prints the second field of the line that
# starts with "param". The values are compared as quoted strings; a
# mismatch is reported via error() tagged with $error_id.
# NOTE(review): $error_id and $expected are assigned on lines not visible
# here; call sites pass them as "<id> <param>", e.g. scrub_check_params 8 dryrun.
204 scrub_check_params() {
210 for n in $(seq $MDSCOUNT); do
211 actual=$(do_facet mds$n $LCTL get_param -n \
212 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
213 awk '/^param/ { print $2 }')
214 if [ "$actual" != "$expected" ]; then
215 error "($error_id) Expected '$expected' on mds$n, but" \
# Check the OI scrub "updated" counter on every MDS against $expected.
# For each mds1..mds$MDSCOUNT the second field of the line starting with
# "updated" in osd-ldiskfs.<svc>.oi_scrub is read into $actual.
# NOTE(review): $error_id and $expected are assigned on lines not visible
# here; call sites pass them as "<id> <count>", e.g. scrub_check_repaired 9 20.
# NOTE(review): both numeric tests use unquoted operands, so an empty
# $actual (no "updated" line) makes [ report a syntax error instead of
# reaching error() -- confirm that is acceptable.
221 scrub_check_repaired() {
227 for n in $(seq $MDSCOUNT); do
228 actual=$(do_facet mds$n $LCTL get_param -n \
229 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
230 awk '/^updated/ { print $2 }')
# Two rules follow: an $expected of 0 demands that $actual is exactly 0,
# while a non-zero $expected is a lower bound ($actual must be >= it).
232 if [ $expected -eq 0 -a $actual -ne 0 ]; then
233 error "($error_id) Expected no repaired on mds$n, but" \
237 if [ $expected -ne 0 -a $actual -lt $expected ]; then
238 error "($error_id) Expected '$expected' on mds$n, but" \
248 for n in $(seq $MDSCOUNT); do
249 diff -q $LUSTRE/tests/test-framework.sh \
250 $DIR/$tdir/mds$n/test-framework.sh ||
251 error "($error_id) File data check failed"
# Verify file contents on every MDS directory after a scrub.
# For each mds1..mds$MDSCOUNT, diff -q compares $LUSTRE/tests/$filename
# with the copy under $DIR/$tdir/mds<n>/; any difference (or diff failure)
# is reported via error() tagged with $error_id.
# NOTE(review): $filename and $error_id are assigned on lines not visible
# here; call sites pass "<filename> <id>", e.g.
# scrub_check_data2 sanity-scrub.sh 9.
255 scrub_check_data2() {
260 for n in $(seq $MDSCOUNT); do
261 diff -q $LUSTRE/tests/$filename \
262 $DIR/$tdir/mds$n/$filename ||
263 error "($error_id) File data check failed"
272 for n in $(seq $MDSCOUNT); do
273 mds_remove_ois mds$n $index ||
274 error "($error_id) Failed to remove OI .$index on mds$n"
# Run mds_backup_restore on every MDS (mds1..mds$MDSCOUNT), passing $igif
# through unchanged; a failing call is reported via error() tagged with
# $error_id.
# NOTE(review): $error_id and $igif are assigned on lines not visible here;
# call sites pass only the id (scrub_backup_restore 1), so $igif is
# presumably an optional second argument -- confirm, and confirm what
# mds_backup_restore does with it.
278 scrub_backup_restore() {
283 for n in $(seq $MDSCOUNT); do
284 mds_backup_restore mds$n $igif ||
285 error "($error_id) Backup/restore on mds$n failed"
# Set auto_scrub=1 for every osd-ldiskfs device (wildcard match) on all
# nodes returned by mdts_nodes, using a single do_nodes invocation.
# Takes no arguments; the set_param result is not checked here.
289 scrub_enable_auto() {
290 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
291 osd-ldiskfs.*.auto_scrub=1
295 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
300 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
301 osd-ldiskfs.*.full_scrub_ratio=$ratio
# Set full_scrub_threshold_rate=$rate for every osd-ldiskfs device
# (wildcard match) on all nodes returned by mdts_nodes.
# The function opens with a version guard on $SINGLEMDS (version code
# <= 2.6.50).
# NOTE(review): the action attached to that guard's trailing && sits on
# lines not visible here -- presumably an early return for servers that
# lack the parameter; confirm.
# NOTE(review): $rate is assigned on a line not visible here; call sites
# pass it as the only argument, e.g. full_scrub_threshold_rate 10000.
304 full_scrub_threshold_rate() {
305 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
310 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
311 osd-ldiskfs.*.full_scrub_threshold_rate=$rate
316 echo "starting MDTs without disabling OI scrub"
317 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
318 scrub_check_status 2 init
319 scrub_check_flags 3 ""
320 mount_client $MOUNT || error "(4) Fail to start client!"
323 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
327 echo "start $SINGLEMDS without disabling OI scrub"
328 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
330 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
331 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
333 mount_client $MOUNT || error "(4) Fail to start client!"
334 #define OBD_FAIL_OSD_FID_MAPPING 0x193
335 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
336 # update .lustre OI mapping
338 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
339 umount_client $MOUNT || error "(5) Fail to stop client!"
341 echo "stop $SINGLEMDS"
342 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
344 echo "start $SINGLEMDS with disabling OI scrub"
345 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
346 error "(7) Fail to start MDS!"
348 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
349 [ "$FLAGS" == "inconsistent" ] ||
350 error "(9) Expect 'inconsistent', but got '$FLAGS'"
352 run_test 1a "Auto trigger initial OI scrub when server mounts"
357 echo "start MDTs without disabling OI scrub"
358 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
359 scrub_check_status 3 completed
360 mount_client $MOUNT || error "(4) Fail to start client!"
363 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
368 # OI files to be removed:
370 # idx 2: oi.16.{2,4,8,16,32}
371 # idx 3: oi.16.{3,9,27}
372 for index in 0 2 3; do
374 scrub_remove_ois 1 $index
375 echo "start MDTs with OI scrub disabled"
376 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
377 scrub_check_flags 3 recreated
379 scrub_check_status 5 completed
380 scrub_check_flags 6 ""
383 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
387 scrub_backup_restore 1
388 echo "starting MDTs without disabling OI scrub"
389 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
390 scrub_check_status 3 completed
391 mount_client $MOUNT || error "(4) Fail to start client!"
394 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
396 # test_3 is obsolete, it will be covered by test_5.
398 formatall > /dev/null
402 scrub_backup_restore 1
403 echo "starting MDTs with OI scrub disabled"
404 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
405 scrub_check_status 3 init
406 scrub_check_flags 4 recreated,inconsistent
408 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
412 scrub_backup_restore 1
413 echo "starting MDTs with OI scrub disabled"
414 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
415 scrub_check_flags 4 recreated,inconsistent
416 mount_client $MOUNT || error "(5) Fail to start client!"
422 scrub_check_status 7 completed
423 scrub_check_flags 8 ""
426 for n in $(seq $MDSCOUNT); do
427 updated0[$n]=$(scrub_status $n |
428 awk '/^prior_updated/ { print $2 }')
431 scrub_check_data2 sanity-scrub.sh 9
435 for n in $(seq $MDSCOUNT); do
436 updated1[$n]=$(scrub_status $n |
437 awk '/^prior_updated/ { print $2 }')
438 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
439 error "(10) NOT auto trigger full scrub as expected"
442 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
446 scrub_backup_restore 1
447 echo "starting MDTs with OI scrub disabled"
448 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
449 scrub_check_flags 4 recreated,inconsistent
450 mount_client $MOUNT || error "(5) Fail to start client!"
453 full_scrub_threshold_rate 10000
457 scrub_check_status 7 completed
458 scrub_check_flags 8 ""
461 for n in $(seq $MDSCOUNT); do
462 updated0[$n]=$(scrub_status $n |
463 awk '/^prior_updated/ { print $2 }')
465 echo "OI scrub on MDS$n status for the 1st time:"
466 do_facet mds$n $LCTL get_param -n \
467 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
470 scrub_check_data2 sanity-scrub.sh 9
473 scrub_check_status 10 completed
474 scrub_check_flags 11 ""
477 for n in $(seq $MDSCOUNT); do
478 updated1[$n]=$(scrub_status $n |
479 awk '/^prior_updated/ { print $2 }')
481 echo "OI scrub on MDS$n status for the 2nd time:"
482 do_facet mds$n $LCTL get_param -n \
483 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
485 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
486 error "(12) Auto trigger full scrub unexpectedly"
489 for n in $(seq $MDSCOUNT); do
490 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
491 error "(13) fail to ls"
495 scrub_check_status 14 completed
496 scrub_check_flags 15 ""
498 for n in $(seq $MDSCOUNT); do
499 updated0[$n]=$(scrub_status $n |
500 awk '/^prior_updated/ { print $2 }')
502 echo "OI scrub on MDS$n status for the 3rd time:"
503 do_facet mds$n $LCTL get_param -n \
504 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
506 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
507 error "(16) Auto trigger full scrub unexpectedly"
510 for n in $(seq $MDSCOUNT); do
511 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
515 for n in $(seq $MDSCOUNT); do
516 updated1[$n]=$(scrub_status $n |
517 awk '/^prior_updated/ { print $2 }')
518 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
519 echo "OI scrub on MDS$n status for the 4th time:"
520 do_facet mds$n $LCTL get_param -n \
521 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
523 error "(18) NOT auto trigger full scrub as expected"
527 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
531 scrub_backup_restore 1
532 echo "starting MDTs with OI scrub disabled"
533 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
534 scrub_check_flags 4 recreated,inconsistent
535 mount_client $MOUNT || error "(5) Fail to start client!"
538 full_scrub_threshold_rate 20
542 scrub_check_status 7 completed
543 scrub_check_flags 8 ""
546 for n in $(seq $MDSCOUNT); do
547 updated0[$n]=$(scrub_status $n |
548 awk '/^prior_updated/ { print $2 }')
550 echo "OI scrub on MDS$n status for the 1st time:"
551 do_facet mds$n $LCTL get_param -n \
552 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
555 scrub_check_data2 sanity-scrub.sh 9
558 scrub_check_status 10 completed
559 scrub_check_flags 11 ""
562 for n in $(seq $MDSCOUNT); do
563 updated1[$n]=$(scrub_status $n |
564 awk '/^prior_updated/ { print $2 }')
566 echo "OI scrub on MDS$n status for the 2nd time:"
567 do_facet mds$n $LCTL get_param -n \
568 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
570 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
571 error "(12) Auto trigger full scrub unexpectedly"
574 for n in $(seq $MDSCOUNT); do
575 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
576 error "(13) fail to ls"
580 scrub_check_status 14 completed
581 scrub_check_flags 15 ""
583 for n in $(seq $MDSCOUNT); do
584 updated0[$n]=$(scrub_status $n |
585 awk '/^prior_updated/ { print $2 }')
587 echo "OI scrub on MDS$n status for the 3rd time:"
588 do_facet mds$n $LCTL get_param -n \
589 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
591 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
592 error "(16) Auto trigger full scrub unexpectedly"
595 for n in $(seq $MDSCOUNT); do
596 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
600 for n in $(seq $MDSCOUNT); do
601 updated1[$n]=$(scrub_status $n |
602 awk '/^prior_updated/ { print $2 }')
603 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
604 echo "OI scrub on MDS$n status for the 4th time:"
605 do_facet mds$n $LCTL get_param -n \
606 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
608 error "(18) NOT auto trigger full scrub as expected"
612 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
615 formatall > /dev/null
619 scrub_backup_restore 1
620 echo "starting MDTs with OI scrub disabled (1)"
621 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
622 scrub_check_status 3 init
623 scrub_check_flags 4 recreated,inconsistent
624 mount_client $MOUNT || error "(5) Fail to start client!"
628 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
629 do_nodes $(comma_list $(mdts_nodes)) \
630 $LCTL set_param fail_val=3 fail_loc=0x190
633 umount_client $MOUNT || error "(7) Fail to stop client!"
634 scrub_check_status 8 scanning
636 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
637 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
642 do_nodes $(comma_list $(mdts_nodes)) \
643 $LCTL set_param fail_loc=0 fail_val=0
645 echo "starting MDTs with OI scrub disabled (2)"
646 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
647 scrub_check_status 11 crashed
650 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
651 do_nodes $(comma_list $(mdts_nodes)) \
652 $LCTL set_param fail_val=3 fail_loc=0x190
654 echo "starting MDTs without disabling OI scrub"
655 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
656 scrub_check_status 14 scanning
658 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
659 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
661 scrub_check_status 15 failed
662 mount_client $MOUNT || error "(16) Fail to start client!"
665 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
666 do_nodes $(comma_list $(mdts_nodes)) \
667 $LCTL set_param fail_val=3 fail_loc=0x190
670 for n in $(seq $MDSCOUNT); do
671 stat $DIR/$tdir/mds$n/${tfile}800 ||
672 error "(17) Failed to stat mds$n/${tfile}800"
675 scrub_check_status 18 scanning
677 do_nodes $(comma_list $(mdts_nodes)) \
678 $LCTL set_param fail_loc=0 fail_val=0
680 scrub_check_status 19 completed
681 scrub_check_flags 20 ""
683 run_test 5 "OI scrub state machine"
687 scrub_backup_restore 1
688 echo "starting MDTs with OI scrub disabled"
689 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
690 scrub_check_flags 4 recreated,inconsistent
691 mount_client $MOUNT || error "(5) Fail to start client!"
695 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
696 do_nodes $(comma_list $(mdts_nodes)) \
697 $LCTL set_param fail_val=2 fail_loc=0x190
701 # Sleep 5 sec to guarantee at least one object processed by OI scrub
703 # Fail the OI scrub to guarantee there is at least one checkpoint
704 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
705 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
707 scrub_check_status 7 failed
709 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
710 do_nodes $(comma_list $(mdts_nodes)) \
711 $LCTL set_param fail_val=3 fail_loc=0x190
714 for n in $(seq $MDSCOUNT); do
715 # stat will re-trigger OI scrub
716 stat $DIR/$tdir/mds$n/${tfile}800 ||
717 error "(8) Failed to stat mds$n/${tfile}800"
720 umount_client $MOUNT || error "(9) Fail to stop client!"
721 scrub_check_status 10 scanning
723 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
724 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
728 for n in $(seq $MDSCOUNT); do
729 position0[$n]=$(scrub_status $n |
730 awk '/^last_checkpoint_position/ {print $2}')
731 position0[$n]=$((${position0[$n]} + 1))
736 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
737 do_nodes $(comma_list $(mdts_nodes)) \
738 $LCTL set_param fail_val=3 fail_loc=0x190
740 echo "starting MDTs without disabling OI scrub"
741 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
743 scrub_check_status 13 scanning
746 for n in $(seq $MDSCOUNT); do
747 position1[$n]=$(scrub_status $n |
748 awk '/^latest_start_position/ {print $2}')
749 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
750 error "(14) Expected position ${position0[$n]}, but" \
751 "got ${position1[$n]}"
755 do_nodes $(comma_list $(mdts_nodes)) \
756 $LCTL set_param fail_loc=0 fail_val=0
758 scrub_check_status 15 completed
759 scrub_check_flags 16 ""
761 run_test 6 "OI scrub resumes from last checkpoint"
765 scrub_backup_restore 1
766 echo "starting MDTs with OI scrub disabled"
767 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
768 scrub_check_flags 4 recreated,inconsistent
769 mount_client $MOUNT || error "(5) Fail to start client!"
773 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
774 do_nodes $(comma_list $(mdts_nodes)) \
775 $LCTL set_param fail_val=3 fail_loc=0x190
780 for n in $(seq $MDSCOUNT); do
781 stat $DIR/$tdir/mds$n/${tfile}300 ||
782 error "(7) Failed to stat mds$n/${tfile}300!"
785 scrub_check_status 8 scanning
786 scrub_check_flags 9 recreated,inconsistent,auto
788 do_nodes $(comma_list $(mdts_nodes)) \
789 $LCTL set_param fail_loc=0 fail_val=0
791 scrub_check_status 10 completed
794 run_test 7 "System is available during OI scrub scanning"
798 scrub_backup_restore 1
799 echo "starting MDTs with OI scrub disabled"
800 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
801 scrub_check_flags 4 recreated,inconsistent
803 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
804 do_nodes $(comma_list $(mdts_nodes)) \
805 $LCTL set_param fail_val=1 fail_loc=0x190
808 scrub_check_status 6 scanning
810 scrub_check_status 8 stopped
812 scrub_check_status 10 scanning
814 do_nodes $(comma_list $(mdts_nodes)) \
815 $LCTL set_param fail_loc=0 fail_val=0
817 scrub_check_status 11 completed
818 scrub_check_flags 12 ""
820 run_test 8 "Control OI scrub manually"
823 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
824 skip "Testing on UP system, the speed may be inaccurate."
829 scrub_backup_restore 1
831 echo "starting MDTs with OI scrub disabled"
832 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
833 scrub_check_flags 4 recreated,inconsistent
835 local BASE_SPEED1=100
837 # OI scrub should run with full speed under inconsistent case
838 scrub_start 5 -s $BASE_SPEED1
841 scrub_check_status 6 completed
842 scrub_check_flags 7 ""
843 # OI scrub should run with limited speed under non-inconsistent case
844 scrub_start 8 -s $BASE_SPEED1 -r
847 scrub_check_status 9 scanning
849 # Do NOT ignore that there are 1024 pre-fetched items. And there
850 # may be time error, normally it should be less than 2 seconds.
851 # We allow another 20% schedule error.
852 local PRE_FETCHED=1024
854 # MAX_MARGIN = 1.2 = 12 / 10
855 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
856 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
858 for n in $(seq $MDSCOUNT); do
859 local SPEED=$(scrub_status $n | \
860 awk '/^average_speed/ { print $2 }')
861 [ $SPEED -lt $MAX_SPEED ] ||
862 error "(10) Got speed $SPEED, expected less than" \
867 local BASE_SPEED2=300
869 for n in $(seq $MDSCOUNT); do
870 do_facet mds$n $LCTL set_param -n \
871 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
875 # MIN_MARGIN = 0.8 = 8 / 10
876 local MIN_SPEED=$(((PRE_FETCHED + \
877 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
878 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
879 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
880 # MAX_MARGIN = 1.2 = 12 / 10
881 MAX_SPEED=$(((PRE_FETCHED + \
882 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
883 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
884 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
885 for n in $(seq $MDSCOUNT); do
886 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
887 [ $SPEED -gt $MIN_SPEED ] ||
888 error "(11) Got speed $SPEED, expected more than" \
890 [ $SPEED -lt $MAX_SPEED ] ||
891 error "(12) Got speed $SPEED, expected less than" \
894 do_facet mds$n $LCTL set_param -n \
895 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
898 scrub_check_status 13 completed
900 run_test 9 "OI scrub speed control"
904 scrub_backup_restore 1
905 echo "starting mds$n with OI scrub disabled (1)"
906 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
907 scrub_check_flags 4 recreated,inconsistent
908 mount_client $MOUNT || error "(5) Fail to start client!"
912 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
913 do_nodes $(comma_list $(mdts_nodes)) \
914 $LCTL set_param fail_val=1 fail_loc=0x190
917 scrub_check_status 7 scanning
918 umount_client $MOUNT || error "(8) Fail to stop client!"
920 echo "starting MDTs with OI scrub disabled (2)"
921 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
922 scrub_check_status 11 paused
924 echo "starting MDTs without disabling OI scrub"
925 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
926 scrub_check_status 14 scanning
928 do_nodes $(comma_list $(mdts_nodes)) \
929 $LCTL set_param fail_loc=0 fail_val=0
931 scrub_check_status 15 completed
932 scrub_check_flags 16 ""
934 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
936 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
939 scrub_backup_restore 1
940 echo "starting MDTs with OI scrub disabled"
941 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
942 scrub_check_flags 4 recreated,inconsistent
944 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
945 do_nodes $(comma_list $(mdts_nodes)) \
946 $LCTL set_param fail_val=3 fail_loc=0x190
949 scrub_check_status 6 scanning
951 echo "starting MDTs with OI scrub disabled"
952 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
953 scrub_check_status 9 paused
955 echo "starting MDTs without disabling OI scrub"
956 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
957 scrub_check_status 12 scanning
959 do_nodes $(comma_list $(mdts_nodes)) \
960 $LCTL set_param fail_loc=0 fail_val=0
962 scrub_check_status 13 completed
963 scrub_check_flags 14 ""
965 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
973 for n in $(seq $MDSCOUNT); do
974 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
975 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
977 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
978 error "(2) Fail to create under $tdir/mds$n"
981 # reset OI scrub start point by force
983 scrub_check_status 4 completed
988 # OI scrub should skip the newly created objects on the first access
989 # notice we're creating a new llog for every OST on every startup
990 # new features can make this even less stable, so we only check that
991 # the number of skipped files is more than the number of known created
992 local MINIMUM=$((CREATED + 1)) # files + directory
993 for n in $(seq $MDSCOUNT); do
994 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
995 [ $SKIPPED -lt $MINIMUM ] &&
996 error "(5) Expect at least $MINIMUM objects" \
997 "skipped on mds$n, but got $SKIPPED"
999 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1002 # reset OI scrub start point by force
1004 scrub_check_status 7 completed
1006 # OI scrub should skip the newly created object only once
1007 for n in $(seq $MDSCOUNT); do
1008 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1009 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1011 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1012 error "(8) Expect 0 objects skipped on mds$n, but" \
1016 run_test 11 "OI scrub skips the new created objects only once"
1019 check_mount_and_prep
1020 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1022 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1023 do_facet ost1 $LCTL set_param fail_loc=0x195
1024 local count=$(precreated_ost_obj_count 0 0)
1026 createmany -o $DIR/$tdir/f $((count + 32))
1027 umount_client $MOUNT || error "(1) Fail to stop client!"
1029 stop ost1 || error "(2) Fail to stop ost1"
1031 #define OBD_FAIL_OST_NODESTROY 0x233
1032 do_facet ost1 $LCTL set_param fail_loc=0x233
1034 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1035 error "(3) Fail to start ost1"
1037 mount_client $MOUNT || error "(4) Fail to start client!"
1039 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1041 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1043 do_facet ost1 $LCTL set_param fail_loc=0
1044 wait_update_facet ost1 "$LCTL get_param -n \
1045 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1046 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1047 error "(7) Expected '$expected' on ost1"
1049 ls -ail $DIR/$tdir > /dev/null || {
1051 error "(8) ls should succeed"
1054 run_test 12 "OI scrub can rebuild invalid /O entries"
1057 check_mount_and_prep
1058 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1060 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1061 do_facet ost1 $LCTL set_param fail_loc=0x196
1062 local count=$(precreated_ost_obj_count 0 0)
1064 createmany -o $DIR/$tdir/f $((count + 32))
1065 do_facet ost1 $LCTL set_param fail_loc=0
1067 umount_client $MOUNT || error "(1) Fail to stop client!"
1069 stop ost1 || error "(2) Fail to stop ost1"
1071 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1072 error "(3) Fail to start ost1"
1074 mount_client $MOUNT || error "(4) Fail to start client!"
1076 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1078 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1080 wait_update_facet ost1 "$LCTL get_param -n \
1081 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1082 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1083 error "(7) Expected '$expected' on ost1"
1085 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1087 run_test 13 "OI scrub can rebuild missed /O entries"
1090 check_mount_and_prep
1091 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1093 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1094 do_facet ost1 $LCTL set_param fail_loc=0x196
1095 local count=$(precreated_ost_obj_count 0 0)
1097 createmany -o $DIR/$tdir/f $((count + 32))
1098 do_facet ost1 $LCTL set_param fail_loc=0
1100 umount_client $MOUNT || error "(1) Fail to stop client!"
1102 stop ost1 || error "(2) Fail to stop ost1"
1105 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1106 error "(3) Fail to run e2fsck error"
1108 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1109 error "(4) Fail to start ost1"
1111 mount_client $MOUNT || error "(5) Fail to start client!"
1113 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1114 awk '/^lf_repa[ri]*ed/ { print $2 }')
1115 [ $LF_REPAIRED -gt 0 ] ||
1116 error "(6) Some entry under /lost+found should be repaired"
1118 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1120 run_test 14 "OI scrub can repair objects under lost+found"
1123 local server_version=$(lustre_version_code $SINGLEMDS)
1125 scrub_backup_restore 1
1126 echo "starting MDTs with OI scrub disabled"
1127 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1128 scrub_check_status 3 init
1129 scrub_check_flags 4 recreated,inconsistent
1131 # run under dryrun mode
1132 if [ $server_version -lt $(version_code 2.5.58) ]; then
1133 scrub_start 5 --dryrun on
1135 scrub_start 5 --dryrun
1137 scrub_check_status 6 completed
1138 scrub_check_flags 7 recreated,inconsistent
1139 scrub_check_params 8 dryrun
1140 scrub_check_repaired 9 20
1142 # run under dryrun mode again
1143 if [ $server_version -lt $(version_code 2.5.58) ]; then
1144 scrub_start 10 --dryrun on
1146 scrub_start 10 --dryrun
1148 scrub_check_status 11 completed
1149 scrub_check_flags 12 recreated,inconsistent
1150 scrub_check_params 13 dryrun
1151 scrub_check_repaired 14 20
1153 # run under normal mode
1155 # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
1156 # work under Lustre-2.y (y >= 6), the test script should be fixed as
1157 # "-noff" or "--dryrun=off" or nothing by default.
1158 if [ $server_version -lt $(version_code 2.5.58) ]; then
1159 scrub_start 15 --dryrun off
1163 scrub_check_status 16 completed
1164 scrub_check_flags 17 ""
1165 scrub_check_params 18 ""
1166 scrub_check_repaired 19 20
1168 # run under normal mode again
1169 if [ $server_version -lt $(version_code 2.5.58) ]; then
1170 scrub_start 20 --dryrun off
1174 scrub_check_status 21 completed
1175 scrub_check_flags 22 ""
1176 scrub_check_params 23 ""
1177 scrub_check_repaired 24 0
1179 run_test 15 "Dryrun mode OI scrub"
1181 # restore MDS/OST size
1182 MDSSIZE=${SAVED_MDSSIZE}
1183 OSTSIZE=${SAVED_OSTSIZE}
1184 OSTCOUNT=${SAVED_OSTCOUNT}
1186 # finally, clean up the system