3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
11 LUSTRE=${LUSTRE:-$(dirname $0)/..}
12 . $LUSTRE/tests/test-framework.sh
16 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
18 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
19 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
23 require_dsh_mds || exit 0
27 if ! check_versions; then
28 skip "It is NOT necessary to test scrub under interoperation mode"
# Remember the caller's MDS/OST sizing before it is overridden below; the
# SAVED_* copies are used to put the values back at the end of the script.
SAVED_MDSSIZE=${MDSSIZE}
SAVED_OSTSIZE=${OSTSIZE}
SAVED_OSTCOUNT=${OSTCOUNT}

# use small MDS + OST size to speed formatting time
# do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
# 400M MDT device can guarantee uninitialized groups during the OI scrub
#
# The size check must be numeric (-lt): "<" inside [[ ]] sorts its operands
# as strings, so "50000" would not count as smaller than "400000" while
# "1000000" would.  An unset/empty size is treated as 0.
[[ ${MDSSIZE:-0} -lt 400000 || "$mds1_FSTYPE" == ldiskfs ]] && MDSSIZE=400000
[[ ${OSTSIZE:-0} -lt 400000 || "$ost1_FSTYPE" == ldiskfs ]] && OSTSIZE=400000

# no need for too many OSTs, to reduce the format/start/stop overhead
# (an unset OSTCOUNT is left alone instead of tripping a "[" syntax error)
[[ ${OSTCOUNT:-0} -gt 4 ]] && OSTCOUNT=4
47 # build up a clean test environment.
48 REFORMAT="yes" check_and_setup_lustre
50 MDT_DEV="${FSNAME}-MDT0000"
51 OST_DEV="${FSNAME}-OST0000"
57 # use "lfsck_start -A" when we no longer need testing interop
58 for n in $(seq $MDSCOUNT); do
59 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
61 error "($error_id) Failed to start OI scrub on mds$n"
69 # use "lfsck_stop -A" when we no longer need testing interop
70 for n in $(seq $MDSCOUNT); do
71 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
72 error "($error_id) Failed to stop OI scrub on mds$n"
79 do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
# Canned command lines for driving and inspecting OI scrub.  They are kept
# as plain strings and expanded unquoted where used, so callers can append
# extra arguments or pipe the output.

# MDT side: start/stop the scrub on $MDT_DEV and dump its oi_scrub status.
START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M $MDT_DEV -t scrub"
STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M $MDT_DEV"
SHOW_SCRUB="do_facet $SINGLEMDS $LCTL get_param -n"
SHOW_SCRUB+=" osd-*.$MDT_DEV.oi_scrub"

# OST side: the same start/show pair, aimed at ost1 and $OST_DEV.
START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M $OST_DEV -t scrub"
SHOW_SCRUB_ON_OST="do_facet ost1 $LCTL get_param -n"
SHOW_SCRUB_ON_OST+=" osd-*.$OST_DEV.oi_scrub"

# Server mount options: identical except that the second adds "noscrub".
MOUNT_OPTS_SCRUB="-o user_xattr"
MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
99 echo "preparing... $(date)"
100 for n in $(seq $MDSCOUNT); do
101 echo "creating $nfiles files on mds$n"
102 test_mkdir -i $((n - 1)) -c1 $DIR/$tdir/mds$n ||
103 error "Failed to create directory mds$n"
104 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
105 error "Failed to copy files to mds$n"
106 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
107 error "mkdir failed on mds$n"
108 touch $DIR/$tdir/mds$n/d_$tfile/f1 > \
109 /dev/null || error "create failed on mds$n"
110 dd if=/dev/zero of=$DIR/$tdir/mds$n/d_$tfile/f2 bs=1M count=1 ||
111 error "write failed on mds$n"
112 if [[ $nfiles -gt 0 ]]; then
113 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
114 /dev/null || error "createmany failed on mds$n"
117 echo "prepared $(date)."
119 [ ! -z $inject ] && [ $inject -eq 2 ] && {
120 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
121 do_nodes $(comma_list $(mdts_nodes)) \
122 $LCTL set_param fail_loc=0x198
124 for n in $(seq $MDSCOUNT); do
125 cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
126 error "Fail to copy runas to MDS$n"
129 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
132 [ ! -z $inject ] && [ $inject -eq 1 ] &&
133 [ $(facet_fstype $SINGLEMDS) = "zfs" ] && {
134 #define OBD_FAIL_OSD_FID_MAPPING 0x193
135 do_nodes $(comma_list $(mdts_nodes)) \
136 $LCTL set_param fail_loc=0x193
138 for n in $(seq $MDSCOUNT); do
139 chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
140 chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
143 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
146 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
148 # sync local transactions on every MDT
149 do_nodes $(comma_list $(mdts_nodes)) \
150 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
152 # wait for a while to cancel update logs after transactions committed.
155 # sync again to guarantee all things done.
156 do_nodes $(comma_list $(mdts_nodes)) \
157 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
159 for n in $(seq $MDSCOUNT); do
161 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
164 [ ! -z $inject ] && [ $(facet_fstype $SINGLEMDS) = "ldiskfs" ] && {
165 if [ $inject -eq 1 ]; then
166 for n in $(seq $MDSCOUNT); do
167 mds_backup_restore mds$n ||
168 error "Backup/restore on mds$n failed"
170 elif [ $inject -eq 2 ]; then
181 for n in $(seq $MDSCOUNT); do
182 start mds$n $(mdsdevname $n) $opts >/dev/null ||
183 error "($error_id) Failed to start mds$n"
191 for n in $(seq $MDSCOUNT); do
192 echo "stopping mds$n"
193 stop mds$n >/dev/null ||
194 error "($error_id) Failed to stop mds$n"
198 scrub_check_status() {
203 for n in $(seq $MDSCOUNT); do
204 wait_update_facet mds$n "$LCTL get_param -n \
205 osd-*.$(facet_svc mds$n).oi_scrub |
206 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
207 error "($error_id) Expected '$expected' on mds$n"
211 scrub_check_flags() {
217 for n in $(seq $MDSCOUNT); do
218 actual=$(do_facet mds$n $LCTL get_param -n \
219 osd-*.$(facet_svc mds$n).oi_scrub |
220 awk '/^flags/ { print $2 }')
221 if [ "$actual" != "$expected" ]; then
222 error "($error_id) Expected '$expected' on mds$n, but" \
228 scrub_check_params() {
234 for n in $(seq $MDSCOUNT); do
235 actual=$(do_facet mds$n $LCTL get_param -n \
236 osd-*.$(facet_svc mds$n).oi_scrub |
237 awk '/^param/ { print $2 }')
238 if [ "$actual" != "$expected" ]; then
239 error "($error_id) Expected '$expected' on mds$n, but" \
245 scrub_check_repaired() {
252 for n in $(seq $MDSCOUNT); do
253 if [ $dryrun -eq 1 ]; then
254 actual=$(do_facet mds$n $LCTL get_param -n \
255 osd-*.$(facet_svc mds$n).oi_scrub |
256 awk '/^inconsistent:/ { print $2 }')
258 actual=$(do_facet mds$n $LCTL get_param -n \
259 osd-*.$(facet_svc mds$n).oi_scrub |
260 awk '/^updated:/ { print $2 }')
263 if [ $expected -eq 0 -a $actual -ne 0 ]; then
264 error "($error_id) Expected no repaired on mds$n, but" \
268 if [ $expected -ne 0 -a $actual -lt $expected ]; then
269 error "($error_id) Expected '$expected' on mds$n, but" \
279 for n in $(seq $MDSCOUNT); do
280 diff -q $LUSTRE/tests/test-framework.sh \
281 $DIR/$tdir/mds$n/test-framework.sh ||
282 error "($error_id) File data check failed"
286 scrub_check_data2() {
291 for n in $(seq $MDSCOUNT); do
292 diff -q $LUSTRE/tests/$filename \
293 $DIR/$tdir/mds$n/$filename ||
294 error "($error_id) File data check failed"
299 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
305 for n in $(seq $MDSCOUNT); do
306 mds_remove_ois mds$n $index ||
307 error "($error_id) Failed to remove OI .$index on mds$n"
311 scrub_enable_auto() {
312 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
317 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
321 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
322 osd-*.*.full_scrub_ratio=$ratio
325 full_scrub_threshold_rate() {
326 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
330 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
331 osd-*.*.full_scrub_threshold_rate=$rate
334 scrub_enable_index_backup() {
335 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
336 osd-*.*.index_backup=1
339 scrub_disable_index_backup() {
340 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
341 osd-*.*.index_backup=0
346 echo "starting MDTs without disabling OI scrub"
347 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
348 scrub_check_status 2 init
349 scrub_check_flags 3 ""
350 mount_client $MOUNT || error "(4) Fail to start client!"
353 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
356 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
359 echo "start $SINGLEMDS without disabling OI scrub"
360 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
362 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
363 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
365 mount_client $MOUNT || error "(4) Fail to start client!"
366 #define OBD_FAIL_OSD_FID_MAPPING 0x193
367 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
368 # update .lustre OI mapping
370 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
371 umount_client $MOUNT || error "(5) Fail to stop client!"
373 echo "stop $SINGLEMDS"
374 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
376 echo "start $SINGLEMDS with disabling OI scrub"
377 start $SINGLEMDS $(mdsdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
378 error "(7) Fail to start MDS!"
380 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
381 [ "$FLAGS" == "inconsistent" ] ||
382 error "(9) Expect 'inconsistent', but got '$FLAGS'"
384 run_test 1a "Auto trigger initial OI scrub when server mounts"
388 echo "start MDTs without disabling OI scrub"
389 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
390 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
391 scrub_check_status 3 completed
392 mount_client $MOUNT || error "(4) Fail to start client!"
393 scrub_check_data2 runas 5
394 scrub_check_status 6 completed
396 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
399 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
400 skip "ldiskfs special test" && return
404 # OI files to be removed:
406 # idx 2: oi.16.{2,4,8,16,32}
407 # idx 3: oi.16.{3,9,27}
408 for index in 0 2 3; do
410 scrub_remove_ois 1 $index
411 echo "start MDTs with OI scrub disabled"
412 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
413 scrub_check_flags 3 recreated
415 scrub_check_status 5 completed
416 scrub_check_flags 6 ""
419 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
422 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
423 skip "ldiskfs special test" && return
426 echo "starting MDTs without disabling OI scrub"
427 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
428 scrub_check_status 3 completed
429 mount_client $MOUNT || error "(4) Fail to start client!"
432 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
434 # test_3 is obsolete, it will be covered by test_5.
436 formatall > /dev/null
440 echo "starting MDTs with OI scrub disabled"
441 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
442 scrub_check_status 3 init
443 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
444 scrub_check_flags 4 recreated,inconsistent
446 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
450 echo "starting MDTs with OI scrub disabled"
451 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
452 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
453 scrub_check_flags 4 recreated,inconsistent
454 mount_client $MOUNT || error "(5) Fail to start client!"
460 scrub_check_status 7 completed
461 scrub_check_flags 8 ""
464 for n in $(seq $MDSCOUNT); do
465 updated0[$n]=$(scrub_status $n |
466 awk '/^prior_updated/ { print $2 }')
469 scrub_check_data2 sanity-scrub.sh 9
473 for n in $(seq $MDSCOUNT); do
474 updated1[$n]=$(scrub_status $n |
475 awk '/^prior_updated/ { print $2 }')
476 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
477 error "(10) NOT auto trigger full scrub as expected"
480 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
483 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
484 skip "ldiskfs special test" && return
487 echo "starting MDTs with OI scrub disabled"
488 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
489 scrub_check_flags 4 recreated,inconsistent
490 mount_client $MOUNT || error "(5) Fail to start client!"
493 full_scrub_threshold_rate 10000
497 scrub_check_status 7 completed
498 scrub_check_flags 8 ""
501 for n in $(seq $MDSCOUNT); do
502 updated0[$n]=$(scrub_status $n |
503 awk '/^prior_updated/ { print $2 }')
505 echo "OI scrub on MDS$n status for the 1st time:"
506 do_facet mds$n $LCTL get_param -n \
507 osd-*.$(facet_svc mds$n).oi_scrub
510 scrub_check_data2 sanity-scrub.sh 9
513 scrub_check_status 10 completed
514 scrub_check_flags 11 ""
517 for n in $(seq $MDSCOUNT); do
518 updated1[$n]=$(scrub_status $n |
519 awk '/^prior_updated/ { print $2 }')
521 echo "OI scrub on MDS$n status for the 2nd time:"
522 do_facet mds$n $LCTL get_param -n \
523 osd-*.$(facet_svc mds$n).oi_scrub
525 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
526 error "(12) Auto trigger full scrub unexpectedly"
529 for n in $(seq $MDSCOUNT); do
530 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
531 error "(13) fail to ls"
535 scrub_check_status 14 completed
536 scrub_check_flags 15 ""
538 for n in $(seq $MDSCOUNT); do
539 updated0[$n]=$(scrub_status $n |
540 awk '/^prior_updated/ { print $2 }')
542 echo "OI scrub on MDS$n status for the 3rd time:"
543 do_facet mds$n $LCTL get_param -n \
544 osd-*.$(facet_svc mds$n).oi_scrub
546 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
547 error "(16) Auto trigger full scrub unexpectedly"
550 for n in $(seq $MDSCOUNT); do
551 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
555 for n in $(seq $MDSCOUNT); do
556 updated1[$n]=$(scrub_status $n |
557 awk '/^prior_updated/ { print $2 }')
558 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
559 echo "OI scrub on MDS$n status for the 4th time:"
560 do_facet mds$n $LCTL get_param -n \
561 osd-*.$(facet_svc mds$n).oi_scrub
563 error "(18) NOT auto trigger full scrub as expected"
567 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
570 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
571 skip "ldiskfs special test" && return
574 echo "starting MDTs with OI scrub disabled"
575 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
576 scrub_check_flags 4 recreated,inconsistent
577 mount_client $MOUNT || error "(5) Fail to start client!"
580 full_scrub_threshold_rate 20
584 scrub_check_status 7 completed
585 scrub_check_flags 8 ""
588 for n in $(seq $MDSCOUNT); do
589 updated0[$n]=$(scrub_status $n |
590 awk '/^prior_updated/ { print $2 }')
592 echo "OI scrub on MDS$n status for the 1st time:"
593 do_facet mds$n $LCTL get_param -n \
594 osd-*.$(facet_svc mds$n).oi_scrub
597 scrub_check_data2 sanity-scrub.sh 9
600 scrub_check_status 10 completed
601 scrub_check_flags 11 ""
604 for n in $(seq $MDSCOUNT); do
605 updated1[$n]=$(scrub_status $n |
606 awk '/^prior_updated/ { print $2 }')
608 echo "OI scrub on MDS$n status for the 2nd time:"
609 do_facet mds$n $LCTL get_param -n \
610 osd-*.$(facet_svc mds$n).oi_scrub
612 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
613 error "(12) Auto trigger full scrub unexpectedly"
616 for n in $(seq $MDSCOUNT); do
617 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
618 error "(13) fail to ls"
622 scrub_check_status 14 completed
623 scrub_check_flags 15 ""
625 for n in $(seq $MDSCOUNT); do
626 updated0[$n]=$(scrub_status $n |
627 awk '/^prior_updated/ { print $2 }')
629 echo "OI scrub on MDS$n status for the 3rd time:"
630 do_facet mds$n $LCTL get_param -n \
631 osd-*.$(facet_svc mds$n).oi_scrub
633 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
634 error "(16) Auto trigger full scrub unexpectedly"
637 for n in $(seq $MDSCOUNT); do
638 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
642 for n in $(seq $MDSCOUNT); do
643 updated1[$n]=$(scrub_status $n |
644 awk '/^prior_updated/ { print $2 }')
645 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
646 echo "OI scrub on MDS$n status for the 4th time:"
647 do_facet mds$n $LCTL get_param -n \
648 osd-*.$(facet_svc mds$n).oi_scrub
650 error "(18) NOT auto trigger full scrub as expected"
654 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
657 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && skip "ldiskfs only test"
661 #define OBD_FAIL_OSD_DUPLICATE_MAP 0x19b
662 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x19b
663 for i in {1..100}; do
664 echo $i > $DIR/$tdir/f_$i || error "write f_$i failed"
666 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
668 for i in {101..200}; do
669 echo $i > $DIR/$tdir/f_$i || error "write f_$i failed"
672 for i in {1..200}; do
673 echo $i | cmp $DIR/$tdir/f_$i - || error "f_$i data corrupt"
676 run_test 4d "FID in LMA mismatch with object FID won't block create"
679 formatall > /dev/null
683 echo "starting MDTs with OI scrub disabled (1)"
684 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
685 scrub_check_status 3 init
686 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
687 scrub_check_flags 4 recreated,inconsistent
688 mount_client $MOUNT || error "(5) Fail to start client!"
692 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
693 do_nodes $(comma_list $(mdts_nodes)) \
694 $LCTL set_param fail_val=3 fail_loc=0x190
697 umount_client $MOUNT || error "(7) Fail to stop client!"
698 scrub_check_status 8 scanning
700 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
701 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
706 do_nodes $(comma_list $(mdts_nodes)) \
707 $LCTL set_param fail_loc=0 fail_val=0
709 echo "starting MDTs with OI scrub disabled (2)"
710 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
711 scrub_check_status 11 crashed
714 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
715 do_nodes $(comma_list $(mdts_nodes)) \
716 $LCTL set_param fail_val=3 fail_loc=0x190
718 echo "starting MDTs without disabling OI scrub"
719 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
720 scrub_check_status 14 scanning
722 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
723 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
725 scrub_check_status 15 failed
726 mount_client $MOUNT || error "(16) Fail to start client!"
729 do_nodes $(comma_list $(mdts_nodes)) \
730 $LCTL set_param fail_loc=0 fail_val=0
735 for n in $(seq $MDSCOUNT); do
736 stat $DIR/$tdir/mds$n/sanity-scrub.sh &
740 for n in $(seq $MDSCOUNT); do
742 error "(18) Fail to stat mds$n/sanity-scrub.sh"
745 scrub_check_status 19 completed
746 scrub_check_flags 20 ""
748 run_test 5 "OI scrub state machine"
752 echo "starting MDTs with OI scrub disabled"
753 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
754 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
755 scrub_check_flags 4 recreated,inconsistent
756 mount_client $MOUNT || error "(5) Fail to start client!"
760 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
761 do_nodes $(comma_list $(mdts_nodes)) \
762 $LCTL set_param fail_val=2 fail_loc=0x190
766 # Sleep 5 sec to guarantee at least one object processed by OI scrub
768 # Fail the OI scrub to guarantee there is at least one checkpoint
769 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
770 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
772 scrub_check_status 7 failed
774 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
775 do_nodes $(comma_list $(mdts_nodes)) \
776 $LCTL set_param fail_val=3 fail_loc=0x190
779 for n in $(seq $MDSCOUNT); do
780 # stat will re-trigger OI scrub
781 stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
782 error "(8) Failed to stat mds$n/sanity-scrub.sh"
785 umount_client $MOUNT || error "(9) Fail to stop client!"
786 scrub_check_status 10 scanning
788 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
789 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
793 for n in $(seq $MDSCOUNT); do
794 position0[$n]=$(scrub_status $n |
795 awk '/^last_checkpoint_position/ {print $2}')
796 position0[$n]=$((${position0[$n]} + 1))
801 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
802 do_nodes $(comma_list $(mdts_nodes)) \
803 $LCTL set_param fail_val=3 fail_loc=0x190
805 echo "starting MDTs without disabling OI scrub"
806 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
808 scrub_check_status 13 scanning
811 for n in $(seq $MDSCOUNT); do
812 position1[$n]=$(scrub_status $n |
813 awk '/^latest_start_position/ {print $2}')
814 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
815 error "(14) Expected position ${position0[$n]}, but" \
816 "got ${position1[$n]}"
820 do_nodes $(comma_list $(mdts_nodes)) \
821 $LCTL set_param fail_loc=0 fail_val=0
823 scrub_check_status 15 completed
824 scrub_check_flags 16 ""
826 run_test 6 "OI scrub resumes from last checkpoint"
830 echo "starting MDTs with OI scrub disabled"
831 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
832 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
833 scrub_check_flags 4 recreated,inconsistent
834 mount_client $MOUNT || error "(5) Fail to start client!"
838 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
839 do_nodes $(comma_list $(mdts_nodes)) \
840 $LCTL set_param fail_val=3 fail_loc=0x190
845 for n in $(seq $MDSCOUNT); do
846 stat $DIR/$tdir/mds$n/${tfile}300 ||
847 error "(7) Failed to stat mds$n/${tfile}300!"
850 scrub_check_status 8 scanning
851 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
852 scrub_check_flags 9 inconsistent,auto
854 scrub_check_flags 9 recreated,inconsistent,auto
857 do_nodes $(comma_list $(mdts_nodes)) \
858 $LCTL set_param fail_loc=0 fail_val=0
860 scrub_check_status 10 completed
863 run_test 7 "System is available during OI scrub scanning"
867 echo "starting MDTs with OI scrub disabled"
868 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
869 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
870 scrub_check_flags 4 recreated,inconsistent
872 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
873 do_nodes $(comma_list $(mdts_nodes)) \
874 $LCTL set_param fail_val=1 fail_loc=0x190
877 scrub_check_status 6 scanning
879 scrub_check_status 8 stopped
881 scrub_check_status 10 scanning
883 do_nodes $(comma_list $(mdts_nodes)) \
884 $LCTL set_param fail_loc=0 fail_val=0
886 scrub_check_status 11 completed
887 scrub_check_flags 12 ""
889 run_test 8 "Control OI scrub manually"
892 # Skip scrub speed test for ZFS because of performance unstable
893 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
894 skip "test scrub speed only on ldiskfs" && return
896 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
897 skip "Testing on UP system, the speed may be inaccurate."
903 echo "starting MDTs with OI scrub disabled"
904 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
905 scrub_check_flags 4 recreated,inconsistent
907 local BASE_SPEED1=100
909 # OI scrub should run with full speed under inconsistent case
910 scrub_start 5 -s $BASE_SPEED1
913 scrub_check_status 6 completed
914 scrub_check_flags 7 ""
915 # OI scrub should run with limited speed under non-inconsistent case
916 scrub_start 8 -s $BASE_SPEED1 -r
919 scrub_check_status 9 scanning
921 # Do NOT ignore that there are 1024 pre-fetched items. And there
922 # may be time error, normally it should be less than 2 seconds.
923 # We allow another 20% schedule error.
924 local PRE_FETCHED=1024
926 # MAX_MARGIN = 1.2 = 12 / 10
927 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
928 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
930 for n in $(seq $MDSCOUNT); do
931 local SPEED=$(scrub_status $n | \
932 awk '/^average_speed/ { print $2 }')
933 [ $SPEED -lt $MAX_SPEED ] ||
934 error "(10) Got speed $SPEED, expected less than" \
939 local BASE_SPEED2=300
941 for n in $(seq $MDSCOUNT); do
942 do_facet mds$n $LCTL set_param -n \
943 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
947 # MIN_MARGIN = 0.8 = 8 / 10
948 local MIN_SPEED=$(((PRE_FETCHED + \
949 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
950 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
951 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
952 # MAX_MARGIN = 1.2 = 12 / 10
953 MAX_SPEED=$(((PRE_FETCHED + \
954 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
955 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
956 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
957 for n in $(seq $MDSCOUNT); do
958 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
959 [ $SPEED -gt $MIN_SPEED ] ||
960 error "(11) Got speed $SPEED, expected more than" \
962 [ $SPEED -lt $MAX_SPEED ] ||
963 error "(12) Got speed $SPEED, expected less than" \
966 do_facet mds$n $LCTL set_param -n \
967 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
970 scrub_check_status 13 completed
972 run_test 9 "OI scrub speed control"
976 echo "starting mds$n with OI scrub disabled (1)"
977 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
978 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
979 scrub_check_flags 4 recreated,inconsistent
980 mount_client $MOUNT || error "(5) Fail to start client!"
984 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
985 do_nodes $(comma_list $(mdts_nodes)) \
986 $LCTL set_param fail_val=1 fail_loc=0x190
989 scrub_check_status 7 scanning
990 umount_client $MOUNT || error "(8) Fail to stop client!"
992 echo "starting MDTs with OI scrub disabled (2)"
993 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
994 scrub_check_status 11 paused
996 echo "starting MDTs without disabling OI scrub"
997 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
998 scrub_check_status 14 scanning
1000 do_nodes $(comma_list $(mdts_nodes)) \
1001 $LCTL set_param fail_loc=0 fail_val=0
1003 scrub_check_status 15 completed
1004 scrub_check_flags 16 ""
1006 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
# test_10b is obsolete, it will be covered by related sanity-lfsck tests.
1011 echo "starting MDTs with OI scrub disabled"
1012 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1013 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1014 scrub_check_flags 4 recreated,inconsistent
1016 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
1017 do_nodes $(comma_list $(mdts_nodes)) \
1018 $LCTL set_param fail_val=3 fail_loc=0x190
1021 scrub_check_status 6 scanning
1023 echo "starting MDTs with OI scrub disabled"
1024 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
1025 scrub_check_status 9 paused
1027 echo "starting MDTs without disabling OI scrub"
1028 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
1029 scrub_check_status 12 scanning
1031 do_nodes $(comma_list $(mdts_nodes)) \
1032 $LCTL set_param fail_loc=0 fail_val=0
1034 scrub_check_status 13 completed
1035 scrub_check_flags 14 ""
1037 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
1040 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1041 skip "ldiskfs special test" && return
1046 check_mount_and_prep
1048 for n in $(seq $MDSCOUNT); do
1049 test_mkdir -i $((n - 1)) -c1 $DIR/$tdir/mds$n ||
1050 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
1052 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
1053 error "(2) Fail to create under $tdir/mds$n"
1056 # reset OI scrub start point by force
1058 scrub_check_status 4 completed
1063 # OI scrub should skip the new created objects for the first accessing
1064 # notice we're creating a new llog for every OST on every startup
1065 # new features can make this even less stable, so we only check that
# the number of skipped files is more than the number of known created files
1067 local MINIMUM=$((CREATED + 1)) # files + directory
1068 for n in $(seq $MDSCOUNT); do
1069 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1070 [ $SKIPPED -lt $MINIMUM ] &&
1071 error "(5) Expect at least $MINIMUM objects" \
1072 "skipped on mds$n, but got $SKIPPED"
1074 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1077 # reset OI scrub start point by force
1079 scrub_check_status 7 completed
1081 # OI scrub should skip the new created object only once
1082 for n in $(seq $MDSCOUNT); do
1083 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1084 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1086 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1087 error "(8) Expect 0 objects skipped on mds$n, but" \
1091 run_test 11 "OI scrub skips the new created objects only once"
1094 check_mount_and_prep
1095 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1097 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1098 do_facet ost1 $LCTL set_param fail_loc=0x195
1099 local count=$(precreated_ost_obj_count 0 0)
1101 createmany -o $DIR/$tdir/f $((count + 32))
1102 umount_client $MOUNT || error "(1) Fail to stop client!"
1104 stop ost1 || error "(2) Fail to stop ost1"
1106 #define OBD_FAIL_OST_NODESTROY 0x233
1107 do_facet ost1 $LCTL set_param fail_loc=0x233
1109 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1110 error "(3) Fail to start ost1"
1112 mount_client $MOUNT || error "(4) Fail to start client!"
1114 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1116 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1118 do_facet ost1 $LCTL set_param fail_loc=0
# Poll ost1's oi_scrub "status" field until it reads "completed".  The error
# text spells the awaited status out literally: no variable named "expected"
# is assigned in the visible part of this test, so interpolating it would
# print an empty value.
wait_update_facet ost1 "$LCTL get_param -n \
	osd-*.$(facet_svc ost1).oi_scrub |
	awk '/^status/ { print \\\$2 }'" "completed" 6 ||
	error "(7) Expected 'completed' on ost1"
1124 ls -ail $DIR/$tdir > /dev/null || {
1126 error "(8) ls should succeed"
1129 run_test 12 "OI scrub can rebuild invalid /O entries"
1132 check_mount_and_prep
1133 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1135 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1136 do_facet ost1 $LCTL set_param fail_loc=0x196
1137 local count=$(precreated_ost_obj_count 0 0)
1139 createmany -o $DIR/$tdir/f $((count + 32))
1140 do_facet ost1 $LCTL set_param fail_loc=0
1142 umount_client $MOUNT || error "(1) Fail to stop client!"
1144 stop ost1 || error "(2) Fail to stop ost1"
1146 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1147 error "(3) Fail to start ost1"
1149 mount_client $MOUNT || error "(4) Fail to start client!"
1151 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1153 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Poll ost1's oi_scrub "status" field until it reads "completed".  The error
# text spells the awaited status out literally: no variable named "expected"
# is assigned in the visible part of this test, so interpolating it would
# print an empty value.
wait_update_facet ost1 "$LCTL get_param -n \
	osd-*.$(facet_svc ost1).oi_scrub |
	awk '/^status/ { print \\\$2 }'" "completed" 6 ||
	error "(7) Expected 'completed' on ost1"
1160 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1162 run_test 13 "OI scrub can rebuild missed /O entries"
1165 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1166 skip "ldiskfs special test"
1168 check_mount_and_prep
1169 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1171 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1172 do_facet ost1 $LCTL set_param fail_loc=0x196
1173 local count=$(precreated_ost_obj_count 0 0)
1175 createmany -o $DIR/$tdir/f $((count + 1000))
1176 do_facet ost1 $LCTL set_param fail_loc=0
1178 umount_client $MOUNT || error "(1) Fail to stop client!"
1180 stop ost1 || error "(2) Fail to stop ost1"
1183 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1184 error "(3) Fail to run e2fsck error"
1186 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1187 error "(4) Fail to start ost1"
1189 mount_client $MOUNT || error "(5) Fail to start client!"
1191 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1192 awk '/^lf_repa[ir]*ed/ { print $2 }')
1193 [ $LF_REPAIRED -ge 1000 ] ||
1194 error "(6) Some entry under /lost+found should be repaired"
1196 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1200 echo "run e2fsck again after LFSCK"
1201 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1202 error "(8) Fail to run e2fsck error"
1204 run_test 14 "OI scrub can repair OST objects under lost+found"
1209 formatall > /dev/null
1210 setupall > /dev/null
1213 echo "starting MDTs with OI scrub disabled"
1214 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1215 scrub_check_status 3 init
1216 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1217 scrub_check_flags 4 recreated,inconsistent
1219 # run under dryrun mode
1220 scrub_start 5 --dryrun
1221 scrub_check_status 6 completed
1222 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1223 scrub_check_flags 7 inconsistent
1226 scrub_check_flags 7 recreated,inconsistent
1229 scrub_check_params 8 dryrun
1230 scrub_check_repaired 9 $repaired 1
1232 # run under dryrun mode again
1233 scrub_start 10 --dryrun
1234 scrub_check_status 11 completed
1235 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1236 scrub_check_flags 12 inconsistent
1238 scrub_check_flags 12 recreated,inconsistent
1240 scrub_check_params 13 dryrun
1241 scrub_check_repaired 14 $repaired 1
1243 # run under normal mode
1245 scrub_check_status 16 completed
1246 scrub_check_flags 17 ""
1247 scrub_check_params 18 ""
1248 scrub_check_repaired 19 $repaired 0
1250 # run under normal mode again
1252 scrub_check_status 21 completed
1253 scrub_check_flags 22 ""
1254 scrub_check_params 23 ""
1255 scrub_check_repaired 24 0 0
1257 run_test 15 "Dryrun mode OI scrub"
1260 check_mount_and_prep
1261 scrub_enable_index_backup
1263 #define OBD_FAIL_OSD_INDEX_CRASH 0x199
1264 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x199
1266 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
1268 echo "starting MDTs without disabling OI scrub"
1269 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
1270 mount_client $MOUNT || error "(2) Fail to start client!"
1272 scrub_disable_index_backup
1274 run_test 16 "Initial OI scrub can rebuild crashed index objects"
# Put the MDS/OST sizes and the OST count back to the values held in the
# SAVED_* copies.
MDSSIZE="$SAVED_MDSSIZE"
OSTSIZE="$SAVED_OSTSIZE"
OSTCOUNT="$SAVED_OSTCOUNT"
1281 # cleanup the system at last
1282 REFORMAT="yes" cleanup_and_setup_lustre
1285 check_and_cleanup_lustre