3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
11 LUSTRE=${LUSTRE:-$(dirname $0)/..}
12 . $LUSTRE/tests/test-framework.sh
16 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
18 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
19 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
23 require_dsh_mds || exit 0
27 if ! check_versions; then
28 skip "It is NOT necessary to test scrub under interoperation mode"
34 SAVED_MDSSIZE=${MDSSIZE}
35 SAVED_OSTSIZE=${OSTSIZE}
36 SAVED_OSTCOUNT=${OSTCOUNT}
38 # use small MDS + OST size to speed formatting time
39 # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
40 # 400M MDT device can guarantee uninitialized groups during the OI scrub
# The sizes are compared numerically with -lt: inside [[ ]] the < operator
# sorts its operands as strings, so e.g. 50000 would not count as being
# below 400000 while 2000000 would. An empty size evaluates as 0.
# ldiskfs targets are always set to 400000, whatever size was configured.
41 [[ $MDSSIZE -lt 400000 || "$mds1_FSTYPE" == ldiskfs ]] && MDSSIZE=400000
42 [[ $OSTSIZE -lt 400000 || "$ost1_FSTYPE" == ldiskfs ]] && OSTSIZE=400000
44 # there is no need for many OSTs; cap the count to reduce the format/start/stop overhead
45 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
47 # build up a clean test environment.
48 REFORMAT="yes" check_and_setup_lustre
50 MDT_DEV="${FSNAME}-MDT0000"
51 OST_DEV="${FSNAME}-OST0000"
57 # use "lfsck_start -A" once we no longer need to test interop
58 for n in $(seq $MDSCOUNT); do
59 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
61 error "($error_id) Failed to start OI scrub on mds$n"
69 # use "lfsck_stop -A" once we no longer need to test interop
70 for n in $(seq $MDSCOUNT); do
71 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
72 error "($error_id) Failed to stop OI scrub on mds$n"
79 do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
# Command strings used by the tests (expanded unquoted, optionally with extra
# arguments appended, e.g. "$START_SCRUB_ON_OST -r" or "$SHOW_SCRUB | awk ...").
# START_*/STOP_* run lctl lfsck_start/lfsck_stop against ${MDT_DEV} on
# $SINGLEMDS or against ${OST_DEV} on ost1; SHOW_* print the matching
# osd-*.<device>.oi_scrub parameter with "lctl get_param -n".
82 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
83 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
84 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
85 SHOW_SCRUB="do_facet $SINGLEMDS \
86 $LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
87 SHOW_SCRUB_ON_OST="do_facet ost1 \
88 $LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
# Server mount options passed when (re)starting targets; the NOSCRUB variant
# differs only by the extra "noscrub" option.
89 MOUNT_OPTS_SCRUB="-o user_xattr"
90 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
99 echo "preparing... $(date)"
100 for n in $(seq $MDSCOUNT); do
101 echo "creating $nfiles files on mds$n"
102 test_mkdir -i $((n - 1)) -c1 $DIR/$tdir/mds$n ||
103 error "Failed to create directory mds$n"
104 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
105 error "Failed to copy files to mds$n"
106 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
107 error "mkdir failed on mds$n"
108 touch $DIR/$tdir/mds$n/d_$tfile/f1 > \
109 /dev/null || error "create failed on mds$n"
110 dd if=/dev/zero of=$DIR/$tdir/mds$n/d_$tfile/f2 bs=1M count=1 ||
111 error "write failed on mds$n"
112 if [[ $nfiles -gt 0 ]]; then
113 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
114 /dev/null || error "createmany failed on mds$n"
117 echo "prepared $(date)."
119 [ ! -z $inject ] && [ $inject -eq 2 ] && {
120 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
121 do_nodes $(comma_list $(mdts_nodes)) \
122 $LCTL set_param fail_loc=0x198
124 for n in $(seq $MDSCOUNT); do
125 cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
126 error "Fail to copy runas to MDS$n"
129 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
132 [ ! -z $inject ] && [ $inject -eq 1 ] &&
133 [ "$mds1_FSTYPE" = "zfs" ] && {
134 #define OBD_FAIL_OSD_FID_MAPPING 0x193
135 do_nodes $(comma_list $(mdts_nodes)) \
136 $LCTL set_param fail_loc=0x193
138 for n in $(seq $MDSCOUNT); do
139 chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
140 chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
143 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
146 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
148 # sync local transactions on every MDT
149 do_nodes $(comma_list $(mdts_nodes)) \
150 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
152 # wait for a while so that update logs are cancelled after the transactions are committed.
155 # sync again to guarantee all things done.
156 do_nodes $(comma_list $(mdts_nodes)) \
157 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
159 for n in $(seq $MDSCOUNT); do
161 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
164 [ ! -z $inject ] && [ "$mds1_FSTYPE" = "ldiskfs" ] && {
165 if [ $inject -eq 1 ]; then
166 for n in $(seq $MDSCOUNT); do
167 mds_backup_restore mds$n ||
168 error "Backup/restore on mds$n failed"
170 elif [ $inject -eq 2 ]; then
181 for n in $(seq $MDSCOUNT); do
182 start mds$n $(mdsdevname $n) $opts >/dev/null ||
183 error "($error_id) Failed to start mds$n"
191 for n in $(seq $MDSCOUNT); do
192 echo "stopping mds$n"
193 stop mds$n >/dev/null ||
194 error "($error_id) Failed to stop mds$n"
# For every MDS (mds1 .. mds$MDSCOUNT), wait through wait_update_facet until
# the second field of the "status" line of that target's oi_scrub parameter
# equals $expected; if the wait fails, call error tagged with $error_id.
# NOTE(review): $error_id and $expected are assigned on lines not shown here;
# callers invoke this as "scrub_check_status <id> <status>", so they are
# presumably $1 and $2 - confirm.
# NOTE(review): the trailing 6 is handed to wait_update_facet, presumably as a
# time limit - confirm against test-framework.sh.
198 scrub_check_status() {
203 for n in $(seq $MDSCOUNT); do
204 wait_update_facet mds$n "$LCTL get_param -n \
205 osd-*.$(facet_svc mds$n).oi_scrub |
206 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
207 error "($error_id) Expected '$expected' on mds$n"
# For every MDS (mds1 .. mds$MDSCOUNT), read the second field of the "flags"
# line of that target's oi_scrub parameter once (no waiting) and call error,
# tagged with $error_id, when it differs from $expected. An empty $expected
# therefore requires that no flag value is reported.
# NOTE(review): $error_id and $expected are assigned on lines not shown here;
# callers invoke this as "scrub_check_flags <id> <flags>" - presumably $1/$2.
211 scrub_check_flags() {
217 for n in $(seq $MDSCOUNT); do
218 actual=$(do_facet mds$n $LCTL get_param -n \
219 osd-*.$(facet_svc mds$n).oi_scrub |
220 awk '/^flags/ { print $2 }')
221 if [ "$actual" != "$expected" ]; then
222 error "($error_id) Expected '$expected' on mds$n, but" \
# For every MDS (mds1 .. mds$MDSCOUNT), read the second field of the "param"
# line of that target's oi_scrub parameter and call error, tagged with
# $error_id, when it differs from $expected.
# NOTE(review): $error_id and $expected are assigned on lines not shown here;
# callers invoke this as "scrub_check_params <id> <param>" - presumably $1/$2.
228 scrub_check_params() {
234 for n in $(seq $MDSCOUNT); do
235 actual=$(do_facet mds$n $LCTL get_param -n \
236 osd-*.$(facet_svc mds$n).oi_scrub |
237 awk '/^param/ { print $2 }')
238 if [ "$actual" != "$expected" ]; then
239 error "($error_id) Expected '$expected' on mds$n, but" \
# For every MDS (mds1 .. mds$MDSCOUNT), check a counter from that target's
# oi_scrub parameter against $expected.
# When $dryrun is 1 the "inconsistent:" line is read; the "updated:" line is
# read by the other assignment (the branch keyword between the two
# assignments is on a line not shown here - presumably an else).
# NOTE(review): $error_id, $expected and $dryrun are assigned on lines not
# shown; callers invoke this as "scrub_check_repaired <id> <count> <dryrun>".
245 scrub_check_repaired() {
252 for n in $(seq $MDSCOUNT); do
253 if [ $dryrun -eq 1 ]; then
254 actual=$(do_facet mds$n $LCTL get_param -n \
255 osd-*.$(facet_svc mds$n).oi_scrub |
256 awk '/^inconsistent:/ { print $2 }')
258 actual=$(do_facet mds$n $LCTL get_param -n \
259 osd-*.$(facet_svc mds$n).oi_scrub |
260 awk '/^updated:/ { print $2 }')
# expected == 0: the counter must be exactly 0
263 if [ $expected -eq 0 -a $actual -ne 0 ]; then
264 error "($error_id) Expected no repaired on mds$n, but" \
# expected != 0: the counter must be at least $expected
268 if [ $expected -ne 0 -a $actual -lt $expected ]; then
269 error "($error_id) Expected '$expected' on mds$n, but" \
279 for n in $(seq $MDSCOUNT); do
280 diff -q $LUSTRE/tests/test-framework.sh \
281 $DIR/$tdir/mds$n/test-framework.sh ||
282 error "($error_id) File data check failed"
# For every MDS (mds1 .. mds$MDSCOUNT), compare $LUSTRE/tests/$filename with
# the copy at $DIR/$tdir/mds$n/$filename using "diff -q"; call error, tagged
# with $error_id, when diff reports failure.
# NOTE(review): $filename and $error_id are assigned on lines not shown here;
# callers invoke this as "scrub_check_data2 <filename> <id>" - presumably
# $1 and $2.
286 scrub_check_data2() {
291 for n in $(seq $MDSCOUNT); do
292 diff -q $LUSTRE/tests/$filename \
293 $DIR/$tdir/mds$n/$filename ||
294 error "($error_id) File data check failed"
299 [ "$mds1_FSTYPE" != "ldiskfs" ] && return
305 for n in $(seq $MDSCOUNT); do
306 mds_remove_ois mds$n $index ||
307 error "($error_id) Failed to remove OI .$index on mds$n"
# Run "lctl set_param -n" on all MDT nodes (the comma-joined output of
# mdts_nodes). The parameter assignment itself is on the continuation line,
# which is not shown here.
# NOTE(review): judging by the function name this enables automatic OI scrub -
# confirm against the full source.
311 scrub_enable_auto() {
312 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
317 [ "$mds1_FSTYPE" != "ldiskfs" ] && return
321 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
322 osd-*.*.full_scrub_ratio=$ratio
# Set osd-*.*.full_scrub_threshold_rate to $rate on all MDT nodes.
# Returns immediately, doing nothing, unless mds1 uses the ldiskfs backend.
# NOTE(review): $rate is assigned on a line not shown here; callers invoke
# this as "full_scrub_threshold_rate <rate>" - presumably $1.
325 full_scrub_threshold_rate() {
326 [ "$mds1_FSTYPE" != "ldiskfs" ] && return
330 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
331 osd-*.*.full_scrub_threshold_rate=$rate
# Set osd-*.*.index_backup=1 on every server node (the comma-joined output
# of all_server_nodes). Takes no arguments in the lines shown.
334 scrub_enable_index_backup() {
335 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
336 osd-*.*.index_backup=1
# Set osd-*.*.index_backup=0 on every server node (the comma-joined output
# of all_server_nodes). Takes no arguments in the lines shown.
339 scrub_disable_index_backup() {
340 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
341 osd-*.*.index_backup=0
346 echo "starting MDTs without disabling OI scrub"
347 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
348 scrub_check_status 2 init
349 scrub_check_flags 3 ""
350 mount_client $MOUNT || error "(4) Fail to start client!"
353 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
356 [ -n "$FILESET" ] && skip "Not functional for FILESET set"
359 echo "start $SINGLEMDS without disabling OI scrub"
360 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
362 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
363 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
365 mount_client $MOUNT || error "(4) Fail to start client!"
366 #define OBD_FAIL_OSD_FID_MAPPING 0x193
367 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
368 # update .lustre OI mapping
370 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
371 umount_client $MOUNT || error "(5) Fail to stop client!"
373 echo "stop $SINGLEMDS"
374 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
376 echo "start $SINGLEMDS with disabling OI scrub"
377 start $SINGLEMDS $(mdsdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
378 error "(7) Fail to start MDS!"
380 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
381 [ "$FLAGS" == "inconsistent" ] ||
382 error "(9) Expect 'inconsistent', but got '$FLAGS'"
384 run_test 1a "Auto trigger initial OI scrub when server mounts"
388 echo "start MDTs without disabling OI scrub"
389 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
390 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
391 scrub_check_status 3 completed
392 mount_client $MOUNT || error "(4) Fail to start client!"
393 scrub_check_data2 runas 5
394 scrub_check_status 6 completed
396 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
399 [ "$mds1_FSTYPE" != "ldiskfs" ] &&
400 skip "ldiskfs special test"
404 # OI files to be removed:
406 # idx 2: oi.16.{2,4,8,16,32}
407 # idx 3: oi.16.{3,9,27}
408 for index in 0 2 3; do
410 scrub_remove_ois 1 $index
411 echo "start MDTs with OI scrub disabled"
412 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
413 scrub_check_flags 3 recreated
415 scrub_check_status 5 completed
416 scrub_check_flags 6 ""
419 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
422 [ "$mds1_FSTYPE" != "ldiskfs" ] &&
423 skip "ldiskfs special test"
426 echo "starting MDTs without disabling OI scrub"
427 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
428 scrub_check_status 3 completed
429 mount_client $MOUNT || error "(4) Fail to start client!"
432 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
434 # test_3 is obsolete, it will be covered by test_5.
436 formatall > /dev/null
440 echo "starting MDTs with OI scrub disabled"
441 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
442 scrub_check_status 3 init
443 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
444 scrub_check_flags 4 recreated,inconsistent
446 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
450 echo "starting MDTs with OI scrub disabled"
451 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
452 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
453 scrub_check_flags 4 recreated,inconsistent
454 mount_client $MOUNT || error "(5) Fail to start client!"
460 scrub_check_status 7 completed
461 scrub_check_flags 8 ""
464 for n in $(seq $MDSCOUNT); do
465 updated0[$n]=$(scrub_status $n |
466 awk '/^prior_updated/ { print $2 }')
469 scrub_check_data2 sanity-scrub.sh 9
473 for n in $(seq $MDSCOUNT); do
474 updated1[$n]=$(scrub_status $n |
475 awk '/^prior_updated/ { print $2 }')
476 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
477 error "(10) NOT auto trigger full scrub as expected"
480 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
483 [ "$mds1_FSTYPE" != "ldiskfs" ] &&
484 skip "ldiskfs special test"
487 echo "starting MDTs with OI scrub disabled"
488 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
489 scrub_check_flags 4 recreated,inconsistent
490 mount_client $MOUNT || error "(5) Fail to start client!"
493 full_scrub_threshold_rate 10000
497 scrub_check_status 7 completed
498 scrub_check_flags 8 ""
501 for n in $(seq $MDSCOUNT); do
502 updated0[$n]=$(scrub_status $n |
503 awk '/^prior_updated/ { print $2 }')
505 echo "OI scrub on MDS$n status for the 1st time:"
506 do_facet mds$n $LCTL get_param -n \
507 osd-*.$(facet_svc mds$n).oi_scrub
510 scrub_check_data2 sanity-scrub.sh 9
513 scrub_check_status 10 completed
514 scrub_check_flags 11 ""
517 for n in $(seq $MDSCOUNT); do
518 updated1[$n]=$(scrub_status $n |
519 awk '/^prior_updated/ { print $2 }')
521 echo "OI scrub on MDS$n status for the 2nd time:"
522 do_facet mds$n $LCTL get_param -n \
523 osd-*.$(facet_svc mds$n).oi_scrub
525 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
526 error "(12) Auto trigger full scrub unexpectedly"
529 for n in $(seq $MDSCOUNT); do
530 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
531 error "(13) fail to ls"
535 scrub_check_status 14 completed
536 scrub_check_flags 15 ""
538 for n in $(seq $MDSCOUNT); do
539 updated0[$n]=$(scrub_status $n |
540 awk '/^prior_updated/ { print $2 }')
542 echo "OI scrub on MDS$n status for the 3rd time:"
543 do_facet mds$n $LCTL get_param -n \
544 osd-*.$(facet_svc mds$n).oi_scrub
546 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
547 error "(16) Auto trigger full scrub unexpectedly"
550 for n in $(seq $MDSCOUNT); do
551 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
555 for n in $(seq $MDSCOUNT); do
556 updated1[$n]=$(scrub_status $n |
557 awk '/^prior_updated/ { print $2 }')
558 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
559 echo "OI scrub on MDS$n status for the 4th time:"
560 do_facet mds$n $LCTL get_param -n \
561 osd-*.$(facet_svc mds$n).oi_scrub
563 error "(18) NOT auto trigger full scrub as expected"
567 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
570 [ "$mds1_FSTYPE" != "ldiskfs" ] &&
571 skip "ldiskfs special test"
574 echo "starting MDTs with OI scrub disabled"
575 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
576 scrub_check_flags 4 recreated,inconsistent
577 mount_client $MOUNT || error "(5) Fail to start client!"
580 full_scrub_threshold_rate 20
584 scrub_check_status 7 completed
585 scrub_check_flags 8 ""
588 for n in $(seq $MDSCOUNT); do
589 updated0[$n]=$(scrub_status $n |
590 awk '/^prior_updated/ { print $2 }')
592 echo "OI scrub on MDS$n status for the 1st time:"
593 do_facet mds$n $LCTL get_param -n \
594 osd-*.$(facet_svc mds$n).oi_scrub
597 scrub_check_data2 sanity-scrub.sh 9
600 scrub_check_status 10 completed
601 scrub_check_flags 11 ""
604 for n in $(seq $MDSCOUNT); do
605 updated1[$n]=$(scrub_status $n |
606 awk '/^prior_updated/ { print $2 }')
608 echo "OI scrub on MDS$n status for the 2nd time:"
609 do_facet mds$n $LCTL get_param -n \
610 osd-*.$(facet_svc mds$n).oi_scrub
612 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
613 error "(12) Auto trigger full scrub unexpectedly"
616 for n in $(seq $MDSCOUNT); do
617 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
618 error "(13) fail to ls"
622 scrub_check_status 14 completed
623 scrub_check_flags 15 ""
625 for n in $(seq $MDSCOUNT); do
626 updated0[$n]=$(scrub_status $n |
627 awk '/^prior_updated/ { print $2 }')
629 echo "OI scrub on MDS$n status for the 3rd time:"
630 do_facet mds$n $LCTL get_param -n \
631 osd-*.$(facet_svc mds$n).oi_scrub
633 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
634 error "(16) Auto trigger full scrub unexpectedly"
637 for n in $(seq $MDSCOUNT); do
638 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
642 for n in $(seq $MDSCOUNT); do
643 updated1[$n]=$(scrub_status $n |
644 awk '/^prior_updated/ { print $2 }')
645 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
646 echo "OI scrub on MDS$n status for the 4th time:"
647 do_facet mds$n $LCTL get_param -n \
648 osd-*.$(facet_svc mds$n).oi_scrub
650 error "(18) NOT auto trigger full scrub as expected"
654 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
657 [ "$mds1_FSTYPE" != "ldiskfs" ] && skip "ldiskfs only test"
661 #define OBD_FAIL_OSD_DUPLICATE_MAP 0x19b
662 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x19b
663 for i in {1..100}; do
664 echo $i > $DIR/$tdir/f_$i || error "write f_$i failed"
666 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
668 for i in {101..200}; do
669 echo $i > $DIR/$tdir/f_$i || error "write f_$i failed"
672 for i in {1..200}; do
673 echo $i | cmp $DIR/$tdir/f_$i - || error "f_$i data corrupt"
676 run_test 4d "FID in LMA mismatch with object FID won't block create"
679 formatall > /dev/null
683 echo "starting MDTs with OI scrub disabled (1)"
684 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
685 scrub_check_status 3 init
686 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
687 scrub_check_flags 4 recreated,inconsistent
688 mount_client $MOUNT || error "(5) Fail to start client!"
692 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
693 do_nodes $(comma_list $(mdts_nodes)) \
694 $LCTL set_param fail_val=3 fail_loc=0x190
697 umount_client $MOUNT || error "(7) Fail to stop client!"
698 scrub_check_status 8 scanning
700 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
701 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
706 do_nodes $(comma_list $(mdts_nodes)) \
707 $LCTL set_param fail_loc=0 fail_val=0
709 echo "starting MDTs with OI scrub disabled (2)"
710 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
711 scrub_check_status 11 crashed
714 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
715 do_nodes $(comma_list $(mdts_nodes)) \
716 $LCTL set_param fail_val=3 fail_loc=0x190
718 echo "starting MDTs without disabling OI scrub"
719 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
720 scrub_check_status 14 scanning
722 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
723 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
725 scrub_check_status 15 failed
726 mount_client $MOUNT || error "(16) Fail to start client!"
729 do_nodes $(comma_list $(mdts_nodes)) \
730 $LCTL set_param fail_loc=0 fail_val=0
735 for n in $(seq $MDSCOUNT); do
736 stat $DIR/$tdir/mds$n/sanity-scrub.sh &
740 for n in $(seq $MDSCOUNT); do
742 error "(18) Fail to stat mds$n/sanity-scrub.sh"
745 scrub_check_status 19 completed
746 scrub_check_flags 20 ""
748 run_test 5 "OI scrub state machine"
752 echo "starting MDTs with OI scrub disabled"
753 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
754 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
755 scrub_check_flags 4 recreated,inconsistent
756 mount_client $MOUNT || error "(5) Fail to start client!"
760 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
761 do_nodes $(comma_list $(mdts_nodes)) \
762 $LCTL set_param fail_val=2 fail_loc=0x190
766 # Sleep 5 sec to guarantee at least one object processed by OI scrub
768 # Fail the OI scrub to guarantee there is at least one checkpoint
769 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
770 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
772 scrub_check_status 7 failed
774 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
775 do_nodes $(comma_list $(mdts_nodes)) \
776 $LCTL set_param fail_val=3 fail_loc=0x190
779 for n in $(seq $MDSCOUNT); do
780 # stat will re-trigger OI scrub
781 stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
782 error "(8) Failed to stat mds$n/sanity-scrub.sh"
785 umount_client $MOUNT || error "(9) Fail to stop client!"
786 scrub_check_status 10 scanning
788 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
789 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
793 for n in $(seq $MDSCOUNT); do
794 position0[$n]=$(scrub_status $n |
795 awk '/^last_checkpoint_position/ {print $2}')
796 position0[$n]=$((${position0[$n]} + 1))
801 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
802 do_nodes $(comma_list $(mdts_nodes)) \
803 $LCTL set_param fail_val=3 fail_loc=0x190
805 echo "starting MDTs without disabling OI scrub"
806 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
808 scrub_check_status 13 scanning
811 for n in $(seq $MDSCOUNT); do
812 position1[$n]=$(scrub_status $n |
813 awk '/^latest_start_position/ {print $2}')
814 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
815 error "(14) Expected position ${position0[$n]}, but" \
816 "got ${position1[$n]}"
820 do_nodes $(comma_list $(mdts_nodes)) \
821 $LCTL set_param fail_loc=0 fail_val=0
823 scrub_check_status 15 completed
824 scrub_check_flags 16 ""
826 run_test 6 "OI scrub resumes from last checkpoint"
830 echo "starting MDTs with OI scrub disabled"
831 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
832 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
833 scrub_check_flags 4 recreated,inconsistent
834 mount_client $MOUNT || error "(5) Fail to start client!"
838 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
839 do_nodes $(comma_list $(mdts_nodes)) \
840 $LCTL set_param fail_val=3 fail_loc=0x190
845 for n in $(seq $MDSCOUNT); do
846 stat $DIR/$tdir/mds$n/${tfile}300 ||
847 error "(7) Failed to stat mds$n/${tfile}300!"
850 scrub_check_status 8 scanning
851 if [ "$mds1_FSTYPE" != "ldiskfs" ]; then
852 scrub_check_flags 9 inconsistent,auto
854 scrub_check_flags 9 recreated,inconsistent,auto
857 do_nodes $(comma_list $(mdts_nodes)) \
858 $LCTL set_param fail_loc=0 fail_val=0
860 scrub_check_status 10 completed
863 run_test 7 "System is available during OI scrub scanning"
867 echo "starting MDTs with OI scrub disabled"
868 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
869 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
870 scrub_check_flags 4 recreated,inconsistent
872 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
873 do_nodes $(comma_list $(mdts_nodes)) \
874 $LCTL set_param fail_val=1 fail_loc=0x190
877 scrub_check_status 6 scanning
879 scrub_check_status 8 stopped
881 scrub_check_status 10 scanning
883 do_nodes $(comma_list $(mdts_nodes)) \
884 $LCTL set_param fail_loc=0 fail_val=0
886 scrub_check_status 11 completed
887 scrub_check_flags 12 ""
889 run_test 8 "Control OI scrub manually"
892 # Skip the scrub speed test on ZFS because its performance is unstable
893 [ "$mds1_FSTYPE" != "ldiskfs" ] &&
894 skip "test scrub speed only on ldiskfs"
896 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
897 skip "Testing on UP system, the speed may be inaccurate."
902 echo "starting MDTs with OI scrub disabled"
903 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
904 scrub_check_flags 4 recreated,inconsistent
906 local BASE_SPEED1=100
908 # OI scrub should run with full speed under inconsistent case
909 scrub_start 5 -s $BASE_SPEED1
912 scrub_check_status 6 completed
913 scrub_check_flags 7 ""
914 # OI scrub should run with limited speed under non-inconsistent case
915 scrub_start 8 -s $BASE_SPEED1 -r
918 scrub_check_status 9 scanning
920 # Take the 1024 pre-fetched items into account. There may also be
921 # a timing error, which should normally be less than 2 seconds.
922 # We allow another 20% for scheduling error.
923 local PRE_FETCHED=1024
925 # MAX_MARGIN = 1.2 = 12 / 10
926 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
927 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
929 for n in $(seq $MDSCOUNT); do
930 local SPEED=$(scrub_status $n | \
931 awk '/^average_speed/ { print $2 }')
932 [ $SPEED -lt $MAX_SPEED ] ||
933 error "(10) Got speed $SPEED, expected less than" \
938 local BASE_SPEED2=300
940 for n in $(seq $MDSCOUNT); do
941 do_facet mds$n $LCTL set_param -n \
942 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
946 # MIN_MARGIN = 0.8 = 8 / 10
947 local MIN_SPEED=$(((PRE_FETCHED + \
948 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
949 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
950 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
951 # MAX_MARGIN = 1.2 = 12 / 10
952 MAX_SPEED=$(((PRE_FETCHED + \
953 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
954 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
955 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
956 for n in $(seq $MDSCOUNT); do
957 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
958 [ $SPEED -gt $MIN_SPEED ] ||
959 error "(11) Got speed $SPEED, expected more than" \
961 [ $SPEED -lt $MAX_SPEED ] ||
962 error "(12) Got speed $SPEED, expected less than" \
965 do_facet mds$n $LCTL set_param -n \
966 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
969 scrub_check_status 13 completed
971 run_test 9 "OI scrub speed control"
975 echo "starting mds$n with OI scrub disabled (1)"
976 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
977 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
978 scrub_check_flags 4 recreated,inconsistent
979 mount_client $MOUNT || error "(5) Fail to start client!"
983 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
984 do_nodes $(comma_list $(mdts_nodes)) \
985 $LCTL set_param fail_val=1 fail_loc=0x190
988 scrub_check_status 7 scanning
989 umount_client $MOUNT || error "(8) Fail to stop client!"
991 echo "starting MDTs with OI scrub disabled (2)"
992 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
993 scrub_check_status 11 paused
995 echo "starting MDTs without disabling OI scrub"
996 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
997 scrub_check_status 14 scanning
999 do_nodes $(comma_list $(mdts_nodes)) \
1000 $LCTL set_param fail_loc=0 fail_val=0
1002 scrub_check_status 15 completed
1003 scrub_check_flags 16 ""
1005 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
1007 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
1010 echo "starting MDTs with OI scrub disabled"
1011 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1012 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
1013 scrub_check_flags 4 recreated,inconsistent
1015 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
1016 do_nodes $(comma_list $(mdts_nodes)) \
1017 $LCTL set_param fail_val=3 fail_loc=0x190
1020 scrub_check_status 6 scanning
1022 echo "starting MDTs with OI scrub disabled"
1023 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
1024 scrub_check_status 9 paused
1026 echo "starting MDTs without disabling OI scrub"
1027 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
1028 scrub_check_status 12 scanning
1030 do_nodes $(comma_list $(mdts_nodes)) \
1031 $LCTL set_param fail_loc=0 fail_val=0
1033 scrub_check_status 13 completed
1034 scrub_check_flags 14 ""
1036 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
1039 [ "$mds1_FSTYPE" != "ldiskfs" ] &&
1040 skip "ldiskfs special test"
1045 check_mount_and_prep
1047 for n in $(seq $MDSCOUNT); do
1048 test_mkdir -i $((n - 1)) -c1 $DIR/$tdir/mds$n ||
1049 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
1051 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
1052 error "(2) Fail to create under $tdir/mds$n"
1055 # reset OI scrub start point by force
1057 scrub_check_status 4 completed
1062 # OI scrub should skip the newly created objects the first time they are accessed.
1063 # Note that we create a new llog for every OST on every startup, and
1064 # new features can make this even less stable, so we only check that
1065 # the number of skipped objects is no less than the number of known created objects
1066 local MINIMUM=$((CREATED + 1)) # files + directory
1067 for n in $(seq $MDSCOUNT); do
1068 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1069 [ $SKIPPED -lt $MINIMUM ] &&
1070 error "(5) Expect at least $MINIMUM objects" \
1071 "skipped on mds$n, but got $SKIPPED"
1073 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1076 # reset OI scrub start point by force
1078 scrub_check_status 7 completed
1080 # OI scrub should skip the newly created objects only once
1081 for n in $(seq $MDSCOUNT); do
1082 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1083 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1085 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1086 error "(8) Expect 0 objects skipped on mds$n, but" \
1090 run_test 11 "OI scrub skips the new created objects only once"
1093 check_mount_and_prep
1094 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1096 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1097 do_facet ost1 $LCTL set_param fail_loc=0x195
1098 local count=$(precreated_ost_obj_count 0 0)
1100 createmany -o $DIR/$tdir/f $((count + 32))
1101 umount_client $MOUNT || error "(1) Fail to stop client!"
1103 stop ost1 || error "(2) Fail to stop ost1"
1105 #define OBD_FAIL_OST_NODESTROY 0x233
1106 do_facet ost1 $LCTL set_param fail_loc=0x233
1108 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1109 error "(3) Fail to start ost1"
1111 mount_client $MOUNT || error "(4) Fail to start client!"
1113 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1115 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1117 do_facet ost1 $LCTL set_param fail_loc=0
# Wait through wait_update_facet until the second field of the "status" line
# of ost1's oi_scrub parameter reads "completed". The failure message spells
# out the awaited state literally rather than expanding $expected, which this
# test never assigns in the lines shown here (it would print as '').
1118 wait_update_facet ost1 "$LCTL get_param -n \
1119 osd-*.$(facet_svc ost1).oi_scrub |
1120 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1121 error "(7) Expected 'completed' on ost1"
1123 ls -ail $DIR/$tdir > /dev/null || {
1125 error "(8) ls should succeed"
1128 run_test 12 "OI scrub can rebuild invalid /O entries"
1131 check_mount_and_prep
1132 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1134 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1135 do_facet ost1 $LCTL set_param fail_loc=0x196
1136 local count=$(precreated_ost_obj_count 0 0)
1138 createmany -o $DIR/$tdir/f $((count + 32))
1139 do_facet ost1 $LCTL set_param fail_loc=0
1141 umount_client $MOUNT || error "(1) Fail to stop client!"
1143 stop ost1 || error "(2) Fail to stop ost1"
1145 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1146 error "(3) Fail to start ost1"
1148 mount_client $MOUNT || error "(4) Fail to start client!"
1150 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1152 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Wait through wait_update_facet until the second field of the "status" line
# of ost1's oi_scrub parameter reads "completed". The failure message spells
# out the awaited state literally rather than expanding $expected, which this
# test never assigns in the lines shown here (it would print as '').
1154 wait_update_facet ost1 "$LCTL get_param -n \
1155 osd-*.$(facet_svc ost1).oi_scrub |
1156 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1157 error "(7) Expected 'completed' on ost1"
1159 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1161 run_test 13 "OI scrub can rebuild missed /O entries"
1164 [ "$mds1_FSTYPE" != "ldiskfs" ] &&
1165 skip "ldiskfs special test"
1167 check_mount_and_prep
1168 $LFS setstripe -c 1 -i 0 $DIR/$tdir
1170 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1171 do_facet ost1 $LCTL set_param fail_loc=0x196
1172 local count=$(precreated_ost_obj_count 0 0)
1174 createmany -o $DIR/$tdir/f $((count + 1000))
1175 do_facet ost1 $LCTL set_param fail_loc=0
1177 umount_client $MOUNT || error "(1) Fail to stop client!"
1179 stop ost1 || error "(2) Fail to stop ost1"
1182 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1183 error "(3) Fail to run e2fsck error"
1185 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1186 error "(4) Fail to start ost1"
1188 mount_client $MOUNT || error "(5) Fail to start client!"
1190 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1191 awk '/^lf_repa[ir]*ed/ { print $2 }')
1192 [ $LF_REPAIRED -ge 1000 ] ||
1193 error "(6) Some entry under /lost+found should be repaired"
1195 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1199 echo "run e2fsck again after LFSCK"
1200 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1201 error "(8) Fail to run e2fsck error"
1203 run_test 14 "OI scrub can repair OST objects under lost+found"
1208 formatall > /dev/null
1209 setupall > /dev/null
1212 echo "starting MDTs with OI scrub disabled"
1213 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1214 scrub_check_status 3 init
1215 [ "$mds1_FSTYPE" != "ldiskfs" ] ||
1216 scrub_check_flags 4 recreated,inconsistent
1218 # run under dryrun mode
1219 scrub_start 5 --dryrun
1220 scrub_check_status 6 completed
1221 if [ "$mds1_FSTYPE" != "ldiskfs" ]; then
1222 scrub_check_flags 7 inconsistent
1225 scrub_check_flags 7 recreated,inconsistent
1228 scrub_check_params 8 dryrun
1229 scrub_check_repaired 9 $repaired 1
1231 # run under dryrun mode again
1232 scrub_start 10 --dryrun
1233 scrub_check_status 11 completed
1234 if [ "$mds1_FSTYPE" != "ldiskfs" ]; then
1235 scrub_check_flags 12 inconsistent
1237 scrub_check_flags 12 recreated,inconsistent
1239 scrub_check_params 13 dryrun
1240 scrub_check_repaired 14 $repaired 1
1242 # run under normal mode
1244 scrub_check_status 16 completed
1245 scrub_check_flags 17 ""
1246 scrub_check_params 18 ""
1247 scrub_check_repaired 19 $repaired 0
1249 # run under normal mode again
1251 scrub_check_status 21 completed
1252 scrub_check_flags 22 ""
1253 scrub_check_params 23 ""
1254 scrub_check_repaired 24 0 0
1256 run_test 15 "Dryrun mode OI scrub"
1259 check_mount_and_prep
1260 scrub_enable_index_backup
1262 #define OBD_FAIL_OSD_INDEX_CRASH 0x199
1263 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x199
1265 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
1267 echo "starting MDTs without disabling OI scrub"
1268 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
1269 mount_client $MOUNT || error "(2) Fail to start client!"
1271 scrub_disable_index_backup
1273 run_test 16 "Initial OI scrub can rebuild crashed index objects"
1275 # restore MDS/OST size
1276 MDSSIZE=${SAVED_MDSSIZE}
1277 OSTSIZE=${SAVED_OSTSIZE}
1278 OSTCOUNT=${SAVED_OSTCOUNT}
1280 # cleanup the system at last
1281 REFORMAT="yes" cleanup_and_setup_lustre
1284 check_and_cleanup_lustre