3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Locate the Lustre tree: honor a preset LUSTRE, otherwise use the parent
# of the directory that contains this script.
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Pull in the shared test framework from the tests directory.
16 . $LUSTRE/tests/test-framework.sh
# Source the configuration file. The ":=" form also stores the default path
# (tests/cfg/NAME.sh under LUSTRE) into CONFIG when CONFIG is unset or empty.
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# When require_dsh_mds fails, leave the script with exit status 0.
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
# Remember the configured MDSSIZE/OSTSIZE/OSTCOUNT; the end of this script
# assigns these saved values back to the original variables.
32 SAVED_MDSSIZE=${MDSSIZE}
33 SAVED_OSTSIZE=${OSTSIZE}
34 SAVED_OSTCOUNT=${OSTCOUNT}
36 # use small MDS + OST size to speed formatting time
37 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
38 # 400M MDT device can guarantee uninitialized groups during the OI scrub
42 # there is no need for many OSTs; cap the count to reduce the format/start/stop overhead
43 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
45 # build up a clean test environment.
46 REFORMAT="yes" check_and_setup_lustre
# Target names of MDT index 0000 and OST index 0000, derived from FSNAME.
50 MDT_DEV="${FSNAME}-MDT0000"
51 OST_DEV="${FSNAME}-OST0000"
# mdsdevname is given SINGLEMDS with every "mds" substring removed
# (presumably leaving just the MDS index -- confirm against test-framework.sh).
52 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
58 # use "lfsck_start -A" once interop testing is no longer needed
59 for n in $(seq $MDSCOUNT); do
60 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
62 error "($error_id) Failed to start OI scrub on mds$n"
70 # use "lfsck_stop -A" once interop testing is no longer needed
71 for n in $(seq $MDSCOUNT); do
72 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
73 error "($error_id) Failed to stop OI scrub on mds$n"
80 do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
# Command strings for driving OI scrub. They are kept as plain strings and
# expanded unquoted where they are used, so callers in this file can append
# arguments (as in: $START_SCRUB_ON_OST -r) or pipe the output into awk
# (as in: $SHOW_SCRUB | awk ...).
# START_*/STOP_* run "lctl lfsck_start|lfsck_stop -M <target>" through
# do_facet on SINGLEMDS or ost1; the start commands pass "-t scrub".
83 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
84 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
85 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# SHOW_* read the oi_scrub parameter of the target via "get_param -n".
86 SHOW_SCRUB="do_facet $SINGLEMDS \
87 $LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
88 SHOW_SCRUB_ON_OST="do_facet ost1 \
89 $LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
# Server mount options; the NOSCRUB variant adds "noscrub" to user_xattr.
90 MOUNT_OPTS_SCRUB="-o user_xattr"
91 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
100 echo "preparing... $(date)"
101 for n in $(seq $MDSCOUNT); do
102 echo "creating $nfiles files on mds$n"
103 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
104 error "Failed to create directory mds$n"
105 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
106 error "Failed to copy files to mds$n"
107 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
108 error "mkdir failed on mds$n"
109 touch $DIR/$tdir/mds$n/d_$tfile/f1 > \
110 /dev/null || error "create failed on mds$n"
111 dd if=/dev/zero of=$DIR/$tdir/mds$n/d_$tfile/f2 bs=1M count=1 ||
112 error "write failed on mds$n"
113 if [[ $nfiles -gt 0 ]]; then
114 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
115 /dev/null || error "createmany failed on mds$n"
118 echo "prepared $(date)."
120 [ ! -z $inject ] && [ $inject -eq 2 ] && {
121 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
122 do_nodes $(comma_list $(mdts_nodes)) \
123 $LCTL set_param fail_loc=0x198
125 for n in $(seq $MDSCOUNT); do
126 cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
127 error "Fail to copy runas to MDS$n"
130 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
133 [ ! -z $inject ] && [ $inject -eq 1 ] &&
134 [ $(facet_fstype $SINGLEMDS) = "zfs" ] && {
135 #define OBD_FAIL_OSD_FID_MAPPING 0x193
136 do_nodes $(comma_list $(mdts_nodes)) \
137 $LCTL set_param fail_loc=0x193
139 for n in $(seq $MDSCOUNT); do
140 chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
141 chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
144 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
147 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
149 # sync local transactions on every MDT
150 do_nodes $(comma_list $(mdts_nodes)) \
151 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
153 # wait for a while to cancel update logs after transactions committed.
156 # sync again to guarantee all things done.
157 do_nodes $(comma_list $(mdts_nodes)) \
158 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
160 for n in $(seq $MDSCOUNT); do
162 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
165 [ ! -z $inject ] && [ $(facet_fstype $SINGLEMDS) = "ldiskfs" ] && {
166 if [ $inject -eq 1 ]; then
167 for n in $(seq $MDSCOUNT); do
168 mds_backup_restore mds$n ||
169 error "Backup/restore on mds$n failed"
171 elif [ $inject -eq 2 ]; then
182 for n in $(seq $MDSCOUNT); do
183 start mds$n $(mdsdevname $n) $opts >/dev/null ||
184 error "($error_id) Failed to start mds$n"
192 for n in $(seq $MDSCOUNT); do
193 echo "stopping mds$n"
194 stop mds$n >/dev/null ||
195 error "($error_id) Failed to stop mds$n"
199 scrub_check_status() {
204 for n in $(seq $MDSCOUNT); do
205 wait_update_facet mds$n "$LCTL get_param -n \
206 osd-*.$(facet_svc mds$n).oi_scrub |
207 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
208 error "($error_id) Expected '$expected' on mds$n"
212 scrub_check_flags() {
218 for n in $(seq $MDSCOUNT); do
219 actual=$(do_facet mds$n $LCTL get_param -n \
220 osd-*.$(facet_svc mds$n).oi_scrub |
221 awk '/^flags/ { print $2 }')
222 if [ "$actual" != "$expected" ]; then
223 error "($error_id) Expected '$expected' on mds$n, but" \
229 scrub_check_params() {
235 for n in $(seq $MDSCOUNT); do
236 actual=$(do_facet mds$n $LCTL get_param -n \
237 osd-*.$(facet_svc mds$n).oi_scrub |
238 awk '/^param/ { print $2 }')
239 if [ "$actual" != "$expected" ]; then
240 error "($error_id) Expected '$expected' on mds$n, but" \
246 scrub_check_repaired() {
253 for n in $(seq $MDSCOUNT); do
254 if [ $dryrun -eq 1 ]; then
255 actual=$(do_facet mds$n $LCTL get_param -n \
256 osd-*.$(facet_svc mds$n).oi_scrub |
257 awk '/^inconsistent:/ { print $2 }')
259 actual=$(do_facet mds$n $LCTL get_param -n \
260 osd-*.$(facet_svc mds$n).oi_scrub |
261 awk '/^updated:/ { print $2 }')
264 if [ $expected -eq 0 -a $actual -ne 0 ]; then
265 error "($error_id) Expected no repaired on mds$n, but" \
269 if [ $expected -ne 0 -a $actual -lt $expected ]; then
270 error "($error_id) Expected '$expected' on mds$n, but" \
280 for n in $(seq $MDSCOUNT); do
281 diff -q $LUSTRE/tests/test-framework.sh \
282 $DIR/$tdir/mds$n/test-framework.sh ||
283 error "($error_id) File data check failed"
287 scrub_check_data2() {
292 for n in $(seq $MDSCOUNT); do
293 diff -q $LUSTRE/tests/$filename \
294 $DIR/$tdir/mds$n/$filename ||
295 error "($error_id) File data check failed"
300 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
306 for n in $(seq $MDSCOUNT); do
307 mds_remove_ois mds$n $index ||
308 error "($error_id) Failed to remove OI .$index on mds$n"
312 scrub_enable_auto() {
313 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
318 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
322 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
323 osd-*.*.full_scrub_ratio=$ratio
326 full_scrub_threshold_rate() {
327 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
331 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
332 osd-*.*.full_scrub_threshold_rate=$rate
335 scrub_enable_index_backup() {
336 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
337 osd-*.*.index_backup=1
340 scrub_disable_index_backup() {
341 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
342 osd-*.*.index_backup=0
347 echo "starting MDTs without disabling OI scrub"
348 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
349 scrub_check_status 2 init
350 scrub_check_flags 3 ""
351 mount_client $MOUNT || error "(4) Fail to start client!"
354 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
358 echo "start $SINGLEMDS without disabling OI scrub"
359 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
361 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
362 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
364 mount_client $MOUNT || error "(4) Fail to start client!"
365 #define OBD_FAIL_OSD_FID_MAPPING 0x193
366 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
367 # update .lustre OI mapping
369 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
370 umount_client $MOUNT || error "(5) Fail to stop client!"
372 echo "stop $SINGLEMDS"
373 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
375 echo "start $SINGLEMDS with disabling OI scrub"
376 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
377 error "(7) Fail to start MDS!"
379 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
380 [ "$FLAGS" == "inconsistent" ] ||
381 error "(9) Expect 'inconsistent', but got '$FLAGS'"
383 run_test 1a "Auto trigger initial OI scrub when server mounts"
387 echo "start MDTs without disabling OI scrub"
388 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
389 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
390 scrub_check_status 3 completed
391 mount_client $MOUNT || error "(4) Fail to start client!"
392 scrub_check_data2 runas 5
393 scrub_check_status 6 completed
395 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
398 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
399 skip "ldiskfs special test" && return
403 # OI files to be removed:
405 # idx 2: oi.16.{2,4,8,16,32}
406 # idx 3: oi.16.{3,9,27}
407 for index in 0 2 3; do
409 scrub_remove_ois 1 $index
410 echo "start MDTs with OI scrub disabled"
411 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
412 scrub_check_flags 3 recreated
414 scrub_check_status 5 completed
415 scrub_check_flags 6 ""
418 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
421 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
422 skip "ldiskfs special test" && return
425 echo "starting MDTs without disabling OI scrub"
426 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
427 scrub_check_status 3 completed
428 mount_client $MOUNT || error "(4) Fail to start client!"
431 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
433 # test_3 is obsolete, it will be covered by test_5.
435 formatall > /dev/null
439 echo "starting MDTs with OI scrub disabled"
440 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
441 scrub_check_status 3 init
442 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
443 scrub_check_flags 4 recreated,inconsistent
445 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
449 echo "starting MDTs with OI scrub disabled"
450 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
451 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
452 scrub_check_flags 4 recreated,inconsistent
453 mount_client $MOUNT || error "(5) Fail to start client!"
459 scrub_check_status 7 completed
460 scrub_check_flags 8 ""
463 for n in $(seq $MDSCOUNT); do
464 updated0[$n]=$(scrub_status $n |
465 awk '/^prior_updated/ { print $2 }')
468 scrub_check_data2 sanity-scrub.sh 9
472 for n in $(seq $MDSCOUNT); do
473 updated1[$n]=$(scrub_status $n |
474 awk '/^prior_updated/ { print $2 }')
475 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
476 error "(10) NOT auto trigger full scrub as expected"
479 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
482 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
483 skip "ldiskfs special test" && return
486 echo "starting MDTs with OI scrub disabled"
487 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
488 scrub_check_flags 4 recreated,inconsistent
489 mount_client $MOUNT || error "(5) Fail to start client!"
492 full_scrub_threshold_rate 10000
496 scrub_check_status 7 completed
497 scrub_check_flags 8 ""
500 for n in $(seq $MDSCOUNT); do
501 updated0[$n]=$(scrub_status $n |
502 awk '/^prior_updated/ { print $2 }')
504 echo "OI scrub on MDS$n status for the 1st time:"
505 do_facet mds$n $LCTL get_param -n \
506 osd-*.$(facet_svc mds$n).oi_scrub
509 scrub_check_data2 sanity-scrub.sh 9
512 scrub_check_status 10 completed
513 scrub_check_flags 11 ""
516 for n in $(seq $MDSCOUNT); do
517 updated1[$n]=$(scrub_status $n |
518 awk '/^prior_updated/ { print $2 }')
520 echo "OI scrub on MDS$n status for the 2nd time:"
521 do_facet mds$n $LCTL get_param -n \
522 osd-*.$(facet_svc mds$n).oi_scrub
524 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
525 error "(12) Auto trigger full scrub unexpectedly"
528 for n in $(seq $MDSCOUNT); do
529 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
530 error "(13) fail to ls"
534 scrub_check_status 14 completed
535 scrub_check_flags 15 ""
537 for n in $(seq $MDSCOUNT); do
538 updated0[$n]=$(scrub_status $n |
539 awk '/^prior_updated/ { print $2 }')
541 echo "OI scrub on MDS$n status for the 3rd time:"
542 do_facet mds$n $LCTL get_param -n \
543 osd-*.$(facet_svc mds$n).oi_scrub
545 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
546 error "(16) Auto trigger full scrub unexpectedly"
549 for n in $(seq $MDSCOUNT); do
550 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
554 for n in $(seq $MDSCOUNT); do
555 updated1[$n]=$(scrub_status $n |
556 awk '/^prior_updated/ { print $2 }')
557 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
558 echo "OI scrub on MDS$n status for the 4th time:"
559 do_facet mds$n $LCTL get_param -n \
560 osd-*.$(facet_svc mds$n).oi_scrub
562 error "(18) NOT auto trigger full scrub as expected"
566 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
569 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
570 skip "ldiskfs special test" && return
573 echo "starting MDTs with OI scrub disabled"
574 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
575 scrub_check_flags 4 recreated,inconsistent
576 mount_client $MOUNT || error "(5) Fail to start client!"
579 full_scrub_threshold_rate 20
583 scrub_check_status 7 completed
584 scrub_check_flags 8 ""
587 for n in $(seq $MDSCOUNT); do
588 updated0[$n]=$(scrub_status $n |
589 awk '/^prior_updated/ { print $2 }')
591 echo "OI scrub on MDS$n status for the 1st time:"
592 do_facet mds$n $LCTL get_param -n \
593 osd-*.$(facet_svc mds$n).oi_scrub
596 scrub_check_data2 sanity-scrub.sh 9
599 scrub_check_status 10 completed
600 scrub_check_flags 11 ""
603 for n in $(seq $MDSCOUNT); do
604 updated1[$n]=$(scrub_status $n |
605 awk '/^prior_updated/ { print $2 }')
607 echo "OI scrub on MDS$n status for the 2nd time:"
608 do_facet mds$n $LCTL get_param -n \
609 osd-*.$(facet_svc mds$n).oi_scrub
611 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
612 error "(12) Auto trigger full scrub unexpectedly"
615 for n in $(seq $MDSCOUNT); do
616 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
617 error "(13) fail to ls"
621 scrub_check_status 14 completed
622 scrub_check_flags 15 ""
624 for n in $(seq $MDSCOUNT); do
625 updated0[$n]=$(scrub_status $n |
626 awk '/^prior_updated/ { print $2 }')
628 echo "OI scrub on MDS$n status for the 3rd time:"
629 do_facet mds$n $LCTL get_param -n \
630 osd-*.$(facet_svc mds$n).oi_scrub
632 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
633 error "(16) Auto trigger full scrub unexpectedly"
636 for n in $(seq $MDSCOUNT); do
637 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
641 for n in $(seq $MDSCOUNT); do
642 updated1[$n]=$(scrub_status $n |
643 awk '/^prior_updated/ { print $2 }')
644 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
645 echo "OI scrub on MDS$n status for the 4th time:"
646 do_facet mds$n $LCTL get_param -n \
647 osd-*.$(facet_svc mds$n).oi_scrub
649 error "(18) NOT auto trigger full scrub as expected"
653 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
656 formatall > /dev/null
660 echo "starting MDTs with OI scrub disabled (1)"
661 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
662 scrub_check_status 3 init
663 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
664 scrub_check_flags 4 recreated,inconsistent
665 mount_client $MOUNT || error "(5) Fail to start client!"
669 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
670 do_nodes $(comma_list $(mdts_nodes)) \
671 $LCTL set_param fail_val=3 fail_loc=0x190
674 umount_client $MOUNT || error "(7) Fail to stop client!"
675 scrub_check_status 8 scanning
677 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
678 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
683 do_nodes $(comma_list $(mdts_nodes)) \
684 $LCTL set_param fail_loc=0 fail_val=0
686 echo "starting MDTs with OI scrub disabled (2)"
687 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
688 scrub_check_status 11 crashed
691 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
692 do_nodes $(comma_list $(mdts_nodes)) \
693 $LCTL set_param fail_val=3 fail_loc=0x190
695 echo "starting MDTs without disabling OI scrub"
696 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
697 scrub_check_status 14 scanning
699 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
700 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
702 scrub_check_status 15 failed
703 mount_client $MOUNT || error "(16) Fail to start client!"
706 do_nodes $(comma_list $(mdts_nodes)) \
707 $LCTL set_param fail_loc=0 fail_val=0
712 for n in $(seq $MDSCOUNT); do
713 stat $DIR/$tdir/mds$n/sanity-scrub.sh &
717 for n in $(seq $MDSCOUNT); do
719 error "(18) Fail to stat mds$n/sanity-scrub.sh"
722 scrub_check_status 19 completed
723 scrub_check_flags 20 ""
725 run_test 5 "OI scrub state machine"
729 echo "starting MDTs with OI scrub disabled"
730 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
731 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
732 scrub_check_flags 4 recreated,inconsistent
733 mount_client $MOUNT || error "(5) Fail to start client!"
737 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
738 do_nodes $(comma_list $(mdts_nodes)) \
739 $LCTL set_param fail_val=2 fail_loc=0x190
743 # Sleep 5 sec to guarantee at least one object processed by OI scrub
745 # Fail the OI scrub to guarantee there is at least one checkpoint
746 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
747 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
749 scrub_check_status 7 failed
751 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
752 do_nodes $(comma_list $(mdts_nodes)) \
753 $LCTL set_param fail_val=3 fail_loc=0x190
756 for n in $(seq $MDSCOUNT); do
757 # stat will re-trigger OI scrub
758 stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
759 error "(8) Failed to stat mds$n/sanity-scrub.sh"
762 umount_client $MOUNT || error "(9) Fail to stop client!"
763 scrub_check_status 10 scanning
765 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
766 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
770 for n in $(seq $MDSCOUNT); do
771 position0[$n]=$(scrub_status $n |
772 awk '/^last_checkpoint_position/ {print $2}')
773 position0[$n]=$((${position0[$n]} + 1))
778 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
779 do_nodes $(comma_list $(mdts_nodes)) \
780 $LCTL set_param fail_val=3 fail_loc=0x190
782 echo "starting MDTs without disabling OI scrub"
783 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
785 scrub_check_status 13 scanning
788 for n in $(seq $MDSCOUNT); do
789 position1[$n]=$(scrub_status $n |
790 awk '/^latest_start_position/ {print $2}')
791 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
792 error "(14) Expected position ${position0[$n]}, but" \
793 "got ${position1[$n]}"
797 do_nodes $(comma_list $(mdts_nodes)) \
798 $LCTL set_param fail_loc=0 fail_val=0
800 scrub_check_status 15 completed
801 scrub_check_flags 16 ""
803 run_test 6 "OI scrub resumes from last checkpoint"
807 echo "starting MDTs with OI scrub disabled"
808 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
809 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
810 scrub_check_flags 4 recreated,inconsistent
811 mount_client $MOUNT || error "(5) Fail to start client!"
815 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
816 do_nodes $(comma_list $(mdts_nodes)) \
817 $LCTL set_param fail_val=3 fail_loc=0x190
822 for n in $(seq $MDSCOUNT); do
823 stat $DIR/$tdir/mds$n/${tfile}300 ||
824 error "(7) Failed to stat mds$n/${tfile}300!"
827 scrub_check_status 8 scanning
828 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
829 scrub_check_flags 9 inconsistent,auto
831 scrub_check_flags 9 recreated,inconsistent,auto
834 do_nodes $(comma_list $(mdts_nodes)) \
835 $LCTL set_param fail_loc=0 fail_val=0
837 scrub_check_status 10 completed
840 run_test 7 "System is available during OI scrub scanning"
844 echo "starting MDTs with OI scrub disabled"
845 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
846 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
847 scrub_check_flags 4 recreated,inconsistent
849 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
850 do_nodes $(comma_list $(mdts_nodes)) \
851 $LCTL set_param fail_val=1 fail_loc=0x190
854 scrub_check_status 6 scanning
856 scrub_check_status 8 stopped
858 scrub_check_status 10 scanning
860 do_nodes $(comma_list $(mdts_nodes)) \
861 $LCTL set_param fail_loc=0 fail_val=0
863 scrub_check_status 11 completed
864 scrub_check_flags 12 ""
866 run_test 8 "Control OI scrub manually"
869 # Skip scrub speed test for ZFS because of performance unstable
870 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
871 skip "test scrub speed only on ldiskfs" && return
873 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
874 skip "Testing on UP system, the speed may be inaccurate."
880 echo "starting MDTs with OI scrub disabled"
881 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
882 scrub_check_flags 4 recreated,inconsistent
884 local BASE_SPEED1=100
886 # OI scrub should run with full speed under inconsistent case
887 scrub_start 5 -s $BASE_SPEED1
890 scrub_check_status 6 completed
891 scrub_check_flags 7 ""
892 # OI scrub should run with limited speed under non-inconsistent case
893 scrub_start 8 -s $BASE_SPEED1 -r
896 scrub_check_status 9 scanning
898 # Take into account that there are 1024 pre-fetched items. There may
899 # also be a timing error, which should normally be less than 2 seconds.
900 # We allow a further 20% scheduling error.
901 local PRE_FETCHED=1024
903 # MAX_MARGIN = 1.2 = 12 / 10
904 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
905 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
907 for n in $(seq $MDSCOUNT); do
908 local SPEED=$(scrub_status $n | \
909 awk '/^average_speed/ { print $2 }')
910 [ $SPEED -lt $MAX_SPEED ] ||
911 error "(10) Got speed $SPEED, expected less than" \
916 local BASE_SPEED2=300
918 for n in $(seq $MDSCOUNT); do
919 do_facet mds$n $LCTL set_param -n \
920 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
924 # MIN_MARGIN = 0.8 = 8 / 10
925 local MIN_SPEED=$(((PRE_FETCHED + \
926 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
927 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
928 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
929 # MAX_MARGIN = 1.2 = 12 / 10
930 MAX_SPEED=$(((PRE_FETCHED + \
931 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
932 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
933 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
934 for n in $(seq $MDSCOUNT); do
935 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
936 [ $SPEED -gt $MIN_SPEED ] ||
937 error "(11) Got speed $SPEED, expected more than" \
939 [ $SPEED -lt $MAX_SPEED ] ||
940 error "(12) Got speed $SPEED, expected less than" \
943 do_facet mds$n $LCTL set_param -n \
944 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
947 scrub_check_status 13 completed
949 run_test 9 "OI scrub speed control"
# Announce the first MDT start of this test. The next visible call starts the
# MDTs via scrub_start_mds, and no assignment to "n" is visible before this
# message, so it names "MDTs" like the sibling messages instead of "mds$n".
953 echo "starting MDTs with OI scrub disabled (1)"
954 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
955 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
956 scrub_check_flags 4 recreated,inconsistent
957 mount_client $MOUNT || error "(5) Fail to start client!"
961 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
962 do_nodes $(comma_list $(mdts_nodes)) \
963 $LCTL set_param fail_val=1 fail_loc=0x190
966 scrub_check_status 7 scanning
967 umount_client $MOUNT || error "(8) Fail to stop client!"
969 echo "starting MDTs with OI scrub disabled (2)"
970 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
971 scrub_check_status 11 paused
973 echo "starting MDTs without disabling OI scrub"
974 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
975 scrub_check_status 14 scanning
977 do_nodes $(comma_list $(mdts_nodes)) \
978 $LCTL set_param fail_loc=0 fail_val=0
980 scrub_check_status 15 completed
981 scrub_check_flags 16 ""
983 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
985 # test_10b is obsolete; it will be covered by related sanity-lfsck tests.
988 echo "starting MDTs with OI scrub disabled"
989 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
990 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
991 scrub_check_flags 4 recreated,inconsistent
993 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
994 do_nodes $(comma_list $(mdts_nodes)) \
995 $LCTL set_param fail_val=3 fail_loc=0x190
998 scrub_check_status 6 scanning
1000 echo "starting MDTs with OI scrub disabled"
1001 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
1002 scrub_check_status 9 paused
1004 echo "starting MDTs without disabling OI scrub"
1005 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
1006 scrub_check_status 12 scanning
1008 do_nodes $(comma_list $(mdts_nodes)) \
1009 $LCTL set_param fail_loc=0 fail_val=0
1011 scrub_check_status 13 completed
1012 scrub_check_flags 14 ""
1014 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
1017 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1018 skip "ldiskfs special test" && return
1023 check_mount_and_prep
1025 for n in $(seq $MDSCOUNT); do
1026 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
1027 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
1029 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
1030 error "(2) Fail to create under $tdir/mds$n"
1033 # reset OI scrub start point by force
1035 scrub_check_status 4 completed
1040 # OI scrub should skip the newly created objects on first access.
1041 # Note that a new llog is created for every OST on every startup, and
1042 # new features can make this even less stable, so we only check that
1043 # the number of skipped files is more than the number of known created files
1044 local MINIMUM=$((CREATED + 1)) # files + directory
1045 for n in $(seq $MDSCOUNT); do
1046 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1047 [ $SKIPPED -lt $MINIMUM ] &&
1048 error "(5) Expect at least $MINIMUM objects" \
1049 "skipped on mds$n, but got $SKIPPED"
1051 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1054 # reset OI scrub start point by force
1056 scrub_check_status 7 completed
1058 # OI scrub should skip the newly created objects only once
1059 for n in $(seq $MDSCOUNT); do
1060 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1061 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1063 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1064 error "(8) Expect 0 objects skipped on mds$n, but" \
1068 run_test 11 "OI scrub skips the new created objects only once"
1071 check_mount_and_prep
1072 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1074 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1075 do_facet ost1 $LCTL set_param fail_loc=0x195
1076 local count=$(precreated_ost_obj_count 0 0)
1078 createmany -o $DIR/$tdir/f $((count + 32))
1079 umount_client $MOUNT || error "(1) Fail to stop client!"
1081 stop ost1 || error "(2) Fail to stop ost1"
1083 #define OBD_FAIL_OST_NODESTROY 0x233
1084 do_facet ost1 $LCTL set_param fail_loc=0x233
1086 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1087 error "(3) Fail to start ost1"
1089 mount_client $MOUNT || error "(4) Fail to start client!"
1091 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1093 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1095 do_facet ost1 $LCTL set_param fail_loc=0
# Wait until the oi_scrub status of ost1 reads "completed" (wait_update_facet
# is given the expected value "completed" and the argument 6). No variable
# named "expected" is set in the visible code of this test, so the failure
# message names the awaited status literally instead of expanding it.
1096 wait_update_facet ost1 "$LCTL get_param -n \
1097 osd-*.$(facet_svc ost1).oi_scrub |
1098 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1099 error "(7) Expected 'completed' on ost1"
1101 ls -ail $DIR/$tdir > /dev/null || {
1103 error "(8) ls should succeed"
1106 run_test 12 "OI scrub can rebuild invalid /O entries"
1109 check_mount_and_prep
1110 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1112 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1113 do_facet ost1 $LCTL set_param fail_loc=0x196
1114 local count=$(precreated_ost_obj_count 0 0)
1116 createmany -o $DIR/$tdir/f $((count + 32))
1117 do_facet ost1 $LCTL set_param fail_loc=0
1119 umount_client $MOUNT || error "(1) Fail to stop client!"
1121 stop ost1 || error "(2) Fail to stop ost1"
1123 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1124 error "(3) Fail to start ost1"
1126 mount_client $MOUNT || error "(4) Fail to start client!"
1128 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1130 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Wait until the oi_scrub status of ost1 reads "completed" (wait_update_facet
# is given the expected value "completed" and the argument 6). No variable
# named "expected" is set in the visible code of this test, so the failure
# message names the awaited status literally instead of expanding it.
1132 wait_update_facet ost1 "$LCTL get_param -n \
1133 osd-*.$(facet_svc ost1).oi_scrub |
1134 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1135 error "(7) Expected 'completed' on ost1"
1137 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1139 run_test 13 "OI scrub can rebuild missed /O entries"
1142 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1143 skip "ldiskfs special test" && return
1145 check_mount_and_prep
1146 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1148 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1149 do_facet ost1 $LCTL set_param fail_loc=0x196
1150 local count=$(precreated_ost_obj_count 0 0)
1152 createmany -o $DIR/$tdir/f $((count + 1000))
1153 do_facet ost1 $LCTL set_param fail_loc=0
1155 umount_client $MOUNT || error "(1) Fail to stop client!"
1157 stop ost1 || error "(2) Fail to stop ost1"
1160 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1161 error "(3) Fail to run e2fsck error"
1163 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1164 error "(4) Fail to start ost1"
1166 mount_client $MOUNT || error "(5) Fail to start client!"
1168 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1169 awk '/^lf_repa[ri]*ed/ { print $2 }')
1170 [ $LF_REPAIRED -ge 1000 ] ||
1171 error "(6) Some entry under /lost+found should be repaired"
1173 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1175 run_test 14 "OI scrub can repair objects under lost+found"
1180 formatall > /dev/null
1181 setupall > /dev/null
1184 echo "starting MDTs with OI scrub disabled"
1185 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1186 scrub_check_status 3 init
1187 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1188 scrub_check_flags 4 recreated,inconsistent
1190 # run under dryrun mode
1191 scrub_start 5 --dryrun
1192 scrub_check_status 6 completed
1193 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1194 scrub_check_flags 7 inconsistent
1197 scrub_check_flags 7 recreated,inconsistent
1200 scrub_check_params 8 dryrun
1201 scrub_check_repaired 9 $repaired 1
1203 # run under dryrun mode again
1204 scrub_start 10 --dryrun
1205 scrub_check_status 11 completed
1206 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1207 scrub_check_flags 12 inconsistent
1209 scrub_check_flags 12 recreated,inconsistent
1211 scrub_check_params 13 dryrun
1212 scrub_check_repaired 14 $repaired 1
1214 # run under normal mode
1216 scrub_check_status 16 completed
1217 scrub_check_flags 17 ""
1218 scrub_check_params 18 ""
1219 scrub_check_repaired 19 $repaired 0
1221 # run under normal mode again
1223 scrub_check_status 21 completed
1224 scrub_check_flags 22 ""
1225 scrub_check_params 23 ""
1226 scrub_check_repaired 24 0 0
1228 run_test 15 "Dryrun mode OI scrub"
1231 check_mount_and_prep
1232 scrub_enable_index_backup
1234 #define OBD_FAIL_OSD_INDEX_CRASH 0x199
1235 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x199
1237 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
1239 echo "starting MDTs without disabling OI scrub"
1240 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
1241 mount_client $MOUNT || error "(2) Fail to start client!"
1243 scrub_disable_index_backup
1245 run_test 16 "Initial OI scrub can rebuild crashed index objects"
1247 # restore the MDS/OST sizes and the OST count saved near the top of this script
1248 MDSSIZE=${SAVED_MDSSIZE}
1249 OSTSIZE=${SAVED_OSTSIZE}
1250 OSTCOUNT=${SAVED_OSTCOUNT}
1252 # finally, run cleanup_and_setup_lustre with REFORMAT set to "yes" for that call
1253 REFORMAT="yes" cleanup_and_setup_lustre
# last step of the script: hand over to the framework's check_and_cleanup_lustre
1256 check_and_cleanup_lustre