3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
12 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
13 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
15 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
16 . $LUSTRE/tests/test-framework.sh
18 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
21 require_dsh_mds || exit 0
25 if ! check_versions; then
26 skip "It is NOT necessary to test scrub under interoperation mode"
32 SAVED_MDSSIZE=${MDSSIZE}
33 SAVED_OSTSIZE=${OSTSIZE}
34 SAVED_OSTCOUNT=${OSTCOUNT}
36 # use small MDS + OST size to speed formatting time
37 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
38 # 400M MDT device can guarantee uninitialized groups during the OI scrub
42 # there is no need for many OSTs; cap the count to reduce the format/start/stop overhead
43 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
45 # build up a clean test environment.
46 REFORMAT="yes" check_and_setup_lustre
50 MDT_DEV="${FSNAME}-MDT0000"
51 OST_DEV="${FSNAME}-OST0000"
57 # use "lfsck_start -A" once interop testing is no longer needed
58 for n in $(seq $MDSCOUNT); do
59 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
61 error "($error_id) Failed to start OI scrub on mds$n"
69 # use "lfsck_stop -A" once interop testing is no longer needed
70 for n in $(seq $MDSCOUNT); do
71 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
72 error "($error_id) Failed to stop OI scrub on mds$n"
79 do_facet mds$n $LCTL get_param -n osd-*.$(facet_svc mds$n).oi_scrub
82 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} -t scrub"
83 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} -t scrub"
84 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
85 SHOW_SCRUB="do_facet $SINGLEMDS \
86 $LCTL get_param -n osd-*.${MDT_DEV}.oi_scrub"
87 SHOW_SCRUB_ON_OST="do_facet ost1 \
88 $LCTL get_param -n osd-*.${OST_DEV}.oi_scrub"
89 MOUNT_OPTS_SCRUB="-o user_xattr"
90 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
99 echo "preparing... $(date)"
100 for n in $(seq $MDSCOUNT); do
101 echo "creating $nfiles files on mds$n"
102 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
103 error "Failed to create directory mds$n"
104 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
105 error "Failed to copy files to mds$n"
106 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
107 error "mkdir failed on mds$n"
108 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
109 /dev/null || error "create failed on mds$n"
110 if [[ $nfiles -gt 0 ]]; then
111 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
112 /dev/null || error "createmany failed on mds$n"
115 echo "prepared $(date)."
117 [ ! -z $inject ] && [ $inject -eq 2 ] && {
118 #define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
119 do_nodes $(comma_list $(mdts_nodes)) \
120 $LCTL set_param fail_loc=0x198
122 for n in $(seq $MDSCOUNT); do
123 cp $LUSTRE/tests/runas $DIR/$tdir/mds$n ||
124 error "Fail to copy runas to MDS$n"
127 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
130 [ ! -z $inject ] && [ $inject -eq 1 ] &&
131 [ $(facet_fstype $SINGLEMDS) = "zfs" ] && {
132 #define OBD_FAIL_OSD_FID_MAPPING 0x193
133 do_nodes $(comma_list $(mdts_nodes)) \
134 $LCTL set_param fail_loc=0x193
136 for n in $(seq $MDSCOUNT); do
137 chmod 0400 $DIR/$tdir/mds$n/test-framework.sh
138 chmod 0400 $DIR/$tdir/mds$n/sanity-scrub.sh
141 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
144 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
146 # sync local transactions on every MDT
147 do_nodes $(comma_list $(mdts_nodes)) \
148 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
150 # wait for a while to cancel update logs after transactions committed.
153 # sync again to guarantee all things done.
154 do_nodes $(comma_list $(mdts_nodes)) \
155 "$LCTL set_param -n osd*.*MDT*.force_sync=1"
157 for n in $(seq $MDSCOUNT); do
159 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
162 [ ! -z $inject ] && [ $(facet_fstype $SINGLEMDS) = "ldiskfs" ] && {
163 if [ $inject -eq 1 ]; then
164 for n in $(seq $MDSCOUNT); do
165 mds_backup_restore mds$n ||
166 error "Backup/restore on mds$n failed"
168 elif [ $inject -eq 2 ]; then
179 for n in $(seq $MDSCOUNT); do
180 start mds$n $(mdsdevname $n) $opts >/dev/null ||
181 error "($error_id) Failed to start mds$n"
189 for n in $(seq $MDSCOUNT); do
190 echo "stopping mds$n"
191 stop mds$n >/dev/null ||
192 error "($error_id) Failed to stop mds$n"
# Check the OI scrub status on every MDT.
# For each index n in 1..$MDSCOUNT, hands wait_update_facet a command that
# reads osd-*.<service name of mds n>.oi_scrub and prints the second field
# of its "status" line, together with $expected and the value 6; when
# wait_update_facet fails, error is called with a "($error_id) ..." message.
# Call sites pass a step id followed by a status word, e.g.
# "scrub_check_status 2 init".
# NOTE(review): the lines assigning $error_id and $expected are not visible
# in this excerpt, and the trailing 6 is presumably a wait limit -- confirm
# against wait_update_facet.
196 scrub_check_status() {
201 for n in $(seq $MDSCOUNT); do
# The awk field reference is written \\\$2 because it sits inside a
# double-quoted command string that is passed on to wait_update_facet.
202 wait_update_facet mds$n "$LCTL get_param -n \
203 osd-*.$(facet_svc mds$n).oi_scrub |
204 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
205 error "($error_id) Expected '$expected' on mds$n"
# Check the OI scrub flags on every MDT.
# For each index n in 1..$MDSCOUNT, reads osd-*.<service name of mds n>.oi_scrub
# through do_facet, takes the second field of the "flags" line as $actual and
# calls error when it differs from $expected.
# Call sites pass a step id followed by the expected flags string, which may
# be empty, e.g. 'scrub_check_flags 3 ""'.
# NOTE(review): the lines assigning $error_id and $expected, and the rest of
# the error message, are not visible in this excerpt.
209 scrub_check_flags() {
215 for n in $(seq $MDSCOUNT); do
# Only the second whitespace-separated field of the "flags" line is captured.
216 actual=$(do_facet mds$n $LCTL get_param -n \
217 osd-*.$(facet_svc mds$n).oi_scrub |
218 awk '/^flags/ { print $2 }')
219 if [ "$actual" != "$expected" ]; then
220 error "($error_id) Expected '$expected' on mds$n, but" \
# Check the OI scrub "param" line on every MDT.
# For each index n in 1..$MDSCOUNT, reads osd-*.<service name of mds n>.oi_scrub
# through do_facet, takes the second field of the "param" line as $actual and
# calls error when it differs from $expected.
# Call sites pass a step id followed by the expected value, e.g.
# "scrub_check_params 8 dryrun" or 'scrub_check_params 18 ""'.
# NOTE(review): the lines assigning $error_id and $expected, and the rest of
# the error message, are not visible in this excerpt.
226 scrub_check_params() {
232 for n in $(seq $MDSCOUNT); do
233 actual=$(do_facet mds$n $LCTL get_param -n \
234 osd-*.$(facet_svc mds$n).oi_scrub |
235 awk '/^param/ { print $2 }')
236 if [ "$actual" != "$expected" ]; then
237 error "($error_id) Expected '$expected' on mds$n, but" \
# Check the OI scrub repair counter on every MDT.
# For each index n in 1..$MDSCOUNT, reads one counter from
# osd-*.<service name of mds n>.oi_scrub into $actual and compares it with
# $expected.
# Call sites pass a step id, the expected count and a dryrun flag, e.g.
# "scrub_check_repaired 9 $repaired 1".
# NOTE(review): the lines assigning $error_id, $expected and $dryrun, the
# else/fi keywords and the tails of the error messages are not visible in
# this excerpt.
243 scrub_check_repaired() {
250 for n in $(seq $MDSCOUNT); do
# With $dryrun equal to 1 the "inconsistent:" counter is read.
251 if [ $dryrun -eq 1 ]; then
252 actual=$(do_facet mds$n $LCTL get_param -n \
253 osd-*.$(facet_svc mds$n).oi_scrub |
254 awk '/^inconsistent:/ { print $2 }')
# The "updated:" counter is read here -- presumably the else branch of the
# dryrun test; the else line itself is not shown.
256 actual=$(do_facet mds$n $LCTL get_param -n \
257 osd-*.$(facet_svc mds$n).oi_scrub |
258 awk '/^updated:/ { print $2 }')
# An expected value of 0 requires the counter to be exactly 0; a non-zero
# expected value is treated as a lower bound on the counter.
261 if [ $expected -eq 0 -a $actual -ne 0 ]; then
262 error "($error_id) Expected no repaired on mds$n, but" \
266 if [ $expected -ne 0 -a $actual -lt $expected ]; then
267 error "($error_id) Expected '$expected' on mds$n, but" \
277 for n in $(seq $MDSCOUNT); do
278 diff -q $LUSTRE/tests/test-framework.sh \
279 $DIR/$tdir/mds$n/test-framework.sh ||
280 error "($error_id) File data check failed"
# Compare one file from $LUSTRE/tests with its copy under each MDT directory.
# For each index n in 1..$MDSCOUNT, runs "diff -q" between
# $LUSTRE/tests/$filename and $DIR/$tdir/mds<n>/$filename and calls error
# when diff returns non-zero.
# Call sites pass the file name followed by a step id, e.g.
# "scrub_check_data2 runas 5".
# NOTE(review): the lines assigning $filename and $error_id are not visible
# in this excerpt.
284 scrub_check_data2() {
289 for n in $(seq $MDSCOUNT); do
290 diff -q $LUSTRE/tests/$filename \
291 $DIR/$tdir/mds$n/$filename ||
292 error "($error_id) File data check failed"
297 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
303 for n in $(seq $MDSCOUNT); do
304 mds_remove_ois mds$n $index ||
305 error "($error_id) Failed to remove OI .$index on mds$n"
# Run "$LCTL set_param -n" on all nodes listed by mdts_nodes.
# NOTE(review): the parameter and value being set are on a continuation line
# that is not visible in this excerpt; going by the function name this
# presumably enables automatic OI scrub -- confirm.
309 scrub_enable_auto() {
310 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
315 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
319 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
320 osd-*.*.full_scrub_ratio=$ratio
# Set osd-*.*.full_scrub_threshold_rate to $rate on all nodes listed by
# mdts_nodes. Returns without doing anything when the backing filesystem
# type of $SINGLEMDS is not "ldiskfs".
# Call sites pass the rate as the only argument, e.g.
# "full_scrub_threshold_rate 10000".
# NOTE(review): the line assigning $rate is not visible in this excerpt.
323 full_scrub_threshold_rate() {
324 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && return
328 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
329 osd-*.*.full_scrub_threshold_rate=$rate
# Set osd-*.*.index_backup to 1 on every node listed by all_server_nodes.
# Takes no arguments in the visible code.
332 scrub_enable_index_backup() {
333 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
334 osd-*.*.index_backup=1
# Set osd-*.*.index_backup to 0 on every node listed by all_server_nodes.
# Takes no arguments in the visible code.
337 scrub_disable_index_backup() {
338 do_nodes $(comma_list $(all_server_nodes)) $LCTL set_param -n \
339 osd-*.*.index_backup=0
344 echo "starting MDTs without disabling OI scrub"
345 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
346 scrub_check_status 2 init
347 scrub_check_flags 3 ""
348 mount_client $MOUNT || error "(4) Fail to start client!"
351 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
355 echo "start $SINGLEMDS without disabling OI scrub"
356 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
358 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
359 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
361 mount_client $MOUNT || error "(4) Fail to start client!"
362 #define OBD_FAIL_OSD_FID_MAPPING 0x193
363 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
364 # update .lustre OI mapping
366 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
367 umount_client $MOUNT || error "(5) Fail to stop client!"
369 echo "stop $SINGLEMDS"
370 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
372 echo "start $SINGLEMDS with disabling OI scrub"
373 start $SINGLEMDS $(mdsdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
374 error "(7) Fail to start MDS!"
376 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
377 [ "$FLAGS" == "inconsistent" ] ||
378 error "(9) Expect 'inconsistent', but got '$FLAGS'"
380 run_test 1a "Auto trigger initial OI scrub when server mounts"
384 echo "start MDTs without disabling OI scrub"
385 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
386 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
387 scrub_check_status 3 completed
388 mount_client $MOUNT || error "(4) Fail to start client!"
389 scrub_check_data2 runas 5
390 scrub_check_status 6 completed
392 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
395 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
396 skip "ldiskfs special test" && return
400 # OI files to be removed:
402 # idx 2: oi.16.{2,4,8,16,32}
403 # idx 3: oi.16.{3,9,27}
404 for index in 0 2 3; do
406 scrub_remove_ois 1 $index
407 echo "start MDTs with OI scrub disabled"
408 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
409 scrub_check_flags 3 recreated
411 scrub_check_status 5 completed
412 scrub_check_flags 6 ""
415 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
418 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
419 skip "ldiskfs special test" && return
422 echo "starting MDTs without disabling OI scrub"
423 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
424 scrub_check_status 3 completed
425 mount_client $MOUNT || error "(4) Fail to start client!"
428 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
430 # test_3 is obsolete, it will be covered by test_5.
432 formatall > /dev/null
436 echo "starting MDTs with OI scrub disabled"
437 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
438 scrub_check_status 3 init
439 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
440 scrub_check_flags 4 recreated,inconsistent
442 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
446 echo "starting MDTs with OI scrub disabled"
447 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
448 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
449 scrub_check_flags 4 recreated,inconsistent
450 mount_client $MOUNT || error "(5) Fail to start client!"
456 scrub_check_status 7 completed
457 scrub_check_flags 8 ""
460 for n in $(seq $MDSCOUNT); do
461 updated0[$n]=$(scrub_status $n |
462 awk '/^prior_updated/ { print $2 }')
465 scrub_check_data2 sanity-scrub.sh 9
469 for n in $(seq $MDSCOUNT); do
470 updated1[$n]=$(scrub_status $n |
471 awk '/^prior_updated/ { print $2 }')
472 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
473 error "(10) NOT auto trigger full scrub as expected"
476 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
479 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
480 skip "ldiskfs special test" && return
483 echo "starting MDTs with OI scrub disabled"
484 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
485 scrub_check_flags 4 recreated,inconsistent
486 mount_client $MOUNT || error "(5) Fail to start client!"
489 full_scrub_threshold_rate 10000
493 scrub_check_status 7 completed
494 scrub_check_flags 8 ""
497 for n in $(seq $MDSCOUNT); do
498 updated0[$n]=$(scrub_status $n |
499 awk '/^prior_updated/ { print $2 }')
501 echo "OI scrub on MDS$n status for the 1st time:"
502 do_facet mds$n $LCTL get_param -n \
503 osd-*.$(facet_svc mds$n).oi_scrub
506 scrub_check_data2 sanity-scrub.sh 9
509 scrub_check_status 10 completed
510 scrub_check_flags 11 ""
513 for n in $(seq $MDSCOUNT); do
514 updated1[$n]=$(scrub_status $n |
515 awk '/^prior_updated/ { print $2 }')
517 echo "OI scrub on MDS$n status for the 2nd time:"
518 do_facet mds$n $LCTL get_param -n \
519 osd-*.$(facet_svc mds$n).oi_scrub
521 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
522 error "(12) Auto trigger full scrub unexpectedly"
525 for n in $(seq $MDSCOUNT); do
526 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
527 error "(13) fail to ls"
531 scrub_check_status 14 completed
532 scrub_check_flags 15 ""
534 for n in $(seq $MDSCOUNT); do
535 updated0[$n]=$(scrub_status $n |
536 awk '/^prior_updated/ { print $2 }')
538 echo "OI scrub on MDS$n status for the 3rd time:"
539 do_facet mds$n $LCTL get_param -n \
540 osd-*.$(facet_svc mds$n).oi_scrub
542 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
543 error "(16) Auto trigger full scrub unexpectedly"
546 for n in $(seq $MDSCOUNT); do
547 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
551 for n in $(seq $MDSCOUNT); do
552 updated1[$n]=$(scrub_status $n |
553 awk '/^prior_updated/ { print $2 }')
554 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
555 echo "OI scrub on MDS$n status for the 4th time:"
556 do_facet mds$n $LCTL get_param -n \
557 osd-*.$(facet_svc mds$n).oi_scrub
559 error "(18) NOT auto trigger full scrub as expected"
563 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
566 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
567 skip "ldiskfs special test" && return
570 echo "starting MDTs with OI scrub disabled"
571 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
572 scrub_check_flags 4 recreated,inconsistent
573 mount_client $MOUNT || error "(5) Fail to start client!"
576 full_scrub_threshold_rate 20
580 scrub_check_status 7 completed
581 scrub_check_flags 8 ""
584 for n in $(seq $MDSCOUNT); do
585 updated0[$n]=$(scrub_status $n |
586 awk '/^prior_updated/ { print $2 }')
588 echo "OI scrub on MDS$n status for the 1st time:"
589 do_facet mds$n $LCTL get_param -n \
590 osd-*.$(facet_svc mds$n).oi_scrub
593 scrub_check_data2 sanity-scrub.sh 9
596 scrub_check_status 10 completed
597 scrub_check_flags 11 ""
600 for n in $(seq $MDSCOUNT); do
601 updated1[$n]=$(scrub_status $n |
602 awk '/^prior_updated/ { print $2 }')
604 echo "OI scrub on MDS$n status for the 2nd time:"
605 do_facet mds$n $LCTL get_param -n \
606 osd-*.$(facet_svc mds$n).oi_scrub
608 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
609 error "(12) Auto trigger full scrub unexpectedly"
612 for n in $(seq $MDSCOUNT); do
613 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
614 error "(13) fail to ls"
618 scrub_check_status 14 completed
619 scrub_check_flags 15 ""
621 for n in $(seq $MDSCOUNT); do
622 updated0[$n]=$(scrub_status $n |
623 awk '/^prior_updated/ { print $2 }')
625 echo "OI scrub on MDS$n status for the 3rd time:"
626 do_facet mds$n $LCTL get_param -n \
627 osd-*.$(facet_svc mds$n).oi_scrub
629 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
630 error "(16) Auto trigger full scrub unexpectedly"
633 for n in $(seq $MDSCOUNT); do
634 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
638 for n in $(seq $MDSCOUNT); do
639 updated1[$n]=$(scrub_status $n |
640 awk '/^prior_updated/ { print $2 }')
641 [ ${updated0[$n]} -eq ${updated1[$n]} ] || {
642 echo "OI scrub on MDS$n status for the 4th time:"
643 do_facet mds$n $LCTL get_param -n \
644 osd-*.$(facet_svc mds$n).oi_scrub
646 error "(18) NOT auto trigger full scrub as expected"
650 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
653 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && skip "ldiskfs only test"
657 #define OBD_FAIL_OSD_DUPLICATE_MAP 0x19b
658 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x19b
659 for i in {1..100}; do
660 echo $i > $DIR/$tdir/f_$i || error "write f_$i failed"
662 do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
664 for i in {101..200}; do
665 echo $i > $DIR/$tdir/f_$i || error "write f_$i failed"
668 for i in {1..200}; do
669 echo $i | cmp $DIR/$tdir/f_$i - || error "f_$i data corrupt"
672 run_test 4d "FID in LMA mismatch with object FID won't block create"
675 formatall > /dev/null
679 echo "starting MDTs with OI scrub disabled (1)"
680 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
681 scrub_check_status 3 init
682 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
683 scrub_check_flags 4 recreated,inconsistent
684 mount_client $MOUNT || error "(5) Fail to start client!"
688 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
689 do_nodes $(comma_list $(mdts_nodes)) \
690 $LCTL set_param fail_val=3 fail_loc=0x190
693 umount_client $MOUNT || error "(7) Fail to stop client!"
694 scrub_check_status 8 scanning
696 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
697 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
702 do_nodes $(comma_list $(mdts_nodes)) \
703 $LCTL set_param fail_loc=0 fail_val=0
705 echo "starting MDTs with OI scrub disabled (2)"
706 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
707 scrub_check_status 11 crashed
710 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
711 do_nodes $(comma_list $(mdts_nodes)) \
712 $LCTL set_param fail_val=3 fail_loc=0x190
714 echo "starting MDTs without disabling OI scrub"
715 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
716 scrub_check_status 14 scanning
718 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
719 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
721 scrub_check_status 15 failed
722 mount_client $MOUNT || error "(16) Fail to start client!"
725 do_nodes $(comma_list $(mdts_nodes)) \
726 $LCTL set_param fail_loc=0 fail_val=0
731 for n in $(seq $MDSCOUNT); do
732 stat $DIR/$tdir/mds$n/sanity-scrub.sh &
736 for n in $(seq $MDSCOUNT); do
738 error "(18) Fail to stat mds$n/sanity-scrub.sh"
741 scrub_check_status 19 completed
742 scrub_check_flags 20 ""
744 run_test 5 "OI scrub state machine"
748 echo "starting MDTs with OI scrub disabled"
749 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
750 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
751 scrub_check_flags 4 recreated,inconsistent
752 mount_client $MOUNT || error "(5) Fail to start client!"
756 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
757 do_nodes $(comma_list $(mdts_nodes)) \
758 $LCTL set_param fail_val=2 fail_loc=0x190
762 # Sleep 5 sec to guarantee at least one object processed by OI scrub
764 # Fail the OI scrub to guarantee there is at least one checkpoint
765 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
766 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
768 scrub_check_status 7 failed
770 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
771 do_nodes $(comma_list $(mdts_nodes)) \
772 $LCTL set_param fail_val=3 fail_loc=0x190
775 for n in $(seq $MDSCOUNT); do
776 # stat will re-trigger OI scrub
777 stat $DIR/$tdir/mds$n/sanity-scrub.sh ||
778 error "(8) Failed to stat mds$n/sanity-scrub.sh"
781 umount_client $MOUNT || error "(9) Fail to stop client!"
782 scrub_check_status 10 scanning
784 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
785 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
789 for n in $(seq $MDSCOUNT); do
790 position0[$n]=$(scrub_status $n |
791 awk '/^last_checkpoint_position/ {print $2}')
792 position0[$n]=$((${position0[$n]} + 1))
797 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
798 do_nodes $(comma_list $(mdts_nodes)) \
799 $LCTL set_param fail_val=3 fail_loc=0x190
801 echo "starting MDTs without disabling OI scrub"
802 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
804 scrub_check_status 13 scanning
807 for n in $(seq $MDSCOUNT); do
808 position1[$n]=$(scrub_status $n |
809 awk '/^latest_start_position/ {print $2}')
810 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
811 error "(14) Expected position ${position0[$n]}, but" \
812 "got ${position1[$n]}"
816 do_nodes $(comma_list $(mdts_nodes)) \
817 $LCTL set_param fail_loc=0 fail_val=0
819 scrub_check_status 15 completed
820 scrub_check_flags 16 ""
822 run_test 6 "OI scrub resumes from last checkpoint"
826 echo "starting MDTs with OI scrub disabled"
827 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
828 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
829 scrub_check_flags 4 recreated,inconsistent
830 mount_client $MOUNT || error "(5) Fail to start client!"
834 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
835 do_nodes $(comma_list $(mdts_nodes)) \
836 $LCTL set_param fail_val=3 fail_loc=0x190
841 for n in $(seq $MDSCOUNT); do
842 stat $DIR/$tdir/mds$n/${tfile}300 ||
843 error "(7) Failed to stat mds$n/${tfile}300!"
846 scrub_check_status 8 scanning
847 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
848 scrub_check_flags 9 inconsistent,auto
850 scrub_check_flags 9 recreated,inconsistent,auto
853 do_nodes $(comma_list $(mdts_nodes)) \
854 $LCTL set_param fail_loc=0 fail_val=0
856 scrub_check_status 10 completed
859 run_test 7 "System is available during OI scrub scanning"
863 echo "starting MDTs with OI scrub disabled"
864 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
865 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
866 scrub_check_flags 4 recreated,inconsistent
868 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
869 do_nodes $(comma_list $(mdts_nodes)) \
870 $LCTL set_param fail_val=1 fail_loc=0x190
873 scrub_check_status 6 scanning
875 scrub_check_status 8 stopped
877 scrub_check_status 10 scanning
879 do_nodes $(comma_list $(mdts_nodes)) \
880 $LCTL set_param fail_loc=0 fail_val=0
882 scrub_check_status 11 completed
883 scrub_check_flags 12 ""
885 run_test 8 "Control OI scrub manually"
888 # Skip the scrub speed test on ZFS because its performance is unstable
889 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
890 skip "test scrub speed only on ldiskfs" && return
892 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
893 skip "Testing on UP system, the speed may be inaccurate."
899 echo "starting MDTs with OI scrub disabled"
900 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
901 scrub_check_flags 4 recreated,inconsistent
903 local BASE_SPEED1=100
905 # OI scrub should run with full speed under inconsistent case
906 scrub_start 5 -s $BASE_SPEED1
909 scrub_check_status 6 completed
910 scrub_check_flags 7 ""
911 # OI scrub should run with limited speed under non-inconsistent case
912 scrub_start 8 -s $BASE_SPEED1 -r
915 scrub_check_status 9 scanning
917 # Take the 1024 pre-fetched items into account. There may also be
918 # a timing error, which should normally be less than 2 seconds.
919 # We allow a further 20% for scheduling error.
920 local PRE_FETCHED=1024
922 # MAX_MARGIN = 1.2 = 12 / 10
923 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
924 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
926 for n in $(seq $MDSCOUNT); do
927 local SPEED=$(scrub_status $n | \
928 awk '/^average_speed/ { print $2 }')
929 [ $SPEED -lt $MAX_SPEED ] ||
930 error "(10) Got speed $SPEED, expected less than" \
935 local BASE_SPEED2=300
937 for n in $(seq $MDSCOUNT); do
938 do_facet mds$n $LCTL set_param -n \
939 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
943 # MIN_MARGIN = 0.8 = 8 / 10
944 local MIN_SPEED=$(((PRE_FETCHED + \
945 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
946 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
947 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
948 # MAX_MARGIN = 1.2 = 12 / 10
949 MAX_SPEED=$(((PRE_FETCHED + \
950 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
951 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
952 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
953 for n in $(seq $MDSCOUNT); do
954 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
955 [ $SPEED -gt $MIN_SPEED ] ||
956 error "(11) Got speed $SPEED, expected more than" \
958 [ $SPEED -lt $MAX_SPEED ] ||
959 error "(12) Got speed $SPEED, expected less than" \
962 do_facet mds$n $LCTL set_param -n \
963 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
966 scrub_check_status 13 completed
968 run_test 9 "OI scrub speed control"
972 echo "starting mds$n with OI scrub disabled (1)"
973 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
974 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
975 scrub_check_flags 4 recreated,inconsistent
976 mount_client $MOUNT || error "(5) Fail to start client!"
980 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
981 do_nodes $(comma_list $(mdts_nodes)) \
982 $LCTL set_param fail_val=1 fail_loc=0x190
985 scrub_check_status 7 scanning
986 umount_client $MOUNT || error "(8) Fail to stop client!"
988 echo "starting MDTs with OI scrub disabled (2)"
989 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
990 scrub_check_status 11 paused
992 echo "starting MDTs without disabling OI scrub"
993 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
994 scrub_check_status 14 scanning
996 do_nodes $(comma_list $(mdts_nodes)) \
997 $LCTL set_param fail_loc=0 fail_val=0
999 scrub_check_status 15 completed
1000 scrub_check_flags 16 ""
1002 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
1004 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
1007 echo "starting MDTs with OI scrub disabled"
1008 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1009 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1010 scrub_check_flags 4 recreated,inconsistent
1012 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
1013 do_nodes $(comma_list $(mdts_nodes)) \
1014 $LCTL set_param fail_val=3 fail_loc=0x190
1017 scrub_check_status 6 scanning
1019 echo "starting MDTs with OI scrub disabled"
1020 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
1021 scrub_check_status 9 paused
1023 echo "starting MDTs without disabling OI scrub"
1024 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
1025 scrub_check_status 12 scanning
1027 do_nodes $(comma_list $(mdts_nodes)) \
1028 $LCTL set_param fail_loc=0 fail_val=0
1030 scrub_check_status 13 completed
1031 scrub_check_flags 14 ""
1033 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
1036 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1037 skip "ldiskfs special test" && return
1042 check_mount_and_prep
1044 for n in $(seq $MDSCOUNT); do
1045 test_mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
1046 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
1048 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
1049 error "(2) Fail to create under $tdir/mds$n"
1052 # reset OI scrub start point by force
1054 scrub_check_status 4 completed
1059 # OI scrub should skip the newly created objects on the first access.
1060 # Note that we're creating a new llog for every OST on every startup;
1061 # new features can make this even less stable, so we only check that
1062 # the number of skipped files is more than the number of known created objects.
1063 local MINIMUM=$((CREATED + 1)) # files + directory
1064 for n in $(seq $MDSCOUNT); do
1065 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1066 [ $SKIPPED -lt $MINIMUM ] &&
1067 error "(5) Expect at least $MINIMUM objects" \
1068 "skipped on mds$n, but got $SKIPPED"
1070 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1073 # reset OI scrub start point by force
1075 scrub_check_status 7 completed
1077 # OI scrub should skip a newly created object only once
1078 for n in $(seq $MDSCOUNT); do
1079 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
1080 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
1082 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
1083 error "(8) Expect 0 objects skipped on mds$n, but" \
1087 run_test 11 "OI scrub skips the new created objects only once"
1090 check_mount_and_prep
1091 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1093 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
1094 do_facet ost1 $LCTL set_param fail_loc=0x195
1095 local count=$(precreated_ost_obj_count 0 0)
1097 createmany -o $DIR/$tdir/f $((count + 32))
1098 umount_client $MOUNT || error "(1) Fail to stop client!"
1100 stop ost1 || error "(2) Fail to stop ost1"
1102 #define OBD_FAIL_OST_NODESTROY 0x233
1103 do_facet ost1 $LCTL set_param fail_loc=0x233
1105 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1106 error "(3) Fail to start ost1"
1108 mount_client $MOUNT || error "(4) Fail to start client!"
1110 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1112 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1114 do_facet ost1 $LCTL set_param fail_loc=0
# Poll the "status" field of ost1's oi_scrub parameter until it reads
# "completed". The failure message names that same literal status, because
# no variable holding the expected status is set in the visible part of
# this test.
1115 wait_update_facet ost1 "$LCTL get_param -n \
1116 osd-*.$(facet_svc ost1).oi_scrub |
1117 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1118 error "(7) Expected 'completed' on ost1"
1120 ls -ail $DIR/$tdir > /dev/null || {
1122 error "(8) ls should succeed"
1125 run_test 12 "OI scrub can rebuild invalid /O entries"
1128 check_mount_and_prep
1129 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1131 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1132 do_facet ost1 $LCTL set_param fail_loc=0x196
1133 local count=$(precreated_ost_obj_count 0 0)
1135 createmany -o $DIR/$tdir/f $((count + 32))
1136 do_facet ost1 $LCTL set_param fail_loc=0
1138 umount_client $MOUNT || error "(1) Fail to stop client!"
1140 stop ost1 || error "(2) Fail to stop ost1"
1142 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1143 error "(3) Fail to start ost1"
1145 mount_client $MOUNT || error "(4) Fail to start client!"
1147 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1149 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Poll the "status" field of ost1's oi_scrub parameter until it reads
# "completed". The failure message names that same literal status, because
# no variable holding the expected status is set in the visible part of
# this test.
1151 wait_update_facet ost1 "$LCTL get_param -n \
1152 osd-*.$(facet_svc ost1).oi_scrub |
1153 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1154 error "(7) Expected 'completed' on ost1"
1156 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1158 run_test 13 "OI scrub can rebuild missed /O entries"
1161 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
1162 skip "ldiskfs special test" && return
1164 check_mount_and_prep
1165 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1167 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1168 do_facet ost1 $LCTL set_param fail_loc=0x196
1169 local count=$(precreated_ost_obj_count 0 0)
1171 createmany -o $DIR/$tdir/f $((count + 1000))
1172 do_facet ost1 $LCTL set_param fail_loc=0
1174 umount_client $MOUNT || error "(1) Fail to stop client!"
1176 stop ost1 || error "(2) Fail to stop ost1"
1179 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1180 error "(3) Fail to run e2fsck error"
1182 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1183 error "(4) Fail to start ost1"
1185 mount_client $MOUNT || error "(5) Fail to start client!"
1187 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1188 awk '/^lf_repa[ri]*ed/ { print $2 }')
1189 [ $LF_REPAIRED -ge 1000 ] ||
1190 error "(6) Some entry under /lost+found should be repaired"
1192 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1194 run_test 14 "OI scrub can repair objects under lost+found"
1199 formatall > /dev/null
1200 setupall > /dev/null
1203 echo "starting MDTs with OI scrub disabled"
1204 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1205 scrub_check_status 3 init
1206 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] ||
1207 scrub_check_flags 4 recreated,inconsistent
1209 # run under dryrun mode
1210 scrub_start 5 --dryrun
1211 scrub_check_status 6 completed
1212 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1213 scrub_check_flags 7 inconsistent
1216 scrub_check_flags 7 recreated,inconsistent
1219 scrub_check_params 8 dryrun
1220 scrub_check_repaired 9 $repaired 1
1222 # run under dryrun mode again
1223 scrub_start 10 --dryrun
1224 scrub_check_status 11 completed
1225 if [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ]; then
1226 scrub_check_flags 12 inconsistent
1228 scrub_check_flags 12 recreated,inconsistent
1230 scrub_check_params 13 dryrun
1231 scrub_check_repaired 14 $repaired 1
1233 # run under normal mode
1235 scrub_check_status 16 completed
1236 scrub_check_flags 17 ""
1237 scrub_check_params 18 ""
1238 scrub_check_repaired 19 $repaired 0
1240 # run under normal mode again
1242 scrub_check_status 21 completed
1243 scrub_check_flags 22 ""
1244 scrub_check_params 23 ""
1245 scrub_check_repaired 24 0 0
1247 run_test 15 "Dryrun mode OI scrub"
1250 check_mount_and_prep
1251 scrub_enable_index_backup
1253 #define OBD_FAIL_OSD_INDEX_CRASH 0x199
1254 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x199
1256 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0
1258 echo "starting MDTs without disabling OI scrub"
1259 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
1260 mount_client $MOUNT || error "(2) Fail to start client!"
1262 scrub_disable_index_backup
1264 run_test 16 "Initial OI scrub can rebuild crashed index objects"
1266 # restore MDS/OST size
1267 MDSSIZE=${SAVED_MDSSIZE}
1268 OSTSIZE=${SAVED_OSTSIZE}
1269 OSTCOUNT=${SAVED_OSTCOUNT}
1271 # cleanup the system at last
1272 REFORMAT="yes" cleanup_and_setup_lustre
1275 check_and_cleanup_lustre