3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT"
11 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
12 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
14 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
15 . $LUSTRE/tests/test-framework.sh
17 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
20 require_dsh_mds || exit 0
22 SAVED_MDSSIZE=${MDSSIZE}
23 SAVED_OSTSIZE=${OSTSIZE}
24 SAVED_OSTCOUNT=${OSTCOUNT}
25 # use small MDS + OST size to speed formatting time
26 # do not make MDSSIZE/OSTSIZE too small, since that affects the default journal size
27 # 200M MDT device can guarantee uninitialized groups during the OI scrub
30 # no need for too many OSTs; fewer OSTs reduce the format/start/stop overhead
31 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
35 # build up a clean test environment.
39 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
40 skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
42 [ $(facet_fstype ost1) != "ldiskfs" ] &&
43 skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
45 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
46 skip "Need MDS version at least 2.2.90" && check_and_cleanup_lustre &&
49 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
50 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
52 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
53 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
55 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
56 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
58 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
59 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
60 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
62 [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
63 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
65 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
70 MDT_DEV="${FSNAME}-MDT0000"
71 OST_DEV="${FSNAME}-OST0000"
72 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
78 # use "lfsck_start -A" when we no longer need testing interop
79 for n in $(seq $MDSCOUNT); do
80 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
82 error "($error_id) Failed to start OI scrub on mds$n"
90 # use "lfsck_stop -A" when we no longer need testing interop
91 for n in $(seq $MDSCOUNT); do
92 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
93 error "($error_id) Failed to stop OI scrub on mds$n"
100 do_facet mds$n $LCTL get_param -n \
101 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
104 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
105 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
106 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
107 SHOW_SCRUB="do_facet $SINGLEMDS \
108 $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
109 SHOW_SCRUB_ON_OST="do_facet ost1 \
110 $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
111 MOUNT_OPTS_SCRUB="-o user_xattr"
112 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
120 echo "preparing... $(date)"
121 for n in $(seq $MDSCOUNT); do
122 echo "creating $nfiles files on mds$n"
123 if [ $n -eq 1 ]; then
124 mkdir $DIR/$tdir/mds$n ||
125 error "Failed to create directory mds$n"
127 $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
128 error "Failed to create remote directory mds$n"
130 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
131 error "Failed to copy files to mds$n"
132 if [[ $nfiles -gt 0 ]]; then
133 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
134 /dev/null || error "createmany failed on mds$n"
137 echo "prepared $(date)."
138 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
139 for n in $(seq $MDSCOUNT); do
141 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
150 for n in $(seq $MDSCOUNT); do
151 start mds$n $(mdsdevname $n) $opts >/dev/null ||
152 error "($error_id) Failed to start mds$n"
160 for n in $(seq $MDSCOUNT); do
161 echo "stopping mds$n"
162 stop mds$n >/dev/null ||
163 error "($error_id) Failed to stop mds$n"
167 scrub_check_status() {
172 for n in $(seq $MDSCOUNT); do
173 wait_update_facet mds$n "$LCTL get_param -n \
174 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
175 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
176 error "($error_id) Expected '$expected' on mds$n"
180 scrub_check_flags() {
186 for n in $(seq $MDSCOUNT); do
187 actual=$(do_facet mds$n $LCTL get_param -n \
188 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
189 awk '/^flags/ { print $2 }')
190 if [ "$actual" != "$expected" ]; then
191 error "($error_id) Expected '$expected' on mds$n, but" \
197 scrub_check_params() {
203 for n in $(seq $MDSCOUNT); do
204 actual=$(do_facet mds$n $LCTL get_param -n \
205 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
206 awk '/^param/ { print $2 }')
207 if [ "$actual" != "$expected" ]; then
208 error "($error_id) Expected '$expected' on mds$n, but" \
214 scrub_check_repaired() {
220 for n in $(seq $MDSCOUNT); do
221 actual=$(do_facet mds$n $LCTL get_param -n \
222 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
223 awk '/^updated/ { print $2 }')
225 if [ $expected -eq 0 -a $actual -ne 0 ]; then
226 error "($error_id) Expected no repaired on mds$n, but" \
230 if [ $expected -ne 0 -a $actual -lt $expected ]; then
231 error "($error_id) Expected '$expected' on mds$n, but" \
241 for n in $(seq $MDSCOUNT); do
242 diff -q $LUSTRE/tests/test-framework.sh \
243 $DIR/$tdir/mds$n/test-framework.sh ||
244 error "($error_id) File data check failed"
248 scrub_check_data2() {
253 for n in $(seq $MDSCOUNT); do
254 diff -q $LUSTRE/tests/$filename \
255 $DIR/$tdir/mds$n/$filename ||
256 error "($error_id) File data check failed"
265 for n in $(seq $MDSCOUNT); do
266 mds_remove_ois mds$n $index ||
267 error "($error_id) Failed to remove OI .$index on mds$n"
271 scrub_backup_restore() {
276 for n in $(seq $MDSCOUNT); do
# Tag the failure with the expanded step id, as the sibling scrub_* helpers do.
277 mds_backup_restore mds$n $igif ||
278 error "($error_id) Backup/restore on mds$n failed"
282 scrub_enable_auto() {
285 for n in $(seq $MDSCOUNT); do
286 do_facet mds$n $LCTL set_param -n \
287 osd-ldiskfs.$(facet_svc mds$n).auto_scrub 1
292 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
298 for n in $(seq $MDSCOUNT); do
299 do_facet mds$n $LCTL set_param -n \
300 osd-ldiskfs.$(facet_svc mds$n).full_scrub_ratio $ratio
308 for n in $(seq $MDSCOUNT); do
309 do_facet mds$n $LCTL set_param -n \
310 osd-ldiskfs.$(facet_svc mds$n).full_scrub_speed $speed
316 echo "starting MDTs without disabling OI scrub"
317 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
318 scrub_check_status 2 init
319 scrub_check_flags 3 ""
320 mount_client $MOUNT || error "(4) Fail to start client!"
323 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
327 echo "start $SINGLEMDS without disabling OI scrub"
328 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
329 error "(1) Fail to start MDS!"
331 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
332 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
334 mount_client $MOUNT || error "(4) Fail to start client!"
335 #define OBD_FAIL_OSD_FID_MAPPING 0x193
336 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
337 # update .lustre OI mapping
339 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
340 umount_client $MOUNT || error "(5) Fail to stop client!"
342 echo "stop $SINGLEMDS"
343 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
345 echo "start $SINGLEMDS with disabling OI scrub"
346 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
347 error "(7) Fail to start MDS!"
349 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
350 [ "$FLAGS" == "inconsistent" ] ||
351 error "(9) Expect 'inconsistent', but got '$FLAGS'"
353 run_test 1a "Auto trigger initial OI scrub when server mounts"
358 echo "start MDTs without disabling OI scrub"
359 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
360 scrub_check_status 3 completed
361 mount_client $MOUNT || error "(4) Fail to start client!"
364 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
369 # OI files to be removed:
371 # idx 2: oi.16.{2,4,8,16,32}
372 # idx 3: oi.16.{3,9,27}
373 for index in 0 2 3; do
375 scrub_remove_ois 1 $index
376 echo "start MDTs with OI scrub disabled"
377 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
378 scrub_check_flags 3 recreated
380 scrub_check_status 5 completed
381 scrub_check_flags 6 ""
384 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
388 scrub_backup_restore 1
389 echo "starting MDTs without disabling OI scrub"
390 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
391 scrub_check_status 3 completed
392 mount_client $MOUNT || error "(4) Fail to start client!"
395 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
397 # test_3 is obsolete, it will be covered by test_5.
399 formatall > /dev/null
403 scrub_backup_restore 1
404 echo "starting MDTs with OI scrub disabled"
405 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
406 scrub_check_status 3 init
407 scrub_check_flags 4 inconsistent
409 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
413 scrub_backup_restore 1
414 echo "starting MDTs with OI scrub disabled"
415 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
416 scrub_check_flags 4 inconsistent
417 mount_client $MOUNT || error "(5) Fail to start client!"
423 scrub_check_status 7 completed
424 scrub_check_flags 8 ""
427 for n in $(seq $MDSCOUNT); do
428 updated0[$n]=$(scrub_status $n |
429 awk '/^sf_items_updated_prior/ { print $2 }')
432 scrub_check_data2 sanity-scrub.sh 9
436 for n in $(seq $MDSCOUNT); do
437 updated1[$n]=$(scrub_status $n |
438 awk '/^sf_items_updated_prior/ { print $2 }')
439 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
440 error "(10) NOT auto trigger full scrub as expected"
443 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
447 scrub_backup_restore 1
448 echo "starting MDTs with OI scrub disabled"
449 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
450 scrub_check_flags 4 inconsistent
451 mount_client $MOUNT || error "(5) Fail to start client!"
454 full_scrub_speed 10000
458 scrub_check_status 7 completed
459 scrub_check_flags 8 ""
462 for n in $(seq $MDSCOUNT); do
463 updated0[$n]=$(scrub_status $n |
464 awk '/^sf_items_updated_prior/ { print $2 }')
467 scrub_check_data2 sanity-scrub.sh 9
470 scrub_check_status 10 completed
471 scrub_check_flags 11 ""
474 for n in $(seq $MDSCOUNT); do
475 updated1[$n]=$(scrub_status $n |
476 awk '/^sf_items_updated_prior/ { print $2 }')
477 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
478 error "(12) Auto trigger full scrub unexpectedly"
481 for n in $(seq $MDSCOUNT); do
482 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
483 error "(13) fail to ls"
487 scrub_check_status 14 completed
488 scrub_check_flags 15 ""
490 for n in $(seq $MDSCOUNT); do
491 updated0[$n]=$(scrub_status $n |
492 awk '/^sf_items_updated_prior/ { print $2 }')
493 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
494 error "(16) Auto trigger full scrub unexpectedly"
497 for n in $(seq $MDSCOUNT); do
498 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
502 for n in $(seq $MDSCOUNT); do
503 updated1[$n]=$(scrub_status $n |
504 awk '/^sf_items_updated_prior/ { print $2 }')
505 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
506 error "(18) NOT auto trigger full scrub as expected"
509 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
513 scrub_backup_restore 1
514 echo "starting MDTs with OI scrub disabled"
515 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
516 scrub_check_flags 4 inconsistent
517 mount_client $MOUNT || error "(5) Fail to start client!"
524 scrub_check_status 7 completed
525 scrub_check_flags 8 ""
528 for n in $(seq $MDSCOUNT); do
529 updated0[$n]=$(scrub_status $n |
530 awk '/^sf_items_updated_prior/ { print $2 }')
533 scrub_check_data2 sanity-scrub.sh 9
536 scrub_check_status 10 completed
537 scrub_check_flags 11 ""
540 for n in $(seq $MDSCOUNT); do
541 updated1[$n]=$(scrub_status $n |
542 awk '/^sf_items_updated_prior/ { print $2 }')
543 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
544 error "(12) Auto trigger full scrub unexpectedly"
547 for n in $(seq $MDSCOUNT); do
548 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
549 error "(13) fail to ls"
553 scrub_check_status 14 completed
554 scrub_check_flags 15 ""
556 for n in $(seq $MDSCOUNT); do
557 updated0[$n]=$(scrub_status $n |
558 awk '/^sf_items_updated_prior/ { print $2 }')
559 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
560 error "(16) Auto trigger full scrub unexpectedly"
563 for n in $(seq $MDSCOUNT); do
564 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
568 for n in $(seq $MDSCOUNT); do
569 updated1[$n]=$(scrub_status $n |
570 awk '/^sf_items_updated_prior/ { print $2 }')
571 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
572 error "(18) NOT auto trigger full scrub as expected"
575 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
578 formatall > /dev/null
582 scrub_backup_restore 1
583 echo "starting MDTs with OI scrub disabled"
584 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
585 scrub_check_status 3 init
586 scrub_check_flags 4 inconsistent
587 mount_client $MOUNT || error "(5) Fail to start client!"
590 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
591 do_nodes $(comma_list $(mdts_nodes)) \
592 $LCTL set_param fail_val=3 fail_loc=0x190
596 umount_client $MOUNT || error "(7) Fail to stop client!"
597 scrub_check_status 8 scanning
599 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
600 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
605 do_nodes $(comma_list $(mdts_nodes)) \
606 $LCTL set_param fail_loc=0 fail_val=0
608 echo "starting MDTs with OI scrub disabled"
609 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
610 scrub_check_status 11 crashed
613 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
614 do_nodes $(comma_list $(mdts_nodes)) \
615 $LCTL set_param fail_val=3 fail_loc=0x190
617 echo "starting MDTs without disabling OI scrub"
618 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
619 scrub_check_status 14 scanning
621 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
622 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
624 scrub_check_status 15 failed
625 mount_client $MOUNT || error "(16) Fail to start client!"
627 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
628 do_nodes $(comma_list $(mdts_nodes)) \
629 $LCTL set_param fail_val=3 fail_loc=0x190
632 for n in $(seq $MDSCOUNT); do
633 stat $DIR/$tdir/mds$n/${tfile}800 ||
634 error "(17) Failed to stat mds$n/${tfile}800"
637 scrub_check_status 18 scanning
639 do_nodes $(comma_list $(mdts_nodes)) \
640 $LCTL set_param fail_loc=0 fail_val=0
642 scrub_check_status 19 completed
643 scrub_check_flags 20 ""
645 run_test 5 "OI scrub state machine"
649 scrub_backup_restore 1
650 echo "starting MDTs with OI scrub disabled"
651 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
652 scrub_check_flags 4 inconsistent
653 mount_client $MOUNT || error "(5) Fail to start client!"
656 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
657 do_nodes $(comma_list $(mdts_nodes)) \
658 $LCTL set_param fail_val=2 fail_loc=0x190
663 # Sleep 5 sec to guarantee at least one object processed by OI scrub
665 # Fail the OI scrub to guarantee there is at least one checkpoint
666 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
667 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
669 scrub_check_status 7 failed
671 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
672 do_nodes $(comma_list $(mdts_nodes)) \
673 $LCTL set_param fail_val=3 fail_loc=0x190
676 for n in $(seq $MDSCOUNT); do
677 # stat will re-trigger OI scrub
678 stat $DIR/$tdir/mds$n/${tfile}800 ||
679 error "(8) Failed to stat mds$n/${tfile}800"
682 umount_client $MOUNT || error "(9) Fail to stop client!"
683 scrub_check_status 10 scanning
685 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
686 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
690 for n in $(seq $MDSCOUNT); do
691 position0[$n]=$(scrub_status $n |
692 awk '/^last_checkpoint_position/ {print $2}')
693 position0[$n]=$((${position0[$n]} + 1))
698 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
699 do_nodes $(comma_list $(mdts_nodes)) \
700 $LCTL set_param fail_val=3 fail_loc=0x190
702 echo "starting MDTs without disabling OI scrub"
703 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
705 scrub_check_status 13 scanning
708 for n in $(seq $MDSCOUNT); do
709 position1[$n]=$(scrub_status $n |
710 awk '/^latest_start_position/ {print $2}')
711 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
712 error "(14) Expected position ${position0[$n]}, but" \
713 "got ${position1[$n]}"
717 do_nodes $(comma_list $(mdts_nodes)) \
718 $LCTL set_param fail_loc=0 fail_val=0
720 scrub_check_status 15 completed
721 scrub_check_flags 16 ""
723 run_test 6 "OI scrub resumes from last checkpoint"
727 scrub_backup_restore 1
728 echo "starting MDTs with OI scrub disabled"
729 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
730 scrub_check_flags 4 inconsistent
731 mount_client $MOUNT || error "(5) Fail to start client!"
734 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
735 do_nodes $(comma_list $(mdts_nodes)) \
736 $LCTL set_param fail_val=3 fail_loc=0x190
742 for n in $(seq $MDSCOUNT); do
743 stat $DIR/$tdir/mds$n/${tfile}300 ||
744 error "(7) Failed to stat mds$n/${tfile}300!"
747 scrub_check_status 8 scanning
748 scrub_check_flags 9 inconsistent,auto
750 do_nodes $(comma_list $(mdts_nodes)) \
751 $LCTL set_param fail_loc=0 fail_val=0
753 scrub_check_status 10 completed
756 run_test 7 "System is available during OI scrub scanning"
760 scrub_backup_restore 1
761 echo "starting MDTs with OI scrub disabled"
762 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
763 scrub_check_flags 4 inconsistent
765 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
766 do_nodes $(comma_list $(mdts_nodes)) \
767 $LCTL set_param fail_val=1 fail_loc=0x190
770 scrub_check_status 6 scanning
772 scrub_check_status 8 stopped
774 scrub_check_status 10 scanning
776 do_nodes $(comma_list $(mdts_nodes)) \
777 $LCTL set_param fail_loc=0 fail_val=0
779 scrub_check_status 11 completed
780 scrub_check_flags 12 ""
782 run_test 8 "Control OI scrub manually"
785 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
786 skip "Testing on UP system, the speed may be inaccurate."
791 scrub_backup_restore 1
793 echo "starting MDTs with OI scrub disabled"
794 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
795 scrub_check_flags 4 inconsistent
797 local BASE_SPEED1=100
799 # OI scrub should run with full speed under inconsistent case
800 scrub_start 5 -s $BASE_SPEED1
803 scrub_check_status 6 completed
804 scrub_check_flags 7 ""
805 # OI scrub should run with limited speed under non-inconsistent case
806 scrub_start 8 -s $BASE_SPEED1 -r
809 scrub_check_status 9 scanning
811 # Do NOT ignore that there are 1024 pre-fetched items. And there
812 # may be time error, normally it should be less than 2 seconds.
813 # We allow another 20% schedule error.
814 local PRE_FETCHED=1024
816 # MAX_MARGIN = 1.2 = 12 / 10
817 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
818 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
820 for n in $(seq $MDSCOUNT); do
821 local SPEED=$(scrub_status $n | \
822 awk '/^average_speed/ { print $2 }')
823 [ $SPEED -lt $MAX_SPEED ] ||
824 error "(10) Got speed $SPEED, expected less than" \
829 local BASE_SPEED2=300
831 for n in $(seq $MDSCOUNT); do
832 do_facet mds$n $LCTL set_param -n \
833 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
837 # MIN_MARGIN = 0.8 = 8 / 10
838 local MIN_SPEED=$(((PRE_FETCHED + \
839 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
840 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
841 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
842 # MAX_MARGIN = 1.2 = 12 / 10
843 MAX_SPEED=$(((PRE_FETCHED + \
844 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
845 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
846 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
847 for n in $(seq $MDSCOUNT); do
848 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
849 [ $SPEED -gt $MIN_SPEED ] ||
850 error "(11) Got speed $SPEED, expected more than" \
852 [ $SPEED -lt $MAX_SPEED ] ||
853 error "(12) Got speed $SPEED, expected less than" \
856 do_facet mds$n $LCTL set_param -n \
857 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
860 scrub_check_status 13 completed
862 run_test 9 "OI scrub speed control"
866 scrub_backup_restore 1
# No visible line sets $n before this message, so name the MDTs collectively
# (same wording as the later messages in this test).
867 echo "starting MDTs with OI scrub disabled"
868 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
869 scrub_check_flags 4 inconsistent
870 mount_client $MOUNT || error "(5) Fail to start client!"
873 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
874 do_nodes $(comma_list $(mdts_nodes)) \
875 $LCTL set_param fail_val=1 fail_loc=0x190
879 scrub_check_status 7 scanning
880 umount_client $MOUNT || error "(8) Fail to stop client!"
882 echo "starting MDTs with OI scrub disabled"
883 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
884 scrub_check_status 11 paused
886 echo "starting MDTs without disabling OI scrub"
887 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
888 scrub_check_status 14 scanning
890 do_nodes $(comma_list $(mdts_nodes)) \
891 $LCTL set_param fail_loc=0 fail_val=0
893 scrub_check_status 15 completed
894 scrub_check_flags 16 ""
896 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
898 # test_10b is obsolete; it will be covered by the related sanity-lfsck tests.
901 scrub_backup_restore 1
902 echo "starting MDTs with OI scrub disabled"
903 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
904 scrub_check_flags 4 inconsistent
906 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
907 do_nodes $(comma_list $(mdts_nodes)) \
908 $LCTL set_param fail_val=3 fail_loc=0x190
911 scrub_check_status 6 scanning
913 echo "starting MDTs with OI scrub disabled"
914 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
915 scrub_check_status 9 paused
917 echo "starting MDTs without disabling OI scrub"
918 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
919 scrub_check_status 12 scanning
921 do_nodes $(comma_list $(mdts_nodes)) \
922 $LCTL set_param fail_loc=0 fail_val=0
924 scrub_check_status 13 completed
925 scrub_check_flags 14 ""
927 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
935 for n in $(seq $MDSCOUNT); do
936 $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
937 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
939 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
940 error "(2) Fail to create under $tdir/mds$n"
943 # reset OI scrub start point by force
945 scrub_check_status 4 completed
950 # OI scrub should skip the newly created objects on their first access
951 # notice we're creating a new llog for every OST on every startup
952 # new features can make this even less stable, so we only check
953 # that the number of skipped files is less than 2x the number of files
954 local MAXIMUM=$((CREATED * 2))
955 local MINIMUM=$((CREATED + 1)) # files + directory
956 for n in $(seq $MDSCOUNT); do
957 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
958 [ $SKIPPED -ge $MAXIMUM -o $SKIPPED -lt $MINIMUM ] &&
959 error "(5) Expect [ $MINIMUM , $MAXIMUM ) objects" \
960 "skipped on mds$n, but got $SKIPPED"
962 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
965 # reset OI scrub start point by force
967 scrub_check_status 7 completed
969 # OI scrub should skip the newly created objects only once
970 for n in $(seq $MDSCOUNT); do
971 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
972 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
974 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
975 error "(8) Expect 0 objects skipped on mds$n, but" \
979 run_test 11 "OI scrub skips the new created objects only once"
983 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
985 local count=$(precreated_ost_obj_count 0 0)
987 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
988 do_facet ost1 $LCTL set_param fail_loc=0x195
989 createmany -o $DIR/$tdir/f $((count + 32))
991 umount_client $MOUNT || error "(1) Fail to stop client!"
993 stop ost1 || error "(2) Fail to stop ost1"
995 #define OBD_FAIL_OST_NODESTROY 0x233
996 do_facet ost1 $LCTL set_param fail_loc=0x233
998 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
999 error "(3) Fail to start ost1"
1001 mount_client $MOUNT || error "(4) Fail to start client!"
1003 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1005 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1007 do_facet ost1 $LCTL set_param fail_loc=0
# "completed" is the status handed to wait_update_facet; no $expected is set
# in the visible body of this test, so report the literal value on failure.
1008 wait_update_facet ost1 "$LCTL get_param -n \
1009 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1010 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1011 error "(7) Expected 'completed' on ost1"
1013 ls -ail $DIR/$tdir > /dev/null || {
1015 error "(8) ls should succeed"
1018 run_test 12 "OI scrub can rebuild invalid /O entries"
1021 check_mount_and_prep
1022 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1024 local count=$(precreated_ost_obj_count 0 0)
1026 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1027 do_facet ost1 $LCTL set_param fail_loc=0x196
1028 createmany -o $DIR/$tdir/f $((count + 32))
1029 do_facet ost1 $LCTL set_param fail_loc=0
1031 umount_client $MOUNT || error "(1) Fail to stop client!"
1033 stop ost1 || error "(2) Fail to stop ost1"
1035 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1036 error "(3) Fail to start ost1"
1038 mount_client $MOUNT || error "(4) Fail to start client!"
1040 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1042 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# "completed" is the status handed to wait_update_facet; no $expected is set
# in the visible body of this test, so report the literal value on failure.
1044 wait_update_facet ost1 "$LCTL get_param -n \
1045 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1046 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1047 error "(7) Expected 'completed' on ost1"
1049 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1051 run_test 13 "OI scrub can rebuild missed /O entries"
1054 check_mount_and_prep
1055 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1057 local count=$(precreated_ost_obj_count 0 0)
1059 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1060 do_facet ost1 $LCTL set_param fail_loc=0x196
1061 createmany -o $DIR/$tdir/f $((count + 32))
1062 do_facet ost1 $LCTL set_param fail_loc=0
1064 umount_client $MOUNT || error "(1) Fail to stop client!"
1066 stop ost1 || error "(2) Fail to stop ost1"
1069 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1070 error "(3) Fail to run e2fsck error"
1072 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1073 error "(4) Fail to start ost1"
1075 mount_client $MOUNT || error "(5) Fail to start client!"
1077 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1078 awk '/^lf_reparied/ { print $2 }')
1079 [ $LF_REPAIRED -gt 0 ] ||
1080 error "(6) Some entry under /lost+found should be repaired"
1082 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1084 run_test 14 "OI scrub can repair objects under lost+found"
1087 # skip test_15 for LU-4182
1088 [ $MDSCOUNT -ge 2 ] && skip "skip now for >= 2 MDTs" && return
1089 local server_version=$(lustre_version_code $SINGLEMDS)
1091 scrub_backup_restore 1
1092 echo "starting MDTs with OI scrub disabled"
1093 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1094 scrub_check_status 3 init
1095 scrub_check_flags 4 inconsistent
1097 # run under dryrun mode
1098 if [ $server_version -lt $(version_code 2.5.58) ]; then
1099 scrub_start 5 --dryrun on
1101 scrub_start 5 --dryrun
1103 scrub_check_status 6 completed
1104 scrub_check_flags 7 inconsistent
1105 scrub_check_params 8 dryrun
1106 scrub_check_repaired 9 20
1108 # run under dryrun mode again
1109 if [ $server_version -lt $(version_code 2.5.58) ]; then
1110 scrub_start 10 --dryrun on
1112 scrub_start 10 --dryrun
1114 scrub_check_status 11 completed
1115 scrub_check_flags 12 inconsistent
1116 scrub_check_params 13 dryrun
1117 scrub_check_repaired 14 20
1119 # run under normal mode
1121 # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
1122 # work under Lustre-2.y (y >= 6), the test script should be fixed as
1123 # "-noff" or "--dryrun=off" or nothing by default.
1124 if [ $server_version -lt $(version_code 2.5.58) ]; then
1125 scrub_start 15 --dryrun off
1129 scrub_check_status 16 completed
1130 scrub_check_flags 17 ""
1131 scrub_check_params 18 ""
1132 scrub_check_repaired 19 20
1134 # run under normal mode again
1135 if [ $server_version -lt $(version_code 2.5.58) ]; then
1136 scrub_start 20 --dryrun off
1140 scrub_check_status 21 completed
1141 scrub_check_flags 22 ""
1142 scrub_check_params 23 ""
1143 scrub_check_repaired 24 0
1145 run_test 15 "Dryrun mode OI scrub"
1147 # restore MDS/OST size
1148 MDSSIZE=${SAVED_MDSSIZE}
1149 OSTSIZE=${SAVED_OSTSIZE}
1150 OSTCOUNT=${SAVED_OSTCOUNT}
1152 # cleanup the system at last