3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 #Bug number for excepting test 6705
11 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT 1c 5 10"
13 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
14 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
16 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
17 . $LUSTRE/tests/test-framework.sh
19 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
22 require_dsh_mds || exit 0
26 if ! check_versions; then
27 skip "It is NOT necessary to test scrub under interoperation mode"
31 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
32 skip "test OI scrub only for ldiskfs" && exit 0
34 [ $(facet_fstype ost1) != "ldiskfs" ] &&
35 skip "test OI scrub only for ldiskfs" && exit 0
37 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
38 skip "Need MDS version at least 2.2.90" && exit 0
40 SAVED_MDSSIZE=${MDSSIZE}
41 SAVED_OSTSIZE=${OSTSIZE}
42 SAVED_OSTCOUNT=${OSTCOUNT}
43 # use small MDS + OST size to speed formatting time
44 # do not use too small MDSSIZE/OSTSIZE, which affects the default journal size
45 # 200M MDT device can guarantee uninitialized groups during the OI scrub
48 # no need for too many OSTs; fewer OSTs reduce the format/start/stop overhead
50 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
54 # build up a clean test environment.
58 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
59 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
61 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
62 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
64 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
65 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
67 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
68 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
69 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
71 [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
72 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
74 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
79 MDT_DEV="${FSNAME}-MDT0000"
80 OST_DEV="${FSNAME}-OST0000"
81 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
87 # use "lfsck_start -A" when we no longer need testing interop
88 for n in $(seq $MDSCOUNT); do
89 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
91 error "($error_id) Failed to start OI scrub on mds$n"
99 # use "lfsck_stop -A" when we no longer need testing interop
100 for n in $(seq $MDSCOUNT); do
101 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
102 error "($error_id) Failed to stop OI scrub on mds$n"
109 do_facet mds$n $LCTL get_param -n \
110 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
# Command strings used by the tests below; they are expanded unquoted so that
# extra options can be appended at the call site (e.g. "$START_SCRUB_ON_OST -r").
# NOTE(review): SCRUB_ONLY is set outside this excerpt - presumably by the
# version check above; confirm.

# Start the scrub on the single MDS / on ost1 via "lctl lfsck_start -M <device>".
113 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
114 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
# Stop the scrub on the single MDS via "lctl lfsck_stop -M <device>".
115 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Print the osd-ldiskfs oi_scrub parameter for the MDT / OST device; callers
# pipe the output through awk to pick out single fields such as "flags".
116 SHOW_SCRUB="do_facet $SINGLEMDS \
117 $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
118 SHOW_SCRUB_ON_OST="do_facet ost1 \
119 $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
# Server mount options: the second form adds "noscrub", used by the tests that
# start targets "with OI scrub disabled".
120 MOUNT_OPTS_SCRUB="-o user_xattr"
121 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
129 echo "preparing... $(date)"
130 for n in $(seq $MDSCOUNT); do
131 echo "creating $nfiles files on mds$n"
132 if [ $n -eq 1 ]; then
133 mkdir $DIR/$tdir/mds$n ||
134 error "Failed to create directory mds$n"
136 $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
137 error "Failed to create remote directory mds$n"
139 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
140 error "Failed to copy files to mds$n"
141 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
142 error "mkdir failed on mds$n"
143 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
144 /dev/null || error "create failed on mds$n"
145 if [[ $nfiles -gt 0 ]]; then
146 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
147 /dev/null || error "createmany failed on mds$n"
150 echo "prepared $(date)."
151 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
152 for n in $(seq $MDSCOUNT); do
154 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
163 for n in $(seq $MDSCOUNT); do
164 start mds$n $(mdsdevname $n) $opts >/dev/null ||
165 error "($error_id) Failed to start mds$n"
173 for n in $(seq $MDSCOUNT); do
174 echo "stopping mds$n"
175 stop mds$n >/dev/null ||
176 error "($error_id) Failed to stop mds$n"
180 scrub_check_status() {
185 for n in $(seq $MDSCOUNT); do
186 wait_update_facet mds$n "$LCTL get_param -n \
187 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
188 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
189 error "($error_id) Expected '$expected' on mds$n"
193 scrub_check_flags() {
199 for n in $(seq $MDSCOUNT); do
200 actual=$(do_facet mds$n $LCTL get_param -n \
201 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
202 awk '/^flags/ { print $2 }')
203 if [ "$actual" != "$expected" ]; then
204 error "($error_id) Expected '$expected' on mds$n, but" \
210 scrub_check_params() {
216 for n in $(seq $MDSCOUNT); do
217 actual=$(do_facet mds$n $LCTL get_param -n \
218 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
219 awk '/^param/ { print $2 }')
220 if [ "$actual" != "$expected" ]; then
221 error "($error_id) Expected '$expected' on mds$n, but" \
227 scrub_check_repaired() {
233 for n in $(seq $MDSCOUNT); do
234 actual=$(do_facet mds$n $LCTL get_param -n \
235 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
236 awk '/^updated/ { print $2 }')
238 if [ $expected -eq 0 -a $actual -ne 0 ]; then
239 error "($error_id) Expected no repaired on mds$n, but" \
243 if [ $expected -ne 0 -a $actual -lt $expected ]; then
244 error "($error_id) Expected '$expected' on mds$n, but" \
254 for n in $(seq $MDSCOUNT); do
255 diff -q $LUSTRE/tests/test-framework.sh \
256 $DIR/$tdir/mds$n/test-framework.sh ||
257 error "($error_id) File data check failed"
261 scrub_check_data2() {
266 for n in $(seq $MDSCOUNT); do
267 diff -q $LUSTRE/tests/$filename \
268 $DIR/$tdir/mds$n/$filename ||
269 error "($error_id) File data check failed"
278 for n in $(seq $MDSCOUNT); do
279 mds_remove_ois mds$n $index ||
280 error "($error_id) Failed to remove OI .$index on mds$n"
284 scrub_backup_restore() {
289 for n in $(seq $MDSCOUNT); do
# Run backup/restore on this MDS; on failure, report the caller-supplied step
# id ($error_id) in the message, as the other scrub_* helpers do.
290 mds_backup_restore mds$n $igif ||
291 error "($error_id) Backup/restore on mds$n failed"
295 scrub_enable_auto() {
296 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
297 osd-ldiskfs.*.auto_scrub=1
301 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
306 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
307 osd-ldiskfs.*.full_scrub_ratio=$ratio
310 full_scrub_threshold_rate() {
311 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
316 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
317 osd-ldiskfs.*.full_scrub_threshold_rate=$rate
322 echo "starting MDTs without disabling OI scrub"
323 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
324 scrub_check_status 2 init
325 scrub_check_flags 3 ""
326 mount_client $MOUNT || error "(4) Fail to start client!"
329 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
333 echo "start $SINGLEMDS without disabling OI scrub"
334 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
336 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
337 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
339 mount_client $MOUNT || error "(4) Fail to start client!"
340 #define OBD_FAIL_OSD_FID_MAPPING 0x193
341 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
342 # update .lustre OI mapping
344 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
345 umount_client $MOUNT || error "(5) Fail to stop client!"
347 echo "stop $SINGLEMDS"
348 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
350 echo "start $SINGLEMDS with disabling OI scrub"
351 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
352 error "(7) Fail to start MDS!"
354 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
355 [ "$FLAGS" == "inconsistent" ] ||
356 error "(9) Expect 'inconsistent', but got '$FLAGS'"
358 run_test 1a "Auto trigger initial OI scrub when server mounts"
363 echo "start MDTs without disabling OI scrub"
364 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
365 scrub_check_status 3 completed
366 mount_client $MOUNT || error "(4) Fail to start client!"
369 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
374 # OI files to be removed:
376 # idx 2: oi.16.{2,4,8,16,32}
377 # idx 3: oi.16.{3,9,27}
378 for index in 0 2 3; do
380 scrub_remove_ois 1 $index
381 echo "start MDTs with OI scrub disabled"
382 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
383 scrub_check_flags 3 recreated
385 scrub_check_status 5 completed
386 scrub_check_flags 6 ""
389 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
393 scrub_backup_restore 1
394 echo "starting MDTs without disabling OI scrub"
395 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
396 scrub_check_status 3 completed
397 mount_client $MOUNT || error "(4) Fail to start client!"
400 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
402 # test_3 is obsolete, it will be covered by test_5.
404 formatall > /dev/null
408 scrub_backup_restore 1
409 echo "starting MDTs with OI scrub disabled"
410 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
411 scrub_check_status 3 init
412 scrub_check_flags 4 inconsistent
414 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
418 scrub_backup_restore 1
419 echo "starting MDTs with OI scrub disabled"
420 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
421 scrub_check_flags 4 inconsistent
422 mount_client $MOUNT || error "(5) Fail to start client!"
428 scrub_check_status 7 completed
429 scrub_check_flags 8 ""
432 for n in $(seq $MDSCOUNT); do
433 updated0[$n]=$(scrub_status $n |
434 awk '/^sf_items_updated_prior/ { print $2 }')
437 scrub_check_data2 sanity-scrub.sh 9
441 for n in $(seq $MDSCOUNT); do
442 updated1[$n]=$(scrub_status $n |
443 awk '/^sf_items_updated_prior/ { print $2 }')
444 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
445 error "(10) NOT auto trigger full scrub as expected"
448 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
452 scrub_backup_restore 1
453 echo "starting MDTs with OI scrub disabled"
454 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
455 scrub_check_flags 4 inconsistent
456 mount_client $MOUNT || error "(5) Fail to start client!"
459 full_scrub_threshold_rate 10000
463 scrub_check_status 7 completed
464 scrub_check_flags 8 ""
467 for n in $(seq $MDSCOUNT); do
468 updated0[$n]=$(scrub_status $n |
469 awk '/^sf_items_updated_prior/ { print $2 }')
472 scrub_check_data2 sanity-scrub.sh 9
475 scrub_check_status 10 completed
476 scrub_check_flags 11 ""
479 for n in $(seq $MDSCOUNT); do
480 updated1[$n]=$(scrub_status $n |
481 awk '/^sf_items_updated_prior/ { print $2 }')
482 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
483 error "(12) Auto trigger full scrub unexpectedly"
486 for n in $(seq $MDSCOUNT); do
487 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
488 error "(13) fail to ls"
492 scrub_check_status 14 completed
493 scrub_check_flags 15 ""
495 for n in $(seq $MDSCOUNT); do
496 updated0[$n]=$(scrub_status $n |
497 awk '/^sf_items_updated_prior/ { print $2 }')
498 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
499 error "(16) Auto trigger full scrub unexpectedly"
502 for n in $(seq $MDSCOUNT); do
503 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
507 for n in $(seq $MDSCOUNT); do
508 updated1[$n]=$(scrub_status $n |
509 awk '/^sf_items_updated_prior/ { print $2 }')
510 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
511 error "(18) NOT auto trigger full scrub as expected"
514 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
518 scrub_backup_restore 1
519 echo "starting MDTs with OI scrub disabled"
520 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
521 scrub_check_flags 4 inconsistent
522 mount_client $MOUNT || error "(5) Fail to start client!"
525 full_scrub_threshold_rate 20
529 scrub_check_status 7 completed
530 scrub_check_flags 8 ""
533 for n in $(seq $MDSCOUNT); do
534 updated0[$n]=$(scrub_status $n |
535 awk '/^sf_items_updated_prior/ { print $2 }')
538 scrub_check_data2 sanity-scrub.sh 9
541 scrub_check_status 10 completed
542 scrub_check_flags 11 ""
545 for n in $(seq $MDSCOUNT); do
546 updated1[$n]=$(scrub_status $n |
547 awk '/^sf_items_updated_prior/ { print $2 }')
548 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
549 error "(12) Auto trigger full scrub unexpectedly"
552 for n in $(seq $MDSCOUNT); do
553 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
554 error "(13) fail to ls"
558 scrub_check_status 14 completed
559 scrub_check_flags 15 ""
561 for n in $(seq $MDSCOUNT); do
562 updated0[$n]=$(scrub_status $n |
563 awk '/^sf_items_updated_prior/ { print $2 }')
564 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
565 error "(16) Auto trigger full scrub unexpectedly"
568 for n in $(seq $MDSCOUNT); do
569 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
573 for n in $(seq $MDSCOUNT); do
574 updated1[$n]=$(scrub_status $n |
575 awk '/^sf_items_updated_prior/ { print $2 }')
576 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
577 error "(18) NOT auto trigger full scrub as expected"
580 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
583 formatall > /dev/null
587 scrub_backup_restore 1
588 echo "starting MDTs with OI scrub disabled (1)"
589 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
590 scrub_check_status 3 init
591 scrub_check_flags 4 inconsistent
592 mount_client $MOUNT || error "(5) Fail to start client!"
595 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
596 do_nodes $(comma_list $(mdts_nodes)) \
597 $LCTL set_param fail_val=3 fail_loc=0x190
601 umount_client $MOUNT || error "(7) Fail to stop client!"
602 scrub_check_status 8 scanning
604 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
605 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
610 do_nodes $(comma_list $(mdts_nodes)) \
611 $LCTL set_param fail_loc=0 fail_val=0
613 echo "starting MDTs with OI scrub disabled (2)"
614 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
615 scrub_check_status 11 crashed
618 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
619 do_nodes $(comma_list $(mdts_nodes)) \
620 $LCTL set_param fail_val=3 fail_loc=0x190
622 echo "starting MDTs without disabling OI scrub"
623 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
624 scrub_check_status 14 scanning
626 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
627 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
629 scrub_check_status 15 failed
630 mount_client $MOUNT || error "(16) Fail to start client!"
633 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
634 do_nodes $(comma_list $(mdts_nodes)) \
635 $LCTL set_param fail_val=3 fail_loc=0x190
638 for n in $(seq $MDSCOUNT); do
639 stat $DIR/$tdir/mds$n/${tfile}800 ||
640 error "(17) Failed to stat mds$n/${tfile}800"
643 scrub_check_status 18 scanning
645 do_nodes $(comma_list $(mdts_nodes)) \
646 $LCTL set_param fail_loc=0 fail_val=0
648 scrub_check_status 19 completed
649 scrub_check_flags 20 ""
651 run_test 5 "OI scrub state machine"
655 scrub_backup_restore 1
656 echo "starting MDTs with OI scrub disabled"
657 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
658 scrub_check_flags 4 inconsistent
659 mount_client $MOUNT || error "(5) Fail to start client!"
662 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
663 do_nodes $(comma_list $(mdts_nodes)) \
664 $LCTL set_param fail_val=2 fail_loc=0x190
669 # Sleep 5 sec to guarantee at least one object processed by OI scrub
671 # Fail the OI scrub to guarantee there is at least one checkpoint
672 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
673 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
675 scrub_check_status 7 failed
677 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
678 do_nodes $(comma_list $(mdts_nodes)) \
679 $LCTL set_param fail_val=3 fail_loc=0x190
682 for n in $(seq $MDSCOUNT); do
683 # stat will re-trigger OI scrub
684 stat $DIR/$tdir/mds$n/${tfile}800 ||
685 error "(8) Failed to stat mds$n/${tfile}800"
688 umount_client $MOUNT || error "(9) Fail to stop client!"
689 scrub_check_status 10 scanning
691 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
692 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
696 for n in $(seq $MDSCOUNT); do
697 position0[$n]=$(scrub_status $n |
698 awk '/^last_checkpoint_position/ {print $2}')
699 position0[$n]=$((${position0[$n]} + 1))
704 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
705 do_nodes $(comma_list $(mdts_nodes)) \
706 $LCTL set_param fail_val=3 fail_loc=0x190
708 echo "starting MDTs without disabling OI scrub"
709 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
711 scrub_check_status 13 scanning
714 for n in $(seq $MDSCOUNT); do
715 position1[$n]=$(scrub_status $n |
716 awk '/^latest_start_position/ {print $2}')
717 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
718 error "(14) Expected position ${position0[$n]}, but" \
719 "got ${position1[$n]}"
723 do_nodes $(comma_list $(mdts_nodes)) \
724 $LCTL set_param fail_loc=0 fail_val=0
726 scrub_check_status 15 completed
727 scrub_check_flags 16 ""
729 run_test 6 "OI scrub resumes from last checkpoint"
733 scrub_backup_restore 1
734 echo "starting MDTs with OI scrub disabled"
735 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
736 scrub_check_flags 4 inconsistent
737 mount_client $MOUNT || error "(5) Fail to start client!"
740 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
741 do_nodes $(comma_list $(mdts_nodes)) \
742 $LCTL set_param fail_val=3 fail_loc=0x190
748 for n in $(seq $MDSCOUNT); do
749 stat $DIR/$tdir/mds$n/${tfile}300 ||
750 error "(7) Failed to stat mds$n/${tfile}300!"
753 scrub_check_status 8 scanning
754 scrub_check_flags 9 inconsistent,auto
756 do_nodes $(comma_list $(mdts_nodes)) \
757 $LCTL set_param fail_loc=0 fail_val=0
759 scrub_check_status 10 completed
762 run_test 7 "System is available during OI scrub scanning"
766 scrub_backup_restore 1
767 echo "starting MDTs with OI scrub disabled"
768 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
769 scrub_check_flags 4 inconsistent
771 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
772 do_nodes $(comma_list $(mdts_nodes)) \
773 $LCTL set_param fail_val=1 fail_loc=0x190
776 scrub_check_status 6 scanning
778 scrub_check_status 8 stopped
780 scrub_check_status 10 scanning
782 do_nodes $(comma_list $(mdts_nodes)) \
783 $LCTL set_param fail_loc=0 fail_val=0
785 scrub_check_status 11 completed
786 scrub_check_flags 12 ""
788 run_test 8 "Control OI scrub manually"
791 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
792 skip "Testing on UP system, the speed may be inaccurate."
797 scrub_backup_restore 1
799 echo "starting MDTs with OI scrub disabled"
800 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
801 scrub_check_flags 4 inconsistent
803 local BASE_SPEED1=100
805 # OI scrub should run with full speed under inconsistent case
806 scrub_start 5 -s $BASE_SPEED1
809 scrub_check_status 6 completed
810 scrub_check_flags 7 ""
811 # OI scrub should run with limited speed under non-inconsistent case
812 scrub_start 8 -s $BASE_SPEED1 -r
815 scrub_check_status 9 scanning
817 # Do NOT ignore that there are 1024 pre-fetched items. And there
818 # may be time error, normally it should be less than 2 seconds.
819 # We allow another 20% schedule error.
820 local PRE_FETCHED=1024
822 # MAX_MARGIN = 1.2 = 12 / 10
823 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
824 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
826 for n in $(seq $MDSCOUNT); do
827 local SPEED=$(scrub_status $n | \
828 awk '/^average_speed/ { print $2 }')
829 [ $SPEED -lt $MAX_SPEED ] ||
830 error "(10) Got speed $SPEED, expected less than" \
835 local BASE_SPEED2=300
837 for n in $(seq $MDSCOUNT); do
838 do_facet mds$n $LCTL set_param -n \
839 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
843 # MIN_MARGIN = 0.8 = 8 / 10
844 local MIN_SPEED=$(((PRE_FETCHED + \
845 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
846 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
847 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
848 # MAX_MARGIN = 1.2 = 12 / 10
849 MAX_SPEED=$(((PRE_FETCHED + \
850 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
851 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
852 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
853 for n in $(seq $MDSCOUNT); do
854 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
855 [ $SPEED -gt $MIN_SPEED ] ||
856 error "(11) Got speed $SPEED, expected more than" \
858 [ $SPEED -lt $MAX_SPEED ] ||
859 error "(12) Got speed $SPEED, expected less than" \
862 do_facet mds$n $LCTL set_param -n \
863 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
866 scrub_check_status 13 completed
868 run_test 9 "OI scrub speed control"
872 scrub_backup_restore 1
873 echo "starting mds$n with OI scrub disabled (1)"
874 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
875 scrub_check_flags 4 inconsistent
876 mount_client $MOUNT || error "(5) Fail to start client!"
879 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
880 do_nodes $(comma_list $(mdts_nodes)) \
881 $LCTL set_param fail_val=1 fail_loc=0x190
885 scrub_check_status 7 scanning
886 umount_client $MOUNT || error "(8) Fail to stop client!"
888 echo "starting MDTs with OI scrub disabled (2)"
889 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
890 scrub_check_status 11 paused
892 echo "starting MDTs without disabling OI scrub"
893 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
894 scrub_check_status 14 scanning
896 do_nodes $(comma_list $(mdts_nodes)) \
897 $LCTL set_param fail_loc=0 fail_val=0
899 scrub_check_status 15 completed
900 scrub_check_flags 16 ""
902 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
904 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
907 scrub_backup_restore 1
908 echo "starting MDTs with OI scrub disabled"
909 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
910 scrub_check_flags 4 inconsistent
912 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
913 do_nodes $(comma_list $(mdts_nodes)) \
914 $LCTL set_param fail_val=3 fail_loc=0x190
917 scrub_check_status 6 scanning
919 echo "starting MDTs with OI scrub disabled"
920 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
921 scrub_check_status 9 paused
923 echo "starting MDTs without disabling OI scrub"
924 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
925 scrub_check_status 12 scanning
927 do_nodes $(comma_list $(mdts_nodes)) \
928 $LCTL set_param fail_loc=0 fail_val=0
930 scrub_check_status 13 completed
931 scrub_check_flags 14 ""
933 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
941 for n in $(seq $MDSCOUNT); do
942 $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
943 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
945 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
946 error "(2) Fail to create under $tdir/mds$n"
949 # reset OI scrub start point by force
951 scrub_check_status 4 completed
956 # OI scrub should skip the new created objects for the first accessing
957 # notice we're creating a new llog for every OST on every startup
958 # new features can make this even less stable, so we only check that
959 # the number of skipped files is more than the number of known created files
960 local MINIMUM=$((CREATED + 1)) # files + directory
961 for n in $(seq $MDSCOUNT); do
962 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
963 [ $SKIPPED -lt $MINIMUM ] &&
964 error "(5) Expect at least $MINIMUM objects" \
965 "skipped on mds$n, but got $SKIPPED"
967 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
970 # reset OI scrub start point by force
972 scrub_check_status 7 completed
974 # OI scrub should skip the new created object only once
975 for n in $(seq $MDSCOUNT); do
976 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
977 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
979 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
980 error "(8) Expect 0 objects skipped on mds$n, but" \
984 run_test 11 "OI scrub skips the new created objects only once"
988 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
990 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
991 do_facet ost1 $LCTL set_param fail_loc=0x195
992 local count=$(precreated_ost_obj_count 0 0)
994 createmany -o $DIR/$tdir/f $((count + 32))
995 umount_client $MOUNT || error "(1) Fail to stop client!"
997 stop ost1 || error "(2) Fail to stop ost1"
999 #define OBD_FAIL_OST_NODESTROY 0x233
1000 do_facet ost1 $LCTL set_param fail_loc=0x233
1002 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1003 error "(3) Fail to start ost1"
1005 mount_client $MOUNT || error "(4) Fail to start client!"
1007 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1009 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1011 do_facet ost1 $LCTL set_param fail_loc=0
# Wait for the OI scrub on ost1 to report status "completed". The error message
# names that literal status, because no "expected" variable is set in this test.
1012 wait_update_facet ost1 "$LCTL get_param -n \
1013 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1014 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1015 error "(7) Expected 'completed' on ost1"
1017 ls -ail $DIR/$tdir > /dev/null || {
1019 error "(8) ls should succeed"
1022 run_test 12 "OI scrub can rebuild invalid /O entries"
1025 check_mount_and_prep
1026 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1028 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1029 do_facet ost1 $LCTL set_param fail_loc=0x196
1030 local count=$(precreated_ost_obj_count 0 0)
1032 createmany -o $DIR/$tdir/f $((count + 32))
1033 do_facet ost1 $LCTL set_param fail_loc=0
1035 umount_client $MOUNT || error "(1) Fail to stop client!"
1037 stop ost1 || error "(2) Fail to stop ost1"
1039 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1040 error "(3) Fail to start ost1"
1042 mount_client $MOUNT || error "(4) Fail to start client!"
1044 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1046 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Wait for the OI scrub on ost1 to report status "completed". The error message
# names that literal status, because no "expected" variable is set in this test.
1048 wait_update_facet ost1 "$LCTL get_param -n \
1049 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1050 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1051 error "(7) Expected 'completed' on ost1"
1053 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1055 run_test 13 "OI scrub can rebuild missed /O entries"
1058 check_mount_and_prep
1059 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1061 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1062 do_facet ost1 $LCTL set_param fail_loc=0x196
1063 local count=$(precreated_ost_obj_count 0 0)
1065 createmany -o $DIR/$tdir/f $((count + 32))
1066 do_facet ost1 $LCTL set_param fail_loc=0
1068 umount_client $MOUNT || error "(1) Fail to stop client!"
1070 stop ost1 || error "(2) Fail to stop ost1"
1073 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1074 error "(3) Fail to run e2fsck error"
1076 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1077 error "(4) Fail to start ost1"
1079 mount_client $MOUNT || error "(5) Fail to start client!"
1081 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1082 awk '/^lf_repa[ri]*ed/ { print $2 }')
1083 [ $LF_REPAIRED -gt 0 ] ||
1084 error "(6) Some entry under /lost+found should be repaired"
1086 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1088 run_test 14 "OI scrub can repair objects under lost+found"
1091 # skip test_15 for LU-4182
1092 [ $MDSCOUNT -ge 2 ] && skip "skip now for >= 2 MDTs" && return
1093 local server_version=$(lustre_version_code $SINGLEMDS)
1095 scrub_backup_restore 1
1096 echo "starting MDTs with OI scrub disabled"
1097 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1098 scrub_check_status 3 init
1099 scrub_check_flags 4 inconsistent
1101 # run under dryrun mode
1102 if [ $server_version -lt $(version_code 2.5.58) ]; then
1103 scrub_start 5 --dryrun on
1105 scrub_start 5 --dryrun
1107 scrub_check_status 6 completed
1108 scrub_check_flags 7 inconsistent
1109 scrub_check_params 8 dryrun
1110 scrub_check_repaired 9 20
1112 # run under dryrun mode again
1113 if [ $server_version -lt $(version_code 2.5.58) ]; then
1114 scrub_start 10 --dryrun on
1116 scrub_start 10 --dryrun
1118 scrub_check_status 11 completed
1119 scrub_check_flags 12 inconsistent
1120 scrub_check_params 13 dryrun
1121 scrub_check_repaired 14 20
1123 # run under normal mode
1125 # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
1126 # work under Lustre-2.y (y >= 6), the test script should be fixed as
1127 # "-noff" or "--dryrun=off" or nothing by default.
1128 if [ $server_version -lt $(version_code 2.5.58) ]; then
1129 scrub_start 15 --dryrun off
1133 scrub_check_status 16 completed
1134 scrub_check_flags 17 ""
1135 scrub_check_params 18 ""
1136 scrub_check_repaired 19 20
1138 # run under normal mode again
1139 if [ $server_version -lt $(version_code 2.5.58) ]; then
1140 scrub_start 20 --dryrun off
1144 scrub_check_status 21 completed
1145 scrub_check_flags 22 ""
1146 scrub_check_params 23 ""
1147 scrub_check_repaired 24 0
1149 run_test 15 "Dryrun mode OI scrub"
1151 # restore MDS/OST size
1152 MDSSIZE=${SAVED_MDSSIZE}
1153 OSTSIZE=${SAVED_OSTSIZE}
1154 OSTCOUNT=${SAVED_OSTCOUNT}
1156 # cleanup the system at last