3 # Run select tests by setting ONLY, or as arguments to the script.
4 # Skip specific tests by setting EXCEPT.
10 #Bug number for excepting test 6380
11 ALWAYS_EXCEPT="$SANITY_SCRUB_EXCEPT 1b 1c 2 3 4a 4b 4c 5 6 7 8 9 10 15"
13 [ "$SLOW" = "no" ] && EXCEPT_SLOW=""
14 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
16 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
17 . $LUSTRE/tests/test-framework.sh
19 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
22 require_dsh_mds || exit 0
24 SAVED_MDSSIZE=${MDSSIZE}
25 SAVED_OSTSIZE=${OSTSIZE}
26 SAVED_OSTCOUNT=${OSTCOUNT}
27 # use small MDS + OST size to speed formatting time
28 # do not make MDSSIZE/OSTSIZE too small, since they affect the default journal size
29 # 200M MDT device can guarantee uninitialized groups during the OI scrub
32 # no need for too many OSTs; cap the count to reduce the format/start/stop overhead
33 [ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
37 # build up a clean test environment.
41 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
42 skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
44 [ $(facet_fstype ost1) != "ldiskfs" ] &&
45 skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
47 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
48 skip "Need MDS version at least 2.2.90" && check_and_cleanup_lustre &&
51 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] &&
52 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a"
54 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
55 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4"
57 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] &&
58 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
60 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.90) ]] &&
61 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.50) ]] &&
62 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15"
64 [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] &&
65 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14"
67 [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] &&
# Target names with index 0000 for the MDT and the OST, built from $FSNAME.
72 MDT_DEV="${FSNAME}-MDT0000"
73 OST_DEV="${FSNAME}-OST0000"
# Result of mdsdevname for $SINGLEMDS; the expansion strips every "mds"
# substring from the facet name before it is passed on.
74 MDT_DEVNAME=$(mdsdevname ${SINGLEMDS//mds/})
80 # use "lfsck_start -A" when we no longer need testing interop
81 for n in $(seq $MDSCOUNT); do
82 do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \
84 error "($error_id) Failed to start OI scrub on mds$n"
92 # use "lfsck_stop -A" when we no longer need testing interop
93 for n in $(seq $MDSCOUNT); do
94 do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) ||
95 error "($error_id) Failed to stop OI scrub on mds$n"
102 do_facet mds$n $LCTL get_param -n \
103 osd-ldiskfs.$(facet_svc mds$n).oi_scrub
# Command strings used to start, stop and query OI scrub on $SINGLEMDS and
# on ost1.  $SCRUB_ONLY is expected to be set before this point -- TODO
# confirm where it is assigned.
106 START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY"
107 START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY"
108 STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}"
# Read the oi_scrub parameter of the osd-ldiskfs device for the MDT / OST.
109 SHOW_SCRUB="do_facet $SINGLEMDS \
110 $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub"
111 SHOW_SCRUB_ON_OST="do_facet ost1 \
112 $LCTL get_param -n osd-ldiskfs.${OST_DEV}.oi_scrub"
# Server mount options; the second set additionally carries "noscrub".
113 MOUNT_OPTS_SCRUB="-o user_xattr"
114 MOUNT_OPTS_NOSCRUB="-o user_xattr,noscrub"
122 echo "preparing... $(date)"
123 for n in $(seq $MDSCOUNT); do
124 echo "creating $nfiles files on mds$n"
125 if [ $n -eq 1 ]; then
126 mkdir $DIR/$tdir/mds$n ||
127 error "Failed to create directory mds$n"
129 $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
130 error "Failed to create remote directory mds$n"
132 cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
133 error "Failed to copy files to mds$n"
134 mkdir -p $DIR/$tdir/mds$n/d_$tfile ||
135 error "mkdir failed on mds$n"
136 createmany -m $DIR/$tdir/mds$n/d_$tfile/f 2 > \
137 /dev/null || error "create failed on mds$n"
138 if [[ $nfiles -gt 0 ]]; then
139 createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
140 /dev/null || error "createmany failed on mds$n"
143 echo "prepared $(date)."
144 cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
145 for n in $(seq $MDSCOUNT); do
147 stop mds$n > /dev/null || error "Fail to stop MDS$n!"
156 for n in $(seq $MDSCOUNT); do
157 start mds$n $(mdsdevname $n) $opts >/dev/null ||
158 error "($error_id) Failed to start mds$n"
166 for n in $(seq $MDSCOUNT); do
167 echo "stopping mds$n"
168 stop mds$n >/dev/null ||
169 error "($error_id) Failed to stop mds$n"
# For every MDT (mds1..mds$MDSCOUNT), hand wait_update_facet a command that
# extracts the "status" field of that MDT's oi_scrub parameter, together
# with $expected and the value 6 (presumably a timeout in seconds -- TODO
# confirm).  If wait_update_facet fails, report it through error.
# $error_id and $expected are presumably the first and second arguments
# (callers pass e.g. "3 completed") -- TODO confirm.
173 scrub_check_status() {
178 for n in $(seq $MDSCOUNT); do
179 wait_update_facet mds$n "$LCTL get_param -n \
180 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
181 awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
182 error "($error_id) Expected '$expected' on mds$n"
# For every MDT (mds1..mds$MDSCOUNT), read the "flags" field of its oi_scrub
# parameter and call error when it differs from $expected.
# $error_id and $expected are presumably the first and second arguments
# (callers pass e.g. "4 inconsistent") -- TODO confirm.
186 scrub_check_flags() {
192 for n in $(seq $MDSCOUNT); do
193 actual=$(do_facet mds$n $LCTL get_param -n \
194 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
195 awk '/^flags/ { print $2 }')
196 if [ "$actual" != "$expected" ]; then
197 error "($error_id) Expected '$expected' on mds$n, but" \
# For every MDT (mds1..mds$MDSCOUNT), read the "param" field of its oi_scrub
# parameter and call error when it differs from $expected.
# $error_id and $expected are presumably the first and second arguments
# (callers pass e.g. "8 dryrun") -- TODO confirm.
203 scrub_check_params() {
209 for n in $(seq $MDSCOUNT); do
210 actual=$(do_facet mds$n $LCTL get_param -n \
211 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
212 awk '/^param/ { print $2 }')
213 if [ "$actual" != "$expected" ]; then
214 error "($error_id) Expected '$expected' on mds$n, but" \
# For every MDT (mds1..mds$MDSCOUNT), read the "updated" field of its
# oi_scrub parameter and compare it with $expected:
#   - when $expected is 0, any non-zero value is reported through error;
#   - when $expected is non-zero, a value below $expected is reported.
# Both operands are used unquoted in numeric tests, so they must be
# non-empty integers.
# $error_id and $expected are presumably the first and second arguments
# (callers pass e.g. "9 20") -- TODO confirm.
220 scrub_check_repaired() {
226 for n in $(seq $MDSCOUNT); do
227 actual=$(do_facet mds$n $LCTL get_param -n \
228 osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
229 awk '/^updated/ { print $2 }')
231 if [ $expected -eq 0 -a $actual -ne 0 ]; then
232 error "($error_id) Expected no repaired on mds$n, but" \
236 if [ $expected -ne 0 -a $actual -lt $expected ]; then
237 error "($error_id) Expected '$expected' on mds$n, but" \
247 for n in $(seq $MDSCOUNT); do
248 diff -q $LUSTRE/tests/test-framework.sh \
249 $DIR/$tdir/mds$n/test-framework.sh ||
250 error "($error_id) File data check failed"
# For every MDT (mds1..mds$MDSCOUNT), compare $LUSTRE/tests/$filename with
# the copy under $DIR/$tdir/mds<n>/ using "diff -q" and call error when diff
# reports a difference or fails.
# $filename and $error_id are presumably the first and second arguments
# (callers pass e.g. "sanity-scrub.sh 9") -- TODO confirm.
254 scrub_check_data2() {
259 for n in $(seq $MDSCOUNT); do
260 diff -q $LUSTRE/tests/$filename \
261 $DIR/$tdir/mds$n/$filename ||
262 error "($error_id) File data check failed"
271 for n in $(seq $MDSCOUNT); do
272 mds_remove_ois mds$n $index ||
273 error "($error_id) Failed to remove OI .$index on mds$n"
# Run mds_backup_restore on every MDT (mds1..mds$MDSCOUNT), passing $igif
# through, and report a failure through error.  The message is prefixed with
# the caller's step id in $error_id, like the other scrub_* helpers.
# $error_id and $igif are presumably the first and second arguments
# (callers pass e.g. "1") -- TODO confirm.
277 scrub_backup_restore() {
282 for n in $(seq $MDSCOUNT); do
283 mds_backup_restore mds$n $igif ||
284 error "($error_id) Backup/restore on mds$n failed"
# Set osd-ldiskfs.*.auto_scrub=1 via do_nodes on the comma-separated node
# list built from mdts_nodes.
288 scrub_enable_auto() {
289 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
290 osd-ldiskfs.*.auto_scrub=1
294 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
299 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
300 osd-ldiskfs.*.full_scrub_ratio=$ratio
# Set osd-ldiskfs.*.full_scrub_threshold_rate to $rate via do_nodes on the
# comma-separated node list built from mdts_nodes.
# The leading version test fires when $SINGLEMDS is at or below 2.6.50.
# NOTE(review): confirm which action that test guards (presumably an early
# return for servers without this parameter).
# $rate is presumably the first argument (callers pass e.g. "10000") --
# TODO confirm.
303 full_scrub_threshold_rate() {
304 [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] &&
309 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param -n \
310 osd-ldiskfs.*.full_scrub_threshold_rate=$rate
315 echo "starting MDTs without disabling OI scrub"
316 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
317 scrub_check_status 2 init
318 scrub_check_flags 3 ""
319 mount_client $MOUNT || error "(4) Fail to start client!"
322 run_test 0 "Do not auto trigger OI scrub for non-backup/restore case"
326 echo "start $SINGLEMDS without disabling OI scrub"
327 scrub_start_mds 1 "$MOUNT_OPTS_SCRUB"
329 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
330 [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
332 mount_client $MOUNT || error "(4) Fail to start client!"
333 #define OBD_FAIL_OSD_FID_MAPPING 0x193
334 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
335 # update .lustre OI mapping
337 do_facet $SINGLEMDS $LCTL set_param fail_loc=0
338 umount_client $MOUNT || error "(5) Fail to stop client!"
340 echo "stop $SINGLEMDS"
341 stop $SINGLEMDS > /dev/null || error "(6) Fail to stop MDS!"
343 echo "start $SINGLEMDS with disabling OI scrub"
344 start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
345 error "(7) Fail to start MDS!"
347 local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
348 [ "$FLAGS" == "inconsistent" ] ||
349 error "(9) Expect 'inconsistent', but got '$FLAGS'"
351 run_test 1a "Auto trigger initial OI scrub when server mounts"
356 echo "start MDTs without disabling OI scrub"
357 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
358 scrub_check_status 3 completed
359 mount_client $MOUNT || error "(4) Fail to start client!"
362 run_test 1b "Trigger OI scrub when MDT mounts for OI files remove/recreate case"
367 # OI files to be removed:
369 # idx 2: oi.16.{2,4,8,16,32}
370 # idx 3: oi.16.{3,9,27}
371 for index in 0 2 3; do
373 scrub_remove_ois 1 $index
374 echo "start MDTs with OI scrub disabled"
375 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
376 scrub_check_flags 3 recreated
378 scrub_check_status 5 completed
379 scrub_check_flags 6 ""
382 run_test 1c "Auto detect kinds of OI file(s) removed/recreated cases"
386 scrub_backup_restore 1
387 echo "starting MDTs without disabling OI scrub"
388 scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
389 scrub_check_status 3 completed
390 mount_client $MOUNT || error "(4) Fail to start client!"
393 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
395 # test_3 is obsolete, it will be covered by test_5.
397 formatall > /dev/null
401 scrub_backup_restore 1
402 echo "starting MDTs with OI scrub disabled"
403 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
404 scrub_check_status 3 init
405 scrub_check_flags 4 inconsistent
407 #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
411 scrub_backup_restore 1
412 echo "starting MDTs with OI scrub disabled"
413 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
414 scrub_check_flags 4 inconsistent
415 mount_client $MOUNT || error "(5) Fail to start client!"
421 scrub_check_status 7 completed
422 scrub_check_flags 8 ""
425 for n in $(seq $MDSCOUNT); do
426 updated0[$n]=$(scrub_status $n |
427 awk '/^sf_items_updated_prior/ { print $2 }')
430 scrub_check_data2 sanity-scrub.sh 9
434 for n in $(seq $MDSCOUNT); do
435 updated1[$n]=$(scrub_status $n |
436 awk '/^sf_items_updated_prior/ { print $2 }')
437 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
438 error "(10) NOT auto trigger full scrub as expected"
441 run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)"
445 scrub_backup_restore 1
446 echo "starting MDTs with OI scrub disabled"
447 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
448 scrub_check_flags 4 inconsistent
449 mount_client $MOUNT || error "(5) Fail to start client!"
452 full_scrub_threshold_rate 10000
456 scrub_check_status 7 completed
457 scrub_check_flags 8 ""
460 for n in $(seq $MDSCOUNT); do
461 updated0[$n]=$(scrub_status $n |
462 awk '/^sf_items_updated_prior/ { print $2 }')
465 scrub_check_data2 sanity-scrub.sh 9
468 scrub_check_status 10 completed
469 scrub_check_flags 11 ""
472 for n in $(seq $MDSCOUNT); do
473 updated1[$n]=$(scrub_status $n |
474 awk '/^sf_items_updated_prior/ { print $2 }')
475 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
476 error "(12) Auto trigger full scrub unexpectedly"
479 for n in $(seq $MDSCOUNT); do
480 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
481 error "(13) fail to ls"
485 scrub_check_status 14 completed
486 scrub_check_flags 15 ""
488 for n in $(seq $MDSCOUNT); do
489 updated0[$n]=$(scrub_status $n |
490 awk '/^sf_items_updated_prior/ { print $2 }')
491 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
492 error "(16) Auto trigger full scrub unexpectedly"
495 for n in $(seq $MDSCOUNT); do
496 ls -l $DIR/$tdir/mds$n/d_${tfile}/ || error "(17) fail to ls"
500 for n in $(seq $MDSCOUNT); do
501 updated1[$n]=$(scrub_status $n |
502 awk '/^sf_items_updated_prior/ { print $2 }')
503 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
504 error "(18) NOT auto trigger full scrub as expected"
507 run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)"
511 scrub_backup_restore 1
512 echo "starting MDTs with OI scrub disabled"
513 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
514 scrub_check_flags 4 inconsistent
515 mount_client $MOUNT || error "(5) Fail to start client!"
518 full_scrub_threshold_rate 20
522 scrub_check_status 7 completed
523 scrub_check_flags 8 ""
526 for n in $(seq $MDSCOUNT); do
527 updated0[$n]=$(scrub_status $n |
528 awk '/^sf_items_updated_prior/ { print $2 }')
531 scrub_check_data2 sanity-scrub.sh 9
534 scrub_check_status 10 completed
535 scrub_check_flags 11 ""
538 for n in $(seq $MDSCOUNT); do
539 updated1[$n]=$(scrub_status $n |
540 awk '/^sf_items_updated_prior/ { print $2 }')
541 [ ${updated0[$n]} -lt ${updated1[$n]} ] ||
542 error "(12) Auto trigger full scrub unexpectedly"
545 for n in $(seq $MDSCOUNT); do
546 ls -l $DIR/$tdir/mds$n/*.sh > /dev/null ||
547 error "(13) fail to ls"
551 scrub_check_status 14 completed
552 scrub_check_flags 15 ""
554 for n in $(seq $MDSCOUNT); do
555 updated0[$n]=$(scrub_status $n |
556 awk '/^sf_items_updated_prior/ { print $2 }')
557 [ ${updated0[$n]} -gt ${updated1[$n]} ] ||
558 error "(16) Auto trigger full scrub unexpectedly"
561 for n in $(seq $MDSCOUNT); do
562 ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls"
566 for n in $(seq $MDSCOUNT); do
567 updated1[$n]=$(scrub_status $n |
568 awk '/^sf_items_updated_prior/ { print $2 }')
569 [ ${updated0[$n]} -eq ${updated1[$n]} ] ||
570 error "(18) NOT auto trigger full scrub as expected"
573 run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)"
576 formatall > /dev/null
580 scrub_backup_restore 1
581 echo "starting MDTs with OI scrub disabled"
582 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
583 scrub_check_status 3 init
584 scrub_check_flags 4 inconsistent
585 mount_client $MOUNT || error "(5) Fail to start client!"
588 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
589 do_nodes $(comma_list $(mdts_nodes)) \
590 $LCTL set_param fail_val=3 fail_loc=0x190
594 umount_client $MOUNT || error "(7) Fail to stop client!"
595 scrub_check_status 8 scanning
597 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
598 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
603 do_nodes $(comma_list $(mdts_nodes)) \
604 $LCTL set_param fail_loc=0 fail_val=0
606 echo "starting MDTs with OI scrub disabled"
607 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
608 scrub_check_status 11 crashed
611 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
612 do_nodes $(comma_list $(mdts_nodes)) \
613 $LCTL set_param fail_val=3 fail_loc=0x190
615 echo "starting MDTs without disabling OI scrub"
616 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
617 scrub_check_status 14 scanning
619 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
620 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
622 scrub_check_status 15 failed
623 mount_client $MOUNT || error "(16) Fail to start client!"
626 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
627 do_nodes $(comma_list $(mdts_nodes)) \
628 $LCTL set_param fail_val=3 fail_loc=0x190
631 for n in $(seq $MDSCOUNT); do
632 stat $DIR/$tdir/mds$n/${tfile}800 ||
633 error "(17) Failed to stat mds$n/${tfile}800"
636 scrub_check_status 18 scanning
638 do_nodes $(comma_list $(mdts_nodes)) \
639 $LCTL set_param fail_loc=0 fail_val=0
641 scrub_check_status 19 completed
642 scrub_check_flags 20 ""
644 run_test 5 "OI scrub state machine"
648 scrub_backup_restore 1
649 echo "starting MDTs with OI scrub disabled"
650 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
651 scrub_check_flags 4 inconsistent
652 mount_client $MOUNT || error "(5) Fail to start client!"
655 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
656 do_nodes $(comma_list $(mdts_nodes)) \
657 $LCTL set_param fail_val=2 fail_loc=0x190
662 # Sleep 5 sec to guarantee at least one object processed by OI scrub
664 # Fail the OI scrub to guarantee there is at least one checkpoint
665 #define OBD_FAIL_OSD_SCRUB_FATAL 0x192
666 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
668 scrub_check_status 7 failed
670 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
671 do_nodes $(comma_list $(mdts_nodes)) \
672 $LCTL set_param fail_val=3 fail_loc=0x190
675 for n in $(seq $MDSCOUNT); do
676 # stat will re-trigger OI scrub
677 stat $DIR/$tdir/mds$n/${tfile}800 ||
678 error "(8) Failed to stat mds$n/${tfile}800"
681 umount_client $MOUNT || error "(9) Fail to stop client!"
682 scrub_check_status 10 scanning
684 #define OBD_FAIL_OSD_SCRUB_CRASH 0x191
685 do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
689 for n in $(seq $MDSCOUNT); do
690 position0[$n]=$(scrub_status $n |
691 awk '/^last_checkpoint_position/ {print $2}')
692 position0[$n]=$((${position0[$n]} + 1))
697 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
698 do_nodes $(comma_list $(mdts_nodes)) \
699 $LCTL set_param fail_val=3 fail_loc=0x190
701 echo "starting MDTs without disabling OI scrub"
702 scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
704 scrub_check_status 13 scanning
707 for n in $(seq $MDSCOUNT); do
708 position1[$n]=$(scrub_status $n |
709 awk '/^latest_start_position/ {print $2}')
710 if [ ${position0[$n]} -ne ${position1[$n]} ]; then
711 error "(14) Expected position ${position0[$n]}, but" \
712 "got ${position1[$n]}"
716 do_nodes $(comma_list $(mdts_nodes)) \
717 $LCTL set_param fail_loc=0 fail_val=0
719 scrub_check_status 15 completed
720 scrub_check_flags 16 ""
722 run_test 6 "OI scrub resumes from last checkpoint"
726 scrub_backup_restore 1
727 echo "starting MDTs with OI scrub disabled"
728 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
729 scrub_check_flags 4 inconsistent
730 mount_client $MOUNT || error "(5) Fail to start client!"
733 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
734 do_nodes $(comma_list $(mdts_nodes)) \
735 $LCTL set_param fail_val=3 fail_loc=0x190
741 for n in $(seq $MDSCOUNT); do
742 stat $DIR/$tdir/mds$n/${tfile}300 ||
743 error "(7) Failed to stat mds$n/${tfile}300!"
746 scrub_check_status 8 scanning
747 scrub_check_flags 9 inconsistent,auto
749 do_nodes $(comma_list $(mdts_nodes)) \
750 $LCTL set_param fail_loc=0 fail_val=0
752 scrub_check_status 10 completed
755 run_test 7 "System is available during OI scrub scanning"
759 scrub_backup_restore 1
760 echo "starting MDTs with OI scrub disabled"
761 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
762 scrub_check_flags 4 inconsistent
764 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
765 do_nodes $(comma_list $(mdts_nodes)) \
766 $LCTL set_param fail_val=1 fail_loc=0x190
769 scrub_check_status 6 scanning
771 scrub_check_status 8 stopped
773 scrub_check_status 10 scanning
775 do_nodes $(comma_list $(mdts_nodes)) \
776 $LCTL set_param fail_loc=0 fail_val=0
778 scrub_check_status 11 completed
779 scrub_check_flags 12 ""
781 run_test 8 "Control OI scrub manually"
784 if [ -z "$(grep "processor.*: 1" /proc/cpuinfo)" ]; then
785 skip "Testing on UP system, the speed may be inaccurate."
790 scrub_backup_restore 1
792 echo "starting MDTs with OI scrub disabled"
793 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
794 scrub_check_flags 4 inconsistent
796 local BASE_SPEED1=100
798 # OI scrub should run with full speed under inconsistent case
799 scrub_start 5 -s $BASE_SPEED1
802 scrub_check_status 6 completed
803 scrub_check_flags 7 ""
804 # OI scrub should run with limited speed under non-inconsistent case
805 scrub_start 8 -s $BASE_SPEED1 -r
808 scrub_check_status 9 scanning
810 # Account for the 1024 pre-fetched items. There may also be a
811 # timing error, which normally should be less than 2 seconds.
812 # On top of that we allow another 20% of scheduling error.
813 local PRE_FETCHED=1024
815 # MAX_MARGIN = 1.2 = 12 / 10
816 local MAX_SPEED=$(((PRE_FETCHED + BASE_SPEED1 * \
817 (RUN_TIME1 + TIME_DIFF)) / RUN_TIME1 * 12 / 10))
819 for n in $(seq $MDSCOUNT); do
820 local SPEED=$(scrub_status $n | \
821 awk '/^average_speed/ { print $2 }')
822 [ $SPEED -lt $MAX_SPEED ] ||
823 error "(10) Got speed $SPEED, expected less than" \
828 local BASE_SPEED2=300
830 for n in $(seq $MDSCOUNT); do
831 do_facet mds$n $LCTL set_param -n \
832 mdd.$(facet_svc mds$n).lfsck_speed_limit $BASE_SPEED2
836 # MIN_MARGIN = 0.8 = 8 / 10
837 local MIN_SPEED=$(((PRE_FETCHED + \
838 BASE_SPEED1 * (RUN_TIME1 - TIME_DIFF) + \
839 BASE_SPEED2 * (RUN_TIME2 - TIME_DIFF)) / \
840 (RUN_TIME1 + RUN_TIME2) * 8 / 10))
841 # MAX_MARGIN = 1.2 = 12 / 10
842 MAX_SPEED=$(((PRE_FETCHED + \
843 BASE_SPEED1 * (RUN_TIME1 + TIME_DIFF) + \
844 BASE_SPEED2 * (RUN_TIME2 + TIME_DIFF)) / \
845 (RUN_TIME1 + RUN_TIME2) * 12 / 10))
846 for n in $(seq $MDSCOUNT); do
847 SPEED=$(scrub_status $n | awk '/^average_speed/ { print $2 }')
848 [ $SPEED -gt $MIN_SPEED ] ||
849 error "(11) Got speed $SPEED, expected more than" \
851 [ $SPEED -lt $MAX_SPEED ] ||
852 error "(12) Got speed $SPEED, expected less than" \
855 do_facet mds$n $LCTL set_param -n \
856 mdd.$(facet_svc mds$n).lfsck_speed_limit 0
859 scrub_check_status 13 completed
861 run_test 9 "OI scrub speed control"
865 scrub_backup_restore 1
# Start the MDTs with the "noscrub" mount options, then check that the
# oi_scrub flags read "inconsistent".
866 echo "starting MDTs with OI scrub disabled"
867 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
868 scrub_check_flags 4 inconsistent
869 mount_client $MOUNT || error "(5) Fail to start client!"
872 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
873 do_nodes $(comma_list $(mdts_nodes)) \
874 $LCTL set_param fail_val=1 fail_loc=0x190
878 scrub_check_status 7 scanning
879 umount_client $MOUNT || error "(8) Fail to stop client!"
881 echo "starting MDTs with OI scrub disabled"
882 scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
883 scrub_check_status 11 paused
885 echo "starting MDTs without disabling OI scrub"
886 scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
887 scrub_check_status 14 scanning
889 do_nodes $(comma_list $(mdts_nodes)) \
890 $LCTL set_param fail_loc=0 fail_val=0
892 scrub_check_status 15 completed
893 scrub_check_flags 16 ""
895 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
897 # test_10b is obsolete, it will be covered by related sanity-lfsck tests.
900 scrub_backup_restore 1
901 echo "starting MDTs with OI scrub disabled"
902 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
903 scrub_check_flags 4 inconsistent
905 #define OBD_FAIL_OSD_SCRUB_DELAY 0x190
906 do_nodes $(comma_list $(mdts_nodes)) \
907 $LCTL set_param fail_val=3 fail_loc=0x190
910 scrub_check_status 6 scanning
912 echo "starting MDTs with OI scrub disabled"
913 scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
914 scrub_check_status 9 paused
916 echo "starting MDTs without disabling OI scrub"
917 scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
918 scrub_check_status 12 scanning
920 do_nodes $(comma_list $(mdts_nodes)) \
921 $LCTL set_param fail_loc=0 fail_val=0
923 scrub_check_status 13 completed
924 scrub_check_flags 14 ""
926 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
934 for n in $(seq $MDSCOUNT); do
935 $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
936 error "(1) Fail to mkdir $DIR/$tdir/mds$n"
938 createmany -o $DIR/$tdir/mds$n/f $CREATED ||
939 error "(2) Fail to create under $tdir/mds$n"
942 # reset OI scrub start point by force
944 scrub_check_status 4 completed
949 # OI scrub should skip the newly created objects on first access.
950 # Note that we're creating a new llog for every OST on every startup, and
951 # new features can make this even less stable, so we only check that the
952 # number of skipped objects is more than the number of known created files.
953 local MINIMUM=$((CREATED + 1)) # files + directory
954 for n in $(seq $MDSCOUNT); do
955 local SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
956 [ $SKIPPED -lt $MINIMUM ] &&
957 error "(5) Expect at least $MINIMUM objects" \
958 "skipped on mds$n, but got $SKIPPED"
960 checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
963 # reset OI scrub start point by force
965 scrub_check_status 7 completed
967 # OI scrub should skip the new created object only once
968 for n in $(seq $MDSCOUNT); do
969 SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }')
970 checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }')
972 [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] ||
973 error "(8) Expect 0 objects skipped on mds$n, but" \
977 run_test 11 "OI scrub skips the new created objects only once"
981 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
983 #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195
984 do_facet ost1 $LCTL set_param fail_loc=0x195
985 local count=$(precreated_ost_obj_count 0 0)
987 createmany -o $DIR/$tdir/f $((count + 32))
988 umount_client $MOUNT || error "(1) Fail to stop client!"
990 stop ost1 || error "(2) Fail to stop ost1"
992 #define OBD_FAIL_OST_NODESTROY 0x233
993 do_facet ost1 $LCTL set_param fail_loc=0x233
995 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
996 error "(3) Fail to start ost1"
998 mount_client $MOUNT || error "(4) Fail to start client!"
1000 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1002 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
1004 do_facet ost1 $LCTL set_param fail_loc=0
# Hand wait_update_facet the oi_scrub "status" query for ost1 together with
# the value "completed"; on failure, report that same literal value.
1005 wait_update_facet ost1 "$LCTL get_param -n \
1006 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1007 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1008 error "(7) Expected 'completed' on ost1"
1010 ls -ail $DIR/$tdir > /dev/null || {
1012 error "(8) ls should succeed"
1015 run_test 12 "OI scrub can rebuild invalid /O entries"
1018 check_mount_and_prep
1019 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1021 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1022 do_facet ost1 $LCTL set_param fail_loc=0x196
1023 local count=$(precreated_ost_obj_count 0 0)
1025 createmany -o $DIR/$tdir/f $((count + 32))
1026 do_facet ost1 $LCTL set_param fail_loc=0
1028 umount_client $MOUNT || error "(1) Fail to stop client!"
1030 stop ost1 || error "(2) Fail to stop ost1"
1032 start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
1033 error "(3) Fail to start ost1"
1035 mount_client $MOUNT || error "(4) Fail to start client!"
1037 ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
1039 $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
# Hand wait_update_facet the oi_scrub "status" query for ost1 together with
# the value "completed"; on failure, report that same literal value.
1041 wait_update_facet ost1 "$LCTL get_param -n \
1042 osd-ldiskfs.$(facet_svc ost1).oi_scrub |
1043 awk '/^status/ { print \\\$2 }'" "completed" 6 ||
1044 error "(7) Expected 'completed' on ost1"
1046 ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
1048 run_test 13 "OI scrub can rebuild missed /O entries"
1051 check_mount_and_prep
1052 $SETSTRIPE -c 1 -i 0 $DIR/$tdir
1054 #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196
1055 do_facet ost1 $LCTL set_param fail_loc=0x196
1056 local count=$(precreated_ost_obj_count 0 0)
1058 createmany -o $DIR/$tdir/f $((count + 32))
1059 do_facet ost1 $LCTL set_param fail_loc=0
1061 umount_client $MOUNT || error "(1) Fail to stop client!"
1063 stop ost1 || error "(2) Fail to stop ost1"
1066 run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
1067 error "(3) Fail to run e2fsck error"
1069 start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
1070 error "(4) Fail to start ost1"
1072 mount_client $MOUNT || error "(5) Fail to start client!"
# Read the lost+found repair counter from the OST's oi_scrub output and
# require it to be positive.  An empty result is treated as 0, so a missing
# field fails through error rather than through a malformed test expression.
# NOTE(review): the awk pattern spells the field "lf_reparied"; confirm that
# this matches the oi_scrub output before "correcting" it.
1074 local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
1075 awk '/^lf_reparied/ { print $2 }')
1076 [ "${LF_REPAIRED:-0}" -gt 0 ] ||
1077 error "(6) Some entry under /lost+found should be repaired"
1079 ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
1081 run_test 14 "OI scrub can repair objects under lost+found"
1084 # skip test_15 for LU-4182
1085 [ $MDSCOUNT -ge 2 ] && skip "skip now for >= 2 MDTs" && return
1086 local server_version=$(lustre_version_code $SINGLEMDS)
1088 scrub_backup_restore 1
1089 echo "starting MDTs with OI scrub disabled"
1090 scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
1091 scrub_check_status 3 init
1092 scrub_check_flags 4 inconsistent
1094 # run under dryrun mode
1095 if [ $server_version -lt $(version_code 2.5.58) ]; then
1096 scrub_start 5 --dryrun on
1098 scrub_start 5 --dryrun
1100 scrub_check_status 6 completed
1101 scrub_check_flags 7 inconsistent
1102 scrub_check_params 8 dryrun
1103 scrub_check_repaired 9 20
1105 # run under dryrun mode again
1106 if [ $server_version -lt $(version_code 2.5.58) ]; then
1107 scrub_start 10 --dryrun on
1109 scrub_start 10 --dryrun
1111 scrub_check_status 11 completed
1112 scrub_check_flags 12 inconsistent
1113 scrub_check_params 13 dryrun
1114 scrub_check_repaired 14 20
1116 # run under normal mode
1118 # Lustre-2.x (x <= 5) used "-n off" to disable dryrun which does not
1119 # work under Lustre-2.y (y >= 6), the test script should be fixed as
1120 # "-noff" or "--dryrun=off" or nothing by default.
1121 if [ $server_version -lt $(version_code 2.5.58) ]; then
1122 scrub_start 15 --dryrun off
1126 scrub_check_status 16 completed
1127 scrub_check_flags 17 ""
1128 scrub_check_params 18 ""
1129 scrub_check_repaired 19 20
1131 # run under normal mode again
1132 if [ $server_version -lt $(version_code 2.5.58) ]; then
1133 scrub_start 20 --dryrun off
1137 scrub_check_status 21 completed
1138 scrub_check_flags 22 ""
1139 scrub_check_params 23 ""
1140 scrub_check_repaired 24 0
1142 run_test 15 "Dryrun mode OI scrub"
1144 # restore MDS/OST size
1145 MDSSIZE=${SAVED_MDSSIZE}
1146 OSTSIZE=${SAVED_OSTSIZE}
1147 OSTCOUNT=${SAVED_OSTCOUNT}
1149 # cleanup the system at last