2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # Tests for the multiple mount protection (MMP) feature.
7 # Run selected tests by setting ONLY, or as arguments to the script.
8 # Skip specific tests by setting EXCEPT.
10 # e.g. ONLY="5 6" or ONLY="`seq 8 11`" or EXCEPT="7"
# Skip list: keep a caller-supplied, non-empty ALWAYS_EXCEPT; otherwise fall
# back to $MMP_EXCEPT.
15 # bug number for skipped test:
16 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"$MMP_EXCEPT"}
17 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# SRCDIR is the directory of the invoked script, as reported by $PWD after
# cd'ing into it inside the command substitution. cd and echo are joined with
# ';', so a failed cd would yield the caller's working directory instead.
19 SRCDIR=$(cd $(dirname $0); echo $PWD)
# Prepend $PWD/$SRCDIR, $SRCDIR and $SRCDIR/../utils to PATH; append /sbin.
20 export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin
# LUSTRE defaults to the parent of the script's directory unless it is
# already set and non-empty.
22 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Source the shared test framework from the Lustre tree.
23 . $LUSTRE/tests/test-framework.sh
# Source the configuration file; ':=' also assigns the default path to CONFIG
# when CONFIG is unset or empty. NAME is not assigned in the visible part of
# this file -- presumably it comes from the framework or the environment;
# verify.
25 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Leave the script with status 0 when remote_mds_nodsh / remote_ost_nodsh
# succeeds (and the skip call that reports it succeeds as well).
28 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
29 remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
31 # unmount and cleanup the Lustre filesystem
# MMP_RESTORE_MOUNT records whether $MOUNT or $MOUNT2 was mounted on entry;
# the end of the script runs setupall when it is "true".
# NOTE(review): the remainder of this 'if' body and its closing 'fi' are not
# visible in this listing.
32 MMP_RESTORE_MOUNT=false
33 if is_mounted $MOUNT || is_mounted $MOUNT2; then
35 MMP_RESTORE_MOUNT=true
# Remember the incoming FAIL_ON_ERROR value; it is assigned back near the end
# of the script.
38 SAVED_FAIL_ON_ERROR=$FAIL_ON_ERROR
43 # Get the failover facet.
# The candidate failover facet name is "${facet}failover". The hosts of the
# primary and the candidate facet are looked up with facet_host; the test
# below is true when the failover host is empty or identical to the primary
# host.
# NOTE(review): the line assigning $facet, the command continued after the
# trailing backslash, and the end of the function are not visible in this
# listing, so what happens in that case and how the result is handed back
# cannot be confirmed from here. Callers capture this function's stdout with
# $(...).
44 get_failover_facet() {
46 local failover_facet=${facet}failover
48 local host=$(facet_host $facet)
49 local failover_host=$(facet_host $failover_facet)
51 [ -z "$failover_host" -o "$host" = "$failover_host" ] && \
57 # Initialize the variables for Lustre servers and targets.
# Sets the globals MMP_MDS, MMP_MDS_FAILOVER, MMP_MDSDEV, MMP_OSS,
# MMP_OSS_FAILOVER and MMP_OSTDEV.
# NOTE(review): the enclosing function header is not visible in this listing;
# the 'local' declarations imply these lines run inside a function.
# MDS facet under test: $SINGLEMDS unless MMP_MDS is already set and non-empty.
59 MMP_MDS=${MMP_MDS:-$SINGLEMDS}
60 MMP_MDS_FAILOVER=$(get_failover_facet $MMP_MDS)
# 'tr -d "mds"' deletes every 'm', 'd' and 's' character (a character set,
# not the literal prefix); the remainder is passed to mdsdevname.
62 local mds_num=$(echo $MMP_MDS | tr -d "mds")
63 MMP_MDSDEV=$(mdsdevname $mds_num)
# OSS facet under test: ost1 unless MMP_OSS is already set and non-empty.
65 MMP_OSS=${MMP_OSS:-ost1}
66 MMP_OSS_FAILOVER=$(get_failover_facet $MMP_OSS)
# Likewise deletes every 'o', 's' and 't' character before calling ostdevname.
68 local oss_num=$(echo $MMP_OSS | tr -d "ost")
69 MMP_OSTDEV=$(ostdevname $oss_num)
72 # Stop the MDS and OSS services on the primary or failover servers.
# When $flavor is "failover", the failover facets ($MMP_MDS_FAILOVER and
# $MMP_OSS_FAILOVER) are selected. The chosen MDS facet is stopped first, then
# the OSS facet, each with $opts; a failing 'stop' makes the function return
# immediately with ${PIPESTATUS[0]}.
# NOTE(review): the function header, the assignments of $flavor and $opts,
# the facet selection for any other flavor and the closing 'fi' are not
# visible in this listing.
80 if [ "$flavor" = "failover" ]; then
81 mds_facet=$MMP_MDS_FAILOVER
82 oss_facet=$MMP_OSS_FAILOVER
88 stop $mds_facet $opts || return ${PIPESTATUS[0]}
89 stop $oss_facet $opts || return ${PIPESTATUS[0]}
92 # Enable the MMP feature.
# Runs "$TUNE2FS -O mmp $device" on $facet through do_facet and returns
# ${PIPESTATUS[0]} read immediately after that call.
# NOTE(review): the function header and the assignments of $facet and $device
# are not visible in this listing.
97 do_facet $facet "$TUNE2FS -O mmp $device"
98 return ${PIPESTATUS[0]}
101 # Disable the MMP feature.
# Runs "$TUNE2FS -O ^mmp $device" on $facet through do_facet and returns
# ${PIPESTATUS[0]} read immediately after that call.
# NOTE(review): the function header and the assignments of $facet and $device
# are not visible in this listing.
106 do_facet $facet "$TUNE2FS -O ^mmp $device"
107 return ${PIPESTATUS[0]}
110 # Set the MMP block to 'fsck' state
# Runs the helper script $LUSTRE/tests/mmp_mark.sh with $device as its only
# argument on $facet through do_facet, and returns ${PIPESTATUS[0]} read
# immediately after that call.
# NOTE(review): the function header and the assignments of $facet and $device
# are not visible in this listing.
115 do_facet $facet "$LUSTRE/tests/mmp_mark.sh $device"
116 return ${PIPESTATUS[0]}
119 # Reset the MMP block (if any) back to the clean state.
# Runs "$TUNE2FS -f -E clear-mmp $device" on $facet through do_facet and
# returns ${PIPESTATUS[0]} read immediately after that call.
# NOTE(review): the function header and the assignments of $facet and $device
# are not visible in this listing.
124 do_facet $facet "$TUNE2FS -f -E clear-mmp $device"
125 return ${PIPESTATUS[0]}
128 # Check whether the MMP feature is enabled or not.
# Runs "$DUMPE2FS -h $device | grep mmp" on $facet through do_facet. The pipe
# is part of the quoted string handed to do_facet, not a pipeline of this
# shell, so ${PIPESTATUS[0]} here refers to the do_facet command itself.
# Presumably do_facet reports the status of the remote command line (i.e. of
# grep) -- verify against do_facet.
# NOTE(review): the function header and the assignments of $facet and $device
# are not visible in this listing.
133 do_facet $facet "$DUMPE2FS -h $device | grep mmp"
134 return ${PIPESTATUS[0]}
137 # Get MMP update interval (in seconds) from the Lustre server target.
# Runs "$DEBUGFS -c -R dump_mmp $device" on $facet through do_facet; within
# that command string stderr is redirected to $TMP/mmp.debugfs.msg. awk then
# prints the last field of every output line that, lower-cased, matches
# /update.interval/ ('.' matches any single character).
# If nothing was extracted, interval is set to 5, the saved message file is
# shown with cat via do_facet, and an "assume" notice is written to stderr;
# otherwise a "got actual" notice is written to stderr.
# NOTE(review): the notices form an 'A && B && C && D || E' chain, so the
# "got actual" message is also printed if the cat or the "assume" echo fails.
# NOTE(review): the assignments of $facet and $device, the way $interval is
# handed back, and the closing brace are not visible in this listing. Callers
# capture this function's stdout with $(...).
138 get_mmp_update_interval() {
143 interval=$(do_facet $facet \
144 "$DEBUGFS -c -R dump_mmp $device 2>$TMP/mmp.debugfs.msg" |
145 awk 'tolower($0) ~ /update.interval/ { print $NF }')
146 [ -z "$interval" ] && interval=5 &&
147 do_facet $facet cat $TMP/mmp.debugfs.msg &&
148 echo "$facet:$device: assume update interval=$interval" 1>&2 ||
149 echo "$facet:$device: got actual update interval=$interval" 1>&2
154 # Get MMP check interval (in seconds) from the Lustre server target.
# Runs "$DEBUGFS -c -R dump_mmp $device" on $facet through do_facet; within
# that command string stderr is redirected to $TMP/mmp.debugfs.msg. awk then
# prints the last field of every output line that, lower-cased, matches
# /check.interval/ ('.' matches any single character).
# If nothing was extracted, interval is set to 5, the saved message file is
# shown with cat via do_facet, and an "assume" notice is written to stderr;
# otherwise a "got actual" notice is written to stderr.
# NOTE(review): the notices form an 'A && B && C && D || E' chain, so the
# "got actual" message is also printed if the cat or the "assume" echo fails.
# NOTE(review): the assignments of $facet and $device, the way $interval is
# handed back, and the closing brace are not visible in this listing. Callers
# capture this function's stdout with $(...).
155 get_mmp_check_interval() {
160 interval=$(do_facet $facet \
161 "$DEBUGFS -c -R dump_mmp $device 2>$TMP/mmp.debugfs.msg" |
162 awk 'tolower($0) ~ /check.interval/ { print $NF }')
163 [ -z "$interval" ] && interval=5 &&
164 do_facet $facet cat $TMP/mmp.debugfs.msg &&
165 echo "$facet:$device: assume check interval=$interval" 1>&2 ||
166 echo "$facet:$device: got actual check interval=$interval" 1>&2
171 # Adjust the MMP update interval (in seconds) on the Lustre server target.
172 # Specifying an interval of 0 means to use the default interval.
# The interval is the third argument and defaults to 0 when it is omitted or
# empty. It is applied by running
# "$TUNE2FS -E mmp_update_interval=$interval $device" on $facet through
# do_facet; the function returns ${PIPESTATUS[0]} read right after that call.
# NOTE(review): the assignments of $facet and $device (presumably the first
# two arguments, judging from the callers) and the closing brace are not
# visible in this listing.
173 set_mmp_update_interval() {
176 local interval=${3:-0}
178 do_facet $facet "$TUNE2FS -E mmp_update_interval=$interval $device"
179 return ${PIPESTATUS[0]}
185 # Enable the MMP feature on the Lustre server targets.
# Steps visible here:
#  1. facet_fstype must report ldiskfs for both $MMP_MDS and $MMP_OSS;
#     otherwise skip "ldiskfs only test" is called.
#  2. For the MDS target and then the OSS target, mmp_is_enabled is probed;
#     its '||' branch logs that MMP is being enabled manually. enable_mmp is
#     run and 'error' is called if it fails.
#  3. mmp_is_enabled is run again on both targets and 'error' is called if
#     MMP is still not reported as enabled.
# NOTE(review): the function header, the closing 'fi' lines, whatever follows
# each skip call, and any grouping that ties the log/enable_mmp lines to the
# preceding '||' are not visible in this listing.
189 if [ $(facet_fstype $MMP_MDS) != ldiskfs ]; then
190 skip "ldiskfs only test"
194 if [ $(facet_fstype $MMP_OSS) != ldiskfs ]; then
195 skip "ldiskfs only test"
199 mmp_is_enabled $MMP_MDS $MMP_MDSDEV ||
201 log "MMP is not enabled on MDS, enabling it manually..."
202 enable_mmp $MMP_MDS $MMP_MDSDEV ||
203 error "failed to enable MMP on $MMP_MDSDEV on $MMP_MDS"
207 mmp_is_enabled $MMP_OSS $MMP_OSTDEV ||
209 log "MMP is not enabled on OSS, enabling it manually..."
210 enable_mmp $MMP_OSS $MMP_OSTDEV ||
211 error "failed to enable MMP on $MMP_OSTDEV on $MMP_OSS"
215 # check whether the MMP feature is enabled or not
216 mmp_is_enabled $MMP_MDS $MMP_MDSDEV ||
217 error "MMP was not enabled on $MMP_MDSDEV on $MMP_MDS"
219 mmp_is_enabled $MMP_OSS $MMP_OSTDEV ||
220 error "MMP was not enabled on $MMP_OSTDEV on $MMP_OSS"
223 # Disable the MMP feature on the Lustre server targets
# A target is only touched when its flag (I_ENABLED_MDS / I_ENABLED_OSS)
# equals 1: disable_mmp is run ('error' on failure), then mmp_is_enabled is
# probed and 'error' is called if it still succeeds.
# NOTE(review): the flags are expanded unquoted inside '[ ... -eq 1 ]'; if a
# flag is unset or empty the test is malformed and evaluates as false with an
# error message. Where the flags are assigned is not visible in this listing,
# nor are the function header and the closing 'fi' lines.
226 if [ $I_ENABLED_MDS -eq 1 ]; then
227 log "Disabling MMP on $MMP_MDSDEV on $MMP_MDS manually..."
228 disable_mmp $MMP_MDS $MMP_MDSDEV ||
229 error "failed to disable MMP on $MMP_MDSDEV on $MMP_MDS"
230 mmp_is_enabled $MMP_MDS $MMP_MDSDEV &&
231 error "MMP was not disabled on $MMP_MDSDEV on $MMP_MDS"
234 if [ $I_ENABLED_OSS -eq 1 ]; then
235 log "Disabling MMP on $MMP_OSTDEV on $MMP_OSS manually..."
236 disable_mmp $MMP_OSS $MMP_OSTDEV ||
237 error "failed to disable MMP on $MMP_OSTDEV on $MMP_OSS"
238 mmp_is_enabled $MMP_OSS $MMP_OSTDEV &&
239 error "MMP was not disabled on $MMP_OSTDEV on $MMP_OSS"
245 # Mount the shared target on the failover server some interval after it
246 # starts being mounted on the primary server.
# Judging from the call in mount_after_interval, the arguments are: interval,
# device, facet, mount options (the lines assigning them are not visible).
# Flow visible here:
#  - the mount on $facet is started in the background ('&');
#  - a "sleep" message is logged when $interval is non-zero;
#  - the mount on the failover facet runs in the foreground and its status
#    is kept in second_mount_rc;
#  - first_mount_rc is taken from ${PIPESTATUS[0]} afterwards;
#  - both mounts succeeding is reported with error_noexit and both facets are
#    stopped; both failing is reported and first_mount_rc is returned.
#    Neither error branch is taken when exactly one of the mounts succeeds.
# NOTE(review): the sleep itself, the command whose status feeds
# first_mount_rc (presumably a wait for the background mount), the returns
# ending each branch and the closing 'fi'/'}' are not visible in this listing.
# NOTE(review): in the '[ ... ] && stop ... || return' line, a false test
# (failover facet equal to facet) also reaches the 'return'.
247 mount_after_interval_sub() {
255 local failover_facet=$(get_failover_facet $facet)
258 local first_mount_rc=0
259 local second_mount_rc=0
261 log "Mounting $device on $facet..."
262 start $facet $device $opts &
265 if [ $interval -ne 0 ]; then
266 log "sleep $interval..."
270 log "Mounting $device on $failover_facet..."
271 start $failover_facet $device $opts
272 second_mount_rc=${PIPESTATUS[0]}
275 first_mount_rc=${PIPESTATUS[0]}
277 if [ $second_mount_rc -eq 0 -a $first_mount_rc -eq 0 ]; then
278 error_noexit "one mount delayed by mmp interval $interval should fail"
279 stop $facet || return ${PIPESTATUS[0]}
280 [ "$failover_facet" != "$facet" ] && stop $failover_facet || \
281 return ${PIPESTATUS[0]}
283 elif [ $second_mount_rc -ne 0 -a $first_mount_rc -ne 0 ]; then
284 error_noexit "mount failure on failover pair $facet,$failover_facet"
285 return $first_mount_rc
# Run mount_after_interval_sub for the MDS target and then the OSS target.
# Arguments: $1 - interval used for the MDT, $2 - interval used for the OST.
# A failure of the MDS step returns immediately with ${PIPESTATUS[0]}; the
# result of the OSS step is checked through $rc.
# NOTE(review): the line assigning $rc, the body of the final 'if' and the
# end of the function are not visible in this listing.
291 mount_after_interval() {
292 local mdt_interval=$1
293 local ost_interval=$2
296 mount_after_interval_sub $mdt_interval $MMP_MDSDEV $MMP_MDS \
297 $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
300 mount_after_interval_sub $ost_interval $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
302 if [ $rc -ne 0 ]; then
310 # Mount the shared target on the failover server
311 # while it is being unmounted on the primary server.
# Flow visible here:
#  - mount $device on $facet (return on failure);
#  - log the unmount on $facet, then mount on the failover facet and keep
#    that status in mount_rc;
#  - record in $mounted whether the primary's mount point (facet_mntpt) is
#    still listed in /proc/mounts on $facet;
#  - keep the unmount status in unmount_rc;
#  - if the failover mount succeeded, stop the failover facet, and report
#    with error_noexit when the first filesystem was still mounted;
#  - report with error_noexit when the unmount failed.
# NOTE(review): the argument assignments, the unmount command itself, the
# command whose status feeds unmount_rc, the nesting/closing of the 'if'
# blocks and the final return are not visible in this listing.
312 mount_during_unmount() {
318 local failover_facet=$(get_failover_facet $facet)
324 log "Mounting $device on $facet..."
325 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
327 log "Unmounting $device on $facet..."
331 log "Mounting $device on $failover_facet..."
332 start $failover_facet $device $mnt_opts
333 mount_rc=${PIPESTATUS[0]}
335 # check whether the first filesystem is still mounted
336 local mntpt=$(facet_mntpt $facet)
337 local mounted=$(do_facet $facet "grep -w $mntpt /proc/mounts")
340 unmount_rc=${PIPESTATUS[0]}
342 if [ $mount_rc -eq 0 ]; then
343 stop $failover_facet || return ${PIPESTATUS[0]}
345 if [ -n "$mounted" ]; then
346 error_noexit "mount during unmount of first filesystem worked"
351 if [ $unmount_rc -ne 0 ]; then
352 error_noexit "unmount the $device on $facet should succeed"
359 # Mount the shared target on the failover server
360 # after cleanly unmounting it on the primary server.
# Three sequential steps, each returning ${PIPESTATUS[0]} on failure: mount
# $device on $facet, stop $facet, then mount $device on the failover facet.
# The failover facet is left mounted by the visible lines.
# NOTE(review): the assignments of $device, $facet and $mnt_opts and the end
# of the function are not visible in this listing.
361 mount_after_unmount() {
367 local failover_facet=$(get_failover_facet $facet)
369 log "Mounting $device on $facet..."
370 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
372 log "Unmounting $device on $facet..."
373 stop $facet || return ${PIPESTATUS[0]}
375 log "Mounting $device on $failover_facet..."
376 start $failover_facet $device $mnt_opts || return ${PIPESTATUS[0]}
381 # Mount the shared target on the failover server after rebooting
382 # the primary server.
# Flow visible here: mount $device on $facet (return on failure); when
# FAILURE_MODE is "HARD" call shutdown_facet; wait_for_facet and
# replay_barrier_nodf are also called on $facet; then mount on the failover
# facet. A non-zero $rc is reported with error_noexit and $facet is stopped.
# NOTE(review): the argument assignments, the 'else'/'fi' structure around
# the FAILURE_MODE test (so which branch wait_for_facet and
# replay_barrier_nodf belong to), the line assigning $rc and the end of the
# function are not visible in this listing.
383 mount_after_reboot() {
389 local failover_facet=$(get_failover_facet $facet)
392 log "Mounting $device on $facet..."
393 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
395 if [ "$FAILURE_MODE" = "HARD" ]; then
396 shutdown_facet $facet
398 wait_for_facet $facet
400 replay_barrier_nodf $facet
403 log "Mounting $device on $failover_facet..."
404 start $failover_facet $device $mnt_opts
406 if [ $rc -ne 0 ]; then
407 error_noexit "mount $device on $failover_facet should succeed"
408 stop $facet || return ${PIPESTATUS[0]}
415 # Run e2fsck on the Lustre server target.
# Announces the run on stdout, executes "$E2FSCK $opts $device" on $facet
# through do_facet and returns ${PIPESTATUS[0]} read right after that call.
# Judging from the callers, the arguments are facet, device and e2fsck
# options, in that order.
# NOTE(review): the function header and the argument assignments are not
# visible in this listing.
423 echo "Running e2fsck on the device $device on $facet..."
424 do_facet $facet "$E2FSCK $opts $device"
425 return ${PIPESTATUS[0]}
428 # Check whether there are failover pairs for MDS and OSS servers.
# If either the MDS or the OSS facet is identical to its failover facet,
# skip_env is called and, provided skip_env succeeds, the function returns 1.
# The tests use it as 'check_failover_pair || return 0'.
# NOTE(review): the rest of the function (the status returned when both pairs
# exist, and the closing brace) is not visible in this listing.
429 check_failover_pair() {
430 [ "$MMP_MDS" = "$MMP_MDS_FAILOVER" -o "$MMP_OSS" = "$MMP_OSS_FAILOVER" ] \
431 && { skip_env "failover pair is needed" && return 1; }
437 # Test 1 - two mounts at the same time.
# Returns 0 early when check_failover_pair fails. Otherwise runs
# mount_after_interval with a zero interval for both MDT and OST, then stops
# the primary and the failover services; any failing step returns
# ${PIPESTATUS[0]}.
# NOTE(review): the function header and closing brace are not visible in this
# listing.
439 check_failover_pair || return 0
441 mount_after_interval 0 0 || return ${PIPESTATUS[0]}
442 stop_services primary || return ${PIPESTATUS[0]}
443 stop_services failover || return ${PIPESTATUS[0]}
445 run_test 1 "two mounts at the same time"
447 # Test 2 - one mount delayed by mmp update interval.
# Returns 0 early when check_failover_pair fails. The delays are the MMP
# update intervals reported by get_mmp_update_interval for the MDS and OSS
# targets; they are passed unchanged to mount_after_interval, after which the
# primary services are stopped.
# NOTE(review): the function header, any further cleanup and the closing
# brace are not visible in this listing.
449 check_failover_pair || return 0
451 local mdt_interval=$(get_mmp_update_interval $MMP_MDS $MMP_MDSDEV)
452 local ost_interval=$(get_mmp_update_interval $MMP_OSS $MMP_OSTDEV)
454 mount_after_interval $mdt_interval $ost_interval || return ${PIPESTATUS[0]}
455 stop_services primary || return ${PIPESTATUS[0]}
457 run_test 2 "one mount delayed by mmp update interval"
459 # Test 3 - one mount delayed by 2x mmp check interval.
# Returns 0 early when check_failover_pair fails. The delay handed to
# mount_after_interval is (2 * check interval + 1) seconds for each target,
# where the check interval comes from get_mmp_check_interval. The primary
# services are stopped afterwards.
# NOTE(review): the function header, any further cleanup and the closing
# brace are not visible in this listing.
461 check_failover_pair || return 0
463 local mdt_interval=$(get_mmp_check_interval $MMP_MDS $MMP_MDSDEV)
464 local ost_interval=$(get_mmp_check_interval $MMP_OSS $MMP_OSTDEV)
466 mdt_interval=$((2 * $mdt_interval + 1))
467 ost_interval=$((2 * $ost_interval + 1))
469 mount_after_interval $mdt_interval $ost_interval || return ${PIPESTATUS[0]}
470 stop_services primary || return ${PIPESTATUS[0]}
472 run_test 3 "one mount delayed by 2x mmp check interval"
474 # Test 4 - one mount delayed by > 2x mmp check interval.
# Returns 0 early when check_failover_pair fails. The delay handed to
# mount_after_interval is 4 times the check interval reported by
# get_mmp_check_interval for each target. The primary services are stopped
# afterwards.
# NOTE(review): the function header, any further cleanup and the closing
# brace are not visible in this listing.
476 check_failover_pair || return 0
478 local mdt_interval=$(get_mmp_check_interval $MMP_MDS $MMP_MDSDEV)
479 local ost_interval=$(get_mmp_check_interval $MMP_OSS $MMP_OSTDEV)
481 mdt_interval=$((4 * $mdt_interval))
482 ost_interval=$((4 * $ost_interval))
484 mount_after_interval $mdt_interval $ost_interval || return ${PIPESTATUS[0]}
485 stop_services primary || return ${PIPESTATUS[0]}
487 run_test 4 "one mount delayed by > 2x mmp check interval"
489 # Test 5 - mount during unmount of the first filesystem.
# Returns 0 early when check_failover_pair fails. Runs mount_during_unmount
# on the MDS target first (return on failure), then starts the MDS on its
# primary facet and runs mount_during_unmount on the OSS target. The MDS is
# stopped both on the OSS failure path ($rc non-zero) and at the end.
# NOTE(review): the function header, the line assigning $rc, the return on
# the failure path, the closing 'fi' and the closing brace are not visible in
# this listing.
492 check_failover_pair || return 0
494 mount_during_unmount $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS || \
495 return ${PIPESTATUS[0]}
498 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
499 mount_during_unmount $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
501 if [ $rc -ne 0 ]; then
502 stop $MMP_MDS || return ${PIPESTATUS[0]}
506 stop $MMP_MDS || return ${PIPESTATUS[0]}
508 run_test 5 "mount during unmount of the first filesystem"
510 # Test 6 - mount after clean unmount.
# Returns 0 early when check_failover_pair fails. Runs mount_after_unmount on
# the MDS target (return on failure) and then on the OSS target. If the OSS
# step left $rc non-zero, the MDS failover facet is stopped; on the normal
# path the failover services are stopped with stop_services.
# NOTE(review): the function header, the line assigning $rc, the return on
# the failure path, the closing 'fi' and the closing brace are not visible in
# this listing.
513 check_failover_pair || return 0
515 mount_after_unmount $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS || \
516 return ${PIPESTATUS[0]}
519 mount_after_unmount $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
521 if [ $rc -ne 0 ]; then
522 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
526 stop_services failover || return ${PIPESTATUS[0]}
528 run_test 6 "mount after clean unmount"
530 # Test 7 - mount after reboot.
# Returns 0 early when check_failover_pair fails. Runs mount_after_reboot on
# the MDS target (return on failure) and then on the OSS target. If the OSS
# step left $rc non-zero, both the primary and the failover MDS facets are
# stopped; on the normal path the failover and then the primary services are
# stopped with stop_services.
# NOTE(review): the function header, the line assigning $rc, the return on
# the failure path, the closing 'fi' and the closing brace are not visible in
# this listing.
533 check_failover_pair || return 0
535 mount_after_reboot $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS || \
536 return ${PIPESTATUS[0]}
539 mount_after_reboot $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
541 if [ $rc -ne 0 ]; then
542 stop $MMP_MDS || return ${PIPESTATUS[0]}
543 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
547 stop_services failover || return ${PIPESTATUS[0]}
548 stop_services primary || return ${PIPESTATUS[0]}
550 run_test 7 "mount after reboot"
552 # Test 8 - mount during e2fsck (should never succeed).
# The same sequence is applied to the MDS target and then to the OSS target:
# save the current MMP update interval, set it to $new_interval, start
# 'e2fsck -fy' on the primary facet in the background, and try to mount the
# device on the failover facet. A successful mount is the failure case: the
# failover facet is stopped again and the saved interval is restored. The
# saved interval is also restored on the other path.
# NOTE(review): the function header, the assignment of $new_interval, the end
# of the sentence in the comment below, any delay between starting e2fsck and
# mounting, the command that the "should fail" strings belong to, the
# 'else'/'fi' structure and the returns are not visible in this listing.
558 # After writing a new sequence number into the MMP block, e2fsck will
559 # sleep at least (2 * new_interval + 1) seconds before it goes into
564 saved_interval=$(get_mmp_update_interval $MMP_MDS $MMP_MDSDEV)
565 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $new_interval
567 run_e2fsck $MMP_MDS $MMP_MDSDEV "-fy" &
571 if start $MMP_MDS_FAILOVER $MMP_MDSDEV $MDS_MOUNT_OPTS; then
573 "mount $MMP_MDSDEV on $MMP_MDS_FAILOVER should fail"
574 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
575 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $saved_interval
580 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $saved_interval
584 saved_interval=$(get_mmp_update_interval $MMP_OSS $MMP_OSTDEV)
585 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $new_interval
587 run_e2fsck $MMP_OSS $MMP_OSTDEV "-fy" &
591 if start $MMP_OSS_FAILOVER $MMP_OSTDEV $OST_MOUNT_OPTS; then
593 "mount $MMP_OSTDEV on $MMP_OSS_FAILOVER should fail"
594 stop $MMP_OSS_FAILOVER || return ${PIPESTATUS[0]}
595 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $saved_interval
600 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $saved_interval
603 run_test 8 "mount during e2fsck"
605 # Test 9 - mount after aborted e2fsck (should never succeed).
# Flow visible here:
#  - start the MDS and the OSS on their primary facets (the MDS is stopped
#    again if the OSS fails to start), then stop the primary services;
#  - for the MDS target and then the OSS target: put the MMP block into the
#    'fsck' state with mark_mmp_block, attempt a mount on the primary facet
#    (success is the failure case, reported with error_noexit and followed by
#    a stop), then clear the MMP block with reset_mmp_block.
# NOTE(review): 'if ! start ...' inverts $?; ${PIPESTATUS[0]} is expected to
# still hold start's own status there -- confirm.
# NOTE(review): the function header, the returns inside the 'if' bodies, the
# closing 'fi' lines and the closing brace are not visible in this listing.
607 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
608 if ! start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS; then
609 local rc=${PIPESTATUS[0]}
610 stop $MMP_MDS || return ${PIPESTATUS[0]}
613 stop_services primary || return ${PIPESTATUS[0]}
615 mark_mmp_block $MMP_MDS $MMP_MDSDEV || return ${PIPESTATUS[0]}
617 log "Mounting $MMP_MDSDEV on $MMP_MDS..."
618 if start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS; then
619 error_noexit "mount $MMP_MDSDEV on $MMP_MDS should fail"
620 stop $MMP_MDS || return ${PIPESTATUS[0]}
624 reset_mmp_block $MMP_MDS $MMP_MDSDEV || return ${PIPESTATUS[0]}
626 mark_mmp_block $MMP_OSS $MMP_OSTDEV || return ${PIPESTATUS[0]}
628 log "Mounting $MMP_OSTDEV on $MMP_OSS..."
629 if start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS; then
630 error_noexit "mount $MMP_OSTDEV on $MMP_OSS should fail"
631 stop $MMP_OSS || return ${PIPESTATUS[0]}
635 reset_mmp_block $MMP_OSS $MMP_OSTDEV || return ${PIPESTATUS[0]}
638 run_test 9 "mount after aborted e2fsck"
640 # Test 10 - e2fsck with mounted filesystem.
# Flow visible here: mount the MDS target on its primary facet and run
# 'e2fsck -fn' against the same device from the MDS failover facet; then
# mount the OSS target on its primary facet and run 'e2fsck -fn' from the OSS
# failover facet. For both e2fsck runs any $rc other than 0 or 4 is reported
# with error_noexit. The MDS is stopped on the failure paths shown, and the
# primary services are stopped at the end with CLEANUP_DM_DEV=true set in the
# environment of that stop_services call.
# NOTE(review): the function header, the lines assigning $rc after each
# command, the returns inside the 'if' bodies, the closing 'fi' lines and the
# closing brace are not visible in this listing.
644 log "Mounting $MMP_MDSDEV on $MMP_MDS..."
645 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
647 run_e2fsck $MMP_MDS_FAILOVER $MMP_MDSDEV "-fn"
650 # e2fsck is called with -n option (Open the filesystem read-only), so
651 # 0 (No errors) and 4 (File system errors left uncorrected) are the only
652 # acceptable exit codes in this case
653 if [ $rc -ne 0 ] && [ $rc -ne 4 ]; then
654 error_noexit "e2fsck $MMP_MDSDEV on $MMP_MDS_FAILOVER returned $rc"
655 stop $MMP_MDS || return ${PIPESTATUS[0]}
659 log "Mounting $MMP_OSTDEV on $MMP_OSS..."
660 start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS
662 if [ $rc -ne 0 ]; then
663 stop $MMP_MDS || return ${PIPESTATUS[0]}
667 run_e2fsck $MMP_OSS_FAILOVER $MMP_OSTDEV "-fn"
669 if [ $rc -ne 0 ] && [ $rc -ne 4 ]; then
670 error_noexit "e2fsck $MMP_OSTDEV on $MMP_OSS_FAILOVER returned $rc"
673 CLEANUP_DM_DEV=true stop_services primary || return ${PIPESTATUS[0]}
676 run_test 10 "e2fsck with mounted filesystem"
# Script epilogue: put back the FAIL_ON_ERROR value saved at the top.
679 FAIL_ON_ERROR=$SAVED_FAIL_ON_ERROR
# MMP_RESTORE_MOUNT holds the word "true" or "false" and is executed as a
# command, so setupall only runs when the filesystem was mounted on entry.
682 $MMP_RESTORE_MOUNT && setupall
683 check_and_cleanup_lustre