3 # Tests for multiple mount protection (MMP) feature.
5 # Run select tests by setting ONLY, or as arguments to the script.
6 # Skip specific tests by setting EXCEPT.
8 # e.g. ONLY="5 6" or ONLY="`seq 8 11`" or EXCEPT="7"
# Tests to always skip: keep a caller-supplied ALWAYS_EXCEPT, otherwise fall
# back to $MMP_EXCEPT.
13 # bug number for skipped test:
14 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"$MMP_EXCEPT"}
15 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Absolute path of the directory that contains this script.
17 SRCDIR=$(cd $(dirname $0); echo $PWD)
# Search the script directory and its sibling utils directory before the
# inherited PATH, and /sbin after it.
18 export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin
# Root of the Lustre tree: caller-supplied, else the parent of the script
# directory.
20 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Load the shared test framework, then the configuration file. The := form
# also stores the default path in CONFIG when CONFIG is unset or empty.
21 . $LUSTRE/tests/test-framework.sh
23 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Skip when the framework reports a remote MDS or OST with nodsh.
26 remote_mds_nodsh && skip "remote MDS with nodsh"
27 remote_ost_nodsh && skip "remote OST with nodsh"
29 # Unmount and clean up the Lustre filesystem.
# MMP_RESTORE_MOUNT records whether either client mount point was mounted
# beforehand; it gates the setupall call at the end of the script.
30 MMP_RESTORE_MOUNT=false
31 if is_mounted $MOUNT || is_mounted $MOUNT2; then
33 MMP_RESTORE_MOUNT=true
# Saved so that FAIL_ON_ERROR can be restored once the tests are done.
36 SAVED_FAIL_ON_ERROR=$FAIL_ON_ERROR
41 # Get the failover facet.
# The candidate name is the primary facet name with "failover" appended; the
# hosts of both facets are looked up with facet_host.
# NOTE(review): $facet is not assigned in the lines shown here (presumably
# local facet=$1) -- confirm.
42 get_failover_facet() {
44 local failover_facet=${facet}failover
46 local host=$(facet_host $facet)
47 local failover_host=$(facet_host $failover_facet)
# True when there is no separate failover host: the lookup returned nothing,
# or it returned the same host as the primary facet.
# NOTE(review): the action taken in that case is not visible here; callers
# compare the result with the primary facet, so it presumably falls back to
# the primary facet name -- confirm.
49 [ -z "$failover_host" -o "$host" = "$failover_host" ] && \
55 # Initialize the variables for Lustre servers and targets.
# NOTE(review): the use of local below implies these lines sit inside a
# function whose header is not visible here.
# MDS facet under test (defaults to $SINGLEMDS) and its failover facet.
57 MMP_MDS=${MMP_MDS:-$SINGLEMDS}
58 MMP_MDS_FAILOVER=$(get_failover_facet $MMP_MDS)
# tr -d takes a character set: every m, d and s is removed from the facet
# name, leaving the index (e.g. mds1 becomes 1) that selects the device.
60 local mds_num=$(echo $MMP_MDS | tr -d "mds")
61 MMP_MDSDEV=$(mdsdevname $mds_num)
# OSS facet under test (defaults to ost1) and its failover facet.
63 MMP_OSS=${MMP_OSS:-ost1}
64 MMP_OSS_FAILOVER=$(get_failover_facet $MMP_OSS)
# Same index extraction for the OST: every o, s and t is removed.
66 local oss_num=$(echo $MMP_OSS | tr -d "ost")
67 MMP_OSTDEV=$(ostdevname $oss_num)
70 # Stop the MDS and OSS services on the primary or failover servers.
# NOTE(review): $flavor, $opts and the initial values of mds_facet and
# oss_facet are set in lines not visible here.
# For the failover flavor the facets to stop are the failover facets.
78 if [ "$flavor" = "failover" ]; then
79 mds_facet=$MMP_MDS_FAILOVER
80 oss_facet=$MMP_OSS_FAILOVER
# Stop the MDS facet, then the OSS facet, returning on the first failure.
# After a simple command, PIPESTATUS[0] is the exit status of that command.
86 stop $mds_facet $opts || return ${PIPESTATUS[0]}
87 stop $oss_facet $opts || return ${PIPESTATUS[0]}
90 # Enable the MMP feature.
# Hands "$TUNE2FS -O mmp" for $device to do_facet for $facet and returns the
# status of that do_facet call.
# NOTE(review): $facet and $device are assigned in lines not visible here.
95 do_facet $facet "$TUNE2FS -O mmp $device"
96 return ${PIPESTATUS[0]}
99 # Disable the MMP feature.
# Same call as for enabling, but with ^mmp; in tune2fs -O syntax a leading
# caret clears the named feature. Returns the status of the do_facet call.
104 do_facet $facet "$TUNE2FS -O ^mmp $device"
105 return ${PIPESTATUS[0]}
108 # Set the MMP block to the 'fsck' state.
# Delegates to the mmp_mark.sh helper script under $LUSTRE/tests, run through
# do_facet for $facet, and returns the status of that call.
113 do_facet $facet "$LUSTRE/tests/mmp_mark.sh $device"
114 return ${PIPESTATUS[0]}
117 # Reset the MMP block (if any) back to the clean state.
# Uses the clear-mmp extended option of $TUNE2FS together with -f, and
# returns the status of the do_facet call.
122 do_facet $facet "$TUNE2FS -f -E clear-mmp $device"
123 return ${PIPESTATUS[0]}
126 # Check whether the MMP feature is enabled or not.
# Greps the $DUMPE2FS -h output for "mmp". The pipe is inside the quoted
# string, so it is part of the command handed to do_facet; locally do_facet
# is a single command and PIPESTATUS[0] is its exit status.
# NOTE(review): callers treat a zero status as "enabled", which presumes
# do_facet propagates the status of the grep -- confirm.
131 do_facet $facet "$DUMPE2FS -h $device | grep mmp"
132 return ${PIPESTATUS[0]}
135 # Get MMP update interval (in seconds) from the Lustre server target.
# NOTE(review): $facet and $device are assigned in lines not visible here.
136 get_mmp_update_interval() {
# Dump the MMP block with $DEBUGFS through do_facet and keep the last field
# of the line matching update.interval (case-insensitive; the dot matches
# any single character). The debugfs error output is redirected to
# $TMP/mmp.debugfs.msg as part of the quoted command.
141 interval=$(do_facet $facet \
142 "$DEBUGFS -c -R dump_mmp $device 2>$TMP/mmp.debugfs.msg" |
143 awk 'tolower($0) ~ /update.interval/ { print $NF }')
# Nothing parsed: fall back to 5 seconds, show the saved debugfs messages and
# report the assumption on stderr. Otherwise report the parsed value.
# NOTE(review): this is an "a && b && c || d" chain, so the "got actual"
# message is also printed when the cat step fails after the fallback.
144 [ -z "$interval" ] && interval=5 &&
145 do_facet $facet cat $TMP/mmp.debugfs.msg &&
146 echo "$facet:$device: assume update interval=$interval" 1>&2 ||
147 echo "$facet:$device: got actual update interval=$interval" 1>&2
152 # Get MMP check interval (in seconds) from the Lustre server target.
# NOTE(review): $facet and $device are assigned in lines not visible here.
153 get_mmp_check_interval() {
# Dump the MMP block with $DEBUGFS through do_facet and keep the last field
# of the line matching check.interval (case-insensitive; the dot matches any
# single character). The debugfs error output is redirected to
# $TMP/mmp.debugfs.msg as part of the quoted command.
158 interval=$(do_facet $facet \
159 "$DEBUGFS -c -R dump_mmp $device 2>$TMP/mmp.debugfs.msg" |
160 awk 'tolower($0) ~ /check.interval/ { print $NF }')
# Nothing parsed: fall back to 5 seconds, show the saved debugfs messages and
# report the assumption on stderr. Otherwise report the parsed value.
# NOTE(review): this is an "a && b && c || d" chain, so the "got actual"
# message is also printed when the cat step fails after the fallback.
161 [ -z "$interval" ] && interval=5 &&
162 do_facet $facet cat $TMP/mmp.debugfs.msg &&
163 echo "$facet:$device: assume check interval=$interval" 1>&2 ||
164 echo "$facet:$device: got actual check interval=$interval" 1>&2
169 # Adjust the MMP update interval (in seconds) on the Lustre server target.
170 # Specifying an interval of 0 means to use the default interval.
# NOTE(review): $facet and $device are assigned in lines not visible here
# (presumably from $1 and $2) -- confirm.
171 set_mmp_update_interval() {
# Third argument; 0 is used when it is missing or empty.
174 local interval=${3:-0}
# Apply the value with the mmp_update_interval extended option of $TUNE2FS
# and return the status of the do_facet call.
176 do_facet $facet "$TUNE2FS -E mmp_update_interval=$interval $device"
177 return ${PIPESTATUS[0]}
183 # Enable the MMP feature on the Lustre server targets.
# The tests apply to ldiskfs targets only: skip when either facet reports a
# different backend type.
187 if [ $(facet_fstype $MMP_MDS) != ldiskfs ]; then
188 skip_env "ldiskfs only test"
191 if [ $(facet_fstype $MMP_OSS) != ldiskfs ]; then
192 skip_env "ldiskfs only test"
# MDS target: enable MMP when it is not enabled yet, failing the run if the
# enable step itself fails.
# NOTE(review): lines between the check and the log call are not visible
# here; presumably they record that this script did the enabling -- confirm.
195 mmp_is_enabled $MMP_MDS $MMP_MDSDEV ||
197 log "MMP is not enabled on MDS, enabling it manually..."
198 enable_mmp $MMP_MDS $MMP_MDSDEV ||
199 error "failed to enable MMP on $MMP_MDSDEV on $MMP_MDS"
# OSS target: same sequence.
203 mmp_is_enabled $MMP_OSS $MMP_OSTDEV ||
205 log "MMP is not enabled on OSS, enabling it manually..."
206 enable_mmp $MMP_OSS $MMP_OSTDEV ||
207 error "failed to enable MMP on $MMP_OSTDEV on $MMP_OSS"
211 # Check that the MMP feature is now enabled on both targets.
212 mmp_is_enabled $MMP_MDS $MMP_MDSDEV ||
213 error "MMP was not enabled on $MMP_MDSDEV on $MMP_MDS"
215 mmp_is_enabled $MMP_OSS $MMP_OSTDEV ||
216 error "MMP was not enabled on $MMP_OSTDEV on $MMP_OSS"
219 # Disable the MMP feature on the Lustre server targets.
# MMP is disabled on a target only when the matching I_ENABLED_* flag is 1.
# NOTE(review): the flags are assigned in lines not visible here; presumably
# they mark targets on which this script enabled MMP itself -- confirm.
222 if [ $I_ENABLED_MDS -eq 1 ]; then
223 log "Disabling MMP on $MMP_MDSDEV on $MMP_MDS manually..."
224 disable_mmp $MMP_MDS $MMP_MDSDEV ||
225 error "failed to disable MMP on $MMP_MDSDEV on $MMP_MDS"
# Verify: it is an error if the feature is still reported as enabled.
226 mmp_is_enabled $MMP_MDS $MMP_MDSDEV &&
227 error "MMP was not disabled on $MMP_MDSDEV on $MMP_MDS"
# Same for the OSS target.
230 if [ $I_ENABLED_OSS -eq 1 ]; then
231 log "Disabling MMP on $MMP_OSTDEV on $MMP_OSS manually..."
232 disable_mmp $MMP_OSS $MMP_OSTDEV ||
233 error "failed to disable MMP on $MMP_OSTDEV on $MMP_OSS"
234 mmp_is_enabled $MMP_OSS $MMP_OSTDEV &&
235 error "MMP was not disabled on $MMP_OSTDEV on $MMP_OSS"
241 # Start mounting the shared target on the primary server, then mount it on
242 # the failover server after the given interval.
# NOTE(review): $interval, $device, $facet and $opts are assigned in lines
# not visible here.
243 mount_after_interval_sub() {
251 local failover_facet=$(get_failover_facet $facet)
254 local first_mount_rc=0
255 local second_mount_rc=0
# The first mount runs in the background so that the second one can overlap
# with it.
257 log "Mounting $device on $facet..."
258 start $facet $device $opts &
# An interval of 0 means no delay between the two mount attempts.
261 if [ $interval -ne 0 ]; then
262 log "sleep $interval..."
266 log "Mounting $device on $failover_facet..."
267 start $failover_facet $device $opts
268 second_mount_rc=${PIPESTATUS[0]}
# NOTE(review): the command whose status is captured here is not visible;
# presumably a wait on the background mount -- confirm.
271 first_mount_rc=${PIPESTATUS[0]}
# Both mounts succeeding is a failure of the test: report it, then unmount
# both sides.
273 if [ $second_mount_rc -eq 0 -a $first_mount_rc -eq 0 ]; then
274 error_noexit "one mount delayed by mmp interval $interval should fail"
275 stop $facet || return ${PIPESTATUS[0]}
# NOTE(review): when both facet names are equal the bracket test fails, so the
# return on the right of || runs with the status of that test, not of a stop.
276 [ "$failover_facet" != "$facet" ] && stop $failover_facet || \
277 return ${PIPESTATUS[0]}
# Both mounts failing is a failure too; the status of the first mount is
# returned.
279 elif [ $second_mount_rc -ne 0 -a $first_mount_rc -ne 0 ]; then
280 error_noexit "mount failure on failover pair $facet,$failover_facet"
281 return $first_mount_rc
# Run the delayed double-mount check on the MDT, then on the OST.
# Arguments: $1 - delay used for the MDT
#            $2 - delay used for the OST
287 mount_after_interval() {
288 local mdt_interval=$1
289 local ost_interval=$2
# MDT first; a failure ends the function with that status.
292 mount_after_interval_sub $mdt_interval $MMP_MDSDEV $MMP_MDS \
293 $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
296 mount_after_interval_sub $ost_interval $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
# NOTE(review): $rc is assigned in a line not visible here; presumably it
# holds the status of the OST call above -- confirm.
298 if [ $rc -ne 0 ]; then
306 # Mount the shared target on the failover server
307 # while it is being unmounted on the primary server.
# NOTE(review): $device, $facet and $mnt_opts are assigned in lines not
# visible here, as are the unmount command and the command whose status is
# stored in unmount_rc.
308 mount_during_unmount() {
314 local failover_facet=$(get_failover_facet $facet)
# The target must first be mounted on the primary facet; give up otherwise.
320 log "Mounting $device on $facet..."
321 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
323 log "Unmounting $device on $facet..."
# Try the competing mount on the failover facet and keep its status.
327 log "Mounting $device on $failover_facet..."
328 start $failover_facet $device $mnt_opts
329 mount_rc=${PIPESTATUS[0]}
331 # check whether the first filesystem is still mounted
# mounted is non-empty when /proc/mounts, read through do_facet for $facet,
# has a whole-word match for the facet mount point.
332 local mntpt=$(facet_mntpt $facet)
333 local mounted=$(do_facet $facet "grep -w $mntpt /proc/mounts")
336 unmount_rc=${PIPESTATUS[0]}
# The failover mount succeeded: unmount it again. If the primary was still
# mounted at that point, both were mounted at once, which is a failure.
338 if [ $mount_rc -eq 0 ]; then
339 stop $failover_facet || return ${PIPESTATUS[0]}
341 if [ -n "$mounted" ]; then
342 error_noexit "mount during unmount of first filesystem worked"
# The unmount on the primary facet is expected to succeed in every case.
347 if [ $unmount_rc -ne 0 ]; then
348 error_noexit "unmount the $device on $facet should succeed"
355 # Mount the shared target on the failover server
356 # after it has been cleanly unmounted on the primary server.
# NOTE(review): $device, $facet and $mnt_opts are assigned in lines not
# visible here.
357 mount_after_unmount() {
363 local failover_facet=$(get_failover_facet $facet)
# Each of the three steps must succeed; the first failure is returned.
365 log "Mounting $device on $facet..."
366 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
368 log "Unmounting $device on $facet..."
369 stop $facet || return ${PIPESTATUS[0]}
371 log "Mounting $device on $failover_facet..."
372 start $failover_facet $device $mnt_opts || return ${PIPESTATUS[0]}
377 # Mount the shared target on the failover server after rebooting
378 # the primary server.
# NOTE(review): $device, $facet, $mnt_opts and $rc are assigned in lines not
# visible here.
379 mount_after_reboot() {
385 local failover_facet=$(get_failover_facet $facet)
388 log "Mounting $device on $facet..."
389 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
# With FAILURE_MODE set to HARD the facet is shut down and waited for.
# NOTE(review): the branch boundaries are not visible here; replay_barrier_nodf
# presumably belongs to the other branch -- confirm.
391 if [ "$FAILURE_MODE" = "HARD" ]; then
392 shutdown_facet $facet
394 wait_for_facet $facet
396 replay_barrier_nodf $facet
# After the reboot the failover mount is expected to succeed.
399 log "Mounting $device on $failover_facet..."
400 start $failover_facet $device $mnt_opts
402 if [ $rc -ne 0 ]; then
403 error_noexit "mount $device on $failover_facet should succeed"
404 stop $facet || return ${PIPESTATUS[0]}
411 # Run e2fsck on the Lustre server target.
# Announces the check, hands "$E2FSCK $opts $device" to do_facet for $facet
# and returns the status of that call.
# NOTE(review): $facet, $device and $opts are assigned in lines not visible
# here; callers pass them in that order.
419 echo "Running e2fsck on the device $device on $facet..."
420 do_facet $facet "$E2FSCK $opts $device"
421 return ${PIPESTATUS[0]}
424 # Check whether there are failover pairs for MDS and OSS servers.
# A facet whose failover facet has the same name has no failover partner; the
# tests are skipped when that is the case for either the MDS or the OSS.
425 check_failover_pair() {
426 [ "$MMP_MDS" = "$MMP_MDS_FAILOVER" -o "$MMP_OSS" = "$MMP_OSS_FAILOVER" ] &&
427 skip_env "failover pair is needed"
433 # Test 1 - two mounts at the same time.
# Interval 0 for both MDT and OST: the second mount starts without delay.
# Afterwards the services are stopped on the primary and failover sides.
437 mount_after_interval 0 0 || return ${PIPESTATUS[0]}
438 stop_services primary || return ${PIPESTATUS[0]}
439 stop_services failover || return ${PIPESTATUS[0]}
441 run_test 1 "two mounts at the same time"
443 # Test 2 - one mount delayed by mmp update interval.
# The delay for each target is the MMP update interval read from it.
447 local mdt_interval=$(get_mmp_update_interval $MMP_MDS $MMP_MDSDEV)
448 local ost_interval=$(get_mmp_update_interval $MMP_OSS $MMP_OSTDEV)
450 mount_after_interval $mdt_interval $ost_interval ||
451 return ${PIPESTATUS[0]}
452 stop_services primary || return ${PIPESTATUS[0]}
454 run_test 2 "one mount delayed by mmp update interval"
456 # Test 3 - one mount delayed by 2x mmp check interval.
460 local mdt_interval=$(get_mmp_check_interval $MMP_MDS $MMP_MDSDEV)
461 local ost_interval=$(get_mmp_check_interval $MMP_OSS $MMP_OSTDEV)
463 mdt_interval=$((2 * $mdt_interval + 1))
464 ost_interval=$((2 * $ost_interval + 1))
466 mount_after_interval $mdt_interval $ost_interval ||
467 return ${PIPESTATUS[0]}
468 stop_services primary || return ${PIPESTATUS[0]}
470 run_test 3 "one mount delayed by 2x mmp check interval"
472 # Test 4 - one mount delayed by > 2x mmp check interval.
# Start from the MMP check interval of each target.
476 local mdt_interval=$(get_mmp_check_interval $MMP_MDS $MMP_MDSDEV)
477 local ost_interval=$(get_mmp_check_interval $MMP_OSS $MMP_OSTDEV)
# The delay used is four times the check interval.
479 mdt_interval=$((4 * $mdt_interval))
480 ost_interval=$((4 * $ost_interval))
482 mount_after_interval $mdt_interval $ost_interval ||
483 return ${PIPESTATUS[0]}
484 stop_services primary || return ${PIPESTATUS[0]}
486 run_test 4 "one mount delayed by > 2x mmp check interval"
488 # Test 5 - mount during unmount of the first filesystem.
# MDT check first; a failure ends the test with that status.
493 mount_during_unmount $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS ||
494 return ${PIPESTATUS[0]}
# The MDS is mounted on the primary facet for the duration of the OST check
# and stopped again afterwards.
497 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
498 mount_during_unmount $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
500 stop $MMP_MDS || return ${PIPESTATUS[0]}
503 run_test 5 "mount during unmount of the first filesystem"
505 # Test 6 - mount after clean unmount.
# MDT check first; a failure ends the test with that status.
510 mount_after_unmount $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS ||
511 return ${PIPESTATUS[0]}
514 mount_after_unmount $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
# NOTE(review): $rc is assigned in a line not visible here; presumably the
# status of the OST check above. On failure the MDS failover facet is stopped.
516 if [ $rc -ne 0 ]; then
517 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
# Normal path: stop the services on the failover side.
521 stop_services failover || return ${PIPESTATUS[0]}
523 run_test 6 "mount after clean unmount"
525 # Test 7 - mount after reboot.
# MDT check first; a failure ends the test with that status.
530 mount_after_reboot $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS ||
531 return ${PIPESTATUS[0]}
534 mount_after_reboot $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
# NOTE(review): $rc is assigned in a line not visible here; presumably the
# status of the OST check above. On failure both MDS facets are stopped.
536 if [ $rc -ne 0 ]; then
537 stop $MMP_MDS || return ${PIPESTATUS[0]}
538 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
# Normal path: stop the services on the failover side, then the primary side.
542 stop_services failover || return ${PIPESTATUS[0]}
543 stop_services primary || return ${PIPESTATUS[0]}
545 run_test 7 "mount after reboot"
547 # Test 8 - mount during e2fsck (should never succeed).
# NOTE(review): $new_interval is assigned in a line not visible here, and the
# comment below is cut off in this copy of the file.
553 # After writing a new sequence number into the MMP block, e2fsck will
554 # sleep at least (2 * new_interval + 1) seconds before it goes into
# MDT: remember the current update interval, switch to the test interval and
# run e2fsck in the background.
559 saved_interval=$(get_mmp_update_interval $MMP_MDS $MMP_MDSDEV)
560 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $new_interval
562 run_e2fsck $MMP_MDS $MMP_MDSDEV "-fy" &
# A mount on the failover facet that succeeds while e2fsck runs is a failure:
# unmount it and restore the saved interval.
566 if start $MMP_MDS_FAILOVER $MMP_MDSDEV $MDS_MOUNT_OPTS; then
568 "mount $MMP_MDSDEV on $MMP_MDS_FAILOVER should fail"
569 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
570 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $saved_interval
# Expected path: restore the saved interval.
575 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $saved_interval
# OST: same sequence.
579 saved_interval=$(get_mmp_update_interval $MMP_OSS $MMP_OSTDEV)
580 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $new_interval
582 run_e2fsck $MMP_OSS $MMP_OSTDEV "-fy" &
586 if start $MMP_OSS_FAILOVER $MMP_OSTDEV $OST_MOUNT_OPTS; then
588 "mount $MMP_OSTDEV on $MMP_OSS_FAILOVER should fail"
589 stop $MMP_OSS_FAILOVER || return ${PIPESTATUS[0]}
590 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $saved_interval
595 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $saved_interval
598 run_test 8 "mount during e2fsck"
600 # Test 9 - mount after aborted e2fsck (should never succeed).
# Mount both targets on the primary facets once and stop them again before
# the MMP blocks are modified. If the OST mount fails, the MDS is stopped.
602 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
603 if ! start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS; then
604 local rc=${PIPESTATUS[0]}
605 stop $MMP_MDS || return ${PIPESTATUS[0]}
608 stop_services primary || return ${PIPESTATUS[0]}
# MDT: put the MMP block into the fsck state; a mount must then fail.
610 mark_mmp_block $MMP_MDS $MMP_MDSDEV || return ${PIPESTATUS[0]}
612 log "Mounting $MMP_MDSDEV on $MMP_MDS..."
613 if start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS; then
614 error_noexit "mount $MMP_MDSDEV on $MMP_MDS should fail"
615 stop $MMP_MDS || return ${PIPESTATUS[0]}
# Put the MMP block back into the clean state.
619 reset_mmp_block $MMP_MDS $MMP_MDSDEV || return ${PIPESTATUS[0]}
# OST: same sequence.
621 mark_mmp_block $MMP_OSS $MMP_OSTDEV || return ${PIPESTATUS[0]}
623 log "Mounting $MMP_OSTDEV on $MMP_OSS..."
624 if start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS; then
625 error_noexit "mount $MMP_OSTDEV on $MMP_OSS should fail"
626 stop $MMP_OSS || return ${PIPESTATUS[0]}
630 reset_mmp_block $MMP_OSS $MMP_OSTDEV || return ${PIPESTATUS[0]}
633 run_test 9 "mount after aborted e2fsck"
635 # Test 10 - e2fsck with mounted filesystem.
# NOTE(review): $rc is assigned in lines not visible here; presumably it
# holds the status of the command just before each check -- confirm.
# MDT: mount on the primary facet, then run a read-only e2fsck from the
# failover facet.
639 log "Mounting $MMP_MDSDEV on $MMP_MDS..."
640 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
642 run_e2fsck $MMP_MDS_FAILOVER $MMP_MDSDEV "-fn"
645 # e2fsck is called with -n option (Open the filesystem read-only), so
646 # 0 (No errors) and 4 (File system errors left uncorrected) are the only
647 # acceptable exit codes in this case
648 if [ $rc -ne 0 ] && [ $rc -ne 4 ]; then
649 error_noexit "e2fsck $MMP_MDSDEV on $MMP_MDS_FAILOVER returned $rc"
650 stop $MMP_MDS || return ${PIPESTATUS[0]}
# OST: mount on the primary facet; if that fails the MDS is stopped.
654 log "Mounting $MMP_OSTDEV on $MMP_OSS..."
655 start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS
657 if [ $rc -ne 0 ]; then
658 stop $MMP_MDS || return ${PIPESTATUS[0]}
# Same read-only e2fsck from the OSS failover facet, same accepted codes.
662 run_e2fsck $MMP_OSS_FAILOVER $MMP_OSTDEV "-fn"
664 if [ $rc -ne 0 ] && [ $rc -ne 4 ]; then
665 error_noexit "e2fsck $MMP_OSTDEV on $MMP_OSS_FAILOVER returned $rc"
# CLEANUP_DM_DEV=true is set only for this stop_services call.
668 CLEANUP_DM_DEV=true stop_services primary || return ${PIPESTATUS[0]}
671 run_test 10 "e2fsck with mounted filesystem"
# Restore the FAIL_ON_ERROR value saved before the tests.
674 FAIL_ON_ERROR=$SAVED_FAIL_ON_ERROR
# MMP_RESTORE_MOUNT holds the word true or false and is executed as a
# command, so setupall runs only when a client mount existed at start-up.
677 $MMP_RESTORE_MOUNT && setupall
678 check_and_cleanup_lustre