3 # Tests for the multiple mount protection (MMP) feature.
5 # Run select tests by setting ONLY, or as arguments to the script.
6 # Skip specific tests by setting EXCEPT.
8 # e.g. ONLY="5 6" or ONLY="`seq 8 11`" or EXCEPT="7"
13 LUSTRE=${LUSTRE:-$(dirname $0)/..}
14 . $LUSTRE/tests/test-framework.sh
18 # bug number for skipped test:
19 ALWAYS_EXCEPT=$MMP_EXCEPT
20 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
24 remote_mds_nodsh && skip "remote MDS with nodsh"
25 remote_ost_nodsh && skip "remote OST with nodsh"
27 # unmount and clean up the Lustre filesystem
28 MMP_RESTORE_MOUNT=false
29 if is_mounted $MOUNT || is_mounted $MOUNT2; then
31 MMP_RESTORE_MOUNT=true
34 SAVED_FAIL_ON_ERROR=$FAIL_ON_ERROR
37 # Get the failover facet.
38 get_failover_facet() {
40 local failover_facet=${facet}failover
42 local host=$(facet_host $facet)
43 local failover_host=$(facet_host $failover_facet)
45 [ -z "$failover_host" -o "$host" = "$failover_host" ] && \
51 # Initialize the variables for Lustre servers and targets.
53 MMP_MDS=${MMP_MDS:-$SINGLEMDS}
54 MMP_MDS_FAILOVER=$(get_failover_facet $MMP_MDS)
56 local mds_num=$(echo $MMP_MDS | tr -d "mds")
57 MMP_MDSDEV=$(mdsdevname $mds_num)
59 MMP_OSS=${MMP_OSS:-ost1}
60 MMP_OSS_FAILOVER=$(get_failover_facet $MMP_OSS)
62 local oss_num=$(echo $MMP_OSS | tr -d "ost")
63 MMP_OSTDEV=$(ostdevname $oss_num)
66 # Stop the MDS and OSS services on the primary or failover servers.
74 if [ "$flavor" = "failover" ]; then
75 mds_facet=$MMP_MDS_FAILOVER
76 oss_facet=$MMP_OSS_FAILOVER
82 stop $mds_facet $opts || return ${PIPESTATUS[0]}
83 stop $oss_facet $opts || return ${PIPESTATUS[0]}
86 # Enable the MMP feature.
91 do_facet $facet "$TUNE2FS -O mmp $device"
92 return ${PIPESTATUS[0]}
95 # Disable the MMP feature.
100 do_facet $facet "$TUNE2FS -O ^mmp $device"
101 return ${PIPESTATUS[0]}
104 # Set the MMP block to 'fsck' state
109 do_facet $facet "$LUSTRE/tests/mmp_mark.sh $device"
110 return ${PIPESTATUS[0]}
113 # Reset the MMP block (if any) back to the clean state.
118 do_facet $facet "$TUNE2FS -f -E clear-mmp $device"
119 return ${PIPESTATUS[0]}
122 # Check whether the MMP feature is enabled or not.
127 do_facet $facet "$DUMPE2FS -h $device | grep mmp"
128 return ${PIPESTATUS[0]}
131 # Get MMP update interval (in seconds) from the Lustre server target.
132 get_mmp_update_interval() {
137 interval=$(do_facet $facet \
138 "$DEBUGFS -c -R dump_mmp $device 2>$TMP/mmp.debugfs.msg" |
139 awk 'tolower($0) ~ /update.interval/ { print $NF }')
140 [ -z "$interval" ] && interval=5 &&
141 do_facet $facet cat $TMP/mmp.debugfs.msg &&
142 echo "$facet:$device: assume update interval=$interval" 1>&2 ||
143 echo "$facet:$device: got actual update interval=$interval" 1>&2
148 # Get MMP check interval (in seconds) from the Lustre server target.
149 get_mmp_check_interval() {
154 interval=$(do_facet $facet \
155 "$DEBUGFS -c -R dump_mmp $device 2>$TMP/mmp.debugfs.msg" |
156 awk 'tolower($0) ~ /check.interval/ { print $NF }')
157 [ -z "$interval" ] && interval=5 &&
158 do_facet $facet cat $TMP/mmp.debugfs.msg &&
159 echo "$facet:$device: assume check interval=$interval" 1>&2 ||
160 echo "$facet:$device: got actual check interval=$interval" 1>&2
165 # Adjust the MMP update interval (in seconds) on the Lustre server target.
166 # Specifying an interval of 0 means to use the default interval.
167 set_mmp_update_interval() {
170 local interval=${3:-0}
172 do_facet $facet "$TUNE2FS -E mmp_update_interval=$interval $device"
173 return ${PIPESTATUS[0]}
179 # Enable the MMP feature on the Lustre server targets.
183 if [ $(facet_fstype $MMP_MDS) != ldiskfs ]; then
184 skip_env "ldiskfs only test"
187 if [ $(facet_fstype $MMP_OSS) != ldiskfs ]; then
188 skip_env "ldiskfs only test"
191 mmp_is_enabled $MMP_MDS $MMP_MDSDEV ||
193 log "MMP is not enabled on MDS, enabling it manually..."
194 enable_mmp $MMP_MDS $MMP_MDSDEV ||
195 error "failed to enable MMP on $MMP_MDSDEV on $MMP_MDS"
199 mmp_is_enabled $MMP_OSS $MMP_OSTDEV ||
201 log "MMP is not enabled on OSS, enabling it manually..."
202 enable_mmp $MMP_OSS $MMP_OSTDEV ||
203 error "failed to enable MMP on $MMP_OSTDEV on $MMP_OSS"
207 # check whether the MMP feature is enabled or not
208 mmp_is_enabled $MMP_MDS $MMP_MDSDEV ||
209 error "MMP was not enabled on $MMP_MDSDEV on $MMP_MDS"
211 mmp_is_enabled $MMP_OSS $MMP_OSTDEV ||
212 error "MMP was not enabled on $MMP_OSTDEV on $MMP_OSS"
215 # Disable the MMP feature on the Lustre server targets
218 if [ $I_ENABLED_MDS -eq 1 ]; then
219 log "Disabling MMP on $MMP_MDSDEV on $MMP_MDS manually..."
220 disable_mmp $MMP_MDS $MMP_MDSDEV ||
221 error "failed to disable MMP on $MMP_MDSDEV on $MMP_MDS"
222 mmp_is_enabled $MMP_MDS $MMP_MDSDEV &&
223 error "MMP was not disabled on $MMP_MDSDEV on $MMP_MDS"
226 if [ $I_ENABLED_OSS -eq 1 ]; then
227 log "Disabling MMP on $MMP_OSTDEV on $MMP_OSS manually..."
228 disable_mmp $MMP_OSS $MMP_OSTDEV ||
229 error "failed to disable MMP on $MMP_OSTDEV on $MMP_OSS"
230 mmp_is_enabled $MMP_OSS $MMP_OSTDEV &&
231 error "MMP was not disabled on $MMP_OSTDEV on $MMP_OSS"
237 # Mount the shared target on the failover server some interval after it is
238 # mounted on the primary server.
239 mount_after_interval_sub() {
247 local failover_facet=$(get_failover_facet $facet)
250 local first_mount_rc=0
251 local second_mount_rc=0
253 log "Mounting $device on $facet..."
254 start $facet $device $opts &
257 if [ $interval -ne 0 ]; then
258 log "sleep $interval..."
262 log "Mounting $device on $failover_facet..."
263 start $failover_facet $device $opts
264 second_mount_rc=${PIPESTATUS[0]}
267 first_mount_rc=${PIPESTATUS[0]}
269 if [ $second_mount_rc -eq 0 -a $first_mount_rc -eq 0 ]; then
270 error_noexit "one mount delayed by mmp interval $interval should fail"
271 stop $facet || return ${PIPESTATUS[0]}
272 [ "$failover_facet" != "$facet" ] && stop $failover_facet || \
273 return ${PIPESTATUS[0]}
275 elif [ $second_mount_rc -ne 0 -a $first_mount_rc -ne 0 ]; then
276 error_noexit "mount failure on failover pair $facet,$failover_facet"
277 return $first_mount_rc
283 mount_after_interval() {
284 local mdt_interval=$1
285 local ost_interval=$2
288 mount_after_interval_sub $mdt_interval $MMP_MDSDEV $MMP_MDS \
289 $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
292 mount_after_interval_sub $ost_interval $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
294 if [ $rc -ne 0 ]; then
302 # Mount the shared target on the failover server
303 # while it is being unmounted on the primary server.
304 mount_during_unmount() {
310 local failover_facet=$(get_failover_facet $facet)
316 log "Mounting $device on $facet..."
317 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
319 log "Unmounting $device on $facet..."
323 log "Mounting $device on $failover_facet..."
324 start $failover_facet $device $mnt_opts
325 mount_rc=${PIPESTATUS[0]}
327 # check whether the first filesystem is still mounted
328 local mntpt=$(facet_mntpt $facet)
329 local mounted=$(do_facet $facet "grep -w $mntpt /proc/mounts")
332 unmount_rc=${PIPESTATUS[0]}
334 if [ $mount_rc -eq 0 ]; then
335 stop $failover_facet || return ${PIPESTATUS[0]}
337 if [ -n "$mounted" ]; then
338 error_noexit "mount during unmount of first filesystem worked"
343 if [ $unmount_rc -ne 0 ]; then
344 error_noexit "unmount the $device on $facet should succeed"
351 # Mount the shared target on the failover server
352 # after cleanly unmounting it on the primary server.
353 mount_after_unmount() {
359 local failover_facet=$(get_failover_facet $facet)
361 log "Mounting $device on $facet..."
362 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
364 log "Unmounting $device on $facet..."
365 stop $facet || return ${PIPESTATUS[0]}
367 log "Mounting $device on $failover_facet..."
368 start $failover_facet $device $mnt_opts || return ${PIPESTATUS[0]}
373 # Mount the shared target on the failover server after rebooting
374 # the primary server.
375 mount_after_reboot() {
381 local failover_facet=$(get_failover_facet $facet)
384 log "Mounting $device on $facet..."
385 start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
387 if [ "$FAILURE_MODE" = "HARD" ]; then
388 shutdown_facet $facet
390 wait_for_facet $facet
392 replay_barrier_nodf $facet
395 log "Mounting $device on $failover_facet..."
396 start $failover_facet $device $mnt_opts
398 if [ $rc -ne 0 ]; then
399 error_noexit "mount $device on $failover_facet should succeed"
400 stop $facet || return ${PIPESTATUS[0]}
407 # Run e2fsck on the Lustre server target.
414 # turn on pfsck if it is supported
415 do_facet $facet $E2FSCK -h 2>&1 | grep -qw -- -m && opts+=" -m8"
416 echo "Running e2fsck on the device $device on $facet..."
417 do_facet $facet "$E2FSCK $opts $device"
418 return ${PIPESTATUS[0]}
421 # Check whether there are failover pairs for MDS and OSS servers.
422 check_failover_pair() {
423 [ "$MMP_MDS" = "$MMP_MDS_FAILOVER" -o "$MMP_OSS" = "$MMP_OSS_FAILOVER" ] &&
424 skip_env "failover pair is needed"
430 # Test 1 - two mounts at the same time.
434 mount_after_interval 0 0 || return ${PIPESTATUS[0]}
435 stop_services primary || return ${PIPESTATUS[0]}
436 stop_services failover || return ${PIPESTATUS[0]}
438 run_test 1 "two mounts at the same time"
440 # Test 2 - one mount delayed by mmp update interval.
444 local mdt_interval=$(get_mmp_update_interval $MMP_MDS $MMP_MDSDEV)
445 local ost_interval=$(get_mmp_update_interval $MMP_OSS $MMP_OSTDEV)
447 mount_after_interval $mdt_interval $ost_interval ||
448 return ${PIPESTATUS[0]}
449 stop_services primary || return ${PIPESTATUS[0]}
451 run_test 2 "one mount delayed by mmp update interval"
453 # Test 3 - one mount delayed by 2x mmp check interval.
457 local mdt_interval=$(get_mmp_check_interval $MMP_MDS $MMP_MDSDEV)
458 local ost_interval=$(get_mmp_check_interval $MMP_OSS $MMP_OSTDEV)
460 mdt_interval=$((2 * $mdt_interval + 1))
461 ost_interval=$((2 * $ost_interval + 1))
463 mount_after_interval $mdt_interval $ost_interval ||
464 return ${PIPESTATUS[0]}
465 stop_services primary || return ${PIPESTATUS[0]}
467 run_test 3 "one mount delayed by 2x mmp check interval"
469 # Test 4 - one mount delayed by > 2x mmp check interval.
473 local mdt_interval=$(get_mmp_check_interval $MMP_MDS $MMP_MDSDEV)
474 local ost_interval=$(get_mmp_check_interval $MMP_OSS $MMP_OSTDEV)
476 mdt_interval=$((4 * $mdt_interval))
477 ost_interval=$((4 * $ost_interval))
479 mount_after_interval $mdt_interval $ost_interval ||
480 return ${PIPESTATUS[0]}
481 stop_services primary || return ${PIPESTATUS[0]}
483 run_test 4 "one mount delayed by > 2x mmp check interval"
485 # Test 5 - mount during unmount of the first filesystem.
490 mount_during_unmount $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS ||
491 return ${PIPESTATUS[0]}
494 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
495 mount_during_unmount $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
497 stop $MMP_MDS || return ${PIPESTATUS[0]}
500 run_test 5 "mount during unmount of the first filesystem"
502 # Test 6 - mount after clean unmount.
507 mount_after_unmount $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS ||
508 return ${PIPESTATUS[0]}
511 mount_after_unmount $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
513 if [ $rc -ne 0 ]; then
514 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
518 stop_services failover || return ${PIPESTATUS[0]}
520 run_test 6 "mount after clean unmount"
522 # Test 7 - mount after reboot.
527 mount_after_reboot $MMP_MDSDEV $MMP_MDS $MDS_MOUNT_OPTS ||
528 return ${PIPESTATUS[0]}
531 mount_after_reboot $MMP_OSTDEV $MMP_OSS $OST_MOUNT_OPTS
533 if [ $rc -ne 0 ]; then
534 stop $MMP_MDS || return ${PIPESTATUS[0]}
535 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
539 stop_services failover || return ${PIPESTATUS[0]}
540 stop_services primary || return ${PIPESTATUS[0]}
542 run_test 7 "mount after reboot"
544 # Test 8 - mount during e2fsck (should never succeed).
550 # After writing a new sequence number into the MMP block, e2fsck will
551 # sleep at least (2 * new_interval + 1) seconds before it goes into
556 saved_interval=$(get_mmp_update_interval $MMP_MDS $MMP_MDSDEV)
557 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $new_interval
559 run_e2fsck $MMP_MDS $MMP_MDSDEV "-fy" &
563 if start $MMP_MDS_FAILOVER $MMP_MDSDEV $MDS_MOUNT_OPTS; then
565 "mount $MMP_MDSDEV on $MMP_MDS_FAILOVER should fail"
566 stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]}
567 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $saved_interval
572 set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $saved_interval
576 saved_interval=$(get_mmp_update_interval $MMP_OSS $MMP_OSTDEV)
577 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $new_interval
579 run_e2fsck $MMP_OSS $MMP_OSTDEV "-fy" &
583 if start $MMP_OSS_FAILOVER $MMP_OSTDEV $OST_MOUNT_OPTS; then
585 "mount $MMP_OSTDEV on $MMP_OSS_FAILOVER should fail"
586 stop $MMP_OSS_FAILOVER || return ${PIPESTATUS[0]}
587 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $saved_interval
592 set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $saved_interval
595 run_test 8 "mount during e2fsck"
597 # Test 9 - mount after aborted e2fsck (should never succeed).
599 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
600 if ! start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS; then
601 local rc=${PIPESTATUS[0]}
602 stop $MMP_MDS || return ${PIPESTATUS[0]}
605 stop_services primary || return ${PIPESTATUS[0]}
607 mark_mmp_block $MMP_MDS $MMP_MDSDEV || return ${PIPESTATUS[0]}
609 log "Mounting $MMP_MDSDEV on $MMP_MDS..."
610 if start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS; then
611 error_noexit "mount $MMP_MDSDEV on $MMP_MDS should fail"
612 stop $MMP_MDS || return ${PIPESTATUS[0]}
616 reset_mmp_block $MMP_MDS $MMP_MDSDEV || return ${PIPESTATUS[0]}
618 mark_mmp_block $MMP_OSS $MMP_OSTDEV || return ${PIPESTATUS[0]}
620 log "Mounting $MMP_OSTDEV on $MMP_OSS..."
621 if start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS; then
622 error_noexit "mount $MMP_OSTDEV on $MMP_OSS should fail"
623 stop $MMP_OSS || return ${PIPESTATUS[0]}
627 reset_mmp_block $MMP_OSS $MMP_OSTDEV || return ${PIPESTATUS[0]}
630 run_test 9 "mount after aborted e2fsck"
632 # Test 10 - e2fsck with mounted filesystem.
636 log "Mounting $MMP_MDSDEV on $MMP_MDS..."
637 start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]}
639 run_e2fsck $MMP_MDS_FAILOVER $MMP_MDSDEV "-fn"
642 # e2fsck is called with the -n option (open the filesystem read-only), so
643 # 0 (no errors) and 4 (file system errors left uncorrected) are the only
644 # acceptable exit codes in this case.
645 if [ $rc -ne 0 ] && [ $rc -ne 4 ]; then
646 error_noexit "e2fsck $MMP_MDSDEV on $MMP_MDS_FAILOVER returned $rc"
647 stop $MMP_MDS || return ${PIPESTATUS[0]}
651 log "Mounting $MMP_OSTDEV on $MMP_OSS..."
652 start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS
654 if [ $rc -ne 0 ]; then
655 stop $MMP_MDS || return ${PIPESTATUS[0]}
659 run_e2fsck $MMP_OSS_FAILOVER $MMP_OSTDEV "-fn"
661 if [ $rc -ne 0 ] && [ $rc -ne 4 ]; then
662 error_noexit "e2fsck $MMP_OSTDEV on $MMP_OSS_FAILOVER returned $rc"
665 CLEANUP_DM_DEV=true stop_services primary || return ${PIPESTATUS[0]}
668 run_test 10 "e2fsck with mounted filesystem"
671 FAIL_ON_ERROR=$SAVED_FAIL_ON_ERROR
674 $MMP_RESTORE_MOUNT && setupall
675 check_and_cleanup_lustre