X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fmmp.sh;h=db5ce5ae08e63667249c51be398ee031677105f5;hb=5a6aa0e6d1583cc0d4c82ae8c95fb7b9856d6284;hp=2ac19dfeb4f3661b5de650f49cda39747a15c70e;hpb=c14194fd33ccb3a5a6f3899a111f72dfaad52c11;p=fs%2Flustre-release.git diff --git a/lustre/tests/mmp.sh b/lustre/tests/mmp.sh index 2ac19df..db5ce5a 100755 --- a/lustre/tests/mmp.sh +++ b/lustre/tests/mmp.sh @@ -1,5 +1,6 @@ #!/bin/bash -# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: +# -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*- +# vim:shiftwidth=4:softtabstop=4:tabstop=4: # # Tests for multiple mount protection (MMP) feature. # @@ -106,6 +107,15 @@ disable_mmp() { return ${PIPESTATUS[0]} } +# Set the MMP block to 'fsck' state +mark_mmp_block() { + local facet=$1 + local device=$2 + + do_facet $facet "$LUSTRE/tests/mmp_mark.sh $device" + return ${PIPESTATUS[0]} +} + # Reset the MMP block (if any) back to the clean state. reset_mmp_block() { local facet=$1 @@ -126,83 +136,113 @@ mmp_is_enabled() { # Get MMP update interval (in seconds) from the Lustre server target. get_mmp_update_interval() { - local facet=$1 - local device=$2 - local interval - - interval=$(do_facet $facet "$DEBUGFS -c -R dump_mmp $device 2>/dev/null \ - | grep 'MMP Update Interval' | cut -d' ' -f4") - [ -z "$interval" ] && interval=1 - - echo $interval + local facet=$1 + local device=$2 + local interval + + interval=$(do_facet $facet \ + "$DEBUGFS -c -R dump_mmp $device 2>$TMP/mmp.debugfs.msg" | + awk 'tolower($0) ~ /update.interval/ { print $NF }') + [ -z "$interval" ] && interval=5 && + do_facet $facet cat $TMP/mmp.debugfs.msg && + echo "$facet:$device: assume update interval=$interval" 1>&2 || + echo "$facet:$device: got actual update interval=$interval" 1>&2 + + echo $interval } # Get MMP check interval (in seconds) from the Lustre server target. get_mmp_check_interval() { - local facet=$1 - local device=$2 - local interval + local facet=$1 + local device=$2 + local interval + + interval=$(do_facet $facet \ + "$DEBUGFS -c -R dump_mmp $device 2>$TMP/mmp.debugfs.msg" | + awk 'tolower($0) ~ /check.interval/ { print $NF }') + [ -z "$interval" ] && interval=5 && + do_facet $facet cat $TMP/mmp.debugfs.msg && + echo "$facet:$device: assume check interval=$interval" 1>&2 || + echo "$facet:$device: got actual check interval=$interval" 1>&2 + + echo $interval +} - interval=$(do_facet $facet "$DEBUGFS -c -R dump_mmp $device 2>/dev/null \ - | grep 'MMP Check Interval' | cut -d' ' -f4") - [ -z "$interval" ] && interval=5 +# Adjust the MMP update interval (in seconds) on the Lustre server target. +# Specifying an interval of 0 means to use the default interval. +set_mmp_update_interval() { + local facet=$1 + local device=$2 + local interval=${3:-0} - echo $interval + do_facet $facet "$TUNE2FS -E mmp_update_interval=$interval $device" + return ${PIPESTATUS[0]} } +I_ENABLED_MDS=0 +I_ENABLED_OSS=0 + # Enable the MMP feature on the Lustre server targets. mmp_init() { - init_vars - - # The MMP feature is automatically enabled by mkfs.lustre for - # new file system at format time if failover is being used. - # Otherwise, the Lustre administrator has to manually enable - # this feature when the file system is unmounted. - - if [ -z "$mdsfailover_HOST" ]; then - log "Failover is not used on MDS, enabling MMP manually..." - enable_mmp $MMP_MDS $MMP_MDSDEV || \ - error "failed to enable MMP on $MMP_MDSDEV on $MMP_MDS" - fi - - if [ -z "$ostfailover_HOST" ]; then - log "Failover is not used on OSS, enabling MMP manually..." - enable_mmp $MMP_OSS $MMP_OSTDEV || \ - error "failed to enable MMP on $MMP_OSTDEV on $MMP_OSS" - fi - - # check whether the MMP feature is enabled or not - mmp_is_enabled $MMP_MDS $MMP_MDSDEV || \ - error "MMP was not enabled on $MMP_MDSDEV on $MMP_MDS" - - mmp_is_enabled $MMP_OSS $MMP_OSTDEV || \ - error "MMP was not enabled on $MMP_OSTDEV on $MMP_OSS" + init_vars + + if [ $(facet_fstype $MMP_MDS) != ldiskfs ]; then + skip "Only applicable to ldiskfs-based MDTs" + exit + fi + + if [ $(facet_fstype $MMP_OSS) != ldiskfs ]; then + skip "Only applicable to ldiskfs-based OSTs" + exit + fi + + mmp_is_enabled $MMP_MDS $MMP_MDSDEV || + { + log "MMP is not enabled on MDS, enabling it manually..." + enable_mmp $MMP_MDS $MMP_MDSDEV || + error "failed to enable MMP on $MMP_MDSDEV on $MMP_MDS" + I_ENABLED_MDS=1 + } + + mmp_is_enabled $MMP_OSS $MMP_OSTDEV || + { + log "MMP is not enabled on OSS, enabling it manually..." + enable_mmp $MMP_OSS $MMP_OSTDEV || + error "failed to enable MMP on $MMP_OSTDEV on $MMP_OSS" + I_ENABLED_OSS=1 + } + + # check whether the MMP feature is enabled or not + mmp_is_enabled $MMP_MDS $MMP_MDSDEV || + error "MMP was not enabled on $MMP_MDSDEV on $MMP_MDS" + + mmp_is_enabled $MMP_OSS $MMP_OSTDEV || + error "MMP was not enabled on $MMP_OSTDEV on $MMP_OSS" } # Disable the MMP feature on the Lustre server targets -# which did not use failover. mmp_fini() { - if [ -z "$mdsfailover_HOST" ]; then - log "Failover is not used on MDS, disabling MMP manually..." - disable_mmp $MMP_MDS $MMP_MDSDEV || \ - error "failed to disable MMP on $MMP_MDSDEV on $MMP_MDS" - mmp_is_enabled $MMP_MDS $MMP_MDSDEV && \ - error "MMP was not disabled on $MMP_MDSDEV on $MMP_MDS" - fi - - if [ -z "$ostfailover_HOST" ]; then - log "Failover is not used on OSS, disabling MMP manually..." - disable_mmp $MMP_OSS $MMP_OSTDEV || \ - error "failed to disable MMP on $MMP_OSTDEV on $MMP_OSS" - mmp_is_enabled $MMP_OSS $MMP_OSTDEV && \ - error "MMP was not disabled on $MMP_OSTDEV on $MMP_OSS" - fi - - return 0 + if [ $I_ENABLED_MDS -eq 1 ]; then + log "Disabling MMP on $MMP_MDSDEV on $MMP_MDS manually..." + disable_mmp $MMP_MDS $MMP_MDSDEV || + error "failed to disable MMP on $MMP_MDSDEV on $MMP_MDS" + mmp_is_enabled $MMP_MDS $MMP_MDSDEV && + error "MMP was not disabled on $MMP_MDSDEV on $MMP_MDS" + fi + + if [ $I_ENABLED_OSS -eq 1 ]; then + log "Disabling MMP on $MMP_OSTDEV on $MMP_OSS manually..." + disable_mmp $MMP_OSS $MMP_OSTDEV || + error "failed to disable MMP on $MMP_OSTDEV on $MMP_OSS" + mmp_is_enabled $MMP_OSS $MMP_OSTDEV && + error "MMP was not disabled on $MMP_OSTDEV on $MMP_OSS" + fi + + return 0 } -# Mount the shared target on the failover server after some interval it's +# Mount the shared target on the failover server after some interval it's # mounted on the primary server. mount_after_interval_sub() { local interval=$1 @@ -241,7 +281,7 @@ mount_after_interval_sub() { return ${PIPESTATUS[0]} return 1 elif [ $second_mount_rc -ne 0 -a $first_mount_rc -ne 0 ]; then - error_noexit "failed to mount on the failover pair $facet,$failover_facet" + error_noexit "mount failure on failover pair $facet,$failover_facet" return $first_mount_rc fi @@ -267,7 +307,7 @@ mount_after_interval() { return 0 } -# Mount the shared target on the failover server +# Mount the shared target on the failover server # during unmounting it on the primary server. mount_during_unmount() { local device=$1 @@ -309,7 +349,7 @@ mount_during_unmount() { return 0 } -# Mount the shared target on the failover server +# Mount the shared target on the failover server # after clean unmounting it on the primary server. mount_after_unmount() { local device=$1 @@ -323,7 +363,7 @@ mount_after_unmount() { start $facet $device $mnt_opts || return ${PIPESTATUS[0]} log "Unmounting $device on $facet..." - stop $facet || return ${PIPESTATUS[0]} + stop $facet || return ${PIPESTATUS[0]} log "Mounting $device on $failover_facet..." start $failover_facet $device $mnt_opts || return ${PIPESTATUS[0]} @@ -348,7 +388,7 @@ mount_after_reboot() { if [ "$FAILURE_MODE" = "HARD" ]; then shutdown_facet $facet reboot_facet $facet - wait_for $facet + wait_for_facet $facet else replay_barrier_nodf $facet fi @@ -367,15 +407,15 @@ mount_after_reboot() { # Run e2fsck on the Lustre server target. run_e2fsck() { - local facet=$1 - shift - local device=$1 - shift - local opts="$@" - - log "Running e2fsck on the device $device on $facet..." - do_facet $facet "$E2FSCK $opts $device" - return ${PIPESTATUS[0]} + local facet=$1 + shift + local device=$1 + shift + local opts="$@" + + echo "Running e2fsck on the device $device on $facet..." + do_facet $facet "$E2FSCK $opts $device" + return ${PIPESTATUS[0]} } # Check whether there are failover pairs for MDS and OSS servers. @@ -503,44 +543,59 @@ run_test 7 "mount after reboot" # Test 8 - mount during e2fsck (should never succeed). test_8() { - local e2fsck_pid - - run_e2fsck $MMP_MDS $MMP_MDSDEV "-fy" & - e2fsck_pid=$! - sleep 1 - - log "Mounting $MMP_MDSDEV on $MMP_MDS_FAILOVER..." - if start $MMP_MDS_FAILOVER $MMP_MDSDEV $MDS_MOUNT_OPTS; then - error_noexit "mount $MMP_MDSDEV on $MMP_MDS_FAILOVER should fail" - stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]} - return 1 - fi - - wait $e2fsck_pid - - echo - run_e2fsck $MMP_OSS $MMP_OSTDEV "-fy" & - e2fsck_pid=$! - sleep 1 - - log "Mounting $MMP_OSTDEV on $MMP_OSS_FAILOVER..." - if start $MMP_OSS_FAILOVER $MMP_OSTDEV $OST_MOUNT_OPTS; then - error_noexit "mount $MMP_OSTDEV on $MMP_OSS_FAILOVER should fail" - stop $MMP_OSS_FAILOVER || return ${PIPESTATUS[0]} - return 2 - fi - - wait $e2fsck_pid - return 0 + local e2fsck_pid + local saved_interval + local new_interval + + # After writing a new sequence number into the MMP block, e2fsck will + # sleep at least (2 * new_interval + 1) seconds before it goes into + # e2fsck passes. + new_interval=30 + + # MDT + saved_interval=$(get_mmp_update_interval $MMP_MDS $MMP_MDSDEV) + set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $new_interval + + run_e2fsck $MMP_MDS $MMP_MDSDEV "-fy" & + e2fsck_pid=$! + sleep 5 + + if start $MMP_MDS_FAILOVER $MMP_MDSDEV $MDS_MOUNT_OPTS; then + error_noexit \ + "mount $MMP_MDSDEV on $MMP_MDS_FAILOVER should fail" + stop $MMP_MDS_FAILOVER || return ${PIPESTATUS[0]} + set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $saved_interval + return 1 + fi + + wait $e2fsck_pid + set_mmp_update_interval $MMP_MDS $MMP_MDSDEV $saved_interval + + # OST + echo + saved_interval=$(get_mmp_update_interval $MMP_OSS $MMP_OSTDEV) + set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $new_interval + + run_e2fsck $MMP_OSS $MMP_OSTDEV "-fy" & + e2fsck_pid=$! + sleep 5 + + if start $MMP_OSS_FAILOVER $MMP_OSTDEV $OST_MOUNT_OPTS; then + error_noexit \ + "mount $MMP_OSTDEV on $MMP_OSS_FAILOVER should fail" + stop $MMP_OSS_FAILOVER || return ${PIPESTATUS[0]} + set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $saved_interval + return 2 + fi + + wait $e2fsck_pid + set_mmp_update_interval $MMP_OSS $MMP_OSTDEV $saved_interval + return 0 } run_test 8 "mount during e2fsck" # Test 9 - mount after aborted e2fsck (should never succeed). test_9() { - local e2fsck_pid - local mdt_mmp_check_interval - local ost_mmp_check_interval - start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS || return ${PIPESTATUS[0]} if ! start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS; then local rc=${PIPESTATUS[0]} @@ -549,11 +604,7 @@ test_9() { fi stop_services primary || return ${PIPESTATUS[0]} - mdt_mmp_check_interval=$(get_mmp_check_interval $MMP_MDS $MMP_MDSDEV) - run_e2fsck $MMP_MDS_FAILOVER $MMP_MDSDEV "-fy" & - e2fsck_pid=$! - sleep $((2 * $mdt_mmp_check_interval + 1)) - kill -s ABRT $e2fsck_pid + mark_mmp_block $MMP_MDS $MMP_MDSDEV || return ${PIPESTATUS[0]} log "Mounting $MMP_MDSDEV on $MMP_MDS..." if start $MMP_MDS $MMP_MDSDEV $MDS_MOUNT_OPTS; then @@ -564,12 +615,7 @@ test_9() { reset_mmp_block $MMP_MDS $MMP_MDSDEV || return ${PIPESTATUS[0]} - echo - ost_mmp_check_interval=$(get_mmp_check_interval $MMP_OSS $MMP_OSTDEV) - run_e2fsck $MMP_OSS_FAILOVER $MMP_OSTDEV "-fy" & - e2fsck_pid=$! - sleep $((2 * $ost_mmp_check_interval + 1)) - kill -s ABRT $e2fsck_pid + mark_mmp_block $MMP_OSS $MMP_OSTDEV || return ${PIPESTATUS[0]} log "Mounting $MMP_OSTDEV on $MMP_OSS..." if start $MMP_OSS $MMP_OSTDEV $OST_MOUNT_OPTS; then @@ -592,10 +638,14 @@ test_10() { run_e2fsck $MMP_MDS_FAILOVER $MMP_MDSDEV "-fn" rc=${PIPESTATUS[0]} - if [ $rc -ne 8 ]; then - error_noexit "e2fsck $MMP_MDSDEV on $MMP_MDS_FAILOVER should return 8" + + # e2fsck is called with -n option (Open the filesystem read-only), so + # 0 (No errors) and 4 (File system errors left uncorrected) are the only + # acceptable exit codes in this case + if [ $rc -ne 0 ] && [ $rc -ne 4 ]; then + error_noexit "e2fsck $MMP_MDSDEV on $MMP_MDS_FAILOVER returned $rc" stop $MMP_MDS || return ${PIPESTATUS[0]} - [ $rc -ne 0 ] && return $rc || return 1 + return $rc fi log "Mounting $MMP_OSTDEV on $MMP_OSS..." @@ -608,10 +658,8 @@ test_10() { run_e2fsck $MMP_OSS_FAILOVER $MMP_OSTDEV "-fn" rc=${PIPESTATUS[0]} - if [ $rc -ne 8 ]; then - error_noexit "e2fsck $MMP_OSTDEV on $MMP_OSS_FAILOVER should return 8" - stop_services primary || return ${PIPESTATUS[0]} - [ $rc -ne 0 ] && return $rc || return 1 + if [ $rc -ne 0 ] && [ $rc -ne 4 ]; then + error_noexit "e2fsck $MMP_OSTDEV on $MMP_OSS_FAILOVER returned $rc" fi stop_services primary || return ${PIPESTATUS[0]} @@ -622,8 +670,6 @@ run_test 10 "e2fsck with mounted filesystem" mmp_fini FAIL_ON_ERROR=$SAVED_FAIL_ON_ERROR -equals_msg $(basename $0): test complete +complete $SECONDS $MMP_RESTORE_MOUNT && setupall -[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && \ - grep -q FAIL $TESTSUITELOG && exit 1 || true -echo "$0: completed" +exit_status