Whamcloud - gitweb
LU-1895 tests: don't fail mmp test_5 due to race 55/32355/7
author Andreas Dilger <adilger@whamcloud.com>
Thu, 2 Aug 2018 15:50:42 +0000 (09:50 -0600)
committer Andreas Dilger <adilger@whamcloud.com>
Wed, 15 Aug 2018 21:38:06 +0000 (21:38 +0000)
In the mmp.sh test_5() mount_during_unmount() testing, it is possible
that the first filesystem unmounts successfully before the second
one starts, and there is no contention for the MMP block.

This caused the test to fail on a regular basis.  However, there is
still value in running this test, since non-MMP race conditions have
previously been seen in this area (OBD device refcount, etc).

Make mount_during_unmount() more robust, only failing if the first
filesystem is still mounted at the same time as the second one.

Author: Andreas Dilger <adilger@whamcloud.com>

Test-Parameters: trivial mdtfilesystemtype=ldiskfs failover=true ostfilesystemtype=ldiskfs osscount=2 mdscount=2 mdtcount=1 austeroptions=-R iscsi=1 testlist=mmp
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: James Nunez <jnunez@whamcloud.com>
Change-Id: I186b9ce0a5a0e1ed6f2b46895fec4a32e73ebbe5
Reviewed-on: https://review.whamcloud.com/32355
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Emoly Liu <emoly@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/tests/mmp.sh

index ba2dcbf..6a91d9c 100755 (executable)
@@ -310,43 +310,50 @@ mount_after_interval() {
 # Mount the shared target on the failover server
 # during unmounting it on the primary server.
 mount_during_unmount() {
-    local device=$1
-    shift
-    local facet=$1
-    shift
-    local mnt_opts="$@"
-    local failover_facet=$(get_failover_facet $facet)
+       local device=$1
+       shift
+       local facet=$1
+       shift
+       local mnt_opts="$@"
+       local failover_facet=$(get_failover_facet $facet)
 
-    local unmount_pid
-    local unmount_rc=0
-    local mount_rc=0
+       local unmount_pid
+       local unmount_rc=0
+       local mount_rc=0
 
-    log "Mounting $device on $facet..."
-    start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
+       log "Mounting $device on $facet..."
+       start $facet $device $mnt_opts || return ${PIPESTATUS[0]}
 
-    log "Unmounting $device on $facet..."
-    stop $facet &
-    unmount_pid=$!
+       log "Unmounting $device on $facet..."
+       stop $facet &
+       unmount_pid=$!
 
-    log "Mounting $device on $failover_facet..."
-    start $failover_facet $device $mnt_opts
-    mount_rc=${PIPESTATUS[0]}
+       log "Mounting $device on $failover_facet..."
+       start $failover_facet $device $mnt_opts
+       mount_rc=${PIPESTATUS[0]}
 
-    wait $unmount_pid
-    unmount_rc=${PIPESTATUS[0]}
+       # check whether the first filesystem is still mounted
+       local mntpt=$(facet_mntpt $facet)
+       local mounted=$(do_facet $facet "grep -w $mntpt /proc/mounts")
 
-    if [ $mount_rc -eq 0 ]; then
-        error_noexit "mount during unmount of the first filesystem should fail"
-        stop $failover_facet || return ${PIPESTATUS[0]}
-        return 1
-    fi
+       wait $unmount_pid
+       unmount_rc=${PIPESTATUS[0]}
 
-    if [ $unmount_rc -ne 0 ]; then
-        error_noexit "unmount the $device on $facet should succeed"
-        return $unmount_rc
-    fi
+       if [ $mount_rc -eq 0 ]; then
+               stop $failover_facet || return ${PIPESTATUS[0]}
 
-    return 0
+               if [ -n "$mounted" ]; then
+                       error_noexit "mount during unmount of first filesystem worked"
+                       return 1
+               fi
+       fi
+
+       if [ $unmount_rc -ne 0 ]; then
+               error_noexit "unmount the $device on $facet should succeed"
+               return $unmount_rc
+       fi
+
+       return 0
 }
 
 # Mount the shared target on the failover server