From fb8bf83fabd45730345b3eb8c5289334174462be Mon Sep 17 00:00:00 2001
From: Jian Yu
Date: Sat, 4 May 2013 00:48:24 +0800
Subject: [PATCH] LU-3264 tests: support failover testing with ZFS
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This patch improves test-framework.sh to support failover testing
with ZFS under a shared storage configuration.

After formatting a ZFS target, the "cachefile=none" property will be
set on the ZFS storage pool so that the pool is not automatically
imported on system startup. The pool will then be exported so that
the import and export operations are handled by mount_facet() and
stop().

For SOFT failure mode (unmount and mount), "zpool export …" will be
run on the active server node to export the ZFS storage pool, and
"zpool import -f …" will be run on the failover partner node to
import the pool.

For HARD failure mode (power off and on), since the "cachefile=none"
property has been set on the ZFS storage pool, the pool will not be
re-imported automatically after the active server node is
power-cycled. "zpool import -f …" can then be run on the failover
partner node to import the pool.

Test-Parameters: envdefinitions=PTLDEBUG=-1,DEBUG_SIZE=128 \
mdtfilesystemtype=zfs mdsfilesystemtype=zfs ostfilesystemtype=zfs \
clientdistro=el6 serverdistro=el6 clientarch=x86_64 \
serverarch=x86_64 clientcount=4 osscount=2 mdscount=2 \
austeroptions=-R failover=true useiscsi=true \
testlist=recovery-double-scale

Signed-off-by: Jian Yu
Change-Id: I6b9646cb36f7fa4924af03a055ccb137187843a5
Reviewed-on: http://review.whamcloud.com/6258
Tested-by: Hudson
Reviewed-by: Li Wei
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Oleg Drokin
---
 lustre/tests/test-framework.sh | 227 ++++++++++++++++++++++++++++++++---------
 1 file changed, 181 insertions(+), 46 deletions(-)

diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index a128b2c..231b6c3 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -185,6 +185,7 @@ init_test_env() {
 	export ZFS=${ZFS:-zfs}
 	export ZPOOL=${ZPOOL:-zpool}
 	export ZDB=${ZDB:-zdb}
+	export PARTPROBE=${PARTPROBE:-partprobe}
 
 	#[ -d /r ] && export ROOT=${ROOT:-/r}
 	export TMP=${TMP:-$ROOT/tmp}
@@ -808,6 +809,137 @@ ostdevlabel() {
 }
 
 #
+# Get the device of a facet.
+#
+facet_device() {
+	local facet=$1
+	local device
+
+	case $facet in
+		mgs) device=$(mgsdevname) ;;
+		mds*) device=$(mdsdevname $(facet_number $facet)) ;;
+		ost*) device=$(ostdevname $(facet_number $facet)) ;;
+		fs2mds) device=$(mdsdevname 1_2) ;;
+		fs2ost) device=$(ostdevname 1_2) ;;
+		fs3ost) device=$(ostdevname 2_2) ;;
+		*) ;;
+	esac
+
+	echo -n $device
+}
+
+#
+# Get the virtual device of a facet.
+#
+facet_vdevice() {
+	local facet=$1
+	local device
+
+	case $facet in
+		mgs) device=$(mgsvdevname) ;;
+		mds*) device=$(mdsvdevname $(facet_number $facet)) ;;
+		ost*) device=$(ostvdevname $(facet_number $facet)) ;;
+		fs2mds) device=$(mdsvdevname 1_2) ;;
+		fs2ost) device=$(ostvdevname 1_2) ;;
+		fs3ost) device=$(ostvdevname 2_2) ;;
+		*) ;;
+	esac
+
+	echo -n $device
+}
+
+#
+# Re-read the partition table on the failover partner host.
+# After a ZFS storage pool is created on a shared device, the partition table
+# on the device may change. However, the operating system on the failover
+# host may not notice the change automatically. Without up-to-date partition
+# block devices, 'zpool import ..' cannot find the labels, whose positions are
+# relative to the beginning of the partition rather than that of the disk.
+#
+# This function runs partprobe on the failover host to make it re-read the
+# partition table.
+#
+refresh_partition_table() {
+	local facet=$1
+	local device=$2
+	local host
+
+	host=$(facet_passive_host $facet)
+	if [[ -n "$host" ]]; then
+		do_node $host "$PARTPROBE $device"
+	fi
+}
+
+#
+# Get ZFS storage pool name.
+#
+zpool_name() {
+	local facet=$1
+	local device
+	local poolname
+
+	device=$(facet_device $facet)
+	# poolname is string before "/"
+	poolname="${device%%/*}"
+
+	echo -n $poolname
+}
+
+#
+# Export ZFS storage pool.
+# Before exporting the pool, all datasets within the pool should be unmounted.
+#
+export_zpool() {
+	local facet=$1
+	shift
+	local opts="$@"
+	local poolname
+
+	poolname=$(zpool_name $facet)
+
+	if [[ -n "$poolname" ]]; then
+		do_facet $facet "! $ZPOOL list -H $poolname >/dev/null 2>&1 ||
+			$ZPOOL export $opts $poolname"
+	fi
+}
+
+#
+# Import ZFS storage pool.
+# Force importing, even if the pool appears to be potentially active.
+#
+import_zpool() {
+	local facet=$1
+	shift
+	local opts=${@:-"-o cachefile=none"}
+	local poolname
+
+	poolname=$(zpool_name $facet)
+
+	if [[ -n "$poolname" ]]; then
+		do_facet $facet "$ZPOOL import -f $opts $poolname"
+	fi
+}
+
+#
+# Set the "cachefile=none" property on ZFS storage pool so that the pool
+# is not automatically imported on system startup.
+#
+# In a failover environment, this will provide resource-level fencing which
+# will ensure that the same ZFS storage pool will not be imported concurrently
+# on different nodes.
+#
+disable_zpool_cache() {
+	local facet=$1
+	local poolname
+
+	poolname=$(zpool_name $facet)
+
+	if [[ -n "$poolname" ]]; then
+		do_facet $facet "$ZPOOL set cachefile=none $poolname"
+	fi
+}
+
+#
 # This and set_osd_param() shall be used to access OSD parameters
 # once existed under "obdfilter":
 #
@@ -935,6 +1067,11 @@ mount_facet() {
 		opts=$(csa_add "$opts" -o loop)
 	fi
 
+	if [[ $(facet_fstype $facet) == zfs ]]; then
+		# import ZFS storage pool
+		import_zpool $facet || return ${PIPESTATUS[0]}
+	fi
+
 	echo "Starting ${facet}: $opts ${!dev} $mntpt"
 	# for testing LU-482 error handling in mount_facets() and test_0a()
 	if [ -f $TMP/test-lu482-trigger ]; then
@@ -981,39 +1118,6 @@ start() {
 	return $RC
 }
 
-#
-# When a ZFS OSD is made read-only by replay_barrier(), its pool is "freezed".
-# Because stopping corresponding target may not clear this in-memory state, we
-# need to zap the pool from memory by exporting and reimporting the pool.
-#
-# Although the uberblocks are not updated when a pool is freezed, transactions
-# are still written to the disks. Modified blocks may be cached in memory when
-# tests try reading them back. The export-and-reimport process also evicts any
-# cached pool data from memory to provide the correct "data loss" semantics.
-#
-refresh_disk() {
-	local facet=$1
-	local fstype=$(facet_fstype $facet)
-	local _dev
-	local dev
-	local poolname
-
-	if [ "${fstype}" == "zfs" ]; then
-		_dev=$(facet_active $facet)_dev
-		dev=${!_dev} # expand _dev to its value, e.g. ${mds1_dev}
-		poolname="${dev%%/*}" # poolname is string before "/"
-
-		if [ "${poolname}" == "" ]; then
-			echo "invalid dataset name: $dev"
-			return
-		fi
-		do_facet $facet "cp /etc/zfs/zpool.cache /tmp/zpool.cache.back"
-		do_facet $facet "$ZPOOL export ${poolname}"
-		do_facet $facet "$ZPOOL import -f -c /tmp/zpool.cache.back \
-			${poolname}"
-	fi
-}
-
 stop() {
 	local running
 	local facet=$1
@@ -1028,9 +1132,14 @@ stop() {
 		do_facet ${facet} umount -d $@ $mntpt
 	fi
 
-    # umount should block, but we should wait for unrelated obd's
-    # like the MGS or MGC to also stop.
-    wait_exit_ST ${facet}
+	# umount should block, but we should wait for unrelated obd's
+	# like the MGS or MGC to also stop.
+	wait_exit_ST ${facet} || return ${PIPESTATUS[0]}
+
+	if [[ $(facet_fstype $facet) == zfs ]]; then
+		# export ZFS storage pool
+		export_zpool $facet
+	fi
 }
 
 # save quota version (both administrative and operational quotas)
@@ -1488,7 +1597,6 @@ reboot_facet() {
 	if [ "$FAILURE_MODE" = HARD ]; then
 		reboot_node $(facet_active_host $facet)
 	else
-		refresh_disk ${facet}
 		sleep 10
 	fi
 }
@@ -2151,7 +2259,7 @@ replay_barrier() {
 	do_facet $facet "sync; sync; sync"
 	df $MOUNT
 
-    # make sure there will be no seq change
+	# make sure there will be no seq change
 	local clients=${CLIENTS:-$HOSTNAME}
 	local f=fsa-\\\$\(hostname\)
 	do_nodes $clients "mcreate $MOUNT/$f; rm $MOUNT/$f"
@@ -2159,6 +2267,21 @@
 	local svc=${facet}_svc
 	do_facet $facet $LCTL --device ${!svc} notransno
+	#
+	# If a ZFS OSD is made read-only here, its pool is "frozen". This
+	# in-memory state has to be cleared by either rebooting the host or
+	# exporting and reimporting the pool.
+	#
+	# Although the uberblocks are not updated when a pool is frozen,
+	# transactions are still written to the disks. Modified blocks may be
+	# cached in memory when tests try reading them back. The
+	# export-and-reimport process also evicts any cached pool data from
+	# memory to provide the correct "data loss" semantics.
+	#
+	# In the test framework, the exporting and importing operations are
+	# handled by stop() and mount_facet() separately, which are used
+	# inside fail() and fail_abort().
+	#
 	do_facet $facet $LCTL --device ${!svc} readonly
 	do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}"
 	$LCTL mark "local REPLAY BARRIER on ${!svc}"
 
@@ -2212,7 +2335,6 @@ fail_nodf() {
 fail_abort() {
 	local facet=$1
 	stop $facet
-	refresh_disk ${facet}
 	change_active $facet
 	wait_for_facet $facet
 	mount_facet $facet -o abort_recovery
@@ -2638,13 +2760,26 @@ do_nodesv() {
 }
 
 add() {
-    local facet=$1
-    shift
-    # make sure its not already running
-    stop ${facet} -f
-    rm -f $TMP/${facet}active
-    [[ $facet = mds1 ]] && combined_mgs_mds && rm -f $TMP/mgsactive
-    do_facet ${facet} $MKFS $*
+	local facet=$1
+	shift
+	# make sure it's not already running
+	stop ${facet} -f
+	rm -f $TMP/${facet}active
+	[[ $facet = mds1 ]] && combined_mgs_mds && rm -f $TMP/mgsactive
+	do_facet ${facet} $MKFS $* || return ${PIPESTATUS[0]}
+
+	if [[ $(facet_fstype $facet) == zfs ]]; then
+		#
+		# After formatting a ZFS target, the "cachefile=none" property
+		# will be set on the ZFS storage pool so that the pool is not
+		# automatically imported on system startup. The pool will then
+		# be exported so that the import and export operations are
+		# handled by mount_facet() and stop() separately.
+		#
+		refresh_partition_table $facet $(facet_vdevice $facet)
+		disable_zpool_cache $facet
+		export_zpool $facet
+	fi
 }
 
 ostdevname() {
-- 
1.8.3.1
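
As a companion to the add() hunk above, here is a minimal standalone sketch
of the post-format steps the framework now performs for a ZFS target. The
pool name "lustre-ost1", device "/dev/sdb", and hosts "oss1" (active) and
"oss2" (failover partner) are hypothetical placeholders, not names defined
by this patch:

    #!/bin/bash
    # Hypothetical names, for illustration only.
    POOL=lustre-ost1     # ZFS storage pool backing the target
    DEVICE=/dev/sdb      # shared block device under the pool
    ACTIVE=oss1          # active server node
    PARTNER=oss2         # failover partner node

    # Formatting may change the partition table on the shared device, so
    # make the failover partner re-read it (refresh_partition_table()).
    ssh "$PARTNER" "partprobe $DEVICE"

    # Keep the pool out of the ZFS cache file so it is never imported
    # automatically at boot (disable_zpool_cache()).
    ssh "$ACTIVE" "zpool set cachefile=none $POOL"

    # Export the pool; from here on, mount_facet() and stop() own the
    # import/export cycle (export_zpool()).
    ssh "$ACTIVE" "zpool export $POOL"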
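
Under the same hypothetical names, a sketch of the failover flow itself; in
the framework these steps are driven by stop(), reboot_facet(), and
mount_facet() through export_zpool() and import_zpool():

    # SOFT failure mode (unmount and mount): export the pool on the
    # active node, then force-import it on the failover partner.
    ssh "$ACTIVE" "zpool export $POOL"
    ssh "$PARTNER" "zpool import -f -o cachefile=none $POOL"

    # HARD failure mode (power off and on): the active node is
    # power-cycled; because of cachefile=none the pool is not re-imported
    # there at boot, so the partner can force-import it safely.
    ssh "$PARTNER" "zpool import -f -o cachefile=none $POOL"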