From 17b3b71a676344cf8a95ee0ac7bc4e32faad7a89 Mon Sep 17 00:00:00 2001 From: grev Date: Tue, 11 Nov 2008 12:20:29 +0000 Subject: [PATCH] b=16551 i=Adilger conf-sanity test_32* fix to not be skipped for remote setup --- lustre/tests/conf-sanity.sh | 175 +++++++++++++++++++++++++++++--------------- 1 file changed, 114 insertions(+), 61 deletions(-) diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 2aa9a71..49bd61e 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -755,6 +755,29 @@ test_26() { } run_test 26 "MDT startup failure cleans LOV (should return errs)" +wait_update () { + local node=$1 + local TEST=$2 + local FINAL=$3 + + local RESULT + local MAX=90 + local WAIT=0 + local sleep=5 + while [ $WAIT -lt $MAX ]; do + RESULT=$(do_node $node "$TEST") + if [ $RESULT -eq $FINAL ]; then + echo "Updated config after $WAIT sec: wanted $FINAL got $RESULT" + return 0 + fi + WAIT=$((WAIT + sleep)) + echo "Waiting $((MAX - WAIT)) secs for config update" + sleep $sleep + done + echo "Config update not seen after $MAX sec: wanted $FINAL got $RESULT" + return 3 +} + set_and_check() { local myfacet=$1 local TEST=$2 @@ -768,23 +791,8 @@ set_and_check() { fi echo "Setting $PARAM from $ORIG to $FINAL" do_facet mds "$LCTL conf_param $PARAM=$FINAL" || error conf_param failed - local RESULT - local MAX=90 - local WAIT=0 - while [ 1 ]; do - sleep 5 - RESULT=$(do_facet $myfacet "$TEST") - if [ $RESULT -eq $FINAL ]; then - echo "Updated config after $WAIT sec (got $RESULT)" - break - fi - WAIT=$((WAIT + 5)) - if [ $WAIT -eq $MAX ]; then - echo "Config update not seen: wanted $FINAL got $RESULT" - return 3 - fi - echo "Waiting $(($MAX - $WAIT)) secs for config update" - done + + wait_update $(facet_host $myfacet) "$TEST" $FINAL || error check failed! } test_27a() { @@ -922,18 +930,55 @@ test_31() { # bug 10734 } run_test 31 "Connect to non-existent node (returns errors, should not crash)" +# Use these start32/stop32 fn instead of t-f start/stop fn, +# for local devices, to skip global facet vars init +stop32 () { + local facet=$1 + shift + echo "Stopping local ${MOUNT%/*}/${facet} (opts:$@)" + umount -d $@ ${MOUNT%/*}/${facet} + losetup -a +} + +start32 () { + local facet=$1 + shift + local device=$1 + shift + mkdir -p ${MOUNT%/*}/${facet} + + echo "Starting local ${facet}: $@ $device ${MOUNT%/*}/${facet}" + mount -t lustre $@ ${device} ${MOUNT%/*}/${facet} + RC=$? + if [ $RC -ne 0 ]; then + echo "mount -t lustre $@ ${device} ${MOUNT%/*}/${facet}" + echo "Start of ${device} of local ${facet} failed ${RC}" + fi + losetup -a + return $RC +} + +cleanup_nocli32 () { + stop32 mds -f + stop32 ost1 -f + wait_exit_ST client +} + +cleanup_32() { + trap 0 + echo "Cleanup test_32 umount $MOUNT ..." + umount -f $MOUNT || true + echo "Cleanup local mds ost1 ..." + cleanup_nocli32 + unload_modules +} + test_32a() { - # XXX - make this run on client-only systems with real hardware on - # the OST and MDT - # there appears to be a lot of assumption here about loopback - # devices - # or maybe this test is just totally useless on a client-only system + # this test is totally useless on a client-only system + [ -n "$CLIENTONLY" -o -n "$CLIENTMODSONLY" ] && skip "client only testing" && return 0 [ "$NETTYPE" = "tcp" ] || { skip "NETTYPE != tcp" && return 0; } - [ "$mds_HOST" = "`hostname`" ] || { skip "remote MDS" && return 0; } - [ "$ost_HOST" = "`hostname`" -o "$ost1_HOST" = "`hostname`" ] || \ - { skip "remote OST" && return 0; } + [ -z "$TUNEFS" ] && skip "No tunefs" && return 0 - [ -z "$TUNEFS" ] && skip "No tunefs" && return local DISK1_4=$LUSTRE/tests/disk1_4.zip [ ! -r $DISK1_4 ] && skip "Cant find $DISK1_4, skipping" && return @@ -943,14 +988,17 @@ test_32a() { lctl set_param debug=$PTLDEBUG $TUNEFS $tmpdir/mds || error "tunefs failed" + # nids are wrong, so client wont work, but server should start - start mds $tmpdir/mds "-o loop,exclude=lustre-OST0000" || return 3 - local UUID=$(lctl get_param -n mds.lustre-MDT0000.uuid) + start32 mds $tmpdir/mds "-o loop,exclude=lustre-OST0000" && \ + trap cleanup_32 EXIT INT || return 3 + + local UUID=$(lctl get_param -n mds.lustre-MDT0000.uuid) echo MDS uuid $UUID [ "$UUID" == "mdsA_UUID" ] || error "UUID is wrong: $UUID" $TUNEFS --mgsnode=`hostname` $tmpdir/ost1 || error "tunefs failed" - start ost1 $tmpdir/ost1 "-o loop" || return 5 + start32 ost1 $tmpdir/ost1 "-o loop" || return 5 UUID=$(lctl get_param -n obdfilter.lustre-OST0000.uuid) echo OST uuid $UUID [ "$UUID" == "ost1_UUID" ] || error "UUID is wrong: $UUID" @@ -968,40 +1016,37 @@ test_32a() { # With a new good MDT failover nid, we should be able to mount a client # (but it cant talk to OST) - local OLDMOUNTOPT=$MOUNTOPT - MOUNTOPT="exclude=lustre-OST0000" - mount_client $MOUNT - MOUNTOPT=$OLDMOUNTOPT - set_and_check client "lctl get_param -n mdc.*.max_rpcs_in_flight" "lustre-MDT0000.mdc.max_rpcs_in_flight" || - return 11 + local mountopt="-o exclude=lustre-OST0000" - zconf_umount `hostname` $MOUNT -f - cleanup_nocli - load_modules + local device=`h2$NETTYPE $HOSTNAME`:/lustre + echo "Starting local client: $HOSTNAME: $mountopt $device $MOUNT" + mount -t lustre $mountopt $device $MOUNT || return 1 - # mount a second time to make sure we didnt leave upgrade flag on + local old=$(lctl get_param -n mdc.*.max_rpcs_in_flight) + local new=$((old + 5)) + lctl conf_param lustre-MDT0000.mdc.max_rpcs_in_flight=$new + wait_update $HOSTNAME "lctl get_param -n mdc.*.max_rpcs_in_flight" $new || return 11 + + cleanup_32 + + # mount a second time to make sure we didnt leave upgrade flag on load_modules $TUNEFS --dryrun $tmpdir/mds || error "tunefs failed" - load_modules - start mds $tmpdir/mds "-o loop,exclude=lustre-OST0000" || return 12 - cleanup_nocli + start32 mds $tmpdir/mds "-o loop,exclude=lustre-OST0000" && \ + trap cleanup_32 EXIT INT || return 12 + + cleanup_32 rm -rf $tmpdir || true # true is only for TMP on NFS } run_test 32a "Upgrade from 1.4 (not live)" test_32b() { - # XXX - make this run on client-only systems with real hardware on - # the OST and MDT - # there appears to be a lot of assumption here about loopback - # devices - # or maybe this test is just totally useless on a client-only system - [ "$NETTYPE" = "tcp" ] || { skip "NETTYPE != tcp" && return 0; } - [ "$mds_HOST" = "`hostname`" ] || { skip "remote MDS" && return 0; } - [ "$ost_HOST" = "`hostname`" -o "$ost1_HOST" = "`hostname`" ] || \ - { skip "remote OST" && return 0; } + # this test is totally useless on a client-only system + [ -n "$CLIENTONLY" -o -n "$CLIENTMODSONLY" ] && skip "client only testing" && return 0 + [ "$NETTYPE" = "tcp" ] || { skip "NETTYPE != tcp" && return 0; } + [ -z "$TUNEFS" ] && skip "No tunefs" && return - [ -z "$TUNEFS" ] && skip "No tunefs" && return local DISK1_4=$LUSTRE/tests/disk1_4.zip [ ! -r $DISK1_4 ] && skip "Cant find $DISK1_4, skipping" && return @@ -1009,17 +1054,19 @@ test_32b() { unzip -o -j -d $tmpdir $DISK1_4 || { skip "Cant unzip $DISK1_4, skipping" && return ; } load_modules lctl set_param debug=$PTLDEBUG - NEWNAME=sofia + local NEWNAME=sofia # writeconf will cause servers to register with their current nids $TUNEFS --writeconf --fsname=$NEWNAME $tmpdir/mds || error "tunefs failed" - start mds $tmpdir/mds "-o loop" || return 3 + start32 mds $tmpdir/mds "-o loop" && \ + trap cleanup_32 EXIT INT || return 3 + local UUID=$(lctl get_param -n mds.${NEWNAME}-MDT0000.uuid) echo MDS uuid $UUID [ "$UUID" == "mdsA_UUID" ] || error "UUID is wrong: $UUID" $TUNEFS --mgsnode=`hostname` --fsname=$NEWNAME --writeconf $tmpdir/ost1 || error "tunefs failed" - start ost1 $tmpdir/ost1 "-o loop" || return 5 + start32 ost1 $tmpdir/ost1 "-o loop" || return 5 UUID=$(lctl get_param -n obdfilter.${NEWNAME}-OST0000.uuid) echo OST uuid $UUID [ "$UUID" == "ost1_UUID" ] || error "UUID is wrong: $UUID" @@ -1035,15 +1082,21 @@ test_32b() { # MDT and OST should have registered with new nids, so we should have # a fully-functioning client echo "Check client and old fs contents" - OLDFS=$FSNAME - FSNAME=$NEWNAME - mount_client $MOUNT - FSNAME=$OLDFS - set_and_check client "lctl get_param -n mdc.*.max_rpcs_in_flight" "${NEWNAME}-MDT0000.mdc.max_rpcs_in_flight" || return 11 + + local device=`h2$NETTYPE $HOSTNAME`:/$NEWNAME + echo "Starting local client: $HOSTNAME: $device $MOUNT" + mount -t lustre $device $MOUNT || return 1 + + local old=$(lctl get_param -n mdc.*.max_rpcs_in_flight) + local new=$((old + 5)) + lctl conf_param ${NEWNAME}-MDT0000.mdc.max_rpcs_in_flight=$new + wait_update $HOSTNAME "lctl get_param -n mdc.*.max_rpcs_in_flight" $new || return 11 + [ "$(cksum $MOUNT/passwd | cut -d' ' -f 1,2)" == "2479747619 779" ] || return 12 echo "ok." - cleanup + cleanup_32 + rm -rf $tmpdir || true # true is only for TMP on NFS } run_test 32b "Upgrade from 1.4 with writeconf" -- 1.8.3.1