From 3435b4da94cea33aa288f510660e1f2cb3380b94 Mon Sep 17 00:00:00 2001
From: ericm
Date: Mon, 21 Apr 2008 17:11:38 +0000
Subject: [PATCH] branch: b1_6 change at_max according to AT status. b=13659 r=nathan r=rread r=grev

---
Review notes (free text, not part of the commit message):
 - test_17: restore at_max by calling at_max_set directly; wrapping the
   call in $(...) would run whatever the helper prints as a command.
   This now matches the restore line in test_44.
 - at_start: return 1 after the skip when at_is_valid fails, like the
   at_history check a few lines below, so that at_is_enabled is not
   reached and does not end in error "invalid call".
 - at_is_enabled / at_max_set / restore lines: quote test operands, use
   "=" inside [ ], iterate over "$@", keep loop variables local.
 - Line breaks and indentation of this copy were reconstructed; if the
   context does not match the tree, apply with whitespace-insensitive
   matching.

 lustre/tests/recovery-small.sh | 12 +++++----
 lustre/tests/replay-single.sh  | 35 ++++++++++++++++++++------
 lustre/tests/test-framework.sh | 56 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index f794e34..13b61ea 100755
--- a/lustre/tests/recovery-small.sh
+++ b/lustre/tests/recovery-small.sh
@@ -196,11 +196,13 @@ test_16() {
 run_test 16 "timeout bulk put, don't evict client (2732)"
 
 test_17() {
+    local at_max_saved=0
+
     # With adaptive timeouts, bulk_get won't expire until adaptive_timeout_max
-    local at_max=$(do_facet ost1 "find /sys/ -name at_max")
-    [ -z "$at_max" ] && skip "missing /sys/.../at_max" && return 0
-    OST_AT_MAX=$(do_facet ost1 "cat $at_max")
-    do_facet ost1 "echo $TIMEOUT >> $at_max"
+    if at_is_valid && at_is_enabled; then
+        at_max_saved=$(at_max_get ost1)
+        at_max_set $TIMEOUT ost1
+    fi
 
     # OBD_FAIL_PTLRPC_BULK_GET_NET 0x0503 | OBD_FAIL_ONCE
     # OST bulk will time out here, client retries
@@ -218,7 +220,7 @@ test_17() {
     # expect cmp to succeed, client resent bulk
     do_facet client "cmp $SAMPLE_FILE $DIR/$tfile" || return 3
     do_facet client "rm $DIR/$tfile" || return 4
-    do_facet ost1 "echo $OST_AT_MAX >> $at_max"
+    [ "$at_max_saved" -ne 0 ] && at_max_set $at_max_saved ost1
     return 0
 }
 run_test 17 "timeout bulk get, don't evict client (2732)"
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index 6e1717d7..1ce5c2c 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -874,13 +874,17 @@ test_43() { # bug 2530
 run_test 43 "mds osc import failure during recovery; don't LBUG"
 
 test_44() {
+    local at_max_saved=0
+
     mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
     [ "$mdcdev" ] || exit 2
+
     # adaptive timeouts slow this way down
-    local at_max=$(do_facet mds "find /sys/ -name at_max")
-    [ -z "$at_max" ] && skip "missing /sys/.../at_max" && return 0
-    MDS_AT_MAX=$(do_facet mds "cat $at_max")
-    do_facet mds "echo 40 >> $at_max"
+    if at_is_valid && at_is_enabled; then
+        at_max_saved=$(at_max_get mds)
+        at_max_set 40 mds
+    fi
+
     for i in `seq 1 10`; do
         echo "$i of 10 ($(date +%s))"
         do_facet mds "grep service $LPROC/mdt/MDS/mds/timeouts"
@@ -889,8 +893,9 @@ test_44() {
         $LCTL --device $mdcdev recover
         df $MOUNT
     done
+
     do_facet mds "sysctl -w lustre.fail_loc=0"
-    do_facet mds "echo $MDS_AT_MAX >> $at_max"
+    [ "$at_max_saved" -ne 0 ] && at_max_set $at_max_saved mds
     return 0
 }
 run_test 44 "race in target handle connect"
@@ -1362,9 +1367,19 @@ test_61c() {
 }
 run_test 61c "test race mds llog sync vs llog cleanup"
 
-#Adaptive Timeouts
-at_start() #bug 3055
+#Adaptive Timeouts (bug 3055)
+AT_MAX_SET=0
+
+at_start()
 {
+    at_is_valid || { skip "AT env is invalid"; return 1; }
+
+    if ! at_is_enabled; then
+        echo "AT is disabled, enable it by force temporarily"
+        at_max_set 600 mds ost client
+        AT_MAX_SET=1
+    fi
+
     if [ -z "$ATOLDBASE" ]; then
         local at_history=$(do_facet mds "find /sys/ -name at_history")
         [ -z "$at_history" ] && skip "missing /sys/.../at_history " && return 1
@@ -1540,6 +1555,12 @@ if [ -n "$ATOLDBASE" ]; then
     do_facet mds "echo $ATOLDBASE >> $at_history" || true
     do_facet ost1 "echo $ATOLDBASE >> $at_history" || true
 fi
+
+if [ $AT_MAX_SET -ne 0 ]; then
+    echo "restore AT status to be disabled"
+    at_max_set 0 mds ost client
+fi
+
 # end of AT tests includes above lines
 
 equals_msg `basename $0`: test complete, cleaning up
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 3b7f839..fb824d3 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -88,6 +88,7 @@ init_test_env() {
     export NAME=${NAME:-local}
     export LPROC=/proc/fs/lustre
     export DIR2
+    export AT_MAX_PATH
 
     if [ "$ACCEPTOR_PORT" ]; then
         export PORT_OPT="--port $ACCEPTOR_PORT"
@@ -888,6 +889,61 @@ absolute_path() {
 }
 
 ##################################
+# Adaptive Timeouts funcs
+
+# Look up the at_max file under /sys on the mds facet and cache its path
+# in AT_MAX_PATH (the lookup is skipped when the path is already set).
+# Returns 0 when the path is known, 1 after a message when none was found.
+at_is_valid() {
+    if [ -z "$AT_MAX_PATH" ]; then
+        AT_MAX_PATH=$(do_facet mds "find /sys/ -name at_max")
+        [ -z "$AT_MAX_PATH" ] && echo "missing /sys/.../at_max " && return 1
+    fi
+    return 0
+}
+
+# Returns 0 when at_max read on the mds is non-zero, 1 when it is 0.
+at_is_enabled() {
+    at_is_valid || error "invalid call"
+
+    # only check mds, we assume at_max is the same on all nodes
+    local at_max=$(do_facet mds "cat $AT_MAX_PATH")
+    if [ "$at_max" -eq 0 ]; then
+        return 1
+    else
+        return 0
+    fi
+}
+
+# at_max_get <facet>: cat the at_max file on the given facet.
+at_max_get() {
+    at_is_valid || error "invalid call"
+
+    do_facet $1 "cat $AT_MAX_PATH"
+}
+
+# at_max_set <value> <facet>...
+# Write <value> into the at_max file on every listed facet; the facet
+# name "ost" stands for ost1 .. ost$OSTCOUNT.
+at_max_set() {
+    local at_max=$1
+    local facet i
+    shift
+
+    at_is_valid || error "invalid call"
+
+    for facet in "$@"; do
+        if [ "$facet" = "ost" ]; then
+            for i in $(seq $OSTCOUNT); do
+                do_facet ost$i "echo $at_max > $AT_MAX_PATH"
+            done
+        else
+            do_facet $facet "echo $at_max > $AT_MAX_PATH"
+        fi
+    done
+}
+
+##################################
 # OBD_FAIL funcs
 
 drop_request() {
-- 
1.8.3.1