From: Jian Yu Date: Fri, 26 Mar 2021 23:02:06 +0000 (-0700) Subject: EX-2745 tests: improve hot-pools.sh to gather debug logs X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=e6841f89016facd11d81ba8b07bf230e1afe7fb2;p=fs%2Flustre-release.git EX-2745 tests: improve hot-pools.sh to gather debug logs This patch improves hot-pools.sh to gather debug logs for lamigo and lpurge. Lustre-change: https://review.whamcloud.com/43111 Lustre-commit: 075ed14d944b0078fcd32ce06aa868ecaabb3adb Test-Parameters: trivial testlist=hot-pools,hot-pools Test-Parameters: trivial testgroup=review-dne-part-2 Change-Id: I65f23d00744499853ab099e7f097161e5e1dd66a Signed-off-by: Jian Yu Reviewed-by: Alex Zhuravlev Reviewed-by: John L. Hammond Reviewed-on: https://review.whamcloud.com/43146 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Li Xi --- diff --git a/lustre/tests/hot-pools.sh b/lustre/tests/hot-pools.sh index 920da75..b47e502 100644 --- a/lustre/tests/hot-pools.sh +++ b/lustre/tests/hot-pools.sh @@ -50,7 +50,7 @@ LAMIGO_THREAD_NUM=${LAMIGO_THREAD_NUM:-""} LAMIGO_PROG_INTV=${LAMIGO_PROG_INTV:-""} LAMIGO_CACHE=${LAMIGO_CACHE:-""} LAMIGO_MIRROR_CMD=${LAMIGO_MIRROR_CMD:-""} -LAMIGO_DEBUG=${LAMIGO_DEBUG:-false} +LAMIGO_DEBUG=${LAMIGO_DEBUG:-true} LAMIGO_RESCAN=${LAMIGO_RESCAN:-false} LAMIGO_VERBOSE=${LAMIGO_VERBOSE:-false} LAMIGO_DUMP=${LAMIGO_DUMP:-""} @@ -75,7 +75,7 @@ LPURGE_SCAN_RATE=${LPURGE_SCAN_RATE:-""} LPURGE_SCAN_THREADS=${LPURGE_SCAN_THREADS:-""} LPURGE_INTV=${LPURGE_INTV:-"30"} LPURGE_SLOT_SIZE=${LPURGE_SLOT_SIZE:-""} -LPURGE_DEBUG=${LPURGE_DEBUG:-false} +LPURGE_DEBUG=${LPURGE_DEBUG:-true} LPURGE_DUMP=${LPURGE_DUMP:-""} LPURGE_DUMP_FIDS=${LPURGE_DUMP_FIDS:-""} LPURGE_EXTRA=${LPURGE_EXTRA:-""} @@ -164,6 +164,27 @@ init_hot_pools_env() { $MOUNT" } +hot_pools_logfile() { + local facet="$1" + local tool="${2:-hot_pools}" + local host="$(facet_active_host $facet)" + + local prefix=$TESTLOG_PREFIX + [[ -z "$TESTNAME" ]] || prefix+=".$TESTNAME" + + printf "${prefix}.${tool}_log.${host}.log" +} + +lamigo_logfile() { + local facet="$1" + hot_pools_logfile $facet lamigo +} + +lpurge_logfile() { + local facet="$1" + hot_pools_logfile $facet lpurge +} + build_test_filter check_and_setup_lustre @@ -220,7 +241,7 @@ start_one_lamigo_cmd() { ! $LAMIGO_VERBOSE || cmd+=" -v" echo "Start lamigo on MDS $(facet_active_host $facet): $cmd" - do_facet $facet "$cmd" & + do_facet $facet "$cmd &> $(lamigo_logfile $facet)" & sleep 2 } @@ -324,7 +345,7 @@ start_one_lamigo_cfg() { cmd+=" -f $cfg_file" echo "Start lamigo on MDS $(facet_active_host $facet): $cmd" - do_facet $facet "$cmd" & + do_facet $facet "$cmd &> $(lamigo_logfile $facet)" & sleep 2 } @@ -367,7 +388,8 @@ create_one_lamigo_service() { stack_trap "do_facet $facet \"[[ -e ${srv_file}.saved ]] && mv $srv_file{.saved,} || rm -f $srv_file\"" - do_facet $facet "cat > $srv_file <<'EOF' + do_facet $facet "lamigo=\\\$(which lamigo); +cat > $srv_file < $(lpurge_logfile $facet)" & sleep 2 } @@ -683,7 +707,7 @@ start_one_lpurge_cfg() { cmd+=" -f $cfg_file" echo "Start lpurge on OSS $(facet_active_host $facet): $cmd" - do_facet $facet "$cmd" & + do_facet $facet "$cmd &> $(lpurge_logfile $facet)" & sleep 2 } @@ -726,7 +750,8 @@ create_one_lpurge_service() { stack_trap "do_facet $facet \"[[ -e ${srv_file}.saved ]] && mv $srv_file{.saved,} || rm -f $srv_file\"" - do_facet $facet "cat > $srv_file <<'EOF' + do_facet $facet "lpurge=\\\$(which lpurge); +cat > $srv_file < $stdout_file + start_one_lamigo_cfg check_one_lamigo_is_started || error "failed to start lamigo" - stack_trap "[[ ! -e $stdout_file ]] || rm -f $stdout_file; \ - stop_one_lamigo_cfg" + stack_trap stop_one_lamigo_cfg - cat $stdout_file - grep -q "unknown tunable: $unknown_param" $stdout_file || + grep -q "unknown tunable: $unknown_param" $log_file || error "failed to recognize unknown parameter '$unknown_param'" LAMIGO_THREAD_NUM=2 LAMIGO_PROG_INTV=300 \ @@ -1098,38 +1124,38 @@ test_6() { run_test 6 "lamigo: start with bad changelog user" test_7() { + init_hot_pools_env + local src_pool="ddn_ssd" local tgt_pool="ddn_hdd" - local stdout_file=$TMP/$tfile.debug.log - - init_hot_pools_env + local facet=${LAMIGO_MDT_FACET[0]} + local log_file=$(lamigo_logfile $facet) LAMIGO_AGT_NODES="$(facet_active_host ost1)" \ - LAMIGO_SRC= LAMIGO_TGT= start_one_lamigo_cmd &> $stdout_file + LAMIGO_SRC= LAMIGO_TGT= start_one_lamigo_cmd check_one_lamigo_is_started || error "failed to start lamigo" - stack_trap "[[ ! -e $stdout_file ]] || rm -f $stdout_file; \ - stop_one_lamigo_cmd" + stack_trap stop_one_lamigo_cmd - cat $stdout_file - grep -q "Target pool $tgt_pool is empty, waiting" $stdout_file || + sleep $LAMIGO_AGE + grep -q "Target pool $tgt_pool is empty, waiting" $log_file || error "failed to use default pool '$tgt_pool'" } run_test 7 "lamigo: start with no OST pools" test_8() { + init_hot_pools_env + local td=$DIR/$tdir local tf=$td/$tfile - local stdout_file=$TMP/$tfile.debug.log - - init_hot_pools_env + local facet=${LAMIGO_MDT_FACET[0]} + local log_file=$(lamigo_logfile $facet) mkdir $td || error "mkdir $td failed" $LFS setstripe -p $LAMIGO_SRC $td || error "$LFS setstripe $td failed" - LAMIGO_DEBUG=true start_one_lamigo_cmd &> $stdout_file + LAMIGO_DEBUG=true start_one_lamigo_cmd check_one_lamigo_is_started || error "failed to start lamigo" - stack_trap "[[ ! -e $stdout_file ]] || rm -f $stdout_file; \ - stop_one_lamigo_cmd" + stack_trap stop_one_lamigo_cmd yes "10M file"| dd bs=1M count=10 iflag=fullblock of=$tf || error "failed to create $tf" @@ -1137,8 +1163,8 @@ test_8() { wait_file_mirror $tf 2 - cat $stdout_file - grep -q 'new job extend' $stdout_file || + sleep $LAMIGO_AGE + grep -q 'new job extend' $log_file || error "no debug messages with -b option" } run_test 8 "lamigo: start with debug (-b) command line option" @@ -1177,19 +1203,19 @@ test_9() { run_test 9 "lamigo: replicate from source to target pool" test_10() { + init_hot_pools_env + local td=$DIR/$tdir local tf=$td/$tfile - local stdout_file=$TMP/$tfile.debug.log + local facet=${LAMIGO_MDT_FACET[0]} + local log_file=$(lamigo_logfile $facet) local cksum_orig local cksum_new local i - init_hot_pools_env - - LAMIGO_DEBUG=true start_lamigo_cmd &> $stdout_file - check_lamigo_is_started || error "failed to start lamigo" - stack_trap "[[ ! -e $stdout_file ]] || rm -f $stdout_file; \ - stop_lamigo_cmd" + LAMIGO_DEBUG=true start_one_lamigo_cmd + check_one_lamigo_is_started || error "failed to start lamigo" + stack_trap stop_one_lamigo_cmd # create parent directory mkdir $td || error "mkdir $td failed" @@ -1202,13 +1228,13 @@ test_10() { cksum_orig=$(md5sum $tf) for ((i = 0; i < $((LAMIGO_AGE * 2)); i++)); do - ! grep -q 'new job extend' $stdout_file || break + ! grep -q 'new job extend' $log_file || break sleep 1 done # read from the file cksum_new=$(md5sum $tf) - dump_lamigo_stats + dump_one_lamigo_stats [[ "$cksum_orig" = "$cksum_new" ]] || error "checksum mismatch: '$cksum_orig' != '$cksum_new'"