From 5011f8ad641f7140c4e982784dd841db9c88fded Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Fri, 6 Aug 2021 09:34:31 +0300 Subject: [PATCH] EX-3142 tests: changelog processing verification add extra counter to lamigo stats to catch gaps in changelog processing. add a new test (hot-pools/60) to verify that no gaps happen (i.e. lamigo gets all changelog records), verify that the changelog is purged properly. Test-Parameters: trivial testlist=hot-pools mdscount=2 mdtcount=4 Signed-off-by: Alex Zhuravlev Change-Id: I34d9d6f6f7f5766d945df43ae7d43dab7c70cef1 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/48434 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lipe/src/lamigo.c | 17 ++++++++++++++--- lustre/tests/hot-pools.sh | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/lipe/src/lamigo.c b/lipe/src/lamigo.c index bc3b1be..dd515da 100644 --- a/lipe/src/lamigo.c +++ b/lipe/src/lamigo.c @@ -271,6 +271,7 @@ struct stats { unsigned long s_read; /* llog records read */ unsigned long s_skipped; /* llog records skipped */ unsigned long s_processed; /* llog records processed */ + unsigned long s_changelog_gaps; /* gaps in stream, for debugging */ unsigned long s_removed; /* removed by unlink */ unsigned long s_dups; unsigned long s_spawned; @@ -648,6 +649,7 @@ static void lamigo_dump_stats_file(void) " read: %lu\n" " skipped: %lu\n" " processed: %lu\n" + " changelog_gaps: %lu\n" " removed: %lu\n" " dups: %lu\n" " spawned: %lu\n" @@ -669,7 +671,10 @@ static void lamigo_dump_stats_file(void) stats.s_scan_begin, stats.s_scan_end, stats.s_scan_replicated, - stats.s_read, stats.s_skipped, stats.s_processed, + stats.s_read, + stats.s_skipped, + stats.s_processed, + stats.s_changelog_gaps, stats.s_removed, stats.s_dups, stats.s_spawned, stats.s_replicated, stats.s_busy, head.lh_cached_count, stats.s_skip_hot, stats.s_replicate_ro2hot, @@ -1599,8 +1604,14 @@ static int 
lamigo_process_record(struct changelog_rec *rec) __u64 index = rec->cr_index; struct fid_rec *f; struct fid_hash *fh; - int rc = 0; + if (rec->cr_index - lamigo_last_processed_idx > 1 && + lamigo_last_processed_idx != 0) { + llapi_err_noerrno(LLAPI_MSG_WARN, + "changelog gap %llu -> %llu\n", + lamigo_last_processed_idx, rec->cr_index); + stats.s_changelog_gaps++; + } lamigo_last_processed_idx = rec->cr_index; stats.s_read++; @@ -1651,7 +1662,7 @@ static int lamigo_process_record(struct changelog_rec *rec) lipe_list_add_tail(&f->fr_link, &head.lh_list); } - return rc; + return 0; skip: stats.s_skipped++; diff --git a/lustre/tests/hot-pools.sh b/lustre/tests/hot-pools.sh index 9514f96..68bdec9 100755 --- a/lustre/tests/hot-pools.sh +++ b/lustre/tests/hot-pools.sh @@ -2071,6 +2071,40 @@ test_59() { } run_test 59 "lpurge: check layout before opening" +test_60() { + local i + local after + local mdt + + init_hot_pools_env + + # start lamigo + LAMIGO_DEBUG=true LAMIGO_EXTRA_OPT="--progress-interval=10" start_lamigo_cmd + check_lamigo_is_started || error "failed to start lamigo" + stack_trap stop_lamigo_cmd + + # put everything on slow, no replication will be made + mkdir $DIR/$tdir + $LFS setstripe -p $LAMIGO_TGT $DIR/$tdir + dbench -D $DIR/$tdir 6 & + PID=$! + sleep 240 + kill $PID + wait $PID + + # let lamigo process remaining records + sleep 30 + + local facet=${LAMIGO_MDT_FACET[0]} + local log_file=$(lamigo_logfile $facet) + do_facet $facet "grep -i gap $log_file" + + echo "verify lamigo stats" + verify_one_lamigo_param 0 changelog_gaps 0 + verify_one_lamigo_param 0 replicated 0 +} +run_test 60 "verify changelog processing under contiguous load" + test_70() { init_hot_pools_env local src=${LAMIGO_SRC} -- 1.8.3.1