Whamcloud - gitweb
EX-3142 tests: changelog processing verification
author Alex Zhuravlev <bzzz@whamcloud.com>
Fri, 6 Aug 2021 06:34:31 +0000 (09:34 +0300)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 14 Oct 2022 20:02:43 +0000 (20:02 +0000)
Add an extra counter to the lamigo stats to catch gaps in changelog
processing. Add a new test (hot-pools/60) to verify that no gaps
occur (i.e. lamigo receives all changelog records) and that the
changelog is purged properly.

Test-Parameters: trivial testlist=hot-pools mdscount=2 mdtcount=4
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I34d9d6f6f7f5766d945df43ae7d43dab7c70cef1
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/48434
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lipe/src/lamigo.c
lustre/tests/hot-pools.sh

index bc3b1be..dd515da 100644 (file)
@@ -271,6 +271,7 @@ struct stats {
        unsigned long s_read;   /* llog records read */
        unsigned long s_skipped; /* llog records skipped */
        unsigned long s_processed; /* llog records processed */
+       unsigned long s_changelog_gaps; /* gaps in stream, for debugging */
        unsigned long s_removed; /* removed by unlink */
        unsigned long s_dups;
        unsigned long s_spawned;
@@ -648,6 +649,7 @@ static void lamigo_dump_stats_file(void)
                "    read: %lu\n"
                "    skipped: %lu\n"
                "    processed: %lu\n"
+               "    changelog_gaps: %lu\n"
                "    removed: %lu\n"
                "    dups: %lu\n"
                "    spawned: %lu\n"
@@ -669,7 +671,10 @@ static void lamigo_dump_stats_file(void)
                stats.s_scan_begin,
                stats.s_scan_end,
                stats.s_scan_replicated,
-               stats.s_read, stats.s_skipped, stats.s_processed,
+               stats.s_read,
+               stats.s_skipped,
+               stats.s_processed,
+               stats.s_changelog_gaps,
                stats.s_removed, stats.s_dups, stats.s_spawned,
                stats.s_replicated, stats.s_busy, head.lh_cached_count,
                stats.s_skip_hot, stats.s_replicate_ro2hot,
@@ -1599,8 +1604,14 @@ static int lamigo_process_record(struct changelog_rec *rec)
        __u64 index = rec->cr_index;
        struct fid_rec *f;
        struct fid_hash *fh;
-       int rc = 0;
 
+       /* warn and count when the index jumps by more than one record */
+       if (rec->cr_index - lamigo_last_processed_idx > 1 &&
+           lamigo_last_processed_idx != 0) {
+               llapi_err_noerrno(LLAPI_MSG_WARN,
+                                 "changelog gap %llu -> %llu\n",
+                                 lamigo_last_processed_idx, rec->cr_index);
+               stats.s_changelog_gaps++;
+       }
        lamigo_last_processed_idx = rec->cr_index;
 
        stats.s_read++;
@@ -1651,7 +1662,7 @@ static int lamigo_process_record(struct changelog_rec *rec)
                lipe_list_add_tail(&f->fr_link, &head.lh_list);
        }
 
-       return rc;
+       return 0;
 
 skip:
        stats.s_skipped++;
index 9514f96..68bdec9 100755 (executable)
@@ -2071,6 +2071,40 @@ test_59() {
 }
 run_test 59 "lpurge: check layout before opening"
 
+test_60() {
+       # lamigo must report no changelog gaps after a sustained dbench run
+       local pid
+
+       init_hot_pools_env
+
+       # start lamigo
+       LAMIGO_DEBUG=true LAMIGO_EXTRA_OPT="--progress-interval=10" start_lamigo_cmd
+       check_lamigo_is_started || error "failed to start lamigo"
+       stack_trap stop_lamigo_cmd
+
+       # put everything on slow, no replication will be made
+       mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+       $LFS setstripe -p $LAMIGO_TGT $DIR/$tdir || error "setstripe failed"
+       dbench -D $DIR/$tdir 6 &
+       pid=$!
+       sleep 240
+       kill $pid
+       wait $pid
+
+       # let lamigo process remaining records
+       sleep 30
+
+       local facet=${LAMIGO_MDT_FACET[0]}
+       local log_file=$(lamigo_logfile $facet)
+       # informational only: print gap messages, exit status is not checked
+       do_facet $facet "grep -i gap $log_file"
+
+       echo "verify lamigo stats"
+       verify_one_lamigo_param 0 changelog_gaps 0
+       verify_one_lamigo_param 0 replicated 0
+}
+run_test 60 "verify changelog processing under contiguous load"
+
 test_70() {
        init_hot_pools_env
        local src=${LAMIGO_SRC}