Whamcloud - gitweb
EX-4697 lipe: Define statistics fields for lpurge / lamigo
author Alexandre Ioffe <aioffe@ddn.com>
Fri, 5 Aug 2022 05:49:40 +0000 (22:49 -0700)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 26 Aug 2022 16:11:40 +0000 (16:11 +0000)
Added JSON stats output in lamigo
Extended JSON stats output in lpurge

Signed-off-by: Alexandre Ioffe <aioffe@ddn.com>
Test-Parameters: trivial testlist=hot-pools
Change-Id: Ib367022dd073c1699d75e3ea7cfa3b586e7b8877
Reviewed-on: https://review.whamcloud.com/48125
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Colin Faber <cfaber@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lipe/src/lamigo.c
lipe/src/lamigo.h
lipe/src/lpurge.c

index 4f7782c..bc3b1be 100644 (file)
@@ -91,6 +91,7 @@
 
 #define LAMIGO_USERFILE        "/var/lib/lamigo-%s.chlg"
 #define LAMIGO_DUMPFILE        "/var/run/lamigo-%s.stats"
+#define LAMIGO_JSONFILE        "/var/run/lamigo-%s.json"
 #define LAMIGO_HEAT_FILE       "/var/run/lamigo-%s.heat"
 #define LAMIGO_PIDFILE "/var/run/lamigo-%s.pid"
 
@@ -130,6 +131,7 @@ static void usage(void)
               "\t--alr-extra-args=ARGS, extra arguments for ALR (default: '%s')\n"
               "\t--fast-pool=POOL (default '%s')\n"
               "\t--fast-pool-max-used=MAX stop mirroring to POOL when %% used reaches MAX (default %d)\n"
+              "\t--json-file=FILE (default: %s)\n"
               "\t--include-dom treat DoM components as if they belong to fast pool\n"
               "\t--ignore-reads, ignore reads when calculating heat (default: no)\n"
               "\t--ignore-writes, ignore writes when calculating heat (default: no)\n"
@@ -154,6 +156,7 @@ static void usage(void)
               DEF_THREAD_COUNT,
               LAMIGO_USERFILE,
               LAMIGO_DUMPFILE,
+              LAMIGO_JSONFILE,
               LAMIGO_HEAT_FILE,
               DEF_ALR_EXTRA_ARGS,
               DEF_FAST_POOL,
@@ -253,6 +256,7 @@ struct options opt = {
        .o_alr_hot_fraction = DEF_HOT_FRACTION,
        .o_alr_hot_after_idle = DEF_HOT_AFTER_IDLE,
        .o_include_dom = 0,
+       .o_json_file = NULL,
 };
 
 struct history {
@@ -439,6 +443,114 @@ static void lamigo_dump_jobs(FILE *out, struct lipe_list_head *jlist)
        }
 }
 
+static void lamigo_dump_json_file(void)
+{      /* Write version info, fast/slow pool state and the stats counters as one JSON object to opt.o_json_file. */
+       struct resync_agent *a; /* NOTE(review): never referenced in this function */
+       struct pool_list *pl;
+       struct tm tmtime;
+       char timestr[32] = {0};
+       time_t ltime;
+       FILE *f;
+       int i; /* NOTE(review): never referenced in this function */
+
+       if (opt.o_json_file == NULL) /* no JSON path configured: nothing to write */
+               return;
+       LX_DEBUG("dumping json to '%s'\n", opt.o_json_file);
+       f = fopen(opt.o_json_file, "w"); /* "w" truncates any previous dump */
+       if (!f) {
+               LX_ERROR("cannot open '%s': %s\n",
+                        opt.o_json_file, strerror(errno));
+               return;
+       }
+
+       ltime = time(NULL);
+       localtime_r(&ltime, &tmtime);
+       strftime(timestr, sizeof(timestr), "%s", &tmtime); /* "%s" is strftime's seconds-since-Epoch conversion (an extension, not ISO C) */
+
+       fprintf(f,
+               "{\n"
+               "\"version\":\"%s-%s\",\n"
+               "\"revision\": \"%s\",\n"
+               "\"time\": \"%s\",\n",
+               PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION,
+               timestr); /* the timestamp is emitted as a quoted string, not a JSON number */
+
+       fprintf(f, "\"fast_pools\":[\n");
+
+       for (pl = fast_pools; pl != NULL; pl = pl->pl_next) {
+               fprintf(f, pl == fast_pools ? "{\n" : ",\n{\n"); /* comma-separate every element after the first */
+
+               fprintf(f,
+                       "\"pool\":\"%s\",\n"
+                       "\"osts\":%d,\n"
+                       "\"used_kb\":%llu,\n"
+                       "\"total_kb\":%llu,\n"
+                       "\"open\":%d\n",
+                       pl->pl_pool, pl->pl_ostnr, /* pool name is written as-is, without JSON escaping */
+                       pl->pl_used_kb, pl->pl_total_kb, (int)pl->pl_is_open); /* NOTE(review): %d/%llu assume field types of struct pool_list, not visible here - confirm */
+               fprintf(f, "}\n");
+       }
+       fprintf(f, "],\n");
+
+       fprintf(f, "\"slow_pools\":[\n");
+       for (pl = slow_pools; pl != NULL; pl = pl->pl_next) { /* same per-pool layout as the fast_pools loop above */
+               fprintf(f, pl == slow_pools ? "{\n" : ",\n{\n");
+
+               fprintf(f,
+                       "\"pool\":\"%s\",\n"
+                       "\"osts\":%d,\n"
+                       "\"used_kb\":%llu,\n"
+                       "\"total_kb\":%llu,\n"
+                       "\"open\":%d\n",
+                       pl->pl_pool, pl->pl_ostnr,
+                       pl->pl_used_kb, pl->pl_total_kb, (int)pl->pl_is_open);
+               fprintf(f, "}\n");
+       }
+       fprintf(f, "],\n");
+
+       fprintf(f, /* counters follow as top-level keys of the object */
+               "\"scan_begin\": %ld,\n"
+               "\"scan_end\": %ld,\n"
+               "\"scan_replicated\": %ld,\n"
+               "\"read\": %lu,\n"
+               "\"skipped\": %lu,\n"
+               "\"processed\": %lu,\n"
+               "\"removed\": %lu,\n"
+               "\"dups\": %lu,\n"
+               "\"spawned\": %lu,\n"
+               "\"replicated\": %lu,\n"
+               "\"busy\": %lu,\n"
+               "\"queued\": %lu,\n"
+               "\"skip_hot\": %lu,\n"
+               "\"ro2hot\": %lu,\n"
+               "\"rw2hot\": %lu,\n"
+               "\"rw2cold\": %lu,\n"
+               "\"skip-by-rule\": %lu,\n" /* NOTE(review): key style switches from '_' to '-' from here on (except skip_unknown) */
+               "\"extend-by-pool\": %lu,\n"
+               "\"extend-by-objects\": %lu,\n"
+               "\"skip_unknown\": %lu,\n"
+               "\"resync-stale\": %lu,\n"
+               "\"skip-insync\": %lu,\n"
+               "\"skip-by-source\": %lu,\n"
+               "\"extend-by-target\": %lu\n",
+               stats.s_scan_begin, /* NOTE(review): %ld/%lu assume the types of the stats fields, declared outside this view - confirm */
+               stats.s_scan_end,
+               stats.s_scan_replicated,
+               stats.s_read, stats.s_skipped, stats.s_processed,
+               stats.s_removed, stats.s_dups, stats.s_spawned,
+               stats.s_replicated, stats.s_busy, head.lh_cached_count, /* head.lh_cached_count is what gets reported as "queued" */
+               stats.s_skip_hot, stats.s_replicate_ro2hot,
+               stats.s_replicate_rw2hot, stats.s_replicate_rw2cold,
+               stats.s_skip_by_rule, stats.s_extend_by_pool,
+               stats.s_extend_by_objects, stats.s_skip_unknown,
+               stats.s_resync_stale, stats.s_skip_insync,
+               stats.s_skip_by_source, stats.s_extend_by_target);
+
+       fprintf(f, "}\n");
+       fflush(f); /* return value is not checked */
+       fclose(f); /* return value is not checked, so a failed write goes unreported */
+}  /* lamigo_dump_json_file */
+
 static void lamigo_dump_history(FILE *out)
 {
        int i = 0, cur = stats.s_hist_cur;
@@ -465,9 +577,9 @@ static void lamigo_dump_stats_file(void)
        FILE *f;
        int i;
 
-       LX_DEBUG("dumping stats to '%s'\n", opt.o_dump_file);
        if (opt.o_dump_file == NULL)
                return;
+       LX_DEBUG("dumping stats to '%s'\n", opt.o_dump_file);
        f = fopen(opt.o_dump_file, "w");
        if (!f) {
                LX_ERROR("cannot open '%s': %s\n", opt.o_dump_file, strerror(errno));
@@ -1663,7 +1775,7 @@ static void lamigo_job_fini(struct resync_job *rj, intptr_t retval)
        rj->rj_agent->rag_jobs--;
        lamigo_jobs_running--;
 
-       if (retval == EBUSY) {
+       if (retval == EBUSY) {  /* Must not happen */
                /*
                 * the file was busy, there will be another CLOSE
                 * in the changelog, we can just cancel our record
@@ -1782,6 +1894,7 @@ enum {
        LAMIGO_OPT_IGNORE_READS,
        LAMIGO_OPT_IGNORE_WRITES,
        LAMIGO_OPT_INCLUDE_DOM,
+       LAMIGO_OPT_JSON_FILE,
        LAMIGO_OPT_LARGE_IO,
        LAMIGO_OPT_MIRROR_CMD,
        LAMIGO_OPT_OFD_INTERVAL,
@@ -1817,6 +1930,7 @@ static struct option options[] = {
        { "include-dom", no_argument, NULL, LAMIGO_OPT_INCLUDE_DOM },
        { "ignore-reads", no_argument, NULL, LAMIGO_OPT_IGNORE_READS},
        { "ignore-writes", no_argument, NULL, LAMIGO_OPT_IGNORE_WRITES},
+       { "json-file", required_argument, NULL, LAMIGO_OPT_JSON_FILE},
        { "large-io", required_argument, NULL, LAMIGO_OPT_LARGE_IO},
        { "max-cache", required_argument, NULL, 'c'},
        { "mdt", required_argument, NULL, 'm' },
@@ -2087,6 +2201,9 @@ static void lamigo_process_opt(int c, char *optarg)
                if (opt.o_fast_pool_max_used < 1 || opt.o_fast_pool_max_used > 99)
                        LX_FATAL("invalid argument ('%s') to '--fast-pool-max-used'\n", optarg);
                break;
+       case LAMIGO_OPT_JSON_FILE:
+               opt.o_json_file = xstrdup(optarg);
+               break;
        case LAMIGO_OPT_SRC_DOM:
                LX_WARN("option '--src-dom' is deprecated, please use --include-dom instead\n");
                /* Fall through. */
@@ -2416,6 +2533,11 @@ static void lamigo_parse_opts(int argc, char **argv)
                opt.o_dump_file = xstrdup(buf);
        }
 
+       if (!opt.o_json_file) {
+               snprintf(buf, sizeof(buf), LAMIGO_JSONFILE, opt.o_mdtname);
+               opt.o_json_file = xstrdup(buf);
+       }
+
        if (!opt.o_heat_file) {
                snprintf(buf, sizeof(buf), LAMIGO_HEAT_FILE, opt.o_mdtname);
                opt.o_heat_file = xstrdup(buf);
@@ -3152,6 +3274,7 @@ static void *lamigo_signal_thread_start(void *arg)
                switch (sig) {
                case SIGUSR1:
                        lamigo_dump_stats_file();
+                       lamigo_dump_json_file();
                        break;
                case SIGUSR2:
                        lamigo_dump_heat_file();
index 456e154..6f81554 100644 (file)
@@ -80,6 +80,7 @@ struct options {
        unsigned long    o_cache_size;
        unsigned long    o_batch_sync_cnt;
        char            *o_dump_file;
+       char            *o_json_file;
        int              o_chlg_clear_frequency;
        bool             o_rescan;
        char            *o_mirror_cmd;
index 3e9dd09..74a9e25 100644 (file)
@@ -1851,6 +1851,23 @@ static void lpurge_usr2_handle(int sig)
                               json_object_new_int64(stored_sum));
        json_object_object_add(sum_stats, "total_space",
                               json_object_new_int64(space_sum));
+       json_object_object_add(sum_stats, "scans",
+                              json_object_new_int64(stats.s_scans));
+       json_object_object_add(sum_stats, "scan_time",
+                              json_object_new_int64(stats.s_scan_time));
+       json_object_object_add(sum_stats, "fast_scans",
+                              json_object_new_int64(stats.s_fast_scans));
+       json_object_object_add(sum_stats, "slow_scans",
+                              json_object_new_int64(stats.s_slow_scans));
+       json_object_object_add(sum_stats, "queued",
+                              json_object_new_int64(stats.s_queued));
+       json_object_object_add(sum_stats, "started",
+                              json_object_new_int64(stats.s_started));
+       json_object_object_add(sum_stats, "purged",
+                              json_object_new_int64(stats.s_purged));
+       json_object_object_add(sum_stats, "failed",
+                              json_object_new_int64(stats.s_failed));
+
        json_object_object_add(obj_summary, "Summary", sum_stats);
 
        output = json_object_to_json_string_ext(obj_summary,