Whamcloud - gitweb
EX-2989 lipe: collect lpcc_purge stats
authorLei Feng <flei@whamcloud.com>
Tue, 13 Apr 2021 00:19:52 +0000 (08:19 +0800)
committerAndreas Dilger <adilger@whamcloud.com>
Thu, 22 Jul 2021 17:35:13 +0000 (17:35 +0000)
Collect statistics for lpcc_purge and dump them, together with the current configuration, as JSON to a file when SIGUSR1 is received.

Change-Id: Ifbc5502e53efd3a40846e4ea8c551f05f6b0ce09
Signed-off-by: Lei Feng <flei@whamcloud.com>
Test-Parameters: trivial
Reviewed-on: https://review.whamcloud.com/43287
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lipe/src/lpcc_purge.c

index 47e0c7b..b8eabeb 100644 (file)
@@ -2,6 +2,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <getopt.h>
+#include <json-c/json.h>
 #include <libgen.h>
 #include <linux/lustre/lustre_idl.h>
 #include <lustre/lustreapi.h>
@@ -45,8 +46,9 @@ struct lpcc_purge_options {
 
        int o_interval;
        int o_scan_threads;
-
        int o_candidate_num;
+       char *o_dumpfile;
+
        bool o_dry_run;
        bool o_force_clear;
 };
@@ -64,7 +66,14 @@ static struct lpcc_purge_options opt = {
 bool exit_flag = false;
 struct lpcc_purge_stats {
        double s_start_usage;   /* lpcc_purge_get_fs_usage(opt.o_cache) sampled when a scan starts */
+       uint64_t s_scan_times;  /* incremented once per lpcc_purge_scan() call */
+       time_t s_start_time, s_end_time;        /* set by main() around each scan + free-space pass; s_end_time is 0 while a pass is running */
+
        pthread_mutex_t s_lock; /* held while the object counters below are incremented */
+       uint64_t s_total_purged_objs;   /* bumped in lpcc_purge_detach_candidate() ahead of the dry-run check; not reset per scan */
+       uint64_t s_total_failed_objs;   /* bumped when detaching a fid reports an error; not reset per scan */
+       uint64_t s_scanned_objs;        /* objects scanned in the current scan; zeroed in lpcc_purge_scan() */
+       uint64_t s_purged_objs;         /* objects purged in the current scan; zeroed in lpcc_purge_scan() */
 };
 
 static struct lpcc_purge_stats stats = {
@@ -184,6 +193,63 @@ static void lpcc_purge_sigint_handler(int signal)
        _exit(0);
 }
 
+/**
+ * SIGUSR1 handler: write the current configuration and statistics to
+ * opt.o_dumpfile as a single pretty-printed JSON document.
+ *
+ * Dump:
+ *  - config
+ *  - stats
+ *
+ * The file is opened with mode "w", so every signal overwrites the
+ * previous dump. "start_time" and "end_time" are emitted only when the
+ * corresponding stats field is non-zero. If the file cannot be opened,
+ * a debug message is printed and nothing is dumped.
+ *
+ * sig: signal number; not used by the handler.
+ *
+ * NOTE(review): fopen(), fprintf(), fclose() and the json-c object
+ * construction (which allocates memory) are not async-signal-safe, and
+ * the stats fields are read here without taking stats.s_lock - confirm
+ * this is acceptable for a handler installed with signal().
+ * NOTE(review): ctime_r() output ends with a newline, so the time strings
+ * stored in the JSON carry a trailing '\n' - confirm this is intended.
+ * NOTE(review): opt.o_dumpfile is presumably still NULL if the signal
+ * arrives before option verification has run - verify against main().
+ */
+static void lpcc_purge_usr1_handler(int sig)
+{
+       char buff[64];  /* scratch buffer for ctime_r() output */
+       FILE *f = NULL;
+
+       f = fopen(opt.o_dumpfile, "w");
+       if (!f) {
+               llapi_printf(LLAPI_MSG_DEBUG, "cannot open dumpfile '%s'\n", opt.o_dumpfile);
+               return;
+       };
+
+       json_object *j_all = json_object_new_object();
+       json_object *j_config = json_object_new_object();
+       json_object_object_add(j_config, "mount", json_object_new_string(opt.o_mount));
+       json_object_object_add(j_config, "cache", json_object_new_string(opt.o_cache));
+       json_object_object_add(j_config, "rwid", json_object_new_int64(opt.o_rwid));
+       json_object_object_add(j_config, "high_usage", json_object_new_double(opt.o_high_usage));
+       json_object_object_add(j_config, "low_usage", json_object_new_double(opt.o_low_usage));
+       json_object_object_add(j_config, "interval", json_object_new_int64(opt.o_interval));
+       json_object_object_add(j_config, "scan_threads", json_object_new_int64(opt.o_scan_threads));
+       json_object_object_add(j_config, "candidate_num", json_object_new_int64(opt.o_candidate_num));
+       json_object_object_add(j_config, "force_clear", json_object_new_boolean(opt.o_force_clear));
+       json_object_object_add(j_all, "config", j_config);
+
+       json_object *j_stats = json_object_new_object();
+       json_object_object_add(j_stats, "scan_times", json_object_new_int64(stats.s_scan_times));
+       json_object_object_add(j_stats, "total_purged_objs", json_object_new_int64(stats.s_total_purged_objs));
+       json_object_object_add(j_stats, "total_failed_objs", json_object_new_int64(stats.s_total_failed_objs));
+       if (stats.s_start_time != 0) {
+               ctime_r(&stats.s_start_time, buff);
+               json_object_object_add(j_stats, "start_time", json_object_new_string(buff));
+       }
+       if (stats.s_end_time != 0) {
+               ctime_r(&stats.s_end_time, buff);
+               json_object_object_add(j_stats, "end_time", json_object_new_string(buff));
+       }
+       json_object_object_add(j_stats, "scanned_objs", json_object_new_int64(stats.s_scanned_objs));
+       json_object_object_add(j_stats, "purged_objs", json_object_new_int64(stats.s_purged_objs));
+       json_object_object_add(j_all, "stats", j_stats);
+
+       const char *str = json_object_to_json_string_ext(j_all,
+               JSON_C_TO_STRING_PRETTY |
+               JSON_C_TO_STRING_SPACED |
+               JSON_C_TO_STRING_NOZERO);
+
+       fprintf(f, "%s\n", str);        /* str is owned by j_all, so print it before json_object_put() */
+       fflush(f);
+       fclose(f);
+
+       json_object_put(j_all); /* drops the root reference; j_config and j_stats were handed over to j_all */
+}
+
 static void usage(void)
 {
        printf("Usage: %s [options]\n"
@@ -198,6 +264,7 @@ static void usage(void)
                "\t-i, --interval=NUM, seconds to next check (default: %u)\n"
                "\t-t, --scan-threads=NUM scanning threads (default: %u)\n"
                "\t    --candidate-num=NUM, candidate number of approximate LRU (default: %d)\n"
+               "\t-w, --dump=FILE, dump stats to FILE when signal USR1 is recieved (default: /var/run/lpcc_purge-PID.stats)\n"
                "\t    --dry-run, scan once but don't detach file really\n"
                "\t-h, --help, print this help message\n",
 
@@ -223,6 +290,7 @@ static struct option long_options[] = {
        { "scan-threads", required_argument, NULL, 't'},
        { "dry-run", no_argument, NULL, OPT_DRY_RUN},
        { "candidate-num", required_argument, NULL, OPT_CANDIDATE_NUM},
+       { "dump", required_argument, NULL, 'w'},
        { "force-clear", no_argument, NULL, OPT_FORCE_CLEAR},
        { "help", no_argument, NULL, 'h' },
        { NULL }
@@ -460,6 +528,9 @@ static void lpcc_purge_process_opt(int c, char *optarg)
                }
                opt.o_scan_threads = value;
                break;
+       case 'w':
+               opt.o_dumpfile = strdup(optarg);
+               break;
        case OPT_DRY_RUN:
                opt.o_dry_run = true;
                break;
@@ -490,7 +561,7 @@ static void lpcc_purge_parse_opts(int argc, char **argv)
        int c;
 
        while ((c = getopt_long(argc, argv,
-                               "bf:C:M:A:H:L:i:t:h",
+                               "bf:C:M:A:H:L:i:t:w:h",
                                long_options, NULL))
               != EOF) {
                switch(c) {
@@ -512,6 +583,8 @@ static void lpcc_purge_parse_opts(int argc, char **argv)
 
 void lpcc_purge_verify_opts(void)
 {
+       char buf[PATH_MAX];
+
        if (opt.o_mount == NULL) {
                llapi_error(LLAPI_MSG_FATAL, EINVAL,
                             "lustre mount point must be specified");
@@ -527,7 +600,12 @@ void lpcc_purge_verify_opts(void)
                             "rwid mount point must be specified");
                exit(1);
        }
-       // check freehi > freelo
+       if (opt.o_dumpfile == NULL) {
+               snprintf(buf, sizeof(buf), "/var/run/lpcc_purge-%d.stats", opt.o_rwid);
+               opt.o_dumpfile = strdup(buf);
+       }
+
+       /* check freehi > freelo */
        if (opt.o_high_usage <= opt.o_low_usage) {
                llapi_error(LLAPI_MSG_FATAL, EINVAL,
                            "high usage (%.1f) must be larger than low usage (%.1f)",
@@ -682,6 +760,11 @@ static int lpcc_purge_detach_candidate(const char *mnt,
 
        llapi_printf(LLAPI_MSG_DEBUG, "detach fid: "DFID"\n", PFID(&candidate->c_fid));
 
+       pthread_mutex_lock(&stats.s_lock);
+       stats.s_purged_objs++;
+       stats.s_total_purged_objs++;
+       pthread_mutex_unlock(&stats.s_lock);
+
        if (opt.o_dry_run) {
                return -1;
        }
@@ -690,7 +773,9 @@ static int lpcc_purge_detach_candidate(const char *mnt,
        if (rc) {
                llapi_error(LLAPI_MSG_WARN, -rc,  "cannot detach fid: "DFID,
                        PFID(&candidate->c_fid));
-               return rc;
+               pthread_mutex_lock(&stats.s_lock);
+               stats.s_total_failed_objs++;
+               pthread_mutex_unlock(&stats.s_lock);
        }
 
        if (opt.o_force_clear)
@@ -709,7 +794,7 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance,
        struct lpcc_purge_candidate *candidate = NULL;
 
        if (!S_ISREG(attrs->loa_mode)) {
-               // skip non-regular file
+               /* skip non-regular file */
                return 0;
        }
 
@@ -722,7 +807,7 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance,
        const char * name = basename(path);
        rc = sscanf(name, SFID, RFID(&fid));
        if (rc != 3)  {
-               // Not an valid fid string, skip it
+               /* Not an valid fid string, skip it */
                rc = 0;
                goto out;
        }
@@ -733,19 +818,24 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance,
                goto out;
        }
 
+       /* Increase statistic data */
+       pthread_mutex_lock(&stats.s_lock);
+       stats.s_scanned_objs++;
+       pthread_mutex_unlock(&stats.s_lock);
+
        llapi_printf(LLAPI_MSG_DEBUG, "append fid: "DFID"\n", PFID(&fid));
        pthread_mutex_lock(&candidate_set.cs_lock);
        lpcc_purge_candidate_set_append(candidate);
        candidate = NULL;
 
-       // check whether the pool is full
+       /* check whether the pool is full */
        if (candidate_set.cs_count < candidate_set.cs_capacity) {
                pthread_mutex_unlock(&candidate_set.cs_lock);
                rc = 0;
                goto out;
        }
 
-       // pool is full
+       /* pool is full */
        llapi_printf(LLAPI_MSG_DEBUG, "candidate set is full, sort it first\n");
        lpcc_purge_candidate_set_sort();
 
@@ -777,7 +867,7 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance,
 
        pthread_mutex_unlock(&candidate_set.cs_lock);
 
-       // Detach files from candidates
+       /* Detach files from candidates */
        llapi_printf(LLAPI_MSG_DEBUG, "detach files...\n");
        for (i = 0; i < n_detach; i++) {
                rc = lpcc_purge_detach_candidate(opt.o_mount, candidates[i]);
@@ -800,7 +890,11 @@ static void lpcc_purge_scan(void)
        bool ldd_err;
 
        llapi_printf(LLAPI_MSG_INFO, "do scanning...\n");
+
+       stats.s_scan_times++;
        stats.s_start_usage = lpcc_purge_get_fs_usage(opt.o_cache);
+       stats.s_scanned_objs = 0;
+       stats.s_purged_objs = 0;
 
        lipe_policy_init(&policy);
        policy.lp_attr_bits = LIPE_OBJECT_ATTR_ATTR;
@@ -842,7 +936,7 @@ int main(int argc, char *argv[])
        int rc = 0;
        llapi_msg_set_level(LLAPI_MSG_INFO);
 
-       signal(SIGUSR1, &lpcc_purge_null_handler);
+       signal(SIGUSR1, &lpcc_purge_usr1_handler);
        signal(SIGUSR2, &lpcc_purge_null_handler);
 
        signal(SIGINT, &lpcc_purge_sigint_handler);
@@ -860,10 +954,15 @@ int main(int argc, char *argv[])
        while(1) {
                lpcc_purge_wait_for_scan();
 
+               stats.s_end_time = 0;
+               stats.s_start_time = time(NULL);
+
                lpcc_purge_scan();
 
                lpcc_purge_free_space();
 
+               stats.s_end_time = time(NULL);
+
                if (opt.o_dry_run) {
                        break;
                }