Whamcloud - gitweb
EX-3301 lipe: use larger candidate number in lpcc_purge
author Lei Feng <flei@whamcloud.com>
Mon, 26 Jul 2021 00:51:59 +0000 (08:51 +0800)
committer Li Xi <lixi@ddn.com>
Sun, 1 Aug 2021 02:02:50 +0000 (02:02 +0000)
Increase the default candidate number to 128K. Calculate
n_discard and n_detach dynamically based on the candidate
number. Set a timeout for continuous scanning.

Change-Id: I8adacb722fdec820a914250c54d05e0abd740140
Signed-off-by: Lei Feng <flei@whamcloud.com>
Test-Parameters: trivial
Reviewed-on: https://review.whamcloud.com/44040
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lipe/src/lpcc_purge.c

index 50689d9..b8f4e24 100644 (file)
 #define DEF_LOW_USAGE                  75
 #define DEF_INTERVAL                   5
 #define DEF_SCAN_THREADS               1
-#define DEF_CANDIDATE_NUM              2000
-#define MAX_CANDIDATE_NUM              100000
+#define DEF_CANDIDATE_NUM              (128 * 1024)
+#define MIN_CANDIDATE_NUM              1024
+#define MAX_CANDIDATE_NUM              (100 * 1024 * 1024)
+#define DEF_MAX_SCAN_SECS              30
+#define MIN_MAX_SCAN_SECS              5
+#define MAX_MAX_SCAN_SECS              300
+
 #define MAX_ATIME_FID_LEN              128
 
 #define OPT_DRY_RUN                    1
 #define OPT_CANDIDATE_NUM              2
 #define OPT_CLEAR_HASHDIR              3
 #define OPT_LOG_LEVEL                  4
+#define OPT_MAX_SCAN_SECS              5
 
 struct lpcc_purge_options {
        char *o_cache;
@@ -48,8 +54,9 @@ struct lpcc_purge_options {
        int o_interval;
        int o_scan_threads;
        int o_candidate_num;
-       char *o_dumpfile;
+       int o_max_scan_secs;
 
+       char *o_dumpfile;
        bool o_dry_run;
        bool o_clear_hashdir;
 };
@@ -60,6 +67,7 @@ static struct lpcc_purge_options opt = {
        .o_interval             = DEF_INTERVAL,
        .o_scan_threads         = DEF_SCAN_THREADS,
        .o_candidate_num        = DEF_CANDIDATE_NUM,
+       .o_max_scan_secs        = DEF_MAX_SCAN_SECS,
        .o_dry_run              = false,
        .o_clear_hashdir        = false,
 };
@@ -69,6 +77,7 @@ struct lpcc_purge_stats {
        double s_start_usage;
        uint64_t s_scan_times;
        time_t s_start_time, s_end_time;
+       time_t s_scan_start_sec;
 
        pthread_mutex_t s_lock;
        uint64_t s_total_purged_objs;
@@ -199,18 +208,25 @@ static void lpcc_purge_sigint_handler(int signal)
  *  - config
  *  - stats
  */
-static void lpcc_purge_usr1_handler(int sig)
+static void lpcc_purge_dump_config_stats(FILE *f)
 {
+       int rc;
        char buff[64];
-       FILE *f = NULL;
-
-       f = fopen(opt.o_dumpfile, "w");
-       if (!f) {
-               llapi_printf(LLAPI_MSG_DEBUG, "cannot open dumpfile '%s'\n", opt.o_dumpfile);
-               return;
-       };
+       time_t curr;
 
        json_object *j_all = json_object_new_object();
+
+       curr = time(NULL);
+       ctime_r(&curr, buff);
+       json_object *j_curr_time = json_object_new_string(buff);
+       json_object_object_add(j_all, "curr_time", j_curr_time);
+       rc = llapi_get_fsname(opt.o_mount, buff, sizeof(buff));
+       if (rc) {
+               llapi_error(LLAPI_MSG_FATAL, rc, "Cannot get fsname.");
+               exit(1);
+       }
+       json_object_object_add(j_all, "fsname", json_object_new_string(buff));
+
        json_object *j_config = json_object_new_object();
        json_object_object_add(j_config, "mount", json_object_new_string(opt.o_mount));
        json_object_object_add(j_config, "cache", json_object_new_string(opt.o_cache));
@@ -221,6 +237,7 @@ static void lpcc_purge_usr1_handler(int sig)
        json_object_object_add(j_config, "scan_threads", json_object_new_int64(opt.o_scan_threads));
        json_object_object_add(j_config, "candidate_num", json_object_new_int64(opt.o_candidate_num));
        json_object_object_add(j_config, "clear_hashdir", json_object_new_boolean(opt.o_clear_hashdir));
+       json_object_object_add(j_config, "max_scan_secs", json_object_new_int64(opt.o_max_scan_secs));
        json_object_object_add(j_all, "config", j_config);
 
        json_object *j_stats = json_object_new_object();
@@ -247,11 +264,26 @@ static void lpcc_purge_usr1_handler(int sig)
 
        fprintf(f, "%s\n", str);
        fflush(f);
-       fclose(f);
 
        json_object_put(j_all);
 }
 
+static void lpcc_purge_usr1_handler(int sig)
+{
+       FILE *f = NULL;
+
+       f = fopen(opt.o_dumpfile, "w");
+       if (!f) {
+               llapi_printf(LLAPI_MSG_DEBUG, "cannot open dumpfile '%s'\n",
+                            opt.o_dumpfile);
+               return;
+       };
+
+       lpcc_purge_dump_config_stats(f);
+
+       fclose(f);
+}
+
 static void usage(void)
 {
        printf("Usage: %s [options]\n"
@@ -265,7 +297,8 @@ static void usage(void)
                "\t-L, --low-usage=NUM %% of space or inode to stop purging (default: %u)\n"
                "\t-i, --interval=NUM, seconds to next check (default: %u)\n"
                "\t-t, --scan-threads=NUM scanning threads (default: %u)\n"
-               "\t    --candidate-num=NUM, candidate number of approximate LRU (default: %d)\n"
+               "\t    --candidate-num=NUM, candidate number of approximate LRU (default: %d, min: %d, max: %d)\n"
+               "\t    --max-scan-secs, max seconds to scan continously before purging (default: %d, min: %d, max: %d)\n"
                "\t-w, --dump=FILE, dump stats to FILE when signal USR1 is recieved (default: /var/run/lpcc_purge-PID.stats)\n"
                "\t    --clear-hashdir, clear empty hash dir after detaching file\n"
                "\t    --dry-run, scan once but do not detach file really\n"
@@ -276,7 +309,8 @@ static void usage(void)
                DEF_LOW_USAGE,
                DEF_INTERVAL,
                DEF_SCAN_THREADS,
-               DEF_CANDIDATE_NUM
+               DEF_CANDIDATE_NUM, MIN_CANDIDATE_NUM, MAX_CANDIDATE_NUM,
+               DEF_MAX_SCAN_SECS, MIN_MAX_SCAN_SECS, MAX_MAX_SCAN_SECS
        );
 }
 
@@ -296,6 +330,7 @@ static struct option long_options[] = {
        { "candidate-num", required_argument, NULL, OPT_CANDIDATE_NUM},
        { "dump", required_argument, NULL, 'w'},
        { "clear-hashdir", no_argument, NULL, OPT_CLEAR_HASHDIR},
+       { "max-scan-secs", required_argument, NULL, OPT_MAX_SCAN_SECS},
        { "help", no_argument, NULL, 'h' },
        { NULL }
 };
@@ -540,7 +575,8 @@ static void lpcc_purge_process_opt(int c, char *optarg)
                break;
        case OPT_CANDIDATE_NUM:
                value = strtol(optarg, &endptr, 10);
-               if (*endptr != '\0' || value < 100 || value > MAX_CANDIDATE_NUM) {
+               if (*endptr != '\0' || value < MIN_CANDIDATE_NUM ||
+                   value > MAX_CANDIDATE_NUM) {
                        llapi_error(LLAPI_MSG_FATAL, -EINVAL,
                                    "invalid candidate number: '%s'",
                                    optarg);
@@ -551,6 +587,17 @@ static void lpcc_purge_process_opt(int c, char *optarg)
        case OPT_CLEAR_HASHDIR:
                opt.o_clear_hashdir = true;
                break;
+       case OPT_MAX_SCAN_SECS:
+               value = strtol(optarg, &endptr, 10);
+               if (*endptr != '\0' || value < MIN_MAX_SCAN_SECS ||
+                   value > MAX_MAX_SCAN_SECS) {
+                       llapi_error(LLAPI_MSG_FATAL, -EINVAL,
+                                   "invalid max_scan_secs: '%s'\n",
+                                   optarg);
+                       exit(1);
+               }
+               opt.o_max_scan_secs = value;
+               break;
        default:
                llapi_error(LLAPI_MSG_FATAL, -EINVAL,
                                    "invalid argument: '%s'",
@@ -827,8 +874,8 @@ static int lpcc_purge_detach_candidate(const char *mnt,
                return 0;
 
        pthread_mutex_lock(&stats.s_lock);
-       stats.s_purged_objs ++;
-       stats.s_total_purged_objs ++;
+       stats.s_purged_objs++;
+       stats.s_total_purged_objs++;
        pthread_mutex_unlock(&stats.s_lock);
 
        if (opt.o_dry_run)
@@ -857,7 +904,9 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance,
        int rc;
        struct lu_fid fid;
        char *path = NULL;
-       struct lpcc_purge_candidate *candidate = NULL;
+       struct lpcc_purge_candidate *candidate = NULL, **to_detach = NULL;
+       uint64_t scan_secs;
+       int i, j, n_detach, n_discard;
 
        if (!S_ISREG(attrs->loa_mode)) {
                /* skip non-regular file */
@@ -894,31 +943,43 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance,
        lpcc_purge_candidate_set_append(candidate);
        candidate = NULL;
 
-       /* check whether the pool is full */
-       if (candidate_set.cs_count < candidate_set.cs_capacity) {
+       /*
+        * check whether the pool is full or has scanned continously
+        * for too long time
+        */
+       scan_secs = time(NULL) - stats.s_scan_start_sec;
+       if (candidate_set.cs_count < candidate_set.cs_capacity &&
+           scan_secs < opt.o_max_scan_secs) {
                pthread_mutex_unlock(&candidate_set.cs_lock);
                rc = 0;
                goto out;
        }
 
-       /* pool is full */
-       llapi_printf(LLAPI_MSG_DEBUG, "candidate set is full, sort it first\n");
+       /* pool is full or scan for too long time */
+       llapi_printf(LLAPI_MSG_DEBUG, "start purging, candidate number: %u, "
+                    "scan secs: %lu\n", candidate_set.cs_count, scan_secs);
        lpcc_purge_candidate_set_sort();
 
-       struct lpcc_purge_candidate *candidates[100];
-       int i, j, n_detach, n_discard;
-
-       n_detach = stats.s_start_usage - opt.o_low_usage;
-       n_discard = opt.o_low_usage;
+       /* (n_detach + n_discard) = 10% * candidate_num if start_ussage is 100 */
+       n_detach = (stats.s_start_usage - opt.o_low_usage) / 1000 *
+                  candidate_set.cs_capacity;
+       n_discard = opt.o_low_usage / 1000 * candidate_set.cs_count;
 
-       if (n_detach < 0) {
+       if (n_detach <= 0) {
                n_detach = 0;
+       } else {
+               to_detach = calloc(n_detach, sizeof(to_detach[0]));
+               if (to_detach == NULL) {
+                       llapi_error(LLAPI_MSG_FATAL, errno,
+                                   "cannot allocate memory");
+                       exit(1);
+               }
        }
 
        llapi_printf(LLAPI_MSG_DEBUG,
                "copy out %d elements from the head\n", n_detach);
        for (i = 0; i < n_detach; i++) {
-               candidates[i] = candidate_set.cs_arr[i];
+               to_detach[i] = candidate_set.cs_arr[i];
        }
 
        llapi_printf(LLAPI_MSG_DEBUG,
@@ -931,19 +992,21 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance,
        }
        candidate_set.cs_count = candidate_set.cs_count - n_detach - n_discard;
 
+       stats.s_scan_start_sec = time(NULL);
        pthread_mutex_unlock(&candidate_set.cs_lock);
 
        /* Detach files from candidates */
        llapi_printf(LLAPI_MSG_DEBUG, "detach files...\n");
        for (i = 0; i < n_detach; i++) {
-               rc = lpcc_purge_detach_candidate(opt.o_mount, candidates[i]);
-               lpcc_purge_candidate_destroy(candidates[i]);
+               rc = lpcc_purge_detach_candidate(opt.o_mount, to_detach[i]);
+               lpcc_purge_candidate_destroy(to_detach[i]);
        }
 
        rc = 0;
 out:
        free(path);
        lpcc_purge_candidate_destroy(candidate);
+       free(to_detach);
 
        return rc;
 }
@@ -961,6 +1024,7 @@ static void lpcc_purge_scan(void)
        stats.s_start_usage = lpcc_purge_get_fs_usage(opt.o_cache);
        stats.s_scanned_objs = 0;
        stats.s_purged_objs = 0;
+       stats.s_scan_start_sec = time(NULL);
 
        lipe_policy_init(&policy);
        policy.lp_attr_bits = LIPE_OBJECT_ATTR_ATTR;