From 724a7d4d2dd6d0b1924d699138420602442abbca Mon Sep 17 00:00:00 2001 From: Lei Feng Date: Mon, 26 Jul 2021 08:51:59 +0800 Subject: [PATCH] EX-3301 lipe: use larger candidate number in lpcc_purge Increase the default candidate number to 128K. Calculate the number of n_discard and n_detach dynamically based on the candidate number. Set timeout of continuous scanning. Change-Id: I8adacb722fdec820a914250c54d05e0abd740140 Signed-off-by: Lei Feng Test-Parameters: trivial Reviewed-on: https://review.whamcloud.com/44040 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lipe/src/lpcc_purge.c | 126 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 31 deletions(-) diff --git a/lipe/src/lpcc_purge.c b/lipe/src/lpcc_purge.c index 50689d9..b8f4e24 100644 --- a/lipe/src/lpcc_purge.c +++ b/lipe/src/lpcc_purge.c @@ -28,14 +28,20 @@ #define DEF_LOW_USAGE 75 #define DEF_INTERVAL 5 #define DEF_SCAN_THREADS 1 -#define DEF_CANDIDATE_NUM 2000 -#define MAX_CANDIDATE_NUM 100000 +#define DEF_CANDIDATE_NUM (128 * 1024) +#define MIN_CANDIDATE_NUM 1024 +#define MAX_CANDIDATE_NUM (100 * 1024 * 1024) +#define DEF_MAX_SCAN_SECS 30 +#define MIN_MAX_SCAN_SECS 5 +#define MAX_MAX_SCAN_SECS 300 + #define MAX_ATIME_FID_LEN 128 #define OPT_DRY_RUN 1 #define OPT_CANDIDATE_NUM 2 #define OPT_CLEAR_HASHDIR 3 #define OPT_LOG_LEVEL 4 +#define OPT_MAX_SCAN_SECS 5 struct lpcc_purge_options { char *o_cache; @@ -48,8 +54,9 @@ struct lpcc_purge_options { int o_interval; int o_scan_threads; int o_candidate_num; - char *o_dumpfile; + int o_max_scan_secs; + char *o_dumpfile; bool o_dry_run; bool o_clear_hashdir; }; @@ -60,6 +67,7 @@ static struct lpcc_purge_options opt = { .o_interval = DEF_INTERVAL, .o_scan_threads = DEF_SCAN_THREADS, .o_candidate_num = DEF_CANDIDATE_NUM, + .o_max_scan_secs = DEF_MAX_SCAN_SECS, .o_dry_run = false, .o_clear_hashdir = false, }; @@ -69,6 +77,7 @@ struct lpcc_purge_stats { double s_start_usage; uint64_t s_scan_times; time_t
s_start_time, s_end_time; + time_t s_scan_start_sec; pthread_mutex_t s_lock; uint64_t s_total_purged_objs; @@ -199,18 +208,25 @@ static void lpcc_purge_sigint_handler(int signal) * - config * - stats */ -static void lpcc_purge_usr1_handler(int sig) +static void lpcc_purge_dump_config_stats(FILE *f) { + int rc; char buff[64]; - FILE *f = NULL; - - f = fopen(opt.o_dumpfile, "w"); - if (!f) { - llapi_printf(LLAPI_MSG_DEBUG, "cannot open dumpfile '%s'\n", opt.o_dumpfile); - return; - }; + time_t curr; json_object *j_all = json_object_new_object(); + + curr = time(NULL); + ctime_r(&curr, buff); + json_object *j_curr_time = json_object_new_string(buff); + json_object_object_add(j_all, "curr_time", j_curr_time); + rc = llapi_get_fsname(opt.o_mount, buff, sizeof(buff)); + if (rc) { + llapi_error(LLAPI_MSG_FATAL, rc, "Cannot get fsname."); + exit(1); + } + json_object_object_add(j_all, "fsname", json_object_new_string(buff)); + json_object *j_config = json_object_new_object(); json_object_object_add(j_config, "mount", json_object_new_string(opt.o_mount)); json_object_object_add(j_config, "cache", json_object_new_string(opt.o_cache)); @@ -221,6 +237,7 @@ static void lpcc_purge_usr1_handler(int sig) json_object_object_add(j_config, "scan_threads", json_object_new_int64(opt.o_scan_threads)); json_object_object_add(j_config, "candidate_num", json_object_new_int64(opt.o_candidate_num)); json_object_object_add(j_config, "clear_hashdir", json_object_new_boolean(opt.o_clear_hashdir)); + json_object_object_add(j_config, "max_scan_secs", json_object_new_int64(opt.o_max_scan_secs)); json_object_object_add(j_all, "config", j_config); json_object *j_stats = json_object_new_object(); @@ -247,11 +264,26 @@ static void lpcc_purge_usr1_handler(int sig) fprintf(f, "%s\n", str); fflush(f); - fclose(f); json_object_put(j_all); } +static void lpcc_purge_usr1_handler(int sig) +{ + FILE *f = NULL; + + f = fopen(opt.o_dumpfile, "w"); + if (!f) { + llapi_printf(LLAPI_MSG_DEBUG, "cannot open 
dumpfile '%s'\n", + opt.o_dumpfile); + return; + }; + + lpcc_purge_dump_config_stats(f); + + fclose(f); +} + static void usage(void) { printf("Usage: %s [options]\n" @@ -265,7 +297,8 @@ static void usage(void) "\t-L, --low-usage=NUM %% of space or inode to stop purging (default: %u)\n" "\t-i, --interval=NUM, seconds to next check (default: %u)\n" "\t-t, --scan-threads=NUM scanning threads (default: %u)\n" - "\t --candidate-num=NUM, candidate number of approximate LRU (default: %d)\n" + "\t --candidate-num=NUM, candidate number of approximate LRU (default: %d, min: %d, max: %d)\n" + "\t --max-scan-secs, max seconds to scan continously before purging (default: %d, min: %d, max: %d)\n" "\t-w, --dump=FILE, dump stats to FILE when signal USR1 is recieved (default: /var/run/lpcc_purge-PID.stats)\n" "\t --clear-hashdir, clear empty hash dir after detaching file\n" "\t --dry-run, scan once but do not detach file really\n" @@ -276,7 +309,8 @@ static void usage(void) DEF_LOW_USAGE, DEF_INTERVAL, DEF_SCAN_THREADS, - DEF_CANDIDATE_NUM + DEF_CANDIDATE_NUM, MIN_CANDIDATE_NUM, MAX_CANDIDATE_NUM, + DEF_MAX_SCAN_SECS, MIN_MAX_SCAN_SECS, MAX_MAX_SCAN_SECS ); } @@ -296,6 +330,7 @@ static struct option long_options[] = { { "candidate-num", required_argument, NULL, OPT_CANDIDATE_NUM}, { "dump", required_argument, NULL, 'w'}, { "clear-hashdir", no_argument, NULL, OPT_CLEAR_HASHDIR}, + { "max-scan-secs", required_argument, NULL, OPT_MAX_SCAN_SECS}, { "help", no_argument, NULL, 'h' }, { NULL } }; @@ -540,7 +575,8 @@ static void lpcc_purge_process_opt(int c, char *optarg) break; case OPT_CANDIDATE_NUM: value = strtol(optarg, &endptr, 10); - if (*endptr != '\0' || value < 100 || value > MAX_CANDIDATE_NUM) { + if (*endptr != '\0' || value < MIN_CANDIDATE_NUM || + value > MAX_CANDIDATE_NUM) { llapi_error(LLAPI_MSG_FATAL, -EINVAL, "invalid candidate number: '%s'", optarg); @@ -551,6 +587,17 @@ static void lpcc_purge_process_opt(int c, char *optarg) case OPT_CLEAR_HASHDIR: opt.o_clear_hashdir = 
true; break; + case OPT_MAX_SCAN_SECS: + value = strtol(optarg, &endptr, 10); + if (*endptr != '\0' || value < MIN_MAX_SCAN_SECS || + value > MAX_MAX_SCAN_SECS) { + llapi_error(LLAPI_MSG_FATAL, -EINVAL, + "invalid max_scan_secs: '%s'\n", + optarg); + exit(1); + } + opt.o_max_scan_secs = value; + break; default: llapi_error(LLAPI_MSG_FATAL, -EINVAL, "invalid argument: '%s'", @@ -827,8 +874,8 @@ static int lpcc_purge_detach_candidate(const char *mnt, return 0; pthread_mutex_lock(&stats.s_lock); - stats.s_purged_objs ++; - stats.s_total_purged_objs ++; + stats.s_purged_objs++; + stats.s_total_purged_objs++; pthread_mutex_unlock(&stats.s_lock); if (opt.o_dry_run) @@ -857,7 +904,9 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance, int rc; struct lu_fid fid; char *path = NULL; - struct lpcc_purge_candidate *candidate = NULL; + struct lpcc_purge_candidate *candidate = NULL, **to_detach = NULL; + uint64_t scan_secs; + int i, j, n_detach, n_discard; if (!S_ISREG(attrs->loa_mode)) { /* skip non-regular file */ @@ -894,31 +943,43 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance, lpcc_purge_candidate_set_append(candidate); candidate = NULL; - /* check whether the pool is full */ - if (candidate_set.cs_count < candidate_set.cs_capacity) { + /* + * check whether the pool is full or has scanned continously + * for too long time + */ + scan_secs = time(NULL) - stats.s_scan_start_sec; + if (candidate_set.cs_count < candidate_set.cs_capacity && + scan_secs < opt.o_max_scan_secs) { pthread_mutex_unlock(&candidate_set.cs_lock); rc = 0; goto out; } - /* pool is full */ - llapi_printf(LLAPI_MSG_DEBUG, "candidate set is full, sort it first\n"); + /* pool is full or scan for too long time */ + llapi_printf(LLAPI_MSG_DEBUG, "start purging, candidate number: %u, " + "scan secs: %lu\n", candidate_set.cs_count, scan_secs); lpcc_purge_candidate_set_sort(); - struct lpcc_purge_candidate *candidates[100]; - int i, j, n_detach, n_discard; - - n_detach = 
stats.s_start_usage - opt.o_low_usage; - n_discard = opt.o_low_usage; + /* (n_detach + n_discard) = 10% * candidate_num if start_ussage is 100 */ + n_detach = (stats.s_start_usage - opt.o_low_usage) / 1000 * + candidate_set.cs_capacity; + n_discard = opt.o_low_usage / 1000 * candidate_set.cs_count; - if (n_detach < 0) { + if (n_detach <= 0) { n_detach = 0; + } else { + to_detach = calloc(n_detach, sizeof(to_detach[0])); + if (to_detach == NULL) { + llapi_error(LLAPI_MSG_FATAL, errno, + "cannot allocate memory"); + exit(1); + } } llapi_printf(LLAPI_MSG_DEBUG, "copy out %d elements from the head\n", n_detach); for (i = 0; i < n_detach; i++) { - candidates[i] = candidate_set.cs_arr[i]; + to_detach[i] = candidate_set.cs_arr[i]; } llapi_printf(LLAPI_MSG_DEBUG, @@ -931,19 +992,21 @@ static int lpcc_purge_scan_callback(struct lipe_instance *instance, } candidate_set.cs_count = candidate_set.cs_count - n_detach - n_discard; + stats.s_scan_start_sec = time(NULL); pthread_mutex_unlock(&candidate_set.cs_lock); /* Detach files from candidates */ llapi_printf(LLAPI_MSG_DEBUG, "detach files...\n"); for (i = 0; i < n_detach; i++) { - rc = lpcc_purge_detach_candidate(opt.o_mount, candidates[i]); - lpcc_purge_candidate_destroy(candidates[i]); + rc = lpcc_purge_detach_candidate(opt.o_mount, to_detach[i]); + lpcc_purge_candidate_destroy(to_detach[i]); } rc = 0; out: free(path); lpcc_purge_candidate_destroy(candidate); + free(to_detach); return rc; } @@ -961,6 +1024,7 @@ static void lpcc_purge_scan(void) stats.s_start_usage = lpcc_purge_get_fs_usage(opt.o_cache); stats.s_scanned_objs = 0; stats.s_purged_objs = 0; + stats.s_scan_start_sec = time(NULL); lipe_policy_init(&policy); policy.lp_attr_bits = LIPE_OBJECT_ATTR_ATTR; -- 1.8.3.1