From d3023734692ca911ecd77d17d8a17c801476b3aa Mon Sep 17 00:00:00 2001 From: "John L. Hammond" Date: Tue, 26 Oct 2021 13:24:41 -0500 Subject: [PATCH] EX-3002 lipe: rename {fast,slow}_pool_free to {fast,slow}_pool_max_used Rename o_{fast,slow}_pool_free to o_{fast,slow}_pool_max_used and adjust logic. In struct pool_list, rename pl_avail to pl_used_kb (and adjust logic), pl_total to pl_total_kb, and pl_open to pl_is_open. Replace defaults with equivalent values: DEF_FAST_POOL_FREE=70 becomes DEF_FAST_POOL_MAX_USED=30 DEF_SLOW_POOL_FREE=10 becomes DEF_SLOW_POOL_MAX_USED=90 This change does not add, remove, or rename any command line options. Signed-off-by: John L. Hammond Test-Parameters: trivial testlist=hot-pools Change-Id: I5f5551230856eabdaa84972218b5dcb73959c029 Reviewed-on: https://review.whamcloud.com/45377 Reviewed-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Jian Yu Tested-by: Maloo Reviewed-on: https://review.whamcloud.com/46120 --- lipe/src/lamigo.c | 106 +++++++++++++++++++++++++++--------------------------- lipe/src/lamigo.h | 10 +++--- 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/lipe/src/lamigo.c b/lipe/src/lamigo.c index ed8f7fc..ab52d26 100644 --- a/lipe/src/lamigo.c +++ b/lipe/src/lamigo.c @@ -86,10 +86,8 @@ #define DEF_HOT_AFTER_IDLE 3 #define DEF_ALR_EXTRA_ARGS "--exit-on-close" #define DEF_STATFS_REFRESH_INTV 5 /* OST statfs update interval, in seconds */ -#define DEF_FAST_POOL_FREE 70 /* fast pool is open for migration */ - /* if available space is greater, in percent */ -#define DEF_SLOW_POOL_FREE 10 /* slow pool is open for migration */ - /* if available space is greater, in percent */ +#define DEF_FAST_POOL_MAX_USED 30 /* open for migration if % space used is less than */ +#define DEF_SLOW_POOL_MAX_USED 90 /* open for migration if % space used is less than */ #define LAMIGO_USERFILE "/var/lib/lamigo-%s.chlg" #define LAMIGO_DUMPFILE "/var/run/lamigo-%s.stats" @@ -156,8 +154,8 @@ static void usage(void) DEF_ALR_PERIOD_SECS, DEF_HOT_FRACTION, 
DEF_HOT_AFTER_IDLE, - DEF_FAST_POOL_FREE, - DEF_SLOW_POOL_FREE); + 100 - DEF_FAST_POOL_MAX_USED, + 100 - DEF_SLOW_POOL_MAX_USED); exit(EXIT_SUCCESS); } @@ -219,8 +217,8 @@ struct options opt = { .o_num_threads = DEF_THREAD_COUNT, .o_pool_refresh = DEF_POOL_REFRESH_INTV, .o_statfs_refresh = DEF_STATFS_REFRESH_INTV, - .o_fast_pool_free = DEF_FAST_POOL_FREE, - .o_slow_pool_free = DEF_SLOW_POOL_FREE, + .o_fast_pool_max_used = DEF_FAST_POOL_MAX_USED, + .o_slow_pool_max_used = DEF_SLOW_POOL_MAX_USED, .o_progress_interval = DEF_PROGRESS_INTV, .o_alr_extra_args = DEF_ALR_EXTRA_ARGS, .o_alr_periods = DEF_ALR_PERIODS, @@ -482,8 +480,8 @@ static void lamigo_dump_stats_file(void) " ofd_interval: %d\n" " hot_fraction: %d\n" " hot_after_idle: %d\n" - " src_free: %d\n" - " tgt_free: %d\n" + " fast_pool_max_used: %d\n" + " slow_pool_max_used: %d\n" " include_dom: %d\n", opt.o_slow_pool, opt.o_min_age, opt.o_cache_size, opt.o_rescan, opt.o_num_threads, opt.o_pool_refresh, @@ -491,22 +489,24 @@ static void lamigo_dump_stats_file(void) opt.o_alr_period_time, opt.o_alr_warmup_k, opt.o_alr_cooldown_k, opt.o_alr_ofd_interval, opt.o_alr_hot_fraction, opt.o_alr_hot_after_idle, - opt.o_fast_pool_free, opt.o_slow_pool_free, opt.o_include_dom); + opt.o_fast_pool_max_used, + opt.o_slow_pool_max_used, + opt.o_include_dom); for (pl = fast_pools; pl != NULL; pl = pl->pl_next, i++) fprintf(f, "pool %s:\n" " osts: %d\n" - " avail: %llu\n" - " total: %llu\n" + " used_kb: %llu\n" + " total_kb: %llu\n" " open: %d\n", pl->pl_pool, pl->pl_ostnr, - pl->pl_avail, pl->pl_total, (int)pl->pl_open); + pl->pl_used_kb, pl->pl_total_kb, (int)pl->pl_is_open); for (pl = slow_pools; pl != NULL; pl = pl->pl_next, i++) fprintf(f, "pool %s:\n" " osts: %d\n" - " avail: %llu\n" - " total: %llu\n" + " used_kb: %llu\n" + " total_kb: %llu\n" " open: %d\n", pl->pl_pool, pl->pl_ostnr, - pl->pl_avail, pl->pl_total, (int)pl->pl_open); + pl->pl_used_kb, pl->pl_total_kb, (int)pl->pl_is_open); fprintf(f, "stats:\n" " read: 
%lu\n" @@ -1336,7 +1336,7 @@ static int lamigo_update_one(struct fid_rec *f) int resync, rc; struct alr_heat ah; - if (slow_pools->pl_open == 0) { + if (slow_pools->pl_is_open == 0) { /* cold pool is close to full, skip replication */ /* do this check before expensive layout fetching, rules, etc */ stats.s_skip_tgt_closed++; @@ -2034,9 +2034,9 @@ void lamigo_process_opt(int c, char *optarg) opt.o_alr_extra_args = optarg; break; case LAMIGO_OPT_SRC_FREE: - opt.o_fast_pool_free = atoi(optarg); - if (opt.o_fast_pool_free < 1 || opt.o_fast_pool_free > 99) - LX_FATAL("invalid source free space '%s'\n", optarg); + opt.o_fast_pool_max_used = 100 - atoi(optarg); + if (opt.o_fast_pool_max_used < 1 || opt.o_fast_pool_max_used > 99) + LX_FATAL("invalid argument ('%s') to --src-free\n", optarg); break; case LAMIGO_OPT_SRC_DOM: opt.o_include_dom = 1; @@ -2045,9 +2045,9 @@ void lamigo_process_opt(int c, char *optarg) lipe_ssh_log_verbosity = atoi(optarg); break; case LAMIGO_OPT_TGT_FREE: - opt.o_slow_pool_free = atoi(optarg); - if (opt.o_slow_pool_free < 1 || opt.o_slow_pool_free > 99) - LX_FATAL("invalid target free space '%s'\n", optarg); + opt.o_slow_pool_max_used = 100 - atoi(optarg); + if (opt.o_slow_pool_max_used < 1 || opt.o_slow_pool_max_used > 99) + LX_FATAL("invalid argument ('%s') to --tgt-free\n", optarg); break; case LAMIGO_OPT_VERSION: lipe_version(); @@ -3372,7 +3372,7 @@ static void lamigo_check_hot(void) return; /* don't try to replicate to fast pool if it's close to full */ - if (fast_pools->pl_open) { + if (fast_pools->pl_is_open) { /* get most recent hot files */ ht = lamigo_get_hot(alr_hot_period, &nr); if (ht) { @@ -3400,9 +3400,9 @@ static void lamigo_check_hot(void) LX_DEBUG("idle "DFID": P: %Lu/%Lu, live %d, idle %d\n", PFID(&ah->ah_fid), ah->ah_pools[ALR_FAST], ah->ah_pools[ALR_SLOW], ah->ah_livetime, ah->ah_idle); - if (fast_pools->pl_open) + if (fast_pools->pl_is_open) lamigo_check_hot_one(ht + i); - if (slow_pools->pl_open) + if 
(slow_pools->pl_is_open) lamigo_check_hot_on_cold(ht + i); } free(ht); @@ -3443,43 +3443,45 @@ static __u64 lamigo_read_osp_param(const int ostidx, const char *param) return retval; } -static void lamigo_refresh_pool_statfs(struct pool_list *pl, int threshold) +static void lamigo_refresh_pool_statfs(struct pool_list *pl, int max_used) { - __u64 tavail = 0, ttotal = 0; /* sum for the pool */ - bool old_status; + __u64 pool_total_kb = 0; + __u64 pool_used_kb = 0; + bool pool_is_open; int i; + /* @max_used is a percentage of total pool space. */ + assert(0 <= max_used && max_used <= 100); + pthread_rwlock_rdlock(&pl->pl_lock); for (i = 0; i < pl->pl_ostnr; i++) { - __u64 kbavail, kbtotal, active, status; + __u64 ost_used_kb, ost_total_kb, active, status; int ostidx = pl->pl_osts[i]; - /* get kbavail */ - kbavail = lamigo_read_osp_param(ostidx, "kbytesfree"); - /* get kbtotal */ - kbtotal = lamigo_read_osp_param(ostidx, "kbytestotal"); - /* check OSP is active */ active = lamigo_read_osp_param(ostidx, "active"); status = lamigo_read_osp_param(ostidx, "prealloc_status"); if (!active || status) continue; - tavail += kbavail; - ttotal += kbtotal; + + ost_total_kb = lamigo_read_osp_param(ostidx, "kbytestotal"); + ost_used_kb = ost_total_kb - lamigo_read_osp_param(ostidx, "kbytesfree"); + + pool_total_kb += ost_total_kb; + pool_used_kb += ost_used_kb; } + pthread_rwlock_unlock(&pl->pl_lock); - pl->pl_total = ttotal; - pl->pl_avail = tavail; - old_status = pl->pl_open; - if (ttotal == 0 || (tavail < ttotal * threshold / 100)) - pl->pl_open = false; - else - pl->pl_open = true; - /* whether pool is good for replicas */ - if (old_status != pl->pl_open) - LX_DEBUG("statfs for %s %s pool: %llu from %llu, thresh %llu\n", - pl->pl_open ? 
"open" : "closed", pl->pl_pool, tavail, - ttotal, ttotal * threshold / 100); + pl->pl_total_kb = pool_total_kb; + pl->pl_used_kb = pool_used_kb; + pool_is_open = (pool_used_kb < pool_total_kb * max_used / 100); + + if (pl->pl_is_open != pool_is_open) + LX_DEBUG("statfs for %s %s pool: used_kb %llu, total_kb %llu KB, max_used_kb %llu\n", + pool_is_open ? "open" : "closed", pl->pl_pool, + pool_used_kb, pool_total_kb, pool_total_kb * max_used / 100); + + pl->pl_is_open = pool_is_open; } static void lamigo_refresh_statfs(void) @@ -3494,12 +3496,12 @@ static void lamigo_refresh_statfs(void) pl = fast_pools; while (pl) { - lamigo_refresh_pool_statfs(pl, opt.o_fast_pool_free); + lamigo_refresh_pool_statfs(pl, opt.o_fast_pool_max_used); pl = pl->pl_next; } pl = slow_pools; while (pl) { - lamigo_refresh_pool_statfs(pl, opt.o_slow_pool_free); + lamigo_refresh_pool_statfs(pl, opt.o_slow_pool_max_used); pl = pl->pl_next; } } diff --git a/lipe/src/lamigo.h b/lipe/src/lamigo.h index 2cd7cf2..c5e75d6 100644 --- a/lipe/src/lamigo.h +++ b/lipe/src/lamigo.h @@ -24,9 +24,9 @@ struct pool_list { int pl_ostnr; int *pl_osts; pthread_rwlock_t pl_lock; - __u64 pl_total; - __u64 pl_avail; - bool pl_open; /* open for new replicas, based on avail/total */ + __u64 pl_total_kb; + __u64 pl_used_kb; + bool pl_is_open; /* open for new replicas, based on avail/total */ struct pool_list *pl_next; }; @@ -85,8 +85,8 @@ struct options { int o_num_threads; int o_pool_refresh; int o_statfs_refresh; - int o_fast_pool_free; - int o_slow_pool_free; + int o_fast_pool_max_used; + int o_slow_pool_max_used; int o_progress_interval; /* how often to show progress */ char *o_alr_extra_args; int o_alr_periods; -- 1.8.3.1