From 43da9bd6f50e4bcb83629feb73d133f000fe011b Mon Sep 17 00:00:00 2001
From: "John L. Hammond"
Date: Wed, 27 Oct 2021 12:51:28 -0500
Subject: [PATCH] EX-3002 lipe: swap 'used' and 'free' within lpurge

Within lpurge, convert:
  o_freelo to o_max_used
  o_freehi to o_min_used
  freelo to lpurge_max_used_kb
  freehi to lpurge_min_used_kb

And adjust logic according to the formulas:
  o_max_used = 100 - o_freelo
  o_min_used = 100 - o_freehi
  lpurge_max_used_kb = kbtotal - freelo
  lpurge_min_used_kb = kbtotal - freehi

This change does not add, remove, or rename any command line
options. The relevant old lpurge stats values (free_high, free_low,
kbfree, low, hi) are retained. New values (min_used, max_used,
min_used_kb, max_used_kb, used_kb, total_kb) are added.

Signed-off-by: John L. Hammond
Test-Parameters: trivial testlist=hot-pools
Change-Id: I772ac32041a27904b8b6c50725b149c0b5fa4f45
Reviewed-on: https://review.whamcloud.com/45387
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Alexandre Ioffe
Reviewed-by: Jian Yu
Reviewed-on: https://review.whamcloud.com/46124
---
Reviewer notes (text in this region is ignored when the patch is applied):
 * This copy was re-flowed from a whitespace-collapsed paste. Tabs, blank
   context lines and the spacing inside string literals are reconstructed
   and must be verified against lipe/src/lpurge.c before applying (or
   apply with --ignore-whitespace). The "index" line is left as submitted.
 * usage(): the free-high default stays ahead of DEF_INTERVAL, as it was
   before this change, because the usage format string is not touched by
   this patch and all slots are %u. TODO confirm against the usage text.
 * lpurge_verify_opts(): "useing" -> "using" in the min_used message.
 * Not changed here, worth a follow-up: a watermark option value of 100
   maps to 0 and is then silently replaced by the default; the USR1
   handler subtracts the thresholds from total_kb, which is 0 when
   statfs() fails.

 lipe/src/lpurge.c | 215 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 127 insertions(+), 88 deletions(-)

diff --git a/lipe/src/lpurge.c b/lipe/src/lpurge.c
index e2832eb..dcea77c 100644
--- a/lipe/src/lpurge.c
+++ b/lipe/src/lpurge.c
@@ -110,7 +110,7 @@ static struct lpurge_slot lpurge_hist[LPURGE_HIST_MAX];
 struct stats {
 	unsigned long long s_scan_time;
 	unsigned long s_scans;
-	unsigned long s_low_hits;
+	unsigned long s_fast_scans;
 	unsigned long s_slow_scans;
 	unsigned long s_queued; /* # files queued for purge */
 	unsigned long s_started; /* # files dequeued for purge by worker */
@@ -120,8 +120,8 @@ struct stats {
 };
 static struct stats stats;
 
-#define DEF_FREEHI	90
-#define DEF_FREELO	75
+#define DEF_MIN_USED	10 /* 100 - DEF_FREEHI */
+#define DEF_MAX_USED	25 /* 100 - DEF_FREELO */
 #define DEF_SLOT_SIZE	1048576
 #define DEF_SCAN_RATE	10000
 #define DEF_MAX_JOBS	8
@@ -141,8 +141,8 @@ struct options {
 	unsigned int o_scan_rate;
 	unsigned int o_scan_threads;
 	unsigned int o_slot_size;
-	unsigned int o_freelo;
-	unsigned int o_freehi;
+	unsigned int o_max_used; /* 100 - o_freelo */
+	unsigned int o_min_used; /* 100 - o_freehi */
 };
 
 static struct options opt = {
@@ -157,9 +157,8 @@ enum llapi_message_level lx_log_level = LLAPI_MSG_INFO;
 char *lx_log_prefix; /* To print device name in log messages */
 
 static struct lipe_instance instance;
-static unsigned long long freelo; /* low free space when to scan quickly and remove */
-static unsigned long long freehi; /* high free space when stop to scan */
-static unsigned long long kbfree;
+static unsigned long long lpurge_max_used_kb; /* kbtotal - freelo */
+static unsigned long long lpurge_min_used_kb; /* kbtotal - freehi */
 static char *ostname;
 static char *ostprefix;
 static char ost_mntdev[PATH_MAX];
@@ -263,10 +262,10 @@ static void usage(void)
 	       "\t-w, --dump, stats file (via USR1 signal, default: %s)\n"
 	       "\t--version, print version information and exit\n",
 	       program_invocation_short_name,
-	       DEF_FREEHI,
+	       100 - DEF_MIN_USED,
 	       DEF_INTERVAL,
 	       DEF_MAX_JOBS,
-	       DEF_FREELO,
+	       100 - DEF_MAX_USED,
 	       DEF_POOL,
 	       DEF_SCAN_RATE,
 	       DEF_SLOT_SIZE,
@@ -369,12 +368,17 @@ static int lpurge_get_ost_mntpt(void)
 	return -1;
 }
 
-static int lpurge_kbfree(unsigned long long *kbfreesize)
+static int lpurge_get_used_and_total_kb(unsigned long long *p_used_kb, unsigned long long *p_total_kb)
 {
 	int rc;
 	struct statfs statfs_buf;
+	unsigned long long free_kb, total_kb;
 
-	*kbfreesize = 0;
+	if (p_used_kb != NULL)
+		*p_used_kb = 0;
+
+	if (p_total_kb != NULL)
+		*p_total_kb = 0;
 
 	rc = statfs(ost_mntpt, &statfs_buf);
 	if (rc) {
@@ -382,75 +386,81 @@ static int lpurge_kbfree(unsigned long long *kbfreesize)
 		return rc;
 	}
 
-	*kbfreesize = (statfs_buf.f_bsize * statfs_buf.f_bfree) >> 10;
+	free_kb = (statfs_buf.f_bsize * statfs_buf.f_bfree) >> 10;
+	total_kb = (statfs_buf.f_bsize * statfs_buf.f_blocks) >> 10;
+
+	if (p_used_kb != NULL)
+		*p_used_kb = total_kb - free_kb;
+
+	if (p_total_kb != NULL)
+		*p_total_kb = total_kb;
+
 	return 0;
 }
 
-static int lpurge_kbtotal(unsigned long long *kbtotalsize)
+static int lpurge_get_used_kb(unsigned long long *p_used_kb)
 {
-	int rc;
-	struct statfs statfs_buf;
-
-	*kbtotalsize = 0;
-
-	rc = statfs(ost_mntpt, &statfs_buf);
-	if (rc)
-		return rc;
+	return lpurge_get_used_and_total_kb(p_used_kb, NULL);
+}
 
-	*kbtotalsize = (statfs_buf.f_bsize * statfs_buf.f_blocks) >> 10;
-	return 0;
+static int lpurge_get_total_kb(unsigned long long *p_total_kb)
+{
+	return lpurge_get_used_and_total_kb(NULL, p_total_kb);
 }
 
-static unsigned long long lpurge_last_kbtotal;
+static unsigned long long lpurge_total_kb;
 
 static void lpurge_configure_thresholds(void)
 {
 	int rc;
-	unsigned long long fresh;
+	unsigned long long total_kb;
 
-	rc = lpurge_kbtotal(&fresh);
+	rc = lpurge_get_total_kb(&total_kb);
 	if (rc) {
 		LX_ERROR("failed to get total space: %s\n", strerror(errno));
 		return;
 	}
 
-	if (fresh == lpurge_last_kbtotal)
+	if (total_kb == lpurge_total_kb)
 		return;
 
-	lpurge_last_kbtotal = fresh;
-	freelo = opt.o_freelo * lpurge_last_kbtotal / 100;
-	freehi = opt.o_freehi * lpurge_last_kbtotal / 100;
-	if (freehi <= freelo)
-		LX_FATAL("freehi (%llu) <= freelo (%llu)\n", freehi, freelo);
+	lpurge_total_kb = total_kb;
+	lpurge_max_used_kb = opt.o_max_used * lpurge_total_kb / 100;
+	lpurge_min_used_kb = opt.o_min_used * lpurge_total_kb / 100;
+
+	if (lpurge_max_used_kb <= lpurge_min_used_kb)
+		LX_FATAL("lpurge_max_used_kb (%llu) <= lpurge_min_used_kb (%llu)\n",
+			 lpurge_max_used_kb, lpurge_min_used_kb);
 
-	LX_DEBUG("total: %llu, free lo: %llu hi: %llu\n",
-		 lpurge_last_kbtotal, freelo, freehi);
+	LX_DEBUG_U(lpurge_total_kb);
+	LX_DEBUG_U(lpurge_max_used_kb);
+	LX_DEBUG_U(lpurge_min_used_kb);
 }
 
 static void lpurge_wait_for_scan(void)
 {
 	int rc;
-	unsigned long long kbfree;
+	unsigned long long used_kb = 0;
 
 	while (1) {
-		rc = lpurge_kbfree(&kbfree);
+		rc = lpurge_get_used_kb(&used_kb);
 		if (rc)
 			goto wait;
-		LX_DEBUG("free %llu low %llu high %llu\n",
-			 kbfree, freelo, freehi);
 
-		/* no free space, don't wait any longer */
-		if (kbfree <= freelo &&
+		/* high used space, don't wait any longer */
+		if (used_kb >= lpurge_max_used_kb &&
 		    time(NULL) - scan_finished_time >= lpurge_suspend_time) {
-			LX_INFO("low watermark hit, scan at full rate\n");
-			stats.s_low_hits++;
+			LX_INFO("used_kb (%llu) exceeds max_used_kb (%llu): start fast scan\n",
+				used_kb, lpurge_max_used_kb);
+			stats.s_fast_scans++;
 			break;
 		}
 
 		/* low free space and scanned objects are too old, start a scan */
-		if (kbfree <= freehi &&
+		if (used_kb >= lpurge_min_used_kb &&
 		    time(NULL) - scan_finished_time >= lpurge_expire_time) {
-			LX_INFO("hi watermark hit, scan slowly\n");
+			LX_INFO("used_kb (%llu) exceeds min_used_kb (%llu): start slow scan\n",
+				used_kb, lpurge_min_used_kb);
 			stats.s_slow_scans++;
 			break;
 		}
@@ -776,19 +786,20 @@ out:
 	 * then sleep for a while
 	 */
 	if (lpurge_scan_rate && lpurge_scanned_since >= lpurge_scan_rate) {
+		unsigned long long used_kb;
 		int rc;
 
 		sleep(1);
 		lpurge_scanned_since = 0;
 
-		rc = lpurge_kbfree(&kbfree);
+		rc = lpurge_get_used_kb(&used_kb);
 		if (rc)
 			return 0;
 
 		/* check for low space */
-		if (kbfree < freelo && lpurge_scan_rate > 0) {
-			/* full speed mode */
-			LX_INFO("low watermark hit, scan at full rate\n");
+		if (used_kb >= lpurge_max_used_kb && lpurge_scan_rate > 0) {
+			LX_INFO("used_kb (%llu) >= max_used_kb (%llu): start fast scan\n",
+				used_kb, lpurge_max_used_kb);
 			lpurge_scan_rate = 0;
 		}
 	}
@@ -1094,7 +1105,8 @@ static void lpurge_work_submit(struct lpurge_object *lo)
 static void lpurge_purge_slot(struct lpurge_slot *ls, long long target)
 {
 	struct lpurge_object *lo;
-	__u64 total, was, kbfree;
+	unsigned long long total, prev_used_kb, used_kb;
+	long long purged_kb;
 	int i, rc;
 
 	/* try to remove some replicas */
@@ -1134,11 +1146,14 @@ again:
 
 	/* estimate how much space has been released */
 
-	rc = lpurge_kbfree(&was);
+	rc = lpurge_get_used_kb(&prev_used_kb);
 	if (rc)
 		return;
 
+	/* FIXME Clarify units of total. */
 	LX_DEBUG("spawn, expect %llu back\n", total);
+	LX_DEBUG_D(target);
+	LX_DEBUG_U(total);
 
 	/* Wait for purge threads to complete all submitted work. */
 	pthread_mutex_lock(&lpurge_work_lock);
@@ -1153,18 +1168,24 @@ again:
 	 */
 	for (i = 0; i < 20; i++) {
 		sleep(1);
-		rc = lpurge_kbfree(&kbfree);
+		rc = lpurge_get_used_kb(&used_kb);
 		if (rc)
 			return;
-		if (kbfree > was && kbfree - was >= total)
+
+		purged_kb = prev_used_kb - used_kb;
+		LX_DEBUG_U(prev_used_kb);
+		LX_DEBUG_U(used_kb);
+		LX_DEBUG_D(purged_kb);
+
+		/* XXX purged_kb is signed. */
+		if (purged_kb > 0 && purged_kb >= total)
 			break;
 	}
 
-	LX_DEBUG("got %llu back (now %llu, was %llu)\n",
-		 kbfree - was, kbfree, was);
-	if (kbfree > was)
-		target -= kbfree - was;
-	if (target <= 0 || kbfree >= freehi) {
+	if (purged_kb > 0)
+		target -= purged_kb;
+
+	if (target <= 0 || used_kb <= lpurge_min_used_kb) {
 		/* got enough space back, relax */
 		LX_DEBUG("relax\n");
 		return;
@@ -1215,14 +1236,14 @@ again:
 
 static void lpurge_free_space(void)
 {
-	unsigned long long kbfree;
+	unsigned long long used_kb;
 	int i, rc;
 
-	rc = lpurge_kbfree(&kbfree);
-	if (rc || kbfree >= freehi)
+	rc = lpurge_get_used_kb(&used_kb);
+	if (rc || used_kb <= lpurge_min_used_kb)
 		return;
 
-	LX_INFO("%lluM space free, try to release some\n", kbfree >> 10);
+	LX_INFO("used_kb %llu, try to release some\n", used_kb);
 
 	/* start from the oldest group */
 	for (i = LPURGE_HIST_MAX - 1; i >= 0; i--) {
@@ -1231,19 +1252,19 @@ static void lpurge_free_space(void)
 		if (ls->ls_found == 0/* || ls->ls_space == 0*/)
 			continue;
 
-		rc = lpurge_kbfree(&kbfree);
-		if (rc || kbfree >= freehi) {
+		rc = lpurge_get_used_kb(&used_kb);
+		if (rc || used_kb <= lpurge_min_used_kb) {
 			/* got enough space back */
 			break;
 		}
 
 		LX_DEBUG("try to release slot %d\n", i);
-		lpurge_purge_slot(ls, freehi - kbfree);
+		lpurge_purge_slot(ls, used_kb - lpurge_min_used_kb);
 	}
 
-	rc = lpurge_kbfree(&kbfree);
+	rc = lpurge_get_used_kb(&used_kb);
 	if (!rc)
-		LX_INFO("%lluM space free\n", kbfree >> 10);
+		LX_INFO("used_kb = %llu\n", used_kb);
 }
 
 static void lpurge_scan(void)
@@ -1411,7 +1432,7 @@ static void lpurge_process_opt(int c, char *optarg)
 		if (*endptr != '\0' || value < 0)
 			LX_FATAL("invalid high watermark: '%s'\n",
 				 optarg);
-		opt.o_freehi = value;
+		opt.o_min_used = 100 - value;
 		break;
 	case 'H':
 		usage();
@@ -1435,7 +1456,7 @@ static void lpurge_process_opt(int c, char *optarg)
 		if (*endptr != '\0' || value < 0)
 			LX_FATAL("invalid low watermark: '%s'\n",
 				 optarg);
-		opt.o_freelo = value;
+		opt.o_max_used = 100 - value;
 		break;
 	case 'm':
 		/* parse_mds(optarg); */
@@ -1542,19 +1563,19 @@ static void lpurge_verify_opts(void)
 		opt.o_pool = DEF_POOL;
 		LX_INFO("source pool isn't defined, use '%s'\n", opt.o_pool);
 	}
-	if (opt.o_freelo == 0) {
-		opt.o_freelo = DEF_FREELO;
-		LX_INFO("low watermark is not defined, use %u\n", opt.o_freelo);
+	if (opt.o_max_used == 0) {
+		opt.o_max_used = DEF_MAX_USED;
+		LX_INFO("max_used is not defined, using %u\n", opt.o_max_used);
 	}
-	if (opt.o_freelo < 1 || opt.o_freelo > 99)
-		LX_FATAL("Invalid free low threshold: %u\n", opt.o_freelo);
+	if (opt.o_max_used < 1 || opt.o_max_used > 99)
+		LX_FATAL("invalid max_used value %u\n", opt.o_max_used);
 
-	if (opt.o_freehi == 0) {
-		opt.o_freehi = DEF_FREEHI;
-		LX_INFO("high watermark is not defined, use %u\n", opt.o_freehi);
+	if (opt.o_min_used == 0) {
+		opt.o_min_used = DEF_MIN_USED;
+		LX_INFO("min_used is not defined, using %u\n", opt.o_min_used);
 	}
-	if (opt.o_freehi < 1 || opt.o_freehi > 99)
-		LX_FATAL("Invalid free high threshold: %u\n", opt.o_freehi);
+	if (opt.o_min_used < 1 || opt.o_min_used > 99)
+		LX_FATAL("invalid min_used value %u\n", opt.o_min_used);
 
 	if (!ostprefix)
 		LX_FATAL("OST device is not defined\n");
@@ -1630,7 +1651,7 @@ static void lpurge_parse_opts(int argc, char **argv)
  */
 static void lpurge_usr1_handle(int sig)
 {
-	unsigned long long kbfree;
+	unsigned long long used_kb, total_kb;
 	FILE *f;
 	int i;
 
@@ -1650,6 +1671,8 @@ static void lpurge_usr1_handle(int sig)
 		"config:\n"
 		" free_high: %u\n"
 		" free_low: %u\n"
+		" min_used: %u\n"
+		" max_used: %u\n"
 		" ostname: %s\n"
 		" mountpoint: %s\n"
 		" pool: %s\n"
@@ -1659,7 +1682,10 @@ static void lpurge_usr1_handle(int sig)
 		" scan_threads: %u\n"
 		" slot_size: %u\n",
 		PACKAGE_VERSION, LIPE_RELEASE, LIPE_REVISION,
-		opt.o_freehi, opt.o_freelo,
+		100 - opt.o_min_used,
+		100 - opt.o_max_used,
+		opt.o_min_used,
+		opt.o_max_used,
 		opt.o_device, opt.o_mountpoint, opt.o_pool,
 		opt.o_max_jobs, opt.o_interval, opt.o_scan_rate,
 		opt.o_scan_threads, opt.o_slot_size);
@@ -1667,21 +1693,34 @@ static void lpurge_usr1_handle(int sig)
 	fprintf(f, "stats:\n"
 		" scans: %lu\n"
 		" scan_time: %llu\n"
+		" fast_scans: %lu\n"
 		" slow_scans: %lu\n"
 		" queued: %lu\n"
 		" started: %lu\n"
 		" purged: %lu\n"
 		" failed: %lu\n"
-		" low_hits: %lu\n",
-		stats.s_scans, stats.s_scan_time, stats.s_slow_scans,
-		stats.s_queued, stats.s_started, stats.s_purged, stats.s_failed,
-		stats.s_low_hits);
-	lpurge_kbfree(&kbfree);
+		,
+		stats.s_scans, stats.s_scan_time, stats.s_fast_scans, stats.s_slow_scans,
+		stats.s_queued, stats.s_started, stats.s_purged, stats.s_failed);
+
+	used_kb = 0;
+	total_kb = 0;
+	lpurge_get_used_and_total_kb(&used_kb, &total_kb);
 	fprintf(f, "space:\n"
 		" kbfree: %llu\n"
 		" low: %llu\n"
-		" hi: %llu\n",
-		kbfree, freelo, freehi);
+		" hi: %llu\n"
+		" min_used_kb: %llu\n"
+		" max_used_kb: %llu\n"
+		" used_kb: %llu\n"
+		" total_kb: %llu\n",
+		total_kb - used_kb,
+		total_kb - lpurge_max_used_kb,
+		total_kb - lpurge_min_used_kb,
+		lpurge_min_used_kb,
+		lpurge_max_used_kb,
+		used_kb,
+		total_kb);
 
 #define HIST_FMT \
 	" hist%u: { age: %lu, found: %lu, space: %lu, stored: %lu, nomirror_cnt: %lu, nomirror_space: %lu, nopfid_cnt: %lu, nopfid_space: %lu, notfirst_cnt: %lu, notfirst_space: %lu }\n"
-- 
1.8.3.1