Whamcloud - gitweb
EX-3002 lipe: rename {fast,slow}_pool_free to {fast,slow}_pool_max_used
author John L. Hammond <jhammond@whamcloud.com>
Tue, 26 Oct 2021 18:24:41 +0000 (13:24 -0500)
committer John L. Hammond <jhammond@whamcloud.com>
Tue, 18 Jan 2022 14:43:21 +0000 (14:43 +0000)
Rename o_{fast,slow}_pool_free to o_{fast,slow}_pool_max_used and
adjust logic.

In struct pool_list, rename
  pl_avail to pl_used_kb (and adjust logic),
  pl_total to pl_total_kb, and
  pl_open to pl_is_open.

Replace defaults with equivalent values:
  DEF_FAST_POOL_FREE=70 becomes DEF_FAST_POOL_MAX_USED=30
  DEF_SLOW_POOL_FREE=10 becomes DEF_SLOW_POOL_MAX_USED=90

This change does not add, remove, or rename any command line options.

Signed-off-by: John L. Hammond <jhammond@whamcloud.com>
Test-Parameters: trivial testlist=hot-pools
Change-Id: I5f5551230856eabdaa84972218b5dcb73959c029
Reviewed-on: https://review.whamcloud.com/45377
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/46120

lipe/src/lamigo.c
lipe/src/lamigo.h

index ed8f7fc..ab52d26 100644 (file)
 #define DEF_HOT_AFTER_IDLE     3
 #define DEF_ALR_EXTRA_ARGS     "--exit-on-close"
 #define DEF_STATFS_REFRESH_INTV        5 /* OST statfs update interval, in seconds */
-#define DEF_FAST_POOL_FREE             70 /* fast pool is open for migration */
-                                  /* if available space is greater, in percent */
-#define DEF_SLOW_POOL_FREE             10 /* slow pool is open for migration */
-                                  /* if available space is greater, in percent */
+#define DEF_FAST_POOL_MAX_USED 30 /* open for migration if % space used is less than */
+#define DEF_SLOW_POOL_MAX_USED 90 /* open for migration if % space used is less than */
 
 #define LAMIGO_USERFILE        "/var/lib/lamigo-%s.chlg"
 #define LAMIGO_DUMPFILE        "/var/run/lamigo-%s.stats"
@@ -156,8 +154,8 @@ static void usage(void)
               DEF_ALR_PERIOD_SECS,
               DEF_HOT_FRACTION,
               DEF_HOT_AFTER_IDLE,
-              DEF_FAST_POOL_FREE,
-              DEF_SLOW_POOL_FREE);
+              100 - DEF_FAST_POOL_MAX_USED,
+              100 - DEF_SLOW_POOL_MAX_USED);
        exit(EXIT_SUCCESS);
 }
 
@@ -219,8 +217,8 @@ struct options opt = {
        .o_num_threads = DEF_THREAD_COUNT,
        .o_pool_refresh = DEF_POOL_REFRESH_INTV,
        .o_statfs_refresh = DEF_STATFS_REFRESH_INTV,
-       .o_fast_pool_free = DEF_FAST_POOL_FREE,
-       .o_slow_pool_free = DEF_SLOW_POOL_FREE,
+       .o_fast_pool_max_used = DEF_FAST_POOL_MAX_USED,
+       .o_slow_pool_max_used = DEF_SLOW_POOL_MAX_USED,
        .o_progress_interval = DEF_PROGRESS_INTV,
        .o_alr_extra_args = DEF_ALR_EXTRA_ARGS,
        .o_alr_periods = DEF_ALR_PERIODS,
@@ -482,8 +480,8 @@ static void lamigo_dump_stats_file(void)
                "    ofd_interval: %d\n"
                "    hot_fraction: %d\n"
                "    hot_after_idle: %d\n"
-               "    src_free: %d\n"
-               "    tgt_free: %d\n"
+               "    fast_pool_max_used: %d\n"
+               "    slow_pool_max_used: %d\n"
                "    include_dom: %d\n",
                opt.o_slow_pool, opt.o_min_age, opt.o_cache_size,
                opt.o_rescan, opt.o_num_threads, opt.o_pool_refresh,
@@ -491,22 +489,24 @@ static void lamigo_dump_stats_file(void)
                opt.o_alr_period_time, opt.o_alr_warmup_k,
                opt.o_alr_cooldown_k, opt.o_alr_ofd_interval,
                opt.o_alr_hot_fraction, opt.o_alr_hot_after_idle,
-               opt.o_fast_pool_free, opt.o_slow_pool_free, opt.o_include_dom);
+               opt.o_fast_pool_max_used,
+               opt.o_slow_pool_max_used,
+               opt.o_include_dom);
        for (pl = fast_pools; pl != NULL; pl = pl->pl_next, i++)
                fprintf(f, "pool %s:\n"
                        "    osts: %d\n"
-                       "    avail: %llu\n"
-                       "    total: %llu\n"
+                       "    used_kb: %llu\n"
+                       "    total_kb: %llu\n"
                        "    open: %d\n", pl->pl_pool, pl->pl_ostnr,
-                       pl->pl_avail, pl->pl_total, (int)pl->pl_open);
+                       pl->pl_used_kb, pl->pl_total_kb, (int)pl->pl_is_open);
 
        for (pl = slow_pools; pl != NULL; pl = pl->pl_next, i++)
                fprintf(f, "pool %s:\n"
                        "    osts: %d\n"
-                       "    avail: %llu\n"
-                       "    total: %llu\n"
+                       "    used_kb: %llu\n"
+                       "    total_kb: %llu\n"
                        "    open: %d\n", pl->pl_pool, pl->pl_ostnr,
-                       pl->pl_avail, pl->pl_total, (int)pl->pl_open);
+                       pl->pl_used_kb, pl->pl_total_kb, (int)pl->pl_is_open);
 
        fprintf(f, "stats:\n"
                "    read: %lu\n"
@@ -1336,7 +1336,7 @@ static int lamigo_update_one(struct fid_rec *f)
        int resync, rc;
        struct alr_heat ah;
 
-       if (slow_pools->pl_open == 0) {
+       if (slow_pools->pl_is_open == 0) {
                /* cold pool is close to full, skip replication */
                /* do this check before expensive layout fetching, rules, etc */
                stats.s_skip_tgt_closed++;
@@ -2034,9 +2034,9 @@ void lamigo_process_opt(int c, char *optarg)
                opt.o_alr_extra_args = optarg;
                break;
        case LAMIGO_OPT_SRC_FREE:
-               opt.o_fast_pool_free = atoi(optarg);
-               if (opt.o_fast_pool_free < 1 || opt.o_fast_pool_free > 99)
-                       LX_FATAL("invalid source free space '%s'\n", optarg);
+               opt.o_fast_pool_max_used = 100 - atoi(optarg);
+               if (opt.o_fast_pool_max_used < 1 || opt.o_fast_pool_max_used > 99)
+                       LX_FATAL("invalid argument ('%s') to --src-free\n", optarg);
                break;
        case LAMIGO_OPT_SRC_DOM:
                opt.o_include_dom = 1;
@@ -2045,9 +2045,9 @@ void lamigo_process_opt(int c, char *optarg)
                lipe_ssh_log_verbosity = atoi(optarg);
                break;
        case LAMIGO_OPT_TGT_FREE:
-               opt.o_slow_pool_free = atoi(optarg);
-               if (opt.o_slow_pool_free < 1 || opt.o_slow_pool_free > 99)
-                       LX_FATAL("invalid target free space '%s'\n", optarg);
+               opt.o_slow_pool_max_used = 100 - atoi(optarg);
+               if (opt.o_slow_pool_max_used < 1 || opt.o_slow_pool_max_used > 99)
+                       LX_FATAL("invalid argument ('%s') to --tgt-free\n", optarg);
                break;
        case LAMIGO_OPT_VERSION:
                lipe_version();
@@ -3372,7 +3372,7 @@ static void lamigo_check_hot(void)
                return;
 
        /* don't try to replicate to fast pool if it's close to full */
-       if (fast_pools->pl_open) {
+       if (fast_pools->pl_is_open) {
                /* get most recent hot files */
                ht = lamigo_get_hot(alr_hot_period, &nr);
                if (ht) {
@@ -3400,9 +3400,9 @@ static void lamigo_check_hot(void)
                LX_DEBUG("idle "DFID": P: %Lu/%Lu, live %d, idle %d\n",
                         PFID(&ah->ah_fid), ah->ah_pools[ALR_FAST], ah->ah_pools[ALR_SLOW],
                         ah->ah_livetime, ah->ah_idle);
-               if (fast_pools->pl_open)
+               if (fast_pools->pl_is_open)
                        lamigo_check_hot_one(ht + i);
-               if (slow_pools->pl_open)
+               if (slow_pools->pl_is_open)
                        lamigo_check_hot_on_cold(ht + i);
        }
        free(ht);
@@ -3443,43 +3443,45 @@ static __u64 lamigo_read_osp_param(const int ostidx, const char *param)
        return retval;
 }
 
-static void lamigo_refresh_pool_statfs(struct pool_list *pl, int threshold)
+static void lamigo_refresh_pool_statfs(struct pool_list *pl, int max_used)
 {
-       __u64 tavail = 0, ttotal = 0; /* sum for the pool */
-       bool old_status;
+       __u64 pool_total_kb = 0;
+       __u64 pool_used_kb = 0;
+       bool pool_is_open;
        int i;
 
+       /* @max_used is a percentage of total pool space. */
+       assert(0 <= max_used && max_used <= 100);
+
        pthread_rwlock_rdlock(&pl->pl_lock);
        for (i = 0; i < pl->pl_ostnr; i++) {
-               __u64 kbavail, kbtotal, active, status;
+               __u64 ost_used_kb, ost_total_kb, active, status;
                int ostidx = pl->pl_osts[i];
 
-               /* get kbavail */
-               kbavail = lamigo_read_osp_param(ostidx, "kbytesfree");
-               /* get kbtotal */
-               kbtotal = lamigo_read_osp_param(ostidx, "kbytestotal");
-               /* check OSP is active */
                active = lamigo_read_osp_param(ostidx, "active");
                status = lamigo_read_osp_param(ostidx, "prealloc_status");
                if (!active || status)
                        continue;
-               tavail += kbavail;
-               ttotal += kbtotal;
+
+               ost_total_kb = lamigo_read_osp_param(ostidx, "kbytestotal");
+               ost_used_kb = ost_total_kb - lamigo_read_osp_param(ostidx, "kbytesfree");
+
+               pool_total_kb += ost_total_kb;
+               pool_used_kb += ost_used_kb;
        }
+
        pthread_rwlock_unlock(&pl->pl_lock);
-       pl->pl_total = ttotal;
-       pl->pl_avail = tavail;
-       old_status = pl->pl_open;
-       if (ttotal == 0 || (tavail < ttotal * threshold / 100))
-               pl->pl_open = false;
-       else
-               pl->pl_open = true;
 
-       /* whether pool is good for replicas */
-       if (old_status != pl->pl_open)
-               LX_DEBUG("statfs for %s %s pool: %llu from %llu, thresh %llu\n",
-                        pl->pl_open ? "open" : "closed", pl->pl_pool, tavail,
-                        ttotal, ttotal * threshold / 100);
+       pl->pl_total_kb = pool_total_kb;
+       pl->pl_used_kb = pool_used_kb;
+       pool_is_open = (pool_used_kb < pool_total_kb * max_used / 100);
+
+       if (pl->pl_is_open != pool_is_open)
+               LX_DEBUG("statfs for %s %s pool: used_kb %llu, total_kb %llu KB, max_used_kb %llu\n",
+                        pool_is_open ? "open" : "closed", pl->pl_pool,
+                        pool_used_kb, pool_total_kb, pool_total_kb * max_used / 100);
+
+       pl->pl_is_open = pool_is_open;
 }
 
 static void lamigo_refresh_statfs(void)
@@ -3494,12 +3496,12 @@ static void lamigo_refresh_statfs(void)
 
        pl = fast_pools;
        while (pl) {
-               lamigo_refresh_pool_statfs(pl, opt.o_fast_pool_free);
+               lamigo_refresh_pool_statfs(pl, opt.o_fast_pool_max_used);
                pl = pl->pl_next;
        }
        pl = slow_pools;
        while (pl) {
-               lamigo_refresh_pool_statfs(pl, opt.o_slow_pool_free);
+               lamigo_refresh_pool_statfs(pl, opt.o_slow_pool_max_used);
                pl = pl->pl_next;
        }
 }
index 2cd7cf2..c5e75d6 100644 (file)
@@ -24,9 +24,9 @@ struct pool_list {
        int pl_ostnr;
        int *pl_osts;
        pthread_rwlock_t pl_lock;
-       __u64 pl_total;
-       __u64 pl_avail;
-       bool pl_open; /* open for new replicas, based on avail/total */
+       __u64 pl_total_kb;
+       __u64 pl_used_kb;
+       bool pl_is_open; /* open for new replicas, based on avail/total */
        struct pool_list *pl_next;
 };
 
@@ -85,8 +85,8 @@ struct options {
        int              o_num_threads;
        int              o_pool_refresh;
        int              o_statfs_refresh;
-       int              o_fast_pool_free;
-       int              o_slow_pool_free;
+       int              o_fast_pool_max_used;
+       int              o_slow_pool_max_used;
        int              o_progress_interval; /* how often to show progress */
        char            *o_alr_extra_args;
        int              o_alr_periods;