From 548843be0333acd3f36938a5d0a4747facec2447 Mon Sep 17 00:00:00 2001 From: "John L. Hammond" Date: Fri, 29 Oct 2021 11:09:26 -0500 Subject: [PATCH] EX-4103 lamigo: rename some "check" functions Despite the name of the feature, files are "hot" or "cold", while pools are "fast" or "slow". Rename and reorganize some functions with this in mind: lamigo_check_hot -> lamigo_sync_hot_files lamigo_new_job_for_hot -> lamigo_submit_sync lamigo_check_hot_one -> lamigo_sync_hot_to_fast lamigo_check_hot_on_cold -> lamigo_sync_hot_to_slow lamigo_get_hot -> lamigo_get_hot_files Test-Parameters: trivial testlist=hot-pools Change-Id: I2833c8828d73e50a72db8a19aae16d1400eccd66 Reviewed-on: https://review.whamcloud.com/45410 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alexandre Ioffe Reviewed-by: John L. Hammond --- lipe/src/lamigo.c | 215 ++++++++++++++++++++++++++++---------------------- lipe/src/lamigo.h | 1 + lipe/src/lamigo_alr.c | 2 +- 3 files changed, 121 insertions(+), 97 deletions(-) diff --git a/lipe/src/lamigo.c b/lipe/src/lamigo.c index 4eb318f..9099530 100644 --- a/lipe/src/lamigo.c +++ b/lipe/src/lamigo.c @@ -215,6 +215,20 @@ enum amigo_resync_type { AMIGO_RESYNC_RESYNC = 2 }; +static const char *PSYNC(enum amigo_resync_type type) +{ + switch (type) { + case AMIGO_RESYNC_NONE: + return "none"; + case AMIGO_RESYNC_EXTEND: + return "extend"; + case AMIGO_RESYNC_RESYNC: + return "resync"; + } + + return "unknown"; +} + struct options opt = { .o_min_age = DEF_MIN_AGE, .o_cache_size = DEF_CACHE_SIZE, @@ -327,9 +341,9 @@ static int lamigo_sigpipe[2]; int cfs_get_param_paths(glob_t *paths, const char *pattern, ...); static int lamigo_read_file(const char *param, char *val, const int vallen); -static void lamigo_check_hot(void); static void lamigo_alr_mirror_cb(struct resync_job *rj, void *cbdata, int rc); static void lamigo_parse_rules(const char *rule_str, const char *filename); +static void lamigo_sync_hot_files(void); struct pool_list 
*fast_pools; /* fast pools */ struct pool_list *slow_pools; /* slow pool */ @@ -406,14 +420,12 @@ static void lamigo_dump_jobs(FILE *out, struct lipe_list_head *jlist) int i = 0; lipe_list_for_each_entry(j, jlist, rj_list) { - char *cmd; + const char *cmd; if (j->rj_setprefer) cmd = "setprefer"; - else if (j->rj_resync == AMIGO_RESYNC_EXTEND) - cmd = "extend"; else - cmd = "resync"; + cmd = PSYNC(j->rj_resync); /* every non-setprefer job */ fprintf(out, JOB_FMT, i++, j->rj_pid, PFID(&j->rj_fid), j->rj_index, j->rj_agent ? j->rj_agent->rag_index : -1, @@ -421,12 +433,9 @@ static void lamigo_dump_jobs(FILE *out, struct lipe_list_head *jlist) } } -static char *lamigo_resync2str[] = { "none", "extend", "resync" }; - static void lamigo_dump_history(FILE *out) { int i = 0, cur = stats.s_hist_cur; - char *action; for (i = 0; i < ARRAY_SIZE(stats.s_hist); i++) { if (--cur < 0) @@ -435,13 +444,9 @@ static void lamigo_dump_history(FILE *out) if (stats.s_hist[cur].h_fid.f_seq == 0) break; - if (stats.s_hist[cur].h_result > AMIGO_RESYNC_RESYNC) - action = "unknown"; - else - action = lamigo_resync2str[stats.s_hist[cur].h_result]; - fprintf(out, " hist%d: { fid: "DFID", result: %s }\n", cur, - PFID(&stats.s_hist[cur].h_fid), action); + PFID(&stats.s_hist[cur].h_fid), + PSYNC(stats.s_hist[cur].h_result)); } } @@ -1331,7 +1336,7 @@ static int lamigo_update_one(struct fid_rec *f) struct alr_heat ah; if (slow_pools->pl_is_open == 0) { - /* cold pool is close to full, skip replication */ + /* slow pool is close to full, skip replication */ /* do this check before expensive layout fetching, rules, etc */ stats.s_skip_tgt_closed++; LX_DEBUG("pool %s closed for "DFID"\n", @@ -1345,7 +1350,7 @@ static int lamigo_update_one(struct fid_rec *f) return 1; } - /* prevent hot file migration from hot pool to slow */ + /* prevent hot file migration from fast pool to slow pool */ rc = lamigo_alr_check_is_hot(&f->fr_fh.fh_fid, &ah); if (rc) { if (ah.ah_mark & ALR_TAG_NO_ACCT) { @@ -1851,7 +1856,7 @@ bool
lamigo_lookup_ost(struct pool_list *pl, int ost) return false; } -static struct pool_list *lamigo_alloc_pool(char *pool) +static struct pool_list *lamigo_alloc_pool(const char *pool) { struct pool_list *pl; @@ -1864,7 +1869,7 @@ static struct pool_list *lamigo_alloc_pool(char *pool) return pl; } -static void lamigo_parse_pool(char *pool) +static void lamigo_add_fast_pool(const char *pool) { struct pool_list *pl; @@ -1879,6 +1884,8 @@ static void lamigo_parse_pool(char *pool) pl = lamigo_alloc_pool(pool); if (!pl) return; + + pl->pl_is_fast = true; /* Faster than slow, anyway. */ pl->pl_next = fast_pools; fast_pools = pl; } @@ -2146,7 +2153,7 @@ static void lamigo_process_opt(int c, char *optarg) LX_WARN("options '-s' and '--src' are deprecated, please use --fast-pool instead\n"); /* Fall through. */ case LAMIGO_OPT_FAST_POOL: - lamigo_parse_pool(optarg); + lamigo_add_fast_pool(optarg); break; case 't': LX_WARN("options '-t' and '--tgt' are deprecated, please use --slow-pool instead\n"); @@ -2352,7 +2359,7 @@ static void lamigo_parse_opts(int argc, char **argv) LX_FATAL("cannot open '%s': %s\n", buf, strerror(errno)); if (fast_pools == NULL) { - lamigo_parse_pool(DEF_FAST_POOL); + lamigo_add_fast_pool(DEF_FAST_POOL); LX_WARN("fast pools aren't defined, using '%s'\n", DEF_FAST_POOL); } @@ -3205,7 +3212,7 @@ int main(int argc, char **argv) lamigo_wait_for_job_completion(3); if (enable_heat) - lamigo_check_hot(); + lamigo_sync_hot_files(); if (!are_agents_busy()) { rc = lamigo_check_sync(); @@ -3229,8 +3236,9 @@ static void lamigo_alr_mirror_cb(struct resync_job *rj, void *cbdata, int rc) ALR_TAG_NO_ACCT); } -static void lamigo_new_job_for_hot(struct lu_fid *fid, enum amigo_resync_type sync, - struct pool_list *tgt, int stripes) +/* Create and submit a job to @sync (extend or resync) @fid to @pl. 
*/ +static void lamigo_submit_sync(const struct lu_fid *fid, enum amigo_resync_type sync, + struct pool_list *pl, int stripes) { struct resync_job *rj; int rc; @@ -3242,10 +3250,9 @@ static void lamigo_new_job_for_hot(struct lu_fid *fid, enum amigo_resync_type sy rj->rj_resync = sync; rj->rj_check_job = 0; rj->rj_pid = 0; - rj->rj_pool = tgt->pl_pool; - /* only mark hot pool mirrors with "prefer" */ - if (lamigo_lookup_fast_pool(tgt->pl_pool)) - rj->rj_mirror_opts = "prefer"; + rj->rj_pool = pl->pl_pool; + /* only fast pool mirrors should be marked "prefer" */ + rj->rj_mirror_opts = pl->pl_is_fast ? "prefer" : NULL; rj->rj_callback = lamigo_alr_mirror_cb; rc = lamigo_submit_job(rj); @@ -3255,89 +3262,103 @@ static void lamigo_new_job_for_hot(struct lu_fid *fid, enum amigo_resync_type sy } } -static int lamigo_check_hot_one(struct alr_heat *ht) +/* If file (ht->ah_fid) is on the slow pool and should be synced + * (extended or resynced) to the fast pool then submit a job to + * do the right thing. */ +static void lamigo_sync_hot_to_fast(struct alr_heat *ht) { struct mirror_opts mo = { 0 }; + bool should_sync = false; + bool is_rw = false; int sync; - LX_DEBUG("check hot "DFID": H: %Lu/%Lu, P: %Lu/%Lu, L %d, I %d %s\n", + LX_DEBUG("sync hot to fast "DFID": H: %Lu/%Lu, P: %Lu/%Lu, L %d, I %d %s\n", PFID(&ht->ah_fid), ht->ah_heat[ALR_READ], ht->ah_heat[ALR_WRITE], ht->ah_pools[ALR_FAST], ht->ah_pools[ALR_SLOW], ht->ah_livetime, ht->ah_idle, ht->ah_mark ?
"M" : ""); + if (ht->ah_mark & ALR_TAG_PROCESSED) { - /* already tried to replicate */ - LX_DEBUG(DFID" tried to replicate already\n", - PFID(&ht->ah_fid)); - return 0; + LX_DEBUG(DFID" already processed\n", PFID(&ht->ah_fid)); + return; } - /* - * the hot file has been read from the cold pool - * try to mirror it to the hot pool and make that - * replica preferred - */ - if (ht->ah_heat[ALR_READ] && ht->ah_heat[ALR_WRITE] == 0 && - ht->ah_pools[ALR_FAST] == 0 && ht->ah_pools[ALR_SLOW]) { - sync = lamigo_is_in_sync(&ht->ah_fid, slow_pools, fast_pools, &mo); - LX_DEBUG("try to replicate RO "DFID": %d\n", - PFID(&ht->ah_fid), sync); - if (sync != AMIGO_RESYNC_NONE) { - lamigo_new_job_for_hot(&ht->ah_fid, sync, fast_pools, - mo.mo_stripes); - stats.s_replicate_ro2hot++; - } - lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0); - } + /* The hot file has been read from the slow pool. Try to mirror + * it to the fast pool and make that replica preferred. */ + if (ht->ah_heat[ALR_READ] && + ht->ah_heat[ALR_WRITE] == 0 && + ht->ah_pools[ALR_SLOW] && + ht->ah_pools[ALR_FAST] == 0) + should_sync = true; - /* - * the hot file was modified in the past, - * try to replicate that from the cold to the hot pool - * XXX: we can track OPEN/CLOSE events to skip this - * try if it's still open - */ - if (ht->ah_idle > 0 && ht->ah_heat[ALR_WRITE] && - ht->ah_pools[ALR_FAST] == 0 && ht->ah_pools[ALR_SLOW]) { - sync = lamigo_is_in_sync(&ht->ah_fid, slow_pools, fast_pools, &mo); - LX_DEBUG("try to replicate RW "DFID": %d\n", - PFID(&ht->ah_fid), sync); - if (sync != AMIGO_RESYNC_NONE) { - lamigo_new_job_for_hot(&ht->ah_fid, sync, fast_pools, - mo.mo_stripes); - stats.s_replicate_rw2hot++; - } - /* XXX: mark existing replica preferred if it's not */ - lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0); - } + /* The file is idle but was modified in the past. Try to + * replicate that from the slow to the fast pool. 
+ * + * XXX: we can track OPEN/CLOSE events to skip this try if it's still open. */ + if (ht->ah_idle > 0 && + ht->ah_heat[ALR_WRITE] && + ht->ah_pools[ALR_SLOW] && + ht->ah_pools[ALR_FAST] == 0) + should_sync = true, is_rw = true; /* XXX: do not handle mix yet */ - return 0; + + if (!should_sync) + return; + + sync = lamigo_is_in_sync(&ht->ah_fid, slow_pools, fast_pools, &mo); + lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0); + if (sync == AMIGO_RESYNC_NONE) + return; + + LX_DEBUG("try to %s %s "DFID" to pool '%s'\n", + PSYNC(sync), + is_rw ? "RW" : "RO", + PFID(&ht->ah_fid), + fast_pools->pl_pool); + + lamigo_submit_sync(&ht->ah_fid, sync, fast_pools, mo.mo_stripes); + + if (is_rw) + stats.s_replicate_rw2hot++; + else + stats.s_replicate_ro2hot++; } -static void lamigo_check_hot_on_cold(struct alr_heat *ht) +/* If file (ht->ah_fid) is on the fast pool and should be synced + * (extended or resynced) to the slow pool then submit a job to + * do the right thing. */ +static void lamigo_sync_hot_to_slow(struct alr_heat *ht) { struct mirror_opts mo = { 0 }; + bool should_sync = false; int sync; - /* the file stored on hot pool was hot but still being written. - * now that it's idling try to replicate it to the cold pool */ - if (ht->ah_idle > 0 && ht->ah_heat[ALR_WRITE] && - ht->ah_pools[ALR_SLOW] == 0 && ht->ah_pools[ALR_FAST]) { - sync = lamigo_is_in_sync(&ht->ah_fid, fast_pools, - slow_pools, &mo); - LX_DEBUG("try to replicate idling hot to CP "DFID": %d\n", - PFID(&ht->ah_fid), sync); - if (sync != AMIGO_RESYNC_NONE) { - lamigo_new_job_for_hot(&ht->ah_fid, sync, slow_pools, - mo.mo_stripes); - stats.s_replicate_rw2cold++; - } - /* XXX: mark existing replica preferred if it's not */ - lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0); - } + /* The file stored on fast pool was hot but still being written.
+ * Now that it's idling try to replicate it to the slow pool */ + if (ht->ah_idle > 0 && + ht->ah_heat[ALR_WRITE] && + ht->ah_pools[ALR_FAST] && + ht->ah_pools[ALR_SLOW] == 0) + should_sync = true; + + if (!should_sync) + return; + + sync = lamigo_is_in_sync(&ht->ah_fid, fast_pools, slow_pools, &mo); + lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0); + if (sync == AMIGO_RESYNC_NONE) + return; + + LX_DEBUG("try to %s idling hot "DFID" to pool '%s'\n", + PSYNC(sync), PFID(&ht->ah_fid), slow_pools->pl_pool); + + lamigo_submit_sync(&ht->ah_fid, sync, slow_pools, mo.mo_stripes); + stats.s_replicate_rw2cold++; + /* XXX: mark existing replica preferred if it's not */ } -struct alr_heat *lamigo_get_hot(int period, int *nr) +struct alr_heat *lamigo_get_hot_files(int period, int *nr) { struct alr_heat *ht; int i; @@ -3365,7 +3386,7 @@ struct alr_heat *lamigo_get_hot(int period, int *nr) static unsigned alr_hot_check_at = 0; static unsigned long alr_hot_period = 1; -static void lamigo_check_hot(void) +static void lamigo_sync_hot_files(void) { struct alr_heat *ht; int i, nr; @@ -3377,15 +3398,15 @@ static void lamigo_check_hot(void) if (alr_period <= alr_hot_period) return; - /* don't try to replicate to fast pool if it's close to full */ + /* don't try to replicate to fast pool if it's nearly full */ if (fast_pools->pl_is_open) { /* get most recent hot files */ - ht = lamigo_get_hot(alr_hot_period, &nr); + ht = lamigo_get_hot_files(alr_hot_period, &nr); if (ht) { LX_DEBUG("check hot in period %lu - %d\n", alr_hot_period, nr); for (i = 0; i < nr; i++) - lamigo_check_hot_one(ht + i); + lamigo_sync_hot_to_fast(&ht[i]); free(ht); } } @@ -3395,7 +3416,7 @@ static void lamigo_check_hot(void) /* now check hot idling files - the files we found hot and * skipped replication. 
now it's time to try again */ - ht = lamigo_get_hot(alr_hot_period - opt.o_alr_hot_after_idle, &nr); + ht = lamigo_get_hot_files(alr_hot_period - opt.o_alr_hot_after_idle, &nr); if (!ht) goto out; @@ -3406,13 +3427,15 @@ static void lamigo_check_hot(void) LX_DEBUG("idle "DFID": P: %Lu/%Lu, live %d, idle %d\n", PFID(&ah->ah_fid), ah->ah_pools[ALR_FAST], ah->ah_pools[ALR_SLOW], ah->ah_livetime, ah->ah_idle); + if (fast_pools->pl_is_open) - lamigo_check_hot_one(ht + i); + lamigo_sync_hot_to_fast(&ht[i]); + if (slow_pools->pl_is_open) - lamigo_check_hot_on_cold(ht + i); + lamigo_sync_hot_to_slow(&ht[i]); } - free(ht); + free(ht); out: alr_hot_period++; } diff --git a/lipe/src/lamigo.h b/lipe/src/lamigo.h index f83cfe9..456e154 100644 --- a/lipe/src/lamigo.h +++ b/lipe/src/lamigo.h @@ -26,6 +26,7 @@ struct pool_list { pthread_rwlock_t pl_lock; __u64 pl_total_kb; __u64 pl_used_kb; + bool pl_is_fast; bool pl_is_open; /* open for new replicas, based on avail/total */ struct pool_list *pl_next; }; diff --git a/lipe/src/lamigo_alr.c b/lipe/src/lamigo_alr.c index e152d02..0f4cfbb 100644 --- a/lipe/src/lamigo_alr.c +++ b/lipe/src/lamigo_alr.c @@ -180,7 +180,7 @@ static void lamigo_alr_parse_one(const char *host, const char *line) /* f_ver contains compid, we track files */ fid.f_ver = 0; - /* attribute IO to hot or cold pool */ + /* attribute IO to fast or slow pool */ ostidx = strstr(ostname, "-OST"); if (!ostidx) { LX_ERROR("cannot parse access log OST name '%s'\n", ostname); -- 1.8.3.1