AMIGO_RESYNC_RESYNC = 2
};
+/* Map @type to the name printed in debug messages and in the job and
+ * history dumps. Any value without a case below yields "unknown". */
+static const char *PSYNC(enum amigo_resync_type type)
+{
+ const char *name = "unknown";
+
+ switch (type) {
+ case AMIGO_RESYNC_NONE:
+ name = "none";
+ break;
+ case AMIGO_RESYNC_EXTEND:
+ name = "extend";
+ break;
+ case AMIGO_RESYNC_RESYNC:
+ name = "resync";
+ break;
+ }
+
+ return name;
+}
+
struct options opt = {
.o_min_age = DEF_MIN_AGE,
.o_cache_size = DEF_CACHE_SIZE,
int cfs_get_param_paths(glob_t *paths, const char *pattern, ...);
static int lamigo_read_file(const char *param, char *val, const int vallen);
-static void lamigo_check_hot(void);
static void lamigo_alr_mirror_cb(struct resync_job *rj, void *cbdata, int rc);
static void lamigo_parse_rules(const char *rule_str, const char *filename);
+static void lamigo_sync_hot_files(void);
struct pool_list *fast_pools; /* fast pools */
struct pool_list *slow_pools; /* slow pool */
int i = 0;
lipe_list_for_each_entry(j, jlist, rj_list) {
- char *cmd;
+ const char *cmd;
+ /* setprefer jobs are labelled explicitly; every other job is
+ * named after its resync type, so cmd is set on every path */
if (j->rj_setprefer)
cmd = "setprefer";
- else if (j->rj_resync == AMIGO_RESYNC_EXTEND)
- cmd = "extend";
- else
- cmd = "resync";
+ else
+ cmd = PSYNC(j->rj_resync);
fprintf(out, JOB_FMT, i++, j->rj_pid, PFID(&j->rj_fid),
j->rj_index, j->rj_agent ? j->rj_agent->rag_index : -1,
}
}
-static char *lamigo_resync2str[] = { "none", "extend", "resync" };
-
static void lamigo_dump_history(FILE *out)
{
int i = 0, cur = stats.s_hist_cur;
- char *action;
for (i = 0; i < ARRAY_SIZE(stats.s_hist); i++) {
if (--cur < 0)
if (stats.s_hist[cur].h_fid.f_seq == 0)
break;
- if (stats.s_hist[cur].h_result > AMIGO_RESYNC_RESYNC)
- action = "unknown";
- else
- action = lamigo_resync2str[stats.s_hist[cur].h_result];
-
fprintf(out, " hist%d: { fid: "DFID", result: %s }\n", cur,
- PFID(&stats.s_hist[cur].h_fid), action);
+ PFID(&stats.s_hist[cur].h_fid),
+ PSYNC(stats.s_hist[cur].h_result));
}
}
struct alr_heat ah;
if (slow_pools->pl_is_open == 0) {
- /* cold pool is close to full, skip replication */
+ /* slow pool is close to full, skip replication */
/* do this check before expensive layout fetching, rules, etc */
stats.s_skip_tgt_closed++;
LX_DEBUG("pool %s closed for "DFID"\n",
return 1;
}
- /* prevent hot file migration from hot pool to slow */
+ /* prevent hot file migration from fast pool to slow pool */
rc = lamigo_alr_check_is_hot(&f->fr_fh.fh_fid, &ah);
if (rc) {
if (ah.ah_mark & ALR_TAG_NO_ACCT) {
return false;
}
-static struct pool_list *lamigo_alloc_pool(char *pool)
+static struct pool_list *lamigo_alloc_pool(const char *pool)
{
struct pool_list *pl;
return pl;
}
-static void lamigo_parse_pool(char *pool)
+static void lamigo_add_fast_pool(const char *pool)
{
struct pool_list *pl;
pl = lamigo_alloc_pool(pool);
if (!pl)
return;
+
+ pl->pl_is_fast = true; /* Flag the pool as fast before linking it into fast_pools. */
pl->pl_next = fast_pools;
fast_pools = pl;
}
LX_WARN("options '-s' and '--src' are deprecated, please use --fast-pool instead\n");
/* Fall through. */
case LAMIGO_OPT_FAST_POOL:
- lamigo_parse_pool(optarg);
+ lamigo_add_fast_pool(optarg);
break;
case 't':
LX_WARN("options '-t' and '--tgt' are deprecated, please use --slow-pool instead\n");
LX_FATAL("cannot open '%s': %s\n", buf, strerror(errno));
if (fast_pools == NULL) {
- lamigo_parse_pool(DEF_FAST_POOL);
+ lamigo_add_fast_pool(DEF_FAST_POOL);
LX_WARN("fast pools aren't defined, using '%s'\n", DEF_FAST_POOL);
}
lamigo_wait_for_job_completion(3);
if (enable_heat)
- lamigo_check_hot();
+ lamigo_sync_hot_files();
if (!are_agents_busy()) {
rc = lamigo_check_sync();
ALR_TAG_NO_ACCT);
}
-static void lamigo_new_job_for_hot(struct lu_fid *fid, enum amigo_resync_type sync,
- struct pool_list *tgt, int stripes)
+/* Create and submit a job to @sync (extend or resync) @fid to @pl. */
+static void lamigo_submit_sync(const struct lu_fid *fid, enum amigo_resync_type sync,
+ struct pool_list *pl, int stripes)
{
struct resync_job *rj;
int rc;
rj->rj_resync = sync;
rj->rj_check_job = 0;
rj->rj_pid = 0;
- rj->rj_pool = tgt->pl_pool;
- /* only mark hot pool mirrors with "prefer" */
- if (lamigo_lookup_fast_pool(tgt->pl_pool))
- rj->rj_mirror_opts = "prefer";
+ rj->rj_pool = pl->pl_pool;
+ /* only fast pool mirrors should be marked "prefer" */
+ rj->rj_mirror_opts = pl->pl_is_fast ? "prefer" : NULL;
rj->rj_callback = lamigo_alr_mirror_cb;
rc = lamigo_submit_job(rj);
}
}
-static int lamigo_check_hot_one(struct alr_heat *ht)
+/* If file (ht->ah_fid) is on the slow pool and should be synced
+ * (extended or resynced) to the fast pool then submit a job to do so.
+ *
+ * Nothing is done if the file is already tagged ALR_TAG_PROCESSED or
+ * matches neither the read-only nor the idle-after-write case below.
+ * Otherwise the file is tagged ALR_TAG_PROCESSED and, unless
+ * lamigo_is_in_sync() reports AMIGO_RESYNC_NONE, a job targeting
+ * fast_pools is submitted and counted in stats. */
+static void lamigo_sync_hot_to_fast(struct alr_heat *ht)
{
struct mirror_opts mo = { 0 };
+ bool should_sync = false;
+ bool is_rw = false;
int sync;
- LX_DEBUG("check hot "DFID": H: %Lu/%Lu, P: %Lu/%Lu, L %d, I %d %s\n",
+ LX_DEBUG("sync hot to fast "DFID": H: %Lu/%Lu, P: %Lu/%Lu, L %d, I %d %s\n",
PFID(&ht->ah_fid),
ht->ah_heat[ALR_READ], ht->ah_heat[ALR_WRITE], ht->ah_pools[ALR_FAST],
ht->ah_pools[ALR_SLOW], ht->ah_livetime, ht->ah_idle,
ht->ah_mark ? "M" : "");
+
if (ht->ah_mark & ALR_TAG_PROCESSED) {
- /* already tried to replicate */
- LX_DEBUG(DFID" tried to replicate already\n",
- PFID(&ht->ah_fid));
- return 0;
+ LX_DEBUG(DFID" already processed\n", PFID(&ht->ah_fid));
+ return;
}
- /*
- * the hot file has been read from the cold pool
- * try to mirror it to the hot pool and make that
- * replica preferred
- */
- if (ht->ah_heat[ALR_READ] && ht->ah_heat[ALR_WRITE] == 0 &&
- ht->ah_pools[ALR_FAST] == 0 && ht->ah_pools[ALR_SLOW]) {
- sync = lamigo_is_in_sync(&ht->ah_fid, slow_pools, fast_pools, &mo);
- LX_DEBUG("try to replicate RO "DFID": %d\n",
- PFID(&ht->ah_fid), sync);
- if (sync != AMIGO_RESYNC_NONE) {
- lamigo_new_job_for_hot(&ht->ah_fid, sync, fast_pools,
- mo.mo_stripes);
- stats.s_replicate_ro2hot++;
- }
- lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0);
- }
+ /* The hot file has been read from the slow pool. Try to mirror
+ * it to the fast pool and make that replica preferred. */
+ if (ht->ah_heat[ALR_READ] &&
+ ht->ah_heat[ALR_WRITE] == 0 &&
+ ht->ah_pools[ALR_SLOW] &&
+ ht->ah_pools[ALR_FAST] == 0)
+ should_sync = true;
- /*
- * the hot file was modified in the past,
- * try to replicate that from the cold to the hot pool
- * XXX: we can track OPEN/CLOSE events to skip this
- * try if it's still open
- */
- if (ht->ah_idle > 0 && ht->ah_heat[ALR_WRITE] &&
- ht->ah_pools[ALR_FAST] == 0 && ht->ah_pools[ALR_SLOW]) {
- sync = lamigo_is_in_sync(&ht->ah_fid, slow_pools, fast_pools, &mo);
- LX_DEBUG("try to replicate RW "DFID": %d\n",
- PFID(&ht->ah_fid), sync);
- if (sync != AMIGO_RESYNC_NONE) {
- lamigo_new_job_for_hot(&ht->ah_fid, sync, fast_pools,
- mo.mo_stripes);
- stats.s_replicate_rw2hot++;
- }
- /* XXX: mark existing replica preferred if it's not */
- lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0);
- }
+ /* The file is idle but was modified in the past. Try to
+ * replicate that from the slow to the fast pool.
+ *
+ * XXX: we can track OPEN/CLOSE events to skip this try if it's still open. */
+ if (ht->ah_idle > 0 &&
+ ht->ah_heat[ALR_WRITE] &&
+ ht->ah_pools[ALR_SLOW] &&
+ ht->ah_pools[ALR_FAST] == 0) {
+ should_sync = true;
+ is_rw = true;
+ }
/* XXX: do not handle mix yet */
- return 0;
+
+ if (!should_sync)
+ return;
+
+ sync = lamigo_is_in_sync(&ht->ah_fid, slow_pools, fast_pools, &mo);
+ /* Tag the file even when no job is needed so that the
+ * ALR_TAG_PROCESSED check above skips it next time. */
+ lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0);
+ if (sync == AMIGO_RESYNC_NONE)
+ return;
+
+ LX_DEBUG("try to %s %s "DFID" to pool '%s'\n",
+ PSYNC(sync),
+ is_rw ? "RW" : "RO",
+ PFID(&ht->ah_fid),
+ fast_pools->pl_pool);
+
+ lamigo_submit_sync(&ht->ah_fid, sync, fast_pools, mo.mo_stripes);
+
+ if (is_rw)
+ stats.s_replicate_rw2hot++;
+ else
+ stats.s_replicate_ro2hot++;
}
-static void lamigo_check_hot_on_cold(struct alr_heat *ht)
+/* If file (ht->ah_fid) is on the fast pool and should be synced
+ * (extended or resynced) to the slow pool then tag it
+ * ALR_TAG_PROCESSED and, unless lamigo_is_in_sync() reports
+ * AMIGO_RESYNC_NONE, submit a job targeting slow_pools. */
+static void lamigo_sync_hot_to_slow(struct alr_heat *ht)
{
struct mirror_opts mo = { 0 };
+ bool should_sync = false;
int sync;
- /* the file stored on hot pool was hot but still being written.
- * now that it's idling try to replicate it to the cold pool */
- if (ht->ah_idle > 0 && ht->ah_heat[ALR_WRITE] &&
- ht->ah_pools[ALR_SLOW] == 0 && ht->ah_pools[ALR_FAST]) {
- sync = lamigo_is_in_sync(&ht->ah_fid, fast_pools,
- slow_pools, &mo);
- LX_DEBUG("try to replicate idling hot to CP "DFID": %d\n",
- PFID(&ht->ah_fid), sync);
- if (sync != AMIGO_RESYNC_NONE) {
- lamigo_new_job_for_hot(&ht->ah_fid, sync, slow_pools,
- mo.mo_stripes);
- stats.s_replicate_rw2cold++;
- }
- /* XXX: mark existing replica preferred if it's not */
- lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0);
- }
+ /* The file stored on fast pool was hot but still being written.
+ * Now that it's idling try to replicate it to the slow pool */
+ if (ht->ah_idle > 0 &&
+ ht->ah_heat[ALR_WRITE] &&
+ ht->ah_pools[ALR_FAST] &&
+ ht->ah_pools[ALR_SLOW] == 0)
+ should_sync = true;
+
+ if (!should_sync)
+ return;
+
+ sync = lamigo_is_in_sync(&ht->ah_fid, fast_pools, slow_pools, &mo);
+ lamigo_alr_mark(&ht->ah_fid, ALR_TAG_PROCESSED, 0);
+ if (sync == AMIGO_RESYNC_NONE)
+ return;
+
+ LX_DEBUG("try to %s idling hot "DFID" to pool '%s'\n",
+ PSYNC(sync), PFID(&ht->ah_fid), slow_pools->pl_pool);
+
+ lamigo_submit_sync(&ht->ah_fid, sync, slow_pools, mo.mo_stripes);
+ stats.s_replicate_rw2cold++;
+ /* XXX: mark existing replica preferred if it's not */
}
-struct alr_heat *lamigo_get_hot(int period, int *nr)
+struct alr_heat *lamigo_get_hot_files(int period, int *nr)
{
struct alr_heat *ht;
int i;
static unsigned alr_hot_check_at = 0;
static unsigned long alr_hot_period = 1;
-static void lamigo_check_hot(void)
+static void lamigo_sync_hot_files(void)
{
struct alr_heat *ht;
int i, nr;
if (alr_period <= alr_hot_period)
return;
- /* don't try to replicate to fast pool if it's close to full */
+ /* don't try to replicate to fast pool if it's nearly full */
if (fast_pools->pl_is_open) {
/* get most recent hot files */
- ht = lamigo_get_hot(alr_hot_period, &nr);
+ ht = lamigo_get_hot_files(alr_hot_period, &nr);
if (ht) {
LX_DEBUG("check hot in period %lu - %d\n",
alr_hot_period, nr);
for (i = 0; i < nr; i++)
- lamigo_check_hot_one(ht + i);
+ lamigo_sync_hot_to_fast(&ht[i]);
free(ht);
}
}
/* now check hot idling files - the files we found hot and
* skipped replication. now it's time to try again */
- ht = lamigo_get_hot(alr_hot_period - opt.o_alr_hot_after_idle, &nr);
+ ht = lamigo_get_hot_files(alr_hot_period - opt.o_alr_hot_after_idle, &nr);
if (!ht)
goto out;
LX_DEBUG("idle "DFID": P: %Lu/%Lu, live %d, idle %d\n",
PFID(&ah->ah_fid), ah->ah_pools[ALR_FAST], ah->ah_pools[ALR_SLOW],
ah->ah_livetime, ah->ah_idle);
+
if (fast_pools->pl_is_open)
- lamigo_check_hot_one(ht + i);
+ lamigo_sync_hot_to_fast(&ht[i]);
+
if (slow_pools->pl_is_open)
- lamigo_check_hot_on_cold(ht + i);
+ lamigo_sync_hot_to_slow(&ht[i]);
}
- free(ht);
+ free(ht);
out:
alr_hot_period++;
}