From 53afaf69828d4fcde640066fbe35ce1ffe9e9463 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Fri, 22 Oct 2021 16:40:24 -0600 Subject: [PATCH] EX-4103 lamigo: do not mark cold pool mirror "prefer" When lamigo_check_hot_on_cold() calls lamigo_new_job_for_hot() it passes "tgt_pools" as the "tgt" pool argument, for the case when an active file in the hot pool needs to be mirrored to the cold pool, unlike other callers, which pass "src_pools" as "tgt" to mirror to the hot pool. This should not result in the cold pool mirror being marked "prefer", which can trigger a chain of later problems with the file. Fix comment in lamigo_check_hot_on_cold() to make it clear what case is being checked, since it described the opposite of what is done. Test-Parameters: trivial testlist=hot-pools Fixes: e582abc629e ("EX-978 lamigo: set prefer flag on fast replica") Signed-off-by: Andreas Dilger Change-Id: Iad48d6eb2d57817241b8ca3c22c03e38b93ebbe5 Reviewed-on: https://review.whamcloud.com/45345 Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: John L. 
Hammond Reviewed-on: https://review.whamcloud.com/45351 Tested-by: jenkins --- lipe/src/lamigo.c | 14 ++++++++------ lustre/tests/hot-pools.sh | 11 +++++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/lipe/src/lamigo.c b/lipe/src/lamigo.c index fa67491..a6452d1 100644 --- a/lipe/src/lamigo.c +++ b/lipe/src/lamigo.c @@ -3243,7 +3243,9 @@ static void lamigo_new_job_for_hot(struct lu_fid *fid, enum amigo_resync_type sy rj->rj_check_job = 0; rj->rj_pid = 0; rj->rj_pool = tgt->pl_pool; - rj->rj_mirror_opts = "prefer"; + /* only mark hot pool mirrors with "prefer" */ + if (lamigo_lookup_fast_pool(tgt->pl_pool)) + rj->rj_mirror_opts = "prefer"; rj->rj_callback = lamigo_alr_mirror_cb; rc = lamigo_submit_job(rj); @@ -3317,17 +3319,17 @@ static void lamigo_check_hot_on_cold(struct alr_heat *ht) struct mirror_opts mo = { 0 }; int sync; - /* the file stored on cold pool was hat, - * now it's idling, try to move it to hot pool */ + /* the file stored on hot pool was hot but still being written. 
+ * now that it's idling try to replicate it to the cold pool */ if (ht->ah_idle > 0 && ht->ah_heat[ALR_WRITE] && ht->ah_pools[ALR_SLOW] == 0 && ht->ah_pools[ALR_FAST]) { sync = lamigo_is_in_sync(&ht->ah_fid, fast_pools, slow_pools, &mo); - LX_DEBUG("replicate idling hot to CP "DFID": %d\n", + LX_DEBUG("try to replicate idling hot to CP "DFID": %d\n", PFID(&ht->ah_fid), sync); if (sync != AMIGO_RESYNC_NONE) { - lamigo_new_job_for_hot(&ht->ah_fid, sync, - slow_pools, mo.mo_stripes); + lamigo_new_job_for_hot(&ht->ah_fid, sync, slow_pools, + mo.mo_stripes); stats.s_replicate_rw2cold++; } /* XXX: mark existing replica preferred if it's not */ diff --git a/lustre/tests/hot-pools.sh b/lustre/tests/hot-pools.sh index 6ff413d..c19bb64 100755 --- a/lustre/tests/hot-pools.sh +++ b/lustre/tests/hot-pools.sh @@ -993,11 +993,14 @@ verify_file_mirror() { ids=($($LFS getstripe $file | awk '/lcme_id/{print $2}' | tr '\n' ' ')) for id in "${ids[@]}"; do - [[ "$id" = "${ids[${#ids[*]}-1]}" ]] && - verify_comp_attr pool $file $id $LAMIGO_TGT || + # last mirror should be on target pool, but not marked prefer + if [[ "$id" = "${ids[${#ids[*]}-1]}" ]]; then + verify_comp_attr pool $file $id $LAMIGO_TGT + verify_comp_attr lcme_flags $file $id init,^prefer + else verify_comp_attr pool $file $id $LAMIGO_SRC - - verify_comp_attr lcme_flags $file $id init + verify_comp_attr lcme_flags $file $id init + fi done } -- 1.8.3.1