From 727ec3db42f77c40383a0d0012c4fb3f15c68afe Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Mon, 20 Feb 2023 12:14:16 +0300 Subject: [PATCH] EX-6917 lamigo: replicate stripe size along with stripe count to match the documentation. Signed-off-by: Alex Zhuravlev Change-Id: If5733fac7f92b821cdec4ca91e4e1d32656cd5b0 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/50071 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lipe/src/lamigo.c | 45 ++++++++++++++++++++++++++++++++------------- lustre/tests/hot-pools.sh | 23 +++++++++++++++++++++++ 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/lipe/src/lamigo.c b/lipe/src/lamigo.c index 95dce07..1faa1e2 100644 --- a/lipe/src/lamigo.c +++ b/lipe/src/lamigo.c @@ -214,6 +214,7 @@ struct resync_job { int rj_setprefer; int rj_check_job; int rj_stripes; + int rj_stripe_size; char *rj_pool; char *rj_mirror_opts; void (*rj_callback)(struct resync_job *, void *, int rc); @@ -338,6 +339,7 @@ static struct lamigo_head head = { struct mirror_opts { int mo_stripes; + int mo_stripe_size; bool mo_need_prefer; struct pool_list *mo_src_pool; }; @@ -827,6 +829,9 @@ static void *lamigo_replicate_one(void *args) if (rj->rj_stripes > 0) i += snprintf(cmd + i, sizeof(cmd) - i, " --stripe-count=%d", rj->rj_stripes); + if (rj->rj_stripe_size > 0) + i += snprintf(cmd + i, sizeof(cmd) - i, + " --stripe-size=%d", rj->rj_stripe_size); if (rj->rj_mirror_opts) i += snprintf(cmd + i, sizeof(cmd) - i, " --flags='%s'", rj->rj_mirror_opts); @@ -936,19 +941,22 @@ static bool lamigo_entry_needs_resync(struct lov_comp_md_entry_v1 *entry) static int lamigo_get_objects(struct lov_user_md_v3 *v3, struct lov_user_ost_data_v1 **objects, - int *stripes) + int *stripes, int *stripesz) { if (v3->lmm_magic == LOV_USER_MAGIC_V1) { struct lov_user_md_v1 *v1 = (struct lov_user_md_v1 *) v3; *objects = v1->lmm_objects; *stripes = v1->lmm_stripe_count; + *stripesz = v1->lmm_stripe_size; } else if (v3->lmm_magic == 
LOV_USER_MAGIC_V3) { *objects = v3->lmm_objects; *stripes = v3->lmm_stripe_count; + *stripesz = v3->lmm_stripe_size; } else { *objects = NULL; *stripes = 0; + *stripesz = 0; LX_ERROR("unsupported LOV magic %x\n", v3->lmm_magic); return -EINVAL; } @@ -982,13 +990,14 @@ static int lamigo_striping_is_in_sync(struct lov_user_md *lum, struct lov_comp_md_v1 *comp_v1; struct lov_user_md_v3 *v3 = (struct lov_user_md_v3 *)lum; enum amigo_resync_type resync; - int i, k, rc, onsrc = 0, ontgt = 0, stale, stripes, mirid; + int i, k, rc, onsrc = 0, ontgt = 0, stale, stripes, mirid, stripesz = 0; int objs_in_mirror; struct pool_list *pl, *found_on; resync = AMIGO_RESYNC_NONE; assert(mo); mo->mo_stripes = 0; + mo->mo_stripe_size = 0; mo->mo_need_prefer = false; mo->mo_src_pool = NULL; @@ -998,8 +1007,8 @@ static int lamigo_striping_is_in_sync(struct lov_user_md *lum, * then we don't need to check each individual OST * do replication only for specific pool */ - stripes = v3->lmm_stripe_count; - mo->mo_stripes = stripes; + mo->mo_stripes = stripes = v3->lmm_stripe_count; + mo->mo_stripe_size = stripesz = v3->lmm_stripe_size; if ((pl = lamigo_lookup_fast_pool(v3->lmm_pool_name))) { resync = AMIGO_RESYNC_EXTEND; mo->mo_src_pool = pl; @@ -1013,7 +1022,7 @@ static int lamigo_striping_is_in_sync(struct lov_user_md *lum, if (v3->lmm_magic == LOV_USER_MAGIC_V1 || v3->lmm_magic == LOV_USER_MAGIC_V3) { - rc = lamigo_get_objects(v3, &objects, &stripes); + rc = lamigo_get_objects(v3, &objects, &stripes, &stripesz); if (rc) goto out; @@ -1025,6 +1034,7 @@ static int lamigo_striping_is_in_sync(struct lov_user_md *lum, stats.s_extend_by_objects++; } mo->mo_stripes = stripes; + mo->mo_stripe_size = stripesz; goto out; } @@ -1097,7 +1107,7 @@ static int lamigo_striping_is_in_sync(struct lov_user_md *lum, continue; } - rc = lamigo_get_objects(v3, &objects, &stripes); + rc = lamigo_get_objects(v3, &objects, &stripes, &stripesz); if (rc) { stats.s_skip_unknown++; goto out; @@ -1136,8 +1146,10 @@ 
static int lamigo_striping_is_in_sync(struct lov_user_md *lum, } /* find largest stripe count */ - if (mo->mo_stripes < stripes) + if (mo->mo_stripes < stripes) { mo->mo_stripes = stripes; + mo->mo_stripe_size = stripesz; + } } if (!onsrc) { @@ -1167,8 +1179,10 @@ static int lamigo_striping_is_in_sync(struct lov_user_md *lum, stats.s_extend_by_target++; } out: - if (resync == AMIGO_RESYNC_EXTEND) + if (resync == AMIGO_RESYNC_EXTEND) { mo->mo_stripes = stripes; + mo->mo_stripe_size = stripesz; + } return resync; } @@ -1377,8 +1391,8 @@ static int lamigo_is_in_sync(struct lu_fid *fid, out: lamigo_hist_add(fid, resync); - LX_DEBUG("check "DFID" stripes=%d: resync=%d\n", - PFID(fid), mo->mo_stripes, resync); + LX_DEBUG("check "DFID" stripes=%d stripesz=%d: resync=%d\n", + PFID(fid), mo->mo_stripes, mo->mo_stripe_size, resync); return resync; } @@ -1529,6 +1543,7 @@ static int lamigo_update_one(struct fid_rec *f) rj = xcalloc(1, sizeof(*rj)); rj->rj_fid = f->fr_fh.fh_fid; rj->rj_stripes = mo.mo_stripes; + rj->rj_stripe_size = mo.mo_stripe_size; rj->rj_index = f->fr_index; rj->rj_resync = resync; rj->rj_pool = opt.o_slow_pool; @@ -2635,6 +2650,7 @@ static int lamigo_create_job(struct lu_fid *fid, rj = xcalloc(1, sizeof(*rj)); rj->rj_fid = *fid; rj->rj_stripes = mo->mo_stripes; + rj->rj_stripe_size = mo->mo_stripe_size; rj->rj_resync = resync; rj->rj_pool = slow_pools->pl_pool; rj->rj_callback = lamigo_alr_mirror_cb; @@ -3469,7 +3485,7 @@ static void lamigo_alr_mirror_cb(struct resync_job *rj, void *cbdata, int rc) /* Create and submit a job to @sync (extend or resync) @fid to @pl. 
*/ static void lamigo_submit_sync(const struct lu_fid *fid, enum amigo_resync_type sync, - struct pool_list *pl, int stripes) + struct pool_list *pl, int stripes, int stripesz) { struct resync_job *rj; int rc; @@ -3477,6 +3493,7 @@ static void lamigo_submit_sync(const struct lu_fid *fid, enum amigo_resync_type rj = xcalloc(1, sizeof(*rj)); rj->rj_fid = *fid; rj->rj_stripes = stripes; + rj->rj_stripe_size = stripesz; rj->rj_index = 0; rj->rj_resync = sync; rj->rj_check_job = 0; @@ -3549,7 +3566,8 @@ static void lamigo_sync_hot_to_fast(struct alr_heat *ht) PFID(&ht->ah_fid), fast_pools->pl_pool); - lamigo_submit_sync(&ht->ah_fid, sync, fast_pools, mo.mo_stripes); + lamigo_submit_sync(&ht->ah_fid, sync, fast_pools, + mo.mo_stripes, mo.mo_stripe_size); if (is_rw) stats.s_replicate_rw2hot++; @@ -3585,7 +3603,8 @@ static void lamigo_sync_hot_to_slow(struct alr_heat *ht) LX_DEBUG("try to %s idling hot "DFID" to pool '%s'\n", PSYNC(sync), PFID(&ht->ah_fid), slow_pools->pl_pool); - lamigo_submit_sync(&ht->ah_fid, sync, slow_pools, mo.mo_stripes); + lamigo_submit_sync(&ht->ah_fid, sync, slow_pools, + mo.mo_stripes, mo.mo_stripe_size); stats.s_replicate_rw2cold++; /* XXX: mark existing replica preferred if it's not */ } diff --git a/lustre/tests/hot-pools.sh b/lustre/tests/hot-pools.sh index 495f291..4e4372b 100755 --- a/lustre/tests/hot-pools.sh +++ b/lustre/tests/hot-pools.sh @@ -2231,6 +2231,29 @@ test_72() { } run_test 72 "lamigo: --oss option and ALR delivery to lamigo" +test_73() { + local tf=$DIR/$tfile + init_hot_pools_env + + start_lamigo_cmd + check_lamigo_is_started || error "failed to start lamigo" + stack_trap stop_lamigo_cmd + stack_trap "rm -f $tf" + + $LFS setstripe -E 4M -c 2 -S 1M -p $LAMIGO_SRC \ + -E eof -c 4 -S 4M -p $LAMIGO_SRC $tf || error "setstripe failed" + dd if=/dev/zero of=$tf bs=1M count=6 || error "dd failed" + cancel_lru_locks osc + sleep $((LAMIGO_AGE * 2)) + verify_file_mirror $tf 2 + local stripesize=$($LFS getstripe --mirror-id=2 -S $tf) + 
(( stripesize == 4*1024*1024 )) || { + $LFS getstripe $tf + error "expected 4M stripe size on a new mirror" + } +} +run_test 73 "check stripe size & count after replication" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1