From 3b2bda581a2994dfaf5343da455416203531d67f Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 6 Dec 2023 10:32:57 -0800 Subject: [PATCH] LU-17334 lov: handle object created on newly added OST When a new OST is added to a filesystem without no_create, a new object may be created on the OST relatively quickly after it is added to the filesystem, in particular because the new OST would be preferred by QOS space balancing due to lots of free space. However, it might take a few seconds for the addition of the new OST to be propagated across all of the clients, so there is a risk that the MDS creates file objects on OSTs that a client is not yet aware of, which returns an error to the application immediately. This patch fixes the issue by adding a loop in lsme_unpack() that waits and retries for some number of seconds for the filesystem layout to be updated if either the "loi->loi_ost_idx >= lov->desc.ld_tgt_count" or "!ltd" condition is hit. Lustre-change: https://review.whamcloud.com/53335 Lustre-commit: TBD (from e1de624373ce6082253ddbdd987d36eb56ca6490) Change-Id: Idc29b8c66079afaea25428577daf51370fa2b084 Signed-off-by: Andreas Dilger Signed-off-by: Jian Yu Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53353 Tested-by: jenkins Tested-by: Maloo --- lustre/lov/lov_ea.c | 51 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index 1fe52c1..b1546e1 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -206,6 +206,7 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size, loff_t lov_bytes; u32 magic; u32 pattern; + time64_t retry_limit = 0; unsigned int stripe_count; unsigned int i; int rc; @@ -268,7 +269,9 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size, for (i = 0; i < stripe_count; i++) { struct lov_oinfo *loi; - struct lov_tgt_desc *ltd; + struct lov_tgt_desc *ltd = NULL; + 
static time64_t next_print; + unsigned int level; OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS); if (!loi) @@ -282,21 +285,41 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size, if (lov_oinfo_is_dummy(loi)) continue; - if (loi->loi_ost_idx >= lov->desc.ld_tgt_count && - !lov2obd(lov)->obd_process_conf) { - CERROR("%s: OST index %d more than OST count %d\n", - (char*)lov->desc.ld_uuid.uuid, - loi->loi_ost_idx, lov->desc.ld_tgt_count); - lov_dump_lmm_v1(D_WARNING, lmm); - GOTO(out_lsme, rc = -EINVAL); - } +retry_new_ost: + if (unlikely(loi->loi_ost_idx >= lov->desc.ld_tgt_count || + !(ltd = lov->lov_tgts[loi->loi_ost_idx]))) { + time64_t now = ktime_get_seconds(); + + /* print message on the first hit, error if giving up */ + if (retry_limit == 0) { + level = now > next_print ? D_WARNING : D_INFO; + retry_limit = now + RECONNECT_DELAY_MAX; + } else if (now > retry_limit) { + level = D_ERROR; + } else { + level = D_INFO; + } - ltd = lov->lov_tgts[loi->loi_ost_idx]; - if (!ltd) { - CERROR("%s: OST index %d missing\n", - (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx); + /* log debug every loop, just to see it is trying */ + CDEBUG_LIMIT(level, + loi->loi_ost_idx < lov->desc.ld_tgt_count ? + "%s: FID "DOSTID" OST index %d/%u missing\n" : + "%s: FID "DOSTID" OST index %d more than OST count %u\n", + lov->desc.ld_uuid.uuid, POSTID(&loi->loi_oi), + loi->loi_ost_idx, lov->desc.ld_tgt_count); + if (now > next_print) { + LCONSOLE_INFO("%s: wait %ds while client connects to new OST\n", + lov->desc.ld_uuid.uuid, + (int)(retry_limit - now)); + next_print = retry_limit + 600; + } + if (now < retry_limit) { + rc = schedule_timeout_interruptible(cfs_time_seconds(1)); + if (rc == 0) + goto retry_new_ost; + } lov_dump_lmm_v1(D_WARNING, lmm); - continue; + GOTO(out_lsme, rc = -EINVAL); } lov_bytes = lov_tgt_maxbytes(ltd); -- 1.8.3.1