From f35f897ec8ec0752ea4d4830e72f5193375a474b Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Tue, 5 Dec 2023 12:45:44 -0800 Subject: [PATCH] LU-17334 lov: handle object created on newly added OST When a new OST is added to a filesystem without no_create, then a new object may be created on the OST relatively quickly after it is added to the filesystem, in particular because the new OST would be preferred by QOS space balancing due to lots of free space. However, it might take a few seconds for the addition of the new OST to be propagated across all of the clients, so there is a risk that the MDS creates file objects on OSTs that a client is not yet aware of, which returns an error to the application immediately. This patch fixes the issue by adding a loop in lsme_unpack() that waits and retries for some number of seconds for the filesystem layout to be updated if either the "loi->loi_ost_idx >= lov->desc.ld_tgt_count" or "!ltd" condition is hit. Change-Id: Idc29b8c66079afaea25428577daf51370fa2b084 Signed-off-by: Andreas Dilger Signed-off-by: Jian Yu Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53335 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/lov/lov_ea.c | 51 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index 62656c3..17cf85c 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -196,6 +196,7 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size, loff_t lov_bytes; u32 magic; u32 pattern; + time64_t retry_limit = 0; unsigned int stripe_count; unsigned int i; int rc; @@ -258,7 +259,9 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size, for (i = 0; i < stripe_count; i++) { struct lov_oinfo *loi; - struct lov_tgt_desc *ltd; + struct lov_tgt_desc *ltd = NULL; + static time64_t next_print; + unsigned int level; 
OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS); if (!loi) @@ -272,21 +275,41 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size, if (lov_oinfo_is_dummy(loi)) continue; - if (loi->loi_ost_idx >= lov->desc.ld_tgt_count && - !lov2obd(lov)->obd_process_conf) { - CERROR("%s: OST index %d more than OST count %d\n", - (char*)lov->desc.ld_uuid.uuid, - loi->loi_ost_idx, lov->desc.ld_tgt_count); - lov_dump_lmm_v1(D_WARNING, lmm); - GOTO(out_lsme, rc = -EINVAL); - } +retry_new_ost: + if (unlikely(loi->loi_ost_idx >= lov->desc.ld_tgt_count || + !(ltd = lov->lov_tgts[loi->loi_ost_idx]))) { + time64_t now = ktime_get_seconds(); + + /* print message on the first hit, error if giving up */ + if (retry_limit == 0) { + level = now > next_print ? D_WARNING : D_INFO; + retry_limit = now + RECONNECT_DELAY_MAX; + } else if (now > retry_limit) { + level = D_ERROR; + } else { + level = D_INFO; + } - ltd = lov->lov_tgts[loi->loi_ost_idx]; - if (!ltd) { - CERROR("%s: OST index %d missing\n", - (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx); + /* log debug every loop, just to see it is trying */ + CDEBUG_LIMIT(level, + loi->loi_ost_idx < lov->desc.ld_tgt_count ? + "%s: FID "DOSTID" OST index %d/%u missing\n" : + "%s: FID "DOSTID" OST index %d more than OST count %u\n", + lov->desc.ld_uuid.uuid, POSTID(&loi->loi_oi), + loi->loi_ost_idx, lov->desc.ld_tgt_count); + if (now > next_print) { + LCONSOLE_INFO("%s: wait %ds while client connects to new OST\n", + lov->desc.ld_uuid.uuid, + (int)(retry_limit - now)); + next_print = retry_limit + 600; + } + if (now < retry_limit) { + rc = schedule_timeout_interruptible(cfs_time_seconds(1)); + if (rc == 0) + goto retry_new_ost; + } lov_dump_lmm_v1(D_WARNING, lmm); - continue; + GOTO(out_lsme, rc = -EINVAL); } lov_bytes = lov_tgt_maxbytes(ltd); -- 1.8.3.1