From bedfafcae478dcb3591e6621e8e74d7255bc617f Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Thu, 9 May 2024 20:34:29 +0300 Subject: [PATCH] LU-17204 lod: don't panic on short LOVEA When we request the LOVEA and find that the existing buffer is too small, we ask for the LOVEA's size and reallocate the buffer. However, the LOVEA can shrink in parallel (e.g. due to new default striping), so our expectation that the size must be greater than the size of the existing buffer is not correct. Replace the corresponding assertion with a simple retry plus an extra check for a livelock. Lustre-commit: 8fa3532b1ee887be378adbf9432707b2d8a2d814 Lustre-change: https://review.whamcloud.com/52727 Signed-off-by: Alex Zhuravlev Change-Id: I26ad5091228bf78858f8538478dbcbdb235cddf4 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/55065 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/lod/lod_lov.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index df16ab2..eada8eb 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -457,7 +457,6 @@ int lod_ea_store_resize(struct lod_thread_info *info, size_t size) if (info->lti_ea_store) { LASSERT(info->lti_ea_store_size); - LASSERT(info->lti_ea_store_size < round); CDEBUG(D_INFO, "EA store size %d is not enough, need %d\n", info->lti_ea_store_size, round); OBD_FREE_LARGE(info->lti_ea_store, info->lti_ea_store_size); @@ -1062,7 +1061,7 @@ int lod_get_ea(const struct lu_env *env, struct lod_object *lo, { struct lod_thread_info *info = lod_env_info(env); struct dt_object *next = dt_object_child(&lo->ldo_obj); - int rc; + int rc, count = 0; ENTRY; LASSERT(info); @@ -1090,6 +1089,11 @@ repeat: RETURN(rc); LASSERT(rc > 0); + if (rc <= info->lti_ea_store_size) { + /* sometimes LOVEA can shrink in parallel */ + LASSERT(count++ < 10); + goto repeat; + } rc = lod_ea_store_resize(info, rc); if (rc) RETURN(rc); -- 1.8.3.1