From cacdaa925172d26c19ee841dd5a2a4c30afe10ce Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Fri, 12 May 2023 17:32:20 -0400 Subject: [PATCH] LU-16830 lod: improve rr allocation Round-robin allocation uses atomic_inc() % ost_count to generate the OST index. When some OSTs are unavailable and many threads are creating objects, it can happen that the OST idx is the same for all attempts. For example, in a 4-OST configuration where 2 OSTs are in failover, the estimated probability is 0.5^12=0.024%. The result is ENOSPC for the user application. Let's try the OSTs one by one in the last speed loop. HPE-bug-id: LUS-11265 Signed-off-by: Alexander Boyko Change-Id: I325cf4ad706c9b0df64cf53792e77c1fad6f7739 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50996 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Sergey Cheremencev Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger --- lustre/lod/lod_qos.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 4775b40..978e81c 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -749,7 +749,7 @@ static int lod_ost_alloc_rr(const struct lu_env *env, struct lod_object *lo, __u32 stripe_idx = 0; __u32 stripe_count, stripe_count_min, ost_idx; int rc, speed = 0, ost_connecting = 0; - int stripes_per_ost = 1; + int idx, stripes_per_ost = 1; bool overstriped = false; ENTRY; @@ -808,11 +808,13 @@ repeat_find: lqr->lqr_start_count, lqr->lqr_offset_idx, osts->op_count, osts->op_count); - for (i = 0; i < osts->op_count * stripes_per_ost && + for (i = 0, idx = 0; i < osts->op_count * stripes_per_ost && stripe_idx < stripe_count; i++) { - int idx; + if (likely(speed < 2) || i == 0) + idx = atomic_inc_return(&lqr->lqr_start_idx); + else + idx++; - idx = atomic_inc_return(&lqr->lqr_start_idx); array_idx = (idx + lqr->lqr_offset_idx) % osts->op_count; ost_idx = lqr->lqr_pool.op_array[array_idx]; -- 1.8.3.1