From 2d59294d52b696125acc464e5910c893d9aef237 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Wed, 4 May 2022 20:50:57 -0400 Subject: [PATCH] LU-15821 ldlm: Prioritize blocking callbacks The current code places bl_ast lock callbacks at the end of the global BL callback queue. This is bad because it causes urgent requests from the server to wait behind non-urgent cleanup tasks to keep lru_size at the right level. This can lead to evictions if there is a large queue of items in the global queue so the callback is not serviced in a timely manner. Put bl_ast callbacks on the priority queue so they do not wait behind the background traffic. Add some additional debug in this area. Signed-off-by: Patrick Farrell Change-Id: Ic6eb65819a4a93e9d30e807d386ca18380b30c7d Reviewed-on: https://review.whamcloud.com/47215 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_lockd.c | 60 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index f82df7d..7e59709 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -83,27 +83,29 @@ static inline timeout_t ldlm_get_rq_timeout(void) } struct ldlm_bl_pool { - spinlock_t blp_lock; + spinlock_t blp_lock; /* * blp_prio_list is used for callbacks that should be handled * as a priority. It is used for LDLM_FL_DISCARD_DATA requests. * see b=13843 */ - struct list_head blp_prio_list; + struct list_head blp_prio_list; /* * blp_list is used for all other callbacks which are likely * to take longer to process. */ - struct list_head blp_list; - - wait_queue_head_t blp_waitq; - struct completion blp_comp; - atomic_t blp_num_threads; - atomic_t blp_busy_threads; - int blp_min_threads; - int blp_max_threads; + struct list_head blp_list; + + wait_queue_head_t blp_waitq; + struct completion blp_comp; + atomic_t blp_num_threads; + atomic_t blp_busy_threads; + int blp_min_threads; + int blp_max_threads; + int blp_total_locks; + int blp_total_blwis; }; struct ldlm_bl_work_item { @@ -2116,22 +2118,41 @@ static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, enum ldlm_cancel_flags cancel_flags) { struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool; + char *prio = "regular"; + int count; ENTRY; spin_lock(&blp->blp_lock); + /* cannot access blwi after added to list and lock is dropped */ + count = blwi->blwi_lock ? 1 : blwi->blwi_count; + + /* if the server is waiting on a lock to be cancelled (bl_ast), this is + * an urgent request and should go in the priority queue so it doesn't + * get stuck behind non-priority work (eg, lru size management) + * + * We also prioritize discard_data, which is for eviction handling + */ if (blwi->blwi_lock && - ldlm_is_discard_data(blwi->blwi_lock)) { - /* add LDLM_FL_DISCARD_DATA requests to the priority list */ + (ldlm_is_discard_data(blwi->blwi_lock) || + ldlm_is_bl_ast(blwi->blwi_lock))) { list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list); + prio = "priority"; } else { /* other blocking callbacks are added to the regular list */ list_add_tail(&blwi->blwi_entry, &blp->blp_list); } + blp->blp_total_locks += count; + blp->blp_total_blwis++; spin_unlock(&blp->blp_lock); wake_up(&blp->blp_waitq); + /* unlocked read of blp values is intentional - OK for debug */ + CDEBUG(D_DLMTRACE, + "added %d/%d locks to %s blp list, %d blwis in pool\n", + count, blp->blp_total_locks, prio, blp->blp_total_blwis); + /* * can not check blwi->blwi_flags as blwi could be already freed in * LCF_ASYNC mode @@ -2749,10 +2770,23 @@ static int ldlm_bl_get_work(struct ldlm_bl_pool *blp, if (++num_bl >= num_th) num_bl = 0; list_del(&blwi->blwi_entry); + blp->blp_total_locks -= blwi->blwi_lock ? 1 : blwi->blwi_count; + blp->blp_total_blwis--; } spin_unlock(&blp->blp_lock); *p_blwi = blwi; + /* intentional unlocked read of blp values - OK for debug */ + if (blwi) { + CDEBUG(D_DLMTRACE, + "Got %d locks of %d total in blp. (%d blwis in pool)\n", + blwi->blwi_lock ? 1 : blwi->blwi_count, + blp->blp_total_locks, blp->blp_total_blwis); + } else { + CDEBUG(D_DLMTRACE, + "No blwi found in queue (no bl locks in queue)\n"); + } + if (*p_exp != NULL && *p_blwi != NULL) { obd_stale_export_put(*p_exp); *p_exp = NULL; @@ -3293,6 +3327,8 @@ static int ldlm_setup(void) init_waitqueue_head(&blp->blp_waitq); atomic_set(&blp->blp_num_threads, 0); atomic_set(&blp->blp_busy_threads, 0); + blp->blp_total_locks = 0; + blp->blp_total_blwis = 0; if (ldlm_num_threads == 0) { blp->blp_min_threads = LDLM_NTHRS_INIT; -- 1.8.3.1