From 1732704711488d2d233f0b8e5bc9814f443405c6 Mon Sep 17 00:00:00 2001 From: Mr NeilBrown Date: Mon, 4 Mar 2024 13:15:17 +1100 Subject: [PATCH] LU-17510 obdclass: fix wake up when queuing close request. The waitqueue for requests that need to be sent but that haven't been allocated a slot is kept ordered by request arrival for fairness. So new requests are added to the end. For requests other than 'close' there is a limit to the number of active requests (slots) and requests are assigned to slots on a first-come-first-served basis, so they are simply removed from the head of the list. For 'close' requests it is important that these not block indefinitely behind other requests so there is one slot that can only be used by a close request - and only if no other slots are used by a close request. These requests do not follow a strict FIFO order. When a non-"close" request completes we wake the first request on the list. There is no point searching all the way down the list for a close request that could also be woken. We only do that when a "close" request completes. This optimises the common case. However: when a request is first queued we add it to the end of the queue and then wake up the first deserving request if there is one. When there are free slots, this is expected to wake the request just queued. When there are no free slots, nothing is woken. When a "close" request is queued and added to the end of the queue after other non-close requests, we need to potentially search to the end of the queue for a close request to wake, just as we do when a close request completes. Unfortunately we don't. This can result in a close request blocking indefinitely. So: change the wakeup in obd_get_mod_rpc_slot() to match the wakeup in obd_put_mod_rpc_slot(). This ensures consistent handling and in particular will handle a close request immediately if there are no other close requests in flight. 
Clarify the comment in claim_mod_rpc_function() and perform minor code cleanup there. Lustre-change: https://review.whamcloud.com/54259 Lustre-commit: 7a2296a397381a5f6f9473b297f0062e8ff15948 Fixes: b5fde4d6c023 ("LU-17197 obdclass: preserve fairness when waiting for rpc slot") Signed-off-by: Mr NeilBrown Change-Id: I7b658efc0298a091166f0f18ce460fc3148047eb Reviewed-by: James Simmons Reviewed-by: Shaun Tancheff Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54689 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/obdclass/genops.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 969bcc5..8487b03 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -2264,15 +2264,16 @@ static int claim_mod_rpc_function(wait_queue_entry_t *wq_entry, (close_req && cli->cl_close_rpcs_in_flight == 0); if (avail) { cli->cl_mod_rpcs_in_flight++; - if (w->close_req) + if (close_req) cli->cl_close_rpcs_in_flight++; ret = woken_wake_function(wq_entry, mode, flags, key); } else if (cli->cl_close_rpcs_in_flight) /* No other waiter could be woken */ ret = -1; - else if (key == NULL) - /* This was not a wakeup from a close completion, so there is no - * point seeing if there are close waiters to be woken + else if (!key) + /* This was not a wakeup from a close completion or a new close + * being queued, so there is no point seeing if there are close + * waiters to be woken. */ ret = -1; else @@ -2305,8 +2306,12 @@ __u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc) /* This wakeup will only succeed if the maximums haven't * been reached. If that happens, WQ_FLAG_WOKEN will be cleared * and there will be no need to wait. + * If a close_req was enqueue, ensure we search all the way to the + * end of the waitqueue for a close request. 
*/ - wake_up_locked(&cli->cl_mod_rpcs_waitq); + __wake_up_locked_key(&cli->cl_mod_rpcs_waitq, TASK_NORMAL, + (void*)wait.close_req); + if (!(wait.wqe.flags & WQ_FLAG_WOKEN)) { spin_unlock_irq(&cli->cl_mod_rpcs_waitq.lock); wait_woken(&wait.wqe, TASK_UNINTERRUPTIBLE, -- 1.8.3.1