From 91a3726f313df33e099320d171039f8371fec27f Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Fri, 10 Mar 2023 20:47:05 +0300 Subject: [PATCH] LU-16633 obdclass: fix rpc slot leakage obd_get_mod_rpc_slot() can race with obd_put_mod_rpc_slot(): finishing wait_woken() resets WQ_FLAG_WOKEN (which is set when the corresponding thread gets a slot, incrementing cl_mod_rpcs_in_flight). Then another thread executing __wake_up_locked_key() may find that wq_entry again and call claim_mod_rpc_function() one more time, again incrementing cl_mod_rpcs_in_flight. Thus it's incremented twice for a single obd_get_mod_rpc_slot(). #1: obd_get_mod_rpc_slot() #2: obd_put_mod_rpc_slot() flags &= ~WQ_FLAG_WOKEN list_add() wait_woken() schedule claim_mod_rpc_function() cl_mod_rpcs_in_flight++ wake_up() flags &= ~WQ_FLAG_WOKEN #3: obd_put_mod_rpc_slot() claim_mod_rpc_function() cl_mod_rpcs_in_flight++ wake_up() list_del() The patch introduces a replacement for WQ_FLAG_WOKEN which is never reset once set. Fixes: 5243630b09 ("LU-15947 obdclass: improve precision of wakeups for mod_rpcs") Signed-off-by: Alex Zhuravlev Change-Id: I29371c8c85414413c5a8e41dec3632f64ad127bb Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50261 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/mdc/mdc_request.c | 2 ++ lustre/obdclass/genops.c | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 0fcdb49..32a54c9 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -2959,6 +2959,8 @@ static int mdc_precleanup(struct obd_device *obd) static int mdc_cleanup(struct obd_device *obd) { + struct client_obd *cli = &obd->u.cli; + LASSERT(cli->cl_mod_rpcs_in_flight == 0); return osc_cleanup_common(obd); } diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 1dcd579..ae0bca5 100644 --- a/lustre/obdclass/genops.c +++ 
b/lustre/obdclass/genops.c @@ -2247,6 +2247,7 @@ EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show); struct mod_waiter { struct client_obd *cli; bool close_req; + bool woken; wait_queue_entry_t wqe; }; static int claim_mod_rpc_function(wait_queue_entry_t *wq_entry, @@ -2259,10 +2260,9 @@ static int claim_mod_rpc_function(wait_queue_entry_t *wq_entry, int ret; /* As woken_wake_function() doesn't remove us from the wait_queue, - * we could get called twice for the same thread - take care. + * we use own flag to ensure we're called just once. */ - if (wq_entry->flags & WQ_FLAG_WOKEN) - /* Already woke this thread, don't try again */ + if (w->woken) return 0; /* A slot is available if @@ -2276,6 +2276,7 @@ static int claim_mod_rpc_function(wait_queue_entry_t *wq_entry, if (w->close_req) cli->cl_close_rpcs_in_flight++; ret = woken_wake_function(wq_entry, mode, flags, key); + w->woken = true; } else if (cli->cl_close_rpcs_in_flight) /* No other waiter could be woken */ ret = -1; @@ -2303,6 +2304,7 @@ __u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc) struct mod_waiter wait = { .cli = cli, .close_req = (opc == MDS_CLOSE), + .woken = false, }; __u16 i, max; @@ -2316,7 +2318,8 @@ __u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc) * and there will be no need to wait. */ wake_up_locked(&cli->cl_mod_rpcs_waitq); - if (!(wait.wqe.flags & WQ_FLAG_WOKEN)) { + /* XXX: handle spurious wakeups (from unknown yet source */ + while (wait.woken == false) { spin_unlock_irq(&cli->cl_mod_rpcs_waitq.lock); wait_woken(&wait.wqe, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); -- 1.8.3.1