From 1757e9b119720016f6fa2c2ac35c144e0c92d75f Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Fri, 10 Mar 2023 20:47:05 +0300 Subject: [PATCH] LU-16633 obdclass: fix rpc slot leakage obd_get_mod_rpc_slot() can race with obd_put_mod_rpc_slot(): finishing wait_woken() resets WQ_FLAG_WOKEN (which is set when the corresponding thread gets a slot, incrementing cl_mod_rpcs_in_flight). Then another thread executing __wake_up_locked_key() may find that wq_entry again and call claim_mod_rpc_function() one more time, again incrementing cl_mod_rpcs_in_flight. Thus it's incremented twice for a single obd_get_mod_rpc_slot(). #1: obd_get_mod_rpc_slot() #2: obd_put_mod_rpc_slot() flags &= ~WQ_FLAG_WOKEN list_add() wait_woken() schedule claim_mod_rpc_function() cl_mod_rpcs_in_flight++ wake_up() flags &= ~WQ_FLAG_WOKEN #3: obd_put_mod_rpc_slot() claim_mod_rpc_function() cl_mod_rpcs_in_flight++ wake_up() list_del() The patch introduces a replacement for WQ_FLAG_WOKEN which is never reset once set. Lustre-change: https://review.whamcloud.com/50261 Lustre-commit: 91a3726f313df33e099320d171039f8371fec27f Fixes: 5243630b09 ("LU-15947 obdclass: improve precision of wakeups for mod_rpcs") Signed-off-by: Alex Zhuravlev Change-Id: I29371c8c85414413c5a8e41dec3632f64ad127bb Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Signed-off-by: Etienne AUJAMES Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51539 Reviewed-by: James Simmons Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/mdc/mdc_request.c | 2 ++ lustre/obdclass/genops.c | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 5af81a4..80bbb66 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -2941,6 +2941,8 @@ static int mdc_precleanup(struct obd_device *obd) static int mdc_cleanup(struct obd_device *obd) { + struct client_obd *cli = &obd->u.cli; + LASSERT(cli->cl_mod_rpcs_in_flight == 0); 
return osc_cleanup_common(obd); } diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 8487b03..90088dc 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -2238,6 +2238,7 @@ EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show); struct mod_waiter { struct client_obd *cli; bool close_req; + bool woken; wait_queue_entry_t wqe; }; static int claim_mod_rpc_function(wait_queue_entry_t *wq_entry, @@ -2250,10 +2251,9 @@ static int claim_mod_rpc_function(wait_queue_entry_t *wq_entry, int ret; /* As woken_wake_function() doesn't remove us from the wait_queue, - * we could get called twice for the same thread - take care. + * we use own flag to ensure we're called just once. */ - if (wq_entry->flags & WQ_FLAG_WOKEN) - /* Already woke this thread, don't try again */ + if (w->woken) return 0; /* A slot is available if @@ -2267,6 +2267,7 @@ static int claim_mod_rpc_function(wait_queue_entry_t *wq_entry, if (close_req) cli->cl_close_rpcs_in_flight++; ret = woken_wake_function(wq_entry, mode, flags, key); + w->woken = true; } else if (cli->cl_close_rpcs_in_flight) /* No other waiter could be woken */ ret = -1; @@ -2295,6 +2296,7 @@ __u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc) struct mod_waiter wait = { .cli = cli, .close_req = (opc == MDS_CLOSE), + .woken = false, }; __u16 i, max; @@ -2312,7 +2314,8 @@ __u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc) __wake_up_locked_key(&cli->cl_mod_rpcs_waitq, TASK_NORMAL, (void*)wait.close_req); - if (!(wait.wqe.flags & WQ_FLAG_WOKEN)) { + /* XXX: handle spurious wakeups (from unknown yet source */ + while (wait.woken == false) { spin_unlock_irq(&cli->cl_mod_rpcs_waitq.lock); wait_woken(&wait.wqe, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); -- 1.8.3.1