From b94ba0b8075bf82bde071a7c0135fcf50d0d2724 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Sat, 6 Jul 2024 14:10:26 -0400 Subject: [PATCH] LU-18006 osc: rework sdio locking We cannot hold a spinlock across a call to kthread_create, so we have to rearrange the locking for this, moving the locking inside the data copy function. This handles the multithreading in a slightly different spot. Test-Parameters: testlist=sanity env=ONLY=119f,ONLY_REPEAT=100 Test-Parameters: testlist=sanity env=ONLY=119f,ONLY_REPEAT=100 Signed-off-by: Patrick Farrell Change-Id: I30c1c6600ebfe2e1fb54606608ea55469ae06937 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55649 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Qian Yingjin Reviewed-by: Oleg Drokin --- lustre/obdclass/cl_io.c | 21 ++++++++++++++++++++- lustre/osc/osc_request.c | 23 +++++++---------------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/lustre/obdclass/cl_io.c b/lustre/obdclass/cl_io.c index 2fd6ca3..0c409fe 100644 --- a/lustre/obdclass/cl_io.c +++ b/lustre/obdclass/cl_io.c @@ -1475,6 +1475,7 @@ static ssize_t __ll_dio_user_copy(struct cl_sub_dio *sdio) size_t original_count = count; int short_copies = 0; bool mm_used = false; + bool locked = false; int status = 0; int i = 0; int rw; @@ -1488,6 +1489,18 @@ static ssize_t __ll_dio_user_copy(struct cl_sub_dio *sdio) else rw = READ; + /* read copying is protected by the reference count on the sdio, since + * it's done as part of getting rid of the sdio, but write copying is + * done at the start, where there may be multiple ptlrpcd threads + * using this sdio, so we must lock and check if the copying has + * been done + */ + if (rw == WRITE) { + spin_lock(&sdio->csd_lock); + locked = true; + if (sdio->csd_write_copied) + GOTO(out, status = 0); + } /* if there's no mm, io is being done from a kernel thread, so there's * no need to transition to its mm context anyway. * @@ -1596,15 +1609,21 @@ static ssize_t __ll_dio_user_copy(struct cl_sub_dio *sdio) i++; } -out: + if (rw == WRITE && status == 0) + sdio->csd_write_copied = true; + /* if we complete successfully, we should reach all of the pages */ LASSERTF(ergo(status == 0, i == pvec->ldp_count - 1), "status: %d, i: %d, pvec->ldp_count %zu, count %zu\n", status, i, pvec->ldp_count, count); +out: if (mm_used) kthread_unuse_mm(mm); + if (locked) + spin_unlock(&sdio->csd_lock); + /* the total bytes copied, or status */ RETURN(original_count - count ? original_count - count : status); } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index c016380..8db5523 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2773,23 +2773,14 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, obj = ext->oe_obj; /* for unaligned writes, we do the data copying here */ - if (sdio && sdio->csd_unaligned && sdio->csd_write && - !sdio->csd_write_copied) { - /* note a single sdio can correspond to multiple RPCs, - * so we use this lock to ensure the data copy is only - * done once (an sdio can also correspond to multiple - * extents, which is also handled by this) + if (sdio && sdio->csd_unaligned && sdio->csd_write) { + rc = ll_dio_user_copy(sdio); + if (rc < 0) + GOTO(out, rc); + /* dio_user_copy has some concurrency handling in it, + * so we add this assert to ensure it did its job... */ - spin_lock(&sdio->csd_lock); - if (!sdio->csd_write_copied) { - rc = ll_dio_user_copy(sdio); - if (rc <= 0) { - spin_unlock(&sdio->csd_lock); - GOTO(out, rc); - } - sdio->csd_write_copied = true; - } - spin_unlock(&sdio->csd_lock); + LASSERT(sdio->csd_write_copied); } } -- 1.8.3.1