Whamcloud - gitweb
LU-18006 osc: rework sdio locking 49/55649/7
authorPatrick Farrell <paf0187@gmail.com>
Sat, 6 Jul 2024 18:10:26 +0000 (14:10 -0400)
committerOleg Drokin <green@whamcloud.com>
Tue, 23 Jul 2024 04:42:47 +0000 (04:42 +0000)
We cannot hold a spinlock across a call to kthread_create,
so we have to rearrange the locking for this, moving the
locking inside the data copy function.  This handles the
multithreading in a slightly different spot.

Test-Parameters: testlist=sanity env=ONLY=119f,ONLY_REPEAT=100
Test-Parameters: testlist=sanity env=ONLY=119f,ONLY_REPEAT=100
Signed-off-by: Patrick Farrell <patrick.farrell@oracle.com>
Change-Id: I30c1c6600ebfe2e1fb54606608ea55469ae06937
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55649
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/obdclass/cl_io.c
lustre/osc/osc_request.c

index 2fd6ca3..0c409fe 100644 (file)
@@ -1475,6 +1475,7 @@ static ssize_t __ll_dio_user_copy(struct cl_sub_dio *sdio)
        size_t original_count = count;
        int short_copies = 0;
        bool mm_used = false;
+       bool locked = false;
        int status = 0;
        int i = 0;
        int rw;
@@ -1488,6 +1489,18 @@ static ssize_t __ll_dio_user_copy(struct cl_sub_dio *sdio)
        else
                rw = READ;
 
+       /* read copying is protected by the reference count on the sdio, since
+        * it's done as part of getting rid of the sdio, but write copying is
+        * done at the start, where there may be multiple ptlrpcd threads
+        * using this sdio, so we must lock and check if the copying has
+        * been done
+        */
+       if (rw == WRITE) {
+               spin_lock(&sdio->csd_lock);
+               locked = true;
+               if (sdio->csd_write_copied)
+                       GOTO(out, status = 0);
+       }
        /* if there's no mm, io is being done from a kernel thread, so there's
         * no need to transition to its mm context anyway.
         *
@@ -1596,15 +1609,21 @@ static ssize_t __ll_dio_user_copy(struct cl_sub_dio *sdio)
                i++;
        }
 
-out:
+       if (rw == WRITE && status == 0)
+               sdio->csd_write_copied = true;
+
        /* if we complete successfully, we should reach all of the pages */
        LASSERTF(ergo(status == 0, i == pvec->ldp_count - 1),
                 "status: %d, i: %d, pvec->ldp_count %zu, count %zu\n",
                  status, i, pvec->ldp_count, count);
 
+out:
        if (mm_used)
                kthread_unuse_mm(mm);
 
+       if (locked)
+               spin_unlock(&sdio->csd_lock);
+
        /* the total bytes copied, or status */
        RETURN(original_count - count ? original_count - count : status);
 }
index c016380..8db5523 100644 (file)
@@ -2773,23 +2773,14 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                        obj = ext->oe_obj;
 
                /* for unaligned writes, we do the data copying here */
-               if (sdio && sdio->csd_unaligned && sdio->csd_write &&
-                   !sdio->csd_write_copied) {
-                       /* note a single sdio can correspond to multiple RPCs,
-                        * so we use this lock to ensure the data copy is only
-                        * done once (an sdio can also correspond to multiple
-                        * extents, which is also handled by this)
+               if (sdio && sdio->csd_unaligned && sdio->csd_write) {
+                       rc = ll_dio_user_copy(sdio);
+                       if (rc < 0)
+                               GOTO(out, rc);
+                       /* dio_user_copy has some concurrency handling in it,
+                        * so we add this assert to ensure it did its job...
                         */
-                       spin_lock(&sdio->csd_lock);
-                       if (!sdio->csd_write_copied) {
-                               rc = ll_dio_user_copy(sdio);
-                               if (rc <= 0) {
-                                       spin_unlock(&sdio->csd_lock);
-                                       GOTO(out, rc);
-                               }
-                               sdio->csd_write_copied = true;
-                       }
-                       spin_unlock(&sdio->csd_lock);
+                       LASSERT(sdio->csd_write_copied);
                }
        }