Whamcloud - gitweb
EX-7601 ofd: add chunk rounding to write
author Patrick Farrell <pfarrell@whamcloud.com>
Thu, 2 Nov 2023 21:40:38 +0000 (17:40 -0400)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 29 Dec 2023 11:07:40 +0000 (11:07 +0000)
For compressed files, we need to round all niobufs to
chunk size in the write process, so we have buffers for
reading in and rewriting the complete chunks.

dt_bufs_get sets up the local niobuf for the write, so we
round before calling it.

Note that this breaks writing to compressed files, which is
not fixed until a few patches later.  For this reason, we
disable the compression tests.  They will be re-enabled
shortly, similar to how we handled the read series.

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I413aaba9866dd7d6c4463fa620eadf1423379ba1
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52963
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
lustre/ofd/ofd_io.c
lustre/tests/sanity-compr.sh
lustre/tests/sanity-pfl.sh
lustre/tests/sanity.sh

index b505f9f..7dc9baf 100644 (file)
@@ -751,13 +751,20 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                            struct niobuf_remote *rnb, int *nr_local,
                            struct niobuf_local *lnb, int chunk_bits)
 {
+       struct range_lock *range = &ofd_info(env)->fti_write_range;
+       struct dt_object *dt_obj = NULL;
        struct ofd_object *fo;
-       int i, j, k, rc = 0, tot_bytes = 0;
        enum dt_bufs_type dbt = DT_BUFS_TYPE_WRITE;
        int chunk_size = chunk_bits ? 1 << chunk_bits : 0;
        int maxlnb = *nr_local;
-       __u64 begin, end;
-       struct range_lock *range = &ofd_info(env)->fti_write_range;
+       __u64 prev_buf_end = 0;
+       int tot_bytes = 0;
+       __u64 begin;
+       int rc = 0;
+       __u64 end;
+       int i;
+       int j;
+       int k;
 
        ENTRY;
        LASSERT(env != NULL);
@@ -843,14 +850,86 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
 
        /* parse remote buffers to local buffers and prepare the latter */
        for (*nr_local = 0, i = 0, j = 0; i < obj->ioo_bufcnt; i++) {
+               __u64 orig_start;
+               __u64 buf_start;
+               __u64 orig_end;
+               __u64 buf_end;
+               int buf_len;
+
                begin = min_t(__u64, begin, rnb[i].rnb_offset);
                end = max_t(__u64, end, rnb[i].rnb_offset + rnb[i].rnb_len);
+               CDEBUG(D_SEC, "begin %llu, end %llu\n", begin, end);
 
                if (OBD_FAIL_CHECK(OBD_FAIL_OST_2BIG_NIOBUF))
                        rnb[i].rnb_len += PAGE_SIZE;
-               rc = dt_bufs_get(env, ofd_object_child(fo), lnb + j,
-                                rnb[i].rnb_offset, rnb[i].rnb_len, maxlnb,
-                                dbt);
+
+               buf_start = rnb[i].rnb_offset;
+               buf_end = rnb[i].rnb_offset + rnb[i].rnb_len;
+               orig_start = buf_start;
+               orig_end = buf_end;
+
+               CDEBUG(D_SEC, "buf_start %llu, buf_end %llu\n", buf_start,
+                      buf_end);
+
+               /* when writing to a compressed file, we have to round the write
+                * to cover full chunks so we can read-modify-write full chunks
+                *
+                * we know the client will not compress unaligned writes
+                * unless they are at or beyond EOF, in which case there is no
+                * need to do read-modify-write.  So if a write is compressed,
+                * we can skip the rounding for it.
+                *
+                * There's a gap here, which is if we had incompressible data
+                * being written beyond EOF, we will do read-modify-write for
+                * that data.  This shouldn't be too bad, since read beyond EOF
+                * is basically free.
+                */
+               if (chunk_size && !(rnb[i].rnb_flags & OBD_BRW_COMPRESSED)) {
+                       chunk_round(&buf_start, &buf_end, chunk_size);
+
+                       /* rounded rnbs can overlap at the chunk level, but it's
+                        * important we don't allocate multiple buffers for the
+                        * same page, so move the start of this buffer to the
+                        * end of the previous one
+                        */
+                       if (buf_start < prev_buf_end) {
+                               buf_start = prev_buf_end;
+                               /* two rnbs may be entirely inside the same
+                                * chunk, in which case we're already doing IO
+                                * for that chunk, so skip it
+                                */
+                               prev_buf_end = buf_end;
+                               if (buf_start == buf_end)
+                                       continue;
+                       }
+
+                       if (buf_start != orig_start || buf_end != orig_end) {
+                               /* get attr only once for each IO */
+                               if (!dt_obj) {
+                                       dt_obj = ofd_object_child(fo);
+                                       rc = dt_attr_get(env, dt_obj, la);
+                                       if (rc)
+                                               GOTO(err_nolock, rc);
+                               }
+                               /* if this write is beyond EOF, there's no
+                                * compressed data under it, so no need to do
+                                * read-modify-write, so no rounding required
+                                */
+                               if (buf_start >= la->la_size) {
+                                       buf_start = orig_start;
+                                       buf_end = orig_end;
+                               }
+                       }
+                       prev_buf_end = buf_end;
+               }
+
+               buf_len = buf_end - buf_start;
+
+               CDEBUG(D_SEC, "buf_start %llu, buf_end %llu\n", buf_start,
+                      buf_end);
+
+               rc = dt_bufs_get(env, ofd_object_child(fo), lnb + j, buf_start,
+                                buf_len, maxlnb, dbt);
                if (unlikely(rc < 0))
                        GOTO(err_nolock, rc);
                LASSERT(rc <= PTLRPC_MAX_BRW_PAGES);
index f80359d..c769d68 100644 (file)
@@ -15,6 +15,11 @@ init_logging
 
 # bug number for skipped test:
 ALWAYS_EXCEPT="$SANITY_COMPR_EXCEPT "
+### TEMPORARY WILL BE REMOVED IN A FUTURE PATCH ###
+always_except EX-7601  1000
+always_except EX-7601  1001
+always_except EX-7601  1002
+always_except EX-7601  1003
 
 build_test_filter
 
index 33d3c2c..41cbc84 100644 (file)
@@ -28,6 +28,9 @@ fi
 
 # until data compression on MDT works
 always_except EX-7806  100k
+### TEMPORARY WILL BE REMOVED IN A FUTURE PATCH ###
+always_except EX-7601  100j
+always_except EX-7601  100l
 
 build_test_filter
 
index adb0919..973fef0 100755 (executable)
@@ -46,6 +46,8 @@ always_except LU-16515 118c 118d
 always_except LU-9054  312
 always_except LU-8411  407
 always_except EX-4334  428
+### TEMPORARY - REMOVED IN LATER PATCH ###
+always_except EX-7601  460
 
 if $SHARED_KEY; then
        always_except LU-14181 64e 64f