Whamcloud - gitweb
EX-7601 ofd: don't read for writes past eof
author Patrick Farrell <pfarrell@whamcloud.com>
Tue, 12 Dec 2023 16:37:48 +0000 (11:37 -0500)
committer Andreas Dilger <adilger@whamcloud.com>
Sat, 6 Jan 2024 08:18:25 +0000 (08:18 +0000)
There's no data past EOF, so there's no need to do a
read-modify-write when the entire write lies beyond the chunk
that contains EOF.  In that case, don't read any data from
disk and don't attempt decompression.

There's no dedicated test for this case, but it is exercised
immediately by the random-offset copy tests, because they
seek and write various sizes at offsets past the current EOF.

We also need this functionality for reads, because in some
cases the client will issue reads past EOF (this is unusual,
but it does happen).  The read-side handling is added in a
separate patch because it requires some code reorganization.

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: Ia2b598165d5645c5a44c3d58bea69c7e42f10e41
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53425
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/obd.h
lustre/include/obd_class.h
lustre/ofd/ofd_internal.h
lustre/ofd/ofd_io.c

index 44a0538..52443ba 100644 (file)
@@ -1077,7 +1077,7 @@ struct obd_ops {
                                 struct niobuf_remote *rnb,
                                 struct niobuf_local *lnb,
                                 struct obd_ioobj *obj, int npages,
-                                enum ll_compr_type type, int lvl,
+                                int eof_rnb, enum ll_compr_type type, int lvl,
                                 int chunk_bits, bool write);
        int (*o_preprw)(const struct lu_env *env, int cmd,
                        struct obd_export *exp, struct obdo *oa, int objcount,
index ccd4c6b..81e4c96 100644 (file)
@@ -1130,8 +1130,8 @@ static inline int obd_decompress_read(const struct lu_env *env,
        }
 
        rc = OBP(exp->exp_obd, decompress_read)(env, exp, oa, rnb, lnb, obj,
-                                               npages, type, lvl, chunk_bits,
-                                               false);
+                                               npages, -1, type, lvl,
+                                               chunk_bits, false);
 
        RETURN(rc);
 }
index a01ee27..075a0ce 100644 (file)
@@ -348,8 +348,8 @@ int ofd_verify_layout_version(const struct lu_env *env,
 int ofd_decompress_read(const struct lu_env *env, struct obd_export *exp,
                        struct obdo *oa, struct niobuf_remote *rnb,
                        struct niobuf_local *lnb, struct obd_ioobj *obj,
-                       int npages, enum ll_compr_type type, int lvl,
-                       int chunk_bits, bool write);
+                       int npages, int eof_rnb, enum ll_compr_type type,
+                       int lvl, int chunk_bits, bool write);
 int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
               struct obdo *oa, int objcount, struct obd_ioobj *obj,
               struct niobuf_remote *rnb, int *nr_local,
index 4b33142..afed3b5 100644 (file)
@@ -767,6 +767,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
        bool compr_unaligned_write = false;
        __u64 prev_buf_end = 0;
        int maxlnb = *nr_write;
+       int eof_rnb = INT_MAX;
        int tot_bytes = 0;
        int nr_read = 0;
        __u64 begin;
@@ -900,7 +901,11 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                 * that data.  This shouldn't be too bad, since read beyond EOF
                 * is basically free.
                 */
-               if (chunk_size && !(rnb[i].rnb_flags & OBD_BRW_COMPRESSED)) {
+               if (chunk_size && !(rnb[i].rnb_flags & OBD_BRW_COMPRESSED)
+                   /* if a previous rnb was past eof, there's no need to keep
+                    * checking
+                    */
+                   && (eof_rnb == INT_MAX)) {
                        chunk_round(&buf_start, &buf_end, chunk_size);
 
                        if (buf_start < prev_buf_end) {
@@ -936,8 +941,12 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                                 * read-modify-write, so no rounding required
                                 */
                                if (buf_start >= la->la_size) {
+                                       CDEBUG(D_SEC,
+                                              "rnb %d from %llu to %llu (chunk rounded: %llu to %llu) is past eof\n",
+                                              i, orig_start, orig_end, buf_start, buf_end);
                                        buf_start = orig_start;
                                        buf_end = orig_end;
+                                       eof_rnb = i;
                                } else {
                                        compr_unaligned_write = true;
                                }
@@ -975,7 +984,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                 * overlap, then we ignore the first chunk - it's being handled
                 * as part of the previous rnb
                 */
-               if (buf_start != orig_start && !start_rounded_up) {
+               CDEBUG(D_SEC, "i: %d, eof_rnb %d, test %d\n", i, eof_rnb, i < eof_rnb);
+               if (i < eof_rnb && buf_start != orig_start && !start_rounded_up) {
                        first_chunk_start_idx = j;
                        CDEBUG(D_SEC,
                               "buf count %d buf_start %llu orig_start %llu, first_chunk_start_idx %d\n",
@@ -996,7 +1006,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                 * the end of this rnb is unaligned, so we need to read the
                 * chunk there.  map it to the read lnb
                 */
-               if (buf_end != orig_end) {
+               if (i < eof_rnb && buf_end != orig_end) {
                        /* calculate the start index of the last chunk */
                        int chunk_start_idx = j + rc - pages_per_chunk;
 
@@ -1055,8 +1065,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                if (unlikely(rc != 0))
                        GOTO(err, rc);
                rc = ofd_decompress_read(env, exp, oa, rnb, read_lnb, obj,
-                                        nr_read, type, lvl, chunk_bits,
-                                        true);
+                                        nr_read, eof_rnb, type, lvl,
+                                        chunk_bits, true);
                if (unlikely(rc != 0))
                        GOTO(err, rc);
                /* read_prep sets lnb_rc if it read data, or on error, but the
@@ -1154,8 +1164,8 @@ out:
 int ofd_decompress_read(const struct lu_env *env, struct obd_export *exp,
                        struct obdo *oa, struct niobuf_remote *rnb,
                        struct niobuf_local *lnb, struct obd_ioobj *obj,
-                       int npages, enum ll_compr_type type, int lvl,
-                       int chunk_bits, bool write)
+                       int npages, int eof_rnb, enum ll_compr_type type,
+                       int lvl, int chunk_bits, bool write)
 {
        struct ofd_device *ofd = ofd_exp(exp);
        struct lu_fid *fid = &oa->o_oi.oi_fid;
@@ -1190,6 +1200,12 @@ int ofd_decompress_read(const struct lu_env *env, struct obd_export *exp,
                }
                rnb_start = rnb[i].rnb_offset;
                rnb_end = rnb[i].rnb_offset + rnb[i].rnb_len;
+               if (i == eof_rnb) {
+                       CDEBUG(D_SEC,
+                              "rnb %d at %llu to %llu is past EOF, so no need to decompress\n",
+                               i, rnb_start, rnb_end);
+                       break;
+               }
 
                chunk_start = rnb_start;
                chunk_end = rnb_end;