Whamcloud - gitweb
EX-7601 ofd: add decompress_read to ofd_preprw_write
author: Patrick Farrell <pfarrell@whamcloud.com>
Fri, 3 Nov 2023 18:20:37 +0000 (14:20 -0400)
committer: Andreas Dilger <adilger@whamcloud.com>
Fri, 29 Dec 2023 11:08:40 +0000 (11:08 +0000)
We have read the compressed data from disk; now we must
decompress it so that it can be rewritten successfully.

This code still operates on all of the lnbs rather than just
on the portion of them which is unaligned.  This is temporary
and will be resolved by a future patch.

With this patch, we have basic read-modify-write support,
so we can re-enable testing.  The next patch adds tests
for read-modify-write.

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: Ib6503c15e9fb3d425a7bc295bcc61b41c089a1f0
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52983
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
15 files changed:
lustre/include/obd.h
lustre/include/obd_class.h
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_io.c
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/ofd/ofd_compress.c
lustre/ofd/ofd_compress.h
lustre/ofd/ofd_internal.h
lustre/ofd/ofd_io.c
lustre/target/tgt_handler.c
lustre/tests/sanity-compr.sh
lustre/tests/sanity-pfl.sh
lustre/tests/sanity.sh
lustre/tests/sanityn.sh

index c2448bb..b7bd835 100644 (file)
@@ -1078,12 +1078,12 @@ struct obd_ops {
                                 struct niobuf_local *lnb,
                                 struct obd_ioobj *obj, int npages,
                                 enum ll_compr_type type, int lvl,
-                                int chunk_bits);
+                                int chunk_bits, bool write);
        int (*o_preprw)(const struct lu_env *env, int cmd,
                        struct obd_export *exp, struct obdo *oa, int objcount,
                        struct obd_ioobj *obj, struct niobuf_remote *remote,
                        int *nr_pages, struct niobuf_local *local,
-                       int chunk_bits);
+                       enum ll_compr_type type, int lvl, int chunk_bits);
        int (*o_commitrw)(const struct lu_env *env, int cmd,
                          struct obd_export *exp, struct obdo *oa,
                          int objcount, struct obd_ioobj *obj,
index 89f19b8..4a899ab 100644 (file)
@@ -1130,7 +1130,8 @@ static inline int obd_decompress_read(const struct lu_env *env,
        }
 
        rc = OBP(exp->exp_obd, decompress_read)(env, exp, oa, rnb, lnb, obj,
-                                               npages, type, lvl, chunk_bits);
+                                               npages, type, lvl, chunk_bits,
+                                               false);
 
        RETURN(rc);
 }
@@ -1139,7 +1140,8 @@ static inline int obd_preprw(const struct lu_env *env, int cmd,
                             struct obd_export *exp, struct obdo *oa,
                             int objcount, struct obd_ioobj *obj,
                             struct niobuf_remote *remote, int *pages,
-                            struct niobuf_local *local, int chunk_bits)
+                            struct niobuf_local *local,
+                            enum ll_compr_type type, int lvl, int chunk_bits)
 {
        int rc;
 
@@ -1156,7 +1158,7 @@ static inline int obd_preprw(const struct lu_env *env, int cmd,
        }
 
        rc = OBP(exp->exp_obd, preprw)(env, cmd, exp, oa, objcount, obj, remote,
-                                      pages, local, chunk_bits);
+                                      pages, local, type, lvl, chunk_bits);
 
        RETURN(rc);
 }
index 6188eda..d6121b5 100644 (file)
@@ -1385,7 +1385,8 @@ static inline char *mdt_req_get_jobid(struct ptlrpc_request *req)
 int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
                   struct obdo *oa, int objcount, struct obd_ioobj *obj,
                   struct niobuf_remote *rnb, int *nr_local,
-                  struct niobuf_local *lnb, int chunk_size);
+                  struct niobuf_local *lnb, enum ll_compr_type type, int lvl,
+                  int chunk_bits);
 
 int mdt_obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                     struct obdo *oa, int objcount, struct obd_ioobj *obj,
index 377ba40..29ad053 100644 (file)
@@ -512,7 +512,8 @@ unlock:
 int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
                   struct obdo *oa, int objcount, struct obd_ioobj *obj,
                   struct niobuf_remote *rnb, int *nr_local,
-                  struct niobuf_local *lnb, int chunk_size)
+                  struct niobuf_local *lnb, enum ll_compr_type type, int lvl,
+                  int chunk_bits)
 {
        struct tgt_session_info *tsi = tgt_ses_info(env);
        struct mdt_thread_info *info = tsi2mdt_info(tsi);
@@ -524,7 +525,7 @@ int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
        /* this should never be hit because setstripe does not allow DOM +
         * compression, but it's here just in case
         */
-       if (chunk_size != 0) {
+       if (chunk_bits != 0) {
                rc = -EOPNOTSUPP;
                CERROR("%s: Compression is not supported on dom, rc %d\n",
                       exp->exp_obd->obd_name, rc);
index 56d5b0c..7f5972b 100644 (file)
@@ -309,7 +309,8 @@ static int echo_preprw(const struct lu_env *env, int cmd,
                       struct obd_export *export, struct obdo *oa,
                       int objcount, struct obd_ioobj *obj,
                       struct niobuf_remote *nb, int *pages,
-                      struct niobuf_local *res, int chunk_size)
+                      struct niobuf_local *res, enum ll_compr_type type,
+                      int lvl, int chunk_bits)
 {
        struct obd_device *obd;
        int tot_bytes = 0;
index 40d32fc..68d9ab8 100644 (file)
@@ -2693,7 +2693,7 @@ static int echo_client_prep_commit(const struct lu_env *env,
 
                lpages = npages;
                ret = obd_preprw(env, rw, exp, oa, 1, &ioo, &rnb, &lpages, lnb,
-                                0);
+                                0, 0, 0);
                if (ret != 0)
                        GOTO(out, ret);
 
index 24a454c..9eac26a 100644 (file)
@@ -153,7 +153,8 @@ out:
 int decompress_rnb(const char *obd_name, struct niobuf_local *lnbs,
                   int lnb_npages, __u64 rnb_start, __u64 rnb_end,
                   int *lnb_start, void **bounce_src, void **bounce_dst,
-                  enum ll_compr_type type, int lvl, int chunk_size)
+                  enum ll_compr_type type, int lvl, int chunk_size,
+                  bool write)
 {
        struct niobuf_local *lnb = NULL;
        int pages_per_chunk = chunk_size / PAGE_SIZE;
@@ -254,7 +255,7 @@ int decompress_rnb(const char *obd_name, struct niobuf_local *lnbs,
                         * a short chunk, so the read will return the complete
                         * chunk to the client and the client will decompress it
                         */
-                       if (lnbs[j].lnb_rc == 0) {
+                       if (lnbs[j].lnb_rc == 0 && !write) {
                                CDEBUG(D_SEC, "Hit EOF in lnb %d at %llu\n",
                                        j, lnbs[j].lnb_file_offset);
                                if (prev &&
index 7fdf0f5..78f3ea1 100644 (file)
@@ -35,6 +35,7 @@
 int decompress_rnb(const char *obd_name, struct niobuf_local *lnbs,
                   int lnb_npages, __u64 rnb_start, __u64 rnb_end,
                   int *lnb_offset, void **bounce_src, void **bounce_dst,
-                  enum ll_compr_type type, int lvl, int chunk_size);
+                  enum ll_compr_type type, int lvl, int chunk_size,
+                  bool write);
 
 #endif /* _OFD_COMPRESS_H */
index d94942e..bf78e70 100644 (file)
@@ -350,11 +350,12 @@ int ofd_decompress_read(const struct lu_env *env, struct obd_export *exp,
                        struct obdo *oa, struct niobuf_remote *rnb,
                        struct niobuf_local *lnb, struct obd_ioobj *obj,
                        int npages, enum ll_compr_type type, int lvl,
-                       int chunk_bits);
-int ofd_preprw(const struct lu_env *env,int cmd, struct obd_export *exp,
+                       int chunk_bits, bool write);
+int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
               struct obdo *oa, int objcount, struct obd_ioobj *obj,
               struct niobuf_remote *rnb, int *nr_local,
-              struct niobuf_local *lnb, int chunk_bits);
+              struct niobuf_local *lnb, enum ll_compr_type type, int lvl,
+              int chunk_bits);
 int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp,
                 struct obdo *oa, int objcount, struct obd_ioobj *obj,
                 struct niobuf_remote *rnb, int npages,
index 42d5d28..f98fc46 100644 (file)
@@ -749,7 +749,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                            struct lu_attr *la, struct obdo *oa,
                            int objcount, struct obd_ioobj *obj,
                            struct niobuf_remote *rnb, int *nr_local,
-                           struct niobuf_local *lnb, int chunk_bits)
+                           struct niobuf_local *lnb, enum ll_compr_type type,
+                           int lvl, int chunk_bits)
 {
        struct range_lock *range = &ofd_info(env)->fti_write_range;
        struct dt_object *dt_obj = NULL;
@@ -888,11 +889,6 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                if (chunk_size && !(rnb[i].rnb_flags & OBD_BRW_COMPRESSED)) {
                        chunk_round(&buf_start, &buf_end, chunk_size);
 
-                       /* rounded rnbs can overlap at the chunk level, but it's
-                        * important we don't allocate multiple buffers for the
-                        * same page, so move the start of this buffer to the
-                        * end of the previous one
-                        */
                        if (buf_start < prev_buf_end) {
                                buf_start = prev_buf_end;
                                /* two rnbs may be entirely inside the same
@@ -904,7 +900,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                                        continue;
                        }
 
-                       /* this write is unaligned */
+                       /* this write is not aligned to chunk size */
                        if (buf_start != orig_start || buf_end != orig_end) {
                                /* get attr only once for each IO */
                                if (!dt_obj) {
@@ -973,16 +969,17 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
        }
 
        if (compr_unaligned_write) {
-               /* for now, read will not hold pages locked, since it's not
-                * doing decompression.  this will be changed shortly.
-                */
                rc = dt_read_prep(env, ofd_object_child(fo), lnb, *nr_local,
-                                 false);
+                                 true);
+               if (unlikely(rc != 0))
+                       GOTO(err, rc);
+               rc = ofd_decompress_read(env, exp, oa, rnb, lnb, obj, *nr_local,
+                                        type, lvl, chunk_bits, true);
                if (unlikely(rc != 0))
                        GOTO(err, rc);
-               /* read_prep sets rc if it read data, or on error, but the write
-                * code expects rc to be zero, so we clear rc here except on
-                * error
+               /* read_prep sets lnb_rc if it read data, or on error, but the
+                * write code expects lnb_rc to be zero, so we clear lnb_rc
+                * here except on error
                 */
                for (i = 0; i < *nr_local; i++) {
                        if (lnb[i].lnb_rc > 0)
@@ -1058,7 +1055,7 @@ int ofd_decompress_read(const struct lu_env *env, struct obd_export *exp,
                        struct obdo *oa, struct niobuf_remote *rnb,
                        struct niobuf_local *lnb, struct obd_ioobj *obj,
                        int npages, enum ll_compr_type type, int lvl,
-                       int chunk_bits)
+                       int chunk_bits, bool write)
 {
        struct ofd_device *ofd = ofd_exp(exp);
        struct lu_fid *fid = &oa->o_oi.oi_fid;
@@ -1105,6 +1102,16 @@ int ofd_decompress_read(const struct lu_env *env, struct obd_export *exp,
                       rnb_start, rnb_end);
                chunk_round(&chunk_start, &chunk_end, chunk_size);
                if (chunk_start != rnb_start || chunk_end != rnb_end) {
+                       /* if the client provided this unaligned write already
+                        * compressed, the client knows the write was at/past
+                        * EOF, so we can skip it
+                        */
+                       if (write && rnb[i].rnb_flags & OBD_BRW_COMPRESSED) {
+                               CDEBUG(D_SEC,
+                                      "skipping unaligned rnb %d, already compressed\n",
+                                      i);
+                               continue;
+                       }
                        /* rounded rnbs can overlap at the chunk level, but if
                         * they do, we've already decompressed that chunk, so
                         * start at the end of that chunk
@@ -1144,7 +1151,7 @@ int ofd_decompress_read(const struct lu_env *env, struct obd_export *exp,
                        rc = decompress_rnb(exp->exp_obd->obd_name, lnb, npages,
                                            rnb_start, rnb_end, &lnb_start,
                                            bounce_src, bounce_dst, type, lvl,
-                                           chunk_size);
+                                           chunk_size, write);
                        if (rc)
                                GOTO(out, rc);
                }
@@ -1154,10 +1161,11 @@ out:
        /* after decompression, we can now unlock the pages in the lnb so they
         * can be read from other threads.
         *
-        * NB: We will skip this on decompress-for-rewrite, since the pages
-        * must be locked for write.
+        * we skip this on decompress-for-rewrite, since the pages must be
+        * locked for write.
         */
-       dt_unlock_pages(ofd_object_child(fo), lnb, npages);
+       if (!write)
+               dt_unlock_pages(ofd_object_child(fo), lnb, npages);
 
        if (bounce_dst)
                sptlrpc_pool_put_pages(&bounce_dst, buf_bits);
@@ -1192,7 +1200,8 @@ out:
 int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
               struct obdo *oa, int objcount, struct obd_ioobj *obj,
               struct niobuf_remote *rnb, int *nr_local,
-              struct niobuf_local *lnb, int chunk_bits)
+              struct niobuf_local *lnb, enum ll_compr_type type, int lvl,
+              int chunk_bits)
 {
        struct tgt_session_info *tsi = tgt_ses_info(env);
        struct ofd_device       *ofd = ofd_exp(exp);
@@ -1241,8 +1250,8 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
        if (cmd == OBD_BRW_WRITE) {
                la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR);
                rc = ofd_preprw_write(env, exp, ofd, fid, &info->fti_attr, oa,
-                                     objcount, obj, rnb, nr_local, lnb,
-                                     chunk_bits);
+                                     objcount, obj, rnb, nr_local, lnb, type,
+                                     lvl, chunk_bits);
        } else if (cmd == OBD_BRW_READ) {
                tgt_grant_prepare_read(env, exp, oa);
                rc = ofd_preprw_read(env, exp, ofd, fid, &info->fti_attr, oa,
index 1868caa..1e58a6c 100644 (file)
@@ -2542,7 +2542,7 @@ int tgt_brw_read(struct tgt_session_info *tsi)
 
        rc = obd_preprw(tsi->tsi_env, OBD_BRW_READ, exp, &repbody->oa, 1,
                        ioo, remote_nb, &npages_local, local_io_nb,
-                       chunk_bits);
+                       compr_type, compr_lvl, chunk_bits);
        if (rc != 0)
                GOTO(out_lock, rc);
 
@@ -2826,6 +2826,7 @@ int tgt_brw_write(struct tgt_session_info *tsi)
        int chunk_bits = 0;
        int npages_local;
        ktime_t kstart;
+       int compr_lvl;
        int objcount;
        int niocount;
        int nob = 0;
@@ -2932,6 +2933,7 @@ int tgt_brw_write(struct tgt_session_info *tsi)
 
        olc = &body->oa.o_layout_compr;
        compr_type = olc->ol_compr_type;
+       compr_lvl = olc->ol_compr_lvl;
        if (compr_type != LL_COMPR_TYPE_NONE) {
                int nrbufs = ioo->ioo_bufcnt;
                unsigned int chunk_log_bits;
@@ -3016,7 +3018,7 @@ int tgt_brw_write(struct tgt_session_info *tsi)
        kstart = ktime_get();
        rc = obd_preprw(tsi->tsi_env, OBD_BRW_WRITE, exp, &repbody->oa,
                        objcount, ioo, remote_nb, &npages_local, local_io_nb,
-                       chunk_bits);
+                       compr_type, compr_lvl, chunk_bits);
        if (rc < 0)
                GOTO(out_lock, rc);
 
index c769d68..f80359d 100644 (file)
@@ -15,11 +15,6 @@ init_logging
 
 # bug number for skipped test:
 ALWAYS_EXCEPT="$SANITY_COMPR_EXCEPT "
-### TEMPORARY WILL BE REMOVED IN A FUTURE PATCH ###
-always_except EX-7601  1000
-always_except EX-7601  1001
-always_except EX-7601  1002
-always_except EX-7601  1003
 
 build_test_filter
 
index 41cbc84..33d3c2c 100644 (file)
@@ -28,9 +28,6 @@ fi
 
 # until data compression on MDT works
 always_except EX-7806  100k
-### TEMPORARY WILL BE REMOVED IN A FUTURE PATCH ###
-always_except EX-7601  100j
-always_except EX-7601  100l
 
 build_test_filter
 
index 973fef0..adb0919 100755 (executable)
@@ -46,8 +46,6 @@ always_except LU-16515 118c 118d
 always_except LU-9054  312
 always_except LU-8411  407
 always_except EX-4334  428
-### TEMPORARY - REMOVED IN LATER PATCH ###
-always_except EX-7601  460
 
 if $SHARED_KEY; then
        always_except LU-14181 64e 64f
index fb4a7bf..8c74126 100755 (executable)
@@ -19,7 +19,7 @@ init_logging
 
 ALWAYS_EXCEPT="$SANITYN_EXCEPT "
 # bug number for skipped test:  LU-7105
-ALWAYS_EXCEPT+="                28     "
+ALWAYS_EXCEPT+="                28"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 if [ $mds1_FSTYPE = "zfs" ]; then