EX-8993 ofd: do not write 'hole' pages on compression
author	Patrick Farrell <pfarrell@whamcloud.com>
	Tue, 13 Feb 2024 16:45:17 +0000 (11:45 -0500)
committer	Andreas Dilger <adilger@whamcloud.com>
	Sat, 24 Feb 2024 03:44:50 +0000 (03:44 +0000)
When doing an unaligned read-modify-write to a compressed
file, the IO region covered by the write lnb must be rounded
out to chunk boundaries so the existing compressed data can
be read up for modification.
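
For example (a sketch using the chunk_round() helper declared
in lustre/include/obd.h below, and assuming it rounds the
start down and the end up to chunk boundaries), a 16K write
at offset 32K in a file with 128K chunks is expanded to the
whole containing chunk:

	__u64 start = 32 * 1024;	/* 32768 */
	__u64 end = 48 * 1024;		/* 49152, exclusive */

	/* expand [32K, 48K) to its containing chunk: [0, 128K) */
	chunk_round(&start, &end, 128 * 1024);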

In some cases this rounding leaves pages in the write lnb
which contain no data.  These pages must not be written out:
doing so wastes space and can result in an incorrect file
size.
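
The fix is to flag such pages instead of writing them,
roughly (simplified from the ofd_compress.c hunk below):

	/* the read-up put no data in this page: it is a hole */
	if (write && lnbs[i].lnb_rc == 0)
		lnbs[i].lnb_hole = true;	/* skip at commit */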

In most cases the file size is protected by the client
sending it with the write, but if the client does not
compress a particular write, it does not send the size and
the server does not use it.  We could resolve this by having
the client always send the size and the server always use
it, but it is better to make server writes 'hole' aware,
since that also improves space usage.  (Hole awareness will
also be required for the server to do recompression on
read-modify-write; otherwise no space is gained.)

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I66169e205fe4691ed03b2c9b3005ffc4ecd3213d
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53595
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/obd.h
lustre/ofd/ofd_compress.c
lustre/ofd/ofd_io.c
lustre/osd-ldiskfs/osd_io.c
lustre/target/tgt_handler.c
lustre/tests/sanity-compr.sh

diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index 8031bf8..9b80c01 100644
@@ -477,6 +477,10 @@ struct niobuf_local {
        __u16           lnb_guard_disk:1;
        /* separate unlock for read path to allow shared access */
        __u16           lnb_locked:1;
+       /* this lnb represents a hole in the file and should not be written out
+        * or transferred to the client
+        */
+       __u16           lnb_hole:1;
 };
 
 void chunk_round(__u64 *buf_start, __u64 *buf_end, int chunk_size);
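
The new lnb_hole bit travels with the niobuf_local, so any
layer that walks the lnb array can test it.  A minimal
consumer sketch (the real checks are in the osd-ldiskfs and
target hunks below):

	/* neither allocate blocks nor submit IO for hole pages */
	if (lnb[i].lnb_hole)
		continue;
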
diff --git a/lustre/ofd/ofd_compress.c b/lustre/ofd/ofd_compress.c
index d5200a9..260207d 100644
@@ -37,7 +37,7 @@ static int decompress_chunk_in_lnb(const char *obd_name, struct lu_fid *fid,
                                   struct niobuf_local *lnbs, int lnb_start,
                                   void **bounce_src, void **bounce_dst,
                                   enum ll_compr_type type, int lvl,
-                                  int chunk_size)
+                                  int chunk_size, bool write)
 {
        struct ll_compr_hdr *llch = NULL;
        struct crypto_comp *cc = NULL;
@@ -148,11 +148,15 @@ out:
         */
        for(i = lnb_start; i < lnb_start + pages_in_chunk; i++) {
                /* if there's no data in this page, we must clear it */
-               if (lnbs[i].lnb_rc == 0) {
+               if (!write && lnbs[i].lnb_rc == 0) {
                        CDEBUG(D_SEC, "%s: no data, clearing: page  %d, "DFID" at %llu\n",
                               obd_name, i, PFID(fid), lnbs[i].lnb_file_offset);
                        memset(kmap(lnbs[i].lnb_page), 0, PAGE_SIZE);
                        kunmap(lnbs[i].lnb_page);
+               } else if (write && lnbs[i].lnb_rc == 0) {
+                       lnbs[i].lnb_hole = true;
+                       CDEBUG(D_SEC, "no data in page %d at %llu, marking hole\n",
+                              i, lnbs[i].lnb_file_offset);
                }
        }
        if (cc)
@@ -216,7 +220,8 @@ int decompress_rnb(const char *obd_name, struct lu_fid *fid,
                        GOTO(out, rc = -EINVAL);
 
                rc = decompress_chunk_in_lnb(obd_name, fid, lnbs, i, bounce_src,
-                                            bounce_dst, type, lvl, chunk_size);
+                                            bounce_dst, type, lvl, chunk_size,
+                                            write);
                if (rc)
                        GOTO(out, rc);
                i += pages_per_chunk;
@@ -255,7 +260,8 @@ int decompress_rnb(const char *obd_name, struct lu_fid *fid,
                        GOTO(out, rc = -EINVAL);
 
                rc = decompress_chunk_in_lnb(obd_name, fid, lnbs, i, bounce_src,
-                                            bounce_dst, type, lvl, chunk_size);
+                                            bounce_dst, type, lvl, chunk_size,
+                                            write);
                if (rc)
                        GOTO(out, rc);
                i += pages_per_chunk;
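
Note the read/write split above: on the read path an empty
page must be zeroed, because its contents will be transferred
to the client, while on the write path the page is only
flagged so the OSD can skip it at commit time.
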
diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c
index 0e426a4..33321be 100644
@@ -723,8 +723,9 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp,
                                     */
                                    (buf_start == orig_start && orig_end >= la->la_size)) {
                                        CDEBUG(D_SEC,
-                                              "rnb %d from %llu to %llu (chunk rounded: %llu to %llu) is past eof\n",
-                                              i, orig_start, orig_end, buf_start, buf_end);
+                                              "rnb %d from %llu to %llu (chunk rounded: %llu to %llu) is past eof (%llu)\n",
+                                              i, orig_start, orig_end,
+                                              buf_start, buf_end, la->la_size);
                                        buf_start = orig_start;
                                        buf_end = orig_end;
                                        eof_rnb = i;
@@ -1166,6 +1167,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp,
                                CDEBUG(D_SEC, "read_lnb %d is write_lnb %d (offset %llu), lnb_rc %d\n",
                                       i, j, write_lnb[j].lnb_file_offset, read_lnb[i].lnb_rc);
                                write_lnb[j] = read_lnb[i];
+                               CDEBUG(D_SEC, "write lnb %d at %llu hole %d\n", j, write_lnb[j].lnb_file_offset, write_lnb[j].lnb_hole);
                                break;
                        }
                }
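
Since "write_lnb[j] = read_lnb[i]" is a struct copy, the
lnb_hole flag set during decompression is carried into the
write lnb, which is what the added debug message verifies.
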
diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c
index 837ab29..166f641 100644
@@ -690,6 +690,7 @@ static int osd_init_lnbs(loff_t offset, ssize_t len, int *nrpages,
                lnb->lnb_guard_rpc = 0;
                lnb->lnb_guard_disk = 0;
                lnb->lnb_locked = 0;
+               lnb->lnb_hole = 0;
 
                LASSERTF(plen <= len, "plen %u, len %lld\n", plen,
                         (long long) len);
@@ -1656,9 +1657,15 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt,
                        lnb[i].lnb_rc = 0;
                }
 
-               if (lnb[i].lnb_rc) { /* ENOSPC, network RPC error, etc. */
-                       CDEBUG(D_INODE, "Skipping [%d] == %d\n", i,
-                              lnb[i].lnb_rc);
+               /* ENOSPC, network RPC error, hole, etc. */
+               if (lnb[i].lnb_rc || lnb[i].lnb_hole) {
+                       CDEBUG(D_INODE, "Skipping [%d] at %llu == %d%s\n", i,
+                              lnb[i].lnb_file_offset, lnb[i].lnb_rc,
+                              /* lnb_hole indicates this is a hole due to
+                               * compression rounding, so we do not write this
+                               * page to disk
+                               */
+                              lnb[i].lnb_hole ? " because no data in page" : "");
                        LASSERT(lnb[i].lnb_page);
                        generic_error_remove_page(inode->i_mapping,
                                                  lnb[i].lnb_page);
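
Hole pages thus take the same skip path as pages that failed
with an error (e.g. ENOSPC): the page is dropped from the
page cache with generic_error_remove_page() and never
submitted, so it can neither consume blocks nor wrongly
extend the file size.
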
diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c
index 4b70cce..69dc7b0 100644
@@ -2372,6 +2372,17 @@ int io_lnb_to_tx_lnb(struct niobuf_local *io_lnb, struct niobuf_local *tx_lnb,
                CDEBUG(D_SEC, "nio %d npages_remote %d, lnb_index %d\n", i,
                       npages_remote, lnb_index);
                for (j = 0; j < npages_remote; j++) {
+                       /* we are transferring to this page, so if it had no
+                        * data before (if it was a hole), it will now have data
+                        * (this only occurs on writes)
+                        */
+                       if (io_lnb[lnb_index + j].lnb_hole == true) {
+                               CDEBUG(D_SEC,
+                                      "write to hole, so clearing hole in %d at %llu\n",
+                                      lnb_index + j,
+                                      io_lnb[lnb_index + j].lnb_file_offset);
+                               io_lnb[lnb_index + j].lnb_hole = false;
+                       }
                        /* point the tx_ln member at the io_lnb member */
                        tx_lnb[tx_lnb_pg] = io_lnb[lnb_index + j];
 
diff --git a/lustre/tests/sanity-compr.sh b/lustre/tests/sanity-compr.sh
index 36d9823..d6ced48 100644
@@ -701,6 +701,7 @@ test_1005() {
                skip "Need OST >= 2.14.0-ddn127-8-g46708e4636 for partial compr"
 
        local tf=$DIR/$tfile
+       local tf_copy=$TMP/$tfile.copy
        # We read from $tfile to this
        # Larger than arm page size
        local chunksize=128
@@ -744,22 +745,23 @@ test_1005() {
        # chunk, just 4K at the beginning (this won't be compressed, but it's
        # still a valid test)
        dd if=$source bs=4K of=$tf count=1 || error "(0) dd failed"
-       dd if=$source bs=4K of=$tf.2 count=1 || error "(1) dd failed"
+       dd if=$source bs=4K of=$tf_copy count=1 || error "(1) dd failed"
 
-       flush_and_compare $tf $tf.2 "(2)"
+       flush_and_compare $tf $tf_copy "(2)"
 
        # Now we write adjacent in this first chunk, confirming updating it works
+       # Write 4K twice starting at an offset of 4K
        dd if=$source bs=4K of=$tf conv=notrunc count=2 seek=1 skip=1 || error "(3) dd failed"
-       dd if=$source bs=4K of=$tf.2 conv=notrunc count=2 seek=1 skip=1 || error "(4) dd failed"
+       dd if=$source bs=4K of=$tf_copy conv=notrunc count=2 seek=1 skip=1 || error "(4) dd failed"
 
-       flush_and_compare $tf $tf.2 "(5)"
+       flush_and_compare $tf $tf_copy "(5)"
 
        # Now we jump a bit further on and do a write
+       # Write 8K at an offset of 32K
        dd if=$source bs=8K of=$tf conv=notrunc count=1 seek=4 skip=4 || error "(6) dd failed"
-       dd if=$source bs=8K of=$tf.2 conv=notrunc count=1 seek=4 skip=4 || error "(7) dd failed"
-       sync
+       dd if=$source bs=8K of=$tf_copy conv=notrunc count=1 seek=4 skip=4 || error "(7) dd failed"
 
-       flush_and_compare $tf $tf.2 "(8)"
+       flush_and_compare $tf $tf_copy "(8)"
 
        # None of the above was compressed, because the first write was too
        # small and the later writes weren't chunk aligned.  Confirm the server
@@ -768,89 +770,98 @@ test_1005() {
 
        # OK, now we truncate the file back to zero and start with a compressed
        # chunk at the beginning.
-       # 16K - this will be compressed
+       # Write 16K at 0 - this will be compressed
        dd if=$source bs=16K of=$tf count=1 || error "(9) dd failed"
-       dd if=$source bs=16K of=$tf.2 count=1 || error "(10) dd failed"
+       dd if=$source bs=16K of=$tf_copy count=1 || error "(10) dd failed"
 
-       flush_and_compare $tf $tf.2 "(11)"
+       flush_and_compare $tf $tf_copy "(11)"
 
        # Now we do the same size of write slightly further down
-       # 16K, in to an existing chunk - leads to read-modify-write
+       # 16K into an existing chunk - read-modify-write
+       # Seek to 32K and write 16K
        dd if=$source bs=16K of=$tf count=1 seek=2 skip=2 conv=notrunc || error "(12) dd failed"
-       dd if=$source bs=16K of=$tf.2 count=1 seek=2 skip=2 conv=notrunc || error "(13) dd failed"
+       dd if=$source bs=16K of=$tf_copy count=1 seek=2 skip=2 conv=notrunc || error "(13) dd failed"
 
-       flush_and_compare $tf $tf.2 "(14)"
+       flush_and_compare $tf $tf_copy "(14)"
 
        # OK, now we're going to create a complete compressed chunk further
        # along in the file
+       # Seek to 128K and write 128K
        dd if=$source bs=128K count=1 skip=1 seek=1 conv=notrunc of=$tf ||
                error "(15) dd failed"
-       dd if=$source bs=128K count=1 skip=1 seek=1 conv=notrunc of=$tf.2 ||
+       dd if=$source bs=128K count=1 skip=1 seek=1 conv=notrunc of=$tf_copy ||
                error "(16) dd failed"
 
-       flush_and_compare $tf $tf.2 "(17)"
+       flush_and_compare $tf $tf_copy "(17)"
 
        # Now we're going to write *into* the middle of this compressed chunk
+       # Seek to 160K and write 32K
        dd if=$source bs=32K count=1 skip=5 seek=5 conv=notrunc of=$tf ||
                error "(18) dd failed"
-       dd if=$source bs=32K count=1 skip=5 seek=5 conv=notrunc of=$tf.2 ||
+       dd if=$source bs=32K count=1 skip=5 seek=5 conv=notrunc of=$tf_copy ||
                error "(19) dd failed"
 
-       flush_and_compare $tf $tf.2 "(20)"
+       flush_and_compare $tf $tf_copy "(20)"
 
        # OK, now we're going to add another incomplete chunk after those
        # two - starting in the third chunk, ie, offset of 256K
        # This will be compressed
+       # Seek to 256K and write 64K
        dd if=$source bs=64K count=1 skip=4 seek=4 conv=notrunc of=$tf ||
                error "(21) dd failed"
-       dd if=$source bs=64K count=1 skip=4 seek=4 conv=notrunc of=$tf.2 ||
+       dd if=$source bs=64K count=1 skip=4 seek=4 conv=notrunc of=$tf_copy ||
                error "(22) dd failed"
 
-       flush_and_compare $tf $tf.2 "(23)"
+       flush_and_compare $tf $tf_copy "(23)"
 
        # Now we're going to write the second 'half' of this chunk
+       # Seek to 320K and write 64K
        dd if=$source bs=64K count=1 skip=5 seek=5 conv=notrunc of=$tf ||
                error "(24) dd failed"
-       dd if=$source bs=64K count=1 skip=5 seek=5 conv=notrunc of=$tf.2 ||
+       dd if=$source bs=64K count=1 skip=5 seek=5 conv=notrunc of=$tf_copy ||
                error "(25) dd failed"
 
-       flush_and_compare $tf $tf.2 "(26)"
+       flush_and_compare $tf $tf_copy "(26)"
 
        # And now we're going to add a chunk that doesn't start at offset 0,
        # so it won't be compressed.  This is the fourth chunk, so starts at
        # 384K.  So we'll start at 448K
+       # Seek to 448K and write 64K
        dd if=$source bs=64K count=1 skip=7 seek=7 conv=notrunc of=$tf ||
                error "(27) dd failed"
-       dd if=$source bs=64K count=1 skip=7 seek=7 conv=notrunc of=$tf.2 ||
+       dd if=$source bs=64K count=1 skip=7 seek=7 conv=notrunc of=$tf_copy ||
                error "(28) dd failed"
 
-       flush_and_compare $tf $tf.2 "(29)"
+       flush_and_compare $tf $tf_copy "(29)"
 
        # Now we'll write a few KiB in to the middle of the first part of this
        # chunk - starting at 400K
+       # Seek to 400K and write 16K
        dd if=$source bs=8K count=2 skip=50 seek=50 conv=notrunc of=$tf ||
                error "(30) dd failed"
-       dd if=$source bs=8K count=2 skip=50 seek=50 conv=notrunc of=$tf.2 ||
+       dd if=$source bs=8K count=2 skip=50 seek=50 conv=notrunc of=$tf_copy ||
                error "(31) dd failed"
 
-       flush_and_compare $tf $tf.2 "(32)"
+       flush_and_compare $tf $tf_copy "(32)"
 
        # Now let's skip an entire chunk and do a full chunk at 640K
+       # Seek to 640K and write 128K
        dd if=$source bs=128K count=1 skip=5 seek=5 conv=notrunc of=$tf ||
                error "(33) dd failed"
-       dd if=$source bs=128K count=1 skip=5 seek=5 conv=notrunc of=$tf.2 ||
+       dd if=$source bs=128K count=1 skip=5 seek=5 conv=notrunc of=$tf_copy ||
                error "(34) dd failed"
 
-       flush_and_compare $tf $tf.2 "(35)"
+       flush_and_compare $tf $tf_copy "(35)"
 
        # Then one more partial, but compressible, chunk after that at
        # 768K
+       # Seek to 768K and write 64K
        dd if=$source bs=64K count=1 skip=12 seek=12 conv=notrunc of=$tf ||
                error "(36) dd failed"
-       dd if=$source bs=64K count=1 skip=12 seek=12 conv=notrunc of=$tf.2 ||
+       dd if=$source bs=64K count=1 skip=12 seek=12 conv=notrunc of=$tf_copy ||
                error "(37) dd failed"
 
-       flush_and_compare $tf $tf.2 "(38)"
+       flush_and_compare $tf $tf_copy "(38)"
 
        # OK, now we have this complex file, let's test reading it at a
        # number of block sizes to give us unusual offsets