From 0ebc400d8ec32aa809da56240409a7dab4b732b8 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Sun, 5 Nov 2023 10:55:29 -0500 Subject: [PATCH] EX-7601 ofd: create read mapping for read-modify-write When we need to do a read-modify-write for unaligned writes to a compressed file, it's important that we read only the portion of the file that is receiving unaligned IO. This patch identifies these chunks in preprw_write and creates a read lnb mapping from a subset of the pages for write. The pages that are read in are then decompressed. Note that one issue this patch does not address is reading data past EOF. If the final chunk is unaligned, we will round the write to cover it. This results in extending the file inappropriately, writing zeroes where they aren't needed. The read side gives us the info to address this, which we will do in a future patch. Signed-off-by: Patrick Farrell Change-Id: Iede43f12127cbb93e73c22a915192aa2f814a927 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52997 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Artem Blagodarenko --- lustre/ofd/ofd_io.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 43f91db..2592248 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -757,13 +757,13 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, struct dt_object *dt_obj = NULL; struct ofd_object *fo; int chunk_size = chunk_bits ? 
1 << chunk_bits : 0; + int pages_per_chunk = chunk_size / PAGE_SIZE; enum dt_bufs_type dbt = DT_BUFS_TYPE_WRITE; bool compr_unaligned_write = false; __u64 prev_buf_end = 0; int maxlnb = *nr_write; int tot_bytes = 0; - /* not implemented yet - will be determined later */ - int nr_read; + int nr_read = 0; __u64 begin; int rc = 0; __u64 end; @@ -855,6 +855,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, /* parse remote buffers to local buffers and prepare the latter */ for (*nr_write = 0, i = 0, j = 0; i < obj->ioo_bufcnt; i++) { + int first_chunk_start_idx = -1; + bool start_rounded_up = false; __u64 orig_start; __u64 buf_start; __u64 orig_end; @@ -893,6 +895,11 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, chunk_round(&buf_start, &buf_end, chunk_size); if (buf_start < prev_buf_end) { + CDEBUG(D_SEC, + "buf_start %llu orig_start %llu buf_end %llu orig_end %llu\n", + buf_start, orig_start, buf_end, + orig_end); + start_rounded_up = true; buf_start = prev_buf_end; /* two rnbs may be entirely inside the same * chunk, in which case we're already doing IO @@ -902,6 +909,9 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, if (buf_start == buf_end) continue; } + CDEBUG(D_SEC, + "buf_start %llu orig_start %llu buf_end %llu orig_end %llu\n", + buf_start, orig_start, buf_end, orig_end); /* this write is not aligned to chunk size */ if (buf_start != orig_start || buf_end != orig_end) { @@ -943,6 +953,64 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, if (!(rnb[i].rnb_flags & OBD_BRW_GRANTED)) write_lnb[j+k].lnb_rc = -ENOSPC; } + /* compression: + * when writing, we must do read-modify-write to update + * compressed files correctly. this means writes must read and + * decompress before modifying. 
so we don't read everything, + * we build a special read lnb by mapping from the write lnb + * + * the start of this rnb is unaligned, so we need to read the + * chunk there. map it to the read lnb. + * + * note if the start of the buffer was moved up to avoid + * overlap, then we ignore the first chunk - it's being handled + * as part of the previous rnb + */ + if (buf_start != orig_start && !start_rounded_up) { + first_chunk_start_idx = j; + CDEBUG(D_SEC, + "buf count %d buf_start %llu orig_start %llu, first_chunk_start_idx %d\n", + rc, buf_start, orig_start, first_chunk_start_idx); + for (k = 0; k < rc; k++) { + CDEBUG(D_SEC, "k %d nr_read %d write_lnb %d, at offset %llu\n", + k, nr_read, j+k, + write_lnb[j + k].lnb_file_offset); + + if (k == pages_per_chunk) + break; + read_lnb[nr_read] = write_lnb[j+k]; + nr_read++; + } + } + + /* compression: + * the end of this rnb is unaligned, so we need to read the + * chunk there. map it to the read lnb + */ + if (buf_end != orig_end) { + /* calculate the start index of the last chunk */ + int chunk_start_idx = j + rc - pages_per_chunk; + + CDEBUG(D_SEC, + "rc %d buf_end %llu orig_end %llu, chunk_start_idx %d\n", + rc, buf_end, orig_end, chunk_start_idx); + /* if both the beginning and end are unaligned and + * hit the same chunk, then we've already mapped this + * chunk for reading, so skip it + */ + if (chunk_start_idx != first_chunk_start_idx) { + for (k = 0; k < rc; k++) { + CDEBUG(D_SEC, "k %d nr_read %d write_lnb %d, at offset %llu\n", + k, nr_read, j+k, + write_lnb[j + k].lnb_file_offset); + if (k == pages_per_chunk) + break; + read_lnb[nr_read] = + write_lnb[chunk_start_idx + k]; + nr_read++; + } + } + } j += rc; *nr_write += rc; maxlnb -= rc; @@ -972,9 +1040,7 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, } if (compr_unaligned_write) { - read_lnb = write_lnb; - nr_read = *nr_write; - + LASSERT(nr_read); rc = dt_read_prep(env, ofd_object_child(fo), read_lnb, nr_read, true); if 
(unlikely(rc != 0)) @@ -1000,6 +1066,24 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, ofd_read_unlock(env, fo); + /* nr_read > 0 indicates compression, so we must map back from the read + * lnbs to the write lnbs + */ + j = 0; + for (i = 0; i < nr_read; i++) { + LASSERT(chunk_size); + CDEBUG(D_SEC, "read lnb %d at %llu\n", i, read_lnb[i].lnb_file_offset); + for (; j < *nr_write; j++) { + CDEBUG(D_SEC, "write lnb %d at %llu\n", j, read_lnb[j].lnb_file_offset); + if (read_lnb[i].lnb_file_offset == write_lnb[j].lnb_file_offset) { + CDEBUG(D_SEC, "read_lnb %d is write_lnb %d (offset %llu), lnb_rc %d\n", + i, j, write_lnb[j].lnb_file_offset, read_lnb[i].lnb_rc); + write_lnb[j] = read_lnb[i]; + break; + } + } + } + ofd_access(env, ofd, &(struct lu_fid) { .f_seq = oa->o_parent_seq, -- 1.8.3.1