From 612963872ec0f3b4e14e9e2a33710d5e28130074 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Tue, 12 Dec 2023 12:35:25 -0500 Subject: [PATCH] EX-7601 ofd: add past eof check for reads The client does not normally generate reads past EOF, but this can occur during some racing situations. We need to check for that case and not attempt decompression, since there's no data to decompress if we're reading past EOF. This covers a failure which shows up occasionally in the racing parts of the test suite, but it's challenging to write an explicit test for this. We also add handling for complete reads of the last chunk, even if that chunk is partial, because we can send that to the client for decompression. This allows us to remove the slightly funky eof handling in decompress_rnb, since we'll just not call that code in this case now. Note we'll still call decompress_rnb, etc, for writes if they start before EOF and finish after EOF (and are unaligned). This is fine - this case should be rare and if we hit it, we'll notice there's nothing to decompress and proceed accordingly. Signed-off-by: Patrick Farrell Change-Id: I50295f2803af611de5069d094c0a5d1b0a4a9c2d Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53428 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Artem Blagodarenko Reviewed-by: Andreas Dilger --- lustre/ofd/ofd_compress.c | 41 ------------------------------------- lustre/ofd/ofd_io.c | 51 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/lustre/ofd/ofd_compress.c b/lustre/ofd/ofd_compress.c index 3a59dad..5e73e44 100644 --- a/lustre/ofd/ofd_compress.c +++ b/lustre/ofd/ofd_compress.c @@ -219,9 +219,6 @@ int decompress_rnb(const char *obd_name, struct niobuf_local *lnbs, chunk_start = round_down(rnb_end, chunk_size); chunk_end = chunk_start + chunk_size; if (chunk_start != rnb_end) { - struct niobuf_local *prev = NULL; - int j; - chunk_found = false; for (; i < lnb_npages; i++) { lnb = lnbs + i; @@ -241,39 +238,6 @@ int decompress_rnb(const char *obd_name, struct niobuf_local *lnbs, if (!chunk_found) GOTO(out, rc = -EINVAL); - /* the read is not chunk aligned at the end, but it's possible - * the last part of this read is a not-full chunk, and if so, - * we may be able to send it to the client - */ - for (j = i; j < lnb_npages; j++) { - CDEBUG(D_SEC, "page %d, lnb_rc %d\n", j, lnbs[j].lnb_rc); - /* we have a complete chunk, proceed to decompression */ - if (j - i == pages_per_chunk - 1) { - CDEBUG(D_SEC, "complete chunk, from %d to %d\n", - i, j); - break; - } - /* we've hit the end of the data in this lnb; if the - * end of data is before the end of the read, then we - * hit a hole, and we can skip decompression - this is - * a short chunk, so the read will return the complete - * chunk to the client and the client will decompress it - */ - if (lnbs[j].lnb_rc == 0 && !write) { - CDEBUG(D_SEC, "Hit EOF in lnb %d at %llu\n", - j, lnbs[j].lnb_file_offset); - if (prev && - prev->lnb_file_offset + prev->lnb_len <= rnb_end) { - CDEBUG(D_SEC, - "read ends at %llu, beyond EOF, client will decompress chunk\n", - rnb_end); - GOTO(out_eof, rc = 0); - } - break; - } - prev = lnbs + j; - } - rc = decompress_chunk_in_lnb(obd_name, lnbs, i, bounce_src, bounce_dst, type, lvl, chunk_size); if (rc) @@ -293,11 +257,6 @@ out: i -= pages_per_chunk; *lnb_start = i; } - /* if we hit eof, we just skip moving the chunk pointer because it's - * easier than accounting for an uneven number of pages, and the cost - * is purely that further reads start their search early - */ -out_eof: /* we were given this rnb because it's unaligned, so if we succeed, we * must've found a chunk */ diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index b0479f0..3ccdb11 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -589,6 +589,7 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, int *nr_local, struct niobuf_local *lnb, enum ll_compr_type type, int lvl, int chunk_bits) { + struct dt_object *dt_obj = NULL; struct ofd_object *fo; int chunk_size = chunk_bits ? 1 << chunk_bits : 0; enum dt_bufs_type dbt = DT_BUFS_TYPE_READ; @@ -596,6 +597,7 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, int niocount = obj->ioo_bufcnt; int maxlnb = *nr_local; __u64 prev_buf_end = 0; + int eof_rnb = INT_MAX; int tot_bytes = 0; __u64 begin; __u64 end; @@ -656,8 +658,11 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, CDEBUG(D_SEC, "rnb %d buf_start %llu, buf_end %llu\n", i, buf_start, buf_end); - /* compressed reads must be rounded to cover whole chunks */ - if (chunk_size) { + /* compressed reads must be rounded to cover whole chunks, + * unless they're past EOF, which is true if eof_rnb has been + * set + */ + if (chunk_size && (eof_rnb == INT_MAX)) { chunk_round(&buf_start, &buf_end, chunk_size); /* rounded rnbs can overlap at the chunk level, but it's @@ -688,8 +693,44 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, * decompression and dt_read_prep needs to know this */ if (buf_start != orig_start || - buf_end != orig_end) - compr_unaligned_read = true; + buf_end != orig_end) { + /* get attr only once for each IO */ + if (!dt_obj) { + dt_obj = ofd_object_child(fo); + rc = dt_attr_get(env, dt_obj, la); + if (rc) + GOTO(buf_put, rc); + } + /* if this read is beyond EOF, there's no + * compressed data under it, so no need to do + * decompression, so no rounding required + */ + if (buf_start >= la->la_size || + /* this is a related - and very common - + * case, where the read starts aligned and + * goes to or past EOF. In that case we + * can also skip decompression. This + * covers the case of reading a partial + * chunk at EOF. Note a limitation here: + * if the read is not chunk aligned at the + * start, we'll decompress unecessarily. + * DIO does not chunk align reads, so this + * may cause a lot of unnecessary + * decompressions, but if that's the case, + * we can pass EOF in to the decompression + * code and check there. + */ + (buf_start == orig_start && orig_end >= la->la_size)) { + CDEBUG(D_SEC, + "rnb %d from %llu to %llu (chunk rounded: %llu to %llu) is past eof\n", + i, orig_start, orig_end, buf_start, buf_end); + buf_start = orig_start; + buf_end = orig_end; + eof_rnb = i; + } else { + compr_unaligned_read = true; + } + } } buf_len = buf_end - buf_start; @@ -715,7 +756,7 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, if (compr_unaligned_read) { rc = ofd_decompress_read(env, exp, oa, rnb, lnb, obj, - *nr_local, INT_MAX, type, lvl, + *nr_local, eof_rnb, type, lvl, chunk_bits, false); if (unlikely(rc)) GOTO(buf_put, rc); -- 1.8.3.1