From 7577b04433bcf3b15f6c50f9bca1da425f23ce13 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Tue, 31 Oct 2023 11:11:01 -0400 Subject: [PATCH] EX-7601 ofd: series description and reorder declarations Reorder declarations in tgt_brw_read prior to adding things. This trivial patch is a good place to put the description of this series, which handles unaligned reads to compressed files. ----------------------- These patches handle compression chunk unaligned reads on the server. When using compression, the client attempts to send chunk aligned reads, but sometimes it can't, and the client will send a read to the server which is not chunk aligned. In this case, the server must read the full chunk, decompress it, and provide the requested data to the client. Here's how we do this. The server receives a set of remote niobufs describing IO from the client. Each remote niobuf (rnb) describes a range of data the client wants to do IO to. These are translated to a set of local niobufs on the server, which we then use to do the read. For compression, the server has to read complete chunks on unaligned reads. So we walk these remote niobufs and identify unaligned read requests (in ofd_preprw_read), then round them to chunk size. The server then reads the chunk rounded read request from storage. The local niobufs now contain a set of complete compressed chunks, i.e., the raw data from disk. We need to decompress the chunks where the client is doing an unaligned read, but leave the other chunks compressed (because the client will uncompress them). So, in obd_decompress_read, we use the remote niobuf to identify unaligned reads from the client. We then walk the local niobufs, identify the chunks which match the unaligned reads from the client, and decompress them 'in place'. The decompression uses temporary buffers, but the decompressed data is placed back in the local niobuf. (If the data is uncompressed on disk, we of course do not decompress it. 
This happens for incompressible data.) Now the local niobuf contains some raw chunks and some chunks which have been decompressed. This is *more* data than the client asked for. Normally, the server local niobuf contains exactly what the client asked for, so the server checksums and sends the entire local niobuf. But because we read complete chunks, the local niobuf contains more data than the client requested. This means we need to identify the subset of the local niobuf which the client actually wants to read and present that to the client. In order to do that, we walk the local niobuf and use the remote niobufs (the description of the pages the client needs) and create a special tx niobuf which points to only the pages the client wants (io_lnb_to_tx_lnb). Then we use this tx niobuf for checksum and transfer to the client. Test-Parameters: trivial Signed-off-by: Patrick Farrell Change-Id: Ic89dcef7e169879725caa6cdef4619b9a76b2b37 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/52915 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Artem Blagodarenko --- lustre/obdclass/lustre_compr.c | 56 ++++++++++++++++++++++++++++++++++++++++++ lustre/target/tgt_handler.c | 17 ++++++++----- 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/lustre/obdclass/lustre_compr.c b/lustre/obdclass/lustre_compr.c index 33cb305..a62c5c5 100644 --- a/lustre/obdclass/lustre_compr.c +++ b/lustre/obdclass/lustre_compr.c @@ -22,6 +22,62 @@ /* * Copyright (c) 2023, DataDirect Networks Inc, all rights reserved. * Author: Artem Blagodarenko + * Author: Patrick Farrell + */ + +/* + * When using compression, the client attempts to send chunk + * aligned reads, but sometimes it can't, and the client will + * send a read to the server which is not chunk aligned. + * + * In this case, the server must read the full chunk, + * decompress it, and provide the requested data to the client. 
+ * + * The server receives a set of remote niobufs describing IO + * from the client. Each remote niobuf (rnb) describes a range + * of data the client wants to do IO to. + * + * These are translated to a set of local niobufs on the + * server, which we then use to do the read. For compression, + * the server has to read complete chunks on unaligned reads. + * + * So we walk these remote niobufs and identify unaligned read + * requests (in ofd_preprw_read), then round them to chunk + * size. The server then reads the chunk rounded read request + * from storage. + * + * The local niobufs now contain a set of complete compressed + * chunks, i.e., the raw data from disk. We need to decompress + * the chunks where the client is doing an unaligned read, but + * leave the other chunks compressed (because the client will + * uncompress them). + * + * So, in obd_decompress_read, we use the remote niobuf to + * identify unaligned reads from the client. We then walk the + * local niobufs, identify the chunks which match the unaligned + * reads from the client, and decompress them 'in place'. + * The decompression uses temporary buffers, but the + * decompressed data is placed back in the local niobuf. + * (If the data is uncompressed on disk, we of course do not + * decompress it. This happens for incompressible data.) + * + * Now the local niobuf contains some raw chunks and some + * chunks which have been decompressed. This is *more* data + * than the client asked for. Normally, the server local + * niobuf contains exactly what the client asked for, so the + * server checksums and sends the entire local niobuf. But + * because we read complete chunks, the local niobuf contains + * more data than the client requested. + * + * This means we need to identify the subset of the local + * niobuf which the client actually wants to read and present + * that to the client. 
+ * + * In order to do that, we walk the local niobuf and use the + * remote niobufs (the description of the pages the client + * needs) and create a special tx niobuf which points to only + * the pages the client wants (io_lnb_to_tx_lnb). Then we use + * this tx niobuf for checksum and transfer to the client. */ #define DEBUG_SUBSYSTEM S_SEC diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index a1f9501..13dafe0 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -2299,18 +2299,23 @@ static int tgt_checksum_niobuf_rw(struct lu_target *tgt, int tgt_brw_read(struct tgt_session_info *tsi) { struct ptlrpc_request *req = tgt_ses_req(tsi); - struct ptlrpc_bulk_desc *desc = NULL; + struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data; struct obd_export *exp = tsi->tsi_exp; + struct lustre_handle lockh = { 0 }; + struct ptlrpc_bulk_desc *desc = NULL; struct niobuf_remote *remote_nb; struct niobuf_local *local_nb; + struct ost_body *repbody; + struct ost_body *body; struct obd_ioobj *ioo; - struct ost_body *body, *repbody; - struct lustre_handle lockh = { 0 }; - int npages, nob = 0, rc, i, no_reply = 0, - npages_read; - struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data; const char *obd_name = exp->exp_obd->obd_name; + int no_reply = 0; + int npages_read; ktime_t kstart; + int nob = 0; + int npages; + int rc; + int i; ENTRY; -- 1.8.3.1