/*
* Copyright (c) 2023, DataDirect Networks Inc, all rights reserved.
* Author: Artem Blagodarenko <ablagodarenko@whamcloud.com>
+ * Author: Patrick Farrell <pfarrell@whamcloud.com>
+ */
+
+/*
+ * When using compression, the client attempts to send chunk
+ * aligned reads, but sometimes it can't, and the client will
+ * send a read to the server which is not chunk aligned.
+ *
+ * In this case, the server must read the full chunk,
+ * decompress it, and provide the requested data to the client.
+ *
+ * The server receives a set of remote niobufs describing IO
+ * from the client. Each remote niobuf (rnb) describes a range
+ * of data the client wants to do IO to.
+ *
+ * These are translated to a set of local niobufs on the
+ * server, which we then use to do the read. For compression,
+ * the server has to read complete chunks on unaligned reads.
+ *
+ * So we walk these remote niobufs and identify unaligned read
+ * requests (in ofd_preprw_read), then round them to chunk
+ * size. The server then reads the chunk rounded read request
+ * from storage.
+ *
+ * The local niobufs now contain a set of complete compressed
+ * chunks, i.e., the raw data from disk. We need to decompress
+ * the chunks where the client is doing an unaligned read, but
+ * leave the other chunks compressed (because the client will
+ * decompress them).
+ *
+ * So, in obd_decompress_read, we use the remote niobuf to
+ * identify unaligned reads from the client. We then walk the
+ * local niobufs, identify the chunks which match the unaligned
+ * reads from the client, and decompress them 'in place'.
+ * The decompression uses temporary buffers, but the
+ * decompressed data is placed back in the local niobuf.
+ * (If the data is uncompressed on disk, we of course do not
+ * decompress it. This happens for incompressible data.)
+ *
+ * Now the local niobuf contains some raw chunks and some
+ * chunks which have been decompressed. This is *more* data
+ * than the client asked for. Normally, the server local
+ * niobuf contains exactly what the client asked for, so the
+ * server checksums and sends the entire local niobuf. But
+ * because we read complete chunks, the local niobuf contains
+ * more data than the client requested.
+ *
+ * This means we need to identify the subset of the local
+ * niobuf which the client actually wants to read and present
+ * that to the client.
+ *
+ * In order to do that, we walk the local niobuf and use the
+ * remote niobufs (the description of the pages the client
+ * needs) and create a special tx niobuf which points to only
+ * the pages the client wants (io_lnb_to_tx_lnb). Then we use
+ * this tx niobuf for checksum and transfer to the client.
*/
#define DEBUG_SUBSYSTEM S_SEC
int tgt_brw_read(struct tgt_session_info *tsi)
{
struct ptlrpc_request *req = tgt_ses_req(tsi);
- struct ptlrpc_bulk_desc *desc = NULL;
+ struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
struct obd_export *exp = tsi->tsi_exp;
+ struct lustre_handle lockh = { 0 };
+ struct ptlrpc_bulk_desc *desc = NULL;
struct niobuf_remote *remote_nb;
struct niobuf_local *local_nb;
+ struct ost_body *repbody;
+ struct ost_body *body;
struct obd_ioobj *ioo;
- struct ost_body *body, *repbody;
- struct lustre_handle lockh = { 0 };
- int npages, nob = 0, rc, i, no_reply = 0,
- npages_read;
- struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data;
const char *obd_name = exp->exp_obd->obd_name;
+ int no_reply = 0;
+ int npages_read;
ktime_t kstart;
+ int nob = 0;
+ int npages;
+ int rc;
+ int i;
ENTRY;