From 29eabeb34c5ba2cffdb5353d108ea56e0549665b Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Wed, 5 Feb 2020 20:23:20 -0800 Subject: [PATCH 1/1] LU-14798 lustre: Support RDMA only pages Some memory architectures and CPU-offload cards with on-board memory do not map data pages into the CPU address space. Allow RDMA of data directly into those pages without accessing their contents. Therefore, checksums are not computed on pages of this type. Signed-off-by: Wang Shilong Signed-off-by: Amir Shehata Change-Id: I189c34893ffa500ed275f2a1f79e8fb817a2489d lustre-change: https://review.whamcloud.com/37454 Reviewed-by: Andreas Dilger Whamcloud-bug-id: EX-773 Reviewed-on: https://review.whamcloud.com/44111 Tested-by: jenkins Reviewed-by: Wang Shilong Reviewed-by: Patrick Farrell Tested-by: Maloo --- lustre/include/lustre_osc.h | 5 +++-- lustre/osc/osc_io.c | 3 +++ lustre/osc/osc_request.c | 16 ++++++++++++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h index 766b02a..e7c290d 100644 --- a/lustre/include/lustre_osc.h +++ b/lustre/include/lustre_osc.h @@ -940,8 +940,9 @@ struct osc_extent { oe_ndelay:1, /** direct IO pages */ oe_dio:1, - /** this extent consists of RDMA only pages */ - oe_is_rdma_only; + /** this extent consists of pages that are not directly accessible + * from the CPU */ + oe_is_rdma_only:1; /** how many grants allocated for this extent. * Grant allocated for this extent. There is no grant allocated * for reading extents and sync write extents. 
*/ diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 5fe14d0..84c1831 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -42,6 +42,7 @@ #include #include "osc_internal.h" +#include /** \addtogroup osc * @{ @@ -153,6 +154,8 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios, page = cl_page_list_first(qin); if (page->cp_type == CPT_TRANSIENT) brw_flags |= OBD_BRW_NOCACHE; + if (lnet_is_rdma_only_page(page->cp_vmpage)) + brw_flags |= OBD_BRW_RDMA_ONLY; /* * NOTE: here @page is a top-level page. This is done to avoid diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 725dadc..b9c2e05 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -48,6 +48,7 @@ #include #include "osc_internal.h" +#include atomic_t osc_pool_req_count; unsigned int osc_reqpool_maxreqcount; @@ -1391,6 +1392,7 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa, const char *obd_name = cli->cl_import->imp_obd->obd_name; struct inode *inode = NULL; bool directio = false; + bool enable_checksum = true; ENTRY; if (pga[0]->pg) { @@ -1534,6 +1536,11 @@ retry_encrypt: } } + if (lnet_is_rdma_only_page(pga[0]->pg)) { + enable_checksum = false; + short_io_size = 0; + } + /* Check if read/write is small enough to be a short io. 
*/ if (short_io_size > cli->cl_max_short_io_bytes || niocount > 1 || !imp_connect_shortio(cli->cl_import)) @@ -1683,10 +1690,12 @@ no_bulk: if (osc_should_shrink_grant(cli)) osc_shrink_grant_local(cli, &body->oa); + if (!cli->cl_checksum || sptlrpc_flavor_has_bulk(&req->rq_flvr)) + enable_checksum = false; + /* size[REQ_REC_OFF] still sizeof (*body) */ if (opc == OST_WRITE) { - if (cli->cl_checksum && - !sptlrpc_flavor_has_bulk(&req->rq_flvr)) { + if (enable_checksum) { /* store cl_cksum_type in a local variable since * it can be changed via lprocfs */ enum cksum_types cksum_type = cli->cl_cksum_type; @@ -1724,8 +1733,7 @@ no_bulk: req_capsule_set_size(pill, &RMF_RCS, RCL_SERVER, sizeof(__u32) * niocount); } else { - if (cli->cl_checksum && - !sptlrpc_flavor_has_bulk(&req->rq_flvr)) { + if (enable_checksum) { if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0) body->oa.o_flags = 0; body->oa.o_flags |= obd_cksum_type_pack(obd_name, -- 1.8.3.1