Whamcloud - gitweb
EX-7433 osc: disable CPU-access features for RDMA only pages
author Patrick Farrell <pfarrell@whamcloud.com>
Wed, 13 Sep 2023 15:36:58 +0000 (11:36 -0400)
committer Andreas Dilger <adilger@whamcloud.com>
Mon, 18 Sep 2023 06:26:45 +0000 (06:26 +0000)
Pages which cannot be accessed by the CPU are referred to
as RDMA only pages.  If pages cannot be accessed by the
CPU, it is impossible for us to do compression,
encryption, checksums, or short-io (data-in-RPC) on them.

This patch disables compression and encryption for these
pages and cleans up the code so checksums and short-io
are disabled by the same code.

The only user of RDMA only pages today is Nvidia's GPU
direct, so this patch disables compression and
encryption with GPU direct.

NB: We eventually intend to handle compression for
GPU direct with server side compress/decompress.

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: Iad9311617cddf27d3ff75a17429499c573067ea0
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51770
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/osc/osc_compress.c
lustre/osc/osc_request.c

index cfccf8e..2e80212 100644 (file)
@@ -516,10 +516,14 @@ int decompress_request(struct osc_brw_async_args *aa, int page_count)
        pages_in_chunk = chunk_size / PAGE_SIZE;
 
        for (i = 0; i < page_count; i+=pages_in_chunk) {
+               struct cl_page *cl_page =
+                               oap2cl_page(brw_page2oap(pga[i]));
 
                if (!is_chunk_start(pga[i]->pg, &llch))
                        continue;
 
+               LASSERT(cl_page->cp_comp_type != LL_COMPR_TYPE_NONE);
+
                if (!src) { /* get chunk size once */
                        int rpc_chunk_bits;
 
index 6ebda6e..b59735c 100644 (file)
@@ -1528,6 +1528,7 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
        bool directio = false;
        bool enable_checksum = true;
        struct cl_page *clpage;
+       bool page_access_allowed = true;
        bool compressed = false;
 
        ENTRY;
@@ -1537,6 +1538,19 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
                if (clpage->cp_type == CPT_TRANSIENT)
                        directio = true;
 
+               if (lnet_is_rdma_only_page(pga[0]->pg)) {
+                       /* RDMA only pages cannot be accessed by the CPU, so
+                        * functionality which requires read/write access to
+                        * the file data in these pages must be disabled
+                        * (encryption, compression, checksums, and
+                        *  short io (RPC data inlining))
+                        */
+                       page_access_allowed = false;
+               }
+
+               /* If compression disabled for the file NONE is set for all
+                * pages, so it is enough to check only one
+                */
                compressed = clpage->cp_comp_type != LL_COMPR_TYPE_NONE;
        }
        if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ))
@@ -1564,17 +1578,13 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
                        oap->oap_page_off;
        }
 
-       if (opc == OST_WRITE && compressed) {
-               /*
-                * If compression disabled for the file -1 is set to
-                * all pages, so it is enough to check only one
-                * */
-               if ((oap2cl_page(brw_page2oap(pga[0])))->cp_comp_type
-                                                       == LL_COMPR_TYPE_NONE) {
-                       compressed = 0;
-                       goto skip_compression;
-               }
+       if (compressed && !page_access_allowed)
+               RETURN(-EINVAL);
+
+       if (inode && IS_ENCRYPTED(inode) && !page_access_allowed)
+               RETURN(-EINVAL);
 
+       if (opc == OST_WRITE && compressed) {
                rc = compress_request(obd_name, oa, pga, &pga, page_count,
                                      &page_count);
                if (rc) {
@@ -1597,7 +1607,6 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
                }
        }
 
-skip_compression:
        if (opc == OST_WRITE && inode && IS_ENCRYPTED(inode) &&
            llcrypt_has_encryption_key(inode)) {
                struct page **pa = NULL;
@@ -1750,20 +1759,18 @@ retry_encrypt:
         req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
                              niocount * sizeof(*niobuf));
 
-       for (i = 0; i < page_count; i++) {
-               short_io_size += pga[i]->count;
-               if (!inode || !IS_ENCRYPTED(inode) ||
-                   !ll_has_encryption_key(inode)) {
-                       pga[i]->bp_count_diff = 0;
-                       pga[i]->bp_off_diff = 0;
+       /* short writes require that the CPU be able to access page contents */
+       if (page_access_allowed) {
+               for (i = 0; i < page_count; i++) {
+                       short_io_size += pga[i]->count;
+                       if (!inode || !IS_ENCRYPTED(inode) ||
+                           !ll_has_encryption_key(inode)) {
+                               pga[i]->bp_count_diff = 0;
+                               pga[i]->bp_off_diff = 0;
+                       }
                }
        }
 
-       if (lnet_is_rdma_only_page(pga[0]->pg)) {
-               enable_checksum = false;
-               short_io_size = 0;
-       }
-
        /* Check if read/write is small enough to be a short io. */
        if (short_io_size > cli->cl_max_short_io_bytes || niocount > 1 ||
            !imp_connect_shortio(cli->cl_import))
@@ -1916,7 +1923,8 @@ no_bulk:
         if (osc_should_shrink_grant(cli))
                 osc_shrink_grant_local(cli, &body->oa);
 
-       if (!cli->cl_checksum || sptlrpc_flavor_has_bulk(&req->rq_flvr))
+       if (!cli->cl_checksum || sptlrpc_flavor_has_bulk(&req->rq_flvr) ||
+           !page_access_allowed)
                enable_checksum = false;
 
         /* size[REQ_REC_OFF] still sizeof (*body) */