Whamcloud - gitweb
LU-14798 lustre: Support RDMA only pages 11/44111/2
author: Amir Shehata <ashehata@whamcloud.com>
Thu, 6 Feb 2020 04:23:20 +0000 (20:23 -0800)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 10 Aug 2021 15:40:52 +0000 (15:40 +0000)
Some memory architectures and CPU-offload cards with
on-board memory do not map data pages into the CPU
address space. Allow RDMA of data directly into those
pages without accessing their contents.

Because computing a checksum requires the CPU to read the
page contents, skip checksumming for these types of pages.

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Change-Id: I189c34893ffa500ed275f2a1f79e8fb817a2489d
lustre-change: https://review.whamcloud.com/37454
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Whamcloud-bug-id: EX-773
Reviewed-on: https://review.whamcloud.com/44111
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Wang Shilong <wangshilong1991@gmail.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/lustre_osc.h
lustre/osc/osc_io.c
lustre/osc/osc_request.c

index 766b02a..e7c290d 100644 (file)
@@ -940,8 +940,9 @@ struct osc_extent {
                                oe_ndelay:1,
        /** direct IO pages */
                                oe_dio:1,
                                oe_ndelay:1,
        /** direct IO pages */
                                oe_dio:1,
-       /** this extent consists of RDMA only pages */
-                               oe_is_rdma_only;
+       /** this extent consists of pages that are not directly accessible
+        *  from the CPU */
+                               oe_is_rdma_only:1;
        /** how many grants allocated for this extent.
         *  Grant allocated for this extent. There is no grant allocated
         *  for reading extents and sync write extents. */
        /** how many grants allocated for this extent.
         *  Grant allocated for this extent. There is no grant allocated
         *  for reading extents and sync write extents. */
index 5fe14d0..84c1831 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/falloc.h>
 
 #include "osc_internal.h"
 #include <linux/falloc.h>
 
 #include "osc_internal.h"
+#include <lnet/lnet_rdma.h>
 
 /** \addtogroup osc
  *  @{
 
 /** \addtogroup osc
  *  @{
@@ -153,6 +154,8 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
        page = cl_page_list_first(qin);
        if (page->cp_type == CPT_TRANSIENT)
                brw_flags |= OBD_BRW_NOCACHE;
        page = cl_page_list_first(qin);
        if (page->cp_type == CPT_TRANSIENT)
                brw_flags |= OBD_BRW_NOCACHE;
+       if (lnet_is_rdma_only_page(page->cp_vmpage))
+               brw_flags |= OBD_BRW_RDMA_ONLY;
 
         /*
          * NOTE: here @page is a top-level page. This is done to avoid
 
         /*
          * NOTE: here @page is a top-level page. This is done to avoid
index 725dadc..b9c2e05 100644 (file)
@@ -48,6 +48,7 @@
 #include <linux/falloc.h>
 
 #include "osc_internal.h"
 #include <linux/falloc.h>
 
 #include "osc_internal.h"
+#include <lnet/lnet_rdma.h>
 
 atomic_t osc_pool_req_count;
 unsigned int osc_reqpool_maxreqcount;
 
 atomic_t osc_pool_req_count;
 unsigned int osc_reqpool_maxreqcount;
@@ -1391,6 +1392,7 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
        const char *obd_name = cli->cl_import->imp_obd->obd_name;
        struct inode *inode = NULL;
        bool directio = false;
        const char *obd_name = cli->cl_import->imp_obd->obd_name;
        struct inode *inode = NULL;
        bool directio = false;
+       bool enable_checksum = true;
 
        ENTRY;
        if (pga[0]->pg) {
 
        ENTRY;
        if (pga[0]->pg) {
@@ -1534,6 +1536,11 @@ retry_encrypt:
                }
        }
 
                }
        }
 
+       if (lnet_is_rdma_only_page(pga[0]->pg)) {
+               enable_checksum = false;
+               short_io_size = 0;
+       }
+
        /* Check if read/write is small enough to be a short io. */
        if (short_io_size > cli->cl_max_short_io_bytes || niocount > 1 ||
            !imp_connect_shortio(cli->cl_import))
        /* Check if read/write is small enough to be a short io. */
        if (short_io_size > cli->cl_max_short_io_bytes || niocount > 1 ||
            !imp_connect_shortio(cli->cl_import))
@@ -1683,10 +1690,12 @@ no_bulk:
         if (osc_should_shrink_grant(cli))
                 osc_shrink_grant_local(cli, &body->oa);
 
         if (osc_should_shrink_grant(cli))
                 osc_shrink_grant_local(cli, &body->oa);
 
+       if (!cli->cl_checksum || sptlrpc_flavor_has_bulk(&req->rq_flvr))
+               enable_checksum = false;
+
         /* size[REQ_REC_OFF] still sizeof (*body) */
         if (opc == OST_WRITE) {
         /* size[REQ_REC_OFF] still sizeof (*body) */
         if (opc == OST_WRITE) {
-                if (cli->cl_checksum &&
-                    !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
+                if (enable_checksum) {
                         /* store cl_cksum_type in a local variable since
                          * it can be changed via lprocfs */
                        enum cksum_types cksum_type = cli->cl_cksum_type;
                         /* store cl_cksum_type in a local variable since
                          * it can be changed via lprocfs */
                        enum cksum_types cksum_type = cli->cl_cksum_type;
@@ -1724,8 +1733,7 @@ no_bulk:
                 req_capsule_set_size(pill, &RMF_RCS, RCL_SERVER,
                                      sizeof(__u32) * niocount);
         } else {
                 req_capsule_set_size(pill, &RMF_RCS, RCL_SERVER,
                                      sizeof(__u32) * niocount);
         } else {
-                if (cli->cl_checksum &&
-                    !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
+                if (enable_checksum) {
                         if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
                                 body->oa.o_flags = 0;
                        body->oa.o_flags |= obd_cksum_type_pack(obd_name,
                         if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
                                 body->oa.o_flags = 0;
                        body->oa.o_flags |= obd_cksum_type_pack(obd_name,