Whamcloud - gitweb
LU-16011 lnet: use preallocate bulk for server 52/47952/6
author: Alexey Lyashkov <alexey.lyashkov@hpe.com>
date: Thu, 14 Jul 2022 13:39:39 +0000 (16:39 +0300)
committer: Oleg Drokin <green@whamcloud.com>
commit date: Thu, 1 Sep 2022 05:52:15 +0000 (05:52 +0000)
The server side wants preallocated bulk buffers to avoid heavy lock
contention on the page cache.
Without them, LST is limited to 35Gb/s on a 3-rail host (HDR each)
due to high CPU usage.
Preallocated bulks increase memory consumption for small bulks,
but performance improves dramatically, up to 74Gb/s with very low
CPU usage.

Test-Parameters: trivial testlist=sanity-lnet,lnet-selftest
Signed-off-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Change-Id: If1eaf5addf6c9d9f695a892dc66023b3bc293208
Reviewed-on: https://review.whamcloud.com/47952
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/selftest/brw_test.c
lnet/selftest/framework.c
lnet/selftest/rpc.c
lnet/selftest/selftest.h

index 2e77d8f..83e0548 100644 (file)
@@ -120,11 +120,12 @@ brw_client_init(struct sfw_test_instance *tsi)
 
        list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
                bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL),
 
        list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
                bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL),
-                                      off, npg, len, opc == LST_BRW_READ);
+                                      npg);
                if (bulk == NULL) {
                        brw_client_fini(tsi);
                        return -ENOMEM;
                }
                if (bulk == NULL) {
                        brw_client_fini(tsi);
                        return -ENOMEM;
                }
+               srpc_init_bulk(bulk, off, npg, len, opc == LST_BRW_READ);
 
                tsu->tsu_private = bulk;
        }
 
                tsu->tsu_private = bulk;
        }
@@ -388,8 +389,6 @@ brw_server_rpc_done(struct srpc_server_rpc *rpc)
                CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n",
                       blk->bk_niov, blk->bk_sink ? "from" : "to",
                       libcfs_id2str(rpc->srpc_peer));
                CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n",
                       blk->bk_niov, blk->bk_sink ? "from" : "to",
                       libcfs_id2str(rpc->srpc_peer));
-
-       sfw_free_pages(rpc);
 }
 
 static int
 }
 
 static int
@@ -437,7 +436,6 @@ brw_server_handle(struct srpc_server_rpc *rpc)
        struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply;
        struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst;
        int npg;
        struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply;
        struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst;
        int npg;
-       int rc;
 
         LASSERT (sv->sv_id == SRPC_SERVICE_BRW);
 
 
         LASSERT (sv->sv_id == SRPC_SERVICE_BRW);
 
@@ -488,37 +486,66 @@ brw_server_handle(struct srpc_server_rpc *rpc)
                return 0;
        }
 
                return 0;
        }
 
-       rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
-                            reqst->brw_len,
-                            reqst->brw_rw == LST_BRW_WRITE);
-       if (rc != 0)
-               return rc;
+       srpc_init_bulk(rpc->srpc_bulk, 0, npg, reqst->brw_len,
+                      reqst->brw_rw == LST_BRW_WRITE);
 
 
-        if (reqst->brw_rw == LST_BRW_READ)
-                brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
-        else
-                brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
+       if (reqst->brw_rw == LST_BRW_READ)
+               brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
+       else
+               brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
 
 
-        return 0;
+       return 0;
 }
 
 }
 
-struct sfw_test_client_ops brw_test_client;
+static int
+brw_srpc_init(struct srpc_server_rpc *rpc, int cpt)
+{
+       /* just alloc a maximal size - actual values will be adjusted later */
+       rpc->srpc_bulk = srpc_alloc_bulk(cpt, LNET_MAX_IOV);
+       if (rpc->srpc_bulk == NULL)
+               return -ENOMEM;
+
+       srpc_init_bulk(rpc->srpc_bulk, 0, LNET_MAX_IOV, 0, 0);
 
 
-void brw_init_test_client(void)
+       return 0;
+}
+
+static void
+brw_srpc_fini(struct srpc_server_rpc *rpc)
 {
 {
-        brw_test_client.tso_init       = brw_client_init;
-        brw_test_client.tso_fini       = brw_client_fini;
-        brw_test_client.tso_prep_rpc   = brw_client_prep_rpc;
-        brw_test_client.tso_done_rpc   = brw_client_done_rpc;
+       /* server RPC have just MAX_IOV size */
+       srpc_init_bulk(rpc->srpc_bulk, 0, LNET_MAX_IOV, 0, 0);
+
+       srpc_free_bulk(rpc->srpc_bulk);
+       rpc->srpc_bulk = NULL;
+}
+
+struct sfw_test_client_ops brw_test_client = {
+       .tso_init       = brw_client_init,
+       .tso_fini       = brw_client_fini,
+       .tso_prep_rpc   = brw_client_prep_rpc,
+       .tso_done_rpc   = brw_client_done_rpc,
 };
 
 };
 
-struct srpc_service brw_test_service;
+struct srpc_service brw_test_service = {
+       .sv_id         = SRPC_SERVICE_BRW,
+       .sv_name       = "brw_test",
+       .sv_handler    = brw_server_handle,
+       .sv_bulk_ready = brw_bulk_ready,
+
+       .sv_srpc_init  = brw_srpc_init,
+       .sv_srpc_fini  = brw_srpc_fini,
+};
 
 void brw_init_test_service(void)
 {
 
 void brw_init_test_service(void)
 {
-        brw_test_service.sv_id         = SRPC_SERVICE_BRW;
-        brw_test_service.sv_name       = "brw_test";
-        brw_test_service.sv_handler    = brw_server_handle;
-        brw_test_service.sv_bulk_ready = brw_bulk_ready;
+       unsigned long cache_size = cfs_totalram_pages() >> 1;
+
+       /* the brw prealloc cache must not consume more than half of memory */
+       cache_size /= LNET_MAX_IOV;
+
        brw_test_service.sv_wi_total   = brw_srv_workitems;
        brw_test_service.sv_wi_total   = brw_srv_workitems;
+
+       if (brw_test_service.sv_wi_total > cache_size)
+               brw_test_service.sv_wi_total = cache_size;
 }
 }
index 7e048ad..766ea28 100644 (file)
@@ -299,9 +299,10 @@ sfw_server_rpc_done(struct srpc_server_rpc *rpc)
                 sv->sv_name, libcfs_id2str(rpc->srpc_peer),
                 swi_state2str(rpc->srpc_wi.swi_state),
                 status);
                 sv->sv_name, libcfs_id2str(rpc->srpc_peer),
                 swi_state2str(rpc->srpc_wi.swi_state),
                 status);
-
-        if (rpc->srpc_bulk != NULL)
-                sfw_free_pages(rpc);
+       if (rpc->srpc_bulk) {
+               srpc_free_bulk(rpc->srpc_bulk);
+               rpc->srpc_bulk = NULL;
+       }
 }
 
 static void
 }
 
 static void
@@ -1095,13 +1096,6 @@ sfw_query_batch(struct sfw_batch *tsb, int testidx,
         return -ENOENT;
 }
 
         return -ENOENT;
 }
 
-void
-sfw_free_pages(struct srpc_server_rpc *rpc)
-{
-        srpc_free_bulk(rpc->srpc_bulk);
-        rpc->srpc_bulk = NULL;
-}
-
 int
 sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
                int sink)
 int
 sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
                int sink)
@@ -1109,10 +1103,12 @@ sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
        LASSERT(rpc->srpc_bulk == NULL);
        LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
 
        LASSERT(rpc->srpc_bulk == NULL);
        LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
 
-       rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink);
+       rpc->srpc_bulk = srpc_alloc_bulk(cpt, npages);
        if (rpc->srpc_bulk == NULL)
                return -ENOMEM;
 
        if (rpc->srpc_bulk == NULL)
                return -ENOMEM;
 
+       srpc_init_bulk(rpc->srpc_bulk, 0, npages, len, sink);
+
        return 0;
 }
 
        return 0;
 }
 
@@ -1638,7 +1634,6 @@ sfw_startup (void)
        INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
        INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
 
        INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
        INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
 
-        brw_init_test_client();
         brw_init_test_service();
         rc = sfw_register_test(&brw_test_service, &brw_test_client);
         LASSERT (rc == 0);
         brw_init_test_service();
         rc = sfw_register_test(&brw_test_service, &brw_test_client);
         LASSERT (rc == 0);
index b85e045..c1f4b5f 100644 (file)
@@ -108,14 +108,12 @@ void srpc_get_counters(struct srpc_counters *cnt)
 }
 
 static int
 }
 
 static int
-srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off,
-                  int nob)
+srpc_init_bulk_page(struct srpc_bulk *bk, int i, int off, int nob)
 {
        LASSERT(off < PAGE_SIZE);
        LASSERT(nob > 0 && nob <= PAGE_SIZE);
 
        bk->bk_iovs[i].bv_offset = off;
 {
        LASSERT(off < PAGE_SIZE);
        LASSERT(nob > 0 && nob <= PAGE_SIZE);
 
        bk->bk_iovs[i].bv_offset = off;
-       bk->bk_iovs[i].bv_page   = pg;
        bk->bk_iovs[i].bv_len    = nob;
        return nob;
 }
        bk->bk_iovs[i].bv_len    = nob;
        return nob;
 }
@@ -140,8 +138,7 @@ srpc_free_bulk(struct srpc_bulk *bk)
 }
 
 struct srpc_bulk *
 }
 
 struct srpc_bulk *
-srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg,
-               unsigned bulk_len, int sink)
+srpc_alloc_bulk(int cpt, unsigned int bulk_npg)
 {
        struct srpc_bulk *bk;
        int i;
 {
        struct srpc_bulk *bk;
        int i;
@@ -156,13 +153,10 @@ srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg,
        }
 
        memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
        }
 
        memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
-       bk->bk_sink   = sink;
-       bk->bk_len    = bulk_len;
        bk->bk_niov   = bulk_npg;
 
        for (i = 0; i < bulk_npg; i++) {
                struct page *pg;
        bk->bk_niov   = bulk_npg;
 
        for (i = 0; i < bulk_npg; i++) {
                struct page *pg;
-               int nob;
 
                pg = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_KERNEL);
                if (pg == NULL) {
 
                pg = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_KERNEL);
                if (pg == NULL) {
@@ -170,16 +164,37 @@ srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg,
                        srpc_free_bulk(bk);
                        return NULL;
                }
                        srpc_free_bulk(bk);
                        return NULL;
                }
+               bk->bk_iovs[i].bv_page   = pg;
+       }
+
+       return bk;
+}
+
+void
+srpc_init_bulk(struct srpc_bulk *bk, unsigned int bulk_off,
+               unsigned int bulk_npg, unsigned int bulk_len, int sink)
+{
+       int i;
+
+       LASSERT(bk != NULL);
+       LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
+
+       bk->bk_sink   = sink;
+       bk->bk_len    = bulk_len;
+       bk->bk_niov   = bulk_npg;
+
+       for (i = 0; i < bulk_npg && bulk_len > 0; i++) {
+               int nob;
+
+               LASSERT(bk->bk_iovs[i].bv_page != NULL);
 
                nob = min_t(unsigned, bulk_off + bulk_len, PAGE_SIZE) -
                      bulk_off;
 
 
                nob = min_t(unsigned, bulk_off + bulk_len, PAGE_SIZE) -
                      bulk_off;
 
-               srpc_add_bulk_page(bk, pg, i, bulk_off, nob);
+               srpc_init_bulk_page(bk, i, bulk_off, nob);
                bulk_len -= nob;
                bulk_off = 0;
        }
                bulk_len -= nob;
                bulk_off = 0;
        }
-
-       return bk;
 }
 
 static inline __u64
 }
 
 static inline __u64
@@ -193,7 +208,6 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc,
                     struct srpc_service_cd *scd,
                     struct srpc_buffer *buffer)
 {
                     struct srpc_service_cd *scd,
                     struct srpc_buffer *buffer)
 {
-       memset(rpc, 0, sizeof(*rpc));
        swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc,
                          srpc_serv_is_framework(scd->scd_svc) ?
                          lst_sched_serial : lst_sched_test[scd->scd_cpt]);
        swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc,
                          srpc_serv_is_framework(scd->scd_svc) ?
                          lst_sched_serial : lst_sched_test[scd->scd_cpt]);
@@ -205,6 +219,9 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc,
        rpc->srpc_peer     = buffer->buf_peer;
        rpc->srpc_self     = buffer->buf_self;
        LNetInvalidateMDHandle(&rpc->srpc_replymdh);
        rpc->srpc_peer     = buffer->buf_peer;
        rpc->srpc_self     = buffer->buf_self;
        LNetInvalidateMDHandle(&rpc->srpc_replymdh);
+
+       rpc->srpc_aborted  = 0;
+       rpc->srpc_status   = 0;
 }
 
 static void
 }
 
 static void
@@ -244,6 +261,8 @@ srpc_service_fini(struct srpc_service *svc)
                                         struct srpc_server_rpc,
                                         srpc_list);
                        list_del(&rpc->srpc_list);
                                         struct srpc_server_rpc,
                                         srpc_list);
                        list_del(&rpc->srpc_list);
+                       if (svc->sv_srpc_fini)
+                               svc->sv_srpc_fini(rpc);
                        LIBCFS_FREE(rpc, sizeof(*rpc));
                }
        }
                        LIBCFS_FREE(rpc, sizeof(*rpc));
                }
        }
@@ -311,7 +330,8 @@ srpc_service_init(struct srpc_service *svc)
                for (j = 0; j < nrpcs; j++) {
                        LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(),
                                         i, sizeof(*rpc));
                for (j = 0; j < nrpcs; j++) {
                        LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(),
                                         i, sizeof(*rpc));
-                       if (rpc == NULL) {
+                       if (rpc == NULL ||
+                          (svc->sv_srpc_init && svc->sv_srpc_init(rpc, i))) {
                                srpc_service_fini(svc);
                                return -ENOMEM;
                        }
                                srpc_service_fini(svc);
                                return -ENOMEM;
                        }
@@ -940,7 +960,6 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
 
        if (rpc->srpc_done != NULL)
                (*rpc->srpc_done) (rpc);
 
        if (rpc->srpc_done != NULL)
                (*rpc->srpc_done) (rpc);
-       LASSERT(rpc->srpc_bulk == NULL);
 
        spin_lock(&scd->scd_lock);
 
 
        spin_lock(&scd->scd_lock);
 
index f19685d..fb73aae 100644 (file)
@@ -316,6 +316,12 @@ struct srpc_service {
          */
        int              (*sv_handler)(struct srpc_server_rpc *);
        int              (*sv_bulk_ready)(struct srpc_server_rpc *, int);
          */
        int              (*sv_handler)(struct srpc_server_rpc *);
        int              (*sv_bulk_ready)(struct srpc_server_rpc *, int);
+
+       /** Service-side srpc constructor/destructor,
+        *  typically used for bulk preallocation.
+        */
+       int              (*sv_srpc_init)(struct srpc_server_rpc *, int);
+       void             (*sv_srpc_fini)(struct srpc_server_rpc *);
 };
 
 struct sfw_session {
 };
 
 struct sfw_session {
@@ -417,7 +423,6 @@ void sfw_abort_rpc(struct srpc_client_rpc *rpc);
 void sfw_post_rpc(struct srpc_client_rpc *rpc);
 void sfw_client_rpc_done(struct srpc_client_rpc *rpc);
 void sfw_unpack_message(struct srpc_msg *msg);
 void sfw_post_rpc(struct srpc_client_rpc *rpc);
 void sfw_client_rpc_done(struct srpc_client_rpc *rpc);
 void sfw_unpack_message(struct srpc_msg *msg);
-void sfw_free_pages(struct srpc_server_rpc *rpc);
 void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i);
 int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
                    int sink);
 void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i);
 int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
                    int sink);
@@ -432,9 +437,10 @@ srpc_create_client_rpc(struct lnet_process_id peer, int service,
 void srpc_post_rpc(struct srpc_client_rpc *rpc);
 void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
 void srpc_free_bulk(struct srpc_bulk *bk);
 void srpc_post_rpc(struct srpc_client_rpc *rpc);
 void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
 void srpc_free_bulk(struct srpc_bulk *bk);
-struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off,
-                                 unsigned int bulk_npg, unsigned int bulk_len,
-                                 int sink);
+struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int bulk_npg);
+void srpc_init_bulk(struct srpc_bulk *bk, unsigned int off,
+                   unsigned int bulk_npg, unsigned int bulk_len, int sink);
+
 int srpc_send_rpc(struct swi_workitem *wi);
 int srpc_send_reply(struct srpc_server_rpc *rpc);
 int srpc_add_service(struct srpc_service *sv);
 int srpc_send_rpc(struct swi_workitem *wi);
 int srpc_send_reply(struct srpc_server_rpc *rpc);
 int srpc_add_service(struct srpc_service *sv);
@@ -606,7 +612,6 @@ void ping_init_test_service(void);
 
 extern struct sfw_test_client_ops brw_test_client;
 extern struct srpc_service brw_test_service;
 
 extern struct sfw_test_client_ops brw_test_client;
 extern struct srpc_service brw_test_service;
-void brw_init_test_client(void);
 void brw_init_test_service(void);
 
 #endif /* __SELFTEST_SELFTEST_H__ */
 void brw_init_test_service(void);
 
 #endif /* __SELFTEST_SELFTEST_H__ */