Whamcloud - gitweb
LU-16011 lnet: use preallocate bulk for server 76/50276/6
author Alexey Lyashkov <alexey.lyashkov@hpe.com>
Tue, 6 Feb 2024 14:58:04 +0000 (17:58 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 13 Mar 2024 03:20:57 +0000 (03:20 +0000)
The server side wants preallocated bulk buffers to avoid heavy lock
contention on the page cache.
Without them, LST is limited to 35Gb/s on a host with 3 rails (HDR each)
due to high CPU usage.
Preallocated bulks increase memory consumption for small bulks,
but performance improves dramatically, up to 74Gb/s, with very low
CPU usage.

Test-Parameters: testgroup=review-ldiskfs-arm testlist=sanity-lnet,lnet-selftest
Signed-off-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Change-Id: Icf396ba2ecfbded807b5722bb2c4cbe4d0084300
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50276
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/selftest/brw_test.c
lnet/selftest/framework.c
lnet/selftest/rpc.c
lnet/selftest/selftest.h

index 8a15306..8cd8b44 100644 (file)
@@ -71,8 +71,7 @@ brw_client_init(struct sfw_test_instance *tsi)
        struct sfw_session *sn = tsi->tsi_batch->bat_session;
        int               flags;
        int               off;
-       int               npg;
-       int               len;
+       unsigned int      len;
        int               opc;
        struct srpc_bulk *bulk;
        struct sfw_test_unit *tsu;
@@ -85,10 +84,9 @@ brw_client_init(struct sfw_test_instance *tsi)
 
                opc   = breq->blk_opc;
                flags = breq->blk_flags;
-               npg   = breq->blk_npg;
                /* NB: this is not going to work for variable page size,
                 * but we have to keep it for compatibility */
-               len   = npg * PAGE_SIZE;
+               len   = breq->blk_npg * PAGE_SIZE;
                off   = 0;
 
        } else {
@@ -102,13 +100,12 @@ brw_client_init(struct sfw_test_instance *tsi)
                flags = breq->blk_flags;
                len   = breq->blk_len;
                off   = breq->blk_offset & ~PAGE_MASK;
-               npg   = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        }
 
        if (off % BRW_MSIZE != 0)
                return -EINVAL;
 
-       if (npg > LNET_MAX_IOV || npg <= 0)
+       if (len > LNET_MTU)
                return -EINVAL;
 
        if (opc != LST_BRW_READ && opc != LST_BRW_WRITE)
@@ -120,11 +117,12 @@ brw_client_init(struct sfw_test_instance *tsi)
 
        list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
                bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL),
-                                      off, npg, len, opc == LST_BRW_READ);
+                                      len);
                if (bulk == NULL) {
                        brw_client_fini(tsi);
                        return -ENOMEM;
                }
+               srpc_init_bulk(bulk, off, len, opc == LST_BRW_READ);
 
                tsu->tsu_private = bulk;
        }
@@ -278,6 +276,7 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
        int flags;
        int npg;
        int len;
+       int off;
        int opc;
        int rc;
 
@@ -289,8 +288,8 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
 
                opc   = breq->blk_opc;
                flags = breq->blk_flags;
-               npg   = breq->blk_npg;
-               len   = npg * PAGE_SIZE;
+               len   = breq->blk_npg * PAGE_SIZE;
+               off   = 0;
 
        } else {
                struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1;
@@ -304,8 +303,8 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest,
                flags = breq->blk_flags;
                len   = breq->blk_len;
                off   = breq->blk_offset;
-               npg   = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        }
+       npg   = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
        rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc);
        if (rc != 0)
@@ -390,8 +389,6 @@ brw_server_rpc_done(struct srpc_server_rpc *rpc)
                CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n",
                       blk->bk_niov, blk->bk_sink ? "from" : "to",
                       libcfs_id2str(rpc->srpc_peer));
-
-       sfw_free_pages(rpc);
 }
 
 static int
@@ -438,8 +435,6 @@ brw_server_handle(struct srpc_server_rpc *rpc)
        struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
        struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply;
        struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst;
-       int npg;
-       int rc;
 
         LASSERT (sv->sv_id == SRPC_SERVICE_BRW);
 
@@ -477,50 +472,72 @@ brw_server_handle(struct srpc_server_rpc *rpc)
                        reply->brw_status = EINVAL;
                        return 0;
                }
-               npg = reqst->brw_len >> PAGE_SHIFT;
-
-       } else {
-               npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        }
 
        replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;
 
-       if (reqst->brw_len == 0 || npg > LNET_MAX_IOV) {
+       if (reqst->brw_len == 0 || reqst->brw_len > LNET_MTU) {
                reply->brw_status = EINVAL;
                return 0;
        }
 
-       rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg,
-                            reqst->brw_len,
-                            reqst->brw_rw == LST_BRW_WRITE);
-       if (rc != 0)
-               return rc;
+       srpc_init_bulk(rpc->srpc_bulk, 0, reqst->brw_len,
+                      reqst->brw_rw == LST_BRW_WRITE);
 
-        if (reqst->brw_rw == LST_BRW_READ)
-                brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
-        else
-                brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
+       if (reqst->brw_rw == LST_BRW_READ)
+               brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC);
+       else
+               brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON);
 
-        return 0;
+       return 0;
 }
 
-struct sfw_test_client_ops brw_test_client;
+static int
+brw_srpc_init(struct srpc_server_rpc *rpc, int cpt)
+{
+       /* allocate the maximum size - actual values are adjusted later */
+       rpc->srpc_bulk = srpc_alloc_bulk(cpt, LNET_MTU);
+       if (rpc->srpc_bulk == NULL)
+               return -ENOMEM;
+
+       srpc_init_bulk(rpc->srpc_bulk, 0, 0, 0);
 
-void brw_init_test_client(void)
+       return 0;
+}
+
+static void
+brw_srpc_fini(struct srpc_server_rpc *rpc)
 {
-        brw_test_client.tso_init       = brw_client_init;
-        brw_test_client.tso_fini       = brw_client_fini;
-        brw_test_client.tso_prep_rpc   = brw_client_prep_rpc;
-        brw_test_client.tso_done_rpc   = brw_client_done_rpc;
+       srpc_free_bulk(rpc->srpc_bulk);
+       rpc->srpc_bulk = NULL;
+}
+
+struct sfw_test_client_ops brw_test_client = {
+       .tso_init       = brw_client_init,
+       .tso_fini       = brw_client_fini,
+       .tso_prep_rpc   = brw_client_prep_rpc,
+       .tso_done_rpc   = brw_client_done_rpc,
 };
 
-struct srpc_service brw_test_service;
+struct srpc_service brw_test_service = {
+       .sv_id         = SRPC_SERVICE_BRW,
+       .sv_name       = "brw_test",
+       .sv_handler    = brw_server_handle,
+       .sv_bulk_ready = brw_bulk_ready,
+
+       .sv_srpc_init  = brw_srpc_init,
+       .sv_srpc_fini  = brw_srpc_fini,
+};
 
 void brw_init_test_service(void)
 {
-        brw_test_service.sv_id         = SRPC_SERVICE_BRW;
-        brw_test_service.sv_name       = "brw_test";
-        brw_test_service.sv_handler    = brw_server_handle;
-        brw_test_service.sv_bulk_ready = brw_bulk_ready;
+       unsigned long cache_size = cfs_totalram_pages() >> 4;
+
+       /* the brw prealloc cache should not eat more than half of memory */
+       cache_size /= ((LNET_MTU >> PAGE_SHIFT) + 1) ;
+
        brw_test_service.sv_wi_total   = brw_srv_workitems;
+
+       if (brw_test_service.sv_wi_total > cache_size)
+               brw_test_service.sv_wi_total = cache_size;
 }
index cef458a..9bd14ff 100644 (file)
@@ -309,8 +309,10 @@ sfw_server_rpc_done(struct srpc_server_rpc *rpc)
               sv->sv_name, libcfs_id2str(rpc->srpc_peer),
               swi_state2str(rpc->srpc_wi.swi_state), status);
 
-       if (rpc->srpc_bulk != NULL)
-               sfw_free_pages(rpc);
+       if (rpc->srpc_bulk) {
+               srpc_free_bulk(rpc->srpc_bulk);
+               rpc->srpc_bulk = NULL;
+       }
 }
 
 static void
@@ -1127,24 +1129,19 @@ sfw_query_batch(struct sfw_batch *tsb, int testidx,
        return -ENOENT;
 }
 
-void
-sfw_free_pages(struct srpc_server_rpc *rpc)
-{
-       srpc_free_bulk(rpc->srpc_bulk);
-       rpc->srpc_bulk = NULL;
-}
-
 int
-sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
+sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int len,
                int sink)
 {
        LASSERT(rpc->srpc_bulk == NULL);
-       LASSERT(npages > 0 && npages <= LNET_MAX_IOV);
+       LASSERT(len > 0 && len <= LNET_MTU);
 
-       rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink);
+       rpc->srpc_bulk = srpc_alloc_bulk(cpt, len);
        if (rpc->srpc_bulk == NULL)
                return -ENOMEM;
 
+       srpc_init_bulk(rpc->srpc_bulk, 0, len, sink);
+
        return 0;
 }
 
@@ -1192,19 +1189,13 @@ sfw_add_test(struct srpc_server_rpc *rpc)
 
        if (request->tsr_is_client && rpc->srpc_bulk == NULL) {
                /* rpc will be resumed later in sfw_bulk_ready */
-               int     npg = sfw_id_pages(request->tsr_ndest);
                int     len;
 
-               if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) {
-                       len = npg * PAGE_SIZE;
-
-               } else  {
-                       len = sizeof(struct lnet_process_id_packed) *
-                               request->tsr_ndest;
-               }
+               len = sizeof(struct lnet_process_id_packed) *
+                             request->tsr_ndest;
 
-               return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1);
-       }
+               return sfw_alloc_pages(rpc, CFS_CPT_ANY, len, 1);
+        }
 
        rc = sfw_add_test_instance(bat, rpc);
        CDEBUG(rc == 0 ? D_NET : D_WARNING,
@@ -1667,7 +1658,6 @@ sfw_startup(void)
        INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs);
        INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions);
 
-       brw_init_test_client();
        brw_init_test_service();
        rc = sfw_register_test(&brw_test_service, &brw_test_client);
        LASSERT(rc == 0);
index 4d5a787..c848431 100644 (file)
@@ -108,14 +108,12 @@ void srpc_get_counters(struct srpc_counters *cnt)
 }
 
 static int
-srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off,
-                  int nob)
+srpc_init_bulk_page(struct srpc_bulk *bk, int i, int off, int nob)
 {
        LASSERT(off < PAGE_SIZE);
        LASSERT(nob > 0 && nob <= PAGE_SIZE);
 
        bk->bk_iovs[i].bv_offset = off;
-       bk->bk_iovs[i].bv_page   = pg;
        bk->bk_iovs[i].bv_len    = nob;
        return nob;
 }
@@ -128,7 +126,7 @@ srpc_free_bulk(struct srpc_bulk *bk)
 
        LASSERT(bk != NULL);
 
-       for (i = 0; i < bk->bk_niov; i++) {
+       for (i = 0; i < bk->bk_alloc; i++) {
                pg = bk->bk_iovs[i].bv_page;
                if (pg == NULL)
                        break;
@@ -136,15 +134,15 @@ srpc_free_bulk(struct srpc_bulk *bk)
                __free_page(pg);
        }
 
-       LIBCFS_FREE(bk, offsetof(struct srpc_bulk, bk_iovs[bk->bk_niov]));
+       LIBCFS_FREE(bk, offsetof(struct srpc_bulk, bk_iovs[bk->bk_alloc]));
 }
 
 struct srpc_bulk *
-srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg,
-               unsigned bulk_len, int sink)
+srpc_alloc_bulk(int cpt, unsigned int bulk_len)
 {
        struct srpc_bulk *bk;
        int i;
+       int bulk_npg = (bulk_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
        LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV);
 
@@ -156,13 +154,11 @@ srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg,
        }
 
        memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg]));
-       bk->bk_sink   = sink;
-       bk->bk_len    = bulk_len;
-       bk->bk_niov   = bulk_npg;
+       bk->bk_alloc   = bulk_npg;
+       LASSERTF(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV, "b: %u\n", bulk_npg);
 
        for (i = 0; i < bulk_npg; i++) {
                struct page *pg;
-               int nob;
 
                pg = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_KERNEL);
                if (pg == NULL) {
@@ -170,16 +166,42 @@ srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg,
                        srpc_free_bulk(bk);
                        return NULL;
                }
+               bk->bk_iovs[i].bv_page   = pg;
+       }
+
+       return bk;
+}
+
+void
+srpc_init_bulk(struct srpc_bulk *bk, unsigned int bulk_off,
+               unsigned int bulk_len, int sink)
+{
+       int i;
+       ENTRY;
+
+       CDEBUG(D_INFO, "bulk %p o %u l %u s %u\n",
+               bk, bulk_off, bulk_len, sink);
+
+       LASSERT(bk != NULL);
+
+       bk->bk_sink   = sink;
+       bk->bk_len    = bulk_len;
+
+       for (i = 0; bulk_len > 0; i++) {
+               int nob;
+
+               LASSERT(bk->bk_iovs[i].bv_page != NULL);
 
                nob = min_t(unsigned, bulk_off + bulk_len, PAGE_SIZE) -
                      bulk_off;
 
-               srpc_add_bulk_page(bk, pg, i, bulk_off, nob);
+               srpc_init_bulk_page(bk, i, bulk_off, nob);
                bulk_len -= nob;
                bulk_off = 0;
        }
-
-       return bk;
+       bk->bk_niov = i;
+       LASSERTF(bk->bk_niov >= 0 && bk->bk_niov <= bk->bk_alloc,
+               "bk %p - n: %u/%u\n", bk, bk->bk_niov, bk->bk_alloc);
 }
 
 static inline __u64
@@ -193,7 +215,6 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc,
                     struct srpc_service_cd *scd,
                     struct srpc_buffer *buffer)
 {
-       memset(rpc, 0, sizeof(*rpc));
        swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc,
                          srpc_serv_is_framework(scd->scd_svc) ?
                          lst_serial_wq : lst_test_wq[scd->scd_cpt]);
@@ -205,6 +226,9 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc,
        rpc->srpc_peer     = buffer->buf_peer;
        rpc->srpc_self     = buffer->buf_self;
        LNetInvalidateMDHandle(&rpc->srpc_replymdh);
+
+       rpc->srpc_aborted  = 0;
+       rpc->srpc_status   = 0;
 }
 
 static void
@@ -244,6 +268,8 @@ srpc_service_fini(struct srpc_service *svc)
                                               struct srpc_server_rpc,
                                               srpc_list);
                        list_del(&rpc->srpc_list);
+                       if (svc->sv_srpc_fini)
+                               svc->sv_srpc_fini(rpc);
                        LIBCFS_FREE(rpc, sizeof(*rpc));
                }
        }
@@ -311,7 +337,8 @@ srpc_service_init(struct srpc_service *svc)
                for (j = 0; j < nrpcs; j++) {
                        LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(),
                                         i, sizeof(*rpc));
-                       if (rpc == NULL) {
+                       if (rpc == NULL ||
+                          (svc->sv_srpc_init && svc->sv_srpc_init(rpc, i))) {
                                srpc_service_fini(svc);
                                return -ENOMEM;
                        }
@@ -933,7 +960,6 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status)
 
        if (rpc->srpc_done != NULL)
                (*rpc->srpc_done) (rpc);
-       LASSERT(rpc->srpc_bulk == NULL);
 
        spin_lock(&scd->scd_lock);
 
@@ -1094,6 +1120,7 @@ srpc_client_rpc_expired (void *data)
              rpc->crpc_service, libcfs_id2str(rpc->crpc_dest),
              rpc->crpc_timeout);
 
+       LBUG();
        spin_lock(&rpc->crpc_lock);
 
        rpc->crpc_timeout = 0;
index 24b37fb..7cffc75 100644 (file)
@@ -234,6 +234,7 @@ struct srpc_bulk {
        int                     bk_len;  /* len of bulk data */
        struct lnet_handle_md   bk_mdh;
        int                     bk_sink; /* sink/source */
+       int                     bk_alloc; /* # allocated iov */
        int                     bk_niov; /* # iov in bk_iovs */
        struct bio_vec          bk_iovs[0];
 };
@@ -397,6 +398,12 @@ struct srpc_service {
         */
        int              (*sv_handler)(struct srpc_server_rpc *);
        int              (*sv_bulk_ready)(struct srpc_server_rpc *, int);
+
+       /** Service-side srpc constructor/destructor,
+        *  typically used for bulk preallocation.
+        */
+       int              (*sv_srpc_init)(struct srpc_server_rpc *, int);
+       void             (*sv_srpc_fini)(struct srpc_server_rpc *);
 };
 
 struct lst_session_id {
@@ -513,9 +520,8 @@ void sfw_abort_rpc(struct srpc_client_rpc *rpc);
 void sfw_post_rpc(struct srpc_client_rpc *rpc);
 void sfw_client_rpc_done(struct srpc_client_rpc *rpc);
 void sfw_unpack_message(struct srpc_msg *msg);
-void sfw_free_pages(struct srpc_server_rpc *rpc);
 void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i);
-int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len,
+int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int len,
                    int sink);
 int sfw_make_session(struct srpc_mksn_reqst *request,
                     struct srpc_mksn_reply *reply);
@@ -528,9 +534,11 @@ srpc_create_client_rpc(struct lnet_process_id peer, int service,
 void srpc_post_rpc(struct srpc_client_rpc *rpc);
 void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why);
 void srpc_free_bulk(struct srpc_bulk *bk);
-struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off,
-                                 unsigned int bulk_npg, unsigned int bulk_len,
-                                 int sink);
+
+struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int bulk_len);
+void srpc_init_bulk(struct srpc_bulk *bk, unsigned int off,
+                   unsigned int bulk_len, int sink);
+
 void srpc_send_rpc(struct swi_workitem *wi);
 int srpc_send_reply(struct srpc_server_rpc *rpc);
 int srpc_add_service(struct srpc_service *sv);
@@ -696,7 +704,6 @@ void ping_init_test_service(void);
 
 extern struct sfw_test_client_ops brw_test_client;
 extern struct srpc_service brw_test_service;
-void brw_init_test_client(void);
 void brw_init_test_service(void);
 
 #endif /* __SELFTEST_SELFTEST_H__ */