From 2447564e120cf622627a5ab81051657f6ce5ece2 Mon Sep 17 00:00:00 2001 From: Alexey Lyashkov Date: Thu, 14 Jul 2022 16:39:39 +0300 Subject: [PATCH] LU-16011 lnet: use preallocate bulk for server The server side wants preallocated bulks to avoid heavy lock contention on the page cache. Without them, LST is limited to 35Gb/s on a 3-rail host (HDR each) due to high CPU usage. Preallocated bulks increase memory consumption for small bulks, but performance improves dramatically, up to 74Gb/s, with very low CPU usage. Test-Parameters: trivial testlist=sanity-lnet,lnet-selftest Signed-off-by: Alexey Lyashkov Change-Id: If1eaf5addf6c9d9f695a892dc66023b3bc293208 Reviewed-on: https://review.whamcloud.com/47952 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Chris Horn Reviewed-by: Andrew Perepechko Reviewed-by: Oleg Drokin --- lnet/selftest/brw_test.c | 77 ++++++++++++++++++++++++++++++++--------------- lnet/selftest/framework.c | 19 +++++------- lnet/selftest/rpc.c | 47 ++++++++++++++++++++--------- lnet/selftest/selftest.h | 15 ++++++--- 4 files changed, 102 insertions(+), 56 deletions(-) diff --git a/lnet/selftest/brw_test.c b/lnet/selftest/brw_test.c index 2e77d8f..83e0548 100644 --- a/lnet/selftest/brw_test.c +++ b/lnet/selftest/brw_test.c @@ -120,11 +120,12 @@ brw_client_init(struct sfw_test_instance *tsi) list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL), - off, npg, len, opc == LST_BRW_READ); + npg); if (bulk == NULL) { brw_client_fini(tsi); return -ENOMEM; } + srpc_init_bulk(bulk, off, npg, len, opc == LST_BRW_READ); tsu->tsu_private = bulk; } @@ -388,8 +389,6 @@ brw_server_rpc_done(struct srpc_server_rpc *rpc) CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n", blk->bk_niov, blk->bk_sink ?
"from" : "to", libcfs_id2str(rpc->srpc_peer)); - - sfw_free_pages(rpc); } static int @@ -437,7 +436,6 @@ brw_server_handle(struct srpc_server_rpc *rpc) struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply; struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst; int npg; - int rc; LASSERT (sv->sv_id == SRPC_SERVICE_BRW); @@ -488,37 +486,66 @@ brw_server_handle(struct srpc_server_rpc *rpc) return 0; } - rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg, - reqst->brw_len, - reqst->brw_rw == LST_BRW_WRITE); - if (rc != 0) - return rc; + srpc_init_bulk(rpc->srpc_bulk, 0, npg, reqst->brw_len, + reqst->brw_rw == LST_BRW_WRITE); - if (reqst->brw_rw == LST_BRW_READ) - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC); - else - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON); + if (reqst->brw_rw == LST_BRW_READ) + brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC); + else + brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON); - return 0; + return 0; } -struct sfw_test_client_ops brw_test_client; +static int +brw_srpc_init(struct srpc_server_rpc *rpc, int cpt) +{ + /* just alloc a maximal size - actual values will be adjusted later */ + rpc->srpc_bulk = srpc_alloc_bulk(cpt, LNET_MAX_IOV); + if (rpc->srpc_bulk == NULL) + return -ENOMEM; + + srpc_init_bulk(rpc->srpc_bulk, 0, LNET_MAX_IOV, 0, 0); -void brw_init_test_client(void) + return 0; +} + +static void +brw_srpc_fini(struct srpc_server_rpc *rpc) { - brw_test_client.tso_init = brw_client_init; - brw_test_client.tso_fini = brw_client_fini; - brw_test_client.tso_prep_rpc = brw_client_prep_rpc; - brw_test_client.tso_done_rpc = brw_client_done_rpc; + /* server RPC have just MAX_IOV size */ + srpc_init_bulk(rpc->srpc_bulk, 0, LNET_MAX_IOV, 0, 0); + + srpc_free_bulk(rpc->srpc_bulk); + rpc->srpc_bulk = NULL; +} + +struct sfw_test_client_ops brw_test_client = { + .tso_init = brw_client_init, + .tso_fini = brw_client_fini, + .tso_prep_rpc = brw_client_prep_rpc, + .tso_done_rpc = 
brw_client_done_rpc, }; -struct srpc_service brw_test_service; +struct srpc_service brw_test_service = { + .sv_id = SRPC_SERVICE_BRW, + .sv_name = "brw_test", + .sv_handler = brw_server_handle, + .sv_bulk_ready = brw_bulk_ready, + + .sv_srpc_init = brw_srpc_init, + .sv_srpc_fini = brw_srpc_fini, +}; void brw_init_test_service(void) { - brw_test_service.sv_id = SRPC_SERVICE_BRW; - brw_test_service.sv_name = "brw_test"; - brw_test_service.sv_handler = brw_server_handle; - brw_test_service.sv_bulk_ready = brw_bulk_ready; + unsigned long cache_size = cfs_totalram_pages() >> 1; + + /* brw prealloc cache should don't eat more than half memory */ + cache_size /= LNET_MAX_IOV; + brw_test_service.sv_wi_total = brw_srv_workitems; + + if (brw_test_service.sv_wi_total > cache_size) + brw_test_service.sv_wi_total = cache_size; } diff --git a/lnet/selftest/framework.c b/lnet/selftest/framework.c index 7e048ad..766ea28 100644 --- a/lnet/selftest/framework.c +++ b/lnet/selftest/framework.c @@ -299,9 +299,10 @@ sfw_server_rpc_done(struct srpc_server_rpc *rpc) sv->sv_name, libcfs_id2str(rpc->srpc_peer), swi_state2str(rpc->srpc_wi.swi_state), status); - - if (rpc->srpc_bulk != NULL) - sfw_free_pages(rpc); + if (rpc->srpc_bulk) { + srpc_free_bulk(rpc->srpc_bulk); + rpc->srpc_bulk = NULL; + } } static void @@ -1095,13 +1096,6 @@ sfw_query_batch(struct sfw_batch *tsb, int testidx, return -ENOENT; } -void -sfw_free_pages(struct srpc_server_rpc *rpc) -{ - srpc_free_bulk(rpc->srpc_bulk); - rpc->srpc_bulk = NULL; -} - int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, int sink) @@ -1109,10 +1103,12 @@ sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, LASSERT(rpc->srpc_bulk == NULL); LASSERT(npages > 0 && npages <= LNET_MAX_IOV); - rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink); + rpc->srpc_bulk = srpc_alloc_bulk(cpt, npages); if (rpc->srpc_bulk == NULL) return -ENOMEM; + srpc_init_bulk(rpc->srpc_bulk, 0, npages, len, sink); + 
return 0; } @@ -1638,7 +1634,6 @@ sfw_startup (void) INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs); INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions); - brw_init_test_client(); brw_init_test_service(); rc = sfw_register_test(&brw_test_service, &brw_test_client); LASSERT (rc == 0); diff --git a/lnet/selftest/rpc.c b/lnet/selftest/rpc.c index b85e045..c1f4b5f 100644 --- a/lnet/selftest/rpc.c +++ b/lnet/selftest/rpc.c @@ -108,14 +108,12 @@ void srpc_get_counters(struct srpc_counters *cnt) } static int -srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off, - int nob) +srpc_init_bulk_page(struct srpc_bulk *bk, int i, int off, int nob) { LASSERT(off < PAGE_SIZE); LASSERT(nob > 0 && nob <= PAGE_SIZE); bk->bk_iovs[i].bv_offset = off; - bk->bk_iovs[i].bv_page = pg; bk->bk_iovs[i].bv_len = nob; return nob; } @@ -140,8 +138,7 @@ srpc_free_bulk(struct srpc_bulk *bk) } struct srpc_bulk * -srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg, - unsigned bulk_len, int sink) +srpc_alloc_bulk(int cpt, unsigned int bulk_npg) { struct srpc_bulk *bk; int i; @@ -156,13 +153,10 @@ srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg, } memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg])); - bk->bk_sink = sink; - bk->bk_len = bulk_len; bk->bk_niov = bulk_npg; for (i = 0; i < bulk_npg; i++) { struct page *pg; - int nob; pg = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_KERNEL); if (pg == NULL) { @@ -170,16 +164,37 @@ srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg, srpc_free_bulk(bk); return NULL; } + bk->bk_iovs[i].bv_page = pg; + } + + return bk; +} + +void +srpc_init_bulk(struct srpc_bulk *bk, unsigned int bulk_off, + unsigned int bulk_npg, unsigned int bulk_len, int sink) +{ + int i; + + LASSERT(bk != NULL); + LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV); + + bk->bk_sink = sink; + bk->bk_len = bulk_len; + bk->bk_niov = bulk_npg; + + for (i = 0; i < bulk_npg && bulk_len > 0; i++) { + int nob; + + LASSERT(bk->bk_iovs[i].bv_page != 
NULL); nob = min_t(unsigned, bulk_off + bulk_len, PAGE_SIZE) - bulk_off; - srpc_add_bulk_page(bk, pg, i, bulk_off, nob); + srpc_init_bulk_page(bk, i, bulk_off, nob); bulk_len -= nob; bulk_off = 0; } - - return bk; } static inline __u64 @@ -193,7 +208,6 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc, struct srpc_service_cd *scd, struct srpc_buffer *buffer) { - memset(rpc, 0, sizeof(*rpc)); swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc, srpc_serv_is_framework(scd->scd_svc) ? lst_sched_serial : lst_sched_test[scd->scd_cpt]); @@ -205,6 +219,9 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc, rpc->srpc_peer = buffer->buf_peer; rpc->srpc_self = buffer->buf_self; LNetInvalidateMDHandle(&rpc->srpc_replymdh); + + rpc->srpc_aborted = 0; + rpc->srpc_status = 0; } static void @@ -244,6 +261,8 @@ srpc_service_fini(struct srpc_service *svc) struct srpc_server_rpc, srpc_list); list_del(&rpc->srpc_list); + if (svc->sv_srpc_fini) + svc->sv_srpc_fini(rpc); LIBCFS_FREE(rpc, sizeof(*rpc)); } } @@ -311,7 +330,8 @@ srpc_service_init(struct srpc_service *svc) for (j = 0; j < nrpcs; j++) { LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(), i, sizeof(*rpc)); - if (rpc == NULL) { + if (rpc == NULL || + (svc->sv_srpc_init && svc->sv_srpc_init(rpc, i))) { srpc_service_fini(svc); return -ENOMEM; } @@ -940,7 +960,6 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) if (rpc->srpc_done != NULL) (*rpc->srpc_done) (rpc); - LASSERT(rpc->srpc_bulk == NULL); spin_lock(&scd->scd_lock); diff --git a/lnet/selftest/selftest.h b/lnet/selftest/selftest.h index f19685d..fb73aae 100644 --- a/lnet/selftest/selftest.h +++ b/lnet/selftest/selftest.h @@ -316,6 +316,12 @@ struct srpc_service { */ int (*sv_handler)(struct srpc_server_rpc *); int (*sv_bulk_ready)(struct srpc_server_rpc *, int); + + /** Service side srpc constructor/destructor. + * used for the bulk preallocation as usual. 
+ */ + int (*sv_srpc_init)(struct srpc_server_rpc *, int); + void (*sv_srpc_fini)(struct srpc_server_rpc *); }; struct sfw_session { @@ -417,7 +423,6 @@ void sfw_abort_rpc(struct srpc_client_rpc *rpc); void sfw_post_rpc(struct srpc_client_rpc *rpc); void sfw_client_rpc_done(struct srpc_client_rpc *rpc); void sfw_unpack_message(struct srpc_msg *msg); -void sfw_free_pages(struct srpc_server_rpc *rpc); void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i); int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, int sink); @@ -432,9 +437,10 @@ srpc_create_client_rpc(struct lnet_process_id peer, int service, void srpc_post_rpc(struct srpc_client_rpc *rpc); void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why); void srpc_free_bulk(struct srpc_bulk *bk); -struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off, - unsigned int bulk_npg, unsigned int bulk_len, - int sink); +struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int bulk_npg); +void srpc_init_bulk(struct srpc_bulk *bk, unsigned int off, + unsigned int bulk_npg, unsigned int bulk_len, int sink); + int srpc_send_rpc(struct swi_workitem *wi); int srpc_send_reply(struct srpc_server_rpc *rpc); int srpc_add_service(struct srpc_service *sv); @@ -606,7 +612,6 @@ void ping_init_test_service(void); extern struct sfw_test_client_ops brw_test_client; extern struct srpc_service brw_test_service; -void brw_init_test_client(void); void brw_init_test_service(void); #endif /* __SELFTEST_SELFTEST_H__ */ -- 1.8.3.1