From: Alexey Lyashkov Date: Tue, 6 Feb 2024 14:58:04 +0000 (+0300) Subject: LU-16011 lnet: use preallocate bulk for server X-Git-Tag: 2.15.62~132 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=e502638050a6a6c176bf921ebb3733846fd7ab87;p=fs%2Flustre-release.git LU-16011 lnet: use preallocate bulk for server The server side wants preallocated bulks to avoid heavy lock contention on the page cache. Without them, LST is limited to 35Gb/s on a 3-rail host (HDR each) due to high CPU usage. Preallocated bulks increase memory consumption for small bulks, but performance improves dramatically, up to 74Gb/s, with very low CPU usage. Test-Parameters: testgroup=review-ldiskfs-arm testlist=sanity-lnet,lnet-selftest Signed-off-by: Alexey Lyashkov Change-Id: Icf396ba2ecfbded807b5722bb2c4cbe4d0084300 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50276 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: Serguei Smirnov Reviewed-by: Oleg Drokin --- diff --git a/lnet/selftest/brw_test.c b/lnet/selftest/brw_test.c index 8a15306..8cd8b44 100644 --- a/lnet/selftest/brw_test.c +++ b/lnet/selftest/brw_test.c @@ -71,8 +71,7 @@ brw_client_init(struct sfw_test_instance *tsi) struct sfw_session *sn = tsi->tsi_batch->bat_session; int flags; int off; - int npg; - int len; + unsigned int len; int opc; struct srpc_bulk *bulk; struct sfw_test_unit *tsu; @@ -85,10 +84,9 @@ brw_client_init(struct sfw_test_instance *tsi) opc = breq->blk_opc; flags = breq->blk_flags; - npg = breq->blk_npg; /* NB: this is not going to work for variable page size, * but we have to keep it for compatibility */ - len = npg * PAGE_SIZE; + len = breq->blk_npg * PAGE_SIZE; off = 0; } else { @@ -102,13 +100,12 @@ brw_client_init(struct sfw_test_instance *tsi) flags = breq->blk_flags; len = breq->blk_len; off = breq->blk_offset & ~PAGE_MASK; - npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; } if (off % BRW_MSIZE != 0) return -EINVAL; - if (npg > LNET_MAX_IOV || npg <=
0) + if (len > LNET_MTU) return -EINVAL; if (opc != LST_BRW_READ && opc != LST_BRW_WRITE) @@ -120,11 +117,12 @@ brw_client_init(struct sfw_test_instance *tsi) list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL), - off, npg, len, opc == LST_BRW_READ); + len); if (bulk == NULL) { brw_client_fini(tsi); return -ENOMEM; } + srpc_init_bulk(bulk, off, len, opc == LST_BRW_READ); tsu->tsu_private = bulk; } @@ -278,6 +276,7 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest, int flags; int npg; int len; + int off; int opc; int rc; @@ -289,8 +288,8 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest, opc = breq->blk_opc; flags = breq->blk_flags; - npg = breq->blk_npg; - len = npg * PAGE_SIZE; + len = breq->blk_npg * PAGE_SIZE; + off = 0; } else { struct test_bulk_req_v1 *breq = &tsi->tsi_u.bulk_v1; @@ -304,8 +303,8 @@ brw_client_prep_rpc(struct sfw_test_unit *tsu, struct lnet_process_id dest, flags = breq->blk_flags; len = breq->blk_len; off = breq->blk_offset; - npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; } + npg = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; rc = sfw_create_test_rpc(tsu, dest, sn->sn_features, npg, len, &rpc); if (rc != 0) @@ -390,8 +389,6 @@ brw_server_rpc_done(struct srpc_server_rpc *rpc) CDEBUG(D_NET, "Transferred %d pages bulk data %s %s\n", blk->bk_niov, blk->bk_sink ? 
"from" : "to", libcfs_id2str(rpc->srpc_peer)); - - sfw_free_pages(rpc); } static int @@ -438,8 +435,6 @@ brw_server_handle(struct srpc_server_rpc *rpc) struct srpc_msg *reqstmsg = &rpc->srpc_reqstbuf->buf_msg; struct srpc_brw_reply *reply = &replymsg->msg_body.brw_reply; struct srpc_brw_reqst *reqst = &reqstmsg->msg_body.brw_reqst; - int npg; - int rc; LASSERT (sv->sv_id == SRPC_SERVICE_BRW); @@ -477,50 +472,72 @@ brw_server_handle(struct srpc_server_rpc *rpc) reply->brw_status = EINVAL; return 0; } - npg = reqst->brw_len >> PAGE_SHIFT; - - } else { - npg = (reqst->brw_len + PAGE_SIZE - 1) >> PAGE_SHIFT; } replymsg->msg_ses_feats = reqstmsg->msg_ses_feats; - if (reqst->brw_len == 0 || npg > LNET_MAX_IOV) { + if (reqst->brw_len == 0 || reqst->brw_len > LNET_MTU) { reply->brw_status = EINVAL; return 0; } - rc = sfw_alloc_pages(rpc, rpc->srpc_scd->scd_cpt, npg, - reqst->brw_len, - reqst->brw_rw == LST_BRW_WRITE); - if (rc != 0) - return rc; + srpc_init_bulk(rpc->srpc_bulk, 0, reqst->brw_len, + reqst->brw_rw == LST_BRW_WRITE); - if (reqst->brw_rw == LST_BRW_READ) - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC); - else - brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON); + if (reqst->brw_rw == LST_BRW_READ) + brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_MAGIC); + else + brw_fill_bulk(rpc->srpc_bulk, reqst->brw_flags, BRW_POISON); - return 0; + return 0; } -struct sfw_test_client_ops brw_test_client; +static int +brw_srpc_init(struct srpc_server_rpc *rpc, int cpt) +{ + /* just alloc a maximal size - actual values will be adjusted later */ + rpc->srpc_bulk = srpc_alloc_bulk(cpt, LNET_MTU); + if (rpc->srpc_bulk == NULL) + return -ENOMEM; + + srpc_init_bulk(rpc->srpc_bulk, 0, 0, 0); -void brw_init_test_client(void) + return 0; +} + +static void +brw_srpc_fini(struct srpc_server_rpc *rpc) { - brw_test_client.tso_init = brw_client_init; - brw_test_client.tso_fini = brw_client_fini; - brw_test_client.tso_prep_rpc = brw_client_prep_rpc; - 
brw_test_client.tso_done_rpc = brw_client_done_rpc; + srpc_free_bulk(rpc->srpc_bulk); + rpc->srpc_bulk = NULL; +} + +struct sfw_test_client_ops brw_test_client = { + .tso_init = brw_client_init, + .tso_fini = brw_client_fini, + .tso_prep_rpc = brw_client_prep_rpc, + .tso_done_rpc = brw_client_done_rpc, }; -struct srpc_service brw_test_service; +struct srpc_service brw_test_service = { + .sv_id = SRPC_SERVICE_BRW, + .sv_name = "brw_test", + .sv_handler = brw_server_handle, + .sv_bulk_ready = brw_bulk_ready, + + .sv_srpc_init = brw_srpc_init, + .sv_srpc_fini = brw_srpc_fini, +}; void brw_init_test_service(void) { - brw_test_service.sv_id = SRPC_SERVICE_BRW; - brw_test_service.sv_name = "brw_test"; - brw_test_service.sv_handler = brw_server_handle; - brw_test_service.sv_bulk_ready = brw_bulk_ready; + unsigned long cache_size = cfs_totalram_pages() >> 4; + + /* brw prealloc cache should don't eat more than half memory */ + cache_size /= ((LNET_MTU >> PAGE_SHIFT) + 1) ; + brw_test_service.sv_wi_total = brw_srv_workitems; + + if (brw_test_service.sv_wi_total > cache_size) + brw_test_service.sv_wi_total = cache_size; } diff --git a/lnet/selftest/framework.c b/lnet/selftest/framework.c index cef458a..9bd14ff 100644 --- a/lnet/selftest/framework.c +++ b/lnet/selftest/framework.c @@ -309,8 +309,10 @@ sfw_server_rpc_done(struct srpc_server_rpc *rpc) sv->sv_name, libcfs_id2str(rpc->srpc_peer), swi_state2str(rpc->srpc_wi.swi_state), status); - if (rpc->srpc_bulk != NULL) - sfw_free_pages(rpc); + if (rpc->srpc_bulk) { + srpc_free_bulk(rpc->srpc_bulk); + rpc->srpc_bulk = NULL; + } } static void @@ -1127,24 +1129,19 @@ sfw_query_batch(struct sfw_batch *tsb, int testidx, return -ENOENT; } -void -sfw_free_pages(struct srpc_server_rpc *rpc) -{ - srpc_free_bulk(rpc->srpc_bulk); - rpc->srpc_bulk = NULL; -} - int -sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, +sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int len, int sink) { LASSERT(rpc->srpc_bulk 
== NULL); - LASSERT(npages > 0 && npages <= LNET_MAX_IOV); + LASSERT(len > 0 && len <= LNET_MTU); - rpc->srpc_bulk = srpc_alloc_bulk(cpt, 0, npages, len, sink); + rpc->srpc_bulk = srpc_alloc_bulk(cpt, len); if (rpc->srpc_bulk == NULL) return -ENOMEM; + srpc_init_bulk(rpc->srpc_bulk, 0, len, sink); + return 0; } @@ -1192,19 +1189,13 @@ sfw_add_test(struct srpc_server_rpc *rpc) if (request->tsr_is_client && rpc->srpc_bulk == NULL) { /* rpc will be resumed later in sfw_bulk_ready */ - int npg = sfw_id_pages(request->tsr_ndest); int len; - if ((sn->sn_features & LST_FEAT_BULK_LEN) == 0) { - len = npg * PAGE_SIZE; - - } else { - len = sizeof(struct lnet_process_id_packed) * - request->tsr_ndest; - } + len = sizeof(struct lnet_process_id_packed) * + request->tsr_ndest; - return sfw_alloc_pages(rpc, CFS_CPT_ANY, npg, len, 1); - } + return sfw_alloc_pages(rpc, CFS_CPT_ANY, len, 1); + } rc = sfw_add_test_instance(bat, rpc); CDEBUG(rc == 0 ? D_NET : D_WARNING, @@ -1667,7 +1658,6 @@ sfw_startup(void) INIT_LIST_HEAD(&sfw_data.fw_zombie_rpcs); INIT_LIST_HEAD(&sfw_data.fw_zombie_sessions); - brw_init_test_client(); brw_init_test_service(); rc = sfw_register_test(&brw_test_service, &brw_test_client); LASSERT(rc == 0); diff --git a/lnet/selftest/rpc.c b/lnet/selftest/rpc.c index 4d5a787..c848431 100644 --- a/lnet/selftest/rpc.c +++ b/lnet/selftest/rpc.c @@ -108,14 +108,12 @@ void srpc_get_counters(struct srpc_counters *cnt) } static int -srpc_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i, int off, - int nob) +srpc_init_bulk_page(struct srpc_bulk *bk, int i, int off, int nob) { LASSERT(off < PAGE_SIZE); LASSERT(nob > 0 && nob <= PAGE_SIZE); bk->bk_iovs[i].bv_offset = off; - bk->bk_iovs[i].bv_page = pg; bk->bk_iovs[i].bv_len = nob; return nob; } @@ -128,7 +126,7 @@ srpc_free_bulk(struct srpc_bulk *bk) LASSERT(bk != NULL); - for (i = 0; i < bk->bk_niov; i++) { + for (i = 0; i < bk->bk_alloc; i++) { pg = bk->bk_iovs[i].bv_page; if (pg == NULL) break; @@ -136,15 +134,15 @@ 
srpc_free_bulk(struct srpc_bulk *bk) __free_page(pg); } - LIBCFS_FREE(bk, offsetof(struct srpc_bulk, bk_iovs[bk->bk_niov])); + LIBCFS_FREE(bk, offsetof(struct srpc_bulk, bk_iovs[bk->bk_alloc])); } struct srpc_bulk * -srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg, - unsigned bulk_len, int sink) +srpc_alloc_bulk(int cpt, unsigned int bulk_len) { struct srpc_bulk *bk; int i; + int bulk_npg = (bulk_len + PAGE_SIZE - 1) >> PAGE_SHIFT; LASSERT(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV); @@ -156,13 +154,11 @@ srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg, } memset(bk, 0, offsetof(struct srpc_bulk, bk_iovs[bulk_npg])); - bk->bk_sink = sink; - bk->bk_len = bulk_len; - bk->bk_niov = bulk_npg; + bk->bk_alloc = bulk_npg; + LASSERTF(bulk_npg > 0 && bulk_npg <= LNET_MAX_IOV, "b: %u\n", bulk_npg); for (i = 0; i < bulk_npg; i++) { struct page *pg; - int nob; pg = cfs_page_cpt_alloc(lnet_cpt_table(), cpt, GFP_KERNEL); if (pg == NULL) { @@ -170,16 +166,42 @@ srpc_alloc_bulk(int cpt, unsigned bulk_off, unsigned bulk_npg, srpc_free_bulk(bk); return NULL; } + bk->bk_iovs[i].bv_page = pg; + } + + return bk; +} + +void +srpc_init_bulk(struct srpc_bulk *bk, unsigned int bulk_off, + unsigned int bulk_len, int sink) +{ + int i; + ENTRY; + + CDEBUG(D_INFO, "bulk %p o %u l %u s %u\n", + bk, bulk_off, bulk_len, sink); + + LASSERT(bk != NULL); + + bk->bk_sink = sink; + bk->bk_len = bulk_len; + + for (i = 0; bulk_len > 0; i++) { + int nob; + + LASSERT(bk->bk_iovs[i].bv_page != NULL); nob = min_t(unsigned, bulk_off + bulk_len, PAGE_SIZE) - bulk_off; - srpc_add_bulk_page(bk, pg, i, bulk_off, nob); + srpc_init_bulk_page(bk, i, bulk_off, nob); bulk_len -= nob; bulk_off = 0; } - - return bk; + bk->bk_niov = i; + LASSERTF(bk->bk_niov >= 0 && bk->bk_niov <= bk->bk_alloc, + "bk %p - n: %u/%u\n", bk, bk->bk_niov, bk->bk_alloc); } static inline __u64 @@ -193,7 +215,6 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc, struct srpc_service_cd *scd, struct srpc_buffer *buffer) 
{ - memset(rpc, 0, sizeof(*rpc)); swi_init_workitem(&rpc->srpc_wi, srpc_handle_rpc, srpc_serv_is_framework(scd->scd_svc) ? lst_serial_wq : lst_test_wq[scd->scd_cpt]); @@ -205,6 +226,9 @@ srpc_init_server_rpc(struct srpc_server_rpc *rpc, rpc->srpc_peer = buffer->buf_peer; rpc->srpc_self = buffer->buf_self; LNetInvalidateMDHandle(&rpc->srpc_replymdh); + + rpc->srpc_aborted = 0; + rpc->srpc_status = 0; } static void @@ -244,6 +268,8 @@ srpc_service_fini(struct srpc_service *svc) struct srpc_server_rpc, srpc_list); list_del(&rpc->srpc_list); + if (svc->sv_srpc_fini) + svc->sv_srpc_fini(rpc); LIBCFS_FREE(rpc, sizeof(*rpc)); } } @@ -311,7 +337,8 @@ srpc_service_init(struct srpc_service *svc) for (j = 0; j < nrpcs; j++) { LIBCFS_CPT_ALLOC(rpc, lnet_cpt_table(), i, sizeof(*rpc)); - if (rpc == NULL) { + if (rpc == NULL || + (svc->sv_srpc_init && svc->sv_srpc_init(rpc, i))) { srpc_service_fini(svc); return -ENOMEM; } @@ -933,7 +960,6 @@ srpc_server_rpc_done(struct srpc_server_rpc *rpc, int status) if (rpc->srpc_done != NULL) (*rpc->srpc_done) (rpc); - LASSERT(rpc->srpc_bulk == NULL); spin_lock(&scd->scd_lock); @@ -1094,6 +1120,7 @@ srpc_client_rpc_expired (void *data) rpc->crpc_service, libcfs_id2str(rpc->crpc_dest), rpc->crpc_timeout); + LBUG(); spin_lock(&rpc->crpc_lock); rpc->crpc_timeout = 0; diff --git a/lnet/selftest/selftest.h b/lnet/selftest/selftest.h index 24b37fb..7cffc75 100644 --- a/lnet/selftest/selftest.h +++ b/lnet/selftest/selftest.h @@ -234,6 +234,7 @@ struct srpc_bulk { int bk_len; /* len of bulk data */ struct lnet_handle_md bk_mdh; int bk_sink; /* sink/source */ + int bk_alloc; /* # allocated iov */ int bk_niov; /* # iov in bk_iovs */ struct bio_vec bk_iovs[0]; }; @@ -397,6 +398,12 @@ struct srpc_service { */ int (*sv_handler)(struct srpc_server_rpc *); int (*sv_bulk_ready)(struct srpc_server_rpc *, int); + + /** Service side srpc constructor/destructor. + * used for the bulk preallocation as usual. 
+ */ + int (*sv_srpc_init)(struct srpc_server_rpc *, int); + void (*sv_srpc_fini)(struct srpc_server_rpc *); }; struct lst_session_id { @@ -513,9 +520,8 @@ void sfw_abort_rpc(struct srpc_client_rpc *rpc); void sfw_post_rpc(struct srpc_client_rpc *rpc); void sfw_client_rpc_done(struct srpc_client_rpc *rpc); void sfw_unpack_message(struct srpc_msg *msg); -void sfw_free_pages(struct srpc_server_rpc *rpc); void sfw_add_bulk_page(struct srpc_bulk *bk, struct page *pg, int i); -int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int npages, int len, +int sfw_alloc_pages(struct srpc_server_rpc *rpc, int cpt, int len, int sink); int sfw_make_session(struct srpc_mksn_reqst *request, struct srpc_mksn_reply *reply); @@ -528,9 +534,11 @@ srpc_create_client_rpc(struct lnet_process_id peer, int service, void srpc_post_rpc(struct srpc_client_rpc *rpc); void srpc_abort_rpc(struct srpc_client_rpc *rpc, int why); void srpc_free_bulk(struct srpc_bulk *bk); -struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int off, - unsigned int bulk_npg, unsigned int bulk_len, - int sink); + +struct srpc_bulk *srpc_alloc_bulk(int cpt, unsigned int bulk_len); +void srpc_init_bulk(struct srpc_bulk *bk, unsigned int off, + unsigned int bulk_len, int sink); + void srpc_send_rpc(struct swi_workitem *wi); int srpc_send_reply(struct srpc_server_rpc *rpc); int srpc_add_service(struct srpc_service *sv); @@ -696,7 +704,6 @@ void ping_init_test_service(void); extern struct sfw_test_client_ops brw_test_client; extern struct srpc_service brw_test_service; -void brw_init_test_client(void); void brw_init_test_service(void); #endif /* __SELFTEST_SELFTEST_H__ */