From 5e1957841df3e771f3d72d8ea59180213430bbb9 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Tue, 12 Jun 2012 17:15:20 +0800 Subject: [PATCH] LU-56 lnet: cleanup for rtrpool and LNet counter This patch covers a few things: - code cleanup for router buffer pools - code cleanup for error handling in lnet_prepare() - code cleanup for LNet counters This is an intermediate patch for LNet SMP improvements. Signed-off-by: Liang Zhen Change-Id: I554d6acb79a55dd77f709d3b6633f157f50a8cee Reviewed-on: http://review.whamcloud.com/3091 Reviewed-by: Doug Oucharek Tested-by: Hudson Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 8 +- lnet/include/lnet/lib-types.h | 8 +- lnet/lnet/api-ni.c | 107 ++++++++++++++++--------- lnet/lnet/lib-move.c | 24 +++--- lnet/lnet/lib-msg.c | 8 +- lnet/lnet/router.c | 178 +++++++++++++++++++++++++++--------------- lnet/lnet/router_proc.c | 14 ++-- lnet/selftest/framework.c | 7 +- 8 files changed, 218 insertions(+), 136 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 6f16f49..1abeb33 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -716,9 +716,8 @@ int lnet_get_route(int idx, __u32 *net, __u32 *hops, lnet_nid_t *gateway, __u32 *alive); void lnet_proc_init(void); void lnet_proc_fini(void); -void lnet_init_rtrpools(void); -int lnet_alloc_rtrpools(int im_a_router); -void lnet_free_rtrpools(void); +int lnet_rtrpools_alloc(int im_a_router); +void lnet_rtrpools_free(void); lnet_remotenet_t *lnet_find_net_locked (__u32 net); int lnet_islocalnid(lnet_nid_t nid); @@ -811,6 +810,9 @@ char *lnet_msgtyp2str (int type); void lnet_print_hdr (lnet_hdr_t * hdr); int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold); +void lnet_counters_get(lnet_counters_t *counters); +void lnet_counters_reset(void); + unsigned int lnet_iov_nob (unsigned int niov, struct iovec *iov); int lnet_extract_iov (int dst_niov, struct iovec *dst, int 
src_niov, struct iovec *src, diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 63ef7c3..4ba4a40 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -716,7 +716,8 @@ typedef struct __u64 ln_routers_version; /* validity stamp */ int ln_routing; /* am I a router? */ - lnet_rtrbufpool_t ln_rtrpools[LNET_NRBPOOLS]; /* router buffer pools */ + /* router buffer pools */ + lnet_rtrbufpool_t *ln_rtrpools; __u64 ln_interface_cookie; /* uniquely identifies this ni in this epoch */ @@ -728,8 +729,9 @@ typedef struct cfs_list_t ln_test_peers; /* failure simulation */ /* message container */ - struct lnet_peer_table *ln_peer_table; struct lnet_msg_container ln_msg_container; + struct lnet_peer_table *ln_peer_table; + lnet_counters_t *ln_counters; lnet_handle_md_t ln_ping_target_md; lnet_handle_eq_t ln_ping_target_eq; @@ -747,8 +749,6 @@ typedef struct /* rcd ready for free */ cfs_list_t ln_rcd_zombie; - lnet_counters_t ln_counters; - #ifndef __KERNEL__ /* Temporary workaround to allow uOSS and test programs force * server mode in userspace. 
The only place where we use it is diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index a2e9df8..eb4e0b7 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -377,6 +377,47 @@ lnet_unregister_lnd (lnd_t *lnd) LNET_MUTEX_UNLOCK(&the_lnet.ln_lnd_mutex); } +void +lnet_counters_get(lnet_counters_t *counters) +{ /* snapshot: zero *counters, then add every LNet counter field while holding LNET_LOCK */ + lnet_counters_t *ctr; + + memset(counters, 0, sizeof(*counters)); + + LNET_LOCK(); + ctr = the_lnet.ln_counters; + do { /* iterate over counters of all CPTs in upcoming patches */ + counters->msgs_max += ctr->msgs_max; + counters->msgs_alloc += ctr->msgs_alloc; + counters->errors += ctr->errors; + counters->send_count += ctr->send_count; + counters->recv_count += ctr->recv_count; + counters->route_count += ctr->route_count; + counters->drop_count += ctr->drop_count; + counters->send_length += ctr->send_length; + counters->recv_length += ctr->recv_length; + counters->route_length += ctr->route_length; + counters->drop_length += ctr->drop_length; + } while (0); + + LNET_UNLOCK(); +} +EXPORT_SYMBOL(lnet_counters_get); + +void +lnet_counters_reset(void) +{ /* zero the LNet counters while holding LNET_LOCK */ + lnet_counters_t *counters; + + LNET_LOCK(); + counters = the_lnet.ln_counters; + do { /* iterate over counters of all CPTs in upcoming patches */ + memset(counters, 0, sizeof(lnet_counters_t)); + } while (0); + LNET_UNLOCK(); +} +EXPORT_SYMBOL(lnet_counters_reset); + #ifdef LNET_USE_LIB_FREELIST int @@ -636,6 +677,8 @@ lnet_server_mode() { } #endif +int lnet_unprepare(void); + int lnet_prepare(lnet_pid_t requested_pid) { @@ -665,38 +708,40 @@ lnet_prepare(lnet_pid_t requested_pid) } #endif - memset(&the_lnet.ln_counters, 0, - sizeof(the_lnet.ln_counters)); - - CFS_INIT_LIST_HEAD (&the_lnet.ln_test_peers); - CFS_INIT_LIST_HEAD (&the_lnet.ln_nis); - CFS_INIT_LIST_HEAD (&the_lnet.ln_zombie_nis); - CFS_INIT_LIST_HEAD (&the_lnet.ln_remote_nets); - CFS_INIT_LIST_HEAD (&the_lnet.ln_routers); + CFS_INIT_LIST_HEAD(&the_lnet.ln_test_peers); + CFS_INIT_LIST_HEAD(&the_lnet.ln_nis); + 
CFS_INIT_LIST_HEAD(&the_lnet.ln_zombie_nis); + CFS_INIT_LIST_HEAD(&the_lnet.ln_remote_nets); + CFS_INIT_LIST_HEAD(&the_lnet.ln_routers); - the_lnet.ln_interface_cookie = lnet_create_interface_cookie(); + the_lnet.ln_interface_cookie = lnet_create_interface_cookie(); - lnet_init_rtrpools(); + LIBCFS_ALLOC(the_lnet.ln_counters, sizeof(lnet_counters_t)); + if (the_lnet.ln_counters == NULL) { + CERROR("Failed to allocate counters for LNet\n"); + rc = -ENOMEM; + goto failed; + } rc = lnet_peer_table_create(); - if (rc != 0) - goto failed0; + if (rc != 0) + goto failed; /* NB: we will have instance of message container per CPT soon */ rc = lnet_msg_container_setup(&the_lnet.ln_msg_container); if (rc != 0) - goto failed1; + goto failed; rc = lnet_res_container_setup(&the_lnet.ln_eq_container, 0, LNET_COOKIE_TYPE_EQ, LNET_FL_MAX_EQS, sizeof(lnet_eq_t)); if (rc != 0) - goto failed2; + goto failed; recs = lnet_res_containers_create(LNET_COOKIE_TYPE_ME, LNET_FL_MAX_MES, sizeof(lnet_me_t)); if (recs == NULL) - goto failed3; + goto failed; the_lnet.ln_me_containers = recs; @@ -704,35 +749,20 @@ lnet_prepare(lnet_pid_t requested_pid) recs = lnet_res_containers_create(LNET_COOKIE_TYPE_MD, LNET_FL_MAX_MDS, sizeof(lnet_libmd_t)); if (recs == NULL) - goto failed3; + goto failed; the_lnet.ln_md_containers = recs; rc = lnet_portals_create(); if (rc != 0) { CERROR("Failed to create portals for LNet: %d\n", rc); - goto failed3; + goto failed; } return 0; - failed3: - /* NB: lnet_res_container_cleanup is safe to call for - * uninitialized container */ - if (the_lnet.ln_md_containers != NULL) { - lnet_res_containers_destroy(the_lnet.ln_md_containers); - the_lnet.ln_md_containers = NULL; - } - if (the_lnet.ln_me_containers != NULL) { - lnet_res_containers_destroy(the_lnet.ln_me_containers); - the_lnet.ln_me_containers = NULL; - } - lnet_res_container_cleanup(&the_lnet.ln_eq_container); - failed2: - lnet_msg_container_cleanup(&the_lnet.ln_msg_container); - failed1: - 
lnet_peer_table_destroy(); - failed0: + failed: + lnet_unprepare(); return rc; } @@ -766,9 +796,14 @@ lnet_unprepare (void) lnet_res_container_cleanup(&the_lnet.ln_eq_container); - lnet_free_rtrpools(); lnet_msg_container_cleanup(&the_lnet.ln_msg_container); lnet_peer_table_destroy(); + lnet_rtrpools_free(); + + if (the_lnet.ln_counters != NULL) { + LIBCFS_FREE(the_lnet.ln_counters, sizeof(lnet_counters_t)); + the_lnet.ln_counters = NULL; + } return 0; } @@ -1294,7 +1329,7 @@ LNetNIInit(lnet_pid_t requested_pid) if (rc != 0) goto failed2; - rc = lnet_alloc_rtrpools(im_a_router); + rc = lnet_rtrpools_alloc(im_a_router); if (rc != 0) goto failed2; diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 099bcd0..c1289b7 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -821,11 +821,11 @@ lnet_post_send_locked (lnet_msg_t *msg, int do_send) LASSERT(!do_send || msg->msg_tx_delayed); LASSERT(!msg->msg_receiving); - /* NB 'lp' is always the next hop */ - if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 && - lnet_peer_alive_locked(lp) == 0) { - the_lnet.ln_counters.drop_count++; - the_lnet.ln_counters.drop_length += msg->msg_len; + /* NB 'lp' is always the next hop */ + if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 && + lnet_peer_alive_locked(lp) == 0) { + the_lnet.ln_counters->drop_count++; + the_lnet.ln_counters->drop_length += msg->msg_len; LNET_UNLOCK(); CNETERR("Dropping message for %s: peer not alive\n", @@ -1331,12 +1331,12 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg) static void lnet_drop_message (lnet_ni_t *ni, void *private, unsigned int nob) { - LNET_LOCK(); - the_lnet.ln_counters.drop_count++; - the_lnet.ln_counters.drop_length += nob; - LNET_UNLOCK(); + LNET_LOCK(); + the_lnet.ln_counters->drop_count++; + the_lnet.ln_counters->drop_length += nob; + LNET_UNLOCK(); - lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob); + lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob); } static void @@ -2182,8 +2182,8 @@ lnet_create_reply_msg (lnet_ni_t 
*ni, lnet_msg_t *getmsg) drop: LNET_LOCK(); - the_lnet.ln_counters.drop_count++; - the_lnet.ln_counters.drop_length += getmd->md_length; + the_lnet.ln_counters->drop_count++; + the_lnet.ln_counters->drop_length += getmd->md_length; LNET_UNLOCK (); if (msg != NULL) diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index f03df6b..1d78ddb 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -138,7 +138,7 @@ void lnet_msg_commit(lnet_msg_t *msg, int sending) { struct lnet_msg_container *container = &the_lnet.ln_msg_container; - lnet_counters_t *counters = &the_lnet.ln_counters; + lnet_counters_t *counters = the_lnet.ln_counters; /* routed message can be committed for both receiving and sending */ LASSERT(!msg->msg_tx_committed); @@ -167,7 +167,7 @@ lnet_msg_commit(lnet_msg_t *msg, int sending) static void lnet_msg_tx_decommit(lnet_msg_t *msg, int status) { - lnet_counters_t *counters = &the_lnet.ln_counters; + lnet_counters_t *counters = the_lnet.ln_counters; lnet_event_t *ev = &msg->msg_ev; LASSERT(msg->msg_tx_committed); @@ -217,7 +217,7 @@ lnet_msg_tx_decommit(lnet_msg_t *msg, int status) static void lnet_msg_rx_decommit(lnet_msg_t *msg, int status) { - lnet_counters_t *counters = &the_lnet.ln_counters; + lnet_counters_t *counters = the_lnet.ln_counters; lnet_event_t *ev = &msg->msg_ev; LASSERT(!msg->msg_tx_committed); /* decommitted or uncommitted */ @@ -262,7 +262,7 @@ lnet_msg_rx_decommit(lnet_msg_t *msg, int status) void lnet_msg_decommit(lnet_msg_t *msg, int status) { - lnet_counters_t *counters = &the_lnet.ln_counters; + lnet_counters_t *counters = the_lnet.ln_counters; LASSERT(msg->msg_tx_committed || msg->msg_rx_committed); LASSERT(msg->msg_onactivelist); diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 4cdce48..6faf95b 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -26,19 +26,23 @@ #if defined(__KERNEL__) && defined(LNET_ROUTER) +#define LNET_NRB_TINY 1024 +#define LNET_NRB_SMALL 8192 +#define LNET_NRB_LARGE 512 + static 
char *forwarding = ""; CFS_MODULE_PARM(forwarding, "s", charp, 0444, "Explicitly enable/disable forwarding between networks"); -static int tiny_router_buffers = 1024; +static int tiny_router_buffers; CFS_MODULE_PARM(tiny_router_buffers, "i", int, 0444, - "# of 0 payload messages to buffer in the router"); -static int small_router_buffers = 8192; + "# of 0 payload messages to buffer in the router"); +static int small_router_buffers; CFS_MODULE_PARM(small_router_buffers, "i", int, 0444, - "# of small (1 page) messages to buffer in the router"); -static int large_router_buffers = 512; + "# of small (1 page) messages to buffer in the router"); +static int large_router_buffers; CFS_MODULE_PARM(large_router_buffers, "i", int, 0444, - "# of large messages to buffer in the router"); + "# of large messages to buffer in the router"); static int peer_buffer_credits = 0; CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444, "# router buffer credits per peer"); @@ -1269,9 +1273,12 @@ lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp) void lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp) { - int npages = rbp->rbp_npages; - int nbuffers = 0; - lnet_rtrbuf_t *rb; + int npages = rbp->rbp_npages; + int nbuffers = 0; + lnet_rtrbuf_t *rb; + + if (rbp->rbp_nbuffers == 0) /* not initialized or already freed */ + return; LASSERT (cfs_list_empty(&rbp->rbp_msgs)); LASSERT (rbp->rbp_credits == rbp->rbp_nbuffers); @@ -1338,29 +1345,72 @@ lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages) } void -lnet_free_rtrpools(void) +lnet_rtrpools_free(void) { - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[0]); - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[1]); - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[2]); + if (the_lnet.ln_rtrpools == NULL) /* uninitialized or freed */ + return; + + lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[0]); + lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[1]); + lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[2]); + + LIBCFS_FREE(the_lnet.ln_rtrpools, + sizeof(lnet_rtrbufpool_t) * 
LNET_NRBPOOLS); + the_lnet.ln_rtrpools = NULL; } -void -lnet_init_rtrpools(void) +static int +lnet_nrb_tiny_calculate(int npages) { - int small_pages = 1; - int large_pages = (LNET_MTU + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; + if (tiny_router_buffers > 0) + return tiny_router_buffers; - lnet_rtrpool_init(&the_lnet.ln_rtrpools[0], 0); - lnet_rtrpool_init(&the_lnet.ln_rtrpools[1], small_pages); - lnet_rtrpool_init(&the_lnet.ln_rtrpools[2], large_pages); + if (tiny_router_buffers == 0) + return LNET_NRB_TINY; + + LCONSOLE_ERROR_MSG(0x10c, "tiny_router_buffers=%d invalid when " + "routing enabled\n", tiny_router_buffers); + return -1; } +static int +lnet_nrb_small_calculate(int npages) +{ /* a positive small_router_buffers is used as-is, 0 selects LNET_NRB_SMALL, negative returns -1 */ + if (small_router_buffers > 0) + return small_router_buffers; + + if (small_router_buffers == 0) + return LNET_NRB_SMALL; + + LCONSOLE_ERROR_MSG(0x10d, "small_router_buffers=%d invalid when " + "routing enabled\n", small_router_buffers); + return -1; +} + +static int +lnet_nrb_large_calculate(int npages) +{ + if (large_router_buffers > 0) + return large_router_buffers; + + if (large_router_buffers == 0) + return LNET_NRB_LARGE; + + LCONSOLE_ERROR_MSG(0x10e, "large_router_buffers=%d invalid when" + " routing enabled\n", large_router_buffers); + return -1; +} int -lnet_alloc_rtrpools(int im_a_router) +lnet_rtrpools_alloc(int im_a_router) { - int rc; + lnet_rtrbufpool_t *rtrp; + int large_pages = (LNET_MTU + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; + int small_pages = 1; + int nrb_tiny; + int nrb_small; + int nrb_large; + int rc; if (!strcmp(forwarding, "")) { /* not set either way */ @@ -1377,51 +1427,54 @@ lnet_alloc_rtrpools(int im_a_router) return -EINVAL; } - if (tiny_router_buffers <= 0) { - LCONSOLE_ERROR_MSG(0x10c, "tiny_router_buffers=%d invalid when " - "routing enabled\n", tiny_router_buffers); - rc = -EINVAL; - goto failed; - } + nrb_tiny = lnet_nrb_tiny_calculate(0); + if (nrb_tiny < 0) + return -EINVAL; - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[0], - 
tiny_router_buffers); - if (rc != 0) - goto failed; + nrb_small = lnet_nrb_small_calculate(small_pages); + if (nrb_small < 0) + return -EINVAL; - if (small_router_buffers <= 0) { - LCONSOLE_ERROR_MSG(0x10d, "small_router_buffers=%d invalid when" - " routing enabled\n", small_router_buffers); - rc = -EINVAL; - goto failed; - } + nrb_large = lnet_nrb_large_calculate(large_pages); + if (nrb_large < 0) + return -EINVAL; - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[1], - small_router_buffers); - if (rc != 0) - goto failed; + LIBCFS_ALLOC(the_lnet.ln_rtrpools, + sizeof(lnet_rtrbufpool_t) * LNET_NRBPOOLS); + if (the_lnet.ln_rtrpools == NULL) { + LCONSOLE_ERROR_MSG(0x10c, + "Failed to initialize router buffer pool\n"); + return -ENOMEM; + } - if (large_router_buffers <= 0) { - LCONSOLE_ERROR_MSG(0x10e, "large_router_buffers=%d invalid when" - " routing enabled\n", large_router_buffers); - rc = -EINVAL; - goto failed; - } + do { /* iterate over rtrpools on all CPTs in upcoming patches */ + rtrp = the_lnet.ln_rtrpools; - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[2], - large_router_buffers); - if (rc != 0) - goto failed; + lnet_rtrpool_init(&rtrp[0], 0); + rc = lnet_rtrpool_alloc_bufs(&rtrp[0], nrb_tiny); + if (rc != 0) + goto failed; - LNET_LOCK(); - the_lnet.ln_routing = 1; - LNET_UNLOCK(); + lnet_rtrpool_init(&rtrp[1], small_pages); + rc = lnet_rtrpool_alloc_bufs(&rtrp[1], nrb_small); + if (rc != 0) + goto failed; - return 0; + lnet_rtrpool_init(&rtrp[2], large_pages); + rc = lnet_rtrpool_alloc_bufs(&rtrp[2], nrb_large); + if (rc != 0) + goto failed; + } while (0); + + LNET_LOCK(); + the_lnet.ln_routing = 1; + LNET_UNLOCK(); + + return 0; failed: - lnet_free_rtrpools(); - return rc; + lnet_rtrpools_free(); /* undo partial setup: frees pool buffers and the pool array */ + return rc; } int @@ -1605,17 +1658,12 @@ lnet_get_tunables (void) } void -lnet_free_rtrpools (void) -{ -} - -void -lnet_init_rtrpools (void) +lnet_rtrpools_free(void) { } int -lnet_alloc_rtrpools (int im_a_arouter) +lnet_rtrpools_alloc(int im_a_arouter) 
{ return 0; } diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index 2e8f0fa..60e0dda 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -97,10 +97,8 @@ static int __proc_lnet_stats(void *data, int write, const int tmpsiz = 256; /* 7 %u and 4 LPU64 */ if (write) { - LNET_LOCK(); - memset(&the_lnet.ln_counters, 0, sizeof(the_lnet.ln_counters)); - LNET_UNLOCK(); - return 0; + lnet_counters_reset(); + return 0; } /* read */ @@ -115,9 +113,7 @@ static int __proc_lnet_stats(void *data, int write, return -ENOMEM; } - LNET_LOCK(); - *ctrs = the_lnet.ln_counters; - LNET_UNLOCK(); + lnet_counters_get(ctrs); len = snprintf(tmpstr, tmpsiz, "%u %u %u %u %u %u %u "LPU64" "LPU64" " @@ -566,6 +562,9 @@ static int __proc_lnet_buffers(void *data, int write, "pages", "count", "credits", "min"); LASSERT (tmpstr + tmpsiz - s > 0); + if (the_lnet.ln_rtrpools == NULL) + goto out; /* I'm not a router */ + LNET_LOCK(); for (idx = 0; idx < LNET_NRBPOOLS; idx++) { @@ -584,6 +583,7 @@ static int __proc_lnet_buffers(void *data, int write, LNET_UNLOCK(); + out: len = s - tmpstr; if (pos >= min_t(int, len, strlen(tmpstr))) diff --git a/lnet/selftest/framework.c b/lnet/selftest/framework.c index 7f7844d..4ef5679 100644 --- a/lnet/selftest/framework.c +++ b/lnet/selftest/framework.c @@ -416,11 +416,8 @@ sfw_get_stats (srpc_stat_reqst_t *request, srpc_stat_reply_t *reply) return 0; } - LNET_LOCK(); - reply->str_lnet = the_lnet.ln_counters; - LNET_UNLOCK(); - - srpc_get_counters(&reply->str_rpc); + lnet_counters_get(&reply->str_lnet); + srpc_get_counters(&reply->str_rpc); /* send over the msecs since the session was started - with 32 bits to send, this is ~49 days */ -- 1.8.3.1