From e069296630240947f1815e505067fd48033909f7 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Fri, 15 Jun 2012 23:56:44 +0800 Subject: [PATCH] LU-56 lnet: allow user to bind NI on CPTs By default, an NI is bound to all CPTs, which means messages for an NI could be handled by LND threads on any CPT (hashed by NID). This patch adds a new parameter for NI configuration that allows the user to bind an NI to specified CPT(s): - tcp0(eth1)[0,1] bind NI (tcp0) on CPT0 and CPT1 - o2ib(ib0)[2-5] bind NI (o2ib) on CPT2,3,4,5 The expression between square brackets lists the CPTs that the user wants this NI bound to; if the user provides this expression, messages for the NI will only be handled by LND threads running on the specified CPTs. This is an intermediate patch; to get this feature we also need the upcoming LND patches. Signed-off-by: Liang Zhen Change-Id: I706a92c6da181ed0fec857cc25b5ae27a7a7c36b Reviewed-on: http://review.whamcloud.com/3114 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Doug Oucharek Reviewed-by: Bobi Jam Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 1 + lnet/include/lnet/lib-types.h | 5 ++ lnet/lnet/api-ni.c | 68 ++++++++++++++++++-- lnet/lnet/config.c | 143 +++++++++++++++++++++++++++++------------- lnet/lnet/lib-move.c | 2 +- lnet/lnet/peer.c | 2 +- lnet/lnet/router.c | 2 +- lnet/lnet/router_proc.c | 12 +++- 8 files changed, 182 insertions(+), 53 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index cc991c2..2eab4a6 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -726,6 +726,7 @@ lnet_set_msg_uid(lnet_ni_t *ni, lnet_msg_t *msg, lnet_uid_t uid) } #endif +extern int lnet_cpt_of_nid_locked(lnet_nid_t nid); extern int lnet_cpt_of_nid(lnet_nid_t nid); extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt); extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt); diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 22ef8f7..d39553a 100644 --- 
a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -422,6 +422,7 @@ typedef struct lnet_ni { # endif #endif cfs_list_t ni_list; /* chain on ln_nis */ + cfs_list_t ni_cptlist; /* chain on ln_nis_cpt */ int ni_maxtxcredits; /* # tx credits */ /* # per-peer send credits */ int ni_peertxcredits; @@ -429,6 +430,8 @@ typedef struct lnet_ni { int ni_peerrtrcredits; /* seconds to consider peer dead */ int ni_peertimeout; + int ni_ncpts; /* number of CPTs */ + __u32 *ni_cpts; /* bond NI on some CPTs */ lnet_nid_t ni_nid; /* interface's NID */ void *ni_data; /* instance-specific data */ lnd_t *ni_lnd; /* procedural interface */ @@ -713,6 +716,8 @@ typedef struct cfs_list_t ln_test_peers; cfs_list_t ln_nis; /* LND instances */ + /* NIs bond on specific CPT(s) */ + cfs_list_t ln_nis_cpt; /* dying LND instances */ cfs_list_t ln_nis_zombie; lnet_ni_t *ln_loni; /* the loopback NI */ diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 91450f1..fe21414 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -719,6 +719,7 @@ lnet_prepare(lnet_pid_t requested_pid) CFS_INIT_LIST_HEAD(&the_lnet.ln_test_peers); CFS_INIT_LIST_HEAD(&the_lnet.ln_nis); + CFS_INIT_LIST_HEAD(&the_lnet.ln_nis_cpt); CFS_INIT_LIST_HEAD(&the_lnet.ln_nis_zombie); CFS_INIT_LIST_HEAD(&the_lnet.ln_remote_nets); CFS_INIT_LIST_HEAD(&the_lnet.ln_routers); @@ -789,6 +790,7 @@ lnet_unprepare (void) LASSERT(the_lnet.ln_refcount == 0); LASSERT(cfs_list_empty(&the_lnet.ln_test_peers)); LASSERT(cfs_list_empty(&the_lnet.ln_nis)); + LASSERT(cfs_list_empty(&the_lnet.ln_nis_cpt)); LASSERT(cfs_list_empty(&the_lnet.ln_nis_zombie)); lnet_portals_destroy(); @@ -837,27 +839,66 @@ lnet_net2ni_locked(__u32 net, int cpt) return NULL; } -unsigned int -lnet_nid_cpt_hash(lnet_nid_t nid) +static unsigned int +lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number) { __u64 key = nid; unsigned int val; + LASSERT(number >= 1 && number <= LNET_CPT_NUMBER); + + if (number == 1) + return 0; + val = cfs_hash_long(key, 
LNET_CPT_BITS); /* NB: LNET_CP_NUMBER doesn't have to be PO2 */ - if (val < LNET_CPT_NUMBER) + if (val < number) return val; - return (unsigned int)((key + val + (val >> 1)) % LNET_CPT_NUMBER); + return (unsigned int)((key + val + (val >> 1)) % number); +} + +int +lnet_cpt_of_nid_locked(lnet_nid_t nid) +{ + struct lnet_ni *ni; + + /* must called with hold of lnet_net_lock */ + if (LNET_CPT_NUMBER == 1) + return 0; /* the only one */ + + /* take lnet_net_lock(any) would be OK */ + if (!cfs_list_empty(&the_lnet.ln_nis_cpt)) { + cfs_list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) { + if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) + continue; + + LASSERT(ni->ni_cpts != NULL); + return ni->ni_cpts[lnet_nid_cpt_hash + (nid, ni->ni_ncpts)]; + } + } + + return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); } int lnet_cpt_of_nid(lnet_nid_t nid) { + int cpt; + int cpt2; + if (LNET_CPT_NUMBER == 1) return 0; /* the only one */ - return lnet_nid_cpt_hash(nid); + if (cfs_list_empty(&the_lnet.ln_nis_cpt)) + return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); + + cpt = lnet_net_lock_current(); + cpt2 = lnet_cpt_of_nid_locked(nid); + lnet_net_unlock(cpt); + + return cpt2; } EXPORT_SYMBOL(lnet_cpt_of_nid); @@ -942,7 +983,12 @@ lnet_ni_tq_credits(lnet_ni_t *ni) { int credits; - credits = ni->ni_maxtxcredits / LNET_CPT_NUMBER; + LASSERT(ni->ni_ncpts >= 1); + + if (ni->ni_ncpts == 1) + return ni->ni_maxtxcredits; + + credits = ni->ni_maxtxcredits / ni->ni_ncpts; credits = max(credits, 8 * ni->ni_peertxcredits); credits = min(credits, ni->ni_maxtxcredits); @@ -974,6 +1020,11 @@ lnet_shutdown_lndnis (void) /* move it to zombie list and nobody can find it anymore */ cfs_list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ + + if (!cfs_list_empty(&ni->ni_cptlist)) { + cfs_list_del_init(&ni->ni_cptlist); + lnet_ni_decref_locked(ni, 0); + } } /* Drop the cached eqwait NI. 
*/ @@ -1158,6 +1209,11 @@ lnet_startup_lndnis (void) /* refcount for ln_nis */ lnet_ni_addref_locked(ni, 0); cfs_list_add_tail(&ni->ni_list, &the_lnet.ln_nis); + if (ni->ni_cpts != NULL) { + cfs_list_add_tail(&ni->ni_cptlist, + &the_lnet.ln_nis_cpt); + lnet_ni_addref_locked(ni, 0); + } lnet_net_unlock(LNET_LOCK_EX); diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index a20044b..99c9ff3 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -100,6 +100,11 @@ lnet_ni_free(struct lnet_ni *ni) if (ni->ni_tx_queues != NULL) cfs_percpt_free(ni->ni_tx_queues); + if (ni->ni_cpts != NULL) { + LIBCFS_FREE(ni->ni_cpts, + sizeof(ni->ni_cpts[0]) * ni->ni_ncpts); + } + #ifndef __KERNEL__ # ifdef HAVE_LIBPTHREAD pthread_mutex_destroy(&ni->ni_lock); @@ -109,10 +114,11 @@ lnet_ni_free(struct lnet_ni *ni) } lnet_ni_t * -lnet_ni_alloc(__u32 net, cfs_list_t *nilist) +lnet_ni_alloc(__u32 net, struct cfs_expr_list **el, cfs_list_t *nilist) { struct lnet_tx_queue *tq; struct lnet_ni *ni; + int rc; int i; if (!lnet_net_unique(net, nilist)) { @@ -135,6 +141,7 @@ lnet_ni_alloc(__u32 net, cfs_list_t *nilist) pthread_mutex_init(&ni->ni_lock, NULL); # endif #endif + CFS_INIT_LIST_HEAD(&ni->ni_cptlist); ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*ni->ni_refs[0])); if (ni->ni_refs == NULL) @@ -148,6 +155,28 @@ lnet_ni_alloc(__u32 net, cfs_list_t *nilist) cfs_percpt_for_each(tq, i, ni->ni_tx_queues) CFS_INIT_LIST_HEAD(&tq->tq_delayed); + if (el == NULL || *el == NULL) { + ni->ni_cpts = NULL; + ni->ni_ncpts = LNET_CPT_NUMBER; + } else { + rc = cfs_expr_list_values(*el, LNET_CPT_NUMBER, &ni->ni_cpts); + if (rc <= 0) { + CERROR("Failed to set CPTs for NI %s: %d\n", + libcfs_net2str(net), rc); + goto failed; + } + + LASSERT(rc <= LNET_CPT_NUMBER); + if (rc == LNET_CPT_NUMBER) { + LIBCFS_FREE(ni->ni_cpts, rc * sizeof(ni->ni_cpts[0])); + ni->ni_cpts = NULL; + } + + ni->ni_ncpts = rc; + cfs_expr_list_free(*el); /* consume it */ + *el = NULL; + } + /* LND will fill in the address 
part of the NID */ ni->ni_nid = LNET_MKNID(net, 0); ni->ni_last_alive = cfs_time_current(); @@ -161,12 +190,14 @@ lnet_ni_alloc(__u32 net, cfs_list_t *nilist) int lnet_parse_networks(cfs_list_t *nilist, char *networks) { - int tokensize = strlen(networks) + 1; - char *tokens; - char *str; - lnet_ni_t *ni; - __u32 net; - int nnets = 0; + struct cfs_expr_list *el = NULL; + int tokensize = strlen(networks) + 1; + char *tokens; + char *str; + char *tmp; + struct lnet_ni *ni; + __u32 net; + int nnets = 0; if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) { /* _WAY_ conservative */ @@ -184,21 +215,48 @@ lnet_parse_networks(cfs_list_t *nilist, char *networks) the_lnet.ln_network_tokens = tokens; the_lnet.ln_network_tokens_nob = tokensize; memcpy (tokens, networks, tokensize); - str = tokens; + str = tmp = tokens; /* Add in the loopback network */ - ni = lnet_ni_alloc(LNET_MKNET(LOLND, 0), nilist); + ni = lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, nilist); if (ni == NULL) goto failed; - while (str != NULL && *str != 0) { - char *comma = strchr(str, ','); - char *bracket = strchr(str, '('); - int niface; - char *iface; + while (str != NULL && *str != 0) { + char *comma = strchr(str, ','); + char *bracket = strchr(str, '('); + char *square = strchr(str, '['); + char *iface; + int niface; + int rc; + + /* NB we don't check interface conflicts here; it's the LNDs + * responsibility (if it cares at all) */ + + if (square != NULL && (comma == NULL || square < comma)) { + /* i.e: o2ib0(ib0)[1,2], number between square + * brackets are CPTs this NI needs to be bond */ + if (bracket != NULL && bracket > square) { + tmp = square; + goto failed_syntax; + } + + tmp = strchr(square, ']'); + if (tmp == NULL) { + tmp = square; + goto failed_syntax; + } + + rc = cfs_expr_list_parse(square, tmp - square + 1, + 0, LNET_CPT_NUMBER - 1, &el); + if (rc != 0) { + tmp = square; + goto failed_syntax; + } - /* NB we don't check interface conflicts here; it's the LNDs - * responsibility (if it cares 
at all) */ + while (square <= tmp) + *square++ = ' '; + } if (bracket == NULL || (comma != NULL && comma < bracket)) { @@ -210,15 +268,14 @@ lnet_parse_networks(cfs_list_t *nilist, char *networks) net = libcfs_str2net(cfs_trimwhite(str)); if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("networks", networks, - (int)(str - tokens), strlen(str)); LCONSOLE_ERROR_MSG(0x113, "Unrecognised network" " type\n"); - goto failed; + tmp = str; + goto failed_syntax; } if (LNET_NETTYP(net) != LOLND && /* LO is implicit */ - lnet_ni_alloc(net, nilist) == NULL) + lnet_ni_alloc(net, &el, nilist) == NULL) goto failed; str = comma; @@ -228,13 +285,12 @@ lnet_parse_networks(cfs_list_t *nilist, char *networks) *bracket = 0; net = libcfs_str2net(cfs_trimwhite(str)); if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("networks", networks, - (int)(str - tokens), strlen(str)); - goto failed; + tmp = str; + goto failed_syntax; } - nnets++; - ni = lnet_ni_alloc(net, nilist); + nnets++; + ni = lnet_ni_alloc(net, &el, nilist); if (ni == NULL) goto failed; @@ -243,9 +299,8 @@ lnet_parse_networks(cfs_list_t *nilist, char *networks) bracket = strchr(iface, ')'); if (bracket == NULL) { - lnet_syntax("networks", networks, - (int)(iface - tokens), strlen(iface)); - goto failed; + tmp = iface; + goto failed_syntax; } *bracket = 0; @@ -256,10 +311,8 @@ lnet_parse_networks(cfs_list_t *nilist, char *networks) iface = cfs_trimwhite(iface); if (*iface == 0) { - lnet_syntax("networks", networks, - (int)(iface - tokens), - strlen(iface)); - goto failed; + tmp = iface; + goto failed_syntax; } if (niface == LNET_MAX_INTERFACES) { @@ -279,9 +332,8 @@ lnet_parse_networks(cfs_list_t *nilist, char *networks) *comma = 0; str = cfs_trimwhite(str); if (*str != 0) { - lnet_syntax("networks", networks, - (int)(str - tokens), strlen(str)); - goto failed; + tmp = str; + goto failed_syntax; } str = comma + 1; continue; @@ -289,22 +341,27 @@ lnet_parse_networks(cfs_list_t *nilist, char *networks) str = 
cfs_trimwhite(str); if (*str != 0) { - lnet_syntax("networks", networks, - (int)(str - tokens), strlen(str)); - goto failed; - } + tmp = str; + goto failed_syntax; + } } - LASSERT (!cfs_list_empty(nilist)); - return 0; + LASSERT(!cfs_list_empty(nilist)); + return 0; + failed_syntax: + lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp)); failed: - while (!cfs_list_empty(nilist)) { - ni = cfs_list_entry(nilist->next, lnet_ni_t, ni_list); + while (!cfs_list_empty(nilist)) { + ni = cfs_list_entry(nilist->next, lnet_ni_t, ni_list); cfs_list_del(&ni->ni_list); lnet_ni_free(ni); } + + if (el != NULL) + cfs_expr_list_free(el); + LIBCFS_FREE(tokens, tokensize); the_lnet.ln_network_tokens = NULL; diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 07b2d97..f556eb2 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -1327,7 +1327,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) * pre-determined router, this can happen if router table * was changed when we release the lock */ if (rtr_nid != lp->lp_nid) { - cpt2 = lnet_cpt_of_nid(lp->lp_nid); + cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid); if (cpt2 != cpt) { if (src_ni != NULL) lnet_ni_decref_locked(src_ni, cpt); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index b257624..e9e3147 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -209,7 +209,7 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) int cpt2; /* cpt can be LNET_LOCK_EX if it's called from router functions */ - cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid(nid); + cpt2 = cpt != LNET_LOCK_EX ? 
cpt : lnet_cpt_of_nid_locked(nid); ptable = the_lnet.ln_peer_tables[cpt2]; lp = lnet_find_peer_locked(ptable, nid); diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 2ba0a11..49fa60f 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -1217,7 +1217,7 @@ rescan: cfs_list_for_each(entry, &the_lnet.ln_routers) { rtr = cfs_list_entry(entry, lnet_peer_t, lp_rtr_list); - cpt2 = lnet_cpt_of_nid(rtr->lp_nid); + cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid); if (cpt != cpt2) { lnet_net_unlock(cpt); cpt = cpt2; diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index 5f85792..a91d82b 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -672,11 +672,12 @@ int LL_PROC_PROTO(proc_lnet_nis) } if (ni != NULL) { - char *stat; struct lnet_tx_queue *tq; + char *stat; long now = cfs_time_current_sec(); int last_alive = -1; int i; + int j; if (the_lnet.ln_routing) last_alive = now - ni->ni_last_alive; @@ -694,6 +695,15 @@ int LL_PROC_PROTO(proc_lnet_nis) /* we actually output credits information for * TX queue of each partition */ cfs_percpt_for_each(tq, i, ni->ni_tx_queues) { + for (j = 0; ni->ni_cpts != NULL && + j < ni->ni_ncpts; j++) { + if (i == ni->ni_cpts[j]) + break; + } + + if (j == ni->ni_ncpts) + continue; + if (i != 0) lnet_net_lock(i); -- 1.8.3.1