From ffd8e881bb98bf3fce0716b46cc51b1922642f6e Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Thu, 27 Mar 2014 16:51:21 -0700 Subject: [PATCH] LU-2456 lnet: Dynamic LNet Configuration (DLC) This is the first patch of a set of patches that enables DLC. This patch adds some cleanup in the config.c as well as some preparatory changes in peer.c to enable dynamic network configuration Signed-off-by: Amir Shehata Change-Id: I8c8bbf3b55acf4d76f22a8be587b553a70d31889 Reviewed-on: http://review.whamcloud.com/9830 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Liang Zhen Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 2 +- lnet/include/lnet/lib-types.h | 5 +- lnet/lnet/api-ni.c | 14 ++--- lnet/lnet/config.c | 90 +++++++++++++++++------------ lnet/lnet/peer.c | 130 +++++++++++++++++++++++++++++------------- 5 files changed, 153 insertions(+), 88 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 13aa91b..abd8dd7 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -959,7 +959,7 @@ int lnet_parse_networks(struct list_head *nilist, char *networks); int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt); lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid); -void lnet_peer_tables_cleanup(void); +void lnet_peer_tables_cleanup(lnet_ni_t *ni); void lnet_peer_tables_destroy(void); int lnet_peer_tables_create(void); void lnet_debug_peer(lnet_nid_t nid); diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index b9b9f45..41fa32b 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -539,6 +539,8 @@ typedef struct lnet_peer { struct lnet_peer_table { int pt_version; /* /proc validity stamp */ int pt_number; /* # peers extant */ + int pt_zombies; /* # zombies to go to deathrow + * (and not there yet) */ struct list_head pt_deathrow; /* zombie peers */ struct list_head *pt_hash; /* 
NID->peer hash */ }; @@ -843,9 +845,6 @@ typedef struct /* registered LNDs */ struct list_head ln_lnds; - /* space for network names */ - char *ln_network_tokens; - int ln_network_tokens_nob; /* test protocol compatibility flags */ int ln_testprotocompat; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 1a2294f..3eb2ee7 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -1086,7 +1086,7 @@ lnet_shutdown_lndnis (void) /* Clear the peer table and wait for all peers to go (they hold refs on * their NIs) */ - lnet_peer_tables_cleanup(); + lnet_peer_tables_cleanup(NULL); lnet_net_lock(LNET_LOCK_EX); /* Now wait for the NI's I just nuked to show up on ln_zombie_nis @@ -1142,12 +1142,6 @@ lnet_shutdown_lndnis (void) the_lnet.ln_shutdown = 0; lnet_net_unlock(LNET_LOCK_EX); - - if (the_lnet.ln_network_tokens != NULL) { - LIBCFS_FREE(the_lnet.ln_network_tokens, - the_lnet.ln_network_tokens_nob); - the_lnet.ln_network_tokens = NULL; - } } int @@ -1168,9 +1162,9 @@ lnet_startup_lndnis (void) if (nets == NULL) goto failed; - rc = lnet_parse_networks(&nilist, nets); - if (rc != 0) - goto failed; + rc = lnet_parse_networks(&nilist, nets); + if (rc != 0) + goto failed; while (!list_empty(&nilist)) { ni = list_entry(nilist.next, lnet_ni_t, ni_list); diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index 345dd26..7dd588e 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -97,6 +97,8 @@ lnet_net_unique(__u32 net, struct list_head *nilist) void lnet_ni_free(struct lnet_ni *ni) { + int i; + if (ni->ni_refs != NULL) cfs_percpt_free(ni->ni_refs); @@ -111,6 +113,11 @@ lnet_ni_free(struct lnet_ni *ni) pthread_mutex_destroy(&ni->ni_lock); # endif #endif + for (i = 0; i < LNET_MAX_INTERFACES && + ni->ni_interfaces[i] != NULL; i++) { + LIBCFS_FREE(ni->ni_interfaces[i], + strlen(ni->ni_interfaces[i]) + 1); + } LIBCFS_FREE(ni, sizeof(*ni)); } @@ -205,15 +212,13 @@ lnet_parse_networks(struct list_head *nilist, char *networks) return -EINVAL; } - 
LIBCFS_ALLOC(tokens, tokensize); - if (tokens == NULL) { - CERROR("Can't allocate net tokens\n"); + LIBCFS_ALLOC(tokens, tokensize); + if (tokens == NULL) { + CERROR("Can't allocate net tokens\n"); return -ENOMEM; - } + } - the_lnet.ln_network_tokens = tokens; - the_lnet.ln_network_tokens_nob = tokensize; - memcpy (tokens, networks, tokensize); + memcpy(tokens, networks, tokensize); str = tmp = tokens; /* Add in the loopback network */ @@ -324,14 +329,28 @@ lnet_parse_networks(struct list_head *nilist, char *networks) goto failed_syntax; } - if (niface == LNET_MAX_INTERFACES) { - LCONSOLE_ERROR_MSG(0x115, "Too many interfaces " - "for net %s\n", - libcfs_net2str(net)); - goto failed; - } + if (niface == LNET_MAX_INTERFACES) { + LCONSOLE_ERROR_MSG(0x115, "Too many interfaces " + "for net %s\n", + libcfs_net2str(net)); + goto failed; + } - ni->ni_interfaces[niface++] = iface; + /* Allocate a separate piece of memory and copy + * into it the string, so we don't have + * a dependency on the tokens string. This way we + * can free the tokens at the end of the function. 
+ * The newly allocated ni_interfaces[] can be + * freed when freeing the NI */ + LIBCFS_ALLOC(ni->ni_interfaces[niface], + strlen(iface) + 1); + if (ni->ni_interfaces[niface] == NULL) { + CERROR("Can't allocate net interface name\n"); + goto failed; + } + strncpy(ni->ni_interfaces[niface], iface, + strlen(iface)); + niface++; iface = comma; } while (iface != NULL); @@ -356,6 +375,8 @@ lnet_parse_networks(struct list_head *nilist, char *networks) } LASSERT(!list_empty(nilist)); + + LIBCFS_FREE(tokens, tokensize); return 0; failed_syntax: @@ -372,7 +393,6 @@ lnet_parse_networks(struct list_head *nilist, char *networks) cfs_expr_list_free(el); LIBCFS_FREE(tokens, tokensize); - the_lnet.ln_network_tokens = NULL; return -EINVAL; } @@ -663,9 +683,9 @@ lnet_parse_route (char *str, int *im_a_router) char *sep; char *token = str; int ntokens = 0; - int myrc = -1; - unsigned int hops; - int got_hops = 0; + int myrc = -1; + unsigned int hops; + int got_hops = 0; unsigned int priority = 0; INIT_LIST_HEAD(&gateways); @@ -747,8 +767,8 @@ lnet_parse_route (char *str, int *im_a_router) } } - if (!got_hops) - hops = 1; + if (!got_hops) + hops = 1; LASSERT(!list_empty(&nets)); LASSERT(!list_empty(&gateways)); @@ -763,28 +783,28 @@ lnet_parse_route (char *str, int *im_a_router) nid = libcfs_str2nid(ltb->ltb_text); LASSERT(nid != LNET_NID_ANY); - if (lnet_islocalnid(nid)) { - *im_a_router = 1; - continue; - } + if (lnet_islocalnid(nid)) { + *im_a_router = 1; + continue; + } rc = lnet_add_route(net, hops, nid, priority); - if (rc != 0) { - CERROR("Can't create route " - "to %s via %s\n", - libcfs_net2str(net), - libcfs_nid2str(nid)); - goto out; - } + if (rc != 0) { + CERROR("Can't create route " + "to %s via %s\n", + libcfs_net2str(net), + libcfs_nid2str(nid)); + goto out; + } } } - myrc = 0; - goto out; + myrc = 0; + goto out; - token_error: +token_error: lnet_syntax("routes", cmd, (int)(token - str), strlen(token)); - out: +out: lnet_free_text_bufs(&nets); 
lnet_free_text_bufs(&gateways); return myrc; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 182f44d..5b4b09e 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -103,61 +103,111 @@ lnet_peer_tables_destroy(void) the_lnet.ln_peer_tables = NULL; } +static void +lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable) +{ + int i; + lnet_peer_t *lp; + lnet_peer_t *tmp; + + for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { + list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], + lp_hashlist) { + if (ni != NULL && ni != lp->lp_ni) + continue; + list_del_init(&lp->lp_hashlist); + /* Lose hash table's ref */ + ptable->pt_zombies++; + lnet_peer_decref_locked(lp); + } + } +} + +static void +lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable, + int cpt_locked) +{ + int i; + + for (i = 3; ptable->pt_zombies != 0; i++) { + lnet_net_unlock(cpt_locked); + + if (IS_PO2(i)) { + CDEBUG(D_WARNING, + "Waiting for %d zombies on peer table\n", + ptable->pt_zombies); + } + cfs_pause(cfs_time_seconds(1) >> 1); + lnet_net_lock(cpt_locked); + } +} + +static void +lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable, + int cpt_locked) +{ + lnet_peer_t *lp; + lnet_peer_t *tmp; + lnet_nid_t lp_nid; + int i; + + for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { + list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], + lp_hashlist) { + if (ni != lp->lp_ni) + continue; + + if (lp->lp_rtr_refcount == 0) + continue; + + lp_nid = lp->lp_nid; + + lnet_net_unlock(cpt_locked); + lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid); + lnet_net_lock(cpt_locked); + } + } +} + void -lnet_peer_tables_cleanup(void) +lnet_peer_tables_cleanup(lnet_ni_t *ni) { - struct lnet_peer_table *ptable; int i; - int j; + struct lnet_peer_table *ptable; + lnet_peer_t *lp; + struct list_head deathrow; - LASSERT(the_lnet.ln_shutdown); /* i.e. 
no new peers */ + INIT_LIST_HEAD(&deathrow); + LASSERT(the_lnet.ln_shutdown || ni != NULL); + /* If just deleting the peers for a NI, get rid of any routes these + * peers are gateways for. */ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { lnet_net_lock(i); - - for (j = 0; j < LNET_PEER_HASH_SIZE; j++) { - struct list_head *peers = &ptable->pt_hash[j]; - - while (!list_empty(peers)) { - lnet_peer_t *lp = list_entry(peers->next, - lnet_peer_t, - lp_hashlist); - list_del_init(&lp->lp_hashlist); - /* lose hash table's ref */ - lnet_peer_decref_locked(lp); - } - } - + lnet_peer_table_del_rtrs_locked(ni, ptable, i); lnet_net_unlock(i); } + /* Start the process of moving the applicable peers to + * deathrow. */ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - struct list_head deathrow = LIST_HEAD_INIT(deathrow); - lnet_peer_t *lp; - lnet_net_lock(i); + lnet_peer_table_cleanup_locked(ni, ptable); + lnet_net_unlock(i); + } - for (j = 3; ptable->pt_number != 0; j++) { - lnet_net_unlock(i); - - if ((j & (j - 1)) == 0) { - CDEBUG(D_WARNING, - "Waiting for %d peers on peer table\n", - ptable->pt_number); - } - cfs_pause(cfs_time_seconds(1) / 2); - lnet_net_lock(i); - } + /* Cleanup all entries on deathrow. */ + cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { + lnet_net_lock(i); + lnet_peer_table_deathrow_wait_locked(ptable, i); list_splice_init(&ptable->pt_deathrow, &deathrow); - lnet_net_unlock(i); + } - while (!list_empty(&deathrow)) { - lp = list_entry(deathrow.next, - lnet_peer_t, lp_hashlist); - list_del(&lp->lp_hashlist); - LIBCFS_FREE(lp, sizeof(*lp)); - } + while (!list_empty(&deathrow)) { + lp = list_entry(deathrow.next, lnet_peer_t, lp_hashlist); + list_del(&lp->lp_hashlist); + LIBCFS_FREE(lp, sizeof(*lp)); } } @@ -180,6 +230,8 @@ lnet_destroy_peer_locked(lnet_peer_t *lp) lp->lp_ni = NULL; list_add(&lp->lp_hashlist, &ptable->pt_deathrow); + LASSERT(ptable->pt_zombies > 0); + ptable->pt_zombies--; } lnet_peer_t * -- 1.8.3.1