X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Flnet%2Fpeer.c;h=5b4b09e323ea0ffc1a76c966560cbfd1f3de1399;hb=ffd8e881bb98bf3fce0716b46cc51b1922642f6e;hp=bca85f27fe1cacadf44fd8cbb4b3c5ca160610fc;hpb=e531dc437c56a08a65de9074a511faa55184712b;p=fs%2Flustre-release.git diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index bca85f2..5b4b09e 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -26,6 +26,8 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,7 +44,7 @@ int lnet_peer_tables_create(void) { struct lnet_peer_table *ptable; - cfs_list_t *hash; + struct list_head *hash; int i; int j; @@ -54,7 +56,7 @@ lnet_peer_tables_create(void) } cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - CFS_INIT_LIST_HEAD(&ptable->pt_deathrow); + INIT_LIST_HEAD(&ptable->pt_deathrow); LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i, LNET_PEER_HASH_SIZE * sizeof(*hash)); @@ -65,7 +67,7 @@ lnet_peer_tables_create(void) } for (j = 0; j < LNET_PEER_HASH_SIZE; j++) - CFS_INIT_LIST_HEAD(&hash[j]); + INIT_LIST_HEAD(&hash[j]); ptable->pt_hash = hash; /* sign of initialization */ } @@ -76,7 +78,7 @@ void lnet_peer_tables_destroy(void) { struct lnet_peer_table *ptable; - cfs_list_t *hash; + struct list_head *hash; int i; int j; @@ -88,11 +90,11 @@ lnet_peer_tables_destroy(void) if (hash == NULL) /* not intialized */ break; - LASSERT(cfs_list_empty(&ptable->pt_deathrow)); + LASSERT(list_empty(&ptable->pt_deathrow)); ptable->pt_hash = NULL; for (j = 0; j < LNET_PEER_HASH_SIZE; j++) - LASSERT(cfs_list_empty(&hash[j])); + LASSERT(list_empty(&hash[j])); LIBCFS_FREE(hash, LNET_PEER_HASH_SIZE * sizeof(*hash)); } @@ -101,61 +103,111 @@ lnet_peer_tables_destroy(void) the_lnet.ln_peer_tables = NULL; } +static void +lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable) +{ + int i; + lnet_peer_t *lp; + lnet_peer_t *tmp; + + for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { + list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], + lp_hashlist) { + if (ni != NULL && ni != lp->lp_ni) + continue; + list_del_init(&lp->lp_hashlist); + /* Lose hash table's ref */ + ptable->pt_zombies++; + lnet_peer_decref_locked(lp); + } + } +} + +static void +lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable, + int cpt_locked) +{ + int i; + + for (i = 3; ptable->pt_zombies != 0; i++) { + lnet_net_unlock(cpt_locked); + + if (IS_PO2(i)) { + CDEBUG(D_WARNING, + "Waiting for %d zombies on peer table\n", + ptable->pt_zombies); + } + cfs_pause(cfs_time_seconds(1) >> 1); + lnet_net_lock(cpt_locked); + } +} + +static void +lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable, + int cpt_locked) +{ + lnet_peer_t *lp; + lnet_peer_t *tmp; + lnet_nid_t lp_nid; + int i; + + for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { + list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], + lp_hashlist) { + if (ni != lp->lp_ni) + continue; + + if (lp->lp_rtr_refcount == 0) + continue; + + lp_nid = lp->lp_nid; + + lnet_net_unlock(cpt_locked); + lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid); + lnet_net_lock(cpt_locked); + } + } +} + void -lnet_peer_tables_cleanup(void) +lnet_peer_tables_cleanup(lnet_ni_t *ni) { - struct lnet_peer_table *ptable; int i; - int j; + struct lnet_peer_table *ptable; + lnet_peer_t *lp; + struct list_head deathrow; - LASSERT(the_lnet.ln_shutdown); /* i.e. no new peers */ + INIT_LIST_HEAD(&deathrow); + LASSERT(the_lnet.ln_shutdown || ni != NULL); + /* If just deleting the peers for a NI, get rid of any routes these + * peers are gateways for. */ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { lnet_net_lock(i); - - for (j = 0; j < LNET_PEER_HASH_SIZE; j++) { - cfs_list_t *peers = &ptable->pt_hash[j]; - - while (!cfs_list_empty(peers)) { - lnet_peer_t *lp = cfs_list_entry(peers->next, - lnet_peer_t, - lp_hashlist); - cfs_list_del_init(&lp->lp_hashlist); - /* lose hash table's ref */ - lnet_peer_decref_locked(lp); - } - } - + lnet_peer_table_del_rtrs_locked(ni, ptable, i); lnet_net_unlock(i); } + /* Start the process of moving the applicable peers to + * deathrow. */ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - CFS_LIST_HEAD (deathrow); - lnet_peer_t *lp; - lnet_net_lock(i); + lnet_peer_table_cleanup_locked(ni, ptable); + lnet_net_unlock(i); + } - for (j = 3; ptable->pt_number != 0; j++) { - lnet_net_unlock(i); - - if ((j & (j - 1)) == 0) { - CDEBUG(D_WARNING, - "Waiting for %d peers on peer table\n", - ptable->pt_number); - } - cfs_pause(cfs_time_seconds(1) / 2); - lnet_net_lock(i); - } - cfs_list_splice_init(&ptable->pt_deathrow, &deathrow); - + /* Cleanup all entries on deathrow. */ + cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { + lnet_net_lock(i); + lnet_peer_table_deathrow_wait_locked(ptable, i); + list_splice_init(&ptable->pt_deathrow, &deathrow); lnet_net_unlock(i); + } - while (!cfs_list_empty(&deathrow)) { - lp = cfs_list_entry(deathrow.next, - lnet_peer_t, lp_hashlist); - cfs_list_del(&lp->lp_hashlist); - LIBCFS_FREE(lp, sizeof(*lp)); - } + while (!list_empty(&deathrow)) { + lp = list_entry(deathrow.next, lnet_peer_t, lp_hashlist); + list_del(&lp->lp_hashlist); + LIBCFS_FREE(lp, sizeof(*lp)); } } @@ -166,8 +218,8 @@ lnet_destroy_peer_locked(lnet_peer_t *lp) LASSERT(lp->lp_refcount == 0); LASSERT(lp->lp_rtr_refcount == 0); - LASSERT(cfs_list_empty(&lp->lp_txq)); - LASSERT(cfs_list_empty(&lp->lp_hashlist)); + LASSERT(list_empty(&lp->lp_txq)); + LASSERT(list_empty(&lp->lp_hashlist)); LASSERT(lp->lp_txqnob == 0); ptable = the_lnet.ln_peer_tables[lp->lp_cpt]; @@ -177,20 +229,21 @@ lnet_destroy_peer_locked(lnet_peer_t *lp) lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt); lp->lp_ni = NULL; - cfs_list_add(&lp->lp_hashlist, &ptable->pt_deathrow); + list_add(&lp->lp_hashlist, &ptable->pt_deathrow); + LASSERT(ptable->pt_zombies > 0); + ptable->pt_zombies--; } lnet_peer_t * lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid) { - cfs_list_t *peers; - lnet_peer_t *lp; + struct list_head *peers; + lnet_peer_t *lp; - if (the_lnet.ln_shutdown) - return NULL; + LASSERT(!the_lnet.ln_shutdown); peers = &ptable->pt_hash[lnet_nid2peerhash(nid)]; - cfs_list_for_each_entry(lp, peers, lp_hashlist) { + list_for_each_entry(lp, peers, lp_hashlist) { if (lp->lp_nid == nid) { lnet_peer_addref_locked(lp); return lp; @@ -207,23 +260,33 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) lnet_peer_t *lp = NULL; lnet_peer_t *lp2; int cpt2; + int rc = 0; + + *lpp = NULL; + if (the_lnet.ln_shutdown) /* it's shutting down */ + return -ESHUTDOWN; /* cpt can be LNET_LOCK_EX if it's called from router functions */ cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid); ptable = the_lnet.ln_peer_tables[cpt2]; lp = lnet_find_peer_locked(ptable, nid); - if (lp != NULL) { - *lpp = lp; - return 0; - } + if (lp != NULL) { + *lpp = lp; + return 0; + } - if (!cfs_list_empty(&ptable->pt_deathrow)) { - lp = cfs_list_entry(ptable->pt_deathrow.next, - lnet_peer_t, lp_hashlist); - cfs_list_del(&lp->lp_hashlist); + if (!list_empty(&ptable->pt_deathrow)) { + lp = list_entry(ptable->pt_deathrow.next, + lnet_peer_t, lp_hashlist); + list_del(&lp->lp_hashlist); } + /* + * take extra refcount in case another thread has shutdown LNet + * and destroyed locks and peer-table before I finish the allocation + */ + ptable->pt_number++; lnet_net_unlock(cpt); if (lp != NULL) @@ -232,14 +295,14 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp)); if (lp == NULL) { - *lpp = NULL; - LNET_LOCK(); - return -ENOMEM; - } + rc = -ENOMEM; + lnet_net_lock(cpt); + goto out; + } - CFS_INIT_LIST_HEAD(&lp->lp_txq); - CFS_INIT_LIST_HEAD(&lp->lp_rtrq); - CFS_INIT_LIST_HEAD(&lp->lp_routes); + INIT_LIST_HEAD(&lp->lp_txq); + INIT_LIST_HEAD(&lp->lp_rtrq); + INIT_LIST_HEAD(&lp->lp_routes); lp->lp_notify = 0; lp->lp_notifylnd = 0; @@ -253,48 +316,44 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) lp->lp_ping_feats = LNET_PING_FEAT_INVAL; lp->lp_nid = nid; lp->lp_cpt = cpt2; - lp->lp_refcount = 2; /* 1 for caller; 1 for hash */ + lp->lp_refcount = 2; /* 1 for caller; 1 for hash */ lp->lp_rtr_refcount = 0; lnet_net_lock(cpt); + if (the_lnet.ln_shutdown) { + rc = -ESHUTDOWN; + goto out; + } + lp2 = lnet_find_peer_locked(ptable, nid); if (lp2 != NULL) { - cfs_list_add(&lp->lp_hashlist, &ptable->pt_deathrow); - - if (the_lnet.ln_shutdown) { - lnet_peer_decref_locked(lp2); - *lpp = NULL; - return -ESHUTDOWN; - } - - *lpp = lp2; - return 0; - } + *lpp = lp2; + goto out; + } lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2); if (lp->lp_ni == NULL) { - cfs_list_add(&lp->lp_hashlist, &ptable->pt_deathrow); - - *lpp = NULL; - return the_lnet.ln_shutdown ? -ESHUTDOWN : -EHOSTUNREACH; - } - - lp->lp_txcredits = - lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; - lp->lp_rtrcredits = - lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni); + rc = -EHOSTUNREACH; + goto out; + } - /* can't add peers after shutdown starts */ - LASSERT (!the_lnet.ln_shutdown); + lp->lp_txcredits = + lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; + lp->lp_rtrcredits = + lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni); - cfs_list_add_tail(&lp->lp_hashlist, - &ptable->pt_hash[lnet_nid2peerhash(nid)]); + list_add_tail(&lp->lp_hashlist, + &ptable->pt_hash[lnet_nid2peerhash(nid)]); ptable->pt_version++; - ptable->pt_number++; - *lpp = lp; + return 0; +out: + if (lp != NULL) + list_add(&lp->lp_hashlist, &ptable->pt_deathrow); + ptable->pt_number--; + return rc; } void