X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fgnilnd%2Fgnilnd_conn.c;h=948cc1c05829ebe2a90e4643e4d04bea9cb46679;hb=refs%2Fchanges%2F74%2F13174%2F2;hp=5fee8cc19b88f15ec51bc80a64e99a41190a8b33;hpb=59071a8334bbc1a3a6d31565b7474063438d1f43;p=fs%2Flustre-release.git diff --git a/lnet/klnds/gnilnd/gnilnd_conn.c b/lnet/klnds/gnilnd/gnilnd_conn.c index 5fee8cc..948cc1c 100644 --- a/lnet/klnds/gnilnd/gnilnd_conn.c +++ b/lnet/klnds/gnilnd/gnilnd_conn.c @@ -1,6 +1,8 @@ /* * Copyright (C) 2012 Cray, Inc. * + * Copyright (c) 2014, Intel Corporation. + * * Author: Nic Henke * Author: James Shimek * @@ -79,9 +81,9 @@ kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys) gni_smsg_attr_t smsg_attr; unsigned long fmablk_vers; - /* we'll use fmablk_vers and the gnd_fmablk_sem to gate access + /* we'll use fmablk_vers and the gnd_fmablk_mutex to gate access * to this allocation code. Everyone will sample the version - * before and after getting the semaphore. If it has changed, + * before and after getting the mutex. If it has changed, * we'll bail out to check the lists again - this indicates that * some sort of change was made to the lists and it is possible * that there is a mailbox for us to find now. This should prevent @@ -89,12 +91,12 @@ kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys) * that need a yet-to-be-allocated mailbox for a connection. */ fmablk_vers = atomic_read(&device->gnd_fmablk_vers); - down(&device->gnd_fmablk_sem); + mutex_lock(&device->gnd_fmablk_mutex); if (fmablk_vers != atomic_read(&device->gnd_fmablk_vers)) { /* version changed while we were waiting for semaphore, * we'll recheck the lists assuming something nice happened */ - up(&device->gnd_fmablk_sem); + mutex_unlock(&device->gnd_fmablk_mutex); return 0; } @@ -203,7 +205,7 @@ kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys) spin_unlock(&device->gnd_fmablk_lock); - up(&device->gnd_fmablk_sem); + mutex_unlock(&device->gnd_fmablk_mutex); return 0; @@ -220,7 +222,7 @@ free_blk: free_desc: LIBCFS_FREE(fma_blk, sizeof(kgn_fma_memblock_t)); out: - up(&device->gnd_fmablk_sem); + mutex_unlock(&device->gnd_fmablk_mutex); return rc; } @@ -584,42 +586,42 @@ kgnilnd_map_phys_fmablk(kgn_device_t *device) int rc = 0; kgn_fma_memblock_t *fma_blk; - /* use sem to gate access to single thread, just in case */ - down(&device->gnd_fmablk_sem); + /* use mutex to gate access to single thread, just in case */ + mutex_lock(&device->gnd_fmablk_mutex); spin_lock(&device->gnd_fmablk_lock); list_for_each_entry(fma_blk, &device->gnd_fma_buffs, gnm_bufflist) { - if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) + if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) { rc = kgnilnd_map_fmablk(device, fma_blk); if (rc) break; + } } spin_unlock(&device->gnd_fmablk_lock); - up(&device->gnd_fmablk_sem); + mutex_unlock(&device->gnd_fmablk_mutex); RETURN(rc); } void -kgnilnd_unmap_phys_fmablk(kgn_device_t *device) +kgnilnd_unmap_fma_blocks(kgn_device_t *device) { kgn_fma_memblock_t *fma_blk; - /* use sem to gate access to single thread, just in case */ - down(&device->gnd_fmablk_sem); + /* use mutex to gate access to single thread, just in case */ + mutex_lock(&device->gnd_fmablk_mutex); spin_lock(&device->gnd_fmablk_lock); list_for_each_entry(fma_blk, &device->gnd_fma_buffs, gnm_bufflist) { - if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) - kgnilnd_unmap_fmablk(device, fma_blk); + kgnilnd_unmap_fmablk(device, fma_blk); } spin_unlock(&device->gnd_fmablk_lock); - up(&device->gnd_fmablk_sem); + mutex_unlock(&device->gnd_fmablk_mutex); } void @@ -628,8 +630,8 @@ kgnilnd_free_phys_fmablk(kgn_device_t *device) kgn_fma_memblock_t *fma_blk, *fma_blkN; - /* use sem to gate access to single thread, just in case */ - down(&device->gnd_fmablk_sem); + /* use mutex to gate access to single thread, just in case */ + mutex_lock(&device->gnd_fmablk_mutex); spin_lock(&device->gnd_fmablk_lock); @@ -639,7 +641,7 @@ kgnilnd_free_phys_fmablk(kgn_device_t *device) } spin_unlock(&device->gnd_fmablk_lock); - up(&device->gnd_fmablk_sem); + mutex_unlock(&device->gnd_fmablk_mutex); } /* kgnilnd dgram nid->struct managment */ @@ -939,8 +941,10 @@ kgnilnd_alloc_dgram(kgn_dgram_t **dgramp, kgn_device_t *dev, kgn_dgram_type_t ty atomic_inc(&dev->gnd_ndgrams); - CDEBUG(D_MALLOC|D_NETTRACE, "slab-alloced 'dgram': %lu at %p.\n", - sizeof(*dgram), dgram); + CDEBUG(D_MALLOC|D_NETTRACE, "slab-alloced 'dgram': %lu at %p %s ndgrams" + " %d\n", + sizeof(*dgram), dgram, kgnilnd_dgram_type2str(dgram), + atomic_read(&dev->gnd_ndgrams)); *dgramp = dgram; return 0; @@ -1152,8 +1156,10 @@ kgnilnd_free_dgram(kgn_device_t *dev, kgn_dgram_t *dgram) atomic_dec(&dev->gnd_ndgrams); kmem_cache_free(kgnilnd_data.kgn_dgram_cache, dgram); - CDEBUG(D_MALLOC|D_NETTRACE, "slab-freed 'dgram': %lu at %p.\n", - sizeof(*dgram), dgram); + CDEBUG(D_MALLOC|D_NETTRACE, "slab-freed 'dgram': %lu at %p %s" + " ndgrams %d\n", + sizeof(*dgram), dgram, kgnilnd_dgram_type2str(dgram), + atomic_read(&dev->gnd_ndgrams)); } int @@ -1304,9 +1310,44 @@ post_failed: RETURN(rc); } +/* The shutdown flag is set from the shutdown and stack reset threads. */ void -kgnilnd_release_dgram(kgn_device_t *dev, kgn_dgram_t *dgram) +kgnilnd_release_dgram(kgn_device_t *dev, kgn_dgram_t *dgram, int shutdown) { + /* The conns of canceled active dgrams need to be put in purgatory so + * we don't reuse the mailbox */ + if (unlikely(dgram->gndg_state == GNILND_DGRAM_CANCELED)) { + kgn_peer_t *peer; + kgn_conn_t *conn = dgram->gndg_conn; + lnet_nid_t nid = dgram->gndg_conn_out.gncr_dstnid; + + dgram->gndg_state = GNILND_DGRAM_DONE; + + /* During shutdown we've already removed the peer so we don't + * need to add a peer. During stack reset we don't care about + * MDDs since they are all released. */ + if (!shutdown) { + write_lock(&kgnilnd_data.kgn_peer_conn_lock); + peer = kgnilnd_find_peer_locked(nid); + + if (peer != NULL) { + CDEBUG(D_NET, "adding peer's conn with nid %s " + "to purgatory\n", libcfs_nid2str(nid)); + kgnilnd_conn_addref(conn); + conn->gnc_peer = peer; + kgnilnd_peer_addref(peer); + kgnilnd_admin_addref(conn->gnc_peer->gnp_dirty_eps); + conn->gnc_state = GNILND_CONN_CLOSED; + list_add_tail(&conn->gnc_list, + &peer->gnp_conns); + kgnilnd_add_purgatory_locked(conn, + conn->gnc_peer); + kgnilnd_schedule_conn(conn); + } + write_unlock(&kgnilnd_data.kgn_peer_conn_lock); + } + } + spin_lock(&dev->gnd_dgram_lock); kgnilnd_cancel_dgram_locked(dgram); spin_unlock(&dev->gnd_dgram_lock); @@ -1380,8 +1421,9 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp) dgram, kgnilnd_dgram_state2str(dgram)); LASSERTF(!list_empty(&dgram->gndg_list), - "dgram 0x%p with bad list state %s\n", - dgram, kgnilnd_dgram_state2str(dgram)); + "dgram 0x%p with bad list state %s type %s\n", + dgram, kgnilnd_dgram_state2str(dgram), + kgnilnd_dgram_type2str(dgram)); /* now we know that the datagram structure is ok, so pull off list */ list_del_init(&dgram->gndg_list); @@ -1393,10 +1435,6 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp) dgram->gndg_state = GNILND_DGRAM_PROCESSING; } - spin_unlock(&dev->gnd_dgram_lock); - - /* we now "own" this datagram */ - LASSERTF(dgram->gndg_conn != NULL, "dgram 0x%p with NULL conn\n", dgram); @@ -1404,6 +1442,9 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp) (__u64)dgram, &post_state, &remote_addr, &remote_id); + /* we now "own" this datagram */ + spin_unlock(&dev->gnd_dgram_lock); + LASSERTF(grc != GNI_RC_NO_MATCH, "kgni lied! probe_by_id told us that" " id "LPU64" was ready\n", readyid); @@ -1433,8 +1474,10 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp) /* fake rc to mark that we've done something */ rc = 1; } else { - /* bring out your dead! */ - dgram->gndg_state = GNILND_DGRAM_DONE; + /* let kgnilnd_release_dgram take care of canceled dgrams */ + if (dgram->gndg_state != GNILND_DGRAM_CANCELED) { + dgram->gndg_state = GNILND_DGRAM_DONE; + } } *dgramp = dgram; @@ -1442,7 +1485,7 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp) probe_for_out: - kgnilnd_release_dgram(dev, dgram); + kgnilnd_release_dgram(dev, dgram, 0); RETURN(rc); } @@ -1549,12 +1592,41 @@ kgnilnd_cancel_wc_dgrams(kgn_device_t *dev) list_for_each_entry_safe(dg, dgN, &zombies, gndg_list) { list_del_init(&dg->gndg_list); - kgnilnd_release_dgram(dev, dg); + kgnilnd_release_dgram(dev, dg, 1); } RETURN(0); } +int +kgnilnd_cancel_dgrams(kgn_device_t *dev) +{ + kgn_dgram_t *dg, *dgN; + int i; + ENTRY; + + /* Cancel any outstanding non wildcard datagrams regardless + * of which net they are on as we are in base shutdown and + * dont care about connecting anymore. + */ + + LASSERTF(kgnilnd_data.kgn_wc_kill == 1,"We didnt get called from base shutdown\n"); + + spin_lock(&dev->gnd_dgram_lock); + + for (i = 0; i < (*kgnilnd_tunables.kgn_peer_hash_size -1); i++) { + list_for_each_entry_safe(dg, dgN, &dev->gnd_dgrams[i], gndg_list) { + if (dg->gndg_type != GNILND_DGRAM_WC_REQ) + kgnilnd_cancel_dgram_locked(dg); + } + } + + spin_unlock(&dev->gnd_dgram_lock); + + RETURN(0); +} + + void kgnilnd_wait_for_canceled_dgrams(kgn_device_t *dev) { @@ -1596,7 +1668,7 @@ kgnilnd_wait_for_canceled_dgrams(kgn_device_t *dev) rc = kgnilnd_probe_for_dgram(dev, &dgram); if (rc != 0) { /* if we got a valid dgram or one that is now done, clean up */ - kgnilnd_release_dgram(dev, dgram); + kgnilnd_release_dgram(dev, dgram, 1); } } while (atomic_read(&dev->gnd_canceled_dgrams)); } @@ -1689,7 +1761,7 @@ kgnilnd_finish_connect(kgn_dgram_t *dgram) /* assume this is a new peer - it makes locking cleaner when it isn't */ /* no holding kgn_net_rw_sem - already are at the kgnilnd_dgram_mover level */ - rc = kgnilnd_create_peer_safe(&new_peer, her_nid, NULL); + rc = kgnilnd_create_peer_safe(&new_peer, her_nid, NULL, GNILND_RCA_NODE_UP); if (rc != 0) { CERROR("Can't create peer for %s\n", libcfs_nid2str(her_nid)); return rc; @@ -1926,7 +1998,6 @@ kgnilnd_process_nak(kgn_dgram_t *dgram) libcfs_nid2str(connreq->gncr_srcnid), libcfs_nid2str(connreq->gncr_dstnid), errno, rc); } else { - rc = 0; spin_lock(&dgram->gndg_conn->gnc_device->gnd_connd_lock); if (list_empty(&peer->gnp_connd_list)) { @@ -1957,7 +2028,7 @@ kgnilnd_process_nak(kgn_dgram_t *dgram) /* success! we found a peer and at least marked pending_nak */ write_unlock(&kgnilnd_data.kgn_peer_conn_lock); - return 0; + return rc; } int @@ -2055,7 +2126,7 @@ inform_peer: orig_dstnid = dgram->gndg_conn_out.gncr_dstnid; - kgnilnd_release_dgram(dev, dgram); + kgnilnd_release_dgram(dev, dgram, 0); CDEBUG(D_NET, "cleaning up dgram to %s, rc %d\n", libcfs_nid2str(orig_dstnid), rc); @@ -2166,10 +2237,12 @@ int kgnilnd_dgram_waitq(void *arg) { kgn_device_t *dev = (kgn_device_t *) arg; + char name[16]; gni_return_t grc; __u64 readyid; DEFINE_WAIT(mover_done); + snprintf(name, sizeof(name), "kgnilnd_dgn_%02d", dev->gnd_id); cfs_block_allsigs(); /* all gnilnd threads need to run fairly urgently */ @@ -2341,6 +2414,7 @@ int kgnilnd_dgram_mover(void *arg) { kgn_device_t *dev = (kgn_device_t *)arg; + char name[16]; int rc, did_something; unsigned long next_purge_check = jiffies - 1; unsigned long timeout; @@ -2348,6 +2422,7 @@ kgnilnd_dgram_mover(void *arg) unsigned long deadline = 0; DEFINE_WAIT(wait); + snprintf(name, sizeof(name), "kgnilnd_dg_%02d", dev->gnd_id); cfs_block_allsigs(); /* all gnilnd threads need to run fairly urgently */ set_user_nice(current, *kgnilnd_tunables.kgn_nice); @@ -2438,4 +2513,3 @@ kgnilnd_dgram_mover(void *arg) kgnilnd_thread_fini(); return 0; } -