Whamcloud - gitweb
LU-4588 code: replace semaphores with mutexes
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd_conn.c
index 5fee8cc..4cf2dd2 100644 (file)
@@ -79,9 +79,9 @@ kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys)
        gni_smsg_attr_t         smsg_attr;
        unsigned long           fmablk_vers;
 
-       /* we'll use fmablk_vers and the gnd_fmablk_sem to gate access
+       /* we'll use fmablk_vers and the gnd_fmablk_mutex to gate access
         * to this allocation code. Everyone will sample the version
-        * before and after getting the semaphore. If it has changed,
+        * before and after getting the mutex. If it has changed,
         * we'll bail out to check the lists again - this indicates that
         * some sort of change was made to the lists and it is possible
         * that there is a mailbox for us to find now. This should prevent
@@ -89,12 +89,12 @@ kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys)
         * that need a yet-to-be-allocated mailbox for a connection. */
 
        fmablk_vers = atomic_read(&device->gnd_fmablk_vers);
-       down(&device->gnd_fmablk_sem);
+       mutex_lock(&device->gnd_fmablk_mutex);
 
        if (fmablk_vers != atomic_read(&device->gnd_fmablk_vers)) {
-               /* version changed while we were waiting for semaphore,
+               /* version changed while we were waiting for the mutex,
                 * we'll recheck the lists assuming something nice happened */
-               up(&device->gnd_fmablk_sem);
+               mutex_unlock(&device->gnd_fmablk_mutex);
                return 0;
        }
 
@@ -203,7 +203,7 @@ kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys)
 
        spin_unlock(&device->gnd_fmablk_lock);
 
-       up(&device->gnd_fmablk_sem);
+       mutex_unlock(&device->gnd_fmablk_mutex);
 
        return 0;
 
@@ -220,7 +220,7 @@ free_blk:
 free_desc:
        LIBCFS_FREE(fma_blk, sizeof(kgn_fma_memblock_t));
 out:
-       up(&device->gnd_fmablk_sem);
+       mutex_unlock(&device->gnd_fmablk_mutex);
        return rc;
 }
 
@@ -584,42 +584,42 @@ kgnilnd_map_phys_fmablk(kgn_device_t *device)
        int                     rc = 0;
        kgn_fma_memblock_t     *fma_blk;
 
-       /* use sem to gate access to single thread, just in case */
-       down(&device->gnd_fmablk_sem);
+       /* use mutex to gate access to single thread, just in case */
+       mutex_lock(&device->gnd_fmablk_mutex);
 
        spin_lock(&device->gnd_fmablk_lock);
 
        list_for_each_entry(fma_blk, &device->gnd_fma_buffs, gnm_bufflist) {
-               if (fma_blk->gnm_state == GNILND_FMABLK_PHYS)
+               if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) {
                        rc = kgnilnd_map_fmablk(device, fma_blk);
                        if (rc)
                                break;
+               }
        }
        spin_unlock(&device->gnd_fmablk_lock);
 
-       up(&device->gnd_fmablk_sem);
+       mutex_unlock(&device->gnd_fmablk_mutex);
 
        RETURN(rc);
 }
 
 void
-kgnilnd_unmap_phys_fmablk(kgn_device_t *device)
+kgnilnd_unmap_fma_blocks(kgn_device_t *device)
 {
 
        kgn_fma_memblock_t      *fma_blk;
 
-       /* use sem to gate access to single thread, just in case */
-       down(&device->gnd_fmablk_sem);
+       /* use mutex to gate access to single thread, just in case */
+       mutex_lock(&device->gnd_fmablk_mutex);
 
        spin_lock(&device->gnd_fmablk_lock);
 
        list_for_each_entry(fma_blk, &device->gnd_fma_buffs, gnm_bufflist) {
-               if (fma_blk->gnm_state == GNILND_FMABLK_PHYS)
-                       kgnilnd_unmap_fmablk(device, fma_blk);
+               kgnilnd_unmap_fmablk(device, fma_blk);
        }
        spin_unlock(&device->gnd_fmablk_lock);
 
-       up(&device->gnd_fmablk_sem);
+       mutex_unlock(&device->gnd_fmablk_mutex);
 }
 
 void
@@ -628,8 +628,8 @@ kgnilnd_free_phys_fmablk(kgn_device_t *device)
 
        kgn_fma_memblock_t      *fma_blk, *fma_blkN;
 
-       /* use sem to gate access to single thread, just in case */
-       down(&device->gnd_fmablk_sem);
+       /* use mutex to gate access to single thread, just in case */
+       mutex_lock(&device->gnd_fmablk_mutex);
 
        spin_lock(&device->gnd_fmablk_lock);
 
@@ -639,7 +639,7 @@ kgnilnd_free_phys_fmablk(kgn_device_t *device)
        }
        spin_unlock(&device->gnd_fmablk_lock);
 
-       up(&device->gnd_fmablk_sem);
+       mutex_unlock(&device->gnd_fmablk_mutex);
 }
 
-/* kgnilnd dgram nid->struct managment */
+/* kgnilnd dgram nid->struct management */
@@ -939,8 +939,10 @@ kgnilnd_alloc_dgram(kgn_dgram_t **dgramp, kgn_device_t *dev, kgn_dgram_type_t ty
 
        atomic_inc(&dev->gnd_ndgrams);
 
-       CDEBUG(D_MALLOC|D_NETTRACE, "slab-alloced 'dgram': %lu at %p.\n",
-              sizeof(*dgram), dgram);
+       CDEBUG(D_MALLOC|D_NETTRACE, "slab-alloced 'dgram': %lu at %p %s ndgrams"
+               " %d\n",
+               sizeof(*dgram), dgram, kgnilnd_dgram_type2str(dgram),
+               atomic_read(&dev->gnd_ndgrams));
 
        *dgramp = dgram;
        return 0;
@@ -1152,8 +1154,10 @@ kgnilnd_free_dgram(kgn_device_t *dev, kgn_dgram_t *dgram)
        atomic_dec(&dev->gnd_ndgrams);
 
        kmem_cache_free(kgnilnd_data.kgn_dgram_cache, dgram);
-       CDEBUG(D_MALLOC|D_NETTRACE, "slab-freed 'dgram': %lu at %p.\n",
-              sizeof(*dgram), dgram);
+       CDEBUG(D_MALLOC|D_NETTRACE, "slab-freed 'dgram': %lu at %p %s"
+              " ndgrams %d\n",
+              sizeof(*dgram), dgram, kgnilnd_dgram_type2str(dgram),
+              atomic_read(&dev->gnd_ndgrams));
 }
 
 int
@@ -1304,9 +1308,44 @@ post_failed:
        RETURN(rc);
 }
 
+/* The shutdown flag is set from the shutdown and stack reset threads. */
 void
-kgnilnd_release_dgram(kgn_device_t *dev, kgn_dgram_t *dgram)
+kgnilnd_release_dgram(kgn_device_t *dev, kgn_dgram_t *dgram, int shutdown)
 {
+       /* The conns of canceled active dgrams need to be put in purgatory so
+        * we don't reuse the mailbox */
+       if (unlikely(dgram->gndg_state == GNILND_DGRAM_CANCELED)) {
+               kgn_peer_t *peer;
+               kgn_conn_t *conn = dgram->gndg_conn;
+               lnet_nid_t nid = dgram->gndg_conn_out.gncr_dstnid;
+
+               dgram->gndg_state = GNILND_DGRAM_DONE;
+
+               /* During shutdown we've already removed the peer so we don't
+                * need to add a peer. During stack reset we don't care about
+                * MDDs since they are all released. */
+               if (!shutdown) {
+                       write_lock(&kgnilnd_data.kgn_peer_conn_lock);
+                       peer = kgnilnd_find_peer_locked(nid);
+
+                       if (peer != NULL) {
+                               CDEBUG(D_NET, "adding peer's conn with nid %s "
+                                       "to purgatory\n", libcfs_nid2str(nid));
+                               kgnilnd_conn_addref(conn);
+                               conn->gnc_peer = peer;
+                               kgnilnd_peer_addref(peer);
+                               kgnilnd_admin_addref(conn->gnc_peer->gnp_dirty_eps);
+                               conn->gnc_state = GNILND_CONN_CLOSED;
+                               list_add_tail(&conn->gnc_list,
+                                             &peer->gnp_conns);
+                               kgnilnd_add_purgatory_locked(conn,
+                                                            conn->gnc_peer);
+                               kgnilnd_schedule_conn(conn);
+                       }
+                       write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
+               }
+       }
+
        spin_lock(&dev->gnd_dgram_lock);
        kgnilnd_cancel_dgram_locked(dgram);
        spin_unlock(&dev->gnd_dgram_lock);
@@ -1380,8 +1419,9 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
                 dgram, kgnilnd_dgram_state2str(dgram));
 
        LASSERTF(!list_empty(&dgram->gndg_list),
-                "dgram 0x%p with bad list state %s\n",
-                dgram, kgnilnd_dgram_state2str(dgram));
+                "dgram 0x%p with bad list state %s type %s\n",
+                dgram, kgnilnd_dgram_state2str(dgram),
+                kgnilnd_dgram_type2str(dgram));
 
        /* now we know that the datagram structure is ok, so pull off list */
        list_del_init(&dgram->gndg_list);
@@ -1393,10 +1433,6 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
                dgram->gndg_state = GNILND_DGRAM_PROCESSING;
        }
 
-       spin_unlock(&dev->gnd_dgram_lock);
-
-       /* we now "own" this datagram */
-
        LASSERTF(dgram->gndg_conn != NULL,
                "dgram 0x%p with NULL conn\n", dgram);
 
@@ -1404,6 +1440,9 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
                                             (__u64)dgram, &post_state,
                                             &remote_addr, &remote_id);
 
+       /* we now "own" this datagram */
+       spin_unlock(&dev->gnd_dgram_lock);
+
        LASSERTF(grc != GNI_RC_NO_MATCH, "kgni lied! probe_by_id told us that"
                 " id "LPU64" was ready\n", readyid);
 
@@ -1433,8 +1472,10 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
                /* fake rc to mark that we've done something */
                rc = 1;
        } else {
-               /* bring out your dead! */
-               dgram->gndg_state = GNILND_DGRAM_DONE;
+               /* let kgnilnd_release_dgram take care of canceled dgrams */
+               if (dgram->gndg_state != GNILND_DGRAM_CANCELED) {
+                       dgram->gndg_state = GNILND_DGRAM_DONE;
+               }
        }
 
        *dgramp = dgram;
@@ -1442,7 +1483,7 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
 
 probe_for_out:
 
-       kgnilnd_release_dgram(dev, dgram);
+       kgnilnd_release_dgram(dev, dgram, 0);
        RETURN(rc);
 }
 
@@ -1549,12 +1590,41 @@ kgnilnd_cancel_wc_dgrams(kgn_device_t *dev)
 
        list_for_each_entry_safe(dg, dgN, &zombies, gndg_list) {
                list_del_init(&dg->gndg_list);
-               kgnilnd_release_dgram(dev, dg);
+               kgnilnd_release_dgram(dev, dg, 1);
        }
        RETURN(0);
 
 }
 
+int
+kgnilnd_cancel_dgrams(kgn_device_t *dev)
+{
+       kgn_dgram_t *dg, *dgN;
+       int i;
+       ENTRY;
+
+       /* Cancel any outstanding non wildcard datagrams regardless
+        * of which net they are on as we are in base shutdown and
+        * don't care about connecting anymore.
+        * NOTE(review): assumes gnd_dgrams[] has kgn_peer_hash_size buckets. */
+
+       LASSERTF(kgnilnd_data.kgn_wc_kill == 1,"We didnt get called from base shutdown\n");
+
+       spin_lock(&dev->gnd_dgram_lock);
+       /* walk every bucket: "size - 1" as the bound skipped the last one */
+       for (i = 0; i < *kgnilnd_tunables.kgn_peer_hash_size; i++) {
+               list_for_each_entry_safe(dg, dgN, &dev->gnd_dgrams[i], gndg_list) {
+                       if (dg->gndg_type != GNILND_DGRAM_WC_REQ)
+                               kgnilnd_cancel_dgram_locked(dg);
+               }
+       }
+
+       spin_unlock(&dev->gnd_dgram_lock);
+
+       RETURN(0);
+}
+
+
 void
 kgnilnd_wait_for_canceled_dgrams(kgn_device_t *dev)
 {
@@ -1596,7 +1666,7 @@ kgnilnd_wait_for_canceled_dgrams(kgn_device_t *dev)
                rc = kgnilnd_probe_for_dgram(dev, &dgram);
                if (rc != 0) {
                        /* if we got a valid dgram or one that is now done, clean up */
-                       kgnilnd_release_dgram(dev, dgram);
+                       kgnilnd_release_dgram(dev, dgram, 1);
                }
        } while (atomic_read(&dev->gnd_canceled_dgrams));
 }
@@ -1689,7 +1759,7 @@ kgnilnd_finish_connect(kgn_dgram_t *dgram)
        /* assume this is a new peer  - it makes locking cleaner when it isn't */
        /* no holding kgn_net_rw_sem - already are at the kgnilnd_dgram_mover level */
 
-       rc = kgnilnd_create_peer_safe(&new_peer, her_nid, NULL);
+       rc = kgnilnd_create_peer_safe(&new_peer, her_nid, NULL, GNILND_RCA_NODE_UP);
        if (rc != 0) {
                CERROR("Can't create peer for %s\n", libcfs_nid2str(her_nid));
                return rc;
@@ -1926,7 +1996,6 @@ kgnilnd_process_nak(kgn_dgram_t *dgram)
                        libcfs_nid2str(connreq->gncr_srcnid),
                        libcfs_nid2str(connreq->gncr_dstnid), errno, rc);
        } else {
-               rc = 0;
                spin_lock(&dgram->gndg_conn->gnc_device->gnd_connd_lock);
 
                if (list_empty(&peer->gnp_connd_list)) {
@@ -1957,7 +2026,7 @@ kgnilnd_process_nak(kgn_dgram_t *dgram)
        /* success! we found a peer and at least marked pending_nak */
        write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
 
-       return 0;
+       return rc;
 }
 
 int
@@ -2055,7 +2124,7 @@ inform_peer:
 
        orig_dstnid = dgram->gndg_conn_out.gncr_dstnid;
 
-       kgnilnd_release_dgram(dev, dgram);
+       kgnilnd_release_dgram(dev, dgram, 0);
 
        CDEBUG(D_NET, "cleaning up dgram to %s, rc %d\n",
               libcfs_nid2str(orig_dstnid), rc);
@@ -2166,10 +2235,12 @@ int
 kgnilnd_dgram_waitq(void *arg)
 {
        kgn_device_t     *dev = (kgn_device_t *) arg;
+       char              name[16];
        gni_return_t      grc;
        __u64             readyid;
        DEFINE_WAIT(mover_done);
 
+       snprintf(name, sizeof(name), "kgnilnd_dgn_%02d", dev->gnd_id);
        cfs_block_allsigs();
 
        /* all gnilnd threads need to run fairly urgently */
@@ -2341,6 +2412,7 @@ int
 kgnilnd_dgram_mover(void *arg)
 {
        kgn_device_t            *dev = (kgn_device_t *)arg;
+       char                     name[16];
        int                      rc, did_something;
        unsigned long            next_purge_check = jiffies - 1;
        unsigned long            timeout;
@@ -2348,6 +2420,7 @@ kgnilnd_dgram_mover(void *arg)
        unsigned long            deadline = 0;
        DEFINE_WAIT(wait);
 
+       snprintf(name, sizeof(name), "kgnilnd_dg_%02d", dev->gnd_id);
        cfs_block_allsigs();
        /* all gnilnd threads need to run fairly urgently */
        set_user_nice(current, *kgnilnd_tunables.kgn_nice);
@@ -2438,4 +2511,3 @@ kgnilnd_dgram_mover(void *arg)
        kgnilnd_thread_fini();
        return 0;
 }
-