Whamcloud - gitweb
LU-9679 modules: convert MIN/MAX to kernel style
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd_conn.c
index 7ec96a2..541c7ef 100644 (file)
@@ -38,6 +38,8 @@ kgnilnd_map_fmablk(kgn_device_t *device, kgn_fma_memblock_t *fma_blk)
 {
        gni_return_t            rrc;
        __u32                   flags = GNI_MEM_READWRITE;
+       static unsigned long    reg_to;
+       int                     rfto = *kgnilnd_tunables.kgn_reg_fail_timeout;
 
        if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) {
                flags |= GNI_MEM_PHYS_CONT;
@@ -52,14 +54,25 @@ kgnilnd_map_fmablk(kgn_device_t *device, kgn_fma_memblock_t *fma_blk)
                                   fma_blk->gnm_blk_size, device->gnd_rcv_fma_cqh,
                                   flags, &fma_blk->gnm_hndl);
        if (rrc != GNI_RC_SUCCESS) {
-               /* XXX Nic: need a way to silence this for runtime stuff that is ok to fail
-                * -- like when under MDD or GART pressure on big systems
-                */
+               if (rfto != GNILND_REGFAILTO_DISABLE) {
+                       if (reg_to == 0) {
+                               reg_to = jiffies + cfs_time_seconds(rfto);
+                       } else if (time_after(jiffies, reg_to)) {
+                               CERROR("FATAL:fmablk registration has failed "
+                                      "for %ld seconds.\n",
+                                      cfs_duration_sec(jiffies - reg_to) +
+                                               rfto);
+                               LBUG();
+                       }
+               }
+
                CNETERR("register fmablk failed 0x%p mbox_size %d flags %u\n",
                        fma_blk, fma_blk->gnm_mbox_size, flags);
                RETURN(-ENOMEM);
        }
 
+       reg_to = 0;
+
        /* PHYS_CONT memory isn't really mapped, at least not in GART -
         *  but all mappings chew up a MDD
         */
@@ -148,7 +161,7 @@ kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys)
                        rc = -ENOMEM;
                        GOTO(free_desc, rc);
                }
-               fma_blk->gnm_blk_size = KMALLOC_MAX_SIZE;
+               fma_blk->gnm_blk_size = GNILND_MBOX_SIZE;
                num_mbox = fma_blk->gnm_blk_size / fma_blk->gnm_mbox_size;
 
                LASSERTF(num_mbox >= 1,
@@ -204,7 +217,7 @@ kgnilnd_alloc_fmablk(kgn_device_t *device, int use_phys)
        fma_blk->gnm_avail_mboxs = fma_blk->gnm_num_mboxs = num_mbox;
 
        CDEBUG(D_MALLOC, "alloc fmablk 0x%p num %d msg_maxsize %d credits %d "
-               "mbox_size %d MDD "LPX64"."LPX64"\n",
+               "mbox_size %d MDD %#llx.%#llx\n",
                fma_blk, num_mbox, smsg_attr.msg_maxsize, smsg_attr.mbox_maxcredit,
                fma_blk->gnm_mbox_size, fma_blk->gnm_hndl.qword1,
                fma_blk->gnm_hndl.qword2);
@@ -230,7 +243,7 @@ free_bit:
        LIBCFS_FREE(fma_blk->gnm_bit_array, BITS_TO_LONGS(num_mbox) * sizeof (unsigned long));
 free_blk:
        if (fma_blk->gnm_state == GNILND_FMABLK_VIRT) {
-               LIBCFS_FREE(fma_blk->gnm_block, fma_blk->gnm_blk_size);
+               kgnilnd_vfree(fma_blk->gnm_block, fma_blk->gnm_blk_size);
        } else {
                kmem_cache_free(kgnilnd_data.kgn_mbox_cache, fma_blk->gnm_block);
        }
@@ -334,7 +347,7 @@ kgnilnd_free_fmablk_locked(kgn_device_t *dev, kgn_fma_memblock_t *fma_blk)
        if (fma_blk->gnm_state == GNILND_FMABLK_PHYS) {
                kmem_cache_free(kgnilnd_data.kgn_mbox_cache, fma_blk->gnm_block);
        } else {
-               LIBCFS_FREE(fma_blk->gnm_block, fma_blk->gnm_blk_size);
+               kgnilnd_vfree(fma_blk->gnm_block, fma_blk->gnm_blk_size);
        }
        fma_blk->gnm_state = GNILND_FMABLK_FREED;
 
@@ -402,7 +415,7 @@ kgnilnd_find_free_mbox(kgn_conn_t *conn)
 
                CDEBUG(D_NET, "conn %p smsg %p fmablk %p "
                        "allocating SMSG mbox %d buf %p "
-                       "offset %u hndl "LPX64"."LPX64"\n",
+                       "offset %u hndl %#llx.%#llx\n",
                        conn, smsg_attr, fma_blk, id,
                        smsg_attr->msg_buffer, smsg_attr->mbox_offset,
                        fma_blk->gnm_hndl.qword1,
@@ -492,23 +505,23 @@ kgnilnd_release_mbox(kgn_conn_t *conn, int purgatory_hold)
         * > 0 - hold it for now */
        if (purgatory_hold == 0) {
                CDEBUG(D_NET, "conn %p smsg %p fmablk %p freeing SMSG mbox %d "
-                       "hndl "LPX64"."LPX64"\n",
+                       "hndl %#llx.%#llx\n",
                        conn, smsg_attr, fma_blk, id,
                        fma_blk->gnm_hndl.qword1, fma_blk->gnm_hndl.qword2);
                fma_blk->gnm_avail_mboxs++;
 
        } else if (purgatory_hold > 0) {
                CDEBUG(D_NET, "conn %p smsg %p fmablk %p holding SMSG mbox %d "
-                       "hndl "LPX64"."LPX64"\n",
+                       "hndl %#llx.%#llx\n",
                        conn, smsg_attr, fma_blk, id,
                        fma_blk->gnm_hndl.qword1, fma_blk->gnm_hndl.qword2);
 
                fma_blk->gnm_held_mboxs++;
-               fma_blk->gnm_max_timeout = MAX(fma_blk->gnm_max_timeout,
-                                               conn->gnc_timeout);
+               fma_blk->gnm_max_timeout = max_t(long, fma_blk->gnm_max_timeout,
+                                                conn->gnc_timeout);
        } else {
                CDEBUG(D_NET, "conn %p smsg %p fmablk %p release SMSG mbox %d "
-                       "hndl "LPX64"."LPX64"\n",
+                       "hndl %#llx.%#llx\n",
                        conn, smsg_attr, fma_blk, id,
                        fma_blk->gnm_hndl.qword1, fma_blk->gnm_hndl.qword2);
 
@@ -720,7 +733,7 @@ kgnilnd_pack_connreq(kgn_connreq_t *connreq, kgn_conn_t *conn,
        int err = 0;
 
        /* ensure we haven't violated max datagram size */
-       CLASSERT(sizeof(kgn_connreq_t) <= GNI_DATAGRAM_MAXSIZE);
+       BUILD_BUG_ON(sizeof(kgn_connreq_t) > GNI_DATAGRAM_MAXSIZE);
 
        /* no need to zero out, we do that when allocating dgram */
        connreq->gncr_magic     = GNILND_MSG_MAGIC;
@@ -928,7 +941,7 @@ kgnilnd_unpack_connreq(kgn_dgram_t *dgram)
        }
 
        if (connreq->gncr_peerstamp == 0 || connreq->gncr_connstamp == 0) {
-               CERROR("Recived bad timestamps peer "LPU64" conn "LPU64"\n",
+               CERROR("Recived bad timestamps peer %llu conn %llu\n",
                connreq->gncr_peerstamp, connreq->gncr_connstamp);
                return -EPROTO;
        }
@@ -1426,13 +1439,13 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
                RETURN(0);
        }
 
-       CDEBUG(D_NET, "ready "LPX64" on device 0x%p\n",
+       CDEBUG(D_NET, "ready %#llx on device 0x%p\n",
                readyid, dev);
 
        dgram = (kgn_dgram_t *)readyid;
 
        LASSERTF(dgram->gndg_magic == GNILND_DGRAM_MAGIC,
-                "dgram 0x%p from id "LPX64" with bad magic %x\n",
+                "dgram 0x%p from id %#llx with bad magic %x\n",
                 dgram, readyid, dgram->gndg_magic);
 
        LASSERTF(dgram->gndg_state == GNILND_DGRAM_POSTED ||
@@ -1466,7 +1479,7 @@ kgnilnd_probe_for_dgram(kgn_device_t *dev, kgn_dgram_t **dgramp)
        spin_unlock(&dev->gnd_dgram_lock);
 
        LASSERTF(grc != GNI_RC_NO_MATCH, "kgni lied! probe_by_id told us that"
-                " id "LPU64" was ready\n", readyid);
+                " id %llu was ready\n", readyid);
 
        CDEBUG(D_NET, "grc %d dgram 0x%p type %s post_state %d "
                "remote_addr %u remote_id %u\n", grc, dgram,
@@ -1533,9 +1546,9 @@ failed:
 int
 kgnilnd_cancel_net_dgrams(kgn_net_t *net)
 {
-       kgn_dgram_t            *dg, *dgN;
-       struct list_head        zombies;
-       int                     i;
+       kgn_dgram_t *dg, *dgN;
+       LIST_HEAD(zombies);
+       int i;
        ENTRY;
 
        /* we want to cancel any outstanding dgrams - we don't want to rely
@@ -1548,8 +1561,6 @@ kgnilnd_cancel_net_dgrams(kgn_net_t *net)
                 "in reset %d\n", net->gnn_shutdown,
                 kgnilnd_data.kgn_in_reset);
 
-       INIT_LIST_HEAD(&zombies);
-
        spin_lock(&net->gnn_dev->gnd_dgram_lock);
 
        for (i = 0; i < *kgnilnd_tunables.kgn_peer_hash_size; i++) {
@@ -1575,7 +1586,7 @@ int
 kgnilnd_cancel_wc_dgrams(kgn_device_t *dev)
 {
        kgn_dgram_t *dg, *dgN;
-       struct list_head zombies;
+       LIST_HEAD(zombies);
        ENTRY;
 
        /* Time to kill the outstanding WC's
@@ -1587,7 +1598,6 @@ kgnilnd_cancel_wc_dgrams(kgn_device_t *dev)
                "in reset %d\n", kgnilnd_data.kgn_wc_kill,
                kgnilnd_data.kgn_in_reset);
 
-       INIT_LIST_HEAD(&zombies);
        spin_lock(&dev->gnd_dgram_lock);
 
        do {
@@ -1601,10 +1611,8 @@ kgnilnd_cancel_wc_dgrams(kgn_device_t *dev)
                        kgnilnd_cancel_dgram_locked(dg);
 
                        /* WC could be DONE already, check and if so add to list to be released */
-                       if (dg->gndg_state == GNILND_DGRAM_DONE) {
-                               list_del_init(&dg->gndg_list);
-                               list_add_tail(&dg->gndg_list, &zombies);
-                       }
+                       if (dg->gndg_state == GNILND_DGRAM_DONE)
+                               list_move_tail(&dg->gndg_list, &zombies);
                }
        } while (dg != NULL);
 
@@ -1682,7 +1690,7 @@ kgnilnd_wait_for_canceled_dgrams(kgn_device_t *dev)
                if (grc != GNI_RC_SUCCESS)
                        continue;
 
-               CDEBUG(D_NET, "ready "LPX64" on device %d->0x%p\n",
+               CDEBUG(D_NET, "ready %#llx on device %d->0x%p\n",
                        readyid, dev->gnd_id, dev);
 
                rc = kgnilnd_probe_for_dgram(dev, &dgram);
@@ -1781,7 +1789,7 @@ kgnilnd_finish_connect(kgn_dgram_t *dgram)
        /* assume this is a new peer  - it makes locking cleaner when it isn't */
        /* no holding kgn_net_rw_sem - already are at the kgnilnd_dgram_mover level */
 
-       rc = kgnilnd_create_peer_safe(&new_peer, her_nid, NULL, GNILND_RCA_NODE_UP);
+       rc = kgnilnd_create_peer_safe(&new_peer, her_nid, NULL, GNILND_PEER_UP);
        if (rc != 0) {
                CERROR("Can't create peer for %s\n", libcfs_nid2str(her_nid));
                return rc;
@@ -1836,12 +1844,12 @@ kgnilnd_finish_connect(kgn_dgram_t *dgram)
                }
        }
 
-       if (peer->gnp_down == GNILND_RCA_NODE_DOWN) {
+       if (peer->gnp_state == GNILND_PEER_DOWN) {
                CNETERR("Received connection request from down nid %s\n",
                        libcfs_nid2str(her_nid));
-               peer->gnp_down = GNILND_RCA_NODE_UP;
        }
 
+       peer->gnp_state = GNILND_PEER_UP;
        nstale = kgnilnd_close_stale_conns_locked(peer, conn);
 
        /* either way with peer (new or existing), we are ok with ref counts here as the
@@ -1939,9 +1947,10 @@ kgnilnd_finish_connect(kgn_dgram_t *dgram)
        write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
 
        /* Notify LNET that we now have a working connection to this peer.
-        * This is a Cray extension to the "standard" LND behavior. */
-       lnet_notify(peer->gnp_net->gnn_ni, peer->gnp_nid,
-                    1, cfs_time_current());
+        * This is a Cray extension to the "standard" LND behavior.
+        */
+       lnet_notify(peer->gnp_net->gnn_ni, peer->gnp_nid, true, true,
+                   ktime_get_seconds());
 
        /* drop our 'hold' ref */
        kgnilnd_conn_decref(conn);