Whamcloud - gitweb
LU-14008 o2iblnd: cleanup
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd_cb.c
index 7189676..932e06a 100644 (file)
@@ -492,45 +492,45 @@ kiblnd_rx_complete(struct kib_rx *rx, int status, int nob)
        int rc;
        int err = -EIO;
 
-        LASSERT (net != NULL);
-        LASSERT (rx->rx_nob < 0);               /* was posted */
-        rx->rx_nob = 0;                         /* isn't now */
+       LASSERT(net);
+       LASSERT(rx->rx_nob < 0);        /* was posted */
+       rx->rx_nob = 0;                 /* isn't now */
 
-        if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
-                goto ignore;
+       if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
+               goto ignore;
 
-        if (status != IB_WC_SUCCESS) {
-                CNETERR("Rx from %s failed: %d\n",
-                        libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
-                goto failed;
-        }
+       if (status != IB_WC_SUCCESS) {
+               CNETERR("Rx from %s failed: %d\n",
+                       libcfs_nid2str(conn->ibc_peer->ibp_nid), status);
+               goto failed;
+       }
 
-        LASSERT (nob >= 0);
-        rx->rx_nob = nob;
+       LASSERT(nob >= 0);
+       rx->rx_nob = nob;
 
-        rc = kiblnd_unpack_msg(msg, rx->rx_nob);
-        if (rc != 0) {
-                CERROR ("Error %d unpacking rx from %s\n",
-                        rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-                goto failed;
-        }
+       rc = kiblnd_unpack_msg(msg, rx->rx_nob);
+       if (rc != 0) {
+               CERROR("Error %d unpacking rx from %s\n",
+                      rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
+               goto failed;
+       }
 
-        if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
-            msg->ibm_dstnid != ni->ni_nid ||
-            msg->ibm_srcstamp != conn->ibc_incarnation ||
-            msg->ibm_dststamp != net->ibn_incarnation) {
-                CERROR ("Stale rx from %s\n",
-                        libcfs_nid2str(conn->ibc_peer->ibp_nid));
-                err = -ESTALE;
-                goto failed;
-        }
+       if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
+           msg->ibm_dstnid != lnet_nid_to_nid4(&ni->ni_nid) ||
+           msg->ibm_srcstamp != conn->ibc_incarnation ||
+           msg->ibm_dststamp != net->ibn_incarnation) {
+               CERROR("Stale rx from %s\n",
+                      libcfs_nid2str(conn->ibc_peer->ibp_nid));
+               err = -ESTALE;
+               goto failed;
+       }
 
-        /* set time last known alive */
-        kiblnd_peer_alive(conn->ibc_peer);
+       /* set time last known alive */
+       kiblnd_peer_alive(conn->ibc_peer);
 
-        /* racing with connection establishment/teardown! */
+       /* racing with connection establishment/teardown! */
 
-        if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
+       if (conn->ibc_state < IBLND_CONN_ESTABLISHED) {
                rwlock_t  *g_lock = &kiblnd_data.kib_global_lock;
                unsigned long  flags;
 
@@ -542,15 +542,15 @@ kiblnd_rx_complete(struct kib_rx *rx, int status, int nob)
                        return;
                }
                write_unlock_irqrestore(g_lock, flags);
-        }
-        kiblnd_handle_rx(rx);
-        return;
+       }
+       kiblnd_handle_rx(rx);
+       return;
 
- failed:
-        CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
-        kiblnd_close_conn(conn, err);
- ignore:
-        kiblnd_drop_rx(rx);                     /* Don't re-post rx. */
+failed:
+       CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
+       kiblnd_close_conn(conn, err);
+ignore:
+       kiblnd_drop_rx(rx);                     /* Don't re-post rx. */
 }
 
 static int
@@ -662,7 +662,7 @@ kiblnd_unmap_tx(struct kib_tx *tx)
                kiblnd_fmr_pool_unmap(&tx->tx_fmr, tx->tx_status);
 
        if (tx->tx_nfrags != 0) {
-               kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
+               kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev,
                                    tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
                tx->tx_nfrags = 0;
        }
@@ -717,7 +717,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
         tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
        tx->tx_nfrags = nfrags;
 
-       rd->rd_nfrags = kiblnd_dma_map_sg(hdev->ibh_ibdev, tx->tx_frags,
+       rd->rd_nfrags = kiblnd_dma_map_sg(hdev, tx->tx_frags,
                                          tx->tx_nfrags, tx->tx_dmadir);
 
         for (i = 0, nob = 0; i < rd->rd_nfrags; i++) {
@@ -749,8 +749,9 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
 {
        struct kib_net *net = ni->ni_data;
        struct scatterlist *sg;
-       int                 fragnob;
-       int                 max_nkiov;
+       int fragnob;
+       int max_nkiov;
+       int sg_count = 0;
 
        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
 
@@ -771,6 +772,12 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
        do {
                LASSERT(nkiov > 0);
 
+               if (!sg) {
+                       CERROR("lacking enough sg entries to map tx\n");
+                       return -EFAULT;
+               }
+               sg_count++;
+
                fragnob = min((int)(kiov->bv_len - offset), nob);
 
                /*
@@ -790,10 +797,6 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
                sg_set_page(sg, kiov->bv_page, fragnob,
                            kiov->bv_offset + offset);
                sg = sg_next(sg);
-               if (!sg) {
-                       CERROR("lacking enough sg entries to map tx\n");
-                       return -EFAULT;
-               }
 
                offset = 0;
                kiov++;
@@ -801,7 +804,7 @@ static int kiblnd_setup_rd_kiov(struct lnet_ni *ni, struct kib_tx *tx,
                nob -= fragnob;
        } while (nob > 0);
 
-       return kiblnd_map_tx(ni, tx, rd, sg - tx->tx_frags);
+       return kiblnd_map_tx(ni, tx, rd, sg_count);
 }
 
 static int
@@ -1100,7 +1103,7 @@ kiblnd_init_tx_msg(struct lnet_ni *ni, struct kib_tx *tx, int type,
 #endif
 
        LASSERT(tx->tx_nwrq >= 0);
-       LASSERT(tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1);
+       LASSERT(tx->tx_nwrq <= IBLND_MAX_RDMA_FRAGS);
        LASSERT(nob <= IBLND_MSG_SIZE);
 #ifdef HAVE_IB_GET_DMA_MR
        LASSERT(mr != NULL);
@@ -1641,6 +1644,15 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
        /* Thread context */
        LASSERT (!in_interrupt());
 
+       tx = kiblnd_get_idle_tx(ni, target.nid);
+       if (tx == NULL) {
+               CERROR("Can't allocate %s txd for %s\n",
+                       lnet_msgtyp2str(type),
+                       libcfs_nid2str(target.nid));
+               return -ENOMEM;
+       }
+       ibmsg = tx->tx_msg;
+
        switch (type) {
        default:
                LBUG();
@@ -1656,17 +1668,9 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 
                 /* is the REPLY message too small for RDMA? */
                nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
-                if (nob <= IBLND_MSG_SIZE && !lntmsg->msg_rdma_force)
-                        break;                  /* send IMMEDIATE */
+               if (nob <= IBLND_MSG_SIZE && !lntmsg->msg_rdma_force)
+                       break;                  /* send IMMEDIATE */
 
-               tx = kiblnd_get_idle_tx(ni, target.nid);
-               if (tx == NULL) {
-                       CERROR("Can't allocate txd for GET to %s\n",
-                              libcfs_nid2str(target.nid));
-                       return -ENOMEM;
-               }
-
-               ibmsg = tx->tx_msg;
                rd = &ibmsg->ibm_u.get.ibgm_rd;
                rc = kiblnd_setup_rd_kiov(ni, tx, rd,
                                          lntmsg->msg_md->md_niov,
@@ -1694,25 +1698,18 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
                        return -EIO;
                }
 
-                tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg[0,1] on completion */
-                tx->tx_waiting = 1;             /* waiting for GET_DONE */
-                kiblnd_launch_tx(ni, tx, target.nid);
-                return 0;
+               /* finalise lntmsg[0,1] on completion */
+               tx->tx_lntmsg[0] = lntmsg;
+               tx->tx_waiting = 1;             /* waiting for GET_DONE */
+               kiblnd_launch_tx(ni, tx, target.nid);
+               return 0;
 
-        case LNET_MSG_REPLY:
-        case LNET_MSG_PUT:
-                /* Is the payload small enough not to need RDMA? */
+       case LNET_MSG_REPLY:
+       case LNET_MSG_PUT:
+               /* Is the payload small enough not to need RDMA? */
                nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]);
-                if (nob <= IBLND_MSG_SIZE && !lntmsg->msg_rdma_force)
-                        break;                  /* send IMMEDIATE */
-
-               tx = kiblnd_get_idle_tx(ni, target.nid);
-                if (tx == NULL) {
-                        CERROR("Can't allocate %s txd for %s\n",
-                               type == LNET_MSG_PUT ? "PUT" : "REPLY",
-                               libcfs_nid2str(target.nid));
-                        return -ENOMEM;
-                }
+               if (nob <= IBLND_MSG_SIZE && !lntmsg->msg_rdma_force)
+                       break;                  /* send IMMEDIATE */
 
                rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd,
                                          payload_niov, payload_kiov,
@@ -1724,31 +1721,24 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
                        return -EIO;
                }
 
-                ibmsg = tx->tx_msg;
-                ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
-                ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
+               ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
+               ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
                kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ,
                                   sizeof(struct kib_putreq_msg));
 
-                tx->tx_lntmsg[0] = lntmsg;      /* finalise lntmsg on completion */
-                tx->tx_waiting = 1;             /* waiting for PUT_{ACK,NAK} */
-                kiblnd_launch_tx(ni, tx, target.nid);
-                return 0;
-        }
+               /* finalise lntmsg on completion */
+               tx->tx_lntmsg[0] = lntmsg;
+               tx->tx_waiting = 1;             /* waiting for PUT_{ACK,NAK} */
+               kiblnd_launch_tx(ni, tx, target.nid);
+               return 0;
+       }
 
        /* send IMMEDIATE */
        LASSERT(offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob])
                <= IBLND_MSG_SIZE);
 
-       tx = kiblnd_get_idle_tx(ni, target.nid);
-        if (tx == NULL) {
-                CERROR ("Can't send %d to %s: tx descs exhausted\n",
-                        type, libcfs_nid2str(target.nid));
-                return -ENOMEM;
-        }
-
-        ibmsg = tx->tx_msg;
-        ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
+       ibmsg = tx->tx_msg;
+       ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
 
        lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg,
                            offsetof(struct kib_msg,
@@ -1757,11 +1747,14 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
                            payload_offset, payload_nob);
 
        nob = offsetof(struct kib_immediate_msg, ibim_payload[payload_nob]);
-        kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
 
-        tx->tx_lntmsg[0] = lntmsg;              /* finalise lntmsg on completion */
-        kiblnd_launch_tx(ni, tx, target.nid);
-        return 0;
+       kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob);
+
+       /* finalise lntmsg on completion */
+       tx->tx_lntmsg[0] = lntmsg;
+
+       kiblnd_launch_tx(ni, tx, target.nid);
+       return 0;
 }
 
 static void
@@ -1824,6 +1817,19 @@ failed_0:
        lnet_finalize(lntmsg, -EIO);
 }
 
+/* Return lnet_get_dev_prio() for @ni's IB dma_device; NULL dev if no net. */
+unsigned int
+kiblnd_get_dev_prio(struct lnet_ni *ni, unsigned int dev_idx)
+{
+       struct kib_net *net = ni->ni_data;
+       struct device *dev = NULL;
+
+       if (net)
+               dev = net->ibn_dev->ibd_hdev->ibh_ibdev->dma_device;
+
+       return lnet_get_dev_prio(dev, dev_idx);
+}
+
 int
 kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
            int delayed, unsigned int niov, struct bio_vec *kiov,
@@ -1931,18 +1937,6 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
         return rc;
 }
 
-int
-kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-       struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
-       if (IS_ERR(task))
-               return PTR_ERR(task);
-
-       atomic_inc(&kiblnd_data.kib_nthreads);
-       return 0;
-}
-
 static void
 kiblnd_thread_fini (void)
 {
@@ -2466,11 +2460,12 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                rej.ibr_incarnation = net->ibn_incarnation;
        }
 
-       if (ni == NULL ||                         /* no matching net */
-           ni->ni_nid != reqmsg->ibm_dstnid ||   /* right NET, wrong NID! */
-           net->ibn_dev != ibdev) {              /* wrong device */
+       if (ni == NULL ||                       /* no matching net */
+           lnet_nid_to_nid4(&ni->ni_nid) !=
+           reqmsg->ibm_dstnid ||               /* right NET, wrong NID! */
+           net->ibn_dev != ibdev) {            /* wrong device */
                CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n", libcfs_nid2str(nid),
-                      ni ? libcfs_nid2str(ni->ni_nid) : "NA",
+                      ni ? libcfs_nidstr(&ni->ni_nid) : "NA",
                       ibdev->ibd_ifname, ibdev->ibd_nnets,
                       &ibdev->ibd_ifip,
                       libcfs_nid2str(reqmsg->ibm_dstnid));
@@ -2590,8 +2585,8 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                 * the lower NID connection win so we can move forward.
                 */
                if (peer2->ibp_connecting != 0 &&
-                   nid < ni->ni_nid && peer2->ibp_races <
-                   MAX_CONN_RACES_BEFORE_ABORT) {
+                   nid < lnet_nid_to_nid4(&ni->ni_nid) &&
+                   peer2->ibp_races < MAX_CONN_RACES_BEFORE_ABORT) {
                        peer2->ibp_races++;
                        write_unlock_irqrestore(g_lock, flags);
 
@@ -2755,23 +2750,11 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
                break;
 
        case IBLND_REJECT_RDMA_FRAGS: {
-               struct lnet_ioctl_config_o2iblnd_tunables *tunables;
-
                if (!cp) {
                        reason = "can't negotiate max frags";
                        goto out;
                }
-               tunables = &peer_ni->ibp_ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
-#ifdef HAVE_IB_GET_DMA_MR
-               /*
-                * This check only makes sense if the kernel supports global
-                * memory registration. Otherwise, map_on_demand will never == 0
-                */
-               if (!tunables->lnd_map_on_demand) {
-                       reason = "map_on_demand must be enabled";
-                       goto out;
-               }
-#endif
+
                if (conn->ibc_max_frags <= frag_num) {
                        reason = "unsupported max frags";
                        goto out;
@@ -3022,7 +3005,7 @@ kiblnd_check_connreply(struct kib_conn *conn, void *priv, int priv_nob)
         }
 
        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-       if (msg->ibm_dstnid == ni->ni_nid &&
+       if (msg->ibm_dstnid == lnet_nid_to_nid4(&ni->ni_nid) &&
            msg->ibm_dststamp == net->ibn_incarnation)
                rc = 0;
        else
@@ -3567,8 +3550,7 @@ kiblnd_connd (void *arg)
                }
 
                conn = list_first_entry_or_null(&kiblnd_data.kib_connd_waits,
-                                               struct kib_conn,
-                                               ibc_sched_list);
+                                               struct kib_conn, ibc_list);
                if (conn) {
                        list_del(&conn->ibc_list);
                        spin_unlock_irqrestore(lock, flags);
@@ -3657,13 +3639,13 @@ kiblnd_qp_event(struct ib_event *event, void *arg)
        case IB_EVENT_PORT_ERR:
        case IB_EVENT_DEVICE_FATAL:
                CERROR("Fatal device error for NI %s\n",
-                      libcfs_nid2str(conn->ibc_peer->ibp_ni->ni_nid));
+                      libcfs_nidstr(&conn->ibc_peer->ibp_ni->ni_nid));
                atomic_set(&conn->ibc_peer->ibp_ni->ni_fatal_error_on, 1);
                return;
 
        case IB_EVENT_PORT_ACTIVE:
                CERROR("Port reactivated for NI %s\n",
-                      libcfs_nid2str(conn->ibc_peer->ibp_ni->ni_nid));
+                      libcfs_nidstr(&conn->ibc_peer->ibp_ni->ni_nid));
                atomic_set(&conn->ibc_peer->ibp_ni->ni_fatal_error_on, 0);
                return;