Whamcloud - gitweb
LU-6142 lnet: convert kiblnd/ksocknal_thread_start to vararg
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd_cb.c
index 90ba35b..38c91a7 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lnet/klnds/o2iblnd/o2iblnd_cb.c
  *
@@ -100,9 +99,9 @@ kiblnd_txlist_done(struct list_head *txlist, int status,
 {
        struct kib_tx *tx;
 
-       while (!list_empty(txlist)) {
-               tx = list_entry(txlist->next, struct kib_tx, tx_list);
-
+       while ((tx = list_first_entry_or_null(txlist,
+                                             struct kib_tx,
+                                             tx_list)) != NULL) {
                list_del(&tx->tx_list);
                /* complete now */
                tx->tx_waiting = 0;
@@ -243,11 +242,9 @@ out:
 static struct kib_tx *
 kiblnd_find_waiting_tx_locked(struct kib_conn *conn, int txtype, u64 cookie)
 {
-       struct list_head *tmp;
-
-       list_for_each(tmp, &conn->ibc_active_txs) {
-               struct kib_tx *tx = list_entry(tmp, struct kib_tx, tx_list);
+       struct kib_tx *tx;
 
+       list_for_each_entry(tx, &conn->ibc_active_txs, tx_list) {
                LASSERT(!tx->tx_queued);
                LASSERT(tx->tx_sending != 0 || tx->tx_waiting);
 
@@ -814,6 +811,7 @@ __must_hold(&conn->ibc_lock)
        struct kib_msg *msg = tx->tx_msg;
        struct kib_peer_ni *peer_ni = conn->ibc_peer;
        struct lnet_ni *ni = peer_ni->ibp_ni;
+       struct kib_fast_reg_descriptor *frd = tx->tx_fmr.fmr_frd;
        int ver = conn->ibc_version;
        int rc;
        int done;
@@ -898,11 +896,10 @@ __must_hold(&conn->ibc_lock)
                 /* close_conn will launch failover */
                 rc = -ENETDOWN;
         } else {
-               struct kib_fast_reg_descriptor *frd = tx->tx_fmr.fmr_frd;
                struct ib_send_wr *bad = &tx->tx_wrq[tx->tx_nwrq - 1].wr;
                struct ib_send_wr *wr  = &tx->tx_wrq[0].wr;
 
-               if (frd != NULL) {
+               if (frd != NULL && !frd->frd_posted) {
                        if (!frd->frd_valid) {
                                wr = &frd->frd_inv_wr.wr;
                                wr->next = &frd->frd_fastreg_wr.wr;
@@ -931,8 +928,11 @@ __must_hold(&conn->ibc_lock)
 
        conn->ibc_last_send = ktime_get();
 
-        if (rc == 0)
-                return 0;
+       if (rc == 0) {
+               if (frd != NULL)
+                       frd->frd_posted = true;
+               return 0;
+       }
 
         /* NB credits are transferred in the actual
          * message, which can only be the last work item */
@@ -990,9 +990,8 @@ kiblnd_check_sends_locked(struct kib_conn *conn)
         LASSERT (conn->ibc_reserved_credits >= 0);
 
         while (conn->ibc_reserved_credits > 0 &&
-              !list_empty(&conn->ibc_tx_queue_rsrvd)) {
-               tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
-                               struct kib_tx, tx_list);
+              (tx = list_first_entry_or_null(&conn->ibc_tx_queue_rsrvd,
+                                             struct kib_tx, tx_list)) != NULL) {
                list_move_tail(&tx->tx_list, &conn->ibc_tx_queue);
                 conn->ibc_reserved_credits--;
         }
@@ -1014,17 +1013,17 @@ kiblnd_check_sends_locked(struct kib_conn *conn)
 
                if (!list_empty(&conn->ibc_tx_queue_nocred)) {
                         credit = 0;
-                       tx = list_entry(conn->ibc_tx_queue_nocred.next,
-                                       struct kib_tx, tx_list);
+                       tx = list_first_entry(&conn->ibc_tx_queue_nocred,
+                                             struct kib_tx, tx_list);
                } else if (!list_empty(&conn->ibc_tx_noops)) {
                         LASSERT (!IBLND_OOB_CAPABLE(ver));
                         credit = 1;
-                       tx = list_entry(conn->ibc_tx_noops.next,
-                                       struct kib_tx, tx_list);
+                       tx = list_first_entry(&conn->ibc_tx_noops,
+                                             struct kib_tx, tx_list);
                } else if (!list_empty(&conn->ibc_tx_queue)) {
                         credit = 1;
-                       tx = list_entry(conn->ibc_tx_queue.next,
-                                       struct kib_tx, tx_list);
+                       tx = list_first_entry(&conn->ibc_tx_queue,
+                                             struct kib_tx, tx_list);
                 } else
                         break;
 
@@ -1657,7 +1656,7 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
 
                 /* is the REPLY message too small for RDMA? */
                nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
-                if (nob <= IBLND_MSG_SIZE)
+                if (nob <= IBLND_MSG_SIZE && !lntmsg->msg_rdma_force)
                         break;                  /* send IMMEDIATE */
 
                tx = kiblnd_get_idle_tx(ni, target.nid);
@@ -1704,7 +1703,7 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg)
         case LNET_MSG_PUT:
                 /* Is the payload small enough not to need RDMA? */
                nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[payload_nob]);
-                if (nob <= IBLND_MSG_SIZE)
+                if (nob <= IBLND_MSG_SIZE && !lntmsg->msg_rdma_force)
                         break;                  /* send IMMEDIATE */
 
                tx = kiblnd_get_idle_tx(ni, target.nid);
@@ -1932,18 +1931,6 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
         return rc;
 }
 
-int
-kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-       struct task_struct *task = kthread_run(fn, arg, "%s", name);
-
-       if (IS_ERR(task))
-               return PTR_ERR(task);
-
-       atomic_inc(&kiblnd_data.kib_nthreads);
-       return 0;
-}
-
 static void
 kiblnd_thread_fini (void)
 {
@@ -2076,9 +2063,9 @@ kiblnd_handle_early_rxs(struct kib_conn *conn)
        LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
 
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-       while (!list_empty(&conn->ibc_early_rxs)) {
-               rx = list_entry(conn->ibc_early_rxs.next,
-                               struct kib_rx, rx_list);
+       while ((rx = list_first_entry_or_null(&conn->ibc_early_rxs,
+                                             struct kib_rx,
+                                             rx_list)) != NULL) {
                list_del(&rx->rx_list);
                write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
@@ -2208,10 +2195,11 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active,
                           int error)
 {
        LIST_HEAD(zombies);
-       unsigned long   flags;
+       unsigned long flags;
+       enum lnet_msg_hstatus hstatus;
 
-       LASSERT (error != 0);
-       LASSERT (!in_interrupt());
+       LASSERT(error != 0);
+       LASSERT(!in_interrupt());
 
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
@@ -2254,12 +2242,20 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active,
        CNETERR("Deleting messages for %s: connection failed\n",
                libcfs_nid2str(peer_ni->ibp_nid));
 
-       if (error == -EHOSTUNREACH || error == -ETIMEDOUT)
-               kiblnd_txlist_done(&zombies, error,
-                                  LNET_MSG_STATUS_NETWORK_TIMEOUT);
-       else
-               kiblnd_txlist_done(&zombies, error,
-                                  LNET_MSG_STATUS_LOCAL_DROPPED);
+       switch (error) {
+       case -EHOSTUNREACH:
+       case -ETIMEDOUT:
+               hstatus = LNET_MSG_STATUS_NETWORK_TIMEOUT;
+               break;
+       case -ECONNREFUSED:
+               hstatus = LNET_MSG_STATUS_REMOTE_DROPPED;
+               break;
+       default:
+               hstatus = LNET_MSG_STATUS_LOCAL_DROPPED;
+               break;
+       }
+
+       kiblnd_txlist_done(&zombies, error, hstatus);
 }
 
 static void
@@ -2296,9 +2292,6 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
        /* connection established */
        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
 
-       /* reset retry count */
-       peer_ni->ibp_retries = 0;
-
        conn->ibc_last_send = ktime_get();
        kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED);
        kiblnd_peer_alive(peer_ni);
@@ -2356,8 +2349,8 @@ kiblnd_connreq_done(struct kib_conn *conn, int status)
         * scheduled.  We won't be using round robin on this first batch.
         */
        spin_lock(&conn->ibc_lock);
-       while (!list_empty(&txs)) {
-               tx = list_entry(txs.next, struct kib_tx, tx_list);
+       while ((tx = list_first_entry_or_null(&txs, struct kib_tx,
+                                             tx_list)) != NULL) {
                list_del(&tx->tx_list);
 
                kiblnd_queue_tx_locked(tx, conn);
@@ -2744,11 +2737,6 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
                goto out;
        }
 
-       if (peer_ni->ibp_retries > *kiblnd_tunables.kib_retry_count) {
-               reason = "retry count exceeded due to no listener";
-               goto out;
-       }
-
        switch (why) {
        default:
                reason = "Unknown";
@@ -2806,10 +2794,6 @@ kiblnd_check_reconnect(struct kib_conn *conn, int version,
         case IBLND_REJECT_CONN_UNCOMPAT:
                 reason = "version negotiation";
                 break;
-
-       case IBLND_REJECT_INVALID_SRV_ID:
-               reason = "invalid service id";
-               break;
         }
 
        conn->ibc_reconnect = 1;
@@ -2836,6 +2820,7 @@ static void
 kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
 {
        struct kib_peer_ni *peer_ni = conn->ibc_peer;
+       int status = -ECONNREFUSED;
 
        LASSERT (!in_interrupt());
        LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
@@ -2847,9 +2832,7 @@ kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
                break;
 
        case IB_CM_REJ_INVALID_SERVICE_ID:
-               peer_ni->ibp_retries++;
-               kiblnd_check_reconnect(conn, IBLND_MSG_VERSION, 0,
-                                      IBLND_REJECT_INVALID_SRV_ID, NULL);
+               status = -EHOSTUNREACH;
                CNETERR("%s rejected: no listener at %d\n",
                        libcfs_nid2str(peer_ni->ibp_nid),
                        *kiblnd_tunables.kib_service);
@@ -2960,7 +2943,7 @@ kiblnd_rejected(struct kib_conn *conn, int reason, void *priv, int priv_nob)
                break;
        }
 
-       kiblnd_connreq_done(conn, -ECONNREFUSED);
+       kiblnd_connreq_done(conn, status);
 }
 
 static void
@@ -3314,11 +3297,8 @@ static int
 kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs)
 {
        struct kib_tx *tx;
-       struct list_head *ttmp;
-
-       list_for_each(ttmp, txs) {
-               tx = list_entry(ttmp, struct kib_tx, tx_list);
 
+       list_for_each_entry(tx, txs, tx_list) {
                if (txs != &conn->ibc_active_txs) {
                        LASSERT(tx->tx_queued);
                } else {
@@ -3360,7 +3340,6 @@ kiblnd_check_conns (int idx)
        struct kib_peer_ni *peer_ni;
        struct kib_conn *conn;
        struct kib_tx *tx, *tx_tmp;
-       struct list_head *ctmp;
        unsigned long flags;
 
        /* NB. We expect to have a look at all the peers and not find any
@@ -3381,12 +3360,10 @@ kiblnd_check_conns (int idx)
                        }
                }
 
-               list_for_each(ctmp, &peer_ni->ibp_conns) {
+               list_for_each_entry(conn, &peer_ni->ibp_conns, ibc_list) {
                        int timedout;
                        int sendnoop;
 
-                       conn = list_entry(ctmp, struct kib_conn, ibc_list);
-
                        LASSERT(conn->ibc_state == IBLND_CONN_ESTABLISHED);
 
                        spin_lock(&conn->ibc_lock);
@@ -3427,9 +3404,9 @@ kiblnd_check_conns (int idx)
         * connection. We can only be sure RDMA activity
         * has ceased once the QP has been modified.
         */
-       while (!list_empty(&closes)) {
-               conn = list_entry(closes.next,
-                                 struct kib_conn, ibc_connd_list);
+       while ((conn = list_first_entry_or_null(&closes,
+                                               struct kib_conn,
+                                               ibc_connd_list)) != NULL) {
                list_del(&conn->ibc_connd_list);
                kiblnd_close_conn(conn, -ETIMEDOUT);
                kiblnd_conn_decref(conn);
@@ -3439,9 +3416,9 @@ kiblnd_check_conns (int idx)
         * NOOP, but there were no non-blocking tx descs
         * free to do it last time...
         */
-       while (!list_empty(&checksends)) {
-               conn = list_entry(checksends.next,
-                                 struct kib_conn, ibc_connd_list);
+       while ((conn = list_first_entry_or_null(&checksends,
+                                               struct kib_conn,
+                                               ibc_connd_list)) != NULL) {
                list_del(&conn->ibc_connd_list);
 
                spin_lock(&conn->ibc_lock);
@@ -3499,11 +3476,11 @@ kiblnd_connd (void *arg)
 
                dropped_lock = false;
 
-               if (!list_empty(&kiblnd_data.kib_connd_zombies)) {
+               conn = list_first_entry_or_null(&kiblnd_data.kib_connd_zombies,
+                                               struct kib_conn, ibc_list);
+               if (conn) {
                        struct kib_peer_ni *peer_ni = NULL;
 
-                       conn = list_entry(kiblnd_data.kib_connd_zombies.next,
-                                         struct kib_conn, ibc_list);
                        list_del(&conn->ibc_list);
                        if (conn->ibc_reconnect) {
                                peer_ni = conn->ibc_peer;
@@ -3530,10 +3507,11 @@ kiblnd_connd (void *arg)
                                              &kiblnd_data.kib_reconn_wait);
                }
 
-               if (!list_empty(&kiblnd_data.kib_connd_conns)) {
+               conn = list_first_entry_or_null(&kiblnd_data.kib_connd_conns,
+                                               struct kib_conn, ibc_list);
+               if (conn) {
                        int wait;
-                       conn = list_entry(kiblnd_data.kib_connd_conns.next,
-                                         struct kib_conn, ibc_list);
+
                        list_del(&conn->ibc_list);
 
                        spin_unlock_irqrestore(lock, flags);
@@ -3559,11 +3537,11 @@ kiblnd_connd (void *arg)
                                                 &kiblnd_data.kib_reconn_list);
                        }
 
-                       if (list_empty(&kiblnd_data.kib_reconn_list))
+                       conn = list_first_entry_or_null(&kiblnd_data.kib_reconn_list,
+                                                       struct kib_conn, ibc_list);
+                       if (!conn)
                                break;
 
-                       conn = list_entry(kiblnd_data.kib_reconn_list.next,
-                                         struct kib_conn, ibc_list);
                        list_del(&conn->ibc_list);
 
                        spin_unlock_irqrestore(lock, flags);
@@ -3576,9 +3554,10 @@ kiblnd_connd (void *arg)
                        spin_lock_irqsave(lock, flags);
                }
 
-               if (!list_empty(&kiblnd_data.kib_connd_waits)) {
-                       conn = list_entry(kiblnd_data.kib_connd_waits.next,
-                                         struct kib_conn, ibc_list);
+               conn = list_first_entry_or_null(&kiblnd_data.kib_connd_waits,
+                                               struct kib_conn,
+                                               ibc_list);
+               if (conn) {
                        list_del(&conn->ibc_list);
                        spin_unlock_irqrestore(lock, flags);
 
@@ -3793,9 +3772,10 @@ kiblnd_scheduler(void *arg)
 
                did_something = false;
 
-               if (!list_empty(&sched->ibs_conns)) {
-                       conn = list_entry(sched->ibs_conns.next,
-                                         struct kib_conn, ibc_sched_list);
+               conn = list_first_entry_or_null(&sched->ibs_conns,
+                                               struct kib_conn,
+                                               ibc_sched_list);
+               if (conn) {
                        /* take over kib_sched_conns' ref on conn... */
                        LASSERT(conn->ibc_scheduled);
                        list_del(&conn->ibc_sched_list);