Whamcloud - gitweb
LU-14159 build: fix gcc8 warnings on kthread_run calls
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd_cb.c
index 7e078b2..0960c25 100644 (file)
@@ -1919,7 +1919,7 @@ kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
 int
 kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
 {
-       struct task_struct *task = kthread_run(fn, arg, name);
+       struct task_struct *task = kthread_run(fn, arg, "%s", name);
 
        if (IS_ERR(task))
                return PTR_ERR(task);
@@ -2119,6 +2119,10 @@ kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
                if (tx->tx_sending == 0) {
                        tx->tx_queued = 0;
                        list_move(&tx->tx_list, &zombies);
+               } else {
+                       /* keep tx until cq destroy */
+                       list_move(&tx->tx_list, &conn->ibc_zombie_txs);
+                       conn->ibc_waits ++;
                }
        }
 
@@ -2133,6 +2137,31 @@ kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs)
        kiblnd_txlist_done(&zombies, -ECONNABORTED, LNET_MSG_STATUS_OK);
 }
 
+static int
+kiblnd_tx_may_discard(struct kib_conn *conn)
+{
+       int rc = 0;
+       struct kib_tx *nxt;
+       struct kib_tx *tx;
+
+       spin_lock(&conn->ibc_lock);
+
+       list_for_each_entry_safe(tx, nxt, &conn->ibc_zombie_txs, tx_list) {
+               if (tx->tx_sending > 0 && tx->tx_lntmsg[0] &&
+                   lnet_md_discarded(tx->tx_lntmsg[0]->msg_md)) {
+                       tx->tx_sending --;
+                       if (tx->tx_sending == 0) {
+                               kiblnd_conn_decref(tx->tx_conn);
+                               tx->tx_conn = NULL;
+                               rc = 1;
+                       }
+               }
+       }
+
+       spin_unlock(&conn->ibc_lock);
+       return rc;
+}
+
 static void
 kiblnd_finalise_conn(struct kib_conn *conn)
 {
@@ -2209,8 +2238,12 @@ kiblnd_peer_connect_failed(struct kib_peer_ni *peer_ni, int active,
        CNETERR("Deleting messages for %s: connection failed\n",
                libcfs_nid2str(peer_ni->ibp_nid));
 
-       kiblnd_txlist_done(&zombies, error,
-                          LNET_MSG_STATUS_LOCAL_DROPPED);
+       if (error == -EHOSTUNREACH || error == -ETIMEDOUT)
+               kiblnd_txlist_done(&zombies, error,
+                                  LNET_MSG_STATUS_NETWORK_TIMEOUT);
+       else
+               kiblnd_txlist_done(&zombies, error,
+                                  LNET_MSG_STATUS_LOCAL_DROPPED);
 }
 
 static void
@@ -3279,8 +3312,9 @@ kiblnd_check_txs_locked(struct kib_conn *conn, struct list_head *txs)
                }
 
                if (ktime_compare(ktime_get(), tx->tx_deadline) >= 0) {
-                       CERROR("Timed out tx: %s, %lld seconds\n",
+                       CERROR("Timed out tx: %s(WSQ:%d%d%d), %lld seconds\n",
                               kiblnd_queue2str(conn, txs),
+                              tx->tx_waiting, tx->tx_sending, tx->tx_queued,
                               kiblnd_timeout() +
                               ktime_ms_delta(ktime_get(),
                                              tx->tx_deadline) / MSEC_PER_SEC);
@@ -3374,7 +3408,7 @@ kiblnd_check_conns (int idx)
 
        if (!list_empty(&timedout_txs))
                kiblnd_txlist_done(&timedout_txs, -ETIMEDOUT,
-                                  LNET_MSG_STATUS_LOCAL_TIMEOUT);
+                                  LNET_MSG_STATUS_NETWORK_TIMEOUT);
 
        /* Handle timeout by closing the whole
         * connection. We can only be sure RDMA activity
@@ -3482,6 +3516,7 @@ kiblnd_connd (void *arg)
                }
 
                if (!list_empty(&kiblnd_data.kib_connd_conns)) {
+                       int wait;
                        conn = list_entry(kiblnd_data.kib_connd_conns.next,
                                          struct kib_conn, ibc_list);
                        list_del(&conn->ibc_list);
@@ -3490,9 +3525,15 @@ kiblnd_connd (void *arg)
                        dropped_lock = 1;
 
                        kiblnd_disconnect_conn(conn);
-                       kiblnd_conn_decref(conn);
+                       wait = conn->ibc_waits;
+                       if (wait == 0) /* keep ref for connd_wait, see below */
+                               kiblnd_conn_decref(conn);
 
                        spin_lock_irqsave(lock, flags);
+
+                       if (wait)
+                               list_add_tail(&conn->ibc_list,
+                                             &kiblnd_data.kib_connd_waits);
                 }
 
                while (reconn < KIB_RECONN_BREAK) {
@@ -3520,6 +3561,22 @@ kiblnd_connd (void *arg)
                        spin_lock_irqsave(lock, flags);
                }
 
+               if (!list_empty(&kiblnd_data.kib_connd_waits)) {
+                       conn = list_entry(kiblnd_data.kib_connd_waits.next,
+                                         struct kib_conn, ibc_list);
+                       list_del(&conn->ibc_list);
+                       spin_unlock_irqrestore(lock, flags);
+
+                       dropped_lock = kiblnd_tx_may_discard(conn);
+                       if (dropped_lock)
+                               kiblnd_conn_decref(conn);
+
+                       spin_lock_irqsave(lock, flags);
+                       if (dropped_lock == 0)
+                               list_add_tail(&conn->ibc_list,
+                                             &kiblnd_data.kib_connd_waits);
+               }
+
                 /* careful with the jiffy wrap... */
                 timeout = (int)(deadline - jiffies);
                 if (timeout <= 0) {