int
kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
{
- struct task_struct *task = kthread_run(fn, arg, name);
+ struct task_struct *task = kthread_run(fn, arg, "%s", name);
if (IS_ERR(task))
return PTR_ERR(task);
if (tx->tx_sending == 0) {
tx->tx_queued = 0;
list_move(&tx->tx_list, &zombies);
+ } else {
+ /* keep tx until cq destroy */
+ list_move(&tx->tx_list, &conn->ibc_zombie_txs);
+ conn->ibc_waits ++;
}
}
kiblnd_txlist_done(&zombies, -ECONNABORTED, LNET_MSG_STATUS_OK);
}
+/*
+ * Scan the zombie tx list of @conn under conn->ibc_lock.
+ *
+ * For every tx that still has sends outstanding (tx_sending > 0), has a
+ * first LNet message attached, and for which lnet_md_discarded() reports
+ * true on that message's MD, one outstanding send is given up.  When that
+ * brings tx_sending to zero, the connection reference reached through
+ * tx->tx_conn is dropped and tx->tx_conn is cleared.
+ *
+ * The tx itself is left on conn->ibc_zombie_txs; this function never
+ * unlinks or frees it.
+ *
+ * Returns 1 if at least one tx had its tx_sending count reach zero during
+ * this scan, 0 otherwise.
+ */
+static int
+kiblnd_tx_may_discard(struct kib_conn *conn)
+{
+	struct kib_tx *tx;
+	struct kib_tx *next;
+	int released = 0;
+
+	spin_lock(&conn->ibc_lock);
+
+	list_for_each_entry_safe(tx, next, &conn->ibc_zombie_txs, tx_list) {
+		/* nothing in flight for this tx */
+		if (tx->tx_sending <= 0)
+			continue;
+
+		/* no LNet message attached, so no MD to examine */
+		if (!tx->tx_lntmsg[0])
+			continue;
+
+		if (!lnet_md_discarded(tx->tx_lntmsg[0]->msg_md))
+			continue;
+
+		/* give up one pending send; more may still be outstanding */
+		if (--tx->tx_sending != 0)
+			continue;
+
+		/* last pending send gone: release the tx's hold on the conn */
+		kiblnd_conn_decref(tx->tx_conn);
+		tx->tx_conn = NULL;
+		released = 1;
+	}
+
+	spin_unlock(&conn->ibc_lock);
+
+	return released;
+}
+
static void
kiblnd_finalise_conn(struct kib_conn *conn)
{
CNETERR("Deleting messages for %s: connection failed\n",
libcfs_nid2str(peer_ni->ibp_nid));
- kiblnd_txlist_done(&zombies, error,
- LNET_MSG_STATUS_LOCAL_DROPPED);
+ if (error == -EHOSTUNREACH || error == -ETIMEDOUT)
+ kiblnd_txlist_done(&zombies, error,
+ LNET_MSG_STATUS_NETWORK_TIMEOUT);
+ else
+ kiblnd_txlist_done(&zombies, error,
+ LNET_MSG_STATUS_LOCAL_DROPPED);
}
static void
}
if (ktime_compare(ktime_get(), tx->tx_deadline) >= 0) {
- CERROR("Timed out tx: %s, %lld seconds\n",
+ CERROR("Timed out tx: %s(WSQ:%d%d%d), %lld seconds\n",
kiblnd_queue2str(conn, txs),
+ tx->tx_waiting, tx->tx_sending, tx->tx_queued,
kiblnd_timeout() +
ktime_ms_delta(ktime_get(),
tx->tx_deadline) / MSEC_PER_SEC);
if (!list_empty(&timedout_txs))
kiblnd_txlist_done(&timedout_txs, -ETIMEDOUT,
- LNET_MSG_STATUS_LOCAL_TIMEOUT);
+ LNET_MSG_STATUS_NETWORK_TIMEOUT);
/* Handle timeout by closing the whole
* connection. We can only be sure RDMA activity
}
if (!list_empty(&kiblnd_data.kib_connd_conns)) {
+ int wait;
conn = list_entry(kiblnd_data.kib_connd_conns.next,
struct kib_conn, ibc_list);
list_del(&conn->ibc_list);
dropped_lock = 1;
kiblnd_disconnect_conn(conn);
- kiblnd_conn_decref(conn);
+ wait = conn->ibc_waits;
+ if (wait == 0) /* keep ref for connd_wait, see below */
+ kiblnd_conn_decref(conn);
spin_lock_irqsave(lock, flags);
+
+ if (wait)
+ list_add_tail(&conn->ibc_list,
+ &kiblnd_data.kib_connd_waits);
}
while (reconn < KIB_RECONN_BREAK) {
spin_lock_irqsave(lock, flags);
}
+ if (!list_empty(&kiblnd_data.kib_connd_waits)) {
+ conn = list_entry(kiblnd_data.kib_connd_waits.next,
+ struct kib_conn, ibc_list);
+ list_del(&conn->ibc_list);
+ spin_unlock_irqrestore(lock, flags);
+
+ dropped_lock = kiblnd_tx_may_discard(conn);
+ if (dropped_lock)
+ kiblnd_conn_decref(conn);
+
+ spin_lock_irqsave(lock, flags);
+ if (dropped_lock == 0)
+ list_add_tail(&conn->ibc_list,
+ &kiblnd_data.kib_connd_waits);
+ }
+
/* careful with the jiffy wrap... */
timeout = (int)(deadline - jiffies);
if (timeout <= 0) {