atomic_t ni_healthv;
/*
+ * Set to 1 by the LND when it receives an event telling it the device
+ * has gone into a fatal state. Set to 0 when the LND receives an
+ * event telling it the device is back online.
+ */
+ atomic_t ni_fatal_error_on;
+
+ /*
* equivalent interfaces to use
* This is an array because socklnd bonding can still be configured
*/
{
struct kib_conn *conn = arg;
- switch (event->event) {
- case IB_EVENT_COMM_EST:
- CDEBUG(D_NET, "%s established\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
+ switch (event->event) {
+ case IB_EVENT_COMM_EST:
+ CDEBUG(D_NET, "%s established\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid));
/* We received a packet but connection isn't established
* probably handshake packet was lost, so free to
* force make connection established */
rdma_notify(conn->ibc_cmid, IB_EVENT_COMM_EST);
- return;
+ return;
- default:
- CERROR("%s: Async QP event type %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
- return;
- }
+ case IB_EVENT_PORT_ERR:
+ case IB_EVENT_DEVICE_FATAL:
+ CERROR("Fatal device error for NI %s\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_ni->ni_nid));
+ atomic_set(&conn->ibc_peer->ibp_ni->ni_fatal_error_on, 1);
+ return;
+
+ case IB_EVENT_PORT_ACTIVE:
+ CERROR("Port reactivated for NI %s\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_ni->ni_nid));
+ atomic_set(&conn->ibc_peer->ibp_ni->ni_fatal_error_on, 0);
+ return;
+
+ default:
+ CERROR("%s: Async QP event type %d\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event);
+ return;
+ }
}
static void
unsigned int distance;
int ni_credits;
int ni_healthv;
+ int ni_fatal;
ni_credits = atomic_read(&ni->ni_tx_credits);
ni_healthv = atomic_read(&ni->ni_healthv);
+ ni_fatal = atomic_read(&ni->ni_fatal_error_on);
/*
* calculate the distance from the CPT on which
* Select on health, shorter distance, available
* credits, then round-robin.
*/
- if (ni_healthv < best_healthv) {
+ if (ni_fatal) {
+ continue;
+ } else if (ni_healthv < best_healthv) {
continue;
} else if (ni_healthv > best_healthv) {
best_healthv = ni_healthv;