#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */
#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */
+/*
+ * Connection-request flags used by the connrace protocol.  The active
+ * connector ORs them into the value it hands to kiblnd_pack_msg(), and
+ * kiblnd_peer_win_race() tests them against ibm_credits of the incoming
+ * connection request.
+ */
+/* flag to show a peer can understand connrace protocol; set in every
+ * connection request built by a sender that implements the protocol */
+#define IBLND_CONNREQ_NOOP (1U << 0)
+/* want to win connrace; set by the sender only while its
+ * ibp_connrace_win flag for the peer is set */
+#define IBLND_CONNREQ_WIN_WISH (1U << 1)
+
typedef struct {
__u32 ibr_magic; /* sender's magic */
__u16 ibr_version; /* sender's version */
__u16 ibc_version;
/* reconnect later */
__u16 ibc_reconnect:1;
+ /* rejected by connrace */
+ __u16 ibc_connrace:1;
/* which instance of the peer */
__u64 ibc_incarnation;
/* # users */
unsigned short ibp_connecting;
/* reconnect this peer later */
unsigned short ibp_reconnecting:1;
+ /* wish to win the connrace */
+ unsigned short ibp_connrace_win:1;
/* # consecutive reconnection attempts to this peer */
unsigned int ibp_reconnected;
/* errno on closing this peer */
} while (0)
+/*
+ * Decide whether the sender of an incoming connection request wins a
+ * possible connection race against this node.
+ *
+ * \param peer	local peer state for the sender of the request
+ * \param msg	the connection request; its ibm_credits field carries the
+ *		IBLND_CONNREQ_* flags
+ *
+ * \retval true		no race is in progress, or the peer wins it
+ * \retval false	this node wins; the caller treats the request as a
+ *			connection race
+ */
static inline bool
+kiblnd_peer_win_race(kib_peer_t *peer, kib_msg_t *msg)
+{
+	if (!peer->ibp_connecting)
+		return true; /* no race */
+
+	if (msg->ibm_credits & IBLND_CONNREQ_NOOP) {
+		/* peer can understand connrace protocol */
+
+		if (msg->ibm_credits & IBLND_CONNREQ_WIN_WISH)
+			return true; /* peer has win wish */
+
+		if (peer->ibp_connrace_win)
+			return false; /* I wish to win, reject peer */
+	}
+	/*
+	 * Tie-break connection race in favour of the higher NID.  Use >=
+	 * rather than > so that a request from an equal NID is still
+	 * accepted: the open-coded test this helper replaces rejected the
+	 * request only when the peer NID was strictly lower than the local
+	 * one, and a win wish is never raised for equal NIDs, so rejecting
+	 * here could never be overridden.
+	 */
+	return peer->ibp_nid >= peer->ibp_ni->ni_nid;
+}
+
+static inline bool
kiblnd_peer_connecting(kib_peer_t *peer)
{
return peer->ibp_connecting != 0 ||
int kiblnd_dev_failover(kib_dev_t *dev);
int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
void kiblnd_destroy_peer (kib_peer_t *peer);
-bool kiblnd_reconnect_peer(kib_peer_t *peer);
+bool kiblnd_reconnect_peer(kib_peer_t *peer, bool connrace_win);
void kiblnd_destroy_dev (kib_dev_t *dev);
void kiblnd_unlink_peer_locked (kib_peer_t *peer);
kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
}
bool
-kiblnd_reconnect_peer(kib_peer_t *peer)
+kiblnd_reconnect_peer(kib_peer_t *peer, bool connrace_win)
{
rwlock_t *glock = &kiblnd_data.kib_global_lock;
char *reason = NULL;
peer->ibp_connecting++;
peer->ibp_reconnected++;
+
+ LASSERT(!peer->ibp_connrace_win);
+ /*
+ * If I have lost the connection race enough times and want to win it,
+ * I need to flag this peer so that my next connection request carries
+ * IBLND_CONNREQ_WIN_WISH and incoming connection requests from the
+ * peer are rejected.
+ */
+ if (peer->ibp_nid > peer->ibp_ni->ni_nid)
+ peer->ibp_connrace_win = connrace_win;
+
write_unlock_irqrestore(glock, flags);
kiblnd_connect_peer(peer);
if (active) {
LASSERT (peer->ibp_connecting > 0);
peer->ibp_connecting--;
+ peer->ibp_connrace_win = 0;
} else {
LASSERT (peer->ibp_accepting > 0);
peer->ibp_accepting--;
kiblnd_conn_addref(conn); /* +1 ref for ibc_list */
list_add(&conn->ibc_list, &peer->ibp_conns);
peer->ibp_reconnected = 0;
- if (active)
+ if (active) {
peer->ibp_connecting--;
- else
+ peer->ibp_connrace_win = 0;
+ } else {
peer->ibp_accepting--;
+ }
if (peer->ibp_version == 0) {
peer->ibp_version = conn->ibc_version;
goto failed;
}
- /* tie-break connection race in favour of the higher NID */
- if (peer2->ibp_connecting != 0 &&
- nid < ni->ni_nid) {
+ if (!kiblnd_peer_win_race(peer2, reqmsg)) {
write_unlock_irqrestore(g_lock, flags);
CWARN("Conn race %s\n", libcfs_nid2str(peer2->ibp_nid));
switch (rej->ibr_why) {
case IBLND_REJECT_CONN_RACE:
+ conn->ibc_connrace = 1;
case IBLND_REJECT_CONN_STALE:
case IBLND_REJECT_CONN_UNCOMPAT:
case IBLND_REJECT_MSG_QUEUE_SIZE:
__u64 incarnation;
unsigned long flags;
int rc;
+ int connreq = IBLND_CONNREQ_NOOP;
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ if (peer->ibp_connrace_win)
+ connreq |= IBLND_CONNREQ_WIN_WISH;
+
incarnation = peer->ibp_incarnation;
version = (peer->ibp_version == 0) ? IBLND_MSG_VERSION :
peer->ibp_version;
-
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT,
msg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags;
msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE;
- kiblnd_pack_msg(peer->ibp_ni, msg, version,
- 0, peer->ibp_nid, incarnation);
+ kiblnd_pack_msg(peer->ibp_ni, msg, version, connreq,
+ peer->ibp_nid, incarnation);
memset(&cp, 0, sizeof(cp));
cp.private_data = msg;
continue;
conn->ibc_peer = peer;
- if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE)
+ if (peer->ibp_reconnected < KIB_RECONN_HIGH_RACE) {
+ /* reset ibc_connrace because it is still too
+ * early to break the normal connrace protocol
+ */
+ conn->ibc_connrace = 0;
list_add_tail(&conn->ibc_list,
&kiblnd_data.kib_reconn_list);
- else
+ } else {
+ /* want to win the next connrace, don't reset
+ * conn->ibc_connrace so kiblnd_reconnect_peer
+ * can see the win wish.
+ */
list_add_tail(&conn->ibc_list,
&kiblnd_data.kib_reconn_wait);
+ }
}
if (!list_empty(&kiblnd_data.kib_connd_conns)) {
spin_unlock_irqrestore(lock, flags);
dropped_lock = 1;
- reconn += kiblnd_reconnect_peer(conn->ibc_peer);
+ reconn += kiblnd_reconnect_peer(conn->ibc_peer,
+ conn->ibc_connrace);
kiblnd_peer_decref(conn->ibc_peer);
LIBCFS_FREE(conn, sizeof(*conn));