From e56d40fb1f911e0230a82b83726fcb43d555dbe6 Mon Sep 17 00:00:00 2001 From: Maxim Patlasov Date: Wed, 23 Jun 2010 17:01:19 +0200 Subject: [PATCH] b=23076 fix for o2iblnd reconnect to retry one more time i=isaac With peer health detection, o2iblnd makes only one attempt to reconnect, which is not enough with nodes running Lustre 1.6 because of the protocol version mismatch. --- lnet/klnds/o2iblnd/o2iblnd_cb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 638ffc5..b6cd310 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -2348,8 +2348,12 @@ kiblnd_reconnect (kib_conn_t *conn, int version, write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); /* retry connection if it's still needed and no other connection - * attempts (active or passive) are in progress */ - if (!list_empty(&peer->ibp_tx_queue) && + * attempts (active or passive) are in progress + * NB: reconnect is still needed even when ibp_tx_queue is + * empty if ibp_version != version because reconnect may be + * initiated by kiblnd_query() */ + if ((!list_empty(&peer->ibp_tx_queue) || + peer->ibp_version != version) && peer->ibp_connecting == 1 && peer->ibp_accepting == 0) { retry = 1; -- 1.8.3.1