If the peer repeatedly rejects connection requests with EALREADY,
assume that it doesn't support as many connections as we're trying
to create. In that case, stop creating new connections to the peer
altogether: continue with the already established connections if
there is at least one of each type (control, bulk-in and bulk-out),
otherwise fail the connection attempt.
This helps avoid the following assertion failure:
"ASSERTION( (wanted & ((((1UL))) << (3))) != 0 ) failed"
Test-Parameters: trivial testlist=sanity-lnet
Fixes: 5afe3b053 ("LU-17258 socklnd: ensure connection type established upon race")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I6072e91cc36544fc2f56c91cd78f6637cf82ecbc
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53955
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
break;
case SOCKLND_CONN_BULK_IN:
conn_cb->ksnr_blki_conn_count--;
- if (conn_cb->ksnr_blki_conn_count < conn_cb->ksnr_max_conns)
+ if (conn_cb->ksnr_blki_conn_count == 0)
conn_cb->ksnr_connected &= ~BIT(type);
break;
case SOCKLND_CONN_BULK_OUT:
conn_cb->ksnr_blko_conn_count--;
- if (conn_cb->ksnr_blko_conn_count < conn_cb->ksnr_max_conns)
+ if (conn_cb->ksnr_blko_conn_count == 0)
conn_cb->ksnr_connected &= ~BIT(type);
break;
case SOCKLND_CONN_ANY:
- if (conn_cb->ksnr_conn_count < conn_cb->ksnr_max_conns)
+ if (conn_cb->ksnr_conn_count == 0)
conn_cb->ksnr_connected &= ~BIT(type);
break;
default:
/* After so many retries due to EALREADY assume that
* the peer doesn't support as many connections as we want
*/
- conn_cb->ksnr_connected |= BIT(type);
+ if (conn_cb->ksnr_blki_conn_count &&
+ conn_cb->ksnr_blko_conn_count &&
+ conn_cb->ksnr_ctrl_conn_count) {
+ /* Don't create any more connections of any type */
+ conn_cb->ksnr_connected |= (BIT(SOCKLND_CONN_CONTROL) |
+ BIT(SOCKLND_CONN_BULK_IN) |
+ BIT(SOCKLND_CONN_BULK_OUT));
+ } else {
+ /* If we don't have at least one connection of each
+ * type, fail
+ */
+ write_unlock_bh(&ksocknal_data.ksnd_global_lock);
+ goto failed;
+ }
retry_later = false;
}