From 7f27a2fceef9a03d3ada74e258e774c8f5d420f0 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Mon, 11 Mar 2024 10:59:29 -0700 Subject: [PATCH] LU-17632 o2iblnd: graceful handling of CM_EVENT_CONNECT_ERROR There were examples in the field with RoCE setups which demonstrate that RDMA_CM_EVENT_CONNECT_ERROR may be received when conn state is neither IBLND_CONN_ACTIVE_CONNECT nor IBLND_CONN_PASSIVE_WAIT. Handle this in a more graceful manner: report the event as unexpected and allow the flow to continue. Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Serguei Smirnov Change-Id: I58b2482207cfd821f6eac142bdefc8f5bc50f8b4 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54353 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Etienne AUJAMES Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lnet/klnds/o2iblnd/o2iblnd_cb.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 7b740ef..956649a 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -3355,13 +3355,15 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) case RDMA_CM_EVENT_CONNECT_ERROR: conn = cmid->context; - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CNETERR("%s: CONNECT ERROR %d cm_id %p conn %p\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status, cmid, conn); - kiblnd_connreq_done(conn, -ENOTCONN); - kiblnd_conn_decref(conn); - return 0; + CNETERR("%s: CONNECT ERROR %d cm_id %p conn %p state: %d\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid), + event->status, cmid, conn, conn->ibc_state); + if (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || + conn->ibc_state == IBLND_CONN_PASSIVE_WAIT) { + kiblnd_connreq_done(conn, -ENOTCONN); + kiblnd_conn_decref(conn); + } + return 0; case 
RDMA_CM_EVENT_REJECTED: conn = cmid->context; -- 1.8.3.1