From dda0bd1207d0cb4864c3ec2a10cd881021ef2ea9 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Thu, 3 Nov 2022 07:23:20 -0400 Subject: [PATCH] LU-16297 ptlrpc: don't panic during reconnection ptl_send_rpc() could race with ptlrpc_connect_import_locked() in the middle of the assertion check, which leads to a false panic. The assertion checks (AT_OFF || imp->imp_state != LUSTRE_IMP_FULL || (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) || !(imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_AT)). After the import state has been checked, a reconnect can change the import state and flags before the second part (the flag checks) is evaluated: MSGHDR_AT_SUPPORT is disabled during client reconnection. It is not good to use locking in this hot path, so the fix changes the assertion to a report. Lustre-change: https://review.whamcloud.com/49029 Lustre-commit: df31c4c0b39b8845911344e6fadc008bcba40bb1 HPE-bug-id: LUS-10985 Signed-off-by: Alexander Boyko Change-Id: Ifc9e413c679c3e8a4c8f4f541251bebabae41c82 Reviewed-by: Andreas Dilger Reviewed-by: Alexander Zarochentsev Reviewed-by: Mikhail Pershin Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55040 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Yang Sheng Reviewed-by: Oleg Drokin --- lustre/ptlrpc/niobuf.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index cfd16ff..9968028 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -793,13 +793,21 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) LBUG(); } - /** For enabled AT all request should have AT_SUPPORT in the - * FULL import state when OBD_CONNECT_AT is set */ - LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL || - (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) || - !(imp->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_AT)); - + /** + * For enabled AT all request should have AT_SUPPORT in the + * FULL import state when OBD_CONNECT_AT is set. 
+ * This check has a race with ptlrpc_connect_import_locked() + * with low chance, don't panic, only report. + */ + if (!(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL || + (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) || + !(imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_AT))) { + DEBUG_REQ(D_HA, request, "Wrong state of import detected, AT=%d, imp=%d, msghdr=%d, conn=%d\n", + AT_OFF, imp->imp_state != LUSTRE_IMP_FULL, + (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT), + !(imp->imp_connect_data.ocd_connect_flags & + OBD_CONNECT_AT)); + } if (request->rq_resend) { lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT); if (request->rq_resend_cb != NULL) -- 1.8.3.1