From da98e1a6f31462ab76ce7c3a48c21eb4c9eda151 Mon Sep 17 00:00:00 2001
From: Dean Luick
Date: Thu, 19 Jan 2023 21:38:04 +0100
Subject: [PATCH] LU-16349 o2iblnd: Fix key mismatch issue

If a pool memory region (mr) is mapped then unmapped without being
used, its key becomes out of sync with the RDMA subsystem.

At pool mr map time, the present code will create a local invalidate
work request (wr) using the mr's present key and then change the mr's
key.

When the mr is first used after being mapped, the local invalidate wr
will invalidate the original mr key, and then a fast register wr is
used with the modified key. The fast register will update the RDMA
subsystem's key for the mr.

The error occurs when the mr is unmapped without ever having been
used. The next time the mr is mapped, a local invalidate wr will again
be created, but this time it will use the mr's modified key. The RDMA
subsystem never saw the original local invalidate, so now the RDMA
subsystem's key for the mr and o2iblnd's key for the mr are out of
sync.

Fix the issue by tracking whether the invalidate has been used.
Repurpose the boolean frd->frd_valid. Presently, frd_valid is always
false. Remove the code that used frd_valid to conditionally split the
invalidate from the fast register. Instead, use frd_valid to indicate
when a new invalidate needs to be generated. After a post, evaluate
whether the invalidate was successfully used in the post.

These changes are only meaningful to the FRWR code path. The failure
has only been observed when using Omni-Path Architecture.

Lustre-change: https://review.whamcloud.com/49714 Lustre-commit: 0c93919f1375ce16d42ea13755ca6ffcc66b9969 Signed-off-by: Cyril Bordage Signed-off-by: Xing Huang Change-Id: I532a11f10ae6a5917a4c054f37747d08eb4d6331 Reviewed-by: Serguei Smirnov Reviewed-by: Amir Shehata Reviewed-by: Frank Sehr Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50214 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/klnds/o2iblnd/o2iblnd.c | 7 ++----- lnet/klnds/o2iblnd/o2iblnd_cb.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 2b1272a..e9c2332 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -1557,9 +1557,7 @@ static int kiblnd_alloc_freg_pool(struct kib_fmr_poolset *fps, goto out_middle; } - /* There appears to be a bug in MLX5 code where you must - * invalidate the rkey of a new FastReg pool before first - * using it. Thus, I am marking the FRD invalid here. */ + /* indicate that the local invalidate needs to be generated */ frd->frd_valid = false; list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); @@ -1750,9 +1748,7 @@ kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status) #endif /* HAVE_FMR_POOL_API */ { struct kib_fast_reg_descriptor *frd = fmr->fmr_frd; - if (frd) { - frd->frd_valid = false; frd->frd_posted = false; fmr->fmr_frd = NULL; spin_lock(&fps->fps_lock); @@ -1855,6 +1851,7 @@ again: struct ib_rdma_wr *inv_wr; __u32 key = is_rx ? 
mr->rkey : mr->lkey; + frd->frd_valid = true; inv_wr = &frd->frd_inv_wr; memset(inv_wr, 0, sizeof(*inv_wr)); diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 2cbc2b8..380a6ca 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -910,12 +910,8 @@ __must_hold(&conn->ibc_lock) struct ib_send_wr *wr = &tx->tx_wrq[0].wr; if (frd != NULL && !frd->frd_posted) { - if (!frd->frd_valid) { - wr = &frd->frd_inv_wr.wr; - wr->next = &frd->frd_fastreg_wr.wr; - } else { - wr = &frd->frd_fastreg_wr.wr; - } + wr = &frd->frd_inv_wr.wr; + wr->next = &frd->frd_fastreg_wr.wr; frd->frd_fastreg_wr.wr.next = &tx->tx_wrq[0].wr; } @@ -934,6 +930,13 @@ __must_hold(&conn->ibc_lock) #else rc = ib_post_send(conn->ibc_cmid->qp, wr, &bad); #endif + if (frd && !frd->frd_posted) { + /* The local invalidate becomes invalid (has been + * successfully used) if the post succeeds or the + * failing wr was not the invalidate. */ + frd->frd_valid = + !(rc == 0 || (bad != &frd->frd_inv_wr.wr)); + } } conn->ibc_last_send = ktime_get(); -- 1.8.3.1