From: Chris Horn Date: Mon, 6 Jun 2022 18:09:03 +0000 (-0500) Subject: LU-15914 lnet: Fix null md deref for finalized message X-Git-Tag: 2.15.51~89 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=cb0220db3ce517b0e2fce93c864e6c3dbb61b5e0;hp=716ac65ef6315f771eb3382b42bb2bb1a22a7391 LU-15914 lnet: Fix null md deref for finalized message When a message is finalized, the lnet_msg:msg_md field may be cleared (see lnet_finalize() -> lnet_msg_detach_md()). When an LNet router is forwarding such a message, or if an ACK has been requested for such a message, then the NULL msg_md may be dereferenced in lnet_get_best_ni(). Check for this in lnet_get_best_ni() before dereferencing the MD. It may also be dereferenced in kiblnd_send(), so check for this situation there, too. Some style cleanup is included in kiblnd_send(). Test-Parameters: trivial Fixes: 959304eac7 ("LU-15189 lnet: fix memory mapping.") HPE-bug-id: LUS-10997 Signed-off-by: Chris Horn Change-Id: I3cfdc8d342bd3b49a61d1ce6c31a245848accf8f Reviewed-on: https://review.whamcloud.com/47546 Reviewed-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Serguei Smirnov Reviewed-by: Alexey Lyashkov Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index ac69534..c0bc6db 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1637,31 +1637,32 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) { struct kib_dev *dev = ((struct kib_net *)ni->ni_data)->ibn_dev; struct lnet_hdr *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; + int type = lntmsg->msg_type; struct lnet_processid *target = &lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct bio_vec *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = 
lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - bool gpu; + int target_is_router = lntmsg->msg_target_is_router; + int routing = lntmsg->msg_routing; + unsigned int payload_niov = lntmsg->msg_niov; + struct bio_vec *payload_kiov = lntmsg->msg_kiov; + unsigned int payload_offset = lntmsg->msg_offset; + unsigned int payload_nob = lntmsg->msg_len; + struct lnet_libmd *msg_md = lntmsg->msg_md; + bool gpu; struct kib_msg *ibmsg; struct kib_rdma_desc *rd; - struct kib_tx *tx; - int nob; - int rc; + struct kib_tx *tx; + int nob; + int rc; /* NB 'private' is different depending on what we're sending.... */ CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", payload_nob, payload_niov, libcfs_idstr(target)); - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); + LASSERT(payload_nob == 0 || payload_niov > 0); + LASSERT(payload_niov <= LNET_MAX_IOV); /* Thread context */ - LASSERT (!in_interrupt()); + LASSERT(!in_interrupt()); tx = kiblnd_get_idle_tx(ni, lnet_nid_to_nid4(&target->nid)); if (tx == NULL) { @@ -1671,7 +1672,7 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) return -ENOMEM; } ibmsg = tx->tx_msg; - gpu = (lntmsg->msg_md->md_flags & LNET_MD_FLAG_GPU); + gpu = msg_md ? (msg_md->md_flags & LNET_MD_FLAG_GPU) : false; switch (type) { default: @@ -1687,16 +1688,17 @@ kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg) break; /* send IMMEDIATE */ /* is the REPLY message too small for RDMA? 
*/ - nob = offsetof(struct kib_msg, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); + nob = offsetof(struct kib_msg, + ibm_u.immediate.ibim_payload[msg_md->md_length]); if (nob <= IBLND_MSG_SIZE && !gpu) break; /* send IMMEDIATE */ rd = &ibmsg->ibm_u.get.ibgm_rd; - tx->tx_gpu = !!gpu; + tx->tx_gpu = gpu; rc = kiblnd_setup_rd_kiov(ni, tx, rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_kiov, - 0, lntmsg->msg_md->md_length); + msg_md->md_niov, + msg_md->md_kiov, + 0, msg_md->md_length); if (rc != 0) { CERROR("Can't setup GET sink for %s: %d\n", libcfs_nidstr(&target->nid), rc); diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 6a3220a..2fcfc79 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -1636,7 +1636,7 @@ lnet_get_best_ni(struct lnet_net *local_net, struct lnet_ni *best_ni, __u32 best_sel_prio; unsigned int best_dev_prio; unsigned int dev_idx = UINT_MAX; - bool gpu = md->md_flags & LNET_MD_FLAG_GPU; + bool gpu = md ? (md->md_flags & LNET_MD_FLAG_GPU) : false; if (gpu) { struct page *page = lnet_get_first_page(md, offset);