From 2df49b78c86f9609e5a296bad02a1ecb600a3a7b Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sat, 16 Sep 2023 00:54:54 -0500 Subject: [PATCH] LU-17062 lnet: Update lnet_peer_*_decref_locked usage Move decref's to occur after last reference to prevent use after free. Lustre-change: https://review.whamcloud.com/52184 Lustre-commit: 60cfceb8c59364f786b31ac36c2c245b9a1e495a HPE-bug-id: LUS-11799 Signed-off-by: Shaun Tancheff Change-Id: I2382ece560039383f644b6aee73a9481d6bb5673 Reviewed-by: Chris Horn Reviewed-by: Petros Koutoupis Reviewed-by: James Simmons Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54904 Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lnet/lnet/lib-move.c | 33 ++++++++++++++++++++++----------- lnet/lnet/peer.c | 27 +++++++++++++++------------ lnet/lnet/router.c | 4 +++- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 5c50c9e..fda630c 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2152,7 +2152,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, struct lnet_peer_ni **gw_lpni, struct lnet_peer **gw_peer) { - int rc; + int rc = 0; struct lnet_peer *gw; struct lnet_peer *lp; struct lnet_peer_net *lpn; @@ -2163,6 +2163,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, struct lnet_peer_ni *lpni = NULL; struct lnet_peer_ni *gwni = NULL; bool route_found = false; + bool gwni_decref = false; struct lnet_nid *src_nid = !LNET_NID_IS_ANY(&sd->sd_src_nid) || !sd->sd_best_ni ? &sd->sd_src_nid @@ -2180,8 +2181,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (!LNET_NID_IS_ANY(&sd->sd_rtr_nid)) { gwni = lnet_peer_ni_find_locked(&sd->sd_rtr_nid); if (gwni) { + gwni_decref = true; gw = gwni->lpni_peer_net->lpn_peer; - lnet_peer_ni_decref_locked(gwni); if (gw->lp_rtr_refcount) route_found = true; } else { @@ -2205,7 +2206,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, "any local NI" : libcfs_nidstr(src_nid), libcfs_nidstr(&sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } } else { /* we've already looked up the initial lpni using @@ -2251,7 +2253,8 @@ use_lpn: if (!best_lpn) { CERROR("peer %s has no available nets\n", libcfs_nidstr(&sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } sd->sd_best_lpni = lnet_find_best_lpni(sd->sd_best_ni, @@ -2261,7 +2264,8 @@ use_lpn: if (!sd->sd_best_lpni) { CERROR("peer %s is unreachable\n", libcfs_nidstr(&sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } /* We're attempting to round robin over the remote peer @@ -2293,14 +2297,16 @@ use_lpn: CERROR("no route to %s from %s\n", libcfs_nidstr(dst_nid), libcfs_nidstr(src_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } if (!gwni) { CERROR("Internal Error. Route expected to %s from %s\n", libcfs_nidstr(dst_nid), libcfs_nidstr(src_nid)); - return -EFAULT; + rc = -EFAULT; + goto out; } gw = best_route->lr_gateway; @@ -2323,7 +2329,7 @@ use_lpn: if (alive_router_check_interval <= 0) { rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt); if (rc) - return rc; + goto out; } if (!sd->sd_best_ni) { @@ -2335,7 +2341,8 @@ use_lpn: CERROR("Internal Error. Expected local ni on %s but non found: %s\n", libcfs_net2str(lpn->lpn_net_id), libcfs_nidstr(&sd->sd_src_nid)); - return -EFAULT; + rc = -EFAULT; + goto out; } } @@ -2353,7 +2360,11 @@ use_lpn: best_lpn->lpn_seq++; } - return 0; +out: + if (gwni_decref && gwni) + lnet_peer_ni_decref_locked(gwni); + + return rc; } /* @@ -2995,7 +3006,6 @@ again: lnet_net_unlock(cpt); return rc; } - lnet_peer_ni_decref_locked(lpni); peer = lpni->lpni_peer_net->lpn_peer; @@ -3095,6 +3105,7 @@ again: * updated as a result of calling lnet_handle_send_case_locked(). */ cpt = send_data.sd_cpt; + lnet_peer_ni_decref_locked(lpni); if (rc == REPEAT_SEND) goto again; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 4ba681a..c2a54a5 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -561,9 +561,9 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid4, unsigned int flags) rc = -ENOENT; goto out; } - lnet_peer_ni_decref_locked(lpni); if (lp != lpni->lpni_peer_net->lpn_peer) { rc = -ECHILD; + lnet_peer_ni_decref_locked(lpni); goto out; } @@ -573,6 +573,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid4, unsigned int flags) */ if (nid_same(&nid, &lp->lp_primary_nid) && lp->lp_nnis != 1 && !force) { rc = -EBUSY; + lnet_peer_ni_decref_locked(lpni); goto out; } @@ -586,6 +587,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid4, unsigned int flags) lp->lp_primary_nid = lpni2->lpni_nid; } rc = lnet_peer_ni_del_locked(lpni, force); + lnet_peer_ni_decref_locked(lpni); lnet_net_unlock(LNET_LOCK_EX); @@ -2034,8 +2036,8 @@ __must_hold(&the_lnet.ln_api_mutex) lpni = lnet_find_peer_ni_locked(prim_nid); if (!lpni) return -ENOENT; - lnet_peer_ni_decref_locked(lpni); lp = lpni->lpni_peer_net->lpn_peer; + lnet_peer_ni_decref_locked(lpni); /* Peer must have been configured. */ if ((flags & LNET_PEER_CONFIGURED) && @@ -2139,8 +2141,8 @@ lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, int force) lpni = lnet_find_peer_ni_locked(prim_nid); if (!lpni) return -ENOENT; - lnet_peer_ni_decref_locked(lpni); lp = lpni->lpni_peer_net->lpn_peer; + lnet_peer_ni_decref_locked(lpni); if (prim_nid != lnet_nid_to_nid4(&lp->lp_primary_nid)) { CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n", @@ -2720,11 +2722,13 @@ int lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block) { DEFINE_WAIT(wait); - struct lnet_peer *lp; + struct lnet_peer *lp = NULL; int rc = 0; int count = 0; again: + if (lp) + lnet_peer_decref_locked(lp); lnet_net_unlock(cpt); lnet_net_lock(LNET_LOCK_EX); lp = lpni->lpni_peer_net->lpn_peer; @@ -2781,7 +2785,6 @@ again: lnet_net_unlock(LNET_LOCK_EX); lnet_net_lock(cpt); - lnet_peer_decref_locked(lp); /* * The peer may have changed, so re-check and rediscover if that turns * out to have been the case. The reference count on lp ensured that @@ -2809,6 +2812,7 @@ again: (lp ? libcfs_nidstr(&lp->lp_primary_nid) : "(none)"), libcfs_nidstr(&lpni->lpni_nid), rc, (!block) ? "pending discovery" : "discovery complete"); + lnet_peer_decref_locked(lp); return rc; } @@ -3112,11 +3116,6 @@ static void lnet_discovery_event_handler(struct lnet_event *event) LBUG(); } lnet_net_lock(LNET_LOCK_EX); - if (event->unlinked) { - pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start); - lnet_ping_buffer_decref(pbuf); - lnet_peer_decref_locked(lp); - } /* put peer back at end of request queue, if discovery not already * done */ @@ -3125,6 +3124,11 @@ static void lnet_discovery_event_handler(struct lnet_event *event) list_move_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request); wake_up(&the_lnet.ln_dc_waitq); } + if (event->unlinked) { + pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start); + lnet_ping_buffer_decref(pbuf); + lnet_peer_decref_locked(lp); + } lnet_net_unlock(LNET_LOCK_EX); } @@ -3290,8 +3294,6 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, goto out; } - lnet_peer_ni_decref_locked(lpni); - lpn = lpni->lpni_peer_net; if (lpn->lpn_peer_nets.prev != &lp->lp_peer_nets) list_move(&lpn->lpn_peer_nets, &lp->lp_peer_nets); @@ -3300,6 +3302,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, list_move(&lpni->lpni_peer_nis, &lpni->lpni_peer_net->lpn_peer_nis); + lnet_peer_ni_decref_locked(lpni); /* * Errors other than -ENOMEM are due to peers having been * configured with DLC. Ignore these because DLC overrides diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 9ccf562..196d1d0 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -874,12 +874,13 @@ lnet_del_route(__u32 net, struct lnet_nid *gw) LASSERT(lp); gw_nid = lp->lp_primary_nid; gw = &gw_nid; - lnet_peer_ni_decref_locked(lpni); } if (net != LNET_NET_ANY) { rnet = lnet_find_rnet_locked(net); if (!rnet) { + if (lpni) + lnet_peer_ni_decref_locked(lpni); lnet_net_unlock(LNET_LOCK_EX); return -ENOENT; } @@ -912,6 +913,7 @@ delete_zombies: if (lpni) { if (list_empty(&lp->lp_routes)) lp->lp_disc_net_id = 0; + lnet_peer_ni_decref_locked(lpni); } lnet_net_unlock(LNET_LOCK_EX); -- 1.8.3.1