From e8a834333989bde0cf5426e8239c696265dfbaf6 Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Sat, 16 Sep 2023 00:54:54 -0500 Subject: [PATCH] LU-17062 lnet: Update lnet_peer_*_decref_locked usage Move decref's to occur after last reference to prevent use after free. Lustre-change: https://review.whamcloud.com/52184 Lustre-commit: 60cfceb8c59364f786b31ac36c2c245b9a1e495a HPE-bug-id: LUS-11799 Signed-off-by: Shaun Tancheff Change-Id: I2382ece560039383f644b6aee73a9481d6bb5673 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54897 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Andreas Dilger --- lnet/lnet/lib-move.c | 33 ++++++++++++++++++++++----------- lnet/lnet/peer.c | 27 +++++++++++++++------------ lnet/lnet/router.c | 4 +++- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index aa7ea81..1486eda 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2127,7 +2127,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, struct lnet_peer_ni **gw_lpni, struct lnet_peer **gw_peer) { - int rc; + int rc = 0; struct lnet_peer *gw; struct lnet_peer *lp; struct lnet_peer_net *lpn; @@ -2138,6 +2138,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, struct lnet_peer_ni *lpni = NULL; struct lnet_peer_ni *gwni = NULL; bool route_found = false; + bool gwni_decref = false; lnet_nid_t src_nid = (sd->sd_src_nid != LNET_NID_ANY) ? sd->sd_src_nid : (sd->sd_best_ni != NULL) ? sd->sd_best_ni->ni_nid : LNET_NID_ANY; @@ -2152,8 +2153,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (sd->sd_rtr_nid != LNET_NID_ANY) { gwni = lnet_find_peer_ni_locked(sd->sd_rtr_nid); if (gwni) { + gwni_decref = true; gw = gwni->lpni_peer_net->lpn_peer; - lnet_peer_ni_decref_locked(gwni); if (gw->lp_rtr_refcount) route_found = true; } else { @@ -2177,7 +2178,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, libcfs_nid2str(src_nid) : "any local NI", libcfs_nid2str(sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } } else { /* we've already looked up the initial lpni using @@ -2210,7 +2212,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (!best_lpn) { CERROR("peer %s has no available nets\n", libcfs_nid2str(sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } sd->sd_best_lpni = lnet_find_best_lpni(sd->sd_best_ni, @@ -2220,7 +2223,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (!sd->sd_best_lpni) { CERROR("peer %s is unreachable\n", libcfs_nid2str(sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } /* We're attempting to round robin over the remote peer @@ -2251,14 +2255,16 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, CERROR("no route to %s from %s\n", libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } if (!gwni) { CERROR("Internal Error. Route expected to %s from %s\n", libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid)); - return -EFAULT; + rc = -EFAULT; + goto out; } gw = best_route->lr_gateway; @@ -2281,7 +2287,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (alive_router_check_interval <= 0) { rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt); if (rc) - return rc; + goto out; } if (!sd->sd_best_ni) { @@ -2293,7 +2299,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, CERROR("Internal Error. Expected local ni on %s but non found: %s\n", libcfs_net2str(lpn->lpn_net_id), libcfs_nid2str(sd->sd_src_nid)); - return -EFAULT; + rc = -EFAULT; + goto out; } } @@ -2311,7 +2318,11 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, best_lpn->lpn_seq++; } - return 0; +out: + if (gwni_decref && gwni) + lnet_peer_ni_decref_locked(gwni); + + return rc; } /* @@ -2929,7 +2940,6 @@ again: lnet_net_unlock(cpt); return rc; } - lnet_peer_ni_decref_locked(lpni); peer = lpni->lpni_peer_net->lpn_peer; @@ -3023,6 +3033,7 @@ again: * updated as a result of calling lnet_handle_send_case_locked(). */ cpt = send_data.sd_cpt; + lnet_peer_ni_decref_locked(lpni); if (rc == REPEAT_SEND) goto again; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index d47f896..105d94b 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -557,9 +557,9 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) rc = -ENOENT; goto out; } - lnet_peer_ni_decref_locked(lpni); if (lp != lpni->lpni_peer_net->lpn_peer) { rc = -ECHILD; + lnet_peer_ni_decref_locked(lpni); goto out; } @@ -569,6 +569,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) */ if (nid == lp->lp_primary_nid && lp->lp_nnis != 1 && !force) { rc = -EBUSY; + lnet_peer_ni_decref_locked(lpni); goto out; } @@ -582,6 +583,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags) lp->lp_primary_nid = lpni2->lpni_nid; } rc = lnet_peer_ni_del_locked(lpni, force); + lnet_peer_ni_decref_locked(lpni); lnet_net_unlock(LNET_LOCK_EX); @@ -1864,8 +1866,8 @@ __must_hold(&the_lnet.ln_api_mutex) lpni = lnet_find_peer_ni_locked(prim_nid); if (!lpni) return -ENOENT; - lnet_peer_ni_decref_locked(lpni); lp = lpni->lpni_peer_net->lpn_peer; + lnet_peer_ni_decref_locked(lpni); /* Peer must have been configured. */ if ((flags & LNET_PEER_CONFIGURED) && @@ -1966,8 +1968,8 @@ lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, int force) lpni = lnet_find_peer_ni_locked(prim_nid); if (!lpni) return -ENOENT; - lnet_peer_ni_decref_locked(lpni); lp = lpni->lpni_peer_net->lpn_peer; + lnet_peer_ni_decref_locked(lpni); if (prim_nid != lp->lp_primary_nid) { CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n", @@ -2527,11 +2529,13 @@ int lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block) { DEFINE_WAIT(wait); - struct lnet_peer *lp; + struct lnet_peer *lp = NULL; int rc = 0; int count = 0; again: + if (lp) + lnet_peer_decref_locked(lp); lnet_net_unlock(cpt); lnet_net_lock(LNET_LOCK_EX); lp = lpni->lpni_peer_net->lpn_peer; @@ -2588,7 +2592,6 @@ again: lnet_net_unlock(LNET_LOCK_EX); lnet_net_lock(cpt); - lnet_peer_decref_locked(lp); /* * The peer may have changed, so re-check and rediscover if that turns * out to have been the case. The reference count on lp ensured that @@ -2616,6 +2619,7 @@ again: (lp ? libcfs_nid2str(lp->lp_primary_nid) : "(none)"), libcfs_nid2str(lpni->lpni_nid), rc, (!block) ? "pending discovery" : "discovery complete"); + lnet_peer_decref_locked(lp); return rc; } @@ -2908,11 +2912,6 @@ static void lnet_discovery_event_handler(struct lnet_event *event) LBUG(); } lnet_net_lock(LNET_LOCK_EX); - if (event->unlinked) { - pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start); - lnet_ping_buffer_decref(pbuf); - lnet_peer_decref_locked(lp); - } /* put peer back at end of request queue, if discovery not already * done */ @@ -2921,6 +2920,11 @@ static void lnet_discovery_event_handler(struct lnet_event *event) list_move_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request); wake_up(&the_lnet.ln_dc_waitq); } + if (event->unlinked) { + pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start); + lnet_ping_buffer_decref(pbuf); + lnet_peer_decref_locked(lp); + } lnet_net_unlock(LNET_LOCK_EX); } @@ -3102,8 +3106,6 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, goto out; } - lnet_peer_ni_decref_locked(lpni); - lpn = lpni->lpni_peer_net; if (lpn->lpn_peer_nets.prev != &lp->lp_peer_nets) list_move(&lpn->lpn_peer_nets, &lp->lp_peer_nets); @@ -3112,6 +3114,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, list_move(&lpni->lpni_peer_nis, &lpni->lpni_peer_net->lpn_peer_nis); + lnet_peer_ni_decref_locked(lpni); /* * Errors other than -ENOMEM are due to peers having been * configured with DLC. Ignore these because DLC overrides diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index d453d57..178cabe 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -821,12 +821,13 @@ __must_hold(&the_lnet.ln_api_mutex) lp = lpni->lpni_peer_net->lpn_peer; LASSERT(lp); gw_nid = lp->lp_primary_nid; - lnet_peer_ni_decref_locked(lpni); } if (net != LNET_NET_ANY) { rnet = lnet_find_rnet_locked(net); if (!rnet) { + if (lpni) + lnet_peer_ni_decref_locked(lpni); lnet_net_unlock(LNET_LOCK_EX); return -ENOENT; } @@ -859,6 +860,7 @@ delete_zombies: if (lpni) { if (list_empty(&lp->lp_routes)) lp->lp_disc_net_id = 0; + lnet_peer_ni_decref_locked(lpni); } lnet_net_unlock(LNET_LOCK_EX); -- 1.8.3.1