From: Shaun Tancheff Date: Sat, 16 Sep 2023 05:54:54 +0000 (-0500) Subject: LU-17062 lnet: Update lnet_peer_*_decref_locked usage X-Git-Tag: 2.15.59~79 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=60cfceb8c59364f786b31ac36c2c245b9a1e495a;p=fs%2Flustre-release.git LU-17062 lnet: Update lnet_peer_*_decref_locked usage Move decref's to occur after last reference to prevent use after free. HPE-bug-id: LUS-11799 Signed-off-by: Shaun Tancheff Change-Id: I2382ece560039383f644b6aee73a9481d6bb5673 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52184 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Chris Horn Reviewed-by: Petros Koutoupis Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index 2d8c967..111e0a2 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -2177,7 +2177,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, struct lnet_peer_ni **gw_lpni, struct lnet_peer **gw_peer) { - int rc; + int rc = 0; struct lnet_peer *gw; struct lnet_peer *lp; struct lnet_peer_net *lpn; @@ -2188,6 +2188,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, struct lnet_peer_ni *lpni = NULL; struct lnet_peer_ni *gwni = NULL; bool route_found = false; + bool gwni_decref = false; struct lnet_nid *src_nid = !LNET_NID_IS_ANY(&sd->sd_src_nid) || !sd->sd_best_ni ? &sd->sd_src_nid @@ -2207,8 +2208,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, if (!LNET_NID_IS_ANY(&sd->sd_rtr_nid)) { gwni = lnet_peer_ni_find_locked(&sd->sd_rtr_nid); if (gwni) { + gwni_decref = true; gw = gwni->lpni_peer_net->lpn_peer; - lnet_peer_ni_decref_locked(gwni); if (gw->lp_rtr_refcount) route_found = true; } else { @@ -2232,7 +2233,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd, "any local NI" : libcfs_nidstr(src_nid), libcfs_nidstr(&sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } CDEBUG(D_NET, "best_rnet %s\n", libcfs_net2str(best_rnet->lrn_net)); @@ -2288,7 +2290,8 @@ use_lpn: if (!best_lpn) { CERROR("peer %s has no available nets\n", libcfs_nidstr(&sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } CDEBUG(D_NET, "selected best_lpn %s\n", @@ -2301,7 +2304,8 @@ use_lpn: if (!sd->sd_best_lpni) { CERROR("peer %s is unreachable\n", libcfs_nidstr(&sd->sd_dst_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } /* We're attempting to round robin over the remote peer @@ -2327,14 +2331,16 @@ use_lpn: CERROR("no route to %s from %s\n", libcfs_nidstr(dst_nid), libcfs_nidstr(src_nid)); - return -EHOSTUNREACH; + rc = -EHOSTUNREACH; + goto out; } if (!gwni) { CERROR("Internal Error. Route expected to %s from %s\n", libcfs_nidstr(dst_nid), libcfs_nidstr(src_nid)); - return -EFAULT; + rc = -EFAULT; + goto out; } gw = best_route->lr_gateway; @@ -2357,7 +2363,7 @@ use_lpn: if (alive_router_check_interval <= 0) { rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt); if (rc) - return rc; + goto out; } if (!sd->sd_best_ni) { @@ -2369,7 +2375,8 @@ use_lpn: CERROR("Internal Error. Expected local ni on %s but non found: %s\n", libcfs_net2str(lpn->lpn_net_id), libcfs_nidstr(&sd->sd_src_nid)); - return -EFAULT; + rc = -EFAULT; + goto out; } } @@ -2385,7 +2392,11 @@ use_lpn: best_route->lr_seq = last_route->lr_seq + 1; } - return 0; +out: + if (gwni_decref && gwni) + lnet_peer_ni_decref_locked(gwni); + + return rc; } /* @@ -3045,7 +3056,6 @@ again: lnet_net_unlock(cpt); return rc; } - lnet_peer_ni_decref_locked(lpni); peer = lpni->lpni_peer_net->lpn_peer; @@ -3145,6 +3155,7 @@ again: * updated as a result of calling lnet_handle_send_case_locked(). */ cpt = send_data.sd_cpt; + lnet_peer_ni_decref_locked(lpni); if (rc == REPEAT_SEND) goto again; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 236f278..67173fb 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -560,9 +560,9 @@ lnet_peer_del_nid(struct lnet_peer *lp, struct lnet_nid *nid, rc = -ENOENT; goto out; } - lnet_peer_ni_decref_locked(lpni); if (lp != lpni->lpni_peer_net->lpn_peer) { rc = -ECHILD; + lnet_peer_ni_decref_locked(lpni); goto out; } @@ -572,6 +572,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, struct lnet_nid *nid, */ if (nid_same(nid, &lp->lp_primary_nid) && lp->lp_nnis != 1 && !force) { rc = -EBUSY; + lnet_peer_ni_decref_locked(lpni); goto out; } @@ -585,6 +586,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, struct lnet_nid *nid, lp->lp_primary_nid = lpni2->lpni_nid; } rc = lnet_peer_ni_del_locked(lpni, force); + lnet_peer_ni_decref_locked(lpni); lnet_net_unlock(LNET_LOCK_EX); @@ -2012,8 +2014,8 @@ __must_hold(&the_lnet.ln_api_mutex) lpni = lnet_peer_ni_find_locked(prim_nid); if (!lpni) return -ENOENT; - lnet_peer_ni_decref_locked(lpni); lp = lpni->lpni_peer_net->lpn_peer; + lnet_peer_ni_decref_locked(lpni); /* Peer must have been configured. */ if ((flags & LNET_PEER_CONFIGURED) && @@ -2115,8 +2117,8 @@ lnet_del_peer_ni(struct lnet_nid *prim_nid, struct lnet_nid *nid, lpni = lnet_peer_ni_find_locked(prim_nid); if (!lpni) return -ENOENT; - lnet_peer_ni_decref_locked(lpni); lp = lpni->lpni_peer_net->lpn_peer; + lnet_peer_ni_decref_locked(lpni); if (!nid_same(prim_nid, &lp->lp_primary_nid)) { CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n", @@ -2659,11 +2661,13 @@ int lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block) { DEFINE_WAIT(wait); - struct lnet_peer *lp; + struct lnet_peer *lp = NULL; int rc = 0; int count = 0; again: + if (lp) + lnet_peer_decref_locked(lp); lnet_net_unlock(cpt); lnet_net_lock(LNET_LOCK_EX); lp = lpni->lpni_peer_net->lpn_peer; @@ -2720,7 +2724,6 @@ again: lnet_net_unlock(LNET_LOCK_EX); lnet_net_lock(cpt); - lnet_peer_decref_locked(lp); /* * The peer may have changed, so re-check and rediscover if that turns * out to have been the case. The reference count on lp ensured that @@ -2748,6 +2751,7 @@ again: (lp ? libcfs_nidstr(&lp->lp_primary_nid) : "(none)"), libcfs_nidstr(&lpni->lpni_nid), rc, (!block) ? "pending discovery" : "discovery complete"); + lnet_peer_decref_locked(lp); return rc; } @@ -3073,11 +3077,6 @@ static void lnet_discovery_event_handler(struct lnet_event *event) LBUG(); } lnet_net_lock(LNET_LOCK_EX); - if (event->unlinked) { - pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start); - lnet_ping_buffer_decref(pbuf); - lnet_peer_decref_locked(lp); - } /* put peer back at end of request queue, if discovery not already * done */ @@ -3086,6 +3085,11 @@ static void lnet_discovery_event_handler(struct lnet_event *event) list_move_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request); wake_up(&the_lnet.ln_dc_waitq); } + if (event->unlinked) { + pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start); + lnet_ping_buffer_decref(pbuf); + lnet_peer_decref_locked(lp); + } lnet_net_unlock(LNET_LOCK_EX); } @@ -3351,8 +3355,6 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, goto out; } - lnet_peer_ni_decref_locked(lpni); - lpn = lpni->lpni_peer_net; if (lpn->lpn_peer_nets.prev != &lp->lp_peer_nets) list_move(&lpn->lpn_peer_nets, &lp->lp_peer_nets); @@ -3361,6 +3363,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp, list_move(&lpni->lpni_peer_nis, &lpni->lpni_peer_net->lpn_peer_nis); + lnet_peer_ni_decref_locked(lpni); /* * Errors other than -ENOMEM are due to peers having been * configured with DLC. Ignore these because DLC overrides diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 2b916ef..4daae66 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -829,12 +829,13 @@ __must_hold(&the_lnet.ln_api_mutex) LASSERT(lp); gw_nid = lp->lp_primary_nid; gw = &gw_nid; - lnet_peer_ni_decref_locked(lpni); } if (net != LNET_NET_ANY) { rnet = lnet_find_rnet_locked(net); if (!rnet) { + if (lpni) + lnet_peer_ni_decref_locked(lpni); lnet_net_unlock(LNET_LOCK_EX); return -ENOENT; } @@ -867,6 +868,7 @@ delete_zombies: if (lpni) { if (list_empty(&lp->lp_routes)) lp->lp_disc_net_id = 0; + lnet_peer_ni_decref_locked(lpni); } lnet_net_unlock(LNET_LOCK_EX);