Whamcloud - gitweb
LU-17062 lnet: Update lnet_peer_*_decref_locked usage
authorShaun Tancheff <shaun.tancheff@hpe.com>
Sat, 16 Sep 2023 05:54:54 +0000 (00:54 -0500)
committerAndreas Dilger <adilger@whamcloud.com>
Sat, 27 Apr 2024 22:28:10 +0000 (22:28 +0000)
Move decref's to occur after last reference to prevent
use after free.

Lustre-change: https://review.whamcloud.com/52184
Lustre-commit: 60cfceb8c59364f786b31ac36c2c245b9a1e495a

HPE-bug-id: LUS-11799
Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Change-Id: I2382ece560039383f644b6aee73a9481d6bb5673
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54897
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lnet/lnet/lib-move.c
lnet/lnet/peer.c
lnet/lnet/router.c

index aa7ea81..1486eda 100644 (file)
@@ -2127,7 +2127,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                             struct lnet_peer_ni **gw_lpni,
                             struct lnet_peer **gw_peer)
 {
-       int rc;
+       int rc = 0;
        struct lnet_peer *gw;
        struct lnet_peer *lp;
        struct lnet_peer_net *lpn;
@@ -2138,6 +2138,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
        struct lnet_peer_ni *lpni = NULL;
        struct lnet_peer_ni *gwni = NULL;
        bool route_found = false;
+       bool gwni_decref = false;
        lnet_nid_t src_nid = (sd->sd_src_nid != LNET_NID_ANY) ? sd->sd_src_nid :
                (sd->sd_best_ni != NULL) ? sd->sd_best_ni->ni_nid :
                LNET_NID_ANY;
@@ -2152,8 +2153,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
        if (sd->sd_rtr_nid != LNET_NID_ANY) {
                gwni = lnet_find_peer_ni_locked(sd->sd_rtr_nid);
                if (gwni) {
+                       gwni_decref = true;
                        gw = gwni->lpni_peer_net->lpn_peer;
-                       lnet_peer_ni_decref_locked(gwni);
                        if (gw->lp_rtr_refcount)
                                route_found = true;
                } else {
@@ -2177,7 +2178,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                                                libcfs_nid2str(src_nid) :
                                                "any local NI",
                                       libcfs_nid2str(sd->sd_dst_nid));
-                               return -EHOSTUNREACH;
+                               rc = -EHOSTUNREACH;
+                               goto out;
                        }
                } else {
                        /* we've already looked up the initial lpni using
@@ -2210,7 +2212,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                        if (!best_lpn) {
                                CERROR("peer %s has no available nets\n",
                                       libcfs_nid2str(sd->sd_dst_nid));
-                               return -EHOSTUNREACH;
+                               rc =  -EHOSTUNREACH;
+                               goto out;
                        }
 
                        sd->sd_best_lpni = lnet_find_best_lpni(sd->sd_best_ni,
@@ -2220,7 +2223,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                        if (!sd->sd_best_lpni) {
                                CERROR("peer %s is unreachable\n",
                                       libcfs_nid2str(sd->sd_dst_nid));
-                               return -EHOSTUNREACH;
+                               rc = -EHOSTUNREACH;
+                               goto out;
                        }
 
                        /* We're attempting to round robin over the remote peer
@@ -2251,14 +2255,16 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                        CERROR("no route to %s from %s\n",
                               libcfs_nid2str(dst_nid),
                               libcfs_nid2str(src_nid));
-                       return -EHOSTUNREACH;
+                       rc = -EHOSTUNREACH;
+                       goto out;
                }
 
                if (!gwni) {
                        CERROR("Internal Error. Route expected to %s from %s\n",
                               libcfs_nid2str(dst_nid),
                               libcfs_nid2str(src_nid));
-                       return -EFAULT;
+                       rc = -EFAULT;
+                       goto out;
                }
 
                gw = best_route->lr_gateway;
@@ -2281,7 +2287,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
        if (alive_router_check_interval <= 0) {
                rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt);
                if (rc)
-                       return rc;
+                       goto out;
        }
 
        if (!sd->sd_best_ni) {
@@ -2293,7 +2299,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                        CERROR("Internal Error. Expected local ni on %s but non found: %s\n",
                               libcfs_net2str(lpn->lpn_net_id),
                               libcfs_nid2str(sd->sd_src_nid));
-                       return -EFAULT;
+                       rc = -EFAULT;
+                       goto out;
                }
        }
 
@@ -2311,7 +2318,11 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                        best_lpn->lpn_seq++;
        }
 
-       return 0;
+out:
+       if (gwni_decref && gwni)
+               lnet_peer_ni_decref_locked(gwni);
+
+       return rc;
 }
 
 /*
@@ -2929,7 +2940,6 @@ again:
                lnet_net_unlock(cpt);
                return rc;
        }
-       lnet_peer_ni_decref_locked(lpni);
 
        peer = lpni->lpni_peer_net->lpn_peer;
 
@@ -3023,6 +3033,7 @@ again:
         * updated as a result of calling lnet_handle_send_case_locked().
         */
        cpt = send_data.sd_cpt;
+       lnet_peer_ni_decref_locked(lpni);
 
        if (rc == REPEAT_SEND)
                goto again;
index d47f896..105d94b 100644 (file)
@@ -557,9 +557,9 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags)
                rc = -ENOENT;
                goto out;
        }
-       lnet_peer_ni_decref_locked(lpni);
        if (lp != lpni->lpni_peer_net->lpn_peer) {
                rc = -ECHILD;
+               lnet_peer_ni_decref_locked(lpni);
                goto out;
        }
 
@@ -569,6 +569,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags)
         */
        if (nid == lp->lp_primary_nid && lp->lp_nnis != 1 && !force) {
                rc = -EBUSY;
+               lnet_peer_ni_decref_locked(lpni);
                goto out;
        }
 
@@ -582,6 +583,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, lnet_nid_t nid, unsigned flags)
                lp->lp_primary_nid = lpni2->lpni_nid;
        }
        rc = lnet_peer_ni_del_locked(lpni, force);
+       lnet_peer_ni_decref_locked(lpni);
 
        lnet_net_unlock(LNET_LOCK_EX);
 
@@ -1864,8 +1866,8 @@ __must_hold(&the_lnet.ln_api_mutex)
        lpni = lnet_find_peer_ni_locked(prim_nid);
        if (!lpni)
                return -ENOENT;
-       lnet_peer_ni_decref_locked(lpni);
        lp = lpni->lpni_peer_net->lpn_peer;
+       lnet_peer_ni_decref_locked(lpni);
 
        /* Peer must have been configured. */
        if ((flags & LNET_PEER_CONFIGURED) &&
@@ -1966,8 +1968,8 @@ lnet_del_peer_ni(lnet_nid_t prim_nid, lnet_nid_t nid, int force)
        lpni = lnet_find_peer_ni_locked(prim_nid);
        if (!lpni)
                return -ENOENT;
-       lnet_peer_ni_decref_locked(lpni);
        lp = lpni->lpni_peer_net->lpn_peer;
+       lnet_peer_ni_decref_locked(lpni);
 
        if (prim_nid != lp->lp_primary_nid) {
                CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
@@ -2527,11 +2529,13 @@ int
 lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block)
 {
        DEFINE_WAIT(wait);
-       struct lnet_peer *lp;
+       struct lnet_peer *lp = NULL;
        int rc = 0;
        int count = 0;
 
 again:
+       if (lp)
+               lnet_peer_decref_locked(lp);
        lnet_net_unlock(cpt);
        lnet_net_lock(LNET_LOCK_EX);
        lp = lpni->lpni_peer_net->lpn_peer;
@@ -2588,7 +2592,6 @@ again:
 
        lnet_net_unlock(LNET_LOCK_EX);
        lnet_net_lock(cpt);
-       lnet_peer_decref_locked(lp);
        /*
         * The peer may have changed, so re-check and rediscover if that turns
         * out to have been the case. The reference count on lp ensured that
@@ -2616,6 +2619,7 @@ again:
               (lp ? libcfs_nid2str(lp->lp_primary_nid) : "(none)"),
               libcfs_nid2str(lpni->lpni_nid), rc,
               (!block) ? "pending discovery" : "discovery complete");
+       lnet_peer_decref_locked(lp);
 
        return rc;
 }
@@ -2908,11 +2912,6 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
                LBUG();
        }
        lnet_net_lock(LNET_LOCK_EX);
-       if (event->unlinked) {
-               pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start);
-               lnet_ping_buffer_decref(pbuf);
-               lnet_peer_decref_locked(lp);
-       }
 
        /* put peer back at end of request queue, if discovery not already
         * done */
@@ -2921,6 +2920,11 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
                list_move_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request);
                wake_up(&the_lnet.ln_dc_waitq);
        }
+       if (event->unlinked) {
+               pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start);
+               lnet_ping_buffer_decref(pbuf);
+               lnet_peer_decref_locked(lp);
+       }
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
@@ -3102,8 +3106,6 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
                goto out;
        }
 
-       lnet_peer_ni_decref_locked(lpni);
-
        lpn = lpni->lpni_peer_net;
        if (lpn->lpn_peer_nets.prev != &lp->lp_peer_nets)
                list_move(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
@@ -3112,6 +3114,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
                list_move(&lpni->lpni_peer_nis,
                          &lpni->lpni_peer_net->lpn_peer_nis);
 
+       lnet_peer_ni_decref_locked(lpni);
        /*
         * Errors other than -ENOMEM are due to peers having been
         * configured with DLC. Ignore these because DLC overrides
index d453d57..178cabe 100644 (file)
@@ -821,12 +821,13 @@ __must_hold(&the_lnet.ln_api_mutex)
                lp = lpni->lpni_peer_net->lpn_peer;
                LASSERT(lp);
                gw_nid = lp->lp_primary_nid;
-               lnet_peer_ni_decref_locked(lpni);
        }
 
        if (net != LNET_NET_ANY) {
                rnet = lnet_find_rnet_locked(net);
                if (!rnet) {
+                       if (lpni)
+                               lnet_peer_ni_decref_locked(lpni);
                        lnet_net_unlock(LNET_LOCK_EX);
                        return -ENOENT;
                }
@@ -859,6 +860,7 @@ delete_zombies:
        if (lpni) {
                if (list_empty(&lp->lp_routes))
                        lp->lp_disc_net_id = 0;
+               lnet_peer_ni_decref_locked(lpni);
        }
 
        lnet_net_unlock(LNET_LOCK_EX);