Whamcloud - gitweb
LU-17062 lnet: Update lnet_peer_*_decref_locked usage 84/52184/6
authorShaun Tancheff <shaun.tancheff@hpe.com>
Sat, 16 Sep 2023 05:54:54 +0000 (00:54 -0500)
committerOleg Drokin <green@whamcloud.com>
Wed, 25 Oct 2023 18:03:37 +0000 (18:03 +0000)
Move decref's to occur after last reference to prevent
use after free.

HPE-bug-id: LUS-11799
Signed-off-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Change-Id: I2382ece560039383f644b6aee73a9481d6bb5673
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52184
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Petros Koutoupis <petros.koutoupis@hpe.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/lib-move.c
lnet/lnet/peer.c
lnet/lnet/router.c

index 2d8c967..111e0a2 100644 (file)
@@ -2177,7 +2177,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                             struct lnet_peer_ni **gw_lpni,
                             struct lnet_peer **gw_peer)
 {
-       int rc;
+       int rc = 0;
        struct lnet_peer *gw;
        struct lnet_peer *lp;
        struct lnet_peer_net *lpn;
@@ -2188,6 +2188,7 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
        struct lnet_peer_ni *lpni = NULL;
        struct lnet_peer_ni *gwni = NULL;
        bool route_found = false;
+       bool gwni_decref = false;
        struct lnet_nid *src_nid =
                !LNET_NID_IS_ANY(&sd->sd_src_nid) || !sd->sd_best_ni
                ? &sd->sd_src_nid
@@ -2207,8 +2208,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
        if (!LNET_NID_IS_ANY(&sd->sd_rtr_nid)) {
                gwni = lnet_peer_ni_find_locked(&sd->sd_rtr_nid);
                if (gwni) {
+                       gwni_decref = true;
                        gw = gwni->lpni_peer_net->lpn_peer;
-                       lnet_peer_ni_decref_locked(gwni);
                        if (gw->lp_rtr_refcount)
                                route_found = true;
                } else {
@@ -2232,7 +2233,8 @@ lnet_handle_find_routed_path(struct lnet_send_data *sd,
                                                "any local NI" :
                                                libcfs_nidstr(src_nid),
                                       libcfs_nidstr(&sd->sd_dst_nid));
-                               return -EHOSTUNREACH;
+                               rc = -EHOSTUNREACH;
+                               goto out;
                        }
                        CDEBUG(D_NET, "best_rnet %s\n",
                               libcfs_net2str(best_rnet->lrn_net));
@@ -2288,7 +2290,8 @@ use_lpn:
                        if (!best_lpn) {
                                CERROR("peer %s has no available nets\n",
                                       libcfs_nidstr(&sd->sd_dst_nid));
-                               return -EHOSTUNREACH;
+                               rc =  -EHOSTUNREACH;
+                               goto out;
                        }
 
                        CDEBUG(D_NET, "selected best_lpn %s\n",
@@ -2301,7 +2304,8 @@ use_lpn:
                        if (!sd->sd_best_lpni) {
                                CERROR("peer %s is unreachable\n",
                                       libcfs_nidstr(&sd->sd_dst_nid));
-                               return -EHOSTUNREACH;
+                               rc = -EHOSTUNREACH;
+                               goto out;
                        }
 
                        /* We're attempting to round robin over the remote peer
@@ -2327,14 +2331,16 @@ use_lpn:
                        CERROR("no route to %s from %s\n",
                               libcfs_nidstr(dst_nid),
                               libcfs_nidstr(src_nid));
-                       return -EHOSTUNREACH;
+                       rc = -EHOSTUNREACH;
+                       goto out;
                }
 
                if (!gwni) {
                        CERROR("Internal Error. Route expected to %s from %s\n",
                               libcfs_nidstr(dst_nid),
                               libcfs_nidstr(src_nid));
-                       return -EFAULT;
+                       rc = -EFAULT;
+                       goto out;
                }
 
                gw = best_route->lr_gateway;
@@ -2357,7 +2363,7 @@ use_lpn:
        if (alive_router_check_interval <= 0) {
                rc = lnet_initiate_peer_discovery(gwni, sd->sd_msg, sd->sd_cpt);
                if (rc)
-                       return rc;
+                       goto out;
        }
 
        if (!sd->sd_best_ni) {
@@ -2369,7 +2375,8 @@ use_lpn:
                        CERROR("Internal Error. Expected local ni on %s but non found: %s\n",
                               libcfs_net2str(lpn->lpn_net_id),
                               libcfs_nidstr(&sd->sd_src_nid));
-                       return -EFAULT;
+                       rc = -EFAULT;
+                       goto out;
                }
        }
 
@@ -2385,7 +2392,11 @@ use_lpn:
                best_route->lr_seq = last_route->lr_seq + 1;
        }
 
-       return 0;
+out:
+       if (gwni_decref && gwni)
+               lnet_peer_ni_decref_locked(gwni);
+
+       return rc;
 }
 
 /*
@@ -3045,7 +3056,6 @@ again:
                lnet_net_unlock(cpt);
                return rc;
        }
-       lnet_peer_ni_decref_locked(lpni);
 
        peer = lpni->lpni_peer_net->lpn_peer;
 
@@ -3145,6 +3155,7 @@ again:
         * updated as a result of calling lnet_handle_send_case_locked().
         */
        cpt = send_data.sd_cpt;
+       lnet_peer_ni_decref_locked(lpni);
 
        if (rc == REPEAT_SEND)
                goto again;
index 236f278..67173fb 100644 (file)
@@ -560,9 +560,9 @@ lnet_peer_del_nid(struct lnet_peer *lp, struct lnet_nid *nid,
                rc = -ENOENT;
                goto out;
        }
-       lnet_peer_ni_decref_locked(lpni);
        if (lp != lpni->lpni_peer_net->lpn_peer) {
                rc = -ECHILD;
+               lnet_peer_ni_decref_locked(lpni);
                goto out;
        }
 
@@ -572,6 +572,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, struct lnet_nid *nid,
         */
        if (nid_same(nid, &lp->lp_primary_nid) && lp->lp_nnis != 1 && !force) {
                rc = -EBUSY;
+               lnet_peer_ni_decref_locked(lpni);
                goto out;
        }
 
@@ -585,6 +586,7 @@ lnet_peer_del_nid(struct lnet_peer *lp, struct lnet_nid *nid,
                lp->lp_primary_nid = lpni2->lpni_nid;
        }
        rc = lnet_peer_ni_del_locked(lpni, force);
+       lnet_peer_ni_decref_locked(lpni);
 
        lnet_net_unlock(LNET_LOCK_EX);
 
@@ -2012,8 +2014,8 @@ __must_hold(&the_lnet.ln_api_mutex)
        lpni = lnet_peer_ni_find_locked(prim_nid);
        if (!lpni)
                return -ENOENT;
-       lnet_peer_ni_decref_locked(lpni);
        lp = lpni->lpni_peer_net->lpn_peer;
+       lnet_peer_ni_decref_locked(lpni);
 
        /* Peer must have been configured. */
        if ((flags & LNET_PEER_CONFIGURED) &&
@@ -2115,8 +2117,8 @@ lnet_del_peer_ni(struct lnet_nid *prim_nid, struct lnet_nid *nid,
        lpni = lnet_peer_ni_find_locked(prim_nid);
        if (!lpni)
                return -ENOENT;
-       lnet_peer_ni_decref_locked(lpni);
        lp = lpni->lpni_peer_net->lpn_peer;
+       lnet_peer_ni_decref_locked(lpni);
 
        if (!nid_same(prim_nid, &lp->lp_primary_nid)) {
                CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
@@ -2659,11 +2661,13 @@ int
 lnet_discover_peer_locked(struct lnet_peer_ni *lpni, int cpt, bool block)
 {
        DEFINE_WAIT(wait);
-       struct lnet_peer *lp;
+       struct lnet_peer *lp = NULL;
        int rc = 0;
        int count = 0;
 
 again:
+       if (lp)
+               lnet_peer_decref_locked(lp);
        lnet_net_unlock(cpt);
        lnet_net_lock(LNET_LOCK_EX);
        lp = lpni->lpni_peer_net->lpn_peer;
@@ -2720,7 +2724,6 @@ again:
 
        lnet_net_unlock(LNET_LOCK_EX);
        lnet_net_lock(cpt);
-       lnet_peer_decref_locked(lp);
        /*
         * The peer may have changed, so re-check and rediscover if that turns
         * out to have been the case. The reference count on lp ensured that
@@ -2748,6 +2751,7 @@ again:
               (lp ? libcfs_nidstr(&lp->lp_primary_nid) : "(none)"),
               libcfs_nidstr(&lpni->lpni_nid), rc,
               (!block) ? "pending discovery" : "discovery complete");
+       lnet_peer_decref_locked(lp);
 
        return rc;
 }
@@ -3073,11 +3077,6 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
                LBUG();
        }
        lnet_net_lock(LNET_LOCK_EX);
-       if (event->unlinked) {
-               pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start);
-               lnet_ping_buffer_decref(pbuf);
-               lnet_peer_decref_locked(lp);
-       }
 
        /* put peer back at end of request queue, if discovery not already
         * done */
@@ -3086,6 +3085,11 @@ static void lnet_discovery_event_handler(struct lnet_event *event)
                list_move_tail(&lp->lp_dc_list, &the_lnet.ln_dc_request);
                wake_up(&the_lnet.ln_dc_waitq);
        }
+       if (event->unlinked) {
+               pbuf = LNET_PING_INFO_TO_BUFFER(event->md_start);
+               lnet_ping_buffer_decref(pbuf);
+               lnet_peer_decref_locked(lp);
+       }
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
@@ -3351,8 +3355,6 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
                goto out;
        }
 
-       lnet_peer_ni_decref_locked(lpni);
-
        lpn = lpni->lpni_peer_net;
        if (lpn->lpn_peer_nets.prev != &lp->lp_peer_nets)
                list_move(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
@@ -3361,6 +3363,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
                list_move(&lpni->lpni_peer_nis,
                          &lpni->lpni_peer_net->lpn_peer_nis);
 
+       lnet_peer_ni_decref_locked(lpni);
        /*
         * Errors other than -ENOMEM are due to peers having been
         * configured with DLC. Ignore these because DLC overrides
index 2b916ef..4daae66 100644 (file)
@@ -829,12 +829,13 @@ __must_hold(&the_lnet.ln_api_mutex)
                LASSERT(lp);
                gw_nid = lp->lp_primary_nid;
                gw = &gw_nid;
-               lnet_peer_ni_decref_locked(lpni);
        }
 
        if (net != LNET_NET_ANY) {
                rnet = lnet_find_rnet_locked(net);
                if (!rnet) {
+                       if (lpni)
+                               lnet_peer_ni_decref_locked(lpni);
                        lnet_net_unlock(LNET_LOCK_EX);
                        return -ENOENT;
                }
@@ -867,6 +868,7 @@ delete_zombies:
        if (lpni) {
                if (list_empty(&lp->lp_routes))
                        lp->lp_disc_net_id = 0;
+               lnet_peer_ni_decref_locked(lpni);
        }
 
        lnet_net_unlock(LNET_LOCK_EX);