Whamcloud - gitweb
LU-17000 lnet: fix various bugs in lib-move.c 76/51876/5
authorTimothy Day <timday@amazon.com>
Sat, 5 Aug 2023 19:12:15 +0000 (19:12 +0000)
committerOleg Drokin <green@whamcloud.com>
Wed, 6 Sep 2023 06:21:21 +0000 (06:21 +0000)
In lnet_select_peer_ni, best_lpni_is_preferred is often written
to before being immediately overwritten.

Addresses-Coverity-ID: 397646 ("Unused value")
Addresses-Coverity-ID: 397434 ("Unused value")

Both LNetPut and LNetGet were not freeing msg under certain failure
conditions. This leaks a small amount of memory each time it occurs.

Addresses-Coverity-ID: 397644 ("Resource leak")
Addresses-Coverity-ID: 397133 ("Resource leak")

Fix potential null dereference in lnet_find_best_ni_on_local_net
when best_lp gets defined by best_net doesn't.

Addresses-Coverity-ID: 397568 ("Explicit null dereferenced")

lnet_post_send_locked has an un-needed null check, since every path
leading to that block of code must dereference ni anyway.

Addresses-Coverity-ID: 397278 ("Dereference before null check")

In the other usage of msg_peerrtrcredit, it is accessed under
lpni_lock. Change the second usage to also be accessed under this
lock.

Addresses-Coverity-ID: 397606 ("Data race condition")

Test-Parameters: trivial testlist=sanity-lnet
Signed-off-by: Timothy Day <timday@amazon.com>
Change-Id: I4012f407b10d0c9644535d49cce83a6c95d3d22d
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51876
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/lib-move.c

index 93ba748..2d8c967 100644 (file)
@@ -860,10 +860,10 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send)
                        lnet_incr_stats(&msg->msg_txpeer->lpni_stats,
                                        msg->msg_type,
                                        LNET_STATS_TYPE_DROP);
-               if (msg->msg_txni)
-                       lnet_incr_stats(&msg->msg_txni->ni_stats,
-                                       msg->msg_type,
-                                       LNET_STATS_TYPE_DROP);
+
+               lnet_incr_stats(&msg->msg_txni->ni_stats,
+                               msg->msg_type,
+                               LNET_STATS_TYPE_DROP);
 
                if (rc == -EHOSTUNREACH) {
                        CNETERR("Dropping message for %s: peer not alive\n",
@@ -1250,10 +1250,9 @@ routing_off:
                LASSERT(rxpeerni->lpni_peer_net);
                LASSERT(rxpeerni->lpni_peer_net->lpn_peer);
 
+               spin_lock(&rxpeerni->lpni_lock);
                /* give back peer router credits */
                msg->msg_peerrtrcredit = 0;
-
-               spin_lock(&rxpeerni->lpni_lock);
                rxpeerni->lpni_rtrcredits++;
                spin_unlock(&rxpeerni->lpni_lock);
 
@@ -1361,30 +1360,19 @@ lnet_select_peer_ni(struct lnet_ni *best_ni, struct lnet_nid *dst_nid,
                /* pick the healthiest peer ni */
                if (lpni_healthv < best_lpni_healthv)
                        continue;
-               else if (lpni_healthv > best_lpni_healthv) {
-                       if (best_lpni_is_preferred)
-                               best_lpni_is_preferred = false;
+               else if (lpni_healthv > best_lpni_healthv)
                        goto select_lpni;
-               }
 
                if (lpni_sel_prio > best_sel_prio)
                        continue;
-               else if (lpni_sel_prio < best_sel_prio) {
-                       if (best_lpni_is_preferred)
-                               best_lpni_is_preferred = false;
+               else if (lpni_sel_prio < best_sel_prio)
                        goto select_lpni;
-               }
 
-               /* if this is a preferred peer use it */
-               if (!best_lpni_is_preferred && lpni_is_preferred) {
-                       best_lpni_is_preferred = true;
+               /* If this is a preferred peer - use it. Otherwise, ignore it */
+               if (!best_lpni_is_preferred && lpni_is_preferred)
                        goto select_lpni;
-               } else if (best_lpni_is_preferred && !lpni_is_preferred) {
-                       /* this is not the preferred peer so let's ignore
-                        * it.
-                        */
+               else if (best_lpni_is_preferred && !lpni_is_preferred)
                        continue;
-               }
 
                if (lpni->lpni_txcredits < best_lpni_credits)
                        /* We already have a peer that has more credits
@@ -2506,7 +2494,7 @@ lnet_find_best_ni_on_local_net(struct lnet_peer *peer, int md_cpt,
                net_healthv = lnet_get_net_healthv_locked(net);
                net_sel_prio = net->net_sel_priority;
 
-               if (!best_lpn)
+               if (!best_lpn || !best_net)
                        goto select_lpn;
                else
                        CDEBUG(D_NET,
@@ -5234,6 +5222,7 @@ LNetPut(struct lnet_nid *self, struct lnet_handle_md mdh, enum lnet_ack_req ack,
                if (!rspt) {
                        CERROR("Dropping PUT to %s: ENOMEM on response tracker\n",
                                libcfs_idstr(target));
+                       lnet_msg_free(msg);
                        return -ENOMEM;
                }
                INIT_LIST_HEAD(&rspt->rspt_on_list);
@@ -5464,6 +5453,7 @@ LNetGet(struct lnet_nid *self, struct lnet_handle_md mdh,
        if (!rspt) {
                CERROR("Dropping GET to %s: ENOMEM on response tracker\n",
                       libcfs_idstr(target));
+               lnet_msg_free(msg);
                return -ENOMEM;
        }
        INIT_LIST_HEAD(&rspt->rspt_on_list);