Whamcloud - gitweb
LU-15930 lnet: Remove duplicate checks for peer sensitivity 26/46626/12
author Chris Horn <chris.horn@hpe.com>
Thu, 24 Feb 2022 20:30:59 +0000 (14:30 -0600)
committer Oleg Drokin <green@whamcloud.com>
Thu, 1 Sep 2022 05:53:43 +0000 (05:53 +0000)
Callers of lnet_inc_lpni_healthv_locked() and
lnet_dec_healthv_locked() currently check whether the parent peer
has a peer-specific sensitivity defined. To remove this code
duplication, this logic is rolled into
lnet_inc_lpni_healthv_locked() and lnet_dec_lpni_healthv_locked().
The latter is a new wrapper around lnet_dec_healthv_locked().

lnet_dec_healthv_locked() is changed to return a bool indicating
whether the health value was actually modified so that the peer
net health is only updated when the peer NI health actually changes.

Test-Parameters: trivial testlist=sanity-lnet
HPE-bug-id: LUS-11018
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I624561167392ad625ea7478689e9c5975cec3f2e
Reviewed-on: https://review.whamcloud.com/46626
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/lib-msg.c
lnet/lnet/router.c

index 3fab690..06854ad 100644 (file)
@@ -1188,13 +1188,49 @@ lnet_atomic_add_unless_max(atomic_t *v, int a, int u)
        return mod;
 }
 
+static bool
+lnet_dec_healthv_locked(atomic_t *healthv, int sensitivity)
+{
+       int h = atomic_read(healthv);
+
+       if (h == 0)
+               return false;
+
+       if (h < sensitivity)
+               h = 0;
+       else
+               h -= sensitivity;
+
+       return (atomic_xchg(healthv, h) != h);
+}
+
 static inline void
-lnet_inc_lpni_healthv_locked(struct lnet_peer_ni *lpni, int value)
+lnet_dec_lpni_healthv_locked(struct lnet_peer_ni *lpni)
 {
-       /* only adjust the net health if the lpni health value changed */
-       if (lnet_atomic_add_unless_max(&lpni->lpni_healthv, value,
-                                      LNET_MAX_HEALTH_VALUE))
+       /* If there is a health sensitivity in the peer then use that
+        * instead of the globally set one.
+        * only adjust the net health if the lpni health value changed
+        */
+       if (lnet_dec_healthv_locked(&lpni->lpni_healthv,
+                       lpni->lpni_peer_net->lpn_peer->lp_health_sensitivity ? :
+                       lnet_health_sensitivity)) {
                lnet_update_peer_net_healthv(lpni);
+       }
+}
+
+static inline void
+lnet_inc_lpni_healthv_locked(struct lnet_peer_ni *lpni)
+{
+       /* If there is a health sensitivity in the peer then use that
+        * instead of the globally set one.
+        * only adjust the net health if the lpni health value changed
+        */
+       if (lnet_atomic_add_unless_max(&lpni->lpni_healthv,
+                       lpni->lpni_peer_net->lpn_peer->lp_health_sensitivity ? :
+                       lnet_health_sensitivity,
+                                      LNET_MAX_HEALTH_VALUE)) {
+               lnet_update_peer_net_healthv(lpni);
+       }
 }
 
 static inline void
index 69a85be..36a29cc 100644 (file)
@@ -437,19 +437,6 @@ lnet_complete_msg_locked(struct lnet_msg *msg, int cpt)
        return 0;
 }
 
-static void
-lnet_dec_healthv_locked(atomic_t *healthv, int sensitivity)
-{
-       int h = atomic_read(healthv);
-
-       if (h < sensitivity) {
-               atomic_set(healthv, 0);
-       } else {
-               h -= sensitivity;
-               atomic_set(healthv, h);
-       }
-}
-
 /* must hold net_lock/0 */
 void
 lnet_ni_add_to_recoveryq_locked(struct lnet_ni *ni,
@@ -499,21 +486,7 @@ lnet_handle_local_failure(struct lnet_ni *local_ni)
 void
 lnet_handle_remote_failure_locked(struct lnet_peer_ni *lpni)
 {
-       __u32 sensitivity = lnet_health_sensitivity;
-       __u32 lp_sensitivity;
-
-       /*
-        * If there is a health sensitivity in the peer then use that
-        * instead of the globally set one.
-        */
-       lp_sensitivity = lpni->lpni_peer_net->lpn_peer->lp_health_sensitivity;
-       if (lp_sensitivity)
-               sensitivity = lp_sensitivity;
-
-       lnet_dec_healthv_locked(&lpni->lpni_healthv, sensitivity);
-
-       /* update the peer_net's health value */
-       lnet_update_peer_net_healthv(lpni);
+       lnet_dec_lpni_healthv_locked(lpni);
 
        /*
         * add the peer NI to the recovery queue if it's not already there
@@ -918,12 +891,7 @@ lnet_health_check(struct lnet_msg *msg)
                                lnet_set_lpni_healthv_locked(lpni,
                                        LNET_MAX_HEALTH_VALUE);
                        } else {
-                               __u32 sensitivity = lpni->lpni_peer_net->
-                                       lpn_peer->lp_health_sensitivity;
-
-                               lnet_inc_lpni_healthv_locked(lpni,
-                                       (sensitivity) ? sensitivity :
-                                       lnet_health_sensitivity);
+                               lnet_inc_lpni_healthv_locked(lpni);
                                /* This peer NI may have previously aged out
                                 * of recovery. Now that we've received a
                                 * message from it, we can continue recovery
index 6bbcd34..93ae181 100644 (file)
@@ -1769,12 +1769,7 @@ lnet_notify(struct lnet_ni *ni, lnet_nid_t nid4, bool alive, bool reset,
                        lnet_set_lpni_healthv_locked(lpni,
                                                     LNET_MAX_HEALTH_VALUE);
                } else {
-                       __u32 sensitivity = lpni->lpni_peer_net->
-                                       lpn_peer->lp_health_sensitivity;
-
-                       lnet_inc_lpni_healthv_locked(lpni,
-                                       (sensitivity) ? sensitivity :
-                                       lnet_health_sensitivity);
+                       lnet_inc_lpni_healthv_locked(lpni);
                }
        } else if (reset) {
                lpni->lpni_ns_status = LNET_NI_STATUS_DOWN;