Whamcloud - gitweb
LU-13255 lnet: introduce wait_var_event_warning. 93/37593/6
author: Mr NeilBrown <neilb@suse.de>
Thu, 7 Nov 2019 05:58:00 +0000 (16:58 +1100)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 17 Mar 2020 03:40:38 +0000 (03:40 +0000)
LNet has a recurring pattern of waiting for some variable to reach a
particular value, and generating a warning every second that it
hasn't.  In many cases the warning has a higher priority if the wait
has been for a power-of-2 seconds.

This patch embodies that pattern in a new macro
  wait_var_event_warning()

and uses wake_up_var() to cause the wait to complete as soon as
possible.

This patch does not include any change to gnilnd - that is left for
a separate patch.

Signed-off-by: Mr NeilBrown <neilb@suse.de>
Change-Id: I557a64713cbe379a566a775944f58ddf93dbd800
Reviewed-on: https://review.whamcloud.com/37593
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/include/libcfs/libcfs.h
lnet/include/lnet/lib-lnet.h
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd_cb.c
lnet/lnet/api-ni.c
lnet/lnet/peer.c

index 23174a9..2007dc6 100644 (file)
@@ -141,4 +141,19 @@ int lprocfs_call_handler(void *data, int write, loff_t *ppos,
                         int (*handler)(void *data, int write, loff_t pos,
                                        void __user *buffer, int len));
 
+
+#define wait_var_event_warning(var, condition, format, ...)            \
+do {                                                                   \
+       int counter = 4;                                                \
+       might_sleep();                                                  \
+       if (condition)                                                  \
+               break;                                                  \
+       ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0,   \
+                         if (schedule_timeout(cfs_time_seconds(1)) == 0)\
+                                 CDEBUG(is_power_of_2(counter++) ?     \
+                                        D_WARNING : D_NET,             \
+                                        format, ## __VA_ARGS__)        \
+               );                                                      \
+} while (0)
+
 #endif /* _LIBCFS_LIBCFS_H_ */
index 602bf46..ccc74b5 100644 (file)
@@ -831,8 +831,10 @@ static inline void lnet_ping_buffer_addref(struct lnet_ping_buffer *pbuf)
 
 static inline void lnet_ping_buffer_decref(struct lnet_ping_buffer *pbuf)
 {
-       if (atomic_dec_and_test(&pbuf->pb_refcnt))
+       if (atomic_dec_and_test(&pbuf->pb_refcnt)) {
+               wake_up_var(&pbuf->pb_refcnt);
                lnet_ping_buffer_free(pbuf);
+       }
 }
 
 static inline int lnet_push_target_resize_needed(void)
index dae653b..9e7f4db 100644 (file)
@@ -372,7 +372,8 @@ kiblnd_destroy_peer(struct kib_peer_ni *peer_ni)
         * they are destroyed, so we can be assured that _all_ state to do
         * with this peer_ni has been cleaned up when its refcount drops to
         * zero. */
-       atomic_dec(&net->ibn_npeers);
+       if (atomic_dec_and_test(&net->ibn_npeers))
+               wake_up_var(&net->ibn_npeers);
 }
 
 struct kib_peer_ni *
@@ -2972,17 +2973,11 @@ kiblnd_base_shutdown(void)
                wake_up_all(&kiblnd_data.kib_connd_waitq);
                wake_up_all(&kiblnd_data.kib_failover_waitq);
 
-               i = 2;
-               while (atomic_read(&kiblnd_data.kib_nthreads) != 0) {
-                       i++;
-                       /* power of 2? */
-                       CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-                              "Waiting for %d threads to terminate\n",
-                              atomic_read(&kiblnd_data.kib_nthreads));
-                       schedule_timeout_uninterruptible(cfs_time_seconds(1));
-               }
-
-                /* fall through */
+               wait_var_event_warning(&kiblnd_data.kib_nthreads,
+                                      !atomic_read(&kiblnd_data.kib_nthreads),
+                                      "Waiting for %d threads to terminate\n",
+                                      atomic_read(&kiblnd_data.kib_nthreads));
+               /* fall through */
 
         case IBLND_INIT_NOTHING:
                 break;
@@ -3007,8 +3002,7 @@ kiblnd_shutdown(struct lnet_ni *ni)
 {
        struct kib_net *net = ni->ni_data;
        rwlock_t     *g_lock = &kiblnd_data.kib_global_lock;
-        int               i;
-        unsigned long     flags;
+       unsigned long     flags;
 
         LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL);
 
@@ -3026,21 +3020,16 @@ kiblnd_shutdown(struct lnet_ni *ni)
         default:
                 LBUG();
 
-        case IBLND_INIT_ALL:
-                /* nuke all existing peers within this net */
-                kiblnd_del_peer(ni, LNET_NID_ANY);
+       case IBLND_INIT_ALL:
+               /* nuke all existing peers within this net */
+               kiblnd_del_peer(ni, LNET_NID_ANY);
 
                /* Wait for all peer_ni state to clean up */
-               i = 2;
-               while (atomic_read(&net->ibn_npeers) != 0) {
-                       i++;
-                       /* power of 2? */
-                       CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-                              "%s: waiting for %d peers to disconnect\n",
-                              libcfs_nid2str(ni->ni_nid),
-                              atomic_read(&net->ibn_npeers));
-                       schedule_timeout_uninterruptible(cfs_time_seconds(1));
-               }
+               wait_var_event_warning(&net->ibn_npeers,
+                                      atomic_read(&net->ibn_npeers) == 0,
+                                      "%s: waiting for %d peers to disconnect\n",
+                                      libcfs_nid2str(ni->ni_nid),
+                                      atomic_read(&net->ibn_npeers));
 
                kiblnd_net_fini_pools(net);
 
@@ -3050,7 +3039,7 @@ kiblnd_shutdown(struct lnet_ni *ni)
                list_del(&net->ibn_list);
                write_unlock_irqrestore(g_lock, flags);
 
-                /* fall through */
+               /* fall through */
 
         case IBLND_INIT_NOTHING:
                LASSERT (atomic_read(&net->ibn_nconns) == 0);
index 4ed0458..094e840 100644 (file)
@@ -158,7 +158,8 @@ ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni)
         * state to do with this peer_ni has been cleaned up when its refcount
         * drops to zero.
         */
-       atomic_dec(&net->ksnn_npeers);
+       if (atomic_dec_and_test(&net->ksnn_npeers))
+               wake_up_var(&net->ksnn_npeers);
 }
 
 struct ksock_peer_ni *
@@ -2205,25 +2206,16 @@ ksocknal_base_shutdown(void)
                                        wake_up_all(&sched->kss_waitq);
                }
 
-               i = 4;
-               read_lock(&ksocknal_data.ksnd_global_lock);
-               while (ksocknal_data.ksnd_nthreads != 0) {
-                       i++;
-                       /* power of 2? */
-                       CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
-                               "waiting for %d threads to terminate\n",
-                               ksocknal_data.ksnd_nthreads);
-                       read_unlock(&ksocknal_data.ksnd_global_lock);
-                       schedule_timeout_uninterruptible(cfs_time_seconds(1));
-                       read_lock(&ksocknal_data.ksnd_global_lock);
-               }
-               read_unlock(&ksocknal_data.ksnd_global_lock);
+               wait_var_event_warning(&ksocknal_data.ksnd_nthreads,
+                                      ksocknal_data.ksnd_nthreads == 0,
+                                      "waiting for %d threads to terminate\n",
+                                      ksocknal_data.ksnd_nthreads);
 
-                ksocknal_free_buffers();
+               ksocknal_free_buffers();
 
-                ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
-                break;
-        }
+               ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
+               break;
+       }
 
        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
               atomic_read (&libcfs_kmemory));
@@ -2352,7 +2344,7 @@ ksocknal_base_startup(void)
         return -ENETDOWN;
 }
 
-static void
+static int
 ksocknal_debug_peerhash(struct lnet_ni *ni)
 {
        struct ksock_peer_ni *peer_ni;
@@ -2394,6 +2386,7 @@ ksocknal_debug_peerhash(struct lnet_ni *ni)
        }
 
        read_unlock(&ksocknal_data.ksnd_global_lock);
+       return 0;
 }
 
 void
@@ -2416,16 +2409,13 @@ ksocknal_shutdown(struct lnet_ni *ni)
        ksocknal_del_peer(ni, anyid, 0);
 
        /* Wait for all peer_ni state to clean up */
-       i = 2;
-       while (atomic_read(&net->ksnn_npeers) > SOCKNAL_SHUTDOWN_BIAS) {
-               i++;
-               CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                      "waiting for %d peers to disconnect\n",
-                      atomic_read(&net->ksnn_npeers) - SOCKNAL_SHUTDOWN_BIAS);
-               schedule_timeout_uninterruptible(cfs_time_seconds(1));
-
-               ksocknal_debug_peerhash(ni);
-       }
+       wait_var_event_warning(&net->ksnn_npeers,
+                              atomic_read(&net->ksnn_npeers) ==
+                              SOCKNAL_SHUTDOWN_BIAS,
+                              "waiting for %d peers to disconnect\n",
+                              ksocknal_debug_peerhash(ni) +
+                              atomic_read(&net->ksnn_npeers) -
+                              SOCKNAL_SHUTDOWN_BIAS);
 
        for (i = 0; i < net->ksnn_ninterfaces; i++) {
                LASSERT(net->ksnn_interfaces[i].ksni_npeers == 0);
@@ -2435,9 +2425,9 @@ ksocknal_shutdown(struct lnet_ni *ni)
        list_del(&net->ksnn_list);
        LIBCFS_FREE(net, sizeof(*net));
 
-        ksocknal_data.ksnd_nnets--;
-        if (ksocknal_data.ksnd_nnets == 0)
-                ksocknal_base_shutdown();
+       ksocknal_data.ksnd_nnets--;
+       if (ksocknal_data.ksnd_nnets == 0)
+               ksocknal_base_shutdown();
 }
 
 static int
index 9009af3..acaa6b9 100644 (file)
@@ -1083,7 +1083,8 @@ void
 ksocknal_thread_fini (void)
 {
        write_lock_bh(&ksocknal_data.ksnd_global_lock);
-        ksocknal_data.ksnd_nthreads--;
+       if (--ksocknal_data.ksnd_nthreads == 0)
+               wake_up_var(&ksocknal_data.ksnd_nthreads);
        write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 }
 
index baabf41..388e7eb 100644 (file)
@@ -1744,10 +1744,9 @@ lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf,
        LNetInvalidateMDHandle(ping_mdh);
 
        /* NB the MD could be busy; this just starts the unlink */
-       while (atomic_read(&pbuf->pb_refcnt) > 1) {
-               CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
-               schedule_timeout_uninterruptible(cfs_time_seconds(1));
-       }
+       wait_var_event_warning(&pbuf->pb_refcnt,
+                              atomic_read(&pbuf->pb_refcnt) <= 1,
+                              "Still waiting for ping data MD to unlink\n");
 }
 
 static void
@@ -2011,10 +2010,9 @@ static void lnet_push_target_fini(void)
        LNetInvalidateMDHandle(&the_lnet.ln_push_target_md);
 
        /* Wait for the unlink to complete. */
-       while (atomic_read(&the_lnet.ln_push_target->pb_refcnt) > 1) {
-               CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n");
-               schedule_timeout_uninterruptible(cfs_time_seconds(1));
-       }
+       wait_var_event_warning(&the_lnet.ln_push_target->pb_refcnt,
+                              atomic_read(&the_lnet.ln_push_target->pb_refcnt) <= 1,
+                              "Still waiting for ping data MD to unlink\n");
 
        /* Drop ref set by lnet_ping_buffer_alloc() */
        lnet_ping_buffer_decref(the_lnet.ln_push_target);
index 4584dac..051a342 100644 (file)
@@ -576,21 +576,10 @@ lnet_peer_table_cleanup_locked(struct lnet_net *net,
 static void
 lnet_peer_ni_finalize_wait(struct lnet_peer_table *ptable)
 {
-       int     i = 3;
-
-       spin_lock(&ptable->pt_zombie_lock);
-       while (ptable->pt_zombies) {
-               spin_unlock(&ptable->pt_zombie_lock);
-
-               if (is_power_of_2(i)) {
-                       CDEBUG(D_WARNING,
+       wait_var_event_warning(&ptable->pt_zombies,
+                              ptable->pt_zombies == 0,
                               "Waiting for %d zombies on peer table\n",
                               ptable->pt_zombies);
-               }
-               schedule_timeout_uninterruptible(cfs_time_seconds(1) >> 1);
-               spin_lock(&ptable->pt_zombie_lock);
-       }
-       spin_unlock(&ptable->pt_zombie_lock);
 }
 
 static void