From: Mr NeilBrown Date: Thu, 7 Nov 2019 05:58:00 +0000 (+1100) Subject: LU-13255 lnet: introduce wait_var_event_warning. X-Git-Tag: 2.13.53~83 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=9a0ed2bc507016b555e8e4641b22535468e051c4 LU-13255 lnet: introduce wait_var_event_warning. LNet has a recurring pattern of waiting for some variable to reach a particular value, and generating a warning every second that it hasn't. In many cases the warning has a higher priority if the wait has been for a power-of-2 seconds. This patch embodies that pattern in a new macro wait_var_event_warning() and uses wake_up_var() to cause the wait to complete as soon as possible. This patch does not include any change to gnilnd - that is left for a separate patch. Signed-off-by: Mr NeilBrown Change-Id: I557a64713cbe379a566a775944f58ddf93dbd800 Reviewed-on: https://review.whamcloud.com/37593 Tested-by: jenkins Reviewed-by: Chris Horn Reviewed-by: James Simmons Reviewed-by: Shaun Tancheff Reviewed-by: Serguei Smirnov Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/libcfs/include/libcfs/libcfs.h b/libcfs/include/libcfs/libcfs.h index 23174a9..2007dc6 100644 --- a/libcfs/include/libcfs/libcfs.h +++ b/libcfs/include/libcfs/libcfs.h @@ -141,4 +141,19 @@ int lprocfs_call_handler(void *data, int write, loff_t *ppos, int (*handler)(void *data, int write, loff_t pos, void __user *buffer, int len)); + +#define wait_var_event_warning(var, condition, format, ...) \ +do { \ + int counter = 4; \ + might_sleep(); \ + if (condition) \ + break; \ + ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + if (schedule_timeout(cfs_time_seconds(1)) == 0)\ + CDEBUG(is_power_of_2(counter++) ? \ + D_WARNING : D_NET, \ + format, ## __VA_ARGS__) \ + ); \ +} while (0) + #endif /* _LIBCFS_LIBCFS_H_ */ diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 602bf46..ccc74b5 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -831,8 +831,10 @@ static inline void lnet_ping_buffer_addref(struct lnet_ping_buffer *pbuf) static inline void lnet_ping_buffer_decref(struct lnet_ping_buffer *pbuf) { - if (atomic_dec_and_test(&pbuf->pb_refcnt)) + if (atomic_dec_and_test(&pbuf->pb_refcnt)) { + wake_up_var(&pbuf->pb_refcnt); lnet_ping_buffer_free(pbuf); + } } static inline int lnet_push_target_resize_needed(void) diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index dae653b..9e7f4db 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -372,7 +372,8 @@ kiblnd_destroy_peer(struct kib_peer_ni *peer_ni) * they are destroyed, so we can be assured that _all_ state to do * with this peer_ni has been cleaned up when its refcount drops to * zero. */ - atomic_dec(&net->ibn_npeers); + if (atomic_dec_and_test(&net->ibn_npeers)) + wake_up_var(&net->ibn_npeers); } struct kib_peer_ni * @@ -2972,17 +2973,11 @@ kiblnd_base_shutdown(void) wake_up_all(&kiblnd_data.kib_connd_waitq); wake_up_all(&kiblnd_data.kib_failover_waitq); - i = 2; - while (atomic_read(&kiblnd_data.kib_nthreads) != 0) { - i++; - /* power of 2? */ - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "Waiting for %d threads to terminate\n", - atomic_read(&kiblnd_data.kib_nthreads)); - schedule_timeout_uninterruptible(cfs_time_seconds(1)); - } - - /* fall through */ + wait_var_event_warning(&kiblnd_data.kib_nthreads, + !atomic_read(&kiblnd_data.kib_nthreads), + "Waiting for %d threads to terminate\n", + atomic_read(&kiblnd_data.kib_nthreads)); + /* fall through */ case IBLND_INIT_NOTHING: break; @@ -3007,8 +3002,7 @@ kiblnd_shutdown(struct lnet_ni *ni) { struct kib_net *net = ni->ni_data; rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - int i; - unsigned long flags; + unsigned long flags; LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL); @@ -3026,21 +3020,16 @@ kiblnd_shutdown(struct lnet_ni *ni) default: LBUG(); - case IBLND_INIT_ALL: - /* nuke all existing peers within this net */ - kiblnd_del_peer(ni, LNET_NID_ANY); + case IBLND_INIT_ALL: + /* nuke all existing peers within this net */ + kiblnd_del_peer(ni, LNET_NID_ANY); /* Wait for all peer_ni state to clean up */ - i = 2; - while (atomic_read(&net->ibn_npeers) != 0) { - i++; - /* power of 2? */ - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "%s: waiting for %d peers to disconnect\n", - libcfs_nid2str(ni->ni_nid), - atomic_read(&net->ibn_npeers)); - schedule_timeout_uninterruptible(cfs_time_seconds(1)); - } + wait_var_event_warning(&net->ibn_npeers, + atomic_read(&net->ibn_npeers) == 0, + "%s: waiting for %d peers to disconnect\n", + libcfs_nid2str(ni->ni_nid), + atomic_read(&net->ibn_npeers)); kiblnd_net_fini_pools(net); @@ -3050,7 +3039,7 @@ kiblnd_shutdown(struct lnet_ni *ni) list_del(&net->ibn_list); write_unlock_irqrestore(g_lock, flags); - /* fall through */ + /* fall through */ case IBLND_INIT_NOTHING: LASSERT (atomic_read(&net->ibn_nconns) == 0); diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 4ed0458..094e840 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -158,7 +158,8 @@ ksocknal_destroy_peer(struct ksock_peer_ni *peer_ni) * state to do with this peer_ni has been cleaned up when its refcount * drops to zero. */ - atomic_dec(&net->ksnn_npeers); + if (atomic_dec_and_test(&net->ksnn_npeers)) + wake_up_var(&net->ksnn_npeers); } struct ksock_peer_ni * @@ -2205,25 +2206,16 @@ ksocknal_base_shutdown(void) wake_up_all(&sched->kss_waitq); } - i = 4; - read_lock(&ksocknal_data.ksnd_global_lock); - while (ksocknal_data.ksnd_nthreads != 0) { - i++; - /* power of 2? */ - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "waiting for %d threads to terminate\n", - ksocknal_data.ksnd_nthreads); - read_unlock(&ksocknal_data.ksnd_global_lock); - schedule_timeout_uninterruptible(cfs_time_seconds(1)); - read_lock(&ksocknal_data.ksnd_global_lock); - } - read_unlock(&ksocknal_data.ksnd_global_lock); + wait_var_event_warning(&ksocknal_data.ksnd_nthreads, + ksocknal_data.ksnd_nthreads == 0, + "waiting for %d threads to terminate\n", + ksocknal_data.ksnd_nthreads); - ksocknal_free_buffers(); + ksocknal_free_buffers(); - ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; - break; - } + ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; + break; + } CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", atomic_read (&libcfs_kmemory)); @@ -2352,7 +2344,7 @@ ksocknal_base_startup(void) return -ENETDOWN; } -static void +static int ksocknal_debug_peerhash(struct lnet_ni *ni) { struct ksock_peer_ni *peer_ni; @@ -2394,6 +2386,7 @@ ksocknal_debug_peerhash(struct lnet_ni *ni) } read_unlock(&ksocknal_data.ksnd_global_lock); + return 0; } void @@ -2416,16 +2409,13 @@ ksocknal_shutdown(struct lnet_ni *ni) ksocknal_del_peer(ni, anyid, 0); /* Wait for all peer_ni state to clean up */ - i = 2; - while (atomic_read(&net->ksnn_npeers) > SOCKNAL_SHUTDOWN_BIAS) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "waiting for %d peers to disconnect\n", - atomic_read(&net->ksnn_npeers) - SOCKNAL_SHUTDOWN_BIAS); - schedule_timeout_uninterruptible(cfs_time_seconds(1)); - - ksocknal_debug_peerhash(ni); - } + wait_var_event_warning(&net->ksnn_npeers, + atomic_read(&net->ksnn_npeers) == + SOCKNAL_SHUTDOWN_BIAS, + "waiting for %d peers to disconnect\n", + ksocknal_debug_peerhash(ni) + + atomic_read(&net->ksnn_npeers) - + SOCKNAL_SHUTDOWN_BIAS); for (i = 0; i < net->ksnn_ninterfaces; i++) { LASSERT(net->ksnn_interfaces[i].ksni_npeers == 0); @@ -2435,9 +2425,9 @@ ksocknal_shutdown(struct lnet_ni *ni) list_del(&net->ksnn_list); LIBCFS_FREE(net, sizeof(*net)); - ksocknal_data.ksnd_nnets--; - if (ksocknal_data.ksnd_nnets == 0) - ksocknal_base_shutdown(); + ksocknal_data.ksnd_nnets--; + if (ksocknal_data.ksnd_nnets == 0) + ksocknal_base_shutdown(); } static int diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 9009af3..acaa6b9 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -1083,7 +1083,8 @@ void ksocknal_thread_fini (void) { write_lock_bh(&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads--; + if (--ksocknal_data.ksnd_nthreads == 0) + wake_up_var(&ksocknal_data.ksnd_nthreads); write_unlock_bh(&ksocknal_data.ksnd_global_lock); } diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index baabf41..388e7eb 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -1744,10 +1744,9 @@ lnet_ping_md_unlink(struct lnet_ping_buffer *pbuf, LNetInvalidateMDHandle(ping_mdh); /* NB the MD could be busy; this just starts the unlink */ - while (atomic_read(&pbuf->pb_refcnt) > 1) { - CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n"); - schedule_timeout_uninterruptible(cfs_time_seconds(1)); - } + wait_var_event_warning(&pbuf->pb_refcnt, + atomic_read(&pbuf->pb_refcnt) <= 1, + "Still waiting for ping data MD to unlink\n"); } static void @@ -2011,10 +2010,9 @@ static void lnet_push_target_fini(void) LNetInvalidateMDHandle(&the_lnet.ln_push_target_md); /* Wait for the unlink to complete. */ - while (atomic_read(&the_lnet.ln_push_target->pb_refcnt) > 1) { - CDEBUG(D_NET, "Still waiting for ping data MD to unlink\n"); - schedule_timeout_uninterruptible(cfs_time_seconds(1)); - } + wait_var_event_warning(&the_lnet.ln_push_target->pb_refcnt, + atomic_read(&the_lnet.ln_push_target->pb_refcnt) <= 1, + "Still waiting for ping data MD to unlink\n"); /* Drop ref set by lnet_ping_buffer_alloc() */ lnet_ping_buffer_decref(the_lnet.ln_push_target); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 4584dac..051a342 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -576,21 +576,10 @@ lnet_peer_table_cleanup_locked(struct lnet_net *net, static void lnet_peer_ni_finalize_wait(struct lnet_peer_table *ptable) { - int i = 3; - - spin_lock(&ptable->pt_zombie_lock); - while (ptable->pt_zombies) { - spin_unlock(&ptable->pt_zombie_lock); - - if (is_power_of_2(i)) { - CDEBUG(D_WARNING, + wait_var_event_warning(&ptable->pt_zombies, + ptable->pt_zombies == 0, "Waiting for %d zombies on peer table\n", ptable->pt_zombies); - } - schedule_timeout_uninterruptible(cfs_time_seconds(1) >> 1); - spin_lock(&ptable->pt_zombie_lock); - } - spin_unlock(&ptable->pt_zombie_lock); } static void