From: eeb Date: Sat, 21 Feb 2004 09:54:05 +0000 (+0000) Subject: * Applied fix for 1888 X-Git-Tag: v1_7_100~2564 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=b060b306a33d2b2bdc6a615667c403dc21c1ec1f * Applied fix for 1888 --- diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index 127bbce..90c9a95 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -213,7 +213,6 @@ kqswnal_finalise (void) /* fall through */ case KQN_INIT_DATA: - LASSERT(list_empty(&kqswnal_data.kqn_activetxds)); break; case KQN_INIT_NOTHING: @@ -248,6 +247,9 @@ kqswnal_finalise (void) if (kqswnal_data.kqn_eptx != NULL) ep_free_xmtr (kqswnal_data.kqn_eptx); + + /* freeing the xmtr completes all txs pdq */ + LASSERT(list_empty(&kqswnal_data.kqn_activetxds)); #else if (kqswnal_data.kqn_eprx_small != NULL) ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small); @@ -255,6 +257,13 @@ kqswnal_finalise (void) if (kqswnal_data.kqn_eprx_large != NULL) ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large); + /* wait for transmits to complete */ + while (!list_empty(&kqswnal_data.kqn_activetxds)) { + CWARN("waiting for active transmits to complete\n"); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ); + } + if (kqswnal_data.kqn_eptx != NULL) ep_free_large_xmtr (kqswnal_data.kqn_eptx); #endif diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c index 127bbce..90c9a95 100644 --- a/lustre/portals/knals/qswnal/qswnal.c +++ b/lustre/portals/knals/qswnal/qswnal.c @@ -213,7 +213,6 @@ kqswnal_finalise (void) /* fall through */ case KQN_INIT_DATA: - LASSERT(list_empty(&kqswnal_data.kqn_activetxds)); break; case KQN_INIT_NOTHING: @@ -248,6 +247,9 @@ kqswnal_finalise (void) if (kqswnal_data.kqn_eptx != NULL) ep_free_xmtr (kqswnal_data.kqn_eptx); + + /* freeing the xmtr completes all txs pdq */ + LASSERT(list_empty(&kqswnal_data.kqn_activetxds)); #else if (kqswnal_data.kqn_eprx_small != NULL) ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small); @@ -255,6 +257,13 @@ kqswnal_finalise (void) if (kqswnal_data.kqn_eprx_large != NULL) ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large); + /* wait for transmits to complete */ + while (!list_empty(&kqswnal_data.kqn_activetxds)) { + CWARN("waiting for active transmits to complete\n"); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(HZ); + } + if (kqswnal_data.kqn_eptx != NULL) ep_free_large_xmtr (kqswnal_data.kqn_eptx); #endif diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index b2aa6b9..6ba3909 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -365,8 +365,39 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer) void ptlrpc_ni_fini(struct ptlrpc_ni *pni) { - PtlEQFree(pni->pni_eq_h); - kportal_put_ni (pni->pni_number); + wait_queue_head_t waitq; + struct l_wait_info lwi; + int rc; + int retries; + + /* Wait for the event queue to become idle since there may still be + * messages in flight with pending events (i.e. the fire-and-forget + * messages == client requests and "non-difficult" server + * replies */ + + for (retries = 0;; retries++) { + rc = PtlEQFree(pni->pni_eq_h); + switch (rc) { + default: + LBUG(); + + case PTL_OK: + kportal_put_ni (pni->pni_number); + return; + + case PTL_EQ_INUSE: + if (retries != 0) + CWARN("Event queue for %s still busy\n", + pni->pni_name); + + /* Wait for a bit */ + init_waitqueue_head(&waitq); + lwi = LWI_TIMEOUT(2*HZ, NULL, NULL); + l_wait_event(waitq, 0, &lwi); + break; + } + } + /* notreached */ } int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni)