From 61d77b675610a833f69b64b4033a9dbd636d8931 Mon Sep 17 00:00:00 2001 From: adilger Date: Sat, 1 Oct 2005 06:09:53 +0000 Subject: [PATCH] Branch b1_4 Description: if a client is started while the MDS is down, mount hangs in ptlrpc_queue_wait Details : With an LWI_INTR() wait event (interruptible, but no timeout), the caller waits indefinitely in ptlrpc_queue_wait->l_wait_event() after ptlrpc_import_delayed_req() because we didn't check if the request was interrupted, and we also didn't break out of the event loop if there was no timeout. __l_wait_event() changes match those recently made in HEAD. b=7184 r=devesh --- lustre/ChangeLog | 14 ++++++++------ lustre/include/linux/lustre_lib.h | 4 ++-- lustre/ptlrpc/client.c | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 2cb46c8..b78a184 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -165,12 +165,14 @@ Details : all OST threads are uniformly bound to CPUs on a single NUMA node and do their allocations there to localize memory access Severity : minor -Frequency : liblustre-only, when liblustre client dies or becomes busy -Bugzilla : 7311 -Description: Doing ls on Linux clients can take a long time with active - liblustre clients -Details : Newer more complete fix for 7311 issue: add connection flags - handling. 
+Frequency : if client is started with down MDS +Bugzilla : 7184 +Description: if client is started with down MDS mount hangs in ptlrpc_queue_wait +Details : Having an LWI_INTR() wait event (interruptible, but no timeout) + will wait indefinitely in ptlrpc_queue_wait->l_wait_event() after + ptlrpc_import_delayed_req() because we didn't check if the + request was interrupted, and we also didn't break out of the + event loop if there was no timeout ------------------------------------------------------------------------------ diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index a2f54b0..57017a5 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -611,7 +611,7 @@ do { \ if (condition) \ break; \ if (signal_pending(current)) { \ - if (__timed_out) { \ + if (!info->lwi_timeout || __timed_out) { \ break; \ } else { \ /* We have to do this here because some signals */ \ @@ -633,7 +633,7 @@ do { \ RECALC_SIGPENDING; \ SIGNAL_MASK_UNLOCK(current, irqflags); \ \ - if (__timed_out && signal_pending(current)) { \ + if ((!info->lwi_timeout || __timed_out) && signal_pending(current)) { \ if (info->lwi_on_signal) \ info->lwi_on_signal(info->lwi_cb_data); \ ret = -EINTR; \ diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index e6f4f42..53ec1a5 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1500,13 +1500,13 @@ restart: lwi = LWI_INTR(interrupted_request, req); rc = l_wait_event(req->rq_reply_waitq, (req->rq_send_state == imp->imp_state || - req->rq_err), + req->rq_err || req->rq_intr), &lwi); - DEBUG_REQ(D_HA, req, "\"%s\" awake: (%s == %s or %d == 1)", + DEBUG_REQ(D_HA, req, "\"%s\" awake: (%s == %s or %d/%d == 1)", current->comm, ptlrpc_import_state_name(imp->imp_state), ptlrpc_import_state_name(req->rq_send_state), - req->rq_err); + req->rq_err, req->rq_intr); spin_lock_irqsave(&imp->imp_lock, flags); list_del_init(&req->rq_list); -- 1.8.3.1