From 751c6450e392b99184982bb6dcd0b397de9b7e54 Mon Sep 17 00:00:00 2001 From: adilger Date: Wed, 29 Jun 2005 23:41:09 +0000 Subject: [PATCH] Branch b1_4 Multiple concurrent overlapping read+write on multiple SMP nodes caused lock timeout during readahead (since 1.4.2). Processes doing ll_page_matches() during readahead might match a lock that hasn't been granted yet if there are overlapping and conflicting lock requests pending. The readahead process waits on ungranted lock (original lock is CBPENDING), while OST waits for that process to cancel CBPENDING read lock and eventually evicts client. Caused by change to ll_page_matches() from bug 5654. b=6469 --- lustre/ChangeLog | 11 +++++++++++ lustre/ldlm/ldlm_lock.c | 4 ++-- lustre/ldlm/ldlm_request.c | 4 +++- lustre/llite/llite_internal.h | 1 + lustre/llite/lproc_llite.c | 1 + lustre/llite/rw.c | 9 +++++---- lustre/lov/lov_obd.c | 8 ++++---- 7 files changed, 27 insertions(+), 11 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 38d4982..0fb9dff 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -54,6 +54,17 @@ Details : If some OSTs are full or unavailable, creating files may try to use other servers or return an error to the client. +Severity : minor +Frequency : occasional +Bugzilla : 6469 +Description: Multiple concurrent overlapping read+write on multiple SMP nodes + caused lock timeout during readahead (since 1.4.2). +Details : Processes doing readahead might match a lock that hasn't been + granted yet if there are overlapping and conflicting lock + requests. The readahead process waits on ungranted lock + (original lock is CBPENDING), while OST waits for that process + to cancel CBPENDING read lock and eventually evicts client. + ------------------------------------------------------------------------------ 2005-06-20 Cluster File Systems, Inc. 
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 6d7091d..1f3f608 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -593,8 +593,8 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode, lock->l_policy_data.l_extent.end < policy->l_extent.end)) continue; - if (lock->l_resource->lr_type == LDLM_EXTENT && - mode == LCK_GROUP && + if (unlikely(mode == LCK_GROUP) && + lock->l_resource->lr_type == LDLM_EXTENT && lock->l_policy_data.l_extent.gid != policy->l_extent.gid) continue; diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index d15a258..9d490f5 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -86,8 +86,10 @@ int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data) int rc = 0; ENTRY; - if (flags == LDLM_FL_WAIT_NOREPROC) + if (flags == LDLM_FL_WAIT_NOREPROC) { + LDLM_DEBUG(lock, "client-side enqueue waiting on pending lock"); goto noreproc; + } if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV))) { diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 9e91555..dab0979 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -100,6 +100,7 @@ enum ra_stat { RA_STAT_MISS, RA_STAT_DISTANT_READPAGE, RA_STAT_MISS_IN_WINDOW, + RA_STAT_FAILED_GRAB_PAGE, RA_STAT_FAILED_MATCH, RA_STAT_DISCARDED, RA_STAT_ZERO_LEN, diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index d8114fa..54cb257 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -683,6 +683,7 @@ static int ll_ra_stats_seq_show(struct seq_file *seq, void *v) [RA_STAT_MISS] = "misses", [RA_STAT_DISTANT_READPAGE] = "readpage not consecutive", [RA_STAT_MISS_IN_WINDOW] = "miss inside window", + [RA_STAT_FAILED_GRAB_PAGE] = "failed grab_cache_page", [RA_STAT_FAILED_MATCH] = "failed lock match", [RA_STAT_DISCARDED] = "read but discarded", [RA_STAT_ZERO_LEN] = "zero length file", 
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index fd8781f..9c6c8db 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -892,9 +892,9 @@ static int ll_page_matches(struct page *page, int fd_flags) page_extent.l_extent.start = (__u64)page->index << PAGE_CACHE_SHIFT; page_extent.l_extent.end = page_extent.l_extent.start + PAGE_CACHE_SIZE - 1; - flags = LDLM_FL_TEST_LOCK; - if (!(fd_flags&LL_FILE_READAHEAD)) - flags |= LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED; + flags = LDLM_FL_TEST_LOCK | LDLM_FL_BLOCK_GRANTED; + if (!(fd_flags & LL_FILE_READAHEAD)) + flags |= LDLM_FL_CBPENDING; matches = obd_match(ll_i2sbi(inode)->ll_osc_exp, ll_i2info(inode)->lli_smd, LDLM_EXTENT, &page_extent, LCK_PR | LCK_PW, &flags, inode, @@ -1020,6 +1020,7 @@ static int ll_readahead(struct ll_readahead_state *ras, /* skip locked pages from previous readpage calls */ page = grab_cache_page_nowait_gfp(mapping, i, gfp_mask); if (page == NULL) { + ll_ra_stats_inc(mapping, RA_STAT_FAILED_GRAB_PAGE); CDEBUG(D_READA, "g_c_p_n failed\n"); continue; } @@ -1035,7 +1036,7 @@ static int ll_readahead(struct ll_readahead_state *ras, goto next_page; /* bail when we hit the end of the lock. 
*/ - if ((rc = ll_page_matches(page, flags|LL_FILE_READAHEAD)) <= 0) { + if ((rc = ll_page_matches(page, flags|LL_FILE_READAHEAD)) <= 0){ LL_CDEBUG_PAGE(D_READA | D_PAGE, page, "lock match failed: rc %d\n", rc); ll_ra_stats_inc(mapping, RA_STAT_FAILED_MATCH); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 5e28f4b..7104aa9 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -827,11 +827,11 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, RETURN(rc); } -#define ASSERT_LSM_MAGIC(lsmp) \ -do { \ - LASSERT((lsmp) != NULL); \ +#define ASSERT_LSM_MAGIC(lsmp) \ +do { \ + LASSERT((lsmp) != NULL); \ LASSERTF((lsmp)->lsm_magic == LOV_MAGIC, "%p->lsm_magic=%x\n", \ - (lsmp), (lsmp)->lsm_magic); \ + (lsmp), (lsmp)->lsm_magic); \ } while (0) static int lov_destroy(struct obd_export *exp, struct obdo *oa, -- 1.8.3.1