From 7c50b2ddca2883c732b8d71e6f20671572bec907 Mon Sep 17 00:00:00 2001 From: adilger Date: Wed, 24 Mar 2004 00:29:15 +0000 Subject: [PATCH] Don't crash in expired_lock_main() with racing client eviction/lock completion. b=2867 --- lnet/include/linux/kp30.h | 3 +++ lustre/ChangeLog | 3 ++- lustre/include/linux/lustre_lib.h | 2 ++ lustre/ldlm/ldlm_lockd.c | 42 +++++++++++++++++-------------------- lustre/portals/include/linux/kp30.h | 3 +++ lustre/ptlrpc/client.c | 4 ++-- lustre/ptlrpc/events.c | 2 +- 7 files changed, 32 insertions(+), 27 deletions(-) diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 9e7e7c2..c4c650e 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -743,18 +743,21 @@ void kportal_put_ni (int nal); # define LPX64 "%#Lx" # define LPSZ "%lu" # define LPSSZ "%ld" +# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a) #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) # define LPU64 "%Lu" # define LPD64 "%Ld" # define LPX64 "%#Lx" # define LPSZ "%u" # define LPSSZ "%d" +# define LP_POISON ((void *)0x5a5a5a5a) #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) # define LPU64 "%lu" # define LPD64 "%ld" # define LPX64 "%#lx" # define LPSZ "%lu" # define LPSSZ "%ld" +# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a) #endif #ifndef LPU64 # error "No word size defined" diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 0eb3b7f..f70d5d1 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -3,7 +3,7 @@ tbd Cluster File Systems, Inc. * Bug fixes - clear page cache after eviction (2766) -tbd Cluster File Systems, Inc. +2004-03-22 Cluster File Systems, Inc. * version 1.2.1 * bug fixes - fixes for glimpse AST timeouts / incorrectly 0-sized files (2818) @@ -25,6 +25,7 @@ tbd Cluster File Systems, Inc. - don't evict page beyond end of stripe extent (2925) - don't oops on a deleted current working directory (2399) - handle hard links to targets without a parent properly (2517) + - don't dereference NULL lock when racing during eviction (2867) 2004-03-04 Cluster File Systems, Inc. * version 1.2.0 diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 24ad8fb..b0b907c 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -48,10 +48,12 @@ #define LPU64 "%lu" #define LPD64 "%ld" #define LPX64 "%#lx" +#define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a) #else #define LPU64 "%Lu" #define LPD64 "%Ld" #define LPX64 "%#Lx" +#define LP_POISON ((void *)0x5a5a5a5a) #endif #endif diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index f36661d..5fc764e 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -126,7 +126,6 @@ static int expired_lock_main(void *arg) wake_up(&expired_lock_thread.elt_waitq); while (1) { - struct list_head *tmp, *n, work_list; l_wait_event(expired_lock_thread.elt_waitq, have_expired_locks() || expired_lock_thread.elt_state == ELT_TERMINATE, @@ -134,33 +133,30 @@ static int expired_lock_main(void *arg) spin_lock_bh(&expired_lock_thread.elt_lock); while (!list_empty(expired)) { + struct obd_export *export; struct ldlm_lock *lock; - list_add(&work_list, expired); - list_del_init(expired); - - list_for_each_entry(lock, &work_list, l_pending_chain) { - LDLM_DEBUG(lock, "moving to work list"); - } - - spin_unlock_bh(&expired_lock_thread.elt_lock); - - - list_for_each_safe(tmp, n, &work_list) { - lock = list_entry(tmp, struct ldlm_lock, - l_pending_chain); - ptlrpc_fail_export(lock->l_export); + lock = list_entry(expired->next, struct ldlm_lock, + l_pending_chain); + if ((void *)lock < LP_POISON + PAGE_SIZE && + (void *)lock >= LP_POISON) { + CERROR("free lock on elt list %p\n", lock); + LBUG(); } - - - if (!list_empty(&work_list)) { - list_for_each_entry(lock, &work_list, l_pending_chain) { - LDLM_ERROR(lock, "still on work list!"); - } + list_del_init(&lock->l_pending_chain); + if ((void *)lock->l_export < LP_POISON + PAGE_SIZE && + (void *)lock->l_export >= LP_POISON + PAGE_SIZE) { + CERROR("lock with free export on elt list %p\n", + export); + lock->l_export = NULL; + LDLM_ERROR(lock, "free export\n"); + continue; } - LASSERTF (list_empty(&work_list), - "some exports not failed properly\n"); + export = class_export_get(lock->l_export); + spin_unlock_bh(&expired_lock_thread.elt_lock); + ptlrpc_fail_export(export); + class_export_put(export); spin_lock_bh(&expired_lock_thread.elt_lock); } spin_unlock_bh(&expired_lock_thread.elt_lock); diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 9e7e7c2..c4c650e 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -743,18 +743,21 @@ void kportal_put_ni (int nal); # define LPX64 "%#Lx" # define LPSZ "%lu" # define LPSSZ "%ld" +# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a) #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) # define LPU64 "%Lu" # define LPD64 "%Ld" # define LPX64 "%#Lx" # define LPSZ "%u" # define LPSSZ "%d" +# define LP_POISON ((void *)0x5a5a5a5a) #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) # define LPU64 "%lu" # define LPD64 "%ld" # define LPX64 "%#lx" # define LPSZ "%lu" # define LPSSZ "%ld" +# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a) #endif #ifndef LPU64 # error "No word size defined" diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 0774fa2..6635659 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1060,8 +1060,8 @@ static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked) if (request == NULL) RETURN(1); - if (request == (void *)(unsigned long)(0x5a5a5a5a5a5a5a5a) || - request->rq_reqmsg == (void *)(unsigned long)(0x5a5a5a5a5a5a5a5a)) { + if (request == LP_POISON || + request->rq_reqmsg == LP_POISON) { CERROR("dereferencing freed request (bug 575)\n"); LBUG(); RETURN(1); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 6e61236..b39d673 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -325,7 +325,7 @@ static int ptlrpc_master_callback(ptl_event_t *ev) void (*callback)(ptl_event_t *ev) = cbid->cbid_fn; /* Honestly, it's best to find out early. */ - LASSERT (cbid->cbid_arg != (void *)0x5a5a5a5a5a5a5a5a); + LASSERT (cbid->cbid_arg != LP_POISON); LASSERT (callback == request_out_callback || callback == reply_in_callback || callback == client_bulk_callback || -- 1.8.3.1