Whamcloud - gitweb
Don't crash in expired_lock_main() with racing client eviction/lock completion.
author: adilger <adilger>
Wed, 24 Mar 2004 00:29:15 +0000 (00:29 +0000)
committer: adilger <adilger>
Wed, 24 Mar 2004 00:29:15 +0000 (00:29 +0000)
b=2867

lnet/include/linux/kp30.h
lustre/ChangeLog
lustre/include/linux/lustre_lib.h
lustre/ldlm/ldlm_lockd.c
lustre/portals/include/linux/kp30.h
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c

index 9e7e7c2..c4c650e 100644 (file)
@@ -743,18 +743,21 @@ void kportal_put_ni (int nal);
 # define LPX64 "%#Lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
+# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
 # define LPU64 "%Lu"
 # define LPD64 "%Ld"
 # define LPX64 "%#Lx"
 # define LPSZ  "%u"
 # define LPSSZ "%d"
+# define LP_POISON ((void *)0x5a5a5a5a)
 #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
 # define LPU64 "%lu"
 # define LPD64 "%ld"
 # define LPX64 "%#lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
+# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #endif
 #ifndef LPU64
 # error "No word size defined"
index 0eb3b7f..f70d5d1 100644 (file)
@@ -3,7 +3,7 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        * Bug fixes
        - clear page cache after eviction  (2766)
 
-tbd  Cluster File Systems, Inc. <info@clusterfs.com>
+2004-03-22  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.1
        * bug fixes
        - fixes for glimpse AST timeouts / incorrectly 0-sized files (2818)
@@ -25,6 +25,7 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        - don't evict page beyond end of stripe extent (2925)
        - don't oops on a deleted current working directory (2399)
        - handle hard links to targets without a parent properly (2517)
+       - don't dereference NULL lock when racing during eviction (2867)
 
 2004-03-04  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.0
index 24ad8fb..b0b907c 100644 (file)
 #define LPU64 "%lu"
 #define LPD64 "%ld"
 #define LPX64 "%#lx"
+#define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #else
 #define LPU64 "%Lu"
 #define LPD64 "%Ld"
 #define LPX64 "%#Lx"
+#define LP_POISON ((void *)0x5a5a5a5a)
 #endif
 #endif
 
index f36661d..5fc764e 100644 (file)
@@ -126,7 +126,6 @@ static int expired_lock_main(void *arg)
         wake_up(&expired_lock_thread.elt_waitq);
 
         while (1) {
-                struct list_head *tmp, *n, work_list;
                 l_wait_event(expired_lock_thread.elt_waitq,
                              have_expired_locks() ||
                              expired_lock_thread.elt_state == ELT_TERMINATE,
@@ -134,33 +133,30 @@ static int expired_lock_main(void *arg)
 
                 spin_lock_bh(&expired_lock_thread.elt_lock);
                 while (!list_empty(expired)) {
+                        struct obd_export *export;
                         struct ldlm_lock *lock;
 
-                        list_add(&work_list, expired);
-                        list_del_init(expired);
-
-                        list_for_each_entry(lock, &work_list, l_pending_chain) {
-                                LDLM_DEBUG(lock, "moving to work list");
-                        }
-
-                        spin_unlock_bh(&expired_lock_thread.elt_lock);
-
-
-                        list_for_each_safe(tmp, n, &work_list) {
-                                 lock = list_entry(tmp, struct ldlm_lock,
-                                                   l_pending_chain);
-                                 ptlrpc_fail_export(lock->l_export);
+                        lock = list_entry(expired->next, struct ldlm_lock,
+                                          l_pending_chain);
+                        if ((void *)lock < LP_POISON + PAGE_SIZE &&
+                            (void *)lock >= LP_POISON) {
+                                CERROR("free lock on elt list %p\n", lock);
+                                LBUG();
                         }
-
-
-                        if (!list_empty(&work_list)) {
-                                list_for_each_entry(lock, &work_list, l_pending_chain) {
-                                        LDLM_ERROR(lock, "still on work list!");
-                                }
+                        list_del_init(&lock->l_pending_chain);
+                        if ((void *)lock->l_export < LP_POISON + PAGE_SIZE &&
+                            (void *)lock->l_export >= LP_POISON + PAGE_SIZE) {
+                                CERROR("lock with free export on elt list %p\n",
+                                       export);
+                                lock->l_export = NULL;
+                                LDLM_ERROR(lock, "free export\n");
+                                continue;
                         }
-                        LASSERTF (list_empty(&work_list),
-                                  "some exports not failed properly\n");
+                        export = class_export_get(lock->l_export);
+                        spin_unlock_bh(&expired_lock_thread.elt_lock);
 
+                        ptlrpc_fail_export(export);
+                        class_export_put(export);
                         spin_lock_bh(&expired_lock_thread.elt_lock);
                 }
                 spin_unlock_bh(&expired_lock_thread.elt_lock);
index 9e7e7c2..c4c650e 100644 (file)
@@ -743,18 +743,21 @@ void kportal_put_ni (int nal);
 # define LPX64 "%#Lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
+# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
 # define LPU64 "%Lu"
 # define LPD64 "%Ld"
 # define LPX64 "%#Lx"
 # define LPSZ  "%u"
 # define LPSSZ "%d"
+# define LP_POISON ((void *)0x5a5a5a5a)
 #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
 # define LPU64 "%lu"
 # define LPD64 "%ld"
 # define LPX64 "%#lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
+# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #endif
 #ifndef LPU64
 # error "No word size defined"
index 0774fa2..6635659 100644 (file)
@@ -1060,8 +1060,8 @@ static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
         if (request == NULL)
                 RETURN(1);
 
-        if (request == (void *)(unsigned long)(0x5a5a5a5a5a5a5a5a) ||
-            request->rq_reqmsg == (void *)(unsigned long)(0x5a5a5a5a5a5a5a5a)) {
+        if (request == LP_POISON ||
+            request->rq_reqmsg == LP_POISON) {
                 CERROR("dereferencing freed request (bug 575)\n");
                 LBUG();
                 RETURN(1);
index 6e61236..b39d673 100644 (file)
@@ -325,7 +325,7 @@ static int ptlrpc_master_callback(ptl_event_t *ev)
         void (*callback)(ptl_event_t *ev) = cbid->cbid_fn;
 
         /* Honestly, it's best to find out early. */
-        LASSERT (cbid->cbid_arg != (void *)0x5a5a5a5a5a5a5a5a);
+        LASSERT (cbid->cbid_arg != LP_POISON);
         LASSERT (callback == request_out_callback ||
                  callback == reply_in_callback ||
                  callback == client_bulk_callback ||