Whamcloud - gitweb
Don't dereference freed lock when lock cancellation is racing with export
authoradilger <adilger>
Sun, 21 Mar 2004 06:15:51 +0000 (06:15 +0000)
committeradilger <adilger>
Sun, 21 Mar 2004 06:15:51 +0000 (06:15 +0000)
eviction.
Create a constant for freed memory to make it easier to check.
Test 45 in replay-single is disabled because with or without the patch the
forced client cleanup isn't working properly.
b=2867
r=phil,robert

lnet/include/linux/kp30.h
lustre/ChangeLog
lustre/include/linux/lustre_lib.h
lustre/ldlm/ldlm_lockd.c
lustre/portals/include/linux/kp30.h
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/tests/recovery-small.sh
lustre/tests/replay-single.sh

index c080a57..4c3e85f 100644 (file)
@@ -1115,18 +1115,21 @@ void kportal_put_ni (int nal);
 # define LPX64 "%#Lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
+# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
 # define LPU64 "%Lu"
 # define LPD64 "%Ld"
 # define LPX64 "%#Lx"
 # define LPSZ  "%u"
 # define LPSSZ "%d"
+# define LP_POISON ((void *)0x5a5a5a5a)
 #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
 # define LPU64 "%lu"
 # define LPD64 "%ld"
 # define LPX64 "%#lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
+# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #endif
 #ifndef LPU64
 # error "No word size defined"
index f2477a7..9a2492e 100644 (file)
@@ -20,6 +20,7 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        - don't evict page beyond end of stripe extent (2925)
        - don't oops on a deleted current working directory (2399)
        - handle hard links to targets without a parent properly (2517)
+       - don't dereference NULL lock when racing during eviction (2867)
 
 2004-03-04  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.0
index 24ad8fb..b0b907c 100644 (file)
 #define LPU64 "%lu"
 #define LPD64 "%ld"
 #define LPX64 "%#lx"
+#define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #else
 #define LPU64 "%Lu"
 #define LPD64 "%Ld"
 #define LPX64 "%#Lx"
+#define LP_POISON ((void *)0x5a5a5a5a)
 #endif
 #endif
 
index 6602713..c88139f 100644 (file)
@@ -126,7 +126,6 @@ static int expired_lock_main(void *arg)
         wake_up(&expired_lock_thread.elt_waitq);
 
         while (1) {
-                struct list_head *tmp, *n, work_list;
                 l_wait_event(expired_lock_thread.elt_waitq,
                              have_expired_locks() ||
                              expired_lock_thread.elt_state == ELT_TERMINATE,
@@ -134,33 +133,30 @@ static int expired_lock_main(void *arg)
 
                 spin_lock_bh(&expired_lock_thread.elt_lock);
                 while (!list_empty(expired)) {
+                        struct obd_export *export;
                         struct ldlm_lock *lock;
 
-                        list_add(&work_list, expired);
-                        list_del_init(expired);
-
-                        list_for_each_entry(lock, &work_list, l_pending_chain) {
-                                LDLM_DEBUG(lock, "moving to work list");
-                        }
-
-                        spin_unlock_bh(&expired_lock_thread.elt_lock);
-
-
-                        list_for_each_safe(tmp, n, &work_list) {
-                                 lock = list_entry(tmp, struct ldlm_lock,
-                                                   l_pending_chain);
-                                 ptlrpc_fail_export(lock->l_export);
+                        lock = list_entry(expired->next, struct ldlm_lock,
+                                          l_pending_chain);
+                        if ((void *)lock < LP_POISON + PAGE_SIZE &&
+                            (void *)lock >= LP_POISON) {
+                                CERROR("free lock on elt list %p\n", lock);
+                                LBUG();
                         }
-
-
-                        if (!list_empty(&work_list)) {
-                                list_for_each_entry(lock, &work_list, l_pending_chain) {
-                                        LDLM_ERROR(lock, "still on work list!");
-                                }
+                        list_del_init(&lock->l_pending_chain);
+                        if ((void *)lock->l_export < LP_POISON + PAGE_SIZE &&
+                            (void *)lock->l_export >= LP_POISON + PAGE_SIZE) {
+                                CERROR("lock with free export on elt list %p\n",
+                                       export);
+                                lock->l_export = NULL;
+                                LDLM_ERROR(lock, "free export\n");
+                                continue;
                         }
-                        LASSERTF (list_empty(&work_list),
-                                  "some exports not failed properly\n");
+                        export = class_export_get(lock->l_export);
+                        spin_unlock_bh(&expired_lock_thread.elt_lock);
 
+                        ptlrpc_fail_export(export);
+                        class_export_put(export);
                         spin_lock_bh(&expired_lock_thread.elt_lock);
                 }
                 spin_unlock_bh(&expired_lock_thread.elt_lock);
index c080a57..4c3e85f 100644 (file)
@@ -1115,18 +1115,21 @@ void kportal_put_ni (int nal);
 # define LPX64 "%#Lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
+# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
 # define LPU64 "%Lu"
 # define LPD64 "%Ld"
 # define LPX64 "%#Lx"
 # define LPSZ  "%u"
 # define LPSSZ "%d"
+# define LP_POISON ((void *)0x5a5a5a5a)
 #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
 # define LPU64 "%lu"
 # define LPD64 "%ld"
 # define LPX64 "%#lx"
 # define LPSZ  "%lu"
 # define LPSSZ "%ld"
+# define LP_POISON ((void *)0x5a5a5a5a5a5a5a5a)
 #endif
 #ifndef LPU64
 # error "No word size defined"
index 8dc4ad0..70db906 100644 (file)
@@ -1070,8 +1070,8 @@ static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
         if (request == NULL)
                 RETURN(1);
 
-        if (request == (void *)(unsigned long)(0x5a5a5a5a5a5a5a5a) ||
-            request->rq_reqmsg == (void *)(unsigned long)(0x5a5a5a5a5a5a5a5a)) {
+        if (request == LP_POISON ||
+            request->rq_reqmsg == LP_POISON) {
                 CERROR("dereferencing freed request (bug 575)\n");
                 LBUG();
                 RETURN(1);
index b1f8221..343ccba 100644 (file)
@@ -324,7 +324,7 @@ static int ptlrpc_master_callback(ptl_event_t *ev)
         void (*callback)(ptl_event_t *ev) = cbid->cbid_fn;
 
         /* Honestly, it's best to find out early. */
-        LASSERT (cbid->cbid_arg != (void *)0x5a5a5a5a5a5a5a5a);
+        LASSERT (cbid->cbid_arg != LP_POISON);
         LASSERT (callback == request_out_callback ||
                  callback == reply_in_callback ||
                  callback == client_bulk_callback ||
index 5cd1e62..4b2c37f 100755 (executable)
@@ -199,4 +199,29 @@ test_15() {
 }
 run_test 15 "failed open (-ENOMEM)"
 
+test_19a() {
+    f=$MOUNT/$tfile
+    do_facet client mcreate $f        || return 1
+    drop_ldlm_cancel "chmod 0777 $f"  || echo evicted
+
+    do_facet client checkstat -v -p 0777 $f  || echo evicted
+    do_facet client "munlink $f"
+}
+run_test 19a "test expired_lock_main on mds (2867)"
+
+test_19b() {
+    f=$MOUNT/$tfile
+    do_facet client multiop $f Ow  || return 1
+    do_facet client multiop $f or  || return 2
+
+    cancel_lru_locks OSC
+
+    do_facet client multiop $f or  || return 3
+    drop_ldlm_cancel multiop $f Ow  || echo "client evicted, as expected"
+
+    do_facet client munlink $f  || return 4
+
+}
+run_test 19b "test expired_lock_main on ost (2867)"
+
 $CLEANUP
index 8b1c6e3..13a3465 100755 (executable)
@@ -14,7 +14,7 @@ init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
 
 # Skip these tests
-ALWAYS_EXCEPT=""
+ALWAYS_EXCEPT="45"
 
 
 gen_config() {
@@ -864,6 +864,24 @@ test_44() {
 }
 run_test 44 "race in target handle connect"
 
+test_45() {
+    d=$MOUNT/$tdir
+
+    mkdir $d
+    ls -l $d
+    cp /etc/termcap $d
+    cat $d/termcap > /dev/null
+
+    zconf_umount client $MOUNT -f
+    zconf_mount `hostname` $MOUNT
+
+    rm -rf $d || return 1
+    $CHECKSTAT -t dir $d && return 1 || true
+
+    sleep 10
+}
+run_test 45 "test client eviction after client crash"
+
 equals_msg test complete, cleaning up
 $CLEANUP