Whamcloud - gitweb
b=18829,18850
authorjxiong <jxiong>
Thu, 26 Mar 2009 10:20:53 +0000 (10:20 +0000)
committerjxiong <jxiong>
Thu, 26 Mar 2009 10:20:53 +0000 (10:20 +0000)
r=h.huang,yong.fan,shadow

Fix some ldlm lock-related issues for clio.

More fixes are still to come :-)

lustre/include/obd_support.h
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/lov/lov_lock.c
lustre/osc/osc_lock.c
lustre/osc/osc_request.c
lustre/tests/sanity.sh

index 75bc6d2..9c70ef4 100644 (file)
@@ -289,6 +289,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_OSC_CKSUM_ADLER_ONLY    0x40c
 #define OBD_FAIL_OSC_DIO_PAUSE           0x40d
 #define OBD_FAIL_OSC_OBJECT_CONTENTION   0x40e
+#define OBD_FAIL_OSC_CP_CANCEL_RACE      0x40f
 
 #define OBD_FAIL_PTLRPC                  0x500
 #define OBD_FAIL_PTLRPC_ACK              0x501
@@ -716,7 +717,7 @@ do {                                                                          \
 })
 #define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
 do {                                                                          \
-        LASSERT(!in_interrupt());                                             \
+        LASSERT(ergo(type != CFS_ALLOC_ATOMIC, !in_interrupt()));             \
         (ptr) = cfs_mem_cache_alloc(slab, (type));                            \
         if (likely((ptr) != NULL &&                                           \
                    (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 ||       \
index ebe2807..f6fe4a7 100644 (file)
@@ -319,6 +319,7 @@ repeat:
                                 cont = 0;
 
                         LDLM_LOCK_GET(lock);
+
                         spin_unlock_bh(&waiting_locks_spinlock);
                         LDLM_DEBUG(lock, "prolong the busy lock");
                         ldlm_refresh_waiting_lock(lock,
@@ -326,11 +327,11 @@ repeat:
                         spin_lock_bh(&waiting_locks_spinlock);
 
                         if (!cont) {
-                                LDLM_LOCK_PUT(lock);
+                                LDLM_LOCK_RELEASE(lock);
                                 break;
                         }
 
-                        LDLM_LOCK_PUT(lock);
+                        LDLM_LOCK_RELEASE(lock);
                         continue;
                 }
                 lock->l_resource->lr_namespace->ns_timeouts++;
index b6fb1d1..d120548 100644 (file)
@@ -1244,7 +1244,7 @@ int ldlm_cli_cancel(struct lustre_handle *lockh)
 
         rc = ldlm_cli_cancel_local(lock);
         if (rc < 0 || rc == LDLM_FL_LOCAL_ONLY) {
-                LDLM_LOCK_RELEASE(lock);
+                LDLM_LOCK_PUT(lock);
                 RETURN(rc < 0 ? rc : 0);
         }
         /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL
index 9c741f8..67700dd 100644 (file)
@@ -1040,7 +1040,7 @@ static struct cl_lock_closure *lov_closure_get(const struct lu_env *env,
         struct cl_lock_closure *closure;
 
         closure = &lov_env_info(env)->lti_closure;
-        LINVRNT(list_empty(&closure->clc_list));
+        LASSERT(list_empty(&closure->clc_list));
         cl_lock_closure_init(env, closure, parent, 1);
         return closure;
 }
index ef82a95..6fcbcc5 100644 (file)
@@ -786,9 +786,9 @@ static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock,
                                 ;
                         else if (dlmlock->l_granted_mode != LCK_MINMODE)
                                 osc_lock_granted(env, olck, dlmlock, dlmrc);
+                        unlock_res_and_lock(dlmlock);
                         if (dlmrc != 0)
                                 cl_lock_error(env, lock, dlmrc);
-                        unlock_res_and_lock(dlmlock);
                         cl_lock_mutex_put(env, lock);
                         osc_ast_data_put(env, olck);
                         result = 0;
@@ -1394,19 +1394,31 @@ static void osc_lock_cancel(const struct lu_env *env,
         struct cl_lock   *lock    = slice->cls_lock;
         struct osc_lock  *olck    = cl2osc_lock(slice);
         struct ldlm_lock *dlmlock = olck->ols_lock;
-        int               result;
+        int               result  = 0;
         int               discard;
 
         LASSERT(cl_lock_is_mutexed(lock));
         LINVRNT(osc_lock_invariant(olck));
 
         if (dlmlock != NULL) {
+                int do_cancel;
+
                 discard = dlmlock->l_flags & LDLM_FL_DISCARD_DATA;
                 result = osc_lock_flush(olck, discard);
                 if (olck->ols_hold)
                         osc_lock_unuse(env, slice);
-                LASSERT(dlmlock->l_readers == 0 && dlmlock->l_writers == 0);
-                result = ldlm_cli_cancel(&olck->ols_handle);
+
+                lock_res_and_lock(dlmlock);
+                /* Now that we're the only user of dlm read/write reference,
+                 * mostly the ->l_readers + ->l_writers should be zero.
+                 * However, there is a corner case.
+                 * See bug 18829 for details.*/
+                do_cancel = (dlmlock->l_readers == 0 &&
+                             dlmlock->l_writers == 0);
+                dlmlock->l_flags |= LDLM_FL_CBPENDING;
+                unlock_res_and_lock(dlmlock);
+                if (do_cancel)
+                        result = ldlm_cli_cancel(&olck->ols_handle);
                 if (result < 0)
                         CL_LOCK_DEBUG(D_ERROR, env, lock,
                                       "lock %p cancel failure with error(%d)\n",
index 6c9fcab..9e34a2c 100644 (file)
@@ -3064,6 +3064,9 @@ static int osc_enqueue_interpret(const struct lu_env *env,
         /* Complete osc stuff. */
         rc = osc_enqueue_fini(req, aa->oa_lvb,
                               aa->oa_upcall, aa->oa_cookie, aa->oa_flags, rc);
+
+        OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_CANCEL_RACE, 10);
+
         /* Release the lock for async request. */
         if (lustre_handle_is_used(&handle) && rc == ELDLM_OK)
                 /*
index d5611db..a32da55 100644 (file)
@@ -6372,6 +6372,17 @@ test_212() {
 }
 run_test 212 "Sendfile test ============================================"
 
+test_213() {
+       dd if=/dev/zero of=$DIR/$tfile bs=4k count=4
+       cancel_lru_locks osc
+       lctl set_param fail_loc=0x8000040f
+       # generate a read lock
+       cat $DIR/$tfile > /dev/null
+       # write to the file, it will try to cancel the above read lock.
+       cat /etc/hosts >> $DIR/$tfile
+}
+run_test 213 "OSC lock completion and cancel race don't crash - bug 18829"
+
 #
 # tests that do cleanup/setup should be run at the end
 #