Whamcloud - gitweb
LU-8391 ldlm: check double grant race after resource change 75/21275/11
author Li Dongyang <dongyang.li@anu.edu.au>
Wed, 13 Jul 2016 06:17:53 +0000 (16:17 +1000)
committer Oleg Drokin <green@whamcloud.com>
Mon, 29 Oct 2018 15:58:03 +0000 (15:58 +0000)
In ldlm_handle_cp_callback(), we call lock_res_and_lock and then
check if the ldlm lock has already been granted.
If the lock resource has changed, we release the lock, allocate a
new resource, and then grab the lock again before calling
ldlm_grant_lock().
However, this gives another thread an opportunity to grab the lock
and pass the already-granted check while we are changing the
resource. Eventually the other thread calls ldlm_grant_lock() on the
same ldlm lock and triggers a LASSERT.

Fix the issue by performing the double-grant race check after
changing the lock resource.

Signed-off-by: Li Dongyang <dongyang.li@anu.edu.au>
Change-Id: Ib327b5e6b5f211909db5350de383d470a891e72a
Reviewed-on: https://review.whamcloud.com/21275
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lockd.c

index a062e5c..486f53b 100644 (file)
@@ -1790,6 +1790,21 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
        }
 
        lock_res_and_lock(lock);
+
+       if (!ldlm_res_eq(&dlm_req->lock_desc.l_resource.lr_name,
+                        &lock->l_resource->lr_name)) {
+               ldlm_resource_unlink_lock(lock);
+               unlock_res_and_lock(lock);
+               rc = ldlm_lock_change_resource(ns, lock,
+                               &dlm_req->lock_desc.l_resource.lr_name);
+               if (rc < 0) {
+                       LDLM_ERROR(lock, "Failed to allocate resource");
+                       GOTO(out, rc);
+               }
+               LDLM_DEBUG(lock, "completion AST, new resource");
+               lock_res_and_lock(lock);
+       }
+
        if (ldlm_is_destroyed(lock) ||
            lock->l_granted_mode == lock->l_req_mode) {
                /* bug 11300: the lock has already been granted */
@@ -1813,21 +1828,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
                LDLM_DEBUG(lock, "completion AST, new policy data");
        }
 
-        ldlm_resource_unlink_lock(lock);
-        if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
-                   &lock->l_resource->lr_name,
-                   sizeof(lock->l_resource->lr_name)) != 0) {
-                unlock_res_and_lock(lock);
-               rc = ldlm_lock_change_resource(ns, lock,
-                               &dlm_req->lock_desc.l_resource.lr_name);
-               if (rc < 0) {
-                       LDLM_ERROR(lock, "Failed to allocate resource");
-                       GOTO(out, rc);
-               }
-                LDLM_DEBUG(lock, "completion AST, new resource");
-                CERROR("change resource!\n");
-                lock_res_and_lock(lock);
-        }
+       ldlm_resource_unlink_lock(lock);
 
         if (dlm_req->lock_flags & LDLM_FL_AST_SENT) {
                /* BL_AST locks are not needed in LRU.