Whamcloud - gitweb
LU-2317 mgs: both callbacks are to be fixed
author Vitaly Fertman <vitaly_fertman@xyratex.com>
Mon, 17 Dec 2012 19:48:44 +0000 (23:48 +0400)
committer Oleg Drokin <green@whamcloud.com>
Tue, 8 Jan 2013 05:56:46 +0000 (00:56 -0500)
The commit ORNL-27 introduced a new lock enqueue which drops a
reference on the lock in the blocking callbacks. However, this turned
out to be racy: if a lock is taken quickly, the callback is called
twice, once from enqueue completion and once from the conflicting lock
cancel, and the lock reference is decremented twice. LU-1259 tried to
fix this, but only for one callback.

Fix it for mgs_completion_ast_config as well.

Signed-off-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Xyratex-bug-id: MRP-792
Change-Id: I49207cfae230318da3b6dcbe3a63c14d8d94a244
Reviewed-on: http://review.whamcloud.com/4744
Tested-by: Hudson
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mgs/mgs_handler.c

index dc993c7..ae68653 100644 (file)
@@ -132,23 +132,33 @@ static int mgs_disconnect(struct obd_export *exp)
 static int mgs_handle(struct ptlrpc_request *req);
 
 static int mgs_completion_ast_config(struct ldlm_lock *lock, __u64 flags,
-                                     void *cbdata)
+                                    void *cbdata)
 {
-        ENTRY;
+       ENTRY;
 
-        if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
-                       LDLM_FL_BLOCK_CONV))) {
-                struct fs_db *fsdb = (struct fs_db *)lock->l_ast_data;
-                struct lustre_handle lockh;
+       if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
+                      LDLM_FL_BLOCK_CONV))) {
+                struct fs_db *fsdb;
 
-                /* clear the bit before lock put */
-               clear_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags);
+               /* l_ast_data is used as a marker to avoid cancel ldlm lock
+                * twice. See LU-2317. */
+               lock_res_and_lock(lock);
+               fsdb = (struct fs_db *)lock->l_ast_data;
+               lock->l_ast_data = NULL;
+               unlock_res_and_lock(lock);
 
-                ldlm_lock2handle(lock, &lockh);
-                ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
-        }
+               if (fsdb != NULL) {
+                       struct lustre_handle lockh;
 
-        RETURN(ldlm_completion_ast(lock, flags, cbdata));
+                       /* clear the bit before lock put */
+                       clear_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags);
+
+                       ldlm_lock2handle(lock, &lockh);
+                       ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
+               }
+       }
+
+       RETURN(ldlm_completion_ast(lock, flags, cbdata));
 }
 
 static int mgs_completion_ast_ir(struct ldlm_lock *lock, __u64 flags,