Whamcloud - gitweb
LU-1602 ldlm: Fix flock deadlock detection race 77/3277/7
author Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Mon, 21 Jan 2013 21:15:23 +0000 (23:15 +0200)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 21 Jun 2013 21:31:30 +0000 (21:31 +0000)
Deadlock isn't detected if 2 threads are trying to
grant 2 locks which deadlock on each other.
They call ldlm_flock_deadlock() simultaneously
and the deadlock isn't detected.

The solution is to add the lock to the blocking list before
calling ldlm_flock_deadlock().

Xyratex-bug-id: MRP-412
Signed-off-by: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Reviewed-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Reviewed-by: Bruce Korb <bruce_korb@xyratex.com>
Change-Id: I437c8b40a58de14bbac3da39d98d0f03d0f2e064
Reviewed-on: http://review.whamcloud.com/3277
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Keith Mannthey <keith.mannthey@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ldlm/ldlm_flock.c

index c880dc0..5b40b3e 100644 (file)
@@ -203,6 +203,7 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
                if (lock == NULL)
                        break;
 
                if (lock == NULL)
                        break;
 
+               LASSERT(req != lock);
                flock = &lock->l_policy_data.l_flock;
                LASSERT(flock->owner == bl_owner);
                 bl_owner = flock->blocking_owner;
                flock = &lock->l_policy_data.l_flock;
                LASSERT(flock->owner == bl_owner);
                 bl_owner = flock->blocking_owner;
@@ -334,18 +335,21 @@ reprocess:
                                 RETURN(LDLM_ITER_STOP);
                         }
 
                                 RETURN(LDLM_ITER_STOP);
                         }
 
-                        if (ldlm_flock_deadlock(req, lock)) {
-                                ldlm_flock_destroy(req, mode, *flags);
-                                *err = -EDEADLK;
-                                RETURN(LDLM_ITER_STOP);
-                        }
-
+                       /* add lock to blocking list before deadlock
+                        * check to prevent race */
                        rc = ldlm_flock_blocking_link(req, lock);
                        if (rc) {
                                ldlm_flock_destroy(req, mode, *flags);
                                *err = rc;
                                RETURN(LDLM_ITER_STOP);
                        }
                        rc = ldlm_flock_blocking_link(req, lock);
                        if (rc) {
                                ldlm_flock_destroy(req, mode, *flags);
                                *err = rc;
                                RETURN(LDLM_ITER_STOP);
                        }
+                       if (ldlm_flock_deadlock(req, lock)) {
+                               ldlm_flock_blocking_unlink(req);
+                               ldlm_flock_destroy(req, mode, *flags);
+                               *err = -EDEADLK;
+                               RETURN(LDLM_ITER_STOP);
+                       }
+
                         ldlm_resource_add_lock(res, &res->lr_waiting, req);
                         *flags |= LDLM_FL_BLOCK_GRANTED;
                         RETURN(LDLM_ITER_STOP);
                         ldlm_resource_add_lock(res, &res->lr_waiting, req);
                         *flags |= LDLM_FL_BLOCK_GRANTED;
                         RETURN(LDLM_ITER_STOP);