Whamcloud - gitweb
LU-1156 ldlm: per-export waiting flock lists for deadlock detection
author Vitaly Fertman <vitaly_fertman@xyratex.com>
Thu, 1 Mar 2012 16:50:05 +0000 (20:50 +0400)
committer Oleg Drokin <green@whamcloud.com>
Sat, 7 Apr 2012 01:30:42 +0000 (21:30 -0400)
Replace the single global list of waiting flocks, protected by a single
global spinlock, with a per-export list protected by a per-export lock.

Change-Id: Ib59c1b4e0e3d08e07505bdfffc007ac2850e9258
Xyratex-Bug-ID: MRP-382
Reviewed-by: Alexey Lyashkov <alexey_lyashkov@xyratex.com>
Reviewed-by: Andriy Skulysh <andriy_skulysh@xyratex.com>
Signed-off-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Reviewed-on: http://review.whamcloud.com/2239
Tested-by: Hudson
Reviewed-by: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_export.h
lustre/ldlm/ldlm_flock.c
lustre/obdclass/genops.c

index 0dbe3a6..8bac3f3 100644 (file)
@@ -213,8 +213,13 @@ struct obd_export {
         __u32                     exp_conn_cnt;
         /** Hash list of all ldlm locks granted on this export */
         cfs_hash_t               *exp_lock_hash;
         __u32                     exp_conn_cnt;
         /** Hash list of all ldlm locks granted on this export */
         cfs_hash_t               *exp_lock_hash;
-        /** lock to protect exp_lock_hash accesses */
-        cfs_spinlock_t            exp_lock_hash_lock;
+        /** Lock protecting access to exp_flock_wait_list */
+        cfs_rwlock_t              exp_flock_wait_lock;
+        /**
+         * Wait queue for Posix lock deadlock detection, added with
+         * ldlm_lock::l_flock_waitq.
+         */
+        cfs_list_t                exp_flock_wait_list;
         cfs_list_t                exp_outstanding_replies;
         cfs_list_t                exp_uncommitted_replies;
         cfs_spinlock_t            exp_uncommitted_replies_lock;
         cfs_list_t                exp_outstanding_replies;
         cfs_list_t                exp_uncommitted_replies;
         cfs_spinlock_t            exp_uncommitted_replies_lock;
index 1303e8f..c988f28 100644 (file)
 
 #define l_flock_waitq   l_lru
 
 
 #define l_flock_waitq   l_lru
 
-/**
- * Wait queue for Posix lock deadlock detection, added with
- * ldlm_lock::l_flock_waitq.
- */
-static CFS_LIST_HEAD(ldlm_flock_waitq);
-/**
- * Lock protecting access to ldlm_flock_waitq.
- */
-cfs_spinlock_t ldlm_flock_waitq_lock = CFS_SPIN_LOCK_UNLOCKED;
-
 int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                             void *data, int flag);
 
 int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                             void *data, int flag);
 
@@ -98,6 +88,43 @@ ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new)
                 lock->l_policy_data.l_flock.start));
 }
 
                 lock->l_policy_data.l_flock.start));
 }
 
+static inline void ldlm_flock_blocking_link(struct ldlm_lock *req,
+                                            struct ldlm_lock *lock)
+{
+        /* Queue \a req, which is blocked by \a lock, on the flock wait
+         * list of its own export (req->l_export->exp_flock_wait_list).
+         *
+         * For server only: a request without an export is skipped.
+         *
+         * \a req must not be queued already (asserted below).  Under
+         * the export's exp_flock_wait_lock held for writing, the owner
+         * and export of \a lock are recorded in \a req as
+         * blocking_owner / blocking_export.  blocking_export is
+         * obtained via class_export_get(); the matching
+         * class_export_put() is in ldlm_flock_blocking_unlink(). */
+        if (req->l_export == NULL)
+                return;
+
+        LASSERT(cfs_list_empty(&req->l_flock_waitq));
+        cfs_write_lock(&req->l_export->exp_flock_wait_lock);
+
+        req->l_policy_data.l_flock.blocking_owner =
+                lock->l_policy_data.l_flock.owner;
+        req->l_policy_data.l_flock.blocking_export =
+                class_export_get(lock->l_export);
+
+        cfs_list_add_tail(&req->l_flock_waitq,
+                          &req->l_export->exp_flock_wait_list);
+        cfs_write_unlock(&req->l_export->exp_flock_wait_lock);
+}
+
+static inline void ldlm_flock_blocking_unlink(struct ldlm_lock *req)
+{
+        /* Undo ldlm_flock_blocking_link(): take \a req off the flock
+         * wait list of its export.
+         *
+         * For server only: a request without an export is skipped.
+         *
+         * Under the export's exp_flock_wait_lock held for writing, a
+         * queued \a req is unlinked, class_export_put() is called on
+         * its blocking_export, and blocking_owner / blocking_export
+         * are reset.  A request that is not queued is left untouched. */
+        if (req->l_export == NULL)
+                return;
+
+        cfs_write_lock(&req->l_export->exp_flock_wait_lock);
+        if (!cfs_list_empty(&req->l_flock_waitq)) {
+                cfs_list_del_init(&req->l_flock_waitq);
+
+                class_export_put(req->l_policy_data.l_flock.blocking_export);
+                req->l_policy_data.l_flock.blocking_owner = 0;
+                req->l_policy_data.l_flock.blocking_export = NULL;
+        }
+        cfs_write_unlock(&req->l_export->exp_flock_wait_lock);
+}
+
 static inline void
 ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags)
 {
 static inline void
 ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags)
 {
@@ -125,33 +152,45 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags)
 }
 
 static int
 }
 
 static int
-ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *blocking_lock)
+ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock)
 {
 {
-        struct obd_export *req_export = req->l_export;
-        struct obd_export *blocking_export = blocking_lock->l_export;
+        struct obd_export *req_exp = req->l_export;
+        struct obd_export *bl_exp = bl_lock->l_export;
+        struct obd_export *bl_exp_new;
         __u64 req_owner = req->l_policy_data.l_flock.owner;
         __u64 req_owner = req->l_policy_data.l_flock.owner;
-        __u64 blocking_owner = blocking_lock->l_policy_data.l_flock.owner;
+        __u64 bl_owner = bl_lock->l_policy_data.l_flock.owner;
         struct ldlm_lock *lock;
 
         struct ldlm_lock *lock;
 
-        cfs_spin_lock(&ldlm_flock_waitq_lock);
+        /* For server only */
+        if (req_exp == NULL)
+                return 0;
+
+        class_export_get(bl_exp);
 restart:
 restart:
-        cfs_list_for_each_entry(lock, &ldlm_flock_waitq, l_flock_waitq) {
-                if ((lock->l_policy_data.l_flock.owner != blocking_owner) ||
-                    (lock->l_export != blocking_export))
+        cfs_read_lock(&bl_exp->exp_flock_wait_lock);
+        cfs_list_for_each_entry(lock, &bl_exp->exp_flock_wait_list,
+                                l_flock_waitq) {
+                struct ldlm_flock *flock = &lock->l_policy_data.l_flock;
+
+                /* want to find something from same client and same process */
+                if (flock->owner != bl_owner)
                         continue;
 
                         continue;
 
-                blocking_owner = lock->l_policy_data.l_flock.blocking_owner;
-                blocking_export = (struct obd_export *)
-                        lock->l_policy_data.l_flock.blocking_export;
-                if (blocking_owner == req_owner &&
-                    blocking_export == req_export) {
-                        cfs_spin_unlock(&ldlm_flock_waitq_lock);
+                bl_owner = flock->blocking_owner;
+                bl_exp_new = class_export_get(flock->blocking_export);
+                cfs_read_unlock(&bl_exp->exp_flock_wait_lock);
+                class_export_put(bl_exp);
+                bl_exp = bl_exp_new;
+
+                if (bl_owner == req_owner && bl_exp == req_exp) {
+                        class_export_put(bl_exp);
                         return 1;
                 }
 
                 goto restart;
         }
                         return 1;
                 }
 
                 goto restart;
         }
-        cfs_spin_unlock(&ldlm_flock_waitq_lock);
+        cfs_read_unlock(&bl_exp->exp_flock_wait_lock);
+        class_export_put(bl_exp);
 
         return 0;
 }
 
         return 0;
 }
@@ -254,17 +293,8 @@ reprocess:
                                 RETURN(LDLM_ITER_STOP);
                         }
 
                                 RETURN(LDLM_ITER_STOP);
                         }
 
-                        req->l_policy_data.l_flock.blocking_owner =
-                                lock->l_policy_data.l_flock.owner;
-                        req->l_policy_data.l_flock.blocking_export =
-                                lock->l_export;
-
-                        LASSERT(cfs_list_empty(&req->l_flock_waitq));
-                        cfs_spin_lock(&ldlm_flock_waitq_lock);
-                        cfs_list_add_tail(&req->l_flock_waitq,
-                                          &ldlm_flock_waitq);
-                        cfs_spin_unlock(&ldlm_flock_waitq_lock);
 
 
+                        ldlm_flock_blocking_link(req, lock);
                         ldlm_resource_add_lock(res, &res->lr_waiting, req);
                         *flags |= LDLM_FL_BLOCK_GRANTED;
                         RETURN(LDLM_ITER_STOP);
                         ldlm_resource_add_lock(res, &res->lr_waiting, req);
                         *flags |= LDLM_FL_BLOCK_GRANTED;
                         RETURN(LDLM_ITER_STOP);
@@ -280,9 +310,7 @@ reprocess:
 
         /* In case we had slept on this lock request take it off of the
          * deadlock detection waitq. */
 
         /* In case we had slept on this lock request take it off of the
          * deadlock detection waitq. */
-        cfs_spin_lock(&ldlm_flock_waitq_lock);
-        cfs_list_del_init(&req->l_flock_waitq);
-        cfs_spin_unlock(&ldlm_flock_waitq_lock);
+        ldlm_flock_blocking_unlink(req);
 
         /* Scan the locks owned by this process that overlap this request.
          * We may have to merge or split existing locks. */
 
         /* Scan the locks owned by this process that overlap this request.
          * We may have to merge or split existing locks. */
@@ -496,9 +524,7 @@ ldlm_flock_interrupted_wait(void *data)
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
 
         /* take lock off the deadlock detection waitq. */
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
 
         /* take lock off the deadlock detection waitq. */
-        cfs_spin_lock(&ldlm_flock_waitq_lock);
-        cfs_list_del_init(&lock->l_flock_waitq);
-        cfs_spin_unlock(&ldlm_flock_waitq_lock);
+        ldlm_flock_blocking_unlink(lock);
 
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
 
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
@@ -607,9 +633,7 @@ granted:
         LDLM_DEBUG(lock, "client-side enqueue granted");
 
         /* take lock off the deadlock detection waitq. */
         LDLM_DEBUG(lock, "client-side enqueue granted");
 
         /* take lock off the deadlock detection waitq. */
-        cfs_spin_lock(&ldlm_flock_waitq_lock);
-        cfs_list_del_init(&lock->l_flock_waitq);
-        cfs_spin_unlock(&ldlm_flock_waitq_lock);
+        ldlm_flock_blocking_unlink(lock);
 
         lock_res_and_lock(lock);
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
 
         lock_res_and_lock(lock);
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
@@ -658,9 +682,7 @@ int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
         LASSERT(flag == LDLM_CB_CANCELING);
 
         /* take lock off the deadlock detection waitq. */
         LASSERT(flag == LDLM_CB_CANCELING);
 
         /* take lock off the deadlock detection waitq. */
-        cfs_spin_lock(&ldlm_flock_waitq_lock);
-        cfs_list_del_init(&lock->l_flock_waitq);
-        cfs_spin_unlock(&ldlm_flock_waitq_lock);
+        ldlm_flock_blocking_unlink(lock);
         RETURN(0);
 }
 
         RETURN(0);
 }
 
index 1584930..ad0330d 100644 (file)
@@ -783,6 +783,7 @@ static void class_export_destroy(struct obd_export *exp)
         if (exp->exp_connection)
                 ptlrpc_put_connection_superhack(exp->exp_connection);
 
         if (exp->exp_connection)
                 ptlrpc_put_connection_superhack(exp->exp_connection);
 
+        LASSERT(cfs_list_empty(&exp->exp_flock_wait_list));
         LASSERT(cfs_list_empty(&exp->exp_outstanding_replies));
         LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies));
         LASSERT(cfs_list_empty(&exp->exp_req_replay_queue));
         LASSERT(cfs_list_empty(&exp->exp_outstanding_replies));
         LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies));
         LASSERT(cfs_list_empty(&exp->exp_req_replay_queue));
@@ -856,6 +857,8 @@ struct obd_export *class_new_export(struct obd_device *obd,
 #endif
         cfs_atomic_set(&export->exp_replay_count, 0);
         export->exp_obd = obd;
 #endif
         cfs_atomic_set(&export->exp_replay_count, 0);
         export->exp_obd = obd;
+        CFS_INIT_LIST_HEAD(&export->exp_flock_wait_list);
+        cfs_rwlock_init(&export->exp_flock_wait_lock);
         CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies);
         cfs_spin_lock_init(&export->exp_uncommitted_replies_lock);
         CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies);
         CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies);
         cfs_spin_lock_init(&export->exp_uncommitted_replies_lock);
         CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies);