Whamcloud - gitweb
Branch: HEAD
[fs/lustre-release.git] / lustre / include / linux / lustre_dlm.h
index d85d7a1..3d089e4 100644 (file)
@@ -19,8 +19,6 @@
 struct obd_ops;
 struct obd_device;
 
-#define OBD_LDLM_DEVICENAME  "ldlm"
-
 #define LDLM_DEFAULT_LRU_SIZE 100
 
 typedef enum {
@@ -60,17 +58,24 @@ typedef enum {
 #define LDLM_FL_LOCAL_ONLY     0x000400 /* see ldlm_cli_cancel_unused */
 
 /* don't run the cancel callback under ldlm_cli_cancel_unused */
-#define LDLM_FL_NO_CALLBACK    0x000800
+#define LDLM_FL_FAILED         0x000800
 
 #define LDLM_FL_HAS_INTENT     0x001000 /* lock request has intent */
 #define LDLM_FL_CANCELING      0x002000 /* lock cancel has already been sent */
 #define LDLM_FL_LOCAL          0x004000 /* local lock (ie, no srv/cli split) */
 #define LDLM_FL_WARN           0x008000 /* see ldlm_cli_cancel_unused */
 #define LDLM_FL_DISCARD_DATA   0x010000 /* discard (no writeback) on cancel */
+#define LDLM_FL_CONFIG_CHANGE  0x020000 /* see ldlm_cli_cancel_unused */
+
+#define LDLM_FL_NO_TIMEOUT     0x040000 /* Blocked by group lock - wait
+                                         * indefinitely */
 
 /* file & record locking */
-#define LDLM_FL_BLOCK_NOWAIT   0x040000 // server told not to wait if blocked
-#define LDLM_FL_TEST_LOCK      0x080000 // return blocking lock
+#define LDLM_FL_BLOCK_NOWAIT   0x080000 /* server told not to wait if blocked */
+#define LDLM_FL_TEST_LOCK      0x100000 /* return blocking lock */
+#define LDLM_FL_GET_BLOCKING   0x200000 /* return updated blocking proc info */
+#define LDLM_FL_DEADLOCK_CHK   0x400000 /* check for deadlock */
+#define LDLM_FL_DEADLOCK_DEL   0x800000 /* lock no longer blocked */
 
 /* These are flags that are mapped into the flags and ASTs of blocking locks */
 #define LDLM_AST_DISCARD_DATA  0x80000000 /* Add FL_DISCARD to blocking ASTs */
@@ -87,6 +92,28 @@ typedef enum {
  * pretty high-risk, though, and would need a lot more testing. */
 #define LDLM_FL_CAN_MATCH      0x100000
 
+/* A lock contributes to the kms calculation until it has finished the part
+ * of its cancellation that performs writeback on its dirty pages.  It
+ * can remain on the granted list during this whole time.  Threads racing
+ * to update the kms after performing their writeback need to know to
+ * exclude each other's locks from the calculation as they walk the granted
+ * list. */
+#define LDLM_FL_KMS_IGNORE     0x200000 /* NOTE(review): same value as LDLM_FL_GET_BLOCKING - confirm the overlap is intended */
+
+/* completion ast to be executed */
+#define LDLM_FL_CP_REQD        0x400000 /* NOTE(review): same value as LDLM_FL_DEADLOCK_CHK - confirm the overlap is intended */
+
+/* cleanup_resource has already handled the lock */
+#define LDLM_FL_CLEANED        0x800000 /* NOTE(review): same value as LDLM_FL_DEADLOCK_DEL - confirm the overlap is intended */
+
+/* optimization hint: LDLM can run the blocking callback from the current
+ * context without involving a separate thread, in order to decrease the cs
+ * (presumably context-switch) rate */
+#define LDLM_FL_ATOMIC_CB      0x1000000
+
+/* while this flag is set, the lock can't change resource */
+#define LDLM_FL_LOCK_PROTECT   0x4000000
+#define LDLM_FL_LOCK_PROTECT_BIT  26 /* bit index of LDLM_FL_LOCK_PROTECT: 1 << 26 == 0x4000000 */
+
 /* The blocking callback is overloaded to perform two functions.  These flags
  * indicate which operation should be performed. */
 #define LDLM_CB_BLOCKING    1
@@ -99,6 +126,7 @@ typedef enum {
 #define LCK_COMPAT_CW  (LCK_COMPAT_PW | LCK_CW)
 #define LCK_COMPAT_CR  (LCK_COMPAT_CW | LCK_PR | LCK_PW)
 #define LCK_COMPAT_NL  (LCK_COMPAT_CR | LCK_EX)
+#define LCK_COMPAT_GROUP  (LCK_GROUP | LCK_NL)
 
 static ldlm_mode_t lck_compat_array[] = {
         [LCK_EX] LCK_COMPAT_EX,
@@ -106,12 +134,13 @@ static ldlm_mode_t lck_compat_array[] = {
         [LCK_PR] LCK_COMPAT_PR,
         [LCK_CW] LCK_COMPAT_CW,
         [LCK_CR] LCK_COMPAT_CR,
-        [LCK_NL] LCK_COMPAT_NL
+        [LCK_NL] LCK_COMPAT_NL,
+        [LCK_GROUP] LCK_COMPAT_GROUP
 };
 
 static inline void lockmode_verify(ldlm_mode_t mode)
 {
-       LASSERT(mode >= LCK_EX && mode <= LCK_NL);
+       LASSERT(mode >= LCK_EX && mode <= LCK_GROUP);
 }
 
 static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new)
@@ -134,6 +163,25 @@ static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new)
    -
 */
 
+/*
+ * Locking rules:
+ *
+ * lr_lock
+ *
+ * lr_lock
+ *     waiting_locks_spinlock
+ *
+ * lr_lock
+ *     led_lock
+ *
+ * lr_lock
+ *     ns_unused_lock
+ *
+ * lr_lvb_sem
+ *     lr_lock
+ *
+ */
+
 struct ldlm_lock;
 struct ldlm_resource;
 struct ldlm_namespace;
@@ -144,7 +192,9 @@ typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
 
 struct ldlm_valblock_ops {
         int (*lvbo_init)(struct ldlm_resource *res);
-        int (*lvbo_update)(struct ldlm_resource *res, struct lustre_msg *m,
+        
+        int (*lvbo_update)(struct ldlm_resource *res,
+                           struct lustre_msg *m,
                            int buf_idx, int increase);
 };
 
@@ -152,9 +202,9 @@ struct ldlm_namespace {
         char                  *ns_name;
         __u32                  ns_client; /* is this a client-side lock tree? */
         struct list_head      *ns_hash; /* hash table for ns */
+        spinlock_t             ns_hash_lock;
         __u32                  ns_refcount; /* count of resources in the hash */
         struct list_head       ns_root_list; /* all root resources in ns */
-        struct lustre_lock     ns_lock; /* protects hash, refcount, list */
         struct list_head       ns_list_chain; /* position in global NS list */
         /*
         struct proc_dir_entry *ns_proc_dir;
@@ -162,14 +212,17 @@ struct ldlm_namespace {
 
         struct list_head       ns_unused_list; /* all root resources in ns */
         int                    ns_nr_unused;
+        spinlock_t             ns_unused_lock;
+
         unsigned int           ns_max_unused;
+        unsigned long          ns_next_dump;   /* next dump time */
 
-        spinlock_t             ns_counter_lock;
-        __u64                  ns_locks;
+        atomic_t               ns_locks;
         __u64                  ns_resources;
         ldlm_res_policy        ns_policy;
         struct ldlm_valblock_ops *ns_lvbo;
-        void                    *ns_lvbp;
+        void                  *ns_lvbp;
+        wait_queue_head_t      ns_waitq;
 };
 
 /*
@@ -194,14 +247,27 @@ typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data);
 struct ldlm_lock {
         struct portals_handle l_handle; // must be first in the structure
         atomic_t              l_refc;
+
+        /* ldlm_lock_change_resource() can change this */
         struct ldlm_resource *l_resource;
+
+        /* set once, no need to protect it */
         struct ldlm_lock     *l_parent;
+
+        /* protected by ns_hash_lock */
         struct list_head      l_children;
         struct list_head      l_childof;
+
+        /* protected by ns_hash_lock. FIXME */
         struct list_head      l_lru;
+
+        /* protected by lr_lock */
         struct list_head      l_res_link; // position in one of three res lists
+
+        /* protected by led_lock */
         struct list_head      l_export_chain; // per-export chain of locks
 
+        /* protected by lr_lock */
         ldlm_mode_t           l_req_mode;
         ldlm_mode_t           l_granted_mode;
 
@@ -211,10 +277,14 @@ struct ldlm_lock {
 
         struct obd_export    *l_export;
         struct obd_export    *l_conn_export;
+
+        /* protected by lr_lock */
         __u32                 l_flags;
+
         struct lustre_handle  l_remote_handle;
         ldlm_policy_data_t    l_policy_data;
 
+        /* protected by lr_lock */
         __u32                 l_readers;
         __u32                 l_writers;
         __u8                  l_destroyed;
@@ -235,31 +305,48 @@ struct ldlm_lock {
         void                 *l_ast_data;
 
         /* Server-side-only members */
+
+        /* protected by elt_lock */
         struct list_head      l_pending_chain;  /* callbacks pending */
         unsigned long         l_callback_timeout;
+
+        __u32                 l_pid;            /* pid which created this lock */
+        __u32                 l_pidb;           /* who holds LOCK_PROTECT_BIT */
+
+        struct list_head      l_tmp;
+
+        /* for ldlm_add_ast_work_item() */
+        struct list_head      l_bl_ast;
+        struct list_head      l_cp_ast;
+        struct ldlm_lock     *l_blocking_lock; 
+        int                   l_bl_ast_run;
 };
 
 #define LDLM_PLAIN       10
 #define LDLM_EXTENT      11
 #define LDLM_FLOCK       12
+#define LDLM_IBITS       13
 
 #define LDLM_MIN_TYPE 10
-#define LDLM_MAX_TYPE 12
+#define LDLM_MAX_TYPE 14
 
 struct ldlm_resource {
         struct ldlm_namespace *lr_namespace;
+
+        /* protected by ns_hash_lock */
         struct list_head       lr_hash;
         struct ldlm_resource  *lr_parent;   /* 0 for a root resource */
         struct list_head       lr_children; /* list head for child resources */
         struct list_head       lr_childof;  /* part of ns_root_list if root res,
                                              * part of lr_children if child */
+        spinlock_t             lr_lock;
 
+        /* protected by lr_lock */
         struct list_head       lr_granted;
         struct list_head       lr_converting;
         struct list_head       lr_waiting;
         ldlm_mode_t            lr_most_restr;
         __u32                  lr_type; /* LDLM_PLAIN or LDLM_EXTENT */
-        struct ldlm_resource  *lr_root;
         struct ldlm_res_id     lr_name;
         atomic_t               lr_refcount;
 
@@ -295,22 +382,23 @@ do {                                                                          \
                 CDEBUG(level, "### " format                                   \
                        " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
                        "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: "   \
-                       LPX64" expref: %d\n" , ## a, lock,                     \
+                       LPX64" expref: %d pid: %u\n" , ## a, lock,             \
                        lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
                        lock->l_readers, lock->l_writers,                      \
                        ldlm_lockname[lock->l_granted_mode],                   \
                        ldlm_lockname[lock->l_req_mode],                       \
                        lock->l_flags, lock->l_remote_handle.cookie,           \
                        lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99);     \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                       lock->l_pid);                                          \
                 break;                                                        \
         }                                                                     \
         if (lock->l_resource->lr_type == LDLM_EXTENT) {                       \
                 CDEBUG(level, "### " format                                   \
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\
-                       "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64     \
-                       " expref: %d\n" , ## a,                                \
+                       "res: "LPU64"/"LPU64"/"LPU64" rrc: %d type: %s ["LPU64 \
+                      "->"LPU64"] (req "LPU64"->"LPU64") flags: %x remote: " \
+                      LPX64" expref: %d pid: %u\n" , ## a,                   \
                        lock->l_resource->lr_namespace->ns_name, lock,         \
                        lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
                        lock->l_readers, lock->l_writers,                      \
@@ -318,6 +406,7 @@ do {                                                                          \
                        ldlm_lockname[lock->l_req_mode],                       \
                        lock->l_resource->lr_name.name[0],                     \
                        lock->l_resource->lr_name.name[1],                     \
+                       lock->l_resource->lr_name.name[2],                     \
                        atomic_read(&lock->l_resource->lr_refcount),           \
                        ldlm_typename[lock->l_resource->lr_type],              \
                        lock->l_policy_data.l_extent.start,                    \
@@ -325,15 +414,16 @@ do {                                                                          \
                        lock->l_req_extent.start, lock->l_req_extent.end,      \
                        lock->l_flags, lock->l_remote_handle.cookie,           \
                        lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99);     \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                       lock->l_pid);                                          \
                 break;                                                        \
         }                                                                     \
         if (lock->l_resource->lr_type == LDLM_FLOCK) {                        \
                 CDEBUG(level, "### " format                                   \
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d "       \
-                       "["LPU64"->"LPU64"] flags: %x remote: "LPX64           \
-                       " expref: %d\n" , ## a,                                \
+                       "res: "LPU64"/"LPU64"/"LPU64" rrc: %d type: %s "       \
+                      "pid: "LPU64" nid: "LPU64" ["LPU64"->"LPU64"] "        \
+                       "flags: %x remote: "LPX64" expref: %d pid: %u\n", ## a,\
                        lock->l_resource->lr_namespace->ns_name, lock,         \
                        lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
                        lock->l_readers, lock->l_writers,                      \
@@ -341,21 +431,49 @@ do {                                                                          \
                        ldlm_lockname[lock->l_req_mode],                       \
                        lock->l_resource->lr_name.name[0],                     \
                        lock->l_resource->lr_name.name[1],                     \
+                       lock->l_resource->lr_name.name[2],                     \
                        atomic_read(&lock->l_resource->lr_refcount),           \
                        ldlm_typename[lock->l_resource->lr_type],              \
                        lock->l_policy_data.l_flock.pid,                       \
+                       lock->l_policy_data.l_flock.nid,                       \
                        lock->l_policy_data.l_flock.start,                     \
                        lock->l_policy_data.l_flock.end,                       \
                        lock->l_flags, lock->l_remote_handle.cookie,           \
                        lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99);     \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                      lock->l_pid);                                          \
+                break;                                                        \
+        }                                                                     \
+        if (lock->l_resource->lr_type == LDLM_IBITS) {                        \
+                CDEBUG(level, "### " format                                   \
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
+                       "res: "LPU64"/"LPU64"/"LPU64" bits "LPX64" rrc: %d "   \
+                      "type: %s flags: %x remote: "LPX64" expref: %d "       \
+                      "pid %u\n" , ## a,                                     \
+                       lock->l_resource->lr_namespace->ns_name,               \
+                       lock, lock->l_handle.h_cookie,                         \
+                       atomic_read (&lock->l_refc),                           \
+                       lock->l_readers, lock->l_writers,                      \
+                       ldlm_lockname[lock->l_granted_mode],                   \
+                       ldlm_lockname[lock->l_req_mode],                       \
+                       lock->l_resource->lr_name.name[0],                     \
+                       lock->l_resource->lr_name.name[1],                     \
+                       lock->l_resource->lr_name.name[2],                     \
+                       lock->l_policy_data.l_inodebits.bits,                  \
+                       atomic_read(&lock->l_resource->lr_refcount),           \
+                       ldlm_typename[lock->l_resource->lr_type],              \
+                       lock->l_flags, lock->l_remote_handle.cookie,           \
+                       lock->l_export ?                                       \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                      lock->l_pid);                                          \
                 break;                                                        \
         }                                                                     \
         {                                                                     \
                 CDEBUG(level, "### " format                                   \
                        " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x "     \
-                       "remote: "LPX64" expref: %d\n" , ## a,                 \
+                       "res: "LPU64"/"LPU64"/"LPU64"/"LPU64" rrc: %d type: %s " \
+                       "flags: %x remote: "LPX64" expref: %d "                \
+                      "pid: %u\n" , ## a,                                    \
                        lock->l_resource->lr_namespace->ns_name,               \
                        lock, lock->l_handle.h_cookie,                         \
                        atomic_read (&lock->l_refc),                           \
@@ -364,11 +482,14 @@ do {                                                                          \
                        ldlm_lockname[lock->l_req_mode],                       \
                        lock->l_resource->lr_name.name[0],                     \
                        lock->l_resource->lr_name.name[1],                     \
+                       lock->l_resource->lr_name.name[2],                     \
+                       lock->l_resource->lr_name.name[3],                     \
                        atomic_read(&lock->l_resource->lr_refcount),           \
                        ldlm_typename[lock->l_resource->lr_type],              \
                        lock->l_flags, lock->l_remote_handle.cookie,           \
                        lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99);     \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                       lock->l_pid);                                          \
         }                                                                     \
 } while (0)
 
@@ -380,7 +501,8 @@ do {                                                                          \
         CDEBUG(D_DLMTRACE, "### " format "\n" , ## a)
 
 typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, int *flags,
-                                      int first_enq, ldlm_error_t *err);
+                                      int first_enq, ldlm_error_t *err,
+                                      struct list_head *work_list);
 
 /*
  * Iterators.
@@ -405,6 +527,7 @@ void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *,
 
 /* ldlm_flock.c */
 int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data);
+int ldlm_handle_flock_deadlock_check(struct ptlrpc_request *req);
 
 /* ldlm_extent.c */
 __u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms);
@@ -500,9 +623,9 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
                             struct ldlm_lock *lock);
 void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
 void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(void);
-void ldlm_namespace_dump(struct ldlm_namespace *);
-void ldlm_resource_dump(struct ldlm_resource *);
+void ldlm_dump_all_namespaces(int level);
+void ldlm_namespace_dump(int level, struct ldlm_namespace *);
+void ldlm_resource_dump(int level, struct ldlm_resource *);
 int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
                               struct ldlm_res_id);
 
@@ -550,4 +673,36 @@ int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 #define IOC_LDLM_REGRESS_STOP           _IOWR('f', 43, long)
 #define IOC_LDLM_MAX_NR                 43
 
+static inline void lock_res(struct ldlm_resource *res)
+{       /* Acquire the spinlock embedded in the resource (res->lr_lock). */
+        spin_lock(&res->lr_lock);
+}
+
+static inline void unlock_res(struct ldlm_resource *res)
+{       /* Release the spinlock embedded in the resource (res->lr_lock). */
+        spin_unlock(&res->lr_lock);
+}
+
+static inline void check_res_locked(struct ldlm_resource *res)
+{       /* Debug check: hands res->lr_lock to LASSERT_SPIN_LOCKED (presumably asserts it is held - confirm). */
+        LASSERT_SPIN_LOCKED(&res->lr_lock);
+}
+#ifdef __KERNEL__
+static inline void lock_bitlock(struct ldlm_lock *lock)
+{       /* Take the bit lock on LDLM_FL_LOCK_PROTECT_BIT of lock->l_flags, then record the owning pid in l_pidb. */
+        bit_spin_lock(LDLM_FL_LOCK_PROTECT_BIT, (void *) &lock->l_flags); /* NOTE(review): l_flags is declared __u32 but is passed as void * - confirm the bit op's word-size/endianness assumptions */
+        LASSERT(lock->l_pidb == 0);     /* no owner may be recorded once the bit lock is taken */
+        lock->l_pidb = current->pid;    /* remember which pid holds the bit lock */
+}
+
+static inline void unlock_bitlock(struct ldlm_lock *lock)
+{       /* Verify the caller is the recorded owner, clear l_pidb, then drop the bit lock on lock->l_flags. */
+        LASSERT(lock->l_pidb == current->pid); /* only the pid stored by lock_bitlock() may unlock */
+        lock->l_pidb = 0;               /* cleared before the bit is released, so the next lock_bitlock() sees 0 */
+        bit_spin_unlock(LDLM_FL_LOCK_PROTECT_BIT, (void *) &lock->l_flags); /* NOTE(review): l_flags is declared __u32 but is passed as void * - confirm the bit op's word-size/endianness assumptions */
+}
+#endif
+struct ldlm_resource * lock_res_and_lock(struct ldlm_lock *lock);
+void unlock_res_and_lock(struct ldlm_lock *lock);
+
 #endif