Whamcloud - gitweb
LU-571 ldlm: add parallel ast flow control
[fs/lustre-release.git] / lustre / ldlm / ldlm_lock.c
index d788767..eaaaa43 100644 (file)
@@ -28,6 +28,9 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011 Whamcloud, Inc.
+ *
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -43,9 +46,7 @@
 
 #ifdef __KERNEL__
 # include <libcfs/libcfs.h>
-# ifndef HAVE_VFS_INTENT_PATCHES
 # include <linux/lustre_intent.h>
-# endif
 #else
 # include <liblustre.h>
 #endif
@@ -73,6 +74,48 @@ char *ldlm_typename[] = {
         [LDLM_IBITS] "IBT",
 };
 
+static ldlm_policy_wire_to_local_t ldlm_policy_wire_to_local[] = {
+        [LDLM_PLAIN - LDLM_MIN_TYPE] ldlm_plain_policy_wire_to_local,
+        [LDLM_EXTENT - LDLM_MIN_TYPE] ldlm_extent_policy_wire_to_local,
+        [LDLM_FLOCK - LDLM_MIN_TYPE] ldlm_flock_policy_wire_to_local,
+        [LDLM_IBITS - LDLM_MIN_TYPE] ldlm_ibits_policy_wire_to_local,
+}; /* wire-to-local converters, indexed by (lock type - LDLM_MIN_TYPE) */
+
+static ldlm_policy_local_to_wire_t ldlm_policy_local_to_wire[] = {
+        [LDLM_PLAIN - LDLM_MIN_TYPE] ldlm_plain_policy_local_to_wire,
+        [LDLM_EXTENT - LDLM_MIN_TYPE] ldlm_extent_policy_local_to_wire,
+        [LDLM_FLOCK - LDLM_MIN_TYPE] ldlm_flock_policy_local_to_wire,
+        [LDLM_IBITS - LDLM_MIN_TYPE] ldlm_ibits_policy_local_to_wire,
+}; /* local-to-wire converters, indexed by (lock type - LDLM_MIN_TYPE) */
+
+/**
+ * Converts lock policy from local format to the on-the-wire lock_desc format
+ */
+void ldlm_convert_policy_to_wire(ldlm_type_t type,
+                                 const ldlm_policy_data_t *lpolicy,
+                                 ldlm_wire_policy_data_t *wpolicy)
+{
+        ldlm_policy_local_to_wire_t convert; /* picked by lock type below */
+
+        convert = ldlm_policy_local_to_wire[type - LDLM_MIN_TYPE];
+
+        convert(lpolicy, wpolicy); /* NOTE(review): type not range-checked */
+}
+
+/**
+ * Converts lock policy from the on-the-wire lock_desc format to local format
+ */
+void ldlm_convert_policy_to_local(ldlm_type_t type,
+                                  const ldlm_wire_policy_data_t *wpolicy,
+                                  ldlm_policy_data_t *lpolicy)
+{
+        ldlm_policy_wire_to_local_t convert; /* picked by lock type below */
+
+        convert = ldlm_policy_wire_to_local[type - LDLM_MIN_TYPE];
+
+        convert(wpolicy, lpolicy); /* NOTE(review): type not range-checked */
+}
+
 char *ldlm_it2str(int it)
 {
         switch (it) {
@@ -471,8 +514,7 @@ void ldlm_lock2handle(const struct ldlm_lock *lock, struct lustre_handle *lockh)
 struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
                                      int flags)
 {
-        struct ldlm_namespace *ns;
-        struct ldlm_lock *lock, *retval = NULL;
+        struct ldlm_lock *lock;
         ENTRY;
 
         LASSERT(handle);
@@ -481,36 +523,36 @@ struct ldlm_lock *__ldlm_handle2lock(const struct lustre_handle *handle,
         if (lock == NULL)
                 RETURN(NULL);
 
-        LASSERT(lock->l_resource != NULL);
-        ns = ldlm_lock_to_ns(lock);
-        LASSERT(ns != NULL);
+        /* It's unlikely but possible that someone marked the lock as
+         * destroyed after we did handle2object on it */
+        if (flags == 0 && !lock->l_destroyed) {
+                lu_ref_add(&lock->l_reference, "handle", cfs_current());
+                RETURN(lock);
+        }
 
-        lu_ref_add_atomic(&lock->l_reference, "handle", cfs_current());
         lock_res_and_lock(lock);
 
-        /* It's unlikely but possible that someone marked the lock as
-         * destroyed after we did handle2object on it */
-        if (lock->l_destroyed) {
+        LASSERT(lock->l_resource != NULL);
+
+        lu_ref_add_atomic(&lock->l_reference, "handle", cfs_current());
+        if (unlikely(lock->l_destroyed)) {
                 unlock_res_and_lock(lock);
                 CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock);
                 LDLM_LOCK_PUT(lock);
-                GOTO(out, retval);
+                RETURN(NULL);
         }
 
         if (flags && (lock->l_flags & flags)) {
                 unlock_res_and_lock(lock);
                 LDLM_LOCK_PUT(lock);
-                GOTO(out, retval);
+                RETURN(NULL);
         }
 
         if (flags)
                 lock->l_flags |= flags;
 
         unlock_res_and_lock(lock);
-        retval = lock;
-        EXIT;
- out:
-        return retval;
+        RETURN(lock);
 }
 
 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
@@ -555,7 +597,9 @@ void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
                 ldlm_res2desc(lock->l_resource, &desc->l_resource);
                 desc->l_req_mode = lock->l_req_mode;
                 desc->l_granted_mode = lock->l_granted_mode;
-                desc->l_policy_data = lock->l_policy_data;
+                ldlm_convert_policy_to_wire(lock->l_resource->lr_type,
+                                            &lock->l_policy_data,
+                                            &desc->l_policy_data);
         }
 }
 
@@ -726,7 +770,11 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
                         ldlm_handle_bl_callback(ns, NULL, lock);
         } else if (ns_is_client(ns) &&
                    !lock->l_readers && !lock->l_writers &&
+                   !(lock->l_flags & LDLM_FL_NO_LRU) &&
                    !(lock->l_flags & LDLM_FL_BL_AST)) {
+
+                LDLM_DEBUG(lock, "add lock into lru list");
+
                 /* If this is a client-side namespace and this was the last
                  * reference, put it on the LRU. */
                 ldlm_lock_add_to_lru(lock);
@@ -742,6 +790,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
                     !ns_connect_lru_resize(ns))
                         ldlm_cancel_lru(ns, 0, LDLM_ASYNC, 0);
         } else {
+                LDLM_DEBUG(lock, "do not add lock into lru list");
                 unlock_res_and_lock(lock);
         }
 
@@ -1172,6 +1221,40 @@ ldlm_mode_t ldlm_lock_match(struct ldlm_namespace *ns, int flags,
         return rc ? mode : 0;
 }
 
+ldlm_mode_t ldlm_revalidate_lock_handle(struct lustre_handle *lockh,
+                                        __u64 *bits)
+{
+        struct ldlm_lock *lock;
+        ldlm_mode_t mode = 0; /* returned as 0 unless every check passes */
+        ENTRY;
+
+        lock = ldlm_handle2lock(lockh);
+        if (lock != NULL) {
+                lock_res_and_lock(lock);
+                if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED)
+                        GOTO(out, mode); /* destroyed or failed lock */
+
+                if (lock->l_flags & LDLM_FL_CBPENDING &&
+                    lock->l_readers == 0 && lock->l_writers == 0)
+                        GOTO(out, mode); /* CBPENDING, no readers/writers */
+
+                if (bits) /* optional output */
+                        *bits = lock->l_policy_data.l_inodebits.bits;
+                mode = lock->l_granted_mode;
+                ldlm_lock_addref_internal_nolock(lock, mode);
+        }
+
+        EXIT;
+
+out: /* reached by fall-through as well as by GOTO */
+        if (lock != NULL) {
+                unlock_res_and_lock(lock);
+                LDLM_LOCK_PUT(lock); /* presumably drops lookup ref; confirm */
+        }
+        return mode;
+}
+EXPORT_SYMBOL(ldlm_revalidate_lock_handle);
+
 /* Returns a referenced lock */
 struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
                                    const struct ldlm_res_id *res_id,
@@ -1372,32 +1455,13 @@ int ldlm_reprocess_queue(struct ldlm_resource *res, cfs_list_t *queue,
         RETURN(rc);
 }
 
-/* Helper function for ldlm_run_ast_work().
- *
- * Send an existing rpc set specified by @arg->set and then
- * destroy it. Create new one if @do_create flag is set. */
-static void
-ldlm_send_and_maybe_create_set(struct ldlm_cb_set_arg *arg, int do_create)
-{
-        ENTRY;
-
-        ptlrpc_set_wait(arg->set);
-        if (arg->type == LDLM_BL_CALLBACK)
-                OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_GLIMPSE, 2);
-        ptlrpc_set_destroy(arg->set);
-
-        if (do_create)
-                arg->set = ptlrpc_prep_set();
-
-        EXIT;
-}
-
 static int
 ldlm_work_bl_ast_lock(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg)
 {
         struct ldlm_lock_desc d;
         struct ldlm_lock *lock = cfs_list_entry(tmp, struct ldlm_lock,
                                                 l_bl_ast);
+        int rc;
         ENTRY;
 
         /* nobody should touch l_bl_ast */
@@ -1412,13 +1476,13 @@ ldlm_work_bl_ast_lock(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg)
 
         ldlm_lock2desc(lock->l_blocking_lock, &d);
 
-        lock->l_blocking_ast(lock, &d, (void *)arg,
-                             LDLM_CB_BLOCKING);
+        rc = lock->l_blocking_ast(lock, &d, (void *)arg,
+                                  LDLM_CB_BLOCKING);
         LDLM_LOCK_RELEASE(lock->l_blocking_lock);
         lock->l_blocking_lock = NULL;
         LDLM_LOCK_RELEASE(lock);
 
-        RETURN(1);
+        RETURN(rc);
 }
 
 static int
@@ -1450,10 +1514,8 @@ ldlm_work_cp_ast_lock(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg)
         lock->l_flags &= ~LDLM_FL_CP_REQD;
         unlock_res_and_lock(lock);
 
-        if (completion_callback != NULL) {
-                completion_callback(lock, 0, (void *)arg);
-                rc = 1;
-        }
+        if (completion_callback != NULL)
+                rc = completion_callback(lock, 0, (void *)arg);
         LDLM_LOCK_RELEASE(lock);
 
         RETURN(rc);
@@ -1465,6 +1527,7 @@ ldlm_work_revoke_ast_lock(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg)
         struct ldlm_lock_desc desc;
         struct ldlm_lock *lock = cfs_list_entry(tmp, struct ldlm_lock,
                                                 l_rk_ast);
+        int rc;
         ENTRY;
 
         cfs_list_del_init(&lock->l_rk_ast);
@@ -1474,27 +1537,29 @@ ldlm_work_revoke_ast_lock(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg)
         desc.l_req_mode = LCK_EX;
         desc.l_granted_mode = 0;
 
-        lock->l_blocking_ast(lock, &desc, (void*)arg, LDLM_CB_BLOCKING);
+        rc = lock->l_blocking_ast(lock, &desc, (void*)arg, LDLM_CB_BLOCKING);
         LDLM_LOCK_RELEASE(lock);
 
-        RETURN(1);
+        RETURN(rc);
 }
 
-int ldlm_run_ast_work(cfs_list_t *rpc_list, ldlm_desc_ast_t ast_type)
+int ldlm_run_ast_work(struct ldlm_namespace *ns, cfs_list_t *rpc_list,
+                      ldlm_desc_ast_t ast_type)
 {
-        struct ldlm_cb_set_arg arg;
+        struct ldlm_cb_set_arg arg = { 0 };
+        struct l_wait_info     lwi = { 0 };
         cfs_list_t *tmp, *pos;
         int (*work_ast_lock)(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg);
-        int ast_count;
+        unsigned int max_ast_count;
         ENTRY;
 
         if (cfs_list_empty(rpc_list))
                 RETURN(0);
 
-        arg.set = ptlrpc_prep_set();
-        if (NULL == arg.set)
-                RETURN(-ERESTART);
         cfs_atomic_set(&arg.restart, 0);
+        cfs_atomic_set(&arg.rpcs, 0);
+        cfs_waitq_init(&arg.waitq);
+
         switch (ast_type) {
         case LDLM_WORK_BL_AST:
                 arg.type = LDLM_BL_CALLBACK;
@@ -1512,27 +1577,21 @@ int ldlm_run_ast_work(cfs_list_t *rpc_list, ldlm_desc_ast_t ast_type)
                 LBUG();
         }
 
-        ast_count = 0;
+        max_ast_count = ns->ns_max_parallel_ast ? : UINT_MAX;
+        arg.threshold = max_ast_count;
+
         cfs_list_for_each_safe(tmp, pos, rpc_list) {
-                ast_count += work_ast_lock(tmp, &arg);
-
-                /* Send the request set if it exceeds the PARALLEL_AST_LIMIT,
-                 * and create a new set for requests that remained in
-                 * @rpc_list */
-                if (unlikely(ast_count == PARALLEL_AST_LIMIT)) {
-                        ldlm_send_and_maybe_create_set(&arg, 1);
-                        ast_count = 0;
-                }
+                (void)work_ast_lock(tmp, &arg);
+                if (cfs_atomic_read(&arg.rpcs) < max_ast_count)
+                        continue;
+
+                l_wait_event(arg.waitq,
+                             cfs_atomic_read(&arg.rpcs) < arg.threshold,
+                             &lwi);
         }
 
-        if (ast_count > 0)
-                ldlm_send_and_maybe_create_set(&arg, 0);
-        else
-                /* In case when number of ASTs is multiply of
-                 * PARALLEL_AST_LIMIT or @rpc_list was initially empty,
-                 * @arg.set must be destroyed here, otherwise we get
-                 * write memory leaking. */
-                ptlrpc_set_destroy(arg.set);
+        arg.threshold = 1;
+        l_wait_event(arg.waitq, cfs_atomic_read(&arg.rpcs) == 0, &lwi);
 
         RETURN(cfs_atomic_read(&arg.restart) ? -ERESTART : 0);
 }
@@ -1584,7 +1643,8 @@ void ldlm_reprocess_all(struct ldlm_resource *res)
                 ldlm_reprocess_queue(res, &res->lr_waiting, &rpc_list);
         unlock_res(res);
 
-        rc = ldlm_run_ast_work(&rpc_list, LDLM_WORK_CP_AST);
+        rc = ldlm_run_ast_work(ldlm_res_to_ns(res), &rpc_list,
+                               LDLM_WORK_CP_AST);
         if (rc == -ERESTART) {
                 LASSERT(cfs_list_empty(&rpc_list));
                 goto restart;
@@ -1663,15 +1723,19 @@ void ldlm_lock_cancel(struct ldlm_lock *lock)
 int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
 {
         struct ldlm_lock *lock = ldlm_handle2lock(lockh);
+        int rc = -EINVAL; /* kept for a bad handle or conflicting data */
         ENTRY;
 
-        if (lock == NULL)
-                RETURN(-EINVAL);
-
-        lock->l_ast_data = data;
-        LDLM_LOCK_PUT(lock);
-        RETURN(0);
+        if (lock) {
+                if (lock->l_ast_data == NULL) /* only fill an empty slot */
+                        lock->l_ast_data = data;
+                if (lock->l_ast_data == data) /* set now or already equal */
+                        rc = 0;
+                LDLM_LOCK_PUT(lock);
+        }
+        RETURN(rc);
 }
+EXPORT_SYMBOL(ldlm_lock_set_data);
 
 int ldlm_cancel_locks_for_export_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                                     cfs_hlist_node_t *hnode, void *data)
@@ -1829,7 +1893,7 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
         unlock_res_and_lock(lock);
 
         if (granted)
-                ldlm_run_ast_work(&rpc_list, LDLM_WORK_CP_AST);
+                ldlm_run_ast_work(ns, &rpc_list, LDLM_WORK_CP_AST);
         if (node)
                 OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
         RETURN(res);