Whamcloud - gitweb
LU-1842 ldlm: support for sending GL ASTs to multiple locks
[fs/lustre-release.git] / lustre / ldlm / ldlm_lock.c
index 2b4392f..93ccbee 100644 (file)
@@ -335,10 +335,12 @@ int ldlm_lock_destroy_internal(struct ldlm_lock *lock)
         }
         lock->l_destroyed = 1;
 
-        if (lock->l_export && lock->l_export->exp_lock_hash &&
-            !cfs_hlist_unhashed(&lock->l_exp_hash))
-                cfs_hash_del(lock->l_export->exp_lock_hash,
-                             &lock->l_remote_handle, &lock->l_exp_hash);
+       if (lock->l_export && lock->l_export->exp_lock_hash) {
+               /* NB: it's safe to call cfs_hash_del() even if the lock
+                * isn't in exp_lock_hash. */
+               cfs_hash_del(lock->l_export->exp_lock_hash,
+                            &lock->l_remote_handle, &lock->l_exp_hash);
+       }
 
         ldlm_lock_remove_from_lru(lock);
         class_handle_unhash(&lock->l_handle);
@@ -436,6 +438,7 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
         CFS_INIT_LIST_HEAD(&lock->l_sl_mode);
         CFS_INIT_LIST_HEAD(&lock->l_sl_policy);
         CFS_INIT_HLIST_NODE(&lock->l_exp_hash);
+       CFS_INIT_HLIST_NODE(&lock->l_exp_flock_hash);
 
         lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats,
                              LDLM_NSS_LOCKS);
@@ -1514,152 +1517,189 @@ int ldlm_reprocess_queue(struct ldlm_resource *res, cfs_list_t *queue,
 #endif
 
 static int
-ldlm_work_bl_ast_lock(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg)
+ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 {
-        struct ldlm_lock_desc d;
-        struct ldlm_lock *lock = cfs_list_entry(tmp, struct ldlm_lock,
-                                                l_bl_ast);
-        int rc;
-        ENTRY;
-
-        /* nobody should touch l_bl_ast */
-        lock_res_and_lock(lock);
-        cfs_list_del_init(&lock->l_bl_ast);
+       struct ldlm_cb_set_arg *arg = opaq;
+       struct ldlm_lock_desc   d;
+       int                     rc;
+       struct ldlm_lock       *lock;
+       ENTRY;
 
-        LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
-        LASSERT(lock->l_bl_ast_run == 0);
-        LASSERT(lock->l_blocking_lock);
-        lock->l_bl_ast_run++;
-        unlock_res_and_lock(lock);
-
-        ldlm_lock2desc(lock->l_blocking_lock, &d);
+       if (cfs_list_empty(arg->list))
+               RETURN(-ENOENT);
 
-        rc = lock->l_blocking_ast(lock, &d, (void *)arg,
-                                  LDLM_CB_BLOCKING);
-        LDLM_LOCK_RELEASE(lock->l_blocking_lock);
-        lock->l_blocking_lock = NULL;
-        LDLM_LOCK_RELEASE(lock);
+       lock = cfs_list_entry(arg->list->next, struct ldlm_lock, l_bl_ast);
 
-        RETURN(rc);
-}
+       /* nobody should touch l_bl_ast */
+       lock_res_and_lock(lock);
+       cfs_list_del_init(&lock->l_bl_ast);
 
-static int
-ldlm_work_cp_ast_lock(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg)
-{
-        struct ldlm_lock *lock = cfs_list_entry(tmp, struct ldlm_lock, l_cp_ast);
-        ldlm_completion_callback completion_callback;
-        int rc = 0;
-        ENTRY;
+       LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
+       LASSERT(lock->l_bl_ast_run == 0);
+       LASSERT(lock->l_blocking_lock);
+       lock->l_bl_ast_run++;
+       unlock_res_and_lock(lock);
 
-        /* It's possible to receive a completion AST before we've set
-         * the l_completion_ast pointer: either because the AST arrived
-         * before the reply, or simply because there's a small race
-         * window between receiving the reply and finishing the local
-         * enqueue. (bug 842)
-         *
-         * This can't happen with the blocking_ast, however, because we
-         * will never call the local blocking_ast until we drop our
-         * reader/writer reference, which we won't do until we get the
-         * reply and finish enqueueing. */
+       ldlm_lock2desc(lock->l_blocking_lock, &d);
 
-        /* nobody should touch l_cp_ast */
-        lock_res_and_lock(lock);
-        cfs_list_del_init(&lock->l_cp_ast);
-        LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
-        /* save l_completion_ast since it can be changed by
-         * mds_intent_policy(), see bug 14225 */
-        completion_callback = lock->l_completion_ast;
-        lock->l_flags &= ~LDLM_FL_CP_REQD;
-        unlock_res_and_lock(lock);
+       rc = lock->l_blocking_ast(lock, &d, (void *)arg, LDLM_CB_BLOCKING);
+       LDLM_LOCK_RELEASE(lock->l_blocking_lock);
+       lock->l_blocking_lock = NULL;
+       LDLM_LOCK_RELEASE(lock);
 
-        if (completion_callback != NULL)
-                rc = completion_callback(lock, 0, (void *)arg);
-        LDLM_LOCK_RELEASE(lock);
+       RETURN(rc);
+}
 
-        RETURN(rc);
+static int
+ldlm_work_cp_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
+{
+       struct ldlm_cb_set_arg  *arg = opaq;
+       int                      rc = 0;
+       struct ldlm_lock        *lock;
+       ldlm_completion_callback completion_callback;
+       ENTRY;
+
+       if (cfs_list_empty(arg->list))
+               RETURN(-ENOENT);
+
+       lock = cfs_list_entry(arg->list->next, struct ldlm_lock, l_cp_ast);
+
+       /* It's possible to receive a completion AST before we've set
+        * the l_completion_ast pointer: either because the AST arrived
+        * before the reply, or simply because there's a small race
+        * window between receiving the reply and finishing the local
+        * enqueue. (bug 842)
+        *
+        * This can't happen with the blocking_ast, however, because we
+        * will never call the local blocking_ast until we drop our
+        * reader/writer reference, which we won't do until we get the
+        * reply and finish enqueueing. */
+
+       /* nobody should touch l_cp_ast */
+       lock_res_and_lock(lock);
+       cfs_list_del_init(&lock->l_cp_ast);
+       LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
+       /* save l_completion_ast since it can be changed by
+        * mds_intent_policy(), see bug 14225 */
+       completion_callback = lock->l_completion_ast;
+       lock->l_flags &= ~LDLM_FL_CP_REQD;
+       unlock_res_and_lock(lock);
+
+       if (completion_callback != NULL)
+               rc = completion_callback(lock, 0, (void *)arg);
+       LDLM_LOCK_RELEASE(lock);
+
+       RETURN(rc);
 }
 
 static int
-ldlm_work_revoke_ast_lock(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg)
+ldlm_work_revoke_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 {
-        struct ldlm_lock_desc desc;
-        struct ldlm_lock *lock = cfs_list_entry(tmp, struct ldlm_lock,
-                                                l_rk_ast);
-        int rc;
-        ENTRY;
+       struct ldlm_cb_set_arg *arg = opaq;
+       struct ldlm_lock_desc   desc;
+       int                     rc;
+       struct ldlm_lock       *lock;
+       ENTRY;
 
-        cfs_list_del_init(&lock->l_rk_ast);
+       if (cfs_list_empty(arg->list))
+               RETURN(-ENOENT);
 
-        /* the desc just pretend to exclusive */
-        ldlm_lock2desc(lock, &desc);
-        desc.l_req_mode = LCK_EX;
-        desc.l_granted_mode = 0;
+       lock = cfs_list_entry(arg->list->next, struct ldlm_lock, l_rk_ast);
+       cfs_list_del_init(&lock->l_rk_ast);
 
-        rc = lock->l_blocking_ast(lock, &desc, (void*)arg, LDLM_CB_BLOCKING);
-        LDLM_LOCK_RELEASE(lock);
+       /* the desc just pretends to be exclusive */
+       ldlm_lock2desc(lock, &desc);
+       desc.l_req_mode = LCK_EX;
+       desc.l_granted_mode = 0;
 
-        RETURN(rc);
+       rc = lock->l_blocking_ast(lock, &desc, (void*)arg, LDLM_CB_BLOCKING);
+       LDLM_LOCK_RELEASE(lock);
+
+       RETURN(rc);
 }
 
-int ldlm_run_ast_work(struct ldlm_namespace *ns, cfs_list_t *rpc_list,
-                      ldlm_desc_ast_t ast_type)
+int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 {
-        struct l_wait_info     lwi = { 0 };
-        struct ldlm_cb_set_arg *arg;
-        cfs_list_t *tmp, *pos;
-        int (*work_ast_lock)(cfs_list_t *tmp, struct ldlm_cb_set_arg *arg);
-        unsigned int max_ast_count;
-        int rc;
-        ENTRY;
+       struct ldlm_cb_set_arg          *arg = opaq;
+       struct ldlm_glimpse_work        *gl_work;
+       struct ldlm_lock                *lock;
+       int                              rc = 0;
+       ENTRY;
 
-        if (cfs_list_empty(rpc_list))
-                RETURN(0);
+       if (cfs_list_empty(arg->list))
+               RETURN(-ENOENT);
 
-        OBD_ALLOC_PTR(arg);
-        if (arg == NULL)
-                RETURN(-ENOMEM);
-
-        cfs_atomic_set(&arg->restart, 0);
-        cfs_atomic_set(&arg->rpcs, 0);
-        cfs_atomic_set(&arg->refcount, 1);
-        cfs_waitq_init(&arg->waitq);
-
-        switch (ast_type) {
-        case LDLM_WORK_BL_AST:
-                arg->type = LDLM_BL_CALLBACK;
-                work_ast_lock = ldlm_work_bl_ast_lock;
-                break;
-        case LDLM_WORK_CP_AST:
-                arg->type = LDLM_CP_CALLBACK;
-                work_ast_lock = ldlm_work_cp_ast_lock;
-                break;
-        case LDLM_WORK_REVOKE_AST:
-                arg->type = LDLM_BL_CALLBACK;
-                work_ast_lock = ldlm_work_revoke_ast_lock;
-                break;
-        default:
-                LBUG();
-        }
+       gl_work = cfs_list_entry(arg->list->next, struct ldlm_glimpse_work,
+                                gl_list);
+       cfs_list_del_init(&gl_work->gl_list);
 
-        max_ast_count = ns->ns_max_parallel_ast ? : UINT_MAX;
-        arg->threshold = max_ast_count;
+       lock = gl_work->gl_lock;
+       if (lock->l_glimpse_ast(lock, (void*)arg) == 0)
+               rc = 1;
 
-        cfs_list_for_each_safe(tmp, pos, rpc_list) {
-                (void)work_ast_lock(tmp, arg);
-                if (cfs_atomic_read(&arg->rpcs) < max_ast_count)
-                        continue;
+       LDLM_LOCK_RELEASE(lock);
 
-                l_wait_event(arg->waitq,
-                             cfs_atomic_read(&arg->rpcs) < arg->threshold,
-                             &lwi);
-        }
+       if ((gl_work->gl_flags & LDLM_GL_WORK_NOFREE) == 0)
+               OBD_FREE_PTR(gl_work);
 
-        arg->threshold = 1;
-        l_wait_event(arg->waitq, cfs_atomic_read(&arg->rpcs) == 0, &lwi);
+       RETURN(rc);
+}
 
-        rc = cfs_atomic_read(&arg->restart) ? -ERESTART : 0;
-        ldlm_csa_put(arg);
-        RETURN(rc);
+int ldlm_run_ast_work(struct ldlm_namespace *ns, cfs_list_t *rpc_list,
+                      ldlm_desc_ast_t ast_type)
+{
+       struct ldlm_cb_set_arg *arg;
+       set_producer_func       work_ast_lock;
+       int                     rc;
+
+       if (cfs_list_empty(rpc_list))
+               RETURN(0);
+
+       OBD_ALLOC_PTR(arg);
+       if (arg == NULL)
+               RETURN(-ENOMEM);
+
+       cfs_atomic_set(&arg->restart, 0);
+       arg->list = rpc_list;
+
+       switch (ast_type) {
+               case LDLM_WORK_BL_AST:
+                       arg->type = LDLM_BL_CALLBACK;
+                       work_ast_lock = ldlm_work_bl_ast_lock;
+                       break;
+               case LDLM_WORK_CP_AST:
+                       arg->type = LDLM_CP_CALLBACK;
+                       work_ast_lock = ldlm_work_cp_ast_lock;
+                       break;
+               case LDLM_WORK_REVOKE_AST:
+                       arg->type = LDLM_BL_CALLBACK;
+                       work_ast_lock = ldlm_work_revoke_ast_lock;
+                       break;
+               case LDLM_WORK_GL_AST:
+                       arg->type = LDLM_GL_CALLBACK;
+                       work_ast_lock = ldlm_work_gl_ast_lock;
+                       break;
+               default:
+                       LBUG();
+       }
+
+       /* We create a ptlrpc request set with flow control extension.
+        * This request set will use the work_ast_lock function to produce new
+        * requests and will send a new request each time one completes in order
+        * to keep the number of requests in flight at ns_max_parallel_ast */
+       arg->set = ptlrpc_prep_fcset(ns->ns_max_parallel_ast ? : UINT_MAX,
+                                    work_ast_lock, arg);
+       if (arg->set == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       ptlrpc_set_wait(arg->set);
+       ptlrpc_set_destroy(arg->set);
+
+       rc = cfs_atomic_read(&arg->restart) ? -ERESTART : 0;
+       GOTO(out, rc);
+out:
+       OBD_FREE_PTR(arg);
+       return rc;
 }
 
 static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
@@ -1773,14 +1813,17 @@ void ldlm_lock_cancel(struct ldlm_lock *lock)
                 LBUG();
         }
 
-        ldlm_del_waiting_lock(lock);
+       if (lock->l_waited)
+               ldlm_del_waiting_lock(lock);
 
         /* Releases cancel callback. */
         ldlm_cancel_callback(lock);
 
         /* Yes, second time, just in case it was added again while we were
            running with no res lock in ldlm_cancel_callback */
-        ldlm_del_waiting_lock(lock);
+       if (lock->l_waited)
+               ldlm_del_waiting_lock(lock);
+
         ldlm_resource_unlink_lock(lock);
         ldlm_lock_destroy_nolock(lock);
 
@@ -1812,11 +1855,17 @@ int ldlm_lock_set_data(struct lustre_handle *lockh, void *data)
 }
 EXPORT_SYMBOL(ldlm_lock_set_data);
 
+struct export_cl_data {
+       struct obd_export       *ecl_exp;
+       int                     ecl_loop;
+};
+
 int ldlm_cancel_locks_for_export_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                                     cfs_hlist_node_t *hnode, void *data)
 
 {
-        struct obd_export    *exp  = data;
+       struct export_cl_data   *ecl = (struct export_cl_data *)data;
+       struct obd_export       *exp  = ecl->ecl_exp;
         struct ldlm_lock     *lock = cfs_hash_object(hs, hnode);
         struct ldlm_resource *res;
 
@@ -1829,13 +1878,28 @@ int ldlm_cancel_locks_for_export_cb(cfs_hash_t *hs, cfs_hash_bd_t *bd,
         ldlm_reprocess_all(res);
         ldlm_resource_putref(res);
         LDLM_LOCK_RELEASE(lock);
-        return 0;
+
+       ecl->ecl_loop++;
+       if ((ecl->ecl_loop & -ecl->ecl_loop) == ecl->ecl_loop) {
+               CDEBUG(D_INFO,
+                      "Cancel lock %p for export %p (loop %d), still have "
+                      "%d locks left on hash table.\n",
+                      lock, exp, ecl->ecl_loop,
+                      cfs_atomic_read(&hs->hs_count));
+       }
+
+       return 0;
 }
 
 void ldlm_cancel_locks_for_export(struct obd_export *exp)
 {
-        cfs_hash_for_each_empty(exp->exp_lock_hash,
-                                ldlm_cancel_locks_for_export_cb, exp);
+       struct export_cl_data   ecl = {
+               .ecl_exp        = exp,
+               .ecl_loop       = 0,
+       };
+
+       cfs_hash_for_each_empty(exp->exp_lock_hash,
+                               ldlm_cancel_locks_for_export_cb, &ecl);
 }
 
 /**
@@ -1877,8 +1941,10 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
         struct ldlm_resource *res;
         struct ldlm_namespace *ns;
         int granted = 0;
-        int old_mode;
-        struct sl_insert_point prev;
+#ifdef HAVE_SERVER_SUPPORT
+       int old_mode;
+       struct sl_insert_point prev;
+#endif
         struct ldlm_interval *node;
         ENTRY;
 
@@ -1901,15 +1967,19 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
         res = lock->l_resource;
         ns  = ldlm_res_to_ns(res);
 
-        old_mode = lock->l_req_mode;
-        lock->l_req_mode = new_mode;
-        if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) {
-                /* remember the lock position where the lock might be
-                 * added back to the granted list later and also
-                 * remember the join mode for skiplist fixing. */
-                prev.res_link = lock->l_res_link.prev;
-                prev.mode_link = lock->l_sl_mode.prev;
-                prev.policy_link = lock->l_sl_policy.prev;
+#ifdef HAVE_SERVER_SUPPORT
+       old_mode = lock->l_req_mode;
+#endif
+       lock->l_req_mode = new_mode;
+       if (res->lr_type == LDLM_PLAIN || res->lr_type == LDLM_IBITS) {
+#ifdef HAVE_SERVER_SUPPORT
+               /* remember the lock position where the lock might be
+                * added back to the granted list later and also
+                * remember the join mode for skiplist fixing. */
+               prev.res_link = lock->l_res_link.prev;
+               prev.mode_link = lock->l_sl_mode.prev;
+               prev.policy_link = lock->l_sl_policy.prev;
+#endif
                 ldlm_resource_unlink_lock(lock);
         } else {
                 ldlm_resource_unlink_lock(lock);