* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2010, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
CFS_MODULE_PARM(ldlm_cpts, "s", charp, 0444,
"CPU partitions ldlm threads should run on");
-extern cfs_mem_cache_t *ldlm_resource_slab;
-extern cfs_mem_cache_t *ldlm_lock_slab;
-static cfs_mutex_t ldlm_ref_mutex;
+extern struct kmem_cache *ldlm_resource_slab;
+extern struct kmem_cache *ldlm_lock_slab;
+static struct mutex ldlm_ref_mutex;
static int ldlm_refcount;
struct ldlm_cb_async_args {
#define ELT_TERMINATE 2
struct ldlm_bl_pool {
- cfs_spinlock_t blp_lock;
+ spinlock_t blp_lock;
/*
* blp_prio_list is used for callbacks that should be handled
cfs_list_t blp_list;
cfs_waitq_t blp_waitq;
- cfs_completion_t blp_comp;
+ struct completion blp_comp;
cfs_atomic_t blp_num_threads;
cfs_atomic_t blp_busy_threads;
int blp_min_threads;
struct ldlm_lock *blwi_lock;
cfs_list_t blwi_head;
int blwi_count;
- cfs_completion_t blwi_comp;
- int blwi_mode;
+ struct completion blwi_comp;
+ ldlm_cancel_flags_t blwi_flags;
int blwi_mem_pressure;
};
#if defined(HAVE_SERVER_SUPPORT) && defined(__KERNEL__)
-/* w_l_spinlock protects both waiting_locks_list and expired_lock_thread */
-static cfs_spinlock_t waiting_locks_spinlock; /* BH lock (timer) */
+/**
+ * Protects both waiting_locks_list and expired_lock_thread.
+ */
+static spinlock_t waiting_locks_spinlock; /* BH lock (timer) */
+
+/**
+ * List for contended locks.
+ *
+ * As soon as a lock is contended, it gets placed on this list and the
+ * expected time to get a response is recorded in the lock. A special
+ * thread walks the list looking for locks that should be released and
+ * schedules client evictions for those that have not been released in
+ * time.
+ *
+ * All access to it should be under waiting_locks_spinlock.
+ */
static cfs_list_t waiting_locks_list;
static cfs_timer_t waiting_locks_timer;
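/*
 * Illustrative sketch (editor's addition, not part of the patch): the
 * access pattern the comment above requires. The BH-disabling lock
 * variants are needed because waiting_locks_callback() runs from timer
 * (softirq) context and takes the same spinlock.
 */
static inline int example_waiting_list_is_empty(void)
{
	int empty;

	spin_lock_bh(&waiting_locks_spinlock);
	empty = cfs_list_empty(&waiting_locks_list);
	spin_unlock_bh(&waiting_locks_spinlock);

	return empty;
}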
static inline int have_expired_locks(void)
{
- int need_to_run;
+ int need_to_run;
- ENTRY;
- cfs_spin_lock_bh(&waiting_locks_spinlock);
- need_to_run = !cfs_list_empty(&expired_lock_thread.elt_expired_locks);
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ ENTRY;
+ spin_lock_bh(&waiting_locks_spinlock);
+ need_to_run = !cfs_list_empty(&expired_lock_thread.elt_expired_locks);
+ spin_unlock_bh(&waiting_locks_spinlock);
- RETURN(need_to_run);
+ RETURN(need_to_run);
}
+/**
+ * Check expired lock list for expired locks and time them out.
+ */
static int expired_lock_main(void *arg)
{
cfs_list_t *expired = &expired_lock_thread.elt_expired_locks;
int do_dump;
ENTRY;
- cfs_daemonize("ldlm_elt");
expired_lock_thread.elt_state = ELT_READY;
cfs_waitq_signal(&expired_lock_thread.elt_waitq);
expired_lock_thread.elt_state == ELT_TERMINATE,
&lwi);
- cfs_spin_lock_bh(&waiting_locks_spinlock);
- if (expired_lock_thread.elt_dump) {
- struct libcfs_debug_msg_data msgdata = {
- .msg_file = __FILE__,
- .msg_fn = "waiting_locks_callback",
- .msg_line = expired_lock_thread.elt_dump };
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ spin_lock_bh(&waiting_locks_spinlock);
+ if (expired_lock_thread.elt_dump) {
+ struct libcfs_debug_msg_data msgdata = {
+ .msg_file = __FILE__,
+ .msg_fn = "waiting_locks_callback",
+ .msg_line = expired_lock_thread.elt_dump };
+ spin_unlock_bh(&waiting_locks_spinlock);
- /* from waiting_locks_callback, but not in timer */
- libcfs_debug_dumplog();
- libcfs_run_lbug_upcall(&msgdata);
+ /* from waiting_locks_callback, but not in timer */
+ libcfs_debug_dumplog();
+ libcfs_run_lbug_upcall(&msgdata);
- cfs_spin_lock_bh(&waiting_locks_spinlock);
+ spin_lock_bh(&waiting_locks_spinlock);
expired_lock_thread.elt_dump = 0;
}
lock = cfs_list_entry(expired->next, struct ldlm_lock,
l_pending_chain);
- if ((void *)lock < LP_POISON + CFS_PAGE_SIZE &&
- (void *)lock >= LP_POISON) {
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
- CERROR("free lock on elt list %p\n", lock);
- LBUG();
- }
- cfs_list_del_init(&lock->l_pending_chain);
- if ((void *)lock->l_export < LP_POISON + CFS_PAGE_SIZE &&
+ if ((void *)lock < LP_POISON + PAGE_CACHE_SIZE &&
+ (void *)lock >= LP_POISON) {
+ spin_unlock_bh(&waiting_locks_spinlock);
+ CERROR("free lock on elt list %p\n", lock);
+ LBUG();
+ }
+ cfs_list_del_init(&lock->l_pending_chain);
+ if ((void *)lock->l_export <
+ LP_POISON + PAGE_CACHE_SIZE &&
(void *)lock->l_export >= LP_POISON) {
CERROR("lock with free export on elt list %p\n",
lock->l_export);
continue;
}
- if (lock->l_destroyed) {
+ if (lock->l_flags & LDLM_FL_DESTROYED) {
/* release the lock refcount where
* waiting_locks_callback() found it */
LDLM_LOCK_RELEASE(lock);
continue;
}
- export = class_export_lock_get(lock->l_export, lock);
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ export = class_export_lock_get(lock->l_export, lock);
+ spin_unlock_bh(&waiting_locks_spinlock);
- do_dump++;
- class_fail_export(export);
- class_export_lock_put(export, lock);
+ do_dump++;
+ class_fail_export(export);
+ class_export_lock_put(export, lock);
- /* release extra ref grabbed by ldlm_add_waiting_lock()
- * or ldlm_failed_ast() */
- LDLM_LOCK_RELEASE(lock);
+ /* release extra ref grabbed by ldlm_add_waiting_lock()
+ * or ldlm_failed_ast() */
+ LDLM_LOCK_RELEASE(lock);
- cfs_spin_lock_bh(&waiting_locks_spinlock);
- }
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ spin_lock_bh(&waiting_locks_spinlock);
+ }
+ spin_unlock_bh(&waiting_locks_spinlock);
if (do_dump && obd_dump_on_eviction) {
CERROR("dump the log upon eviction\n");
*/
static int ldlm_lock_busy(struct ldlm_lock *lock)
{
- struct ptlrpc_request *req;
- int match = 0;
- ENTRY;
+ struct ptlrpc_request *req;
+ int match = 0;
+ ENTRY;
- if (lock->l_export == NULL)
- return 0;
+ if (lock->l_export == NULL)
+ return 0;
- cfs_spin_lock_bh(&lock->l_export->exp_rpc_lock);
- cfs_list_for_each_entry(req, &lock->l_export->exp_hp_rpcs,
- rq_exp_list) {
- if (req->rq_ops->hpreq_lock_match) {
- match = req->rq_ops->hpreq_lock_match(req, lock);
- if (match)
- break;
- }
- }
- cfs_spin_unlock_bh(&lock->l_export->exp_rpc_lock);
- RETURN(match);
+ spin_lock_bh(&lock->l_export->exp_rpc_lock);
+ cfs_list_for_each_entry(req, &lock->l_export->exp_hp_rpcs,
+ rq_exp_list) {
+ if (req->rq_ops->hpreq_lock_match) {
+ match = req->rq_ops->hpreq_lock_match(req, lock);
+ if (match)
+ break;
+ }
+ }
+ spin_unlock_bh(&lock->l_export->exp_rpc_lock);
+ RETURN(match);
}
/* This is called from within a timer interrupt and cannot schedule */
struct ldlm_lock *lock;
int need_dump = 0;
- cfs_spin_lock_bh(&waiting_locks_spinlock);
+ spin_lock_bh(&waiting_locks_spinlock);
while (!cfs_list_empty(&waiting_locks_list)) {
lock = cfs_list_entry(waiting_locks_list.next, struct ldlm_lock,
l_pending_chain);
libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid));
cfs_list_del_init(&lock->l_pending_chain);
- if (lock->l_destroyed) {
+ if (lock->l_flags & LDLM_FL_DESTROYED) {
/* relay the lock refcount decrease to the
* expired lock thread */
cfs_list_add(&lock->l_pending_chain,
libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid));
cfs_list_del_init(&lock->l_pending_chain);
- if (lock->l_destroyed) {
+ if (lock->l_flags & LDLM_FL_DESTROYED) {
/* relay the lock refcount decrease to the
* expired lock thread */
cfs_list_add(&lock->l_pending_chain,
LDLM_LOCK_GET(lock);
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
- LDLM_DEBUG(lock, "prolong the busy lock");
- ldlm_refresh_waiting_lock(lock,
- ldlm_get_enq_timeout(lock));
- cfs_spin_lock_bh(&waiting_locks_spinlock);
+ spin_unlock_bh(&waiting_locks_spinlock);
+ LDLM_DEBUG(lock, "prolong the busy lock");
+ ldlm_refresh_waiting_lock(lock,
+ ldlm_get_enq_timeout(lock));
+ spin_lock_bh(&waiting_locks_spinlock);
if (!cont) {
LDLM_LOCK_RELEASE(lock);
timeout_rounded = (cfs_time_t)round_timeout(lock->l_callback_timeout);
cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
}
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ spin_unlock_bh(&waiting_locks_spinlock);
}
-/*
+/**
+ * Add lock to the list of contended locks.
+ *
* Indicate that we're waiting for a client to call us back cancelling a given
* lock. We add it to the pending-callback chain, and schedule the lock-timeout
* timer to fire appropriately. (We round up to the next second, to avoid
int timeout = ldlm_get_enq_timeout(lock);
/* NB: must be called with hold of lock_res_and_lock() */
- LASSERT(lock->l_res_locked);
- lock->l_waited = 1;
+ LASSERT(lock->l_flags & LDLM_FL_RES_LOCKED);
+ lock->l_flags |= LDLM_FL_WAITED;
LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK));
- cfs_spin_lock_bh(&waiting_locks_spinlock);
- if (lock->l_destroyed) {
- static cfs_time_t next;
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ spin_lock_bh(&waiting_locks_spinlock);
+ if (lock->l_flags & LDLM_FL_DESTROYED) {
+ static cfs_time_t next;
+ spin_unlock_bh(&waiting_locks_spinlock);
LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)");
if (cfs_time_after(cfs_time_current(), next)) {
next = cfs_time_shift(14400);
* waiting list */
LDLM_LOCK_GET(lock);
}
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ spin_unlock_bh(&waiting_locks_spinlock);
- if (ret) {
- cfs_spin_lock_bh(&lock->l_export->exp_bl_list_lock);
- if (cfs_list_empty(&lock->l_exp_list))
- cfs_list_add(&lock->l_exp_list,
- &lock->l_export->exp_bl_list);
- cfs_spin_unlock_bh(&lock->l_export->exp_bl_list_lock);
- }
+ if (ret) {
+ spin_lock_bh(&lock->l_export->exp_bl_list_lock);
+ if (cfs_list_empty(&lock->l_exp_list))
+ cfs_list_add(&lock->l_exp_list,
+ &lock->l_export->exp_bl_list);
+ spin_unlock_bh(&lock->l_export->exp_bl_list_lock);
+ }
- LDLM_DEBUG(lock, "%sadding to wait list(timeout: %d, AT: %s)",
- ret == 0 ? "not re-" : "", timeout,
- AT_OFF ? "off" : "on");
- return ret;
+	LDLM_DEBUG(lock, "%sadding to wait list (timeout: %d, AT: %s)",
+ ret == 0 ? "not re-" : "", timeout,
+ AT_OFF ? "off" : "on");
+ return ret;
}
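/*
 * Editor's sketch of the "round up to the next second" idea from the
 * comment above; example_round_timeout() is hypothetical, the real code
 * uses round_timeout() on lock->l_callback_timeout. Rounding deadlines
 * to whole-second boundaries lets nearby expiries share one timer shot
 * instead of re-arming the timer for every lock.
 */
static inline unsigned long example_round_timeout(unsigned long deadline)
{
	/* roundup() picks the smallest multiple of HZ >= deadline,
	 * i.e. the next whole second in jiffies. */
	return roundup(deadline, HZ);
}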
-/*
+/**
* Remove a lock from the pending list, likely because it had its cancellation
* callback arrive without incident. This adjusts the lock-timeout timer if
* needed. Returns 0 if the lock wasn't pending after all, 1 if it was.
return 0;
}
- cfs_spin_lock_bh(&waiting_locks_spinlock);
- ret = __ldlm_del_waiting_lock(lock);
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ spin_lock_bh(&waiting_locks_spinlock);
+ ret = __ldlm_del_waiting_lock(lock);
+ spin_unlock_bh(&waiting_locks_spinlock);
- /* remove the lock out of export blocking list */
- cfs_spin_lock_bh(&lock->l_export->exp_bl_list_lock);
- cfs_list_del_init(&lock->l_exp_list);
- cfs_spin_unlock_bh(&lock->l_export->exp_bl_list_lock);
+ /* remove the lock out of export blocking list */
+ spin_lock_bh(&lock->l_export->exp_bl_list_lock);
+ cfs_list_del_init(&lock->l_exp_list);
+ spin_unlock_bh(&lock->l_export->exp_bl_list_lock);
if (ret) {
/* release lock ref if it has indeed been removed
}
EXPORT_SYMBOL(ldlm_del_waiting_lock);
-/*
- * Prolong the lock
+/**
+ * Prolong the contended lock waiting time.
*
* Called with namespace lock held.
*/
int ldlm_refresh_waiting_lock(struct ldlm_lock *lock, int timeout)
{
- if (lock->l_export == NULL) {
- /* We don't have a "waiting locks list" on clients. */
- LDLM_DEBUG(lock, "client lock: no-op");
- return 0;
- }
+ if (lock->l_export == NULL) {
+ /* We don't have a "waiting locks list" on clients. */
+ LDLM_DEBUG(lock, "client lock: no-op");
+ return 0;
+ }
- cfs_spin_lock_bh(&waiting_locks_spinlock);
+ spin_lock_bh(&waiting_locks_spinlock);
- if (cfs_list_empty(&lock->l_pending_chain)) {
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
- LDLM_DEBUG(lock, "wasn't waiting");
- return 0;
- }
+ if (cfs_list_empty(&lock->l_pending_chain)) {
+ spin_unlock_bh(&waiting_locks_spinlock);
+ LDLM_DEBUG(lock, "wasn't waiting");
+ return 0;
+ }
- /* we remove/add the lock to the waiting list, so no needs to
- * release/take a lock reference */
- __ldlm_del_waiting_lock(lock);
- __ldlm_add_waiting_lock(lock, timeout);
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+	/* we remove/add the lock from/to the waiting list, so there is no
+	 * need to release/take a lock reference */
+ __ldlm_del_waiting_lock(lock);
+ __ldlm_add_waiting_lock(lock, timeout);
+ spin_unlock_bh(&waiting_locks_spinlock);
- LDLM_DEBUG(lock, "refreshed");
- return 1;
+ LDLM_DEBUG(lock, "refreshed");
+ return 1;
}
EXPORT_SYMBOL(ldlm_refresh_waiting_lock);
# ifdef HAVE_SERVER_SUPPORT
static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
{
- LASSERT(lock->l_res_locked);
- LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK));
+ LASSERT((lock->l_flags & (LDLM_FL_RES_LOCKED|LDLM_FL_CANCEL_ON_BLOCK))
+ == LDLM_FL_RES_LOCKED);
RETURN(1);
}
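/*
 * Editor's note on the combined assertion above: masking l_flags with
 * both bits and comparing against LDLM_FL_RES_LOCKED verifies, in a
 * single test, that LDLM_FL_RES_LOCKED is set while
 * LDLM_FL_CANCEL_ON_BLOCK is clear, which is exactly what the two
 * separate LASSERTs expressed before.
 */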
#ifdef HAVE_SERVER_SUPPORT
+/**
+ * Perform lock cleanup if AST sending failed.
+ */
static void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
const char *ast_type)
{
if (obd_dump_on_timeout)
libcfs_debug_dumplog();
#ifdef __KERNEL__
- cfs_spin_lock_bh(&waiting_locks_spinlock);
- if (__ldlm_del_waiting_lock(lock) == 0)
- /* the lock was not in any list, grab an extra ref before adding
- * the lock to the expired list */
- LDLM_LOCK_GET(lock);
- cfs_list_add(&lock->l_pending_chain,
- &expired_lock_thread.elt_expired_locks);
- cfs_waitq_signal(&expired_lock_thread.elt_waitq);
- cfs_spin_unlock_bh(&waiting_locks_spinlock);
+ spin_lock_bh(&waiting_locks_spinlock);
+ if (__ldlm_del_waiting_lock(lock) == 0)
+ /* the lock was not in any list, grab an extra ref before adding
+ * the lock to the expired list */
+ LDLM_LOCK_GET(lock);
+ cfs_list_add(&lock->l_pending_chain,
+ &expired_lock_thread.elt_expired_locks);
+ cfs_waitq_signal(&expired_lock_thread.elt_waitq);
+ spin_unlock_bh(&waiting_locks_spinlock);
#else
- class_fail_export(lock->l_export);
+ class_fail_export(lock->l_export);
#endif
}
+/**
+ * Perform lock cleanup if AST reply came with error.
+ */
static int ldlm_handle_ast_error(struct ldlm_lock *lock,
struct ptlrpc_request *req, int rc,
const char *ast_type)
*/
static void ldlm_lock_reorder_req(struct ldlm_lock *lock)
{
- struct ptlrpc_request *req;
- ENTRY;
+ struct ptlrpc_request *req;
+ ENTRY;
- if (lock->l_export == NULL) {
- LDLM_DEBUG(lock, "client lock: no-op");
- RETURN_EXIT;
- }
+ if (lock->l_export == NULL) {
+ LDLM_DEBUG(lock, "client lock: no-op");
+ RETURN_EXIT;
+ }
- cfs_spin_lock_bh(&lock->l_export->exp_rpc_lock);
- cfs_list_for_each_entry(req, &lock->l_export->exp_hp_rpcs,
- rq_exp_list) {
- /* Do not process requests that were not yet added to there
- * incoming queue or were already removed from there for
- * processing */
- if (!req->rq_hp && !cfs_list_empty(&req->rq_list) &&
- req->rq_ops->hpreq_lock_match &&
- req->rq_ops->hpreq_lock_match(req, lock))
- ptlrpc_hpreq_reorder(req);
- }
- cfs_spin_unlock_bh(&lock->l_export->exp_rpc_lock);
- EXIT;
+ spin_lock_bh(&lock->l_export->exp_rpc_lock);
+ cfs_list_for_each_entry(req, &lock->l_export->exp_hp_rpcs,
+ rq_exp_list) {
+		/* Do not process requests that were not yet added to their
+		 * incoming queue or that were already removed from it for
+ * processing. We evaluate ptlrpc_nrs_req_can_move() without
+ * holding svcpt->scp_req_lock, and then redo the check with
+ * the lock held once we need to obtain a reliable result.
+ */
+ if (ptlrpc_nrs_req_can_move(req) &&
+ req->rq_ops->hpreq_lock_match &&
+ req->rq_ops->hpreq_lock_match(req, lock))
+ ptlrpc_nrs_req_hp_move(req);
+ }
+ spin_unlock_bh(&lock->l_export->exp_rpc_lock);
+ EXIT;
}
-/*
+/**
* ->l_blocking_ast() method for server-side locks. This is invoked when a
* newly enqueued server lock conflicts with the given one.
*
- * Sends blocking ast rpc to the client owning that lock; arms timeout timer
+ * Sends blocking AST RPC to the client owning that lock; arms timeout timer
* to wait for client response.
*/
int ldlm_server_blocking_ast(struct ldlm_lock *lock,
RETURN(0);
}
- if (lock->l_destroyed) {
+ if (lock->l_flags & LDLM_FL_DESTROYED) {
/* What's the point? */
unlock_res_and_lock(lock);
ptlrpc_req_finished(req);
}
EXPORT_SYMBOL(ldlm_server_blocking_ast);
+/**
+ * ->l_completion_ast callback for a remote lock in server namespace.
+ *
+ * Sends the AST to the client notifying it of the lock granting. If the
+ * initial lock response was not sent yet, then instead of sending another
+ * RPC just mark the lock as granted and the client will understand.
+ */
int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
{
struct ldlm_cb_set_arg *arg = data;
/* server namespace, doesn't need lock */
lvb_len = ldlm_lvbo_size(lock);
- if (lvb_len > 0)
- req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT,
- lvb_len);
-
+	/* LU-3124 & LU-2187: do not return the layout in the completion AST
+	 * because it may deadlock (LU-2187), or the client may not have
+	 * enough space for a large layout. The layout will be returned to
+	 * the client with an extra RPC to fetch xattr.lov */
+ if (ldlm_has_layout(lock))
+ lvb_len = 0;
+
+ req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT, lvb_len);
rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_CP_CALLBACK);
if (rc) {
ptlrpc_request_free(req);
void *lvb = req_capsule_client_get(&req->rq_pill, &RMF_DLM_LVB);
lvb_len = ldlm_lvbo_fill(lock, lvb, lvb_len);
- req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB,
- lvb_len, RCL_CLIENT);
+ if (lvb_len < 0) {
+			/* We still need to send the RPC to wake up the blocked
+			 * enqueue thread on the client.
+			 *
+			 * For an old client there is no better way to notify it
+			 * of the failure; just send a zero-sized LVB and the
+			 * client will fail out as "-EPROTO". */
+ req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, 0,
+ RCL_CLIENT);
+ instant_cancel = 1;
+ } else {
+ req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB, lvb_len,
+ RCL_CLIENT);
+ }
}
LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)",
lock_res_and_lock(lock);
if (lock->l_flags & LDLM_FL_AST_SENT) {
body->lock_flags |= ldlm_flags_to_wire(LDLM_FL_AST_SENT);
- /* copy ast flags like LDLM_FL_DISCARD_DATA */
+ /* Copy AST flags like LDLM_FL_DISCARD_DATA. */
body->lock_flags |= ldlm_flags_to_wire(lock->l_flags &
LDLM_AST_FLAGS);
rc = ldlm_ast_fini(req, arg, lock, instant_cancel);
- RETURN(rc);
+ RETURN(lvb_len < 0 ? lvb_len : rc);
}
EXPORT_SYMBOL(ldlm_server_completion_ast);
+/**
+ * Server side ->l_glimpse_ast handler for client locks.
+ *
+ * Sends a glimpse AST to the client and waits for the reply. Then
+ * updates the lvbo with the result.
+ */
int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
{
struct ldlm_cb_set_arg *arg = data;
}
EXPORT_SYMBOL(ldlm_glimpse_locks);
-/* return ldlm lock associated with a lock callback request */
+/* return LDLM lock associated with a lock callback request */
struct ldlm_lock *ldlm_request_lock(struct ptlrpc_request *req)
{
struct ldlm_cb_async_args *ca;
return;
}
-/*
- * Main server-side entry point into LDLM. This is called by ptlrpc service
- * threads to carry out client lock enqueueing requests.
+/**
+ * Main server-side entry point into LDLM for enqueue. This is called by ptlrpc
+ * service threads to carry out client lock enqueueing requests.
*/
int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
struct ptlrpc_request *req,
GOTO(out, rc = -EFAULT);
}
- if (req->rq_export->exp_connect_flags & OBD_CONNECT_IBITS) {
+ if (exp_connect_flags(req->rq_export) & OBD_CONNECT_IBITS) {
if (unlikely(dlm_req->lock_desc.l_resource.lr_type ==
LDLM_PLAIN)) {
DEBUG_REQ(D_ERROR, req,
/* INODEBITS_INTEROP: Perform conversion from plain lock to
* inodebits lock if client does not support them. */
- if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_IBITS) &&
+ if (!(exp_connect_flags(req->rq_export) & OBD_CONNECT_IBITS) &&
(dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN)) {
dlm_req->lock_desc.l_resource.lr_type = LDLM_IBITS;
dlm_req->lock_desc.l_policy_data.l_inodebits.bits =
lock = ldlm_lock_create(ns, &dlm_req->lock_desc.l_resource.lr_name,
dlm_req->lock_desc.l_resource.lr_type,
dlm_req->lock_desc.l_req_mode,
- cbs, NULL, 0);
-
+ cbs, NULL, 0, LVB_T_NONE);
if (!lock)
GOTO(out, rc = -ENOMEM);
lock->l_req_extent = lock->l_policy_data.l_extent;
err = ldlm_lock_enqueue(ns, &lock, cookie, &flags);
- if (err)
- GOTO(out, err);
+ if (err) {
+ if ((int)err < 0)
+ rc = (int)err;
+ GOTO(out, err);
+ }
dlm_rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
dlm_rep->lock_flags = ldlm_flags_to_wire(flags);
"(err=%d, rc=%d)", err, rc);
if (rc == 0) {
- int lvb_len = ldlm_lvbo_size(lock);
-
- if (lvb_len > 0) {
+ if (req_capsule_has_field(&req->rq_pill, &RMF_DLM_LVB,
+ RCL_SERVER) &&
+ ldlm_lvbo_size(lock) > 0) {
void *buf;
int buflen;
buflen = req_capsule_get_size(&req->rq_pill,
&RMF_DLM_LVB, RCL_SERVER);
buflen = ldlm_lvbo_fill(lock, buf, buflen);
- req_capsule_shrink(&req->rq_pill, &RMF_DLM_LVB,
- buflen, RCL_SERVER);
+ if (buflen >= 0)
+ req_capsule_shrink(&req->rq_pill,
+ &RMF_DLM_LVB,
+ buflen, RCL_SERVER);
+ else
+ rc = buflen;
}
} else {
lock_res_and_lock(lock);
}
EXPORT_SYMBOL(ldlm_handle_enqueue0);
+/**
+ * Old-style LDLM main entry point for server code enqueue.
+ */
int ldlm_handle_enqueue(struct ptlrpc_request *req,
ldlm_completion_callback completion_callback,
ldlm_blocking_callback blocking_callback,
}
EXPORT_SYMBOL(ldlm_handle_enqueue);
+/**
+ * Main LDLM entry point for server code to process lock conversion requests.
+ */
int ldlm_handle_convert0(struct ptlrpc_request *req,
const struct ldlm_request *dlm_req)
{
lock = ldlm_handle2lock(&dlm_req->lock_handle[0]);
if (!lock) {
- req->rq_status = EINVAL;
+ req->rq_status = LUSTRE_EINVAL;
} else {
void *res = NULL;
LDLM_DEBUG(lock, "converted waiting lock");
req->rq_status = 0;
} else {
- req->rq_status = EDEADLOCK;
+ req->rq_status = LUSTRE_EDEADLK;
}
}
}
EXPORT_SYMBOL(ldlm_handle_convert0);
+/**
+ * Old-style main LDLM entry point for server code to process lock conversion
+ * requests.
+ */
int ldlm_handle_convert(struct ptlrpc_request *req)
{
int rc;
}
EXPORT_SYMBOL(ldlm_handle_convert);
-/* Cancel all the locks whos handles are packed into ldlm_request */
+/**
+ * Cancel all the locks whose handles are packed into ldlm_request.
+ *
+ * Called by server code expecting such combined cancel activity
+ * requests.
+ */
int ldlm_request_cancel(struct ptlrpc_request *req,
const struct ldlm_request *dlm_req, int first)
{
res = lock->l_resource;
done++;
+		/* This code is an optimization: it attempts lock granting on
+		 * the resource (which can be CPU-expensive) only after we are
+		 * done cancelling locks in that resource. */
if (res != pres) {
if (pres != NULL) {
ldlm_reprocess_all(pres);
}
EXPORT_SYMBOL(ldlm_request_cancel);
+/**
+ * Main LDLM entry point for server code to cancel locks.
+ *
+ * Typically gets called from service handler on LDLM_CANCEL opc.
+ */
int ldlm_handle_cancel(struct ptlrpc_request *req)
{
struct ldlm_request *dlm_req;
RETURN(rc);
if (!ldlm_request_cancel(req, dlm_req, 0))
- req->rq_status = ESTALE;
+ req->rq_status = LUSTRE_ESTALE;
RETURN(ptlrpc_reply(req));
}
EXPORT_SYMBOL(ldlm_handle_cancel);
#endif /* HAVE_SERVER_SUPPORT */
+/**
+ * Callback handler for receiving incoming blocking ASTs.
+ *
+ * This can only happen on the client side.
+ */
void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
struct ldlm_lock_desc *ld, struct ldlm_lock *lock)
{
EXIT;
}
+/**
+ * Callback handler for receiving incoming completion ASTs.
+ *
+ * This can only happen on the client side.
+ */
static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
struct ldlm_namespace *ns,
struct ldlm_request *dlm_req,
struct ldlm_lock *lock)
{
int lvb_len;
- CFS_LIST_HEAD(ast_list);
- ENTRY;
+ CFS_LIST_HEAD(ast_list);
+ int rc = 0;
+ ENTRY;
- LDLM_DEBUG(lock, "client completion callback handler START");
+ LDLM_DEBUG(lock, "client completion callback handler START");
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
- int to = cfs_time_seconds(1);
- while (to > 0) {
- cfs_schedule_timeout_and_set_state(
- CFS_TASK_INTERRUPTIBLE, to);
- if (lock->l_granted_mode == lock->l_req_mode ||
- lock->l_destroyed)
- break;
- }
- }
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
+ int to = cfs_time_seconds(1);
+ while (to > 0) {
+ cfs_schedule_timeout_and_set_state(
+ CFS_TASK_INTERRUPTIBLE, to);
+ if (lock->l_granted_mode == lock->l_req_mode ||
+ lock->l_flags & LDLM_FL_DESTROYED)
+ break;
+ }
+ }
lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT);
- if (lvb_len > 0) {
+ if (lvb_len < 0) {
+		LDLM_ERROR(lock, "Failed to get lvb_len, rc = %d", lvb_len);
+ GOTO(out, rc = lvb_len);
+ } else if (lvb_len > 0) {
if (lock->l_lvb_len > 0) {
/* for extent lock, lvb contains ost_lvb{}. */
LASSERT(lock->l_lvb_data != NULL);
- LASSERTF(lock->l_lvb_len == lvb_len,
- "preallocated %d, actual %d.\n",
- lock->l_lvb_len, lvb_len);
- } else { /* for layout lock, lvb has variable length */
+
+ if (unlikely(lock->l_lvb_len < lvb_len)) {
+				LDLM_ERROR(lock, "Replied LVB is larger than "
+					   "expected, expected = %d, "
+					   "replied = %d",
+ lock->l_lvb_len, lvb_len);
+ GOTO(out, rc = -EINVAL);
+ }
+ } else if (ldlm_has_layout(lock)) { /* for layout lock, lvb has
+ * variable length */
void *lvb_data;
OBD_ALLOC(lvb_data, lvb_len);
- if (lvb_data == NULL)
- LDLM_ERROR(lock, "no memory.\n");
-
- lock_res_and_lock(lock);
if (lvb_data == NULL) {
- lock->l_flags |= LDLM_FL_FAILED;
- } else {
- LASSERT(lock->l_lvb_data == NULL);
- lock->l_lvb_data = lvb_data;
- lock->l_lvb_len = lvb_len;
+ LDLM_ERROR(lock, "No memory: %d.\n", lvb_len);
+ GOTO(out, rc = -ENOMEM);
}
+
+ lock_res_and_lock(lock);
+ LASSERT(lock->l_lvb_data == NULL);
+ lock->l_lvb_data = lvb_data;
+ lock->l_lvb_len = lvb_len;
unlock_res_and_lock(lock);
}
}
- lock_res_and_lock(lock);
- if (lock->l_destroyed ||
- lock->l_granted_mode == lock->l_req_mode) {
- /* bug 11300: the lock has already been granted */
- unlock_res_and_lock(lock);
- LDLM_DEBUG(lock, "Double grant race happened");
- LDLM_LOCK_RELEASE(lock);
- EXIT;
- return;
- }
+ lock_res_and_lock(lock);
+ if ((lock->l_flags & LDLM_FL_DESTROYED) ||
+ lock->l_granted_mode == lock->l_req_mode) {
+ /* bug 11300: the lock has already been granted */
+ unlock_res_and_lock(lock);
+ LDLM_DEBUG(lock, "Double grant race happened");
+ GOTO(out, rc = 0);
+ }
- /* If we receive the completion AST before the actual enqueue returned,
- * then we might need to switch lock modes, resources, or extents. */
- if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
- lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
- LDLM_DEBUG(lock, "completion AST, new lock mode");
- }
+ /* If we receive the completion AST before the actual enqueue returned,
+ * then we might need to switch lock modes, resources, or extents. */
+ if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
+ lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
+ LDLM_DEBUG(lock, "completion AST, new lock mode");
+ }
- if (lock->l_resource->lr_type != LDLM_PLAIN) {
- ldlm_convert_policy_to_local(req->rq_export,
- dlm_req->lock_desc.l_resource.lr_type,
- &dlm_req->lock_desc.l_policy_data,
- &lock->l_policy_data);
- LDLM_DEBUG(lock, "completion AST, new policy data");
- }
+ if (lock->l_resource->lr_type != LDLM_PLAIN) {
+ ldlm_convert_policy_to_local(req->rq_export,
+ dlm_req->lock_desc.l_resource.lr_type,
+ &dlm_req->lock_desc.l_policy_data,
+ &lock->l_policy_data);
+ LDLM_DEBUG(lock, "completion AST, new policy data");
+ }
ldlm_resource_unlink_lock(lock);
if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
&lock->l_resource->lr_name,
sizeof(lock->l_resource->lr_name)) != 0) {
unlock_res_and_lock(lock);
- if (ldlm_lock_change_resource(ns, lock,
- &dlm_req->lock_desc.l_resource.lr_name) != 0) {
- LDLM_ERROR(lock, "Failed to allocate resource");
- LDLM_LOCK_RELEASE(lock);
- EXIT;
- return;
- }
+ rc = ldlm_lock_change_resource(ns, lock,
+ &dlm_req->lock_desc.l_resource.lr_name);
+ if (rc < 0) {
+ LDLM_ERROR(lock, "Failed to allocate resource");
+ GOTO(out, rc);
+ }
LDLM_DEBUG(lock, "completion AST, new resource");
CERROR("change resource!\n");
lock_res_and_lock(lock);
}
if (dlm_req->lock_flags & LDLM_FL_AST_SENT) {
- /* BL_AST locks are not needed in lru.
- * let ldlm_cancel_lru() be fast. */
+ /* BL_AST locks are not needed in LRU.
+ * Let ldlm_cancel_lru() be fast. */
ldlm_lock_remove_from_lru(lock);
lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
LDLM_DEBUG(lock, "completion AST includes blocking AST");
}
- if (lock->l_lvb_len) {
- if (req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB,
- RCL_CLIENT) < lock->l_lvb_len) {
- LDLM_ERROR(lock, "completion AST did not contain "
- "expected LVB!");
- } else {
- void *lvb = req_capsule_client_get(&req->rq_pill,
- &RMF_DLM_LVB);
- memcpy(lock->l_lvb_data, lvb, lock->l_lvb_len);
- }
- }
+ if (lock->l_lvb_len > 0) {
+ rc = ldlm_fill_lvb(lock, &req->rq_pill, RCL_CLIENT,
+ lock->l_lvb_data, lvb_len);
+ if (rc < 0) {
+ unlock_res_and_lock(lock);
+ GOTO(out, rc);
+ }
+ }
ldlm_grant_lock(lock, &ast_list);
unlock_res_and_lock(lock);
LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)",
lock);
- LDLM_LOCK_RELEASE(lock);
- EXIT;
+ GOTO(out, rc);
+
+out:
+ if (rc < 0) {
+ lock_res_and_lock(lock);
+ lock->l_flags |= LDLM_FL_FAILED;
+ unlock_res_and_lock(lock);
+ cfs_waitq_signal(&lock->l_waitq);
+ }
+ LDLM_LOCK_RELEASE(lock);
}
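/*
 * Editor's note (assumption based on the usual LDLM wakeup pattern):
 * the "out:" failure path above marks the lock LDLM_FL_FAILED and
 * signals l_waitq so the enqueue thread sleeping on the completion
 * wakes up and observes the failure instead of waiting for a timeout.
 */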
+/**
+ * Callback handler for receiving incoming glimpse ASTs.
+ *
+ * This can only happen on the client side. After handling the glimpse AST
+ * we also consider dropping the lock here if it is unused locally for a
+ * long time.
+ */
static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
struct ldlm_namespace *ns,
struct ldlm_request *dlm_req,
}
#ifdef __KERNEL__
-static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi, int mode)
+static int __ldlm_bl_to_thread(struct ldlm_bl_work_item *blwi,
+ ldlm_cancel_flags_t cancel_flags)
{
- struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
- ENTRY;
+ struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
+ ENTRY;
- cfs_spin_lock(&blp->blp_lock);
- if (blwi->blwi_lock && blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) {
- /* add LDLM_FL_DISCARD_DATA requests to the priority list */
- cfs_list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
- } else {
- /* other blocking callbacks are added to the regular list */
- cfs_list_add_tail(&blwi->blwi_entry, &blp->blp_list);
- }
- cfs_spin_unlock(&blp->blp_lock);
+ spin_lock(&blp->blp_lock);
+ if (blwi->blwi_lock &&
+ blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) {
+ /* add LDLM_FL_DISCARD_DATA requests to the priority list */
+ cfs_list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
+ } else {
+ /* other blocking callbacks are added to the regular list */
+ cfs_list_add_tail(&blwi->blwi_entry, &blp->blp_list);
+ }
+ spin_unlock(&blp->blp_lock);
- cfs_waitq_signal(&blp->blp_waitq);
+ cfs_waitq_signal(&blp->blp_waitq);
- /* can not use blwi->blwi_mode as blwi could be already freed in
- LDLM_ASYNC mode */
- if (mode == LDLM_SYNC)
- cfs_wait_for_completion(&blwi->blwi_comp);
+	/* cannot check blwi->blwi_flags as blwi could already be freed in
+	   LCF_ASYNC mode */
+ if (!(cancel_flags & LCF_ASYNC))
+ wait_for_completion(&blwi->blwi_comp);
- RETURN(0);
+ RETURN(0);
}
static inline void init_blwi(struct ldlm_bl_work_item *blwi,
- struct ldlm_namespace *ns,
- struct ldlm_lock_desc *ld,
- cfs_list_t *cancels, int count,
- struct ldlm_lock *lock,
- int mode)
+ struct ldlm_namespace *ns,
+ struct ldlm_lock_desc *ld,
+ cfs_list_t *cancels, int count,
+ struct ldlm_lock *lock,
+ ldlm_cancel_flags_t cancel_flags)
{
- cfs_init_completion(&blwi->blwi_comp);
+ init_completion(&blwi->blwi_comp);
CFS_INIT_LIST_HEAD(&blwi->blwi_head);
- if (cfs_memory_pressure_get())
+ if (memory_pressure_get())
blwi->blwi_mem_pressure = 1;
blwi->blwi_ns = ns;
- blwi->blwi_mode = mode;
+ blwi->blwi_flags = cancel_flags;
if (ld != NULL)
blwi->blwi_ld = *ld;
if (count) {
}
}
+/**
+ * Queues a list of locks \a cancels containing \a count locks
+ * for later processing by a blocking thread. If \a count is zero,
+ * then the lock referenced as \a lock is queued instead.
+ *
+ * The blocking thread would then call the ->l_blocking_ast callback on
+ * the lock. If list addition fails, an error is returned and the caller
+ * is supposed to call ->l_blocking_ast itself.
+ */
static int ldlm_bl_to_thread(struct ldlm_namespace *ns,
- struct ldlm_lock_desc *ld, struct ldlm_lock *lock,
- cfs_list_t *cancels, int count, int mode)
+ struct ldlm_lock_desc *ld,
+ struct ldlm_lock *lock,
+ cfs_list_t *cancels, int count,
+ ldlm_cancel_flags_t cancel_flags)
{
- ENTRY;
+ ENTRY;
- if (cancels && count == 0)
- RETURN(0);
+ if (cancels && count == 0)
+ RETURN(0);
- if (mode == LDLM_SYNC) {
- /* if it is synchronous call do minimum mem alloc, as it could
- * be triggered from kernel shrinker
- */
- struct ldlm_bl_work_item blwi;
- memset(&blwi, 0, sizeof(blwi));
- init_blwi(&blwi, ns, ld, cancels, count, lock, LDLM_SYNC);
- RETURN(__ldlm_bl_to_thread(&blwi, LDLM_SYNC));
- } else {
- struct ldlm_bl_work_item *blwi;
- OBD_ALLOC(blwi, sizeof(*blwi));
- if (blwi == NULL)
- RETURN(-ENOMEM);
- init_blwi(blwi, ns, ld, cancels, count, lock, LDLM_ASYNC);
+ if (cancel_flags & LCF_ASYNC) {
+ struct ldlm_bl_work_item *blwi;
- RETURN(__ldlm_bl_to_thread(blwi, LDLM_ASYNC));
- }
+ OBD_ALLOC(blwi, sizeof(*blwi));
+ if (blwi == NULL)
+ RETURN(-ENOMEM);
+ init_blwi(blwi, ns, ld, cancels, count, lock, cancel_flags);
+
+ RETURN(__ldlm_bl_to_thread(blwi, cancel_flags));
+ } else {
+		/* If it is a synchronous call, do minimal memory allocation,
+		 * as it could be triggered from the kernel shrinker.
+		 */
+ struct ldlm_bl_work_item blwi;
+
+ memset(&blwi, 0, sizeof(blwi));
+ init_blwi(&blwi, ns, ld, cancels, count, lock, cancel_flags);
+ RETURN(__ldlm_bl_to_thread(&blwi, cancel_flags));
+ }
}
#endif
int ldlm_bl_to_thread_lock(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
- struct ldlm_lock *lock)
+ struct ldlm_lock *lock)
{
#ifdef __KERNEL__
- RETURN(ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LDLM_ASYNC));
+ return ldlm_bl_to_thread(ns, ld, lock, NULL, 0, LCF_ASYNC);
#else
- RETURN(-ENOSYS);
+ return -ENOSYS;
#endif
}
int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
- cfs_list_t *cancels, int count, int mode)
+ cfs_list_t *cancels, int count,
+ ldlm_cancel_flags_t cancel_flags)
{
#ifdef __KERNEL__
- RETURN(ldlm_bl_to_thread(ns, ld, NULL, cancels, count, mode));
+ return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
#else
- RETURN(-ENOSYS);
+ return -ENOSYS;
#endif
}
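/*
 * Illustrative usage (editor's sketch, hypothetical caller): handing
 * work to the blocking threads in both modes. With LCF_ASYNC the work
 * item is heap-allocated and freed by the blocking thread itself;
 * without it the item lives on the caller's stack and the caller blocks
 * on blwi_comp until processing completes.
 */
static void example_queue_for_blocking_thread(struct ldlm_namespace *ns,
					      struct ldlm_lock_desc *ld,
					      struct ldlm_lock *lock,
					      cfs_list_t *cancels, int count)
{
	/* Fire-and-forget blocking AST for a single lock. */
	ldlm_bl_to_thread_lock(ns, ld, lock);

	/* Synchronous cancel of a prepared list; returns only after the
	 * blocking thread has processed it. */
	ldlm_bl_to_thread_list(ns, ld, cancels, count, 0);
}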
CWARN("Send reply failed, maybe cause bug 21636.\n");
}
+static int ldlm_handle_qc_callback(struct ptlrpc_request *req)
+{
+ struct obd_quotactl *oqctl;
+ struct client_obd *cli = &req->rq_export->exp_obd->u.cli;
+
+ oqctl = req_capsule_client_get(&req->rq_pill, &RMF_OBD_QUOTACTL);
+ if (oqctl == NULL) {
+ CERROR("Can't unpack obd_quotactl\n");
+ RETURN(-EPROTO);
+ }
+
+ oqctl->qc_stat = ptlrpc_status_ntoh(oqctl->qc_stat);
+
+ cli->cl_qchk_stat = oqctl->qc_stat;
+ return 0;
+}
+
/* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */
static int ldlm_callback_handler(struct ptlrpc_request *req)
{
rc = ldlm_handle_setinfo(req);
ldlm_callback_reply(req, rc);
RETURN(0);
- case OBD_LOG_CANCEL: /* remove this eventually - for 1.4.0 compat */
- CERROR("shouldn't be handling OBD_LOG_CANCEL on DLM thread\n");
- req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
- RETURN(0);
- rc = llog_origin_handle_cancel(req);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
- RETURN(0);
- ldlm_callback_reply(req, rc);
- RETURN(0);
case LLOG_ORIGIN_HANDLE_CREATE:
req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
rc = llog_origin_handle_close(req);
ldlm_callback_reply(req, rc);
RETURN(0);
+ case OBD_QC_CALLBACK:
+ req_capsule_set(&req->rq_pill, &RQF_QC_CALLBACK);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_QC_CALLBACK_NET))
+ RETURN(0);
+ rc = ldlm_handle_qc_callback(req);
+ ldlm_callback_reply(req, rc);
+ RETURN(0);
default:
CERROR("unknown opcode %u\n",
lustre_msg_get_opc(req->rq_reqmsg));
* which the server has already started a blocking callback on. */
if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) &&
lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
- rc = ldlm_cli_cancel(&dlm_req->lock_handle[0]);
+ rc = ldlm_cli_cancel(&dlm_req->lock_handle[0], 0);
if (rc < 0)
CERROR("ldlm_cli_cancel: %d\n", rc);
}
&dlm_req->lock_handle[0]);
RETURN(0);
}
- /* BL_AST locks are not needed in lru.
- * let ldlm_cancel_lru() be fast. */
+ /* BL_AST locks are not needed in LRU.
+ * Let ldlm_cancel_lru() be fast. */
ldlm_lock_remove_from_lru(lock);
lock->l_flags |= LDLM_FL_BL_AST;
}
}
#ifdef HAVE_SERVER_SUPPORT
+/**
+ * Main handler for canceld thread.
+ *
+ * Separated into its own thread to avoid deadlocks.
+ */
static int ldlm_cancel_handler(struct ptlrpc_request *req)
{
int rc;
if (rc)
break;
RETURN(0);
- case OBD_LOG_CANCEL:
- req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
- RETURN(0);
- rc = llog_origin_handle_cancel(req);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
- RETURN(0);
- ldlm_callback_reply(req, rc);
- RETURN(0);
default:
CERROR("invalid opcode %d\n",
lustre_msg_get_opc(req->rq_reqmsg));
#ifdef __KERNEL__
static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp)
{
- struct ldlm_bl_work_item *blwi = NULL;
- static unsigned int num_bl = 0;
+ struct ldlm_bl_work_item *blwi = NULL;
+ static unsigned int num_bl = 0;
- cfs_spin_lock(&blp->blp_lock);
+ spin_lock(&blp->blp_lock);
/* process a request from the blp_list at least every blp_num_threads */
if (!cfs_list_empty(&blp->blp_list) &&
(cfs_list_empty(&blp->blp_prio_list) || num_bl == 0))
num_bl = 0;
cfs_list_del(&blwi->blwi_entry);
}
- cfs_spin_unlock(&blp->blp_lock);
+ spin_unlock(&blp->blp_lock);
- return blwi;
+ return blwi;
}
/* This only contains temporary data until the thread starts */
struct ldlm_bl_thread_data {
- char bltd_name[CFS_CURPROC_COMM_MAX];
- struct ldlm_bl_pool *bltd_blp;
- cfs_completion_t bltd_comp;
- int bltd_num;
+ char bltd_name[CFS_CURPROC_COMM_MAX];
+ struct ldlm_bl_pool *bltd_blp;
+ struct completion bltd_comp;
+ int bltd_num;
};
static int ldlm_bl_thread_main(void *arg);
static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
{
- struct ldlm_bl_thread_data bltd = { .bltd_blp = blp };
- int rc;
-
- cfs_init_completion(&bltd.bltd_comp);
- rc = cfs_create_thread(ldlm_bl_thread_main, &bltd, 0);
- if (rc < 0) {
- CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %d\n",
- cfs_atomic_read(&blp->blp_num_threads), rc);
- return rc;
- }
- cfs_wait_for_completion(&bltd.bltd_comp);
+ struct ldlm_bl_thread_data bltd = { .bltd_blp = blp };
+ cfs_task_t *task;
+
+ init_completion(&bltd.bltd_comp);
+ bltd.bltd_num = cfs_atomic_read(&blp->blp_num_threads);
+ snprintf(bltd.bltd_name, sizeof(bltd.bltd_name) - 1,
+ "ldlm_bl_%02d", bltd.bltd_num);
+ task = kthread_run(ldlm_bl_thread_main, &bltd, bltd.bltd_name);
+ if (IS_ERR(task)) {
+ CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
+ cfs_atomic_read(&blp->blp_num_threads), PTR_ERR(task));
+ return PTR_ERR(task);
+ }
+ wait_for_completion(&bltd.bltd_comp);
- return 0;
+ return 0;
}
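/*
 * Editor's note: the wait_for_completion() above is what makes passing
 * the stack-allocated bltd to kthread_run() safe. The new thread must
 * complete(&bltd->bltd_comp) before this function returns and the stack
 * frame disappears, which ldlm_bl_thread_main() does right after
 * copying out what it needs.
 */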
+/**
+ * Main blocking requests processing thread.
+ *
+ * Callers put locks into its queue by calling ldlm_bl_to_thread().
+ * This thread eventually makes the actual call to ->l_blocking_ast for
+ * the queued locks.
+ */
static int ldlm_bl_thread_main(void *arg)
{
struct ldlm_bl_pool *blp;
blp = bltd->bltd_blp;
- bltd->bltd_num =
- cfs_atomic_inc_return(&blp->blp_num_threads) - 1;
+ cfs_atomic_inc(&blp->blp_num_threads);
cfs_atomic_inc(&blp->blp_busy_threads);
- snprintf(bltd->bltd_name, sizeof(bltd->bltd_name) - 1,
- "ldlm_bl_%02d", bltd->bltd_num);
- cfs_daemonize(bltd->bltd_name);
-
- cfs_complete(&bltd->bltd_comp);
+ complete(&bltd->bltd_comp);
/* cannot use bltd after this, it is only on caller's stack */
}
ldlm_bl_thread_start(blp);
if (blwi->blwi_mem_pressure)
- cfs_memory_pressure_set();
+ memory_pressure_set();
if (blwi->blwi_count) {
int count;
- /* The special case when we cancel locks in lru
+		/* This is the special case where we cancel locks in the LRU
* asynchronously and pass the list of locks here.
* Thus locks are marked LDLM_FL_CANCELING, but NOT
* canceled locally yet. */
count = ldlm_cli_cancel_list_local(&blwi->blwi_head,
blwi->blwi_count,
LCF_BL_AST);
- ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL, 0);
+ ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
+ blwi->blwi_flags);
} else {
ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
blwi->blwi_lock);
}
if (blwi->blwi_mem_pressure)
- cfs_memory_pressure_clr();
+ memory_pressure_clr();
- if (blwi->blwi_mode == LDLM_ASYNC)
- OBD_FREE(blwi, sizeof(*blwi));
- else
- cfs_complete(&blwi->blwi_comp);
+ if (blwi->blwi_flags & LCF_ASYNC)
+ OBD_FREE(blwi, sizeof(*blwi));
+ else
+ complete(&blwi->blwi_comp);
}
cfs_atomic_dec(&blp->blp_busy_threads);
cfs_atomic_dec(&blp->blp_num_threads);
- cfs_complete(&blp->blp_comp);
+ complete(&blp->blp_comp);
RETURN(0);
}
{
int rc = 0;
ENTRY;
- cfs_mutex_lock(&ldlm_ref_mutex);
+ mutex_lock(&ldlm_ref_mutex);
if (++ldlm_refcount == 1) {
rc = ldlm_setup();
if (rc)
ldlm_refcount--;
}
- cfs_mutex_unlock(&ldlm_ref_mutex);
+ mutex_unlock(&ldlm_ref_mutex);
RETURN(rc);
}
void ldlm_put_ref(void)
{
ENTRY;
- cfs_mutex_lock(&ldlm_ref_mutex);
+ mutex_lock(&ldlm_ref_mutex);
if (ldlm_refcount == 1) {
int rc = ldlm_cleanup();
if (rc)
} else {
ldlm_refcount--;
}
- cfs_mutex_unlock(&ldlm_ref_mutex);
+ mutex_unlock(&ldlm_ref_mutex);
EXIT;
}
int ldlm_init_export(struct obd_export *exp)
{
+ int rc;
ENTRY;
exp->exp_lock_hash =
if (!exp->exp_lock_hash)
RETURN(-ENOMEM);
+ rc = ldlm_init_flock_export(exp);
+ if (rc)
+ GOTO(err, rc);
+
RETURN(0);
+err:
+ ldlm_destroy_export(exp);
+ RETURN(rc);
}
EXPORT_SYMBOL(ldlm_init_export);
.psc_name = "ldlm_cbd",
.psc_watchdog_factor = 2,
.psc_buf = {
- .bc_nbufs = LDLM_NBUFS,
+ .bc_nbufs = LDLM_CLIENT_NBUFS,
.bc_buf_size = LDLM_BUFSIZE,
.bc_req_max_size = LDLM_MAXREQSIZE,
.bc_rep_max_size = LDLM_MAXREPSIZE,
.psc_name = "ldlm_canceld",
.psc_watchdog_factor = 6,
.psc_buf = {
- .bc_nbufs = LDLM_NBUFS,
+ .bc_nbufs = LDLM_SERVER_NBUFS,
.bc_buf_size = LDLM_BUFSIZE,
.bc_req_max_size = LDLM_MAXREQSIZE,
.bc_rep_max_size = LDLM_MAXREPSIZE,
OBD_ALLOC(blp, sizeof(*blp));
if (blp == NULL)
GOTO(out, rc = -ENOMEM);
- ldlm_state->ldlm_bl_pool = blp;
+ ldlm_state->ldlm_bl_pool = blp;
- cfs_spin_lock_init(&blp->blp_lock);
+ spin_lock_init(&blp->blp_lock);
CFS_INIT_LIST_HEAD(&blp->blp_list);
CFS_INIT_LIST_HEAD(&blp->blp_prio_list);
cfs_waitq_init(&blp->blp_waitq);
}
# ifdef HAVE_SERVER_SUPPORT
- CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
- expired_lock_thread.elt_state = ELT_STOPPED;
- cfs_waitq_init(&expired_lock_thread.elt_waitq);
+ CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
+ expired_lock_thread.elt_state = ELT_STOPPED;
+ cfs_waitq_init(&expired_lock_thread.elt_waitq);
- CFS_INIT_LIST_HEAD(&waiting_locks_list);
- cfs_spin_lock_init(&waiting_locks_spinlock);
- cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0);
+ CFS_INIT_LIST_HEAD(&waiting_locks_list);
+ spin_lock_init(&waiting_locks_spinlock);
+ cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0);
- rc = cfs_create_thread(expired_lock_main, NULL, CFS_DAEMON_FLAGS);
- if (rc < 0) {
+ rc = PTR_ERR(kthread_run(expired_lock_main, NULL, "ldlm_elt"));
+ if (IS_ERR_VALUE(rc)) {
CERROR("Cannot start ldlm expired-lock thread: %d\n", rc);
GOTO(out, rc);
}
- cfs_wait_event(expired_lock_thread.elt_waitq,
- expired_lock_thread.elt_state == ELT_READY);
+ cfs_wait_event(expired_lock_thread.elt_waitq,
+ expired_lock_thread.elt_state == ELT_READY);
# endif /* HAVE_SERVER_SUPPORT */
rc = ldlm_pools_init();
while (cfs_atomic_read(&blp->blp_num_threads) > 0) {
struct ldlm_bl_work_item blwi = { .blwi_ns = NULL };
- cfs_init_completion(&blp->blp_comp);
+ init_completion(&blp->blp_comp);
- cfs_spin_lock(&blp->blp_lock);
+ spin_lock(&blp->blp_lock);
cfs_list_add_tail(&blwi.blwi_entry, &blp->blp_list);
cfs_waitq_signal(&blp->blp_waitq);
- cfs_spin_unlock(&blp->blp_lock);
+ spin_unlock(&blp->blp_lock);
- cfs_wait_for_completion(&blp->blp_comp);
+ wait_for_completion(&blp->blp_comp);
}
OBD_FREE(blp, sizeof(*blp));
int ldlm_init(void)
{
- cfs_mutex_init(&ldlm_ref_mutex);
- cfs_mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
- cfs_mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
- ldlm_resource_slab = cfs_mem_cache_create("ldlm_resources",
- sizeof(struct ldlm_resource), 0,
- CFS_SLAB_HWCACHE_ALIGN);
- if (ldlm_resource_slab == NULL)
- return -ENOMEM;
+ mutex_init(&ldlm_ref_mutex);
+ mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
+ mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
+ ldlm_resource_slab = kmem_cache_create("ldlm_resources",
+ sizeof(struct ldlm_resource), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (ldlm_resource_slab == NULL)
+ return -ENOMEM;
- ldlm_lock_slab = cfs_mem_cache_create("ldlm_locks",
+ ldlm_lock_slab = kmem_cache_create("ldlm_locks",
sizeof(struct ldlm_lock), 0,
- CFS_SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU);
+ SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU, NULL);
if (ldlm_lock_slab == NULL) {
- cfs_mem_cache_destroy(ldlm_resource_slab);
+ kmem_cache_destroy(ldlm_resource_slab);
return -ENOMEM;
}
- ldlm_interval_slab = cfs_mem_cache_create("interval_node",
+ ldlm_interval_slab = kmem_cache_create("interval_node",
sizeof(struct ldlm_interval),
- 0, CFS_SLAB_HWCACHE_ALIGN);
+ 0, SLAB_HWCACHE_ALIGN, NULL);
if (ldlm_interval_slab == NULL) {
- cfs_mem_cache_destroy(ldlm_resource_slab);
- cfs_mem_cache_destroy(ldlm_lock_slab);
+ kmem_cache_destroy(ldlm_resource_slab);
+ kmem_cache_destroy(ldlm_lock_slab);
return -ENOMEM;
}
#if LUSTRE_TRACKS_LOCK_EXP_REFS
void ldlm_exit(void)
{
- int rc;
- if (ldlm_refcount)
- CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount);
- rc = cfs_mem_cache_destroy(ldlm_resource_slab);
- LASSERTF(rc == 0, "couldn't free ldlm resource slab\n");
+ if (ldlm_refcount)
+ CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount);
+ kmem_cache_destroy(ldlm_resource_slab);
#ifdef __KERNEL__
- /* ldlm_lock_put() use RCU to call ldlm_lock_free, so need call
- * synchronize_rcu() to wait a grace period elapsed, so that
- * ldlm_lock_free() get a chance to be called. */
- synchronize_rcu();
+	/* ldlm_lock_put() uses RCU to call ldlm_lock_free(), so we need to
+	 * call synchronize_rcu() to wait for a grace period to elapse, so
+	 * that ldlm_lock_free() gets a chance to be called. */
+ synchronize_rcu();
#endif
- rc = cfs_mem_cache_destroy(ldlm_lock_slab);
- LASSERTF(rc == 0, "couldn't free ldlm lock slab\n");
- rc = cfs_mem_cache_destroy(ldlm_interval_slab);
- LASSERTF(rc == 0, "couldn't free interval node slab\n");
+ kmem_cache_destroy(ldlm_lock_slab);
+ kmem_cache_destroy(ldlm_interval_slab);
}
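/*
 * Editor's illustrative sketch: how objects come out of the slab caches
 * created in ldlm_init() and destroyed above. GFP_NOFS is assumed since
 * LDLM allocations happen in filesystem context; Lustre proper wraps
 * such allocations in its OBD_SLAB_ALLOC() helpers.
 */
static struct ldlm_resource *example_resource_alloc(void)
{
	/* Zeroed allocation from the dedicated ldlm_resources cache. */
	return kmem_cache_zalloc(ldlm_resource_slab, GFP_NOFS);
}

static void example_resource_free(struct ldlm_resource *res)
{
	kmem_cache_free(ldlm_resource_slab, res);
}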