* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2012, Intel Corporation.
+ * Copyright (c) 2010, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
CFS_MODULE_PARM(ldlm_cpts, "s", charp, 0444,
"CPU partitions ldlm threads should run on");
-extern cfs_mem_cache_t *ldlm_resource_slab;
-extern cfs_mem_cache_t *ldlm_lock_slab;
+extern struct kmem_cache *ldlm_resource_slab;
+extern struct kmem_cache *ldlm_lock_slab;
static struct mutex ldlm_ref_mutex;
static int ldlm_refcount;
struct ldlm_bl_pool {
spinlock_t blp_lock;
- /*
- * blp_prio_list is used for callbacks that should be handled
- * as a priority. It is used for LDLM_FL_DISCARD_DATA requests.
- * see bug 13843
- */
- cfs_list_t blp_prio_list;
-
- /*
- * blp_list is used for all other callbacks which are likely
- * to take longer to process.
- */
- cfs_list_t blp_list;
-
- cfs_waitq_t blp_waitq;
- struct completion blp_comp;
- cfs_atomic_t blp_num_threads;
- cfs_atomic_t blp_busy_threads;
- int blp_min_threads;
- int blp_max_threads;
+ /*
+ * blp_prio_list is used for callbacks that should be handled
+ * as a priority. It is used for LDLM_FL_DISCARD_DATA requests.
+ * see bug 13843
+ */
+ cfs_list_t blp_prio_list;
+
+ /*
+ * blp_list is used for all other callbacks which are likely
+ * to take longer to process.
+ */
+ cfs_list_t blp_list;
+
+ wait_queue_head_t blp_waitq;
+ struct completion blp_comp;
+ atomic_t blp_num_threads;
+ atomic_t blp_busy_threads;
+ int blp_min_threads;
+ int blp_max_threads;
};
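The conversion above keeps the standard kernel producer/consumer shape: a spinlock-protected list that producers append to under blp_lock before calling wake_up(&blp->blp_waitq), with worker threads sleeping on the same wait queue (see ldlm_bl_thread_main() further down). A minimal sketch of the producer side, using placeholder names (demo_pool, demo_pool_add) rather than Lustre symbols:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/atomic.h>

struct demo_pool {
	spinlock_t		lock;		/* like blp_lock */
	struct list_head	items;		/* like blp_list */
	wait_queue_head_t	waitq;		/* like blp_waitq */
	atomic_t		num_threads;	/* like blp_num_threads */
};

/* producer: queue an item under the lock, then wake one sleeping worker */
static void demo_pool_add(struct demo_pool *p, struct list_head *item)
{
	spin_lock(&p->lock);
	list_add_tail(item, &p->items);
	spin_unlock(&p->lock);
	wake_up(&p->waitq);
}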
struct ldlm_bl_work_item {
* All access to it should be under waiting_locks_spinlock.
*/
static cfs_list_t waiting_locks_list;
-static cfs_timer_t waiting_locks_timer;
+static struct timer_list waiting_locks_timer;
static struct expired_lock_thread {
- cfs_waitq_t elt_waitq;
+ wait_queue_head_t elt_waitq;
int elt_state;
int elt_dump;
cfs_list_t elt_expired_locks;
*/
static int expired_lock_main(void *arg)
{
- cfs_list_t *expired = &expired_lock_thread.elt_expired_locks;
- struct l_wait_info lwi = { 0 };
- int do_dump;
+ cfs_list_t *expired = &expired_lock_thread.elt_expired_locks;
+ struct l_wait_info lwi = { 0 };
+ int do_dump;
- ENTRY;
- cfs_daemonize("ldlm_elt");
+ ENTRY;
- expired_lock_thread.elt_state = ELT_READY;
- cfs_waitq_signal(&expired_lock_thread.elt_waitq);
+ expired_lock_thread.elt_state = ELT_READY;
+ wake_up(&expired_lock_thread.elt_waitq);
- while (1) {
- l_wait_event(expired_lock_thread.elt_waitq,
- have_expired_locks() ||
- expired_lock_thread.elt_state == ELT_TERMINATE,
- &lwi);
+ while (1) {
+ l_wait_event(expired_lock_thread.elt_waitq,
+ have_expired_locks() ||
+ expired_lock_thread.elt_state == ELT_TERMINATE,
+ &lwi);
spin_lock_bh(&waiting_locks_spinlock);
if (expired_lock_thread.elt_dump) {
libcfs_run_lbug_upcall(&msgdata);
spin_lock_bh(&waiting_locks_spinlock);
- expired_lock_thread.elt_dump = 0;
- }
+ expired_lock_thread.elt_dump = 0;
+ }
- do_dump = 0;
+ do_dump = 0;
- while (!cfs_list_empty(expired)) {
- struct obd_export *export;
- struct ldlm_lock *lock;
+ while (!cfs_list_empty(expired)) {
+ struct obd_export *export;
+ struct ldlm_lock *lock;
- lock = cfs_list_entry(expired->next, struct ldlm_lock,
- l_pending_chain);
- if ((void *)lock < LP_POISON + CFS_PAGE_SIZE &&
- (void *)lock >= LP_POISON) {
+ lock = cfs_list_entry(expired->next, struct ldlm_lock,
+ l_pending_chain);
+ if ((void *)lock < LP_POISON + PAGE_CACHE_SIZE &&
+ (void *)lock >= LP_POISON) {
spin_unlock_bh(&waiting_locks_spinlock);
- CERROR("free lock on elt list %p\n", lock);
- LBUG();
- }
- cfs_list_del_init(&lock->l_pending_chain);
- if ((void *)lock->l_export < LP_POISON + CFS_PAGE_SIZE &&
- (void *)lock->l_export >= LP_POISON) {
- CERROR("lock with free export on elt list %p\n",
- lock->l_export);
- lock->l_export = NULL;
- LDLM_ERROR(lock, "free export");
- /* release extra ref grabbed by
- * ldlm_add_waiting_lock() or
- * ldlm_failed_ast() */
- LDLM_LOCK_RELEASE(lock);
- continue;
- }
+ CERROR("free lock on elt list %p\n", lock);
+ LBUG();
+ }
+ cfs_list_del_init(&lock->l_pending_chain);
+ if ((void *)lock->l_export <
+ LP_POISON + PAGE_CACHE_SIZE &&
+ (void *)lock->l_export >= LP_POISON) {
+ CERROR("lock with free export on elt list %p\n",
+ lock->l_export);
+ lock->l_export = NULL;
+ LDLM_ERROR(lock, "free export");
+ /* release extra ref grabbed by
+ * ldlm_add_waiting_lock() or
+ * ldlm_failed_ast() */
+ LDLM_LOCK_RELEASE(lock);
+ continue;
+ }
- if (lock->l_destroyed) {
+ if (ldlm_is_destroyed(lock)) {
/* release the lock refcount where
* waiting_locks_callback() found it */
LDLM_LOCK_RELEASE(lock);
}
spin_unlock_bh(&waiting_locks_spinlock);
- if (do_dump && obd_dump_on_eviction) {
- CERROR("dump the log upon eviction\n");
- libcfs_debug_dumplog();
- }
+ if (do_dump && obd_dump_on_eviction) {
+ CERROR("dump the log upon eviction\n");
+ libcfs_debug_dumplog();
+ }
- if (expired_lock_thread.elt_state == ELT_TERMINATE)
- break;
- }
+ if (expired_lock_thread.elt_state == ELT_TERMINATE)
+ break;
+ }
- expired_lock_thread.elt_state = ELT_STOPPED;
- cfs_waitq_signal(&expired_lock_thread.elt_waitq);
- RETURN(0);
+ expired_lock_thread.elt_state = ELT_STOPPED;
+ wake_up(&expired_lock_thread.elt_waitq);
+ RETURN(0);
}
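expired_lock_main() now announces readiness by setting ELT_READY and calling wake_up(), while ldlm_setup() (later in this patch) starts it with kthread_run() and waits for that state with wait_event(), replacing cfs_create_thread()/cfs_daemonize()/cfs_wait_event(). A stripped-down sketch of the same handshake, with demo_* placeholder names:

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/err.h>

static int demo_ready;
static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);

static int demo_thread(void *arg)
{
	demo_ready = 1;			/* analogous to ELT_READY */
	wake_up(&demo_waitq);
	/* ... main loop, as in expired_lock_main() ... */
	return 0;
}

static int demo_start(void)
{
	struct task_struct *task;

	/* kthread_run() names the thread itself; no cfs_daemonize() needed */
	task = kthread_run(demo_thread, NULL, "demo_elt");
	if (IS_ERR(task))
		return PTR_ERR(task);
	wait_event(demo_waitq, demo_ready);	/* wait for the READY signal */
	return 0;
}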
static int ldlm_add_waiting_lock(struct ldlm_lock *lock);
(lock->l_req_mode == LCK_GROUP))
break;
- if (ptlrpc_check_suspend()) {
- /* there is a case when we talk to one mds, holding
- * lock from another mds. this way we easily can get
- * here, if second mds is being recovered. so, we
- * suspend timeouts. bug 6019 */
-
- LDLM_ERROR(lock, "recharge timeout: %s@%s nid %s ",
- lock->l_export->exp_client_uuid.uuid,
- lock->l_export->exp_connection->c_remote_uuid.uuid,
- libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid));
-
- cfs_list_del_init(&lock->l_pending_chain);
- if (lock->l_destroyed) {
- /* relay the lock refcount decrease to
- * expired lock thread */
- cfs_list_add(&lock->l_pending_chain,
- &expired_lock_thread.elt_expired_locks);
- } else {
- __ldlm_add_waiting_lock(lock,
- ldlm_get_enq_timeout(lock));
- }
- continue;
- }
-
- /* if timeout overlaps the activation time of suspended timeouts
- * then extend it to give a chance for client to reconnect */
- if (cfs_time_before(cfs_time_sub(lock->l_callback_timeout,
- cfs_time_seconds(obd_timeout)/2),
- ptlrpc_suspend_wakeup_time())) {
- LDLM_ERROR(lock, "extend timeout due to recovery: %s@%s nid %s ",
- lock->l_export->exp_client_uuid.uuid,
- lock->l_export->exp_connection->c_remote_uuid.uuid,
- libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid));
-
- cfs_list_del_init(&lock->l_pending_chain);
- if (lock->l_destroyed) {
- /* relay the lock refcount decrease to
- * expired lock thread */
- cfs_list_add(&lock->l_pending_chain,
- &expired_lock_thread.elt_expired_locks);
- } else {
- __ldlm_add_waiting_lock(lock,
- ldlm_get_enq_timeout(lock));
- }
- continue;
- }
-
/* Check if we need to prolong timeout */
if (!OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_HPREQ_TIMEOUT) &&
ldlm_lock_busy(lock)) {
ldlm_lock_to_ns(lock)->ns_timeouts++;
LDLM_ERROR(lock, "lock callback timer expired after %lds: "
"evicting client at %s ",
- cfs_time_current_sec()- lock->l_last_activity,
+ cfs_time_current_sec() - lock->l_last_activity,
libcfs_nid2str(
lock->l_export->exp_connection->c_peer.nid));
if (obd_dump_on_timeout && need_dump)
expired_lock_thread.elt_dump = __LINE__;
- cfs_waitq_signal(&expired_lock_thread.elt_waitq);
+ wake_up(&expired_lock_thread.elt_waitq);
}
/*
int timeout = ldlm_get_enq_timeout(lock);
/* NB: must be called with hold of lock_res_and_lock() */
- LASSERT(lock->l_res_locked);
- lock->l_waited = 1;
+ LASSERT(ldlm_is_res_locked(lock));
+ ldlm_set_waited(lock);
- LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK));
+ LASSERT(!ldlm_is_cancel_on_block(lock));
spin_lock_bh(&waiting_locks_spinlock);
- if (lock->l_destroyed) {
+ if (ldlm_is_destroyed(lock)) {
static cfs_time_t next;
spin_unlock_bh(&waiting_locks_spinlock);
LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)");
# ifdef HAVE_SERVER_SUPPORT
static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
{
- LASSERT(lock->l_res_locked);
- LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK));
+ LASSERT(ldlm_is_res_locked(lock) && !ldlm_is_cancel_on_block(lock));
RETURN(1);
}
LDLM_LOCK_GET(lock);
cfs_list_add(&lock->l_pending_chain,
&expired_lock_thread.elt_expired_locks);
- cfs_waitq_signal(&expired_lock_thread.elt_waitq);
+ wake_up(&expired_lock_thread.elt_waitq);
spin_unlock_bh(&waiting_locks_spinlock);
#else
class_fail_export(lock->l_export);
libcfs_nid2str(peer.nid));
ldlm_lock_cancel(lock);
rc = -ERESTART;
- } else if (lock->l_flags & LDLM_FL_CANCEL) {
+ } else if (ldlm_is_cancel(lock)) {
LDLM_DEBUG(lock, "%s AST timeout from nid %s, but "
"cancel was received (AST reply lost?)",
ast_type, libcfs_nid2str(peer.nid));
LDLM_LOCK_RELEASE(lock);
if (rc == -ERESTART)
- cfs_atomic_inc(&arg->restart);
+ atomic_inc(&arg->restart);
- RETURN(0);
+ RETURN(0);
}
static inline int ldlm_ast_fini(struct ptlrpc_request *req,
rc = ptl_send_rpc(req, 1);
ptlrpc_req_finished(req);
if (rc == 0)
- cfs_atomic_inc(&arg->restart);
+ atomic_inc(&arg->restart);
} else {
LDLM_LOCK_GET(lock);
ptlrpc_set_add_req(arg->set, req);
RETURN(0);
}
- if (lock->l_destroyed) {
+ if (ldlm_is_destroyed(lock)) {
/* What's the point? */
unlock_res_and_lock(lock);
ptlrpc_req_finished(req);
RETURN(0);
}
- if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)
+ if (ldlm_is_cancel_on_block(lock))
instant_cancel = 1;
body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
body->lock_handle[0] = lock->l_remote_handle;
body->lock_desc = *desc;
- body->lock_flags |= ldlm_flags_to_wire(lock->l_flags & LDLM_AST_FLAGS);
+ body->lock_flags |= ldlm_flags_to_wire(lock->l_flags & LDLM_FL_AST_MASK);
LDLM_DEBUG(lock, "server preparing blocking AST");
if (AT_OFF)
req->rq_timeout = ldlm_get_rq_timeout();
+ lock->l_last_activity = cfs_time_current_sec();
+
if (lock->l_export && lock->l_export->exp_nid_stats &&
lock->l_export->exp_nid_stats->nid_ldlm_stats)
lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
total_enqueue_wait = cfs_time_sub(cfs_time_current_sec(),
lock->l_last_activity);
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_OST_LDLM_REPLY_NET)) {
+ LDLM_DEBUG(lock, "dropping CP AST");
+ RETURN(0);
+ }
+
req = ptlrpc_request_alloc(lock->l_export->exp_imp_reverse,
&RQF_LDLM_CP_CALLBACK);
if (req == NULL)
/* server namespace, doesn't need lock */
lvb_len = ldlm_lvbo_size(lock);
+ /* LU-3124 & LU-2187: do not return the layout in the completion AST,
+ * because that may deadlock (LU-2187) or the client may not have enough
+ * space for a large layout. The layout will be returned to the client
+ * with an extra RPC to fetch xattr.lov */
+ if (ldlm_has_layout(lock))
+ lvb_len = 0;
+
req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT, lvb_len);
rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_CP_CALLBACK);
if (rc) {
LDLM_DEBUG(lock, "server preparing completion AST (after %lds wait)",
total_enqueue_wait);
+ lock->l_last_activity = cfs_time_current_sec();
+
/* Server-side enqueue wait time estimate, used in
__ldlm_add_waiting_lock to set future enqueue timers */
if (total_enqueue_wait < ldlm_get_enq_timeout(lock))
/* We only send real blocking ASTs after the lock is granted */
lock_res_and_lock(lock);
- if (lock->l_flags & LDLM_FL_AST_SENT) {
+ if (ldlm_is_ast_sent(lock)) {
body->lock_flags |= ldlm_flags_to_wire(LDLM_FL_AST_SENT);
/* Copy AST flags like LDLM_FL_DISCARD_DATA. */
body->lock_flags |= ldlm_flags_to_wire(lock->l_flags &
- LDLM_AST_FLAGS);
+ LDLM_FL_AST_MASK);
/* We might get here prior to ldlm_handle_enqueue setting
* LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock
* into waiting list, but this is safe and similar code in
* ldlm_handle_enqueue will call ldlm_lock_cancel() still,
* that would not only cancel the lock, but will also remove
* it from waiting list */
- if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
+ if (ldlm_is_cancel_on_block(lock)) {
unlock_res_and_lock(lock);
ldlm_lock_cancel(lock);
instant_cancel = 1;
if (AT_OFF)
req->rq_timeout = ldlm_get_rq_timeout();
+ lock->l_last_activity = cfs_time_current_sec();
+
req->rq_interpret_reply = ldlm_cb_interpret;
if (lock->l_export && lock->l_export->exp_nid_stats &&
}
#endif
- if (unlikely(flags & LDLM_FL_REPLAY)) {
+ if (unlikely((flags & LDLM_FL_REPLAY) ||
+ (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))) {
/* Find an existing lock in the per-export lock hash */
/* In the function below, .hs_keycmp resolves to
* ldlm_export_lock_keycmp() */
if (lock != NULL) {
DEBUG_REQ(D_DLMTRACE, req, "found existing lock cookie "
LPX64, lock->l_handle.h_cookie);
+ flags |= LDLM_FL_RESENT;
GOTO(existing_lock, rc = 0);
- }
+ }
}
- /* The lock's callback data might be set in the policy function */
- lock = ldlm_lock_create(ns, &dlm_req->lock_desc.l_resource.lr_name,
- dlm_req->lock_desc.l_resource.lr_type,
- dlm_req->lock_desc.l_req_mode,
+ /* The lock's callback data might be set in the policy function */
+ lock = ldlm_lock_create(ns, &dlm_req->lock_desc.l_resource.lr_name,
+ dlm_req->lock_desc.l_resource.lr_type,
+ dlm_req->lock_desc.l_req_mode,
cbs, NULL, 0, LVB_T_NONE);
- if (!lock)
- GOTO(out, rc = -ENOMEM);
+ if (IS_ERR(lock)) {
+ rc = PTR_ERR(lock);
+ lock = NULL;
+ GOTO(out, rc);
+ }
lock->l_last_activity = cfs_time_current_sec();
lock->l_remote_handle = dlm_req->lock_handle[0];
lock->l_req_extent = lock->l_policy_data.l_extent;
err = ldlm_lock_enqueue(ns, &lock, cookie, &flags);
- if (err)
- GOTO(out, err);
+ if (err) {
+ if ((int)err < 0)
+ rc = (int)err;
+ GOTO(out, err);
+ }
dlm_rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP);
dlm_rep->lock_flags = ldlm_flags_to_wire(flags);
/* Now take into account flags to be inherited from original lock
request both in reply to client and in our own lock flags. */
- dlm_rep->lock_flags |= dlm_req->lock_flags & LDLM_INHERIT_FLAGS;
+ dlm_rep->lock_flags |= dlm_req->lock_flags & LDLM_FL_INHERIT_MASK;
lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
- LDLM_INHERIT_FLAGS);
+ LDLM_FL_INHERIT_MASK);
/* Don't move a pending lock onto the export if it has already been
* disconnected due to eviction (bug 5683) or server umount (bug 24324).
OBD_FAIL_CHECK(OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT))) {
LDLM_ERROR(lock, "lock on destroyed export %p", req->rq_export);
rc = -ENOTCONN;
- } else if (lock->l_flags & LDLM_FL_AST_SENT) {
+ } else if (ldlm_is_ast_sent(lock)) {
dlm_rep->lock_flags |= ldlm_flags_to_wire(LDLM_FL_AST_SENT);
if (lock->l_granted_mode == lock->l_req_mode) {
/*
if ((dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN ||
dlm_req->lock_desc.l_resource.lr_type == LDLM_IBITS) &&
req->rq_export->exp_libclient) {
- if (unlikely(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) ||
+ if (unlikely(!ldlm_is_cancel_on_block(lock) ||
!(dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK))){
CERROR("Granting sync lock to libclient. "
"req fl %d, rep fl %d, lock fl "LPX64"\n",
/* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this
* ldlm_reprocess_all. If this moves, revisit that code. -phil */
- if (lock) {
- LDLM_DEBUG(lock, "server-side enqueue handler, sending reply"
- "(err=%d, rc=%d)", err, rc);
+ if (lock != NULL) {
+ LDLM_DEBUG(lock, "server-side enqueue handler, sending reply"
+ "(err=%d, rc=%d)", err, rc);
- if (rc == 0) {
+ if (rc == 0) {
if (req_capsule_has_field(&req->rq_pill, &RMF_DLM_LVB,
RCL_SERVER) &&
ldlm_lvbo_size(lock) > 0) {
req, lock);
buflen = req_capsule_get_size(&req->rq_pill,
&RMF_DLM_LVB, RCL_SERVER);
- buflen = ldlm_lvbo_fill(lock, buf, buflen);
- if (buflen >= 0)
- req_capsule_shrink(&req->rq_pill,
- &RMF_DLM_LVB,
- buflen, RCL_SERVER);
- else
+ if (buflen > 0) {
+ buflen = ldlm_lvbo_fill(lock, buf,
+ buflen);
+ if (buflen >= 0)
+ req_capsule_shrink(
+ &req->rq_pill,
+ &RMF_DLM_LVB,
+ buflen, RCL_SERVER);
+ else
+ rc = buflen;
+ } else {
rc = buflen;
+ }
}
} else {
lock_res_and_lock(lock);
lock = ldlm_handle2lock(&dlm_req->lock_handle[0]);
if (!lock) {
- req->rq_status = EINVAL;
+ req->rq_status = LUSTRE_EINVAL;
} else {
void *res = NULL;
LDLM_DEBUG(lock, "converted waiting lock");
req->rq_status = 0;
} else {
- req->rq_status = EDEADLOCK;
+ req->rq_status = LUSTRE_EDEADLK;
}
}
RETURN(rc);
if (!ldlm_request_cancel(req, dlm_req, 0))
- req->rq_status = ESTALE;
+ req->rq_status = LUSTRE_ESTALE;
RETURN(ptlrpc_reply(req));
}
LDLM_DEBUG(lock, "client blocking AST callback handler");
lock_res_and_lock(lock);
- lock->l_flags |= LDLM_FL_CBPENDING;
+ ldlm_set_cbpending(lock);
- if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)
- lock->l_flags |= LDLM_FL_CANCEL;
+ if (ldlm_is_cancel_on_block(lock))
+ ldlm_set_cancel(lock);
do_ast = (!lock->l_readers && !lock->l_writers);
unlock_res_and_lock(lock);
struct ldlm_lock *lock)
{
int lvb_len;
- CFS_LIST_HEAD(ast_list);
+ CFS_LIST_HEAD(ast_list);
int rc = 0;
- ENTRY;
+ ENTRY;
- LDLM_DEBUG(lock, "client completion callback handler START");
+ LDLM_DEBUG(lock, "client completion callback handler START");
- if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
- int to = cfs_time_seconds(1);
- while (to > 0) {
- cfs_schedule_timeout_and_set_state(
- CFS_TASK_INTERRUPTIBLE, to);
- if (lock->l_granted_mode == lock->l_req_mode ||
- lock->l_destroyed)
- break;
- }
- }
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
+ int to = cfs_time_seconds(1);
+ while (to > 0) {
+ schedule_timeout_and_set_state(
+ TASK_INTERRUPTIBLE, to);
+ if (lock->l_granted_mode == lock->l_req_mode ||
+ ldlm_is_destroyed(lock))
+ break;
+ }
+ }
lvb_len = req_capsule_get_size(&req->rq_pill, &RMF_DLM_LVB, RCL_CLIENT);
if (lvb_len < 0) {
* variable length */
void *lvb_data;
- OBD_ALLOC(lvb_data, lvb_len);
+ OBD_ALLOC_LARGE(lvb_data, lvb_len);
if (lvb_data == NULL) {
LDLM_ERROR(lock, "No memory: %d.\n", lvb_len);
GOTO(out, rc = -ENOMEM);
lock_res_and_lock(lock);
LASSERT(lock->l_lvb_data == NULL);
+ lock->l_lvb_type = LVB_T_LAYOUT;
lock->l_lvb_data = lvb_data;
lock->l_lvb_len = lvb_len;
unlock_res_and_lock(lock);
}
}
- lock_res_and_lock(lock);
- if (lock->l_destroyed ||
- lock->l_granted_mode == lock->l_req_mode) {
- /* bug 11300: the lock has already been granted */
- unlock_res_and_lock(lock);
- LDLM_DEBUG(lock, "Double grant race happened");
+ lock_res_and_lock(lock);
+ if (ldlm_is_destroyed(lock) ||
+ lock->l_granted_mode == lock->l_req_mode) {
+ /* bug 11300: the lock has already been granted */
+ unlock_res_and_lock(lock);
+ LDLM_DEBUG(lock, "Double grant race happened");
GOTO(out, rc = 0);
- }
+ }
- /* If we receive the completion AST before the actual enqueue returned,
- * then we might need to switch lock modes, resources, or extents. */
- if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
- lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
- LDLM_DEBUG(lock, "completion AST, new lock mode");
- }
+ /* If we receive the completion AST before the actual enqueue returned,
+ * then we might need to switch lock modes, resources, or extents. */
+ if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) {
+ lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
+ LDLM_DEBUG(lock, "completion AST, new lock mode");
+ }
- if (lock->l_resource->lr_type != LDLM_PLAIN) {
- ldlm_convert_policy_to_local(req->rq_export,
- dlm_req->lock_desc.l_resource.lr_type,
- &dlm_req->lock_desc.l_policy_data,
- &lock->l_policy_data);
- LDLM_DEBUG(lock, "completion AST, new policy data");
- }
+ if (lock->l_resource->lr_type != LDLM_PLAIN) {
+ ldlm_convert_policy_to_local(req->rq_export,
+ dlm_req->lock_desc.l_resource.lr_type,
+ &dlm_req->lock_desc.l_policy_data,
+ &lock->l_policy_data);
+ LDLM_DEBUG(lock, "completion AST, new policy data");
+ }
ldlm_resource_unlink_lock(lock);
if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
/* BL_AST locks are not needed in LRU.
* Let ldlm_cancel_lru() be fast. */
ldlm_lock_remove_from_lru(lock);
- lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
+ lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_BL_AST;
LDLM_DEBUG(lock, "completion AST includes blocking AST");
}
out:
if (rc < 0) {
lock_res_and_lock(lock);
- lock->l_flags |= LDLM_FL_FAILED;
+ ldlm_set_failed(lock);
unlock_res_and_lock(lock);
- cfs_waitq_signal(&lock->l_waitq);
+ wake_up(&lock->l_waitq);
}
LDLM_LOCK_RELEASE(lock);
}
spin_lock(&blp->blp_lock);
if (blwi->blwi_lock &&
- blwi->blwi_lock->l_flags & LDLM_FL_DISCARD_DATA) {
+ ldlm_is_discard_data(blwi->blwi_lock)) {
/* add LDLM_FL_DISCARD_DATA requests to the priority list */
cfs_list_add_tail(&blwi->blwi_entry, &blp->blp_prio_list);
} else {
}
spin_unlock(&blp->blp_lock);
- cfs_waitq_signal(&blp->blp_waitq);
+ wake_up(&blp->blp_waitq);
/* can not check blwi->blwi_flags as blwi could be already freed in
LCF_ASYNC mode */
init_completion(&blwi->blwi_comp);
CFS_INIT_LIST_HEAD(&blwi->blwi_head);
- if (cfs_memory_pressure_get())
+ if (memory_pressure_get())
blwi->blwi_mem_pressure = 1;
blwi->blwi_ns = ns;
RETURN(-EPROTO);
}
+ oqctl->qc_stat = ptlrpc_status_ntoh(oqctl->qc_stat);
+
cli->cl_qchk_stat = oqctl->qc_stat;
return 0;
}
rc = ldlm_handle_setinfo(req);
ldlm_callback_reply(req, rc);
RETURN(0);
- case OBD_LOG_CANCEL: /* remove this eventually - for 1.4.0 compat */
- CERROR("shouldn't be handling OBD_LOG_CANCEL on DLM thread\n");
- req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
- RETURN(0);
- rc = llog_origin_handle_cancel(req);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
- RETURN(0);
- ldlm_callback_reply(req, rc);
- RETURN(0);
case LLOG_ORIGIN_HANDLE_CREATE:
req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE);
if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOGD_NET))
RETURN(0);
}
- if ((lock->l_flags & LDLM_FL_FAIL_LOC) &&
+ if (ldlm_is_fail_loc(lock) &&
lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
/* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */
lock_res_and_lock(lock);
lock->l_flags |= ldlm_flags_from_wire(dlm_req->lock_flags &
- LDLM_AST_FLAGS);
+ LDLM_FL_AST_MASK);
if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
/* If somebody cancels lock and cache is already dropped,
* or lock is failed before cp_ast received on client,
* we can tell the server we have no lock. Otherwise, we
* should send cancel after dropping the cache. */
- if (((lock->l_flags & LDLM_FL_CANCELING) &&
- (lock->l_flags & LDLM_FL_BL_DONE)) ||
- (lock->l_flags & LDLM_FL_FAILED)) {
+ if ((ldlm_is_canceling(lock) && ldlm_is_bl_done(lock)) ||
+ ldlm_is_failed(lock)) {
LDLM_DEBUG(lock, "callback on lock "
LPX64" - lock disappeared\n",
dlm_req->lock_handle[0].cookie);
/* BL_AST locks are not needed in LRU.
* Let ldlm_cancel_lru() be fast. */
ldlm_lock_remove_from_lru(lock);
- lock->l_flags |= LDLM_FL_BL_AST;
+ ldlm_set_bl_ast(lock);
}
unlock_res_and_lock(lock);
case LDLM_BL_CALLBACK:
CDEBUG(D_INODE, "blocking ast\n");
req_capsule_extend(&req->rq_pill, &RQF_LDLM_BL_CALLBACK);
- if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)) {
+ if (!ldlm_is_cancel_on_block(lock)) {
rc = ldlm_callback_reply(req, 0);
if (req->rq_no_reply || rc)
ldlm_callback_errmsg(req, "Normal process", rc,
if (rc)
break;
RETURN(0);
- case OBD_LOG_CANCEL:
- req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
- RETURN(0);
- rc = llog_origin_handle_cancel(req);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
- RETURN(0);
- ldlm_callback_reply(req, rc);
- RETURN(0);
default:
CERROR("invalid opcode %d\n",
lustre_msg_get_opc(req->rq_reqmsg));
if (lock == NULL)
continue;
- rc = !!(lock->l_flags & LDLM_FL_AST_SENT);
+ rc = ldlm_is_ast_sent(lock) ? 1 : 0;
if (rc)
LDLM_DEBUG(lock, "hpreq cancel lock");
LDLM_LOCK_PUT(lock);
return 0;
}
- if (lock->l_flags & LDLM_FL_AST_SENT) {
+ if (ldlm_is_ast_sent(lock)) {
unlock_res_and_lock(lock);
return 0;
}
LASSERT(lock->l_blocking_ast);
LASSERT(!lock->l_blocking_lock);
- lock->l_flags |= LDLM_FL_AST_SENT;
+ ldlm_set_ast_sent(lock);
if (lock->l_export && lock->l_export->exp_lock_hash) {
/* NB: it's safe to call cfs_hash_del() even lock isn't
* in exp_lock_hash. */
static unsigned int num_bl = 0;
spin_lock(&blp->blp_lock);
- /* process a request from the blp_list at least every blp_num_threads */
- if (!cfs_list_empty(&blp->blp_list) &&
- (cfs_list_empty(&blp->blp_prio_list) || num_bl == 0))
- blwi = cfs_list_entry(blp->blp_list.next,
- struct ldlm_bl_work_item, blwi_entry);
- else
- if (!cfs_list_empty(&blp->blp_prio_list))
- blwi = cfs_list_entry(blp->blp_prio_list.next,
- struct ldlm_bl_work_item,
- blwi_entry);
-
- if (blwi) {
- if (++num_bl >= cfs_atomic_read(&blp->blp_num_threads))
- num_bl = 0;
- cfs_list_del(&blwi->blwi_entry);
- }
+ /* process a request from the blp_list at least every blp_num_threads */
+ if (!cfs_list_empty(&blp->blp_list) &&
+ (cfs_list_empty(&blp->blp_prio_list) || num_bl == 0))
+ blwi = cfs_list_entry(blp->blp_list.next,
+ struct ldlm_bl_work_item, blwi_entry);
+ else
+ if (!cfs_list_empty(&blp->blp_prio_list))
+ blwi = cfs_list_entry(blp->blp_prio_list.next,
+ struct ldlm_bl_work_item,
+ blwi_entry);
+
+ if (blwi) {
+ if (++num_bl >= atomic_read(&blp->blp_num_threads))
+ num_bl = 0;
+ cfs_list_del(&blwi->blwi_entry);
+ }
spin_unlock(&blp->blp_lock);
return blwi;
static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
{
struct ldlm_bl_thread_data bltd = { .bltd_blp = blp };
- int rc;
+ struct task_struct *task;
init_completion(&bltd.bltd_comp);
- rc = cfs_create_thread(ldlm_bl_thread_main, &bltd, 0);
- if (rc < 0) {
- CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %d\n",
- cfs_atomic_read(&blp->blp_num_threads), rc);
- return rc;
+ bltd.bltd_num = atomic_read(&blp->blp_num_threads);
+ snprintf(bltd.bltd_name, sizeof(bltd.bltd_name) - 1,
+ "ldlm_bl_%02d", bltd.bltd_num);
+ task = kthread_run(ldlm_bl_thread_main, &bltd, bltd.bltd_name);
+ if (IS_ERR(task)) {
+ CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
+ atomic_read(&blp->blp_num_threads), PTR_ERR(task));
+ return PTR_ERR(task);
}
wait_for_completion(&bltd.bltd_comp);
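ldlm_bl_thread_start() follows the common pattern for handing stack data to a new kthread: the starter fills struct ldlm_bl_thread_data on its own stack, kthread_run() launches the worker, and the worker must copy what it needs and signal a completion before the starter returns (hence the "cannot use bltd after this" comment below). A hypothetical reduced version, with demo_* names standing in for the Lustre types:

#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/err.h>

struct demo_startup {
	void			*payload;
	struct completion	started;
};

static int demo_worker(void *arg)
{
	struct demo_startup *d = arg;
	void *payload = d->payload;	/* copy out what we need */

	complete(&d->started);		/* d lives on the caller's stack -
					 * do not touch it after this */
	/* ... work with payload ... */
	return 0;
}

static int demo_launch(void *payload)
{
	struct demo_startup d = { .payload = payload };
	struct task_struct *task;

	init_completion(&d.started);
	task = kthread_run(demo_worker, &d, "demo_worker");
	if (IS_ERR(task))
		return PTR_ERR(task);
	wait_for_completion(&d.started);
	return 0;
}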
static int ldlm_bl_thread_main(void *arg)
{
struct ldlm_bl_pool *blp;
+ struct ldlm_bl_thread_data *bltd = arg;
ENTRY;
- {
- struct ldlm_bl_thread_data *bltd = arg;
-
- blp = bltd->bltd_blp;
+ blp = bltd->bltd_blp;
- bltd->bltd_num =
- cfs_atomic_inc_return(&blp->blp_num_threads) - 1;
- cfs_atomic_inc(&blp->blp_busy_threads);
+ atomic_inc(&blp->blp_num_threads);
+ atomic_inc(&blp->blp_busy_threads);
- snprintf(bltd->bltd_name, sizeof(bltd->bltd_name) - 1,
- "ldlm_bl_%02d", bltd->bltd_num);
- cfs_daemonize(bltd->bltd_name);
-
- complete(&bltd->bltd_comp);
- /* cannot use bltd after this, it is only on caller's stack */
- }
+ complete(&bltd->bltd_comp);
+ /* cannot use bltd after this, it is only on caller's stack */
- while (1) {
- struct l_wait_info lwi = { 0 };
- struct ldlm_bl_work_item *blwi = NULL;
- int busy;
+ while (1) {
+ struct l_wait_info lwi = { 0 };
+ struct ldlm_bl_work_item *blwi = NULL;
+ int busy;
- blwi = ldlm_bl_get_work(blp);
+ blwi = ldlm_bl_get_work(blp);
- if (blwi == NULL) {
- cfs_atomic_dec(&blp->blp_busy_threads);
- l_wait_event_exclusive(blp->blp_waitq,
- (blwi = ldlm_bl_get_work(blp)) != NULL,
- &lwi);
- busy = cfs_atomic_inc_return(&blp->blp_busy_threads);
- } else {
- busy = cfs_atomic_read(&blp->blp_busy_threads);
- }
+ if (blwi == NULL) {
+ atomic_dec(&blp->blp_busy_threads);
+ l_wait_event_exclusive(blp->blp_waitq,
+ (blwi = ldlm_bl_get_work(blp)) != NULL,
+ &lwi);
+ busy = atomic_inc_return(&blp->blp_busy_threads);
+ } else {
+ busy = atomic_read(&blp->blp_busy_threads);
+ }
- if (blwi->blwi_ns == NULL)
- /* added by ldlm_cleanup() */
- break;
+ if (blwi->blwi_ns == NULL)
+ /* added by ldlm_cleanup() */
+ break;
- /* Not fatal if racy and have a few too many threads */
- if (unlikely(busy < blp->blp_max_threads &&
- busy >= cfs_atomic_read(&blp->blp_num_threads) &&
- !blwi->blwi_mem_pressure))
- /* discard the return value, we tried */
- ldlm_bl_thread_start(blp);
+ /* Not fatal if racy and have a few too many threads */
+ if (unlikely(busy < blp->blp_max_threads &&
+ busy >= atomic_read(&blp->blp_num_threads) &&
+ !blwi->blwi_mem_pressure))
+ /* discard the return value, we tried */
+ ldlm_bl_thread_start(blp);
if (blwi->blwi_mem_pressure)
- cfs_memory_pressure_set();
+ memory_pressure_set();
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL2, 4);
if (blwi->blwi_count) {
int count;
blwi->blwi_lock);
}
if (blwi->blwi_mem_pressure)
- cfs_memory_pressure_clr();
+ memory_pressure_clr();
if (blwi->blwi_flags & LCF_ASYNC)
OBD_FREE(blwi, sizeof(*blwi));
else
complete(&blwi->blwi_comp);
- }
+ }
- cfs_atomic_dec(&blp->blp_busy_threads);
- cfs_atomic_dec(&blp->blp_num_threads);
+ atomic_dec(&blp->blp_busy_threads);
+ atomic_dec(&blp->blp_num_threads);
complete(&blp->blp_comp);
- RETURN(0);
+ RETURN(0);
}
#endif
int ldlm_init_export(struct obd_export *exp)
{
+ int rc;
ENTRY;
exp->exp_lock_hash =
if (!exp->exp_lock_hash)
RETURN(-ENOMEM);
+ rc = ldlm_init_flock_export(exp);
+ if (rc)
+ GOTO(err, rc);
+
RETURN(0);
+err:
+ ldlm_destroy_export(exp);
+ RETURN(rc);
}
EXPORT_SYMBOL(ldlm_init_export);
static int ldlm_setup(void)
{
static struct ptlrpc_service_conf conf;
- struct ldlm_bl_pool *blp = NULL;
- int rc = 0;
+ struct ldlm_bl_pool *blp = NULL;
#ifdef __KERNEL__
- int i;
+# ifdef HAVE_SERVER_SUPPORT
+ struct task_struct *task;
+# endif
+ int i;
#endif
+ int rc = 0;
+
ENTRY;
if (ldlm_state != NULL)
ldlm_state->ldlm_bl_pool = blp;
spin_lock_init(&blp->blp_lock);
- CFS_INIT_LIST_HEAD(&blp->blp_list);
- CFS_INIT_LIST_HEAD(&blp->blp_prio_list);
- cfs_waitq_init(&blp->blp_waitq);
- cfs_atomic_set(&blp->blp_num_threads, 0);
- cfs_atomic_set(&blp->blp_busy_threads, 0);
+ CFS_INIT_LIST_HEAD(&blp->blp_list);
+ CFS_INIT_LIST_HEAD(&blp->blp_prio_list);
+ init_waitqueue_head(&blp->blp_waitq);
+ atomic_set(&blp->blp_num_threads, 0);
+ atomic_set(&blp->blp_busy_threads, 0);
#ifdef __KERNEL__
if (ldlm_num_threads == 0) {
}
# ifdef HAVE_SERVER_SUPPORT
- CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
- expired_lock_thread.elt_state = ELT_STOPPED;
- cfs_waitq_init(&expired_lock_thread.elt_waitq);
+ CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
+ expired_lock_thread.elt_state = ELT_STOPPED;
+ init_waitqueue_head(&expired_lock_thread.elt_waitq);
- CFS_INIT_LIST_HEAD(&waiting_locks_list);
+ CFS_INIT_LIST_HEAD(&waiting_locks_list);
spin_lock_init(&waiting_locks_spinlock);
- cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0);
+ cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0);
- rc = cfs_create_thread(expired_lock_main, NULL, CFS_DAEMON_FLAGS);
- if (rc < 0) {
+ task = kthread_run(expired_lock_main, NULL, "ldlm_elt");
+ if (IS_ERR(task)) {
+ rc = PTR_ERR(task);
CERROR("Cannot start ldlm expired-lock thread: %d\n", rc);
GOTO(out, rc);
}
- cfs_wait_event(expired_lock_thread.elt_waitq,
- expired_lock_thread.elt_state == ELT_READY);
+ wait_event(expired_lock_thread.elt_waitq,
+ expired_lock_thread.elt_state == ELT_READY);
# endif /* HAVE_SERVER_SUPPORT */
rc = ldlm_pools_init();
if (ldlm_state->ldlm_bl_pool != NULL) {
struct ldlm_bl_pool *blp = ldlm_state->ldlm_bl_pool;
- while (cfs_atomic_read(&blp->blp_num_threads) > 0) {
+ while (atomic_read(&blp->blp_num_threads) > 0) {
struct ldlm_bl_work_item blwi = { .blwi_ns = NULL };
init_completion(&blp->blp_comp);
spin_lock(&blp->blp_lock);
cfs_list_add_tail(&blwi.blwi_entry, &blp->blp_list);
- cfs_waitq_signal(&blp->blp_waitq);
+ wake_up(&blp->blp_waitq);
spin_unlock(&blp->blp_lock);
wait_for_completion(&blp->blp_comp);
# ifdef HAVE_SERVER_SUPPORT
if (expired_lock_thread.elt_state != ELT_STOPPED) {
expired_lock_thread.elt_state = ELT_TERMINATE;
- cfs_waitq_signal(&expired_lock_thread.elt_waitq);
- cfs_wait_event(expired_lock_thread.elt_waitq,
+ wake_up(&expired_lock_thread.elt_waitq);
+ wait_event(expired_lock_thread.elt_waitq,
expired_lock_thread.elt_state == ELT_STOPPED);
}
# endif
mutex_init(&ldlm_ref_mutex);
mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
- ldlm_resource_slab = cfs_mem_cache_create("ldlm_resources",
- sizeof(struct ldlm_resource), 0,
- CFS_SLAB_HWCACHE_ALIGN);
- if (ldlm_resource_slab == NULL)
- return -ENOMEM;
+ ldlm_resource_slab = kmem_cache_create("ldlm_resources",
+ sizeof(struct ldlm_resource), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (ldlm_resource_slab == NULL)
+ return -ENOMEM;
- ldlm_lock_slab = cfs_mem_cache_create("ldlm_locks",
+ ldlm_lock_slab = kmem_cache_create("ldlm_locks",
sizeof(struct ldlm_lock), 0,
- CFS_SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU);
+ SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU, NULL);
if (ldlm_lock_slab == NULL) {
- cfs_mem_cache_destroy(ldlm_resource_slab);
+ kmem_cache_destroy(ldlm_resource_slab);
return -ENOMEM;
}
- ldlm_interval_slab = cfs_mem_cache_create("interval_node",
+ ldlm_interval_slab = kmem_cache_create("interval_node",
sizeof(struct ldlm_interval),
- 0, CFS_SLAB_HWCACHE_ALIGN);
+ 0, SLAB_HWCACHE_ALIGN, NULL);
if (ldlm_interval_slab == NULL) {
- cfs_mem_cache_destroy(ldlm_resource_slab);
- cfs_mem_cache_destroy(ldlm_lock_slab);
+ kmem_cache_destroy(ldlm_resource_slab);
+ kmem_cache_destroy(ldlm_lock_slab);
return -ENOMEM;
}
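The native slab API used here takes an optional constructor as its final argument (NULL in all three calls above), and kmem_cache_destroy() returns void, which is why ldlm_exit() below drops the old return-code assertions. A small self-contained sketch with placeholder demo_* names:

#include <linux/slab.h>
#include <linux/errno.h>

struct demo_obj {
	int	value;
};

static struct kmem_cache *demo_cache;

static int demo_cache_init(void)
{
	demo_cache = kmem_cache_create("demo_cache", sizeof(struct demo_obj),
				       0, SLAB_HWCACHE_ALIGN, NULL /* ctor */);
	return demo_cache != NULL ? 0 : -ENOMEM;
}

static void demo_cache_fini(void)
{
	/* kmem_cache_destroy() returns void; nothing to assert on */
	kmem_cache_destroy(demo_cache);
}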
#if LUSTRE_TRACKS_LOCK_EXP_REFS
void ldlm_exit(void)
{
- int rc;
- if (ldlm_refcount)
- CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount);
- rc = cfs_mem_cache_destroy(ldlm_resource_slab);
- LASSERTF(rc == 0, "couldn't free ldlm resource slab\n");
+ if (ldlm_refcount)
+ CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount);
+ kmem_cache_destroy(ldlm_resource_slab);
#ifdef __KERNEL__
- /* ldlm_lock_put() use RCU to call ldlm_lock_free, so need call
- * synchronize_rcu() to wait a grace period elapsed, so that
- * ldlm_lock_free() get a chance to be called. */
- synchronize_rcu();
+ /* ldlm_lock_put() uses RCU to call ldlm_lock_free(), so we need to call
+ * synchronize_rcu() to wait for a grace period to elapse and give
+ * ldlm_lock_free() a chance to run. */
+ synchronize_rcu();
#endif
- rc = cfs_mem_cache_destroy(ldlm_lock_slab);
- LASSERTF(rc == 0, "couldn't free ldlm lock slab\n");
- rc = cfs_mem_cache_destroy(ldlm_interval_slab);
- LASSERTF(rc == 0, "couldn't free interval node slab\n");
+ kmem_cache_destroy(ldlm_lock_slab);
+ kmem_cache_destroy(ldlm_interval_slab);
}