* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2015, Intel Corporation.
+ * Copyright (c) 2010, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <linux/kthread.h>
#include <linux/list.h>
#include <libcfs/libcfs.h>
+#include <lustre_errno.h>
#include <lustre_dlm.h>
#include <obd_class.h>
#include "ldlm_internal.h"
module_param(ldlm_cpts, charp, 0444);
MODULE_PARM_DESC(ldlm_cpts, "CPU partitions ldlm threads should run on");
-static struct mutex ldlm_ref_mutex;
+static DEFINE_MUTEX(ldlm_ref_mutex);
static int ldlm_refcount;
-struct ldlm_cb_async_args {
- struct ldlm_cb_set_arg *ca_set_arg;
- struct ldlm_lock *ca_lock;
-};
+/* "ldlm" sysfs kobject, created under lustre_kobj by ldlm_setup() */
+struct kobject *ldlm_kobj;
+/* "namespaces" kset, created with ldlm_kobj as its parent */
+struct kset *ldlm_ns_kset;
+/* "services" kset, created with ldlm_kobj as its parent and handed to
+ * ptlrpc_register_service() for the LDLM services */
+struct kset *ldlm_svc_kset;
/* LDLM state */
return timeout < 1 ? 1 : timeout;
}
-#define ELT_STOPPED 0
-#define ELT_READY 1
-#define ELT_TERMINATE 2
-
struct ldlm_bl_pool {
spinlock_t blp_lock;
/**
* Protects both waiting_locks_list and expired_lock_thread.
*/
-static spinlock_t waiting_locks_spinlock; /* BH lock (timer) */
+static DEFINE_SPINLOCK(waiting_locks_spinlock); /* BH lock (timer) */
/**
* List for contended locks.
*
* All access to it should be under waiting_locks_spinlock.
*/
-static struct list_head waiting_locks_list;
-static struct timer_list waiting_locks_timer;
+static LIST_HEAD(waiting_locks_list);
+static void waiting_locks_callback(unsigned long unused);
+static DEFINE_TIMER(waiting_locks_timer, waiting_locks_callback, 0, 0);
+
+/*
+ * Life cycle of the expired-lock thread (expired_lock_main()):
+ *  - ELT_STOPPED:   initial state; set again by the thread right before
+ *                   it returns
+ *  - ELT_READY:     set by the thread as soon as it starts running
+ *  - ELT_TERMINATE: set by the cleanup path to ask the thread to exit
+ */
+enum elt_state {
+	ELT_STOPPED	= 0,
+	ELT_READY	= 1,
+	ELT_TERMINATE	= 2,
+};
-static struct expired_lock_thread {
- wait_queue_head_t elt_waitq;
- int elt_state;
- int elt_dump;
- struct list_head elt_expired_locks;
-} expired_lock_thread;
+/* Wait queue on which expired_lock_main() sleeps; it is also woken on every
+ * change of expired_lock_thread_state so that setup/cleanup can wait on it */
+static DECLARE_WAIT_QUEUE_HEAD(expired_lock_wait_queue);
+/* Current state of the expired-lock thread; starts out stopped */
+static enum elt_state expired_lock_thread_state = ELT_STOPPED;
+/* Non-zero asks expired_lock_main() to dump the debug log; the requester
+ * stores __LINE__ here and the thread resets it to 0 after dumping */
+static int expired_lock_dump;
+/* Locks handed over to expired_lock_main(); accessed under
+ * waiting_locks_spinlock */
+static LIST_HEAD(expired_lock_list);
static inline int have_expired_locks(void)
{
ENTRY;
spin_lock_bh(&waiting_locks_spinlock);
- need_to_run = !list_empty(&expired_lock_thread.elt_expired_locks);
+ need_to_run = !list_empty(&expired_lock_list);
spin_unlock_bh(&waiting_locks_spinlock);
RETURN(need_to_run);
*/
static int expired_lock_main(void *arg)
{
- struct list_head *expired = &expired_lock_thread.elt_expired_locks;
+ struct list_head *expired = &expired_lock_list;
struct l_wait_info lwi = { 0 };
int do_dump;
ENTRY;
- expired_lock_thread.elt_state = ELT_READY;
- wake_up(&expired_lock_thread.elt_waitq);
+ expired_lock_thread_state = ELT_READY;
+ wake_up(&expired_lock_wait_queue);
while (1) {
- l_wait_event(expired_lock_thread.elt_waitq,
+ l_wait_event(expired_lock_wait_queue,
have_expired_locks() ||
- expired_lock_thread.elt_state == ELT_TERMINATE,
+ expired_lock_thread_state == ELT_TERMINATE,
&lwi);
spin_lock_bh(&waiting_locks_spinlock);
- if (expired_lock_thread.elt_dump) {
- struct libcfs_debug_msg_data msgdata = {
- .msg_file = __FILE__,
- .msg_fn = "waiting_locks_callback",
- .msg_line = expired_lock_thread.elt_dump };
+ if (expired_lock_dump) {
spin_unlock_bh(&waiting_locks_spinlock);
/* from waiting_locks_callback, but not in timer */
libcfs_debug_dumplog();
- libcfs_run_lbug_upcall(&msgdata);
spin_lock_bh(&waiting_locks_spinlock);
- expired_lock_thread.elt_dump = 0;
+ expired_lock_dump = 0;
}
do_dump = 0;
libcfs_debug_dumplog();
}
- if (expired_lock_thread.elt_state == ELT_TERMINATE)
+ if (expired_lock_thread_state == ELT_TERMINATE)
break;
}
- expired_lock_thread.elt_state = ELT_STOPPED;
- wake_up(&expired_lock_thread.elt_waitq);
+ expired_lock_thread_state = ELT_STOPPED;
+ wake_up(&expired_lock_wait_queue);
RETURN(0);
}
continue;
}
ldlm_lock_to_ns(lock)->ns_timeouts++;
- LDLM_ERROR(lock, "lock callback timer expired after %lds: "
+ LDLM_ERROR(lock, "lock callback timer expired after %llds: "
"evicting client at %s ",
- cfs_time_current_sec() - lock->l_last_activity,
+ ktime_get_real_seconds() - lock->l_last_activity,
libcfs_nid2str(
lock->l_export->exp_connection->c_peer.nid));
* the waiting_locks_list and ldlm_add_waiting_lock()
* already grabbed a ref */
list_del(&lock->l_pending_chain);
- list_add(&lock->l_pending_chain,
- &expired_lock_thread.elt_expired_locks);
+ list_add(&lock->l_pending_chain, &expired_lock_list);
need_dump = 1;
}
- if (!list_empty(&expired_lock_thread.elt_expired_locks)) {
+ if (!list_empty(&expired_lock_list)) {
if (obd_dump_on_timeout && need_dump)
- expired_lock_thread.elt_dump = __LINE__;
+ expired_lock_dump = __LINE__;
- wake_up(&expired_lock_thread.elt_waitq);
+ wake_up(&expired_lock_wait_queue);
}
/*
lock = list_entry(waiting_locks_list.next, struct ldlm_lock,
l_pending_chain);
timeout_rounded = (cfs_time_t)round_timeout(lock->l_callback_timeout);
- cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
+ mod_timer(&waiting_locks_timer, timeout_rounded);
}
spin_unlock_bh(&waiting_locks_spinlock);
}
timeout_rounded = round_timeout(lock->l_callback_timeout);
- if (cfs_time_before(timeout_rounded,
- cfs_timer_deadline(&waiting_locks_timer)) ||
- !cfs_timer_is_armed(&waiting_locks_timer)) {
- cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
+ if (cfs_time_before(timeout_rounded, waiting_locks_timer.expires) ||
+ !timer_pending(&waiting_locks_timer)) {
+ mod_timer(&waiting_locks_timer, timeout_rounded);
}
/* if the new lock has a shorter timeout than something earlier on
the list, we'll wait the longer amount of time; no big deal. */
}
ldlm_set_waited(lock);
- lock->l_last_activity = cfs_time_current_sec();
+ lock->l_last_activity = ktime_get_real_seconds();
ret = __ldlm_add_waiting_lock(lock, timeout);
if (ret) {
/* grab ref on the lock if it has been added to the
/* Removing the head of the list, adjust timer. */
if (list_next == &waiting_locks_list) {
/* No more, just cancel. */
- cfs_timer_disarm(&waiting_locks_timer);
+ del_timer(&waiting_locks_timer);
} else {
struct ldlm_lock *next;
next = list_entry(list_next, struct ldlm_lock,
l_pending_chain);
- cfs_timer_arm(&waiting_locks_timer,
- round_timeout(next->l_callback_timeout));
+ mod_timer(&waiting_locks_timer,
+ round_timeout(next->l_callback_timeout));
}
}
list_del_init(&lock->l_pending_chain);
/* the lock was not in any list, grab an extra ref before adding
* the lock to the expired list */
LDLM_LOCK_GET(lock);
- list_add(&lock->l_pending_chain,
- &expired_lock_thread.elt_expired_locks);
- wake_up(&expired_lock_thread.elt_waitq);
+ list_add(&lock->l_pending_chain, &expired_lock_list);
+ wake_up(&expired_lock_wait_queue);
spin_unlock_bh(&waiting_locks_spinlock);
}
struct ptlrpc_request *req, int rc,
const char *ast_type)
{
- lnet_process_id_t peer = req->rq_import->imp_connection->c_peer;
+ struct lnet_process_id peer = req->rq_import->imp_connection->c_peer;
if (!req->rq_replied || (rc && rc != -EINVAL)) {
if (lock->l_export && lock->l_export->exp_libclient) {
libcfs_nid2str(peer.nid));
ldlm_lock_cancel(lock);
rc = -ERESTART;
+ } else if (rc == -ENODEV || rc == -ESHUTDOWN ||
+ (rc == -EIO &&
+ req->rq_import->imp_state == LUSTRE_IMP_CLOSED)) {
+ /* During umount the AST fails because it cannot be
+ * sent. This shouldn't lead to the client eviction.
+ * -ENODEV error is returned by ptl_send_rpc() for
+ * new request in such import.
+ * -ESHUTDOWN is returned by ptlrpc_import_delay_req()
+ * if imp_invalid is set or obd_no_recov.
+ * Meanwhile there is also check for LUSTRE_IMP_CLOSED
+ * in ptlrpc_import_delay_req() as well with -EIO code.
+ * In all such cases errors are ignored.
+ */
+ LDLM_DEBUG(lock, "%s AST can't be sent due to a server"
+ " %s failure or umount process: rc = %d\n",
+ ast_type,
+ req->rq_import->imp_obd->obd_name, rc);
} else {
LDLM_ERROR(lock,
"client (nid %s) %s %s AST (req@%p x%llu status %d rc %d), evict it",
/* update lvbo to return proper attributes.
* see bug 23174 */
ldlm_resource_getref(res);
- ldlm_res_lvbo_update(res, NULL, 1);
+ ldlm_lvbo_update(res, lock, NULL, 1);
ldlm_resource_putref(res);
}
ldlm_lock_cancel(lock);
* - Glimpse callback of remote lock might return
* -ELDLM_NO_LOCK_DATA when inode is cleared. LU-274
*/
- if (rc == -ELDLM_NO_LOCK_DATA) {
+ if (unlikely(arg->gl_interpret_reply)) {
+ rc = arg->gl_interpret_reply(env, req, data, rc);
+ } else if (rc == -ELDLM_NO_LOCK_DATA) {
LDLM_DEBUG(lock, "lost race - client has a lock but no "
"inode");
- ldlm_res_lvbo_update(lock->l_resource, NULL, 1);
+ ldlm_lvbo_update(lock->l_resource, lock, NULL, 1);
} else if (rc != 0) {
rc = ldlm_handle_ast_error(lock, req, rc, "glimpse");
} else {
- rc = ldlm_res_lvbo_update(lock->l_resource, req, 1);
+ rc = ldlm_lvbo_update(lock->l_resource, lock, req, 1);
}
break;
case LDLM_BL_CALLBACK:
/* Don't need to do anything here. */
RETURN(0);
+ if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_SRV_BL_AST)) {
+ LDLM_DEBUG(lock, "dropping BL AST");
+ RETURN(0);
+ }
+
LASSERT(lock);
LASSERT(data != NULL);
if (lock->l_export->exp_obd->obd_recovering != 0)
if (AT_OFF)
req->rq_timeout = ldlm_get_rq_timeout();
- lock->l_last_activity = cfs_time_current_sec();
+ lock->l_last_activity = ktime_get_real_seconds();
if (lock->l_export && lock->l_export->exp_nid_stats &&
lock->l_export->exp_nid_stats->nid_ldlm_stats)
}
}
- lock->l_last_activity = cfs_time_current_sec();
+ lock->l_last_activity = ktime_get_real_seconds();
LDLM_DEBUG(lock, "server preparing completion AST");
if (AT_OFF)
req->rq_timeout = ldlm_get_rq_timeout();
- lock->l_last_activity = cfs_time_current_sec();
+ lock->l_last_activity = ktime_get_real_seconds();
req->rq_interpret_reply = ldlm_cb_interpret;
if (res != NULL) {
ldlm_resource_getref(res);
LDLM_RESOURCE_ADDREF(res);
- ldlm_res_lvbo_update(res, NULL, 1);
+
+ if (!ldlm_is_discard_data(lock))
+ ldlm_lvbo_update(res, lock, NULL, 1);
}
pres = res;
}
if ((flags & LATF_STATS) && ldlm_is_ast_sent(lock)) {
- long delay = cfs_time_sub(cfs_time_current_sec(),
- lock->l_last_activity);
- LDLM_DEBUG(lock, "server cancels blocked lock after "
- CFS_DURATION_T"s", delay);
+ time64_t delay = ktime_get_real_seconds() -
+ lock->l_last_activity;
+ LDLM_DEBUG(lock, "server cancels blocked lock after %llds",
+ (s64)delay);
at_measured(&lock->l_export->exp_bl_lock_at, delay);
}
ldlm_lock_cancel(lock);
lock_res_and_lock(lock);
if (lock->l_granted_mode == LCK_PW &&
!lock->l_readers && !lock->l_writers &&
- cfs_time_after(cfs_time_current(),
- cfs_time_add(lock->l_last_used,
- cfs_time_seconds(10)))) {
+ ktime_after(ktime_get(),
+ ktime_add(lock->l_last_used,
+ ktime_set(10, 0)))) {
unlock_res_and_lock(lock);
if (ldlm_bl_to_thread_lock(ns, NULL, lock))
ldlm_handle_bl_callback(ns, NULL, lock);
+/*
+ * Run revoke ASTs for the locks of export \a exp.
+ *
+ * Walks exp->exp_lock_hash with ldlm_revoke_lock_cb(), passing a local list
+ * as the callback argument, then runs LDLM_WORK_REVOKE_AST work on that list
+ * in the namespace of the export's obd device.
+ */
void ldlm_revoke_export_locks(struct obd_export *exp)
{
struct list_head rpc_list;
- ENTRY;
+ ENTRY;
INIT_LIST_HEAD(&rpc_list);
- cfs_hash_for_each_empty(exp->exp_lock_hash,
- ldlm_revoke_lock_cb, &rpc_list);
- ldlm_run_ast_work(exp->exp_obd->obd_namespace, &rpc_list,
- LDLM_WORK_REVOKE_AST);
+ /* NOTE(review): presumably ldlm_revoke_lock_cb() queues the locks to
+ * revoke on rpc_list - confirm against the callback */
+ cfs_hash_for_each_nolock(exp->exp_lock_hash,
+ ldlm_revoke_lock_cb, &rpc_list, 0);
+ /* rpc_list is consumed here as the AST work list */
+ ldlm_run_ast_work(exp->exp_obd->obd_namespace, &rpc_list,
+ LDLM_WORK_REVOKE_AST);
- EXIT;
+ EXIT;
}
EXPORT_SYMBOL(ldlm_revoke_export_locks);
#endif /* HAVE_SERVER_SUPPORT */
if (rc == LDLM_ITER_STOP)
break;
+
+ /* If there are many namespaces, we will not sleep waiting for
+ * work, and must do a cond_resched to avoid holding the CPU
+ * for too long */
+ cond_resched();
}
atomic_dec(&blp->blp_num_threads);
}
EXPORT_SYMBOL(ldlm_destroy_export);
+/**
+ * sysfs "show" handler for the cancel_unused_locks_before_replay tunable.
+ *
+ * \param[in]  kobj	kobject the attribute is attached to (unused)
+ * \param[in]  attr	attribute being read (unused)
+ * \param[out] buf	output buffer supplied by sysfs
+ *
+ * \retval		number of bytes written to \a buf
+ */
+static ssize_t cancel_unused_locks_before_replay_show(struct kobject *kobj,
+						      struct attribute *attr,
+						      char *buf)
+{
+	/* sysfs passes show() a PAGE_SIZE buffer; keep the write bounded to
+	 * it instead of using an unbounded sprintf() */
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			 ldlm_cancel_unused_locks_before_replay);
+}
+
+/**
+ * sysfs "store" handler for the cancel_unused_locks_before_replay tunable.
+ *
+ * Parses \a buffer as an unsigned base-10 integer and assigns the result to
+ * ldlm_cancel_unused_locks_before_replay.
+ *
+ * \param[in] kobj	kobject the attribute is attached to (unused)
+ * \param[in] attr	attribute being written (unused)
+ * \param[in] buffer	user-supplied text
+ * \param[in] count	number of bytes in \a buffer
+ *
+ * \retval count	on success
+ * \retval -ERANGE	if the parsed value is larger than INT_MAX
+ * \retval negative	error returned by kstrtoul() if parsing fails
+ */
+static ssize_t cancel_unused_locks_before_replay_store(struct kobject *kobj,
+						       struct attribute *attr,
+						       const char *buffer,
+						       size_t count)
+{
+	unsigned long val;
+	int rc;
+
+	rc = kstrtoul(buffer, 10, &val);
+	if (rc)
+		return rc;
+
+	/* NOTE(review): the tunable is printed with "%d" by the matching
+	 * _show handler, so it is assumed to be an int - confirm against its
+	 * declaration. Refuse values the assignment below would silently
+	 * truncate. */
+	if (val > INT_MAX)
+		return -ERANGE;
+
+	ldlm_cancel_unused_locks_before_replay = val;
+
+	return count;
+}
+/* NOTE(review): presumably defines lustre_attr_cancel_unused_locks_before_replay
+ * from the _show/_store handlers above - confirm against the macro */
+LUSTRE_RW_ATTR(cancel_unused_locks_before_replay);
+
+/* NULL-terminated list of attributes collected into ldlm_attr_group */
+static struct attribute *ldlm_attrs[] = {
+ &lustre_attr_cancel_unused_locks_before_replay.attr,
+ NULL,
+};
+
+/* Attribute group registered on ldlm_kobj with sysfs_create_group() */
+static struct attribute_group ldlm_attr_group = {
+ .attrs = ldlm_attrs,
+};
+
static int ldlm_setup(void)
{
static struct ptlrpc_service_conf conf;
if (ldlm_state == NULL)
RETURN(-ENOMEM);
+ ldlm_kobj = kobject_create_and_add("ldlm", lustre_kobj);
+ if (!ldlm_kobj)
+ GOTO(out, -ENOMEM);
+
+ rc = sysfs_create_group(ldlm_kobj, &ldlm_attr_group);
+ if (rc)
+ GOTO(out, rc);
+
+ ldlm_ns_kset = kset_create_and_add("namespaces", NULL, ldlm_kobj);
+ if (!ldlm_ns_kset)
+ GOTO(out, -ENOMEM);
+
+ ldlm_svc_kset = kset_create_and_add("services", NULL, ldlm_kobj);
+ if (!ldlm_svc_kset)
+ GOTO(out, -ENOMEM);
+
#ifdef CONFIG_PROC_FS
- rc = ldlm_proc_setup();
- if (rc != 0)
+ rc = ldlm_proc_setup();
+ if (rc != 0)
GOTO(out, rc);
#endif /* CONFIG_PROC_FS */
},
};
ldlm_state->ldlm_cb_service = \
- ptlrpc_register_service(&conf, ldlm_svc_proc_dir);
+ ptlrpc_register_service(&conf, ldlm_svc_kset,
+ ldlm_svc_proc_dir);
if (IS_ERR(ldlm_state->ldlm_cb_service)) {
CERROR("failed to start service\n");
rc = PTR_ERR(ldlm_state->ldlm_cb_service);
},
};
ldlm_state->ldlm_cancel_service = \
- ptlrpc_register_service(&conf, ldlm_svc_proc_dir);
+ ptlrpc_register_service(&conf, ldlm_svc_kset,
+ ldlm_svc_proc_dir);
if (IS_ERR(ldlm_state->ldlm_cancel_service)) {
CERROR("failed to start service\n");
rc = PTR_ERR(ldlm_state->ldlm_cancel_service);
}
#ifdef HAVE_SERVER_SUPPORT
- INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
- expired_lock_thread.elt_state = ELT_STOPPED;
- init_waitqueue_head(&expired_lock_thread.elt_waitq);
-
- INIT_LIST_HEAD(&waiting_locks_list);
- spin_lock_init(&waiting_locks_spinlock);
- cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, NULL);
-
task = kthread_run(expired_lock_main, NULL, "ldlm_elt");
if (IS_ERR(task)) {
rc = PTR_ERR(task);
GOTO(out, rc);
}
- wait_event(expired_lock_thread.elt_waitq,
- expired_lock_thread.elt_state == ELT_READY);
+ wait_event(expired_lock_wait_queue,
+ expired_lock_thread_state == ELT_READY);
#endif /* HAVE_SERVER_SUPPORT */
rc = ldlm_pools_init();
ptlrpc_unregister_service(ldlm_state->ldlm_cancel_service);
#endif
+ if (ldlm_ns_kset)
+ kset_unregister(ldlm_ns_kset);
+ if (ldlm_svc_kset)
+ kset_unregister(ldlm_svc_kset);
+ if (ldlm_kobj)
+ kobject_put(ldlm_kobj);
+
ldlm_proc_cleanup();
#ifdef HAVE_SERVER_SUPPORT
- if (expired_lock_thread.elt_state != ELT_STOPPED) {
- expired_lock_thread.elt_state = ELT_TERMINATE;
- wake_up(&expired_lock_thread.elt_waitq);
- wait_event(expired_lock_thread.elt_waitq,
- expired_lock_thread.elt_state == ELT_STOPPED);
+ if (expired_lock_thread_state != ELT_STOPPED) {
+ expired_lock_thread_state = ELT_TERMINATE;
+ wake_up(&expired_lock_wait_queue);
+ wait_event(expired_lock_wait_queue,
+ expired_lock_thread_state == ELT_STOPPED);
}
#endif
int ldlm_init(void)
{
- mutex_init(&ldlm_ref_mutex);
- mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
- mutex_init(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
-
- INIT_LIST_HEAD(&ldlm_srv_namespace_list);
- INIT_LIST_HEAD(&ldlm_cli_active_namespace_list);
- INIT_LIST_HEAD(&ldlm_cli_inactive_namespace_list);
-
ldlm_resource_slab = kmem_cache_create("ldlm_resources",
sizeof(struct ldlm_resource), 0,
SLAB_HWCACHE_ALIGN, NULL);