/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Peter Braam <braam@clusterfs.com>
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * This file is part of the Lustre file system, http://www.lustre.org
+ * Lustre is a trademark of Cluster File Systems, Inc.
*
- * by Cluster File Systems, Inc.
+ * You may have signed or agreed to another license before downloading
+ * this software. If so, you are bound by the terms and conditions
+ * of that agreement, and the following does not apply to you. See the
+ * LICENSE file included with this distribution for more information.
+ *
+ * If you did not agree to a different license, then this copy of Lustre
+ * is open source software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * In either case, Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * license text for more details.
*/
#define DEBUG_SUBSYSTEM S_LDLM
+#ifdef __KERNEL__
+# include <lustre_dlm.h>
+#else
+# include <liblustre.h>
+#endif
+
+#include <obd_class.h>
+#include "ldlm_internal.h"
+
+/* Slab caches.  ldlm_resource_slab backs the struct ldlm_resource
+ * allocations made in this file; ldlm_lock_slab is presumably the
+ * equivalent for locks (not used in the code visible here -- TODO confirm). */
+cfs_mem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
-#include <linux/lustre_dlm.h>
+/* Per-side (server / client) namespace counters, lists and semaphores.
+ * NOTE(review): presumably these are what the ldlm_namespace_nr(),
+ * ldlm_namespace_list() and ldlm_namespace_lock() accessors used below
+ * resolve to -- confirm against ldlm_internal.h.  The two semaphores get
+ * no initializer here, so they must be initialised elsewhere before use. */
+atomic_t ldlm_srv_namespace_nr = ATOMIC_INIT(0);
+atomic_t ldlm_cli_namespace_nr = ATOMIC_INIT(0);
-kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
+struct semaphore ldlm_srv_namespace_lock;
+struct list_head ldlm_srv_namespace_list =
+ CFS_LIST_HEAD_INIT(ldlm_srv_namespace_list);
-spinlock_t ldlm_namespace_lock = SPIN_LOCK_UNLOCKED;
-struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list);
-static struct proc_dir_entry *ldlm_ns_proc_dir = NULL;
+struct semaphore ldlm_cli_namespace_lock;
+struct list_head ldlm_cli_namespace_list =
+ CFS_LIST_HEAD_INIT(ldlm_cli_namespace_list);
+
+/* Proc directories registered by ldlm_proc_setup() and removed by
+ * ldlm_proc_cleanup(): the LDLM top-level directory and its "namespaces"
+ * and "services" children. */
+cfs_proc_dir_entry_t *ldlm_type_proc_dir = NULL;
+cfs_proc_dir_entry_t *ldlm_ns_proc_dir = NULL;
+cfs_proc_dir_entry_t *ldlm_svc_proc_dir = NULL;
+
+#ifdef LPROCFS
+/*
+ * Proc handler registered as "dump_namespaces" (it has the same signature
+ * as the write handlers in this file).  The written data is ignored: every
+ * server namespace and then every client namespace is dumped at
+ * D_DLMTRACE level, and the whole write is reported as consumed.
+ */
+static int ldlm_proc_dump_ns(struct file *file, const char *buffer,
+                             unsigned long count, void *data)
+{
+        static const ldlm_side_t sides[] = {
+                LDLM_NAMESPACE_SERVER,
+                LDLM_NAMESPACE_CLIENT,
+        };
+        unsigned int i;
+
+        for (i = 0; i < sizeof(sides) / sizeof(sides[0]); i++)
+                ldlm_dump_all_namespaces(sides[i], D_DLMTRACE);
+
+        RETURN(count);
+}
-int ldlm_proc_setup(struct obd_device *obd)
+/*
+ * Build the LDLM proc tree: the top-level OBD_LDLM_DEVICENAME directory
+ * under proc_lustre_root, its "namespaces" and "services" children, and the
+ * "dump_namespaces" control file.
+ *
+ * Returns 0 on success or the negative errno reported by the failing
+ * lprocfs call.  On failure everything registered so far is removed and all
+ * three directory pointers are reset to NULL, so that ldlm_proc_cleanup()
+ * never sees an ERR_PTR value and a later retry passes the LASSERT below.
+ */
+int ldlm_proc_setup(void)
 {
+        int rc;
+        struct lprocfs_vars list[] = {
+                { "dump_namespaces", NULL, ldlm_proc_dump_ns, NULL },
+                { NULL }};
         ENTRY;
+        LASSERT(ldlm_ns_proc_dir == NULL);
+
+        ldlm_type_proc_dir = lprocfs_register(OBD_LDLM_DEVICENAME,
+                                              proc_lustre_root,
+                                              NULL, NULL);
+        if (IS_ERR(ldlm_type_proc_dir)) {
+                CERROR("LProcFS failed in ldlm-init\n");
+                rc = PTR_ERR(ldlm_type_proc_dir);
+                GOTO(err, rc);
+        }
-        if (obd->obd_proc_entry == NULL)
-                RETURN(-EINVAL);
+        ldlm_ns_proc_dir = lprocfs_register("namespaces",
+                                            ldlm_type_proc_dir,
+                                            NULL, NULL);
+        if (IS_ERR(ldlm_ns_proc_dir)) {
+                CERROR("LProcFS failed in ldlm-init\n");
+                rc = PTR_ERR(ldlm_ns_proc_dir);
+                GOTO(err_type, rc);
+        }
-        ldlm_ns_proc_dir = proc_mkdir("namespaces", obd->obd_proc_entry);
-        if (ldlm_ns_proc_dir == NULL) {
-                CERROR("Couldn't create /proc/lustre/ldlm/namespaces\n");
-                RETURN(-EPERM);
+        ldlm_svc_proc_dir = lprocfs_register("services",
+                                             ldlm_type_proc_dir,
+                                             NULL, NULL);
+        if (IS_ERR(ldlm_svc_proc_dir)) {
+                CERROR("LProcFS failed in ldlm-init\n");
+                rc = PTR_ERR(ldlm_svc_proc_dir);
+                GOTO(err_ns, rc);
         }
+
+        /* The result used to be stored in rc and then ignored; a failure
+         * to create the control file is now reported like the others. */
+        rc = lprocfs_add_vars(ldlm_type_proc_dir, list, NULL);
+        if (rc != 0) {
+                CERROR("LProcFS failed in ldlm-init\n");
+                GOTO(err_svc, rc);
+        }
+
         RETURN(0);
+
+err_svc:
+        lprocfs_remove(&ldlm_svc_proc_dir);
+err_ns:
+        lprocfs_remove(&ldlm_ns_proc_dir);
+err_type:
+        lprocfs_remove(&ldlm_type_proc_dir);
+err:
+        /* Whichever registration failed left an ERR_PTR in its global;
+         * clear all three unconditionally (harmless for the ones that
+         * were just removed). */
+        ldlm_svc_proc_dir = NULL;
+        ldlm_ns_proc_dir = NULL;
+        ldlm_type_proc_dir = NULL;
+        RETURN(rc);
 }
-void ldlm_proc_cleanup(struct obd_device *obd)
+/*
+ * Remove whatever part of the LDLM proc tree is currently registered.
+ * The two children ("services", "namespaces") are handled before their
+ * parent directory; pointers that are NULL are skipped.
+ */
+void ldlm_proc_cleanup(void)
 {
-        proc_lustre_remove_obd_entry("namespaces", obd);
+        cfs_proc_dir_entry_t **dirs[] = {
+                &ldlm_svc_proc_dir,
+                &ldlm_ns_proc_dir,
+                &ldlm_type_proc_dir,
+        };
+        unsigned int i;
+
+        for (i = 0; i < sizeof(dirs) / sizeof(dirs[0]); i++) {
+                if (*dirs[i] != NULL)
+                        lprocfs_remove(dirs[i]);
+        }
 }
-struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
+/*
+ * Read handler for the per-namespace "lru_size" proc file.  @data is the
+ * namespace.  When ns_connect_lru_resize() is true for it the current number
+ * of unused locks (ns_nr_unused) is reported, otherwise the configured
+ * limit (ns_max_unused).  Formatting is delegated to lprocfs_rd_uint().
+ */
+static int lprocfs_rd_lru_size(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
 {
-        struct ldlm_namespace *ns = NULL;
-        struct list_head *bucket;
+        struct ldlm_namespace *ns = data;
+        __u32 *value;
-        OBD_ALLOC(ns, sizeof(*ns));
-        if (!ns) {
-                LBUG();
-                GOTO(out, NULL);
+
+        if (ns_connect_lru_resize(ns))
+                value = &ns->ns_nr_unused;
+        else
+                value = &ns->ns_max_unused;
+
+        return lprocfs_rd_uint(page, start, off, count, eof, value);
+}
+
+/*
+ * Write handler for the per-namespace "lru_size" proc file.  @data is the
+ * namespace, @buffer/@count the user-supplied text.
+ *
+ *  - A 6-byte write starting with "clear" (i.e. "clear" plus one trailing
+ *    byte such as a newline) synchronously cancels the unused locks.
+ *  - Otherwise the text is parsed as a number with simple_strtoul().
+ *    With lru_resize active, a non-zero value becomes ns_max_unused, the
+ *    excess unused locks are cancelled asynchronously and lru_resize is
+ *    switched off.  Without lru_resize, the value becomes ns_max_unused and
+ *    writing 0 switches lru_resize back on if the original connect flags
+ *    had it.
+ *
+ * Returns @count on success, -EFAULT if the user buffer cannot be read and
+ * -EINVAL for unparsable input or an incomplete "clear".
+ */
+static int lprocfs_wr_lru_size(struct file *file, const char *buffer,
+                               unsigned long count, void *data)
+{
+        struct ldlm_namespace *ns = data;
+        char dummy[MAX_STRING_SIZE + 1], *end;
+        unsigned long tmp, len;
+        int lru_resize;
+
+        /* Copy only the bytes the caller supplied (truncated to the local
+         * buffer) and terminate right after them.  Always copying
+         * MAX_STRING_SIZE bytes read past the end of short writes, which
+         * could fault or feed unrelated bytes to the parser below. */
+        len = count < MAX_STRING_SIZE ? count : MAX_STRING_SIZE;
+        if (copy_from_user(dummy, buffer, len))
+                return -EFAULT;
+        dummy[len] = '\0';
+
+        if (count == 6 && memcmp(dummy, "clear", 5) == 0) {
+                CDEBUG(D_DLMTRACE,
+                       "dropping all unused locks from namespace %s\n",
+                       ns->ns_name);
+                if (ns_connect_lru_resize(ns)) {
+                        int canceled, unused = ns->ns_nr_unused;
+
+                        /* Try to cancel all @ns_nr_unused locks. */
+                        canceled = ldlm_cancel_lru(ns, unused, LDLM_SYNC,
+                                                   LDLM_CANCEL_PASSED);
+                        if (canceled < unused) {
+                                CERROR("not all requested locks are canceled, "
+                                       "requested: %d, canceled: %d\n", unused,
+                                       canceled);
+                                return -EINVAL;
+                        }
+                } else {
+                        /* Temporarily drop the limit to zero so the
+                         * cancel pass sees every unused lock as excess. */
+                        tmp = ns->ns_max_unused;
+                        ns->ns_max_unused = 0;
+                        ldlm_cancel_lru(ns, 0, LDLM_SYNC, LDLM_CANCEL_PASSED);
+                        ns->ns_max_unused = tmp;
+                }
+                return count;
         }
-        ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE);
-        if (!ns->ns_hash) {
-                LBUG();
-                GOTO(out, ns);
+        tmp = simple_strtoul(dummy, &end, 0);
+        if (dummy == end) {
+                CERROR("invalid value written\n");
+                return -EINVAL;
+        }
+        /* Writing 0 means "let lru_resize manage the LRU". */
+        lru_resize = (tmp == 0);
+
+        if (ns_connect_lru_resize(ns)) {
+                if (!lru_resize)
+                        ns->ns_max_unused = (unsigned int)tmp;
+
+                /* From here on tmp is the number of locks to cancel. */
+                if (tmp > ns->ns_nr_unused)
+                        tmp = ns->ns_nr_unused;
+                tmp = ns->ns_nr_unused - tmp;
+
+                CDEBUG(D_DLMTRACE, "changing namespace %s unused locks from %u to %u\n",
+                       ns->ns_name, ns->ns_nr_unused, (unsigned int)tmp);
+                ldlm_cancel_lru(ns, (unsigned int)tmp, LDLM_ASYNC, LDLM_CANCEL_PASSED);
+
+                if (!lru_resize) {
+                        CDEBUG(D_DLMTRACE, "disable lru_resize for namespace %s\n",
+                               ns->ns_name);
+                        ns->ns_connect_flags &= ~OBD_CONNECT_LRU_RESIZE;
+                }
+        } else {
+                CDEBUG(D_DLMTRACE, "changing namespace %s max_unused from %u to %u\n",
+                       ns->ns_name, ns->ns_max_unused, (unsigned int)tmp);
+                ns->ns_max_unused = (unsigned int)tmp;
+                ldlm_cancel_lru(ns, 0, LDLM_ASYNC, LDLM_CANCEL_PASSED);
+
+                /* Make sure that originally lru resize was supported before
+                 * turning it on here. */
+                if (lru_resize &&
+                    (ns->ns_orig_connect_flags & OBD_CONNECT_LRU_RESIZE)) {
+                        CDEBUG(D_DLMTRACE, "enable lru_resize for namespace %s\n",
+                               ns->ns_name);
+                        ns->ns_connect_flags |= OBD_CONNECT_LRU_RESIZE;
+                }
         }
-        OBD_ALLOC(ns->ns_name, strlen(name) + 1);
-        if (!ns->ns_name) {
-                LBUG();
-                GOTO(out, ns);
+        return count;
+}
+
+/*
+ * Register the proc files of namespace @ns below ldlm_ns_proc_dir, each
+ * named "<ns_name>/<file>":
+ *   resource_count, lock_count                    - every namespace
+ *   lock_unused_count, lru_size, shrink_thumb,
+ *   lru_max_age                                   - client namespaces only
+ *
+ * One lprocfs_vars slot is reused for every entry (slot [1] stays zeroed as
+ * the terminator), so fields set for one entry remain in effect for the
+ * following ones unless overwritten.
+ */
+void ldlm_proc_namespace(struct ldlm_namespace *ns)
+{
+        struct lprocfs_vars lock_vars[2];
+        char lock_name[MAX_STRING_SIZE + 1];
+
+        LASSERT(ns != NULL);
+        LASSERT(ns->ns_name != NULL);
+
+        lock_name[MAX_STRING_SIZE] = '\0';
+
+        memset(lock_vars, 0, sizeof(lock_vars));
+        lock_vars[0].name = lock_name;
+
+        snprintf(lock_name, MAX_STRING_SIZE, "%s/resource_count", ns->ns_name);
+        lock_vars[0].data = &ns->ns_refcount;
+        lock_vars[0].read_fptr = lprocfs_rd_atomic;
+        lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+        snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name);
+        lock_vars[0].data = &ns->ns_locks;
+        lock_vars[0].read_fptr = lprocfs_rd_atomic;
+        lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+        if (ns_is_client(ns)) {
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_unused_count",
+                         ns->ns_name);
+                lock_vars[0].data = &ns->ns_nr_unused;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+                /* lru_size needs the whole namespace, not a single field. */
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_size",
+                         ns->ns_name);
+                lock_vars[0].data = ns;
+                lock_vars[0].read_fptr = lprocfs_rd_lru_size;
+                lock_vars[0].write_fptr = lprocfs_wr_lru_size;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+                /* The generic uint handlers operate on the integer that
+                 * @data points at, so hand them the field itself.  Passing
+                 * @ns here made reads report, and writes overwrite, the
+                 * first bytes of the namespace structure.
+                 * NOTE(review): assumes ns_shrink_thumb is an unsigned int
+                 * like the other fields exported this way -- confirm. */
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/shrink_thumb",
+                         ns->ns_name);
+                lock_vars[0].data = &ns->ns_shrink_thumb;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lock_vars[0].write_fptr = lprocfs_wr_uint;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
+                snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_max_age",
+                         ns->ns_name);
+                lock_vars[0].data = &ns->ns_max_age;
+                lock_vars[0].read_fptr = lprocfs_rd_uint;
+                lock_vars[0].write_fptr = lprocfs_wr_uint;
+                lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
         }
-        strcpy(ns->ns_name, name);
+}
+#undef MAX_STRING_SIZE
+#else
+#define ldlm_proc_namespace(ns) do {} while (0)
+#endif /* LPROCFS */
- ptlrpc_init_client(NULL, NULL, LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL,
- &ns->ns_rpc_client);
+/*
+ * Allocate and initialise a namespace called @name for side @client with
+ * appetite @apt, register its proc entries and lock pool, and link it onto
+ * the namespace list of that side.
+ *
+ * An LDLM reference is taken with ldlm_get_ref() first.  On any failure
+ * that reference and every piece built so far (including the proc entries)
+ * are released again and NULL is returned.
+ */
+struct ldlm_namespace *ldlm_namespace_new(char *name, ldlm_side_t client,
+                                          ldlm_appetite_t apt)
+{
+        struct ldlm_namespace *ns = NULL;
+        struct list_head *bucket;
+        int rc, idx, namelen;
+        ENTRY;
-        INIT_LIST_HEAD(&ns->ns_root_list);
-        l_lock_init(&ns->ns_lock);
+        rc = ldlm_get_ref();
+        if (rc) {
+                CERROR("ldlm_get_ref failed: %d\n", rc);
+                RETURN(NULL);
+        }
+
+        OBD_ALLOC_PTR(ns);
+        if (!ns)
+                GOTO(out_ref, NULL);
+
+        OBD_VMALLOC(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+        if (!ns->ns_hash)
+                GOTO(out_ns, NULL);
+
+        ns->ns_shrink_thumb = LDLM_LOCK_SHRINK_THUMB;
+        ns->ns_appetite = apt;
+        namelen = strlen(name);
+        OBD_ALLOC(ns->ns_name, namelen + 1);
+        if (!ns->ns_name)
+                GOTO(out_hash, NULL);
+
+        strcpy(ns->ns_name, name);
+
+        CFS_INIT_LIST_HEAD(&ns->ns_root_list);
         ns->ns_refcount = 0;
         ns->ns_client = client;
+        spin_lock_init(&ns->ns_hash_lock);
+        atomic_set(&ns->ns_locks, 0);
+        ns->ns_resources = 0;
+        cfs_waitq_init(&ns->ns_waitq);
         for (bucket = ns->ns_hash + RES_HASH_SIZE - 1; bucket >= ns->ns_hash;
              bucket--)
-                INIT_LIST_HEAD(bucket);
+                CFS_INIT_LIST_HEAD(bucket);
+
+        CFS_INIT_LIST_HEAD(&ns->ns_unused_list);
+        ns->ns_nr_unused = 0;
+        ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
+        ns->ns_max_age = LDLM_DEFAULT_MAX_ALIVE;
+        spin_lock_init(&ns->ns_unused_lock);
+        ns->ns_orig_connect_flags = 0;
+        ns->ns_connect_flags = 0;
+        ldlm_proc_namespace(ns);
+
+        /* The pool index is the number of namespaces already on this side. */
+        idx = atomic_read(ldlm_namespace_nr(client));
+        rc = ldlm_pool_init(&ns->ns_pool, ns, idx, client);
+        if (rc) {
+                CERROR("Can't initialize lock pool, rc %d\n", rc);
+                GOTO(out_proc, rc);
+        }
-        spin_lock(&ldlm_namespace_lock);
-        list_add(&ns->ns_list_chain, &ldlm_namespace_list);
-        ns->ns_proc_dir = proc_mkdir(ns->ns_name, ldlm_ns_proc_dir);
-        if (ns->ns_proc_dir == NULL)
-                CERROR("Unable to create proc directory for namespace.\n");
-        spin_unlock(&ldlm_namespace_lock);
+        mutex_down(ldlm_namespace_lock(client));
+        list_add(&ns->ns_list_chain, ldlm_namespace_list(client));
+        atomic_inc(ldlm_namespace_nr(client));
+        mutex_up(ldlm_namespace_lock(client));
         RETURN(ns);
-
- out:
-        if (ns && ns->ns_hash)
-                vfree(ns->ns_hash);
-        if (ns && ns->ns_name)
-                OBD_FREE(ns->ns_name, strlen(name) + 1);
-        if (ns)
-                OBD_FREE(ns, sizeof(*ns));
-        return NULL;
+out_proc:
+#ifdef LPROCFS
+        {
+                /* Unregister what ldlm_proc_namespace() added above: those
+                 * entries carry pointers into @ns, which is freed below.
+                 * Same lookup as in ldlm_namespace_free_post(). */
+                struct proc_dir_entry *dir;
+
+                dir = lprocfs_srch(ldlm_ns_proc_dir, ns->ns_name);
+                if (dir != NULL)
+                        lprocfs_remove(&dir);
+        }
+#endif
+        ldlm_namespace_cleanup(ns, 0);
+        OBD_FREE(ns->ns_name, namelen + 1);
+out_hash:
+        OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+out_ns:
+        OBD_FREE_PTR(ns);
+out_ref:
+        ldlm_put_ref(0);
+        RETURN(NULL);
 }
extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
-static void cleanup_resource(struct ldlm_resource *res, struct list_head *q)
+/* If flags contains FL_LOCAL_ONLY, don't try to tell the server, just cleanup.
+ * This is currently only used for recovery, and we make certain assumptions
+ * as a result--notably, that we shouldn't cancel locks with refs. -phil
+ *
+ * Walks list @q of @res and handles one not-yet-cleaned lock per pass until
+ * none is left.  The resource lock is taken with lock_res() for every scan
+ * of @q and is dropped again before the completion AST, ldlm_cli_cancel()
+ * or ldlm_lock_destroy() is called.
+ *
+ * NOTE(review): this comment used to say "Called with the ns_lock held",
+ * but the caller visible in this file (ldlm_namespace_cleanup) releases
+ * ns_hash_lock before calling here -- confirm the intended locking rule. */
+static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
+ int flags)
{
- struct list_head *tmp, *pos;
- int rc = 0, client = res->lr_namespace->ns_client;
+ struct list_head *tmp;
+ int rc = 0, client = ns_is_client(res->lr_namespace);
+ int local_only = (flags & LDLM_FL_LOCAL_ONLY);
ENTRY;
- list_for_each_safe(tmp, pos, q) {
- struct ldlm_lock *lock;
- lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- LDLM_LOCK_GET(lock);
+
+ do {
+ struct ldlm_lock *lock = NULL;
+
+ /* first, we look for non-cleaned-yet lock
+ * all cleaned locks are marked by CLEANED flag */
+ lock_res(res);
+ list_for_each(tmp, q) {
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+ if (lock->l_flags & LDLM_FL_CLEANED) {
+ /* reset so that a list holding only cleaned locks
+ * leaves lock == NULL after the scan */
+ lock = NULL;
+ continue;
+ }
+ LDLM_LOCK_GET(lock);
+ lock->l_flags |= LDLM_FL_CLEANED;
+ break;
+ }
+
+ /* nothing left to clean on this list: done */
+ if (lock == NULL) {
+ unlock_res(res);
+ break;
+ }
+
+ /* Set CBPENDING so nothing in the cancellation path
+ * can match this lock */
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ lock->l_flags |= LDLM_FL_FAILED;
+ lock->l_flags |= flags;
+
+ /* ... without sending a CANCEL message for local_only. */
+ if (local_only)
+ lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+
+ if (local_only && (lock->l_readers || lock->l_writers)) {
+ /* This is a little bit gross, but much better than the
+ * alternative: pretend that we got a blocking AST from
+ * the server, so that when the lock is decref'd, it
+ * will go away ... */
+ unlock_res(res);
+ LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
+ if (lock->l_completion_ast)
+ lock->l_completion_ast(lock, 0, NULL);
+ LDLM_LOCK_PUT(lock);
+ /* continue in a do { } while (1) re-tests the always-true
+ * condition, i.e. starts the next scan of the list */
+ continue;
+ }
if (client) {
struct lustre_handle lockh;
+
+ unlock_res(res);
ldlm_lock2handle(lock, &lockh);
- /* can we get away without a connh here? */
rc = ldlm_cli_cancel(&lockh);
- if (rc != ELDLM_OK) {
- /* It failed remotely, but we'll force it to
- * cleanup locally. */
+ /* a failed cancel is only logged; the loop carries on */
+ if (rc)
CERROR("ldlm_cli_cancel: %d\n", rc);
- ldlm_lock_cancel(lock);
- }
} else {
- CERROR("Freeing lock %p still held by client node.\n",
- lock);
- ldlm_lock_dump(lock);
-
+ /* unlink while the resource lock is still held, destroy
+ * the lock after dropping it */
ldlm_resource_unlink_lock(lock);
+ unlock_res(res);
+ LDLM_DEBUG(lock, "Freeing a lock still held by a "
+ "client node");
ldlm_lock_destroy(lock);
}
+ /* drop the reference taken by LDLM_LOCK_GET() in the scan above */
LDLM_LOCK_PUT(lock);
- }
+ } while (1);
+
+ EXIT;
}
-int ldlm_namespace_free(struct ldlm_namespace *ns)
+/* Run cleanup_resource() with @flags over the granted, converting and
+ * waiting lists of every resource hashed in @ns.  A NULL @ns is accepted and
+ * skipped.  Always returns ELDLM_OK.
+ *
+ * ns_hash_lock protects the bucket walk; it is dropped around the
+ * per-resource work, during which the resource is pinned by an extra
+ * reference so that the list position stays valid. */
+int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags)
{
- struct list_head *tmp, *pos;
+ struct list_head *tmp;
int i;
+ if (ns == NULL) {
+ CDEBUG(D_INFO, "NULL ns, skipping cleanup\n");
+ return ELDLM_OK;
+ }
+
+ for (i = 0; i < RES_HASH_SIZE; i++) {
+ spin_lock(&ns->ns_hash_lock);
+ tmp = ns->ns_hash[i].next;
+ while (tmp != &(ns->ns_hash[i])) {
+ struct ldlm_resource *res;
+ res = list_entry(tmp, struct ldlm_resource, lr_hash);
+ ldlm_resource_getref(res);
+ spin_unlock(&ns->ns_hash_lock);
+
+ cleanup_resource(res, &res->lr_granted, flags);
+ cleanup_resource(res, &res->lr_converting, flags);
+ cleanup_resource(res, &res->lr_waiting, flags);
+
+ spin_lock(&ns->ns_hash_lock);
+ /* advance before the reference is dropped: the put below
+ * may unlink and free res */
+ tmp = tmp->next;
+
+ /* XXX: former stuff caused issues in case of race
+ * between ldlm_namespace_cleanup() and lockd() when
+ * client gets blocking ast when lock gets distracted by
+ * server. This is 1_4 branch solution, let's see how
+ * will it behave. */
+ /* NOTE(review): the message below says "forcing cleanup", but
+ * nothing further is done for a resource that survives the
+ * put; it is only logged. */
+ if (!ldlm_resource_putref_locked(res))
+ CDEBUG(D_INFO,
+ "Namespace %s resource refcount nonzero "
+ "(%d) after lock cleanup; forcing cleanup.\n",
+ ns->ns_name, atomic_read(&res->lr_refcount));
+ }
+ spin_unlock(&ns->ns_hash_lock);
+ }
+
+ return ELDLM_OK;
+}
+
+/* First half of ldlm_namespace_free(): detach @ns and clean it up, without
+ * freeing anything.
+ *
+ * Under the per-side namespace semaphore the namespace is unlinked from its
+ * list, the per-side counter is decremented and the lock pool is shut down.
+ * Then ldlm_namespace_cleanup() is run and, if resources still hold
+ * references, the caller sleeps on ns_waitq until ns_refcount reaches zero
+ * (or the wait ends early, which is reported on the console).
+ *
+ * A NULL @ns is accepted.  Always returns ELDLM_OK. */
+int ldlm_namespace_free_prior(struct ldlm_namespace *ns)
+{
+ ENTRY;
if (!ns)
RETURN(ELDLM_OK);
- spin_lock(&ldlm_namespace_lock);
+ mutex_down(ldlm_namespace_lock(ns->ns_client));
list_del(&ns->ns_list_chain);
- remove_proc_entry(ns->ns_name, ldlm_ns_proc_dir);
- spin_unlock(&ldlm_namespace_lock);
+ atomic_dec(ldlm_namespace_nr(ns->ns_client));
+ ldlm_pool_fini(&ns->ns_pool);
+ mutex_up(ldlm_namespace_lock(ns->ns_client));
+
+ /* At shutdown time, don't call the cancellation callback */
+ ldlm_namespace_cleanup(ns, 0);
+
+ /* NOTE(review): ns_refcount is read here without ns_hash_lock, the lock
+ * the get/put helpers in this file take when changing it */
+ if (ns->ns_refcount > 0) {
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+ int rc;
+ CDEBUG(D_DLMTRACE,
+ "dlm namespace %s free waiting on refcount %d\n",
+ ns->ns_name, ns->ns_refcount);
+ rc = l_wait_event(ns->ns_waitq,
+ ns->ns_refcount == 0, &lwi);
+ /* still referenced after the wait returned: complain and go on */
+ if (ns->ns_refcount)
+ LCONSOLE_ERROR_MSG(0x139, "Lock manager: wait for %s "
+ "namespace cleanup aborted with %d "
+ "resources in use. (%d)\nI'm going "
+ "to try to clean up anyway, but I "
+ "might need a reboot of this node.\n",
+ ns->ns_name, (int) ns->ns_refcount,
+ rc);
+ CDEBUG(D_DLMTRACE,
+ "dlm namespace %s free done waiting\n", ns->ns_name);
+ }
- l_lock(&ns->ns_lock);
+ RETURN(ELDLM_OK);
+}
- for (i = 0; i < RES_HASH_SIZE; i++) {
- list_for_each_safe(tmp, pos, &(ns->ns_hash[i])) {
- struct ldlm_resource *res;
- res = list_entry(tmp, struct ldlm_resource, lr_hash);
- ldlm_resource_getref(res);
+/* Second half of ldlm_namespace_free(): remove the namespace's proc
+ * directory (LPROCFS builds only), free the hash table, the name and the
+ * namespace itself, and drop the LDLM reference with ldlm_put_ref(@force).
+ *
+ * A NULL @ns is accepted.  Always returns ELDLM_OK.  @ns must not be used
+ * after this returns. */
+int ldlm_namespace_free_post(struct ldlm_namespace *ns, int force)
+{
+ ENTRY;
+ if (!ns)
+ RETURN(ELDLM_OK);
- cleanup_resource(res, &res->lr_granted);
- cleanup_resource(res, &res->lr_converting);
- cleanup_resource(res, &res->lr_waiting);
-
- if (!ldlm_resource_put(res)) {
- CERROR("Resource refcount nonzero (%d) after "
- "lock cleanup; forcing cleanup.\n",
- atomic_read(&res->lr_refcount));
- ldlm_resource_dump(res);
- atomic_set(&res->lr_refcount, 1);
- ldlm_resource_put(res);
- }
+#ifdef LPROCFS
+ {
+ struct proc_dir_entry *dir;
+ /* the per-namespace entries live in a directory named after
+ * the namespace, below ldlm_ns_proc_dir */
+ dir = lprocfs_srch(ldlm_ns_proc_dir, ns->ns_name);
+ if (dir == NULL) {
+ CERROR("dlm namespace %s has no procfs dir?\n",
+ ns->ns_name);
+ } else {
+ lprocfs_remove(&dir);
}
}
-
- vfree(ns->ns_hash /* , sizeof(struct list_head) * RES_HASH_SIZE */);
- ptlrpc_cleanup_client(&ns->ns_rpc_client);
+#endif
+ OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
- OBD_FREE(ns, sizeof(*ns));
+ OBD_FREE_PTR(ns);
+ ldlm_put_ref(force);
+ RETURN(ELDLM_OK);
+}
+
+/*
+ * Clean up the resources of a namespace and then free it.
+ *
+ * The work is split into two helpers because of a deadlock (bug 12864):
+ *
+ *   proc1, destroying an import:
+ *       class_disconnect_export()   grabs cl_sem
+ *         -> ldlm_namespace_free()
+ *           -> lprocfs_remove()     grabs _lprocfs_lock
+ *   proc2, reading proc info:
+ *       lprocfs_fops_read()         grabs _lprocfs_lock
+ *         -> osc_rd_active(), etc.  grab cl_sem
+ *
+ * ldlm_namespace_free_prior() cleans up the resources that are still in
+ * use; ldlm_namespace_free_post() unregisters the lprocfs entries and frees
+ * the memory, and is meant to be called without cli->cl_sem held.  This
+ * wrapper simply runs both, one after the other.
+ */
+int ldlm_namespace_free(struct ldlm_namespace *ns, int force)
+{
+        (void)ldlm_namespace_free_prior(ns);
+        (void)ldlm_namespace_free_post(ns, force);
+
         return ELDLM_OK;
 }
-static __u32 ldlm_hash_fn(struct ldlm_resource *parent, __u64 *name)
+
+/*
+ * Take one reference on @ns.  No lock is taken here; ldlm_namespace_get()
+ * is the variant that wraps this in ns_hash_lock.
+ */
+void ldlm_namespace_get_nolock(struct ldlm_namespace *ns)
+{
+        LASSERT(ns->ns_refcount >= 0);
+        ++ns->ns_refcount;
+}
+
+/* Take one reference on @ns, holding ns_hash_lock around the update. */
+void ldlm_namespace_get(struct ldlm_namespace *ns)
+{
+ spin_lock(&ns->ns_hash_lock);
+ ldlm_namespace_get_nolock(ns);
+ spin_unlock(&ns->ns_hash_lock);
+}
+
+/*
+ * Drop one reference on @ns.  When that was the last one and @wakeup is
+ * non-zero, wake the sleepers on ns_waitq.  No lock is taken here;
+ * ldlm_namespace_put() is the variant that wraps this in ns_hash_lock.
+ */
+void ldlm_namespace_put_nolock(struct ldlm_namespace *ns, int wakeup)
+{
+        LASSERT(ns->ns_refcount > 0);
+
+        if (--ns->ns_refcount != 0)
+                return;
+
+        if (wakeup)
+                wake_up(&ns->ns_waitq);
+}
+
+/* Drop one reference on @ns, holding ns_hash_lock around the update.
+ * @wakeup is passed through to ldlm_namespace_put_nolock(). */
+void ldlm_namespace_put(struct ldlm_namespace *ns, int wakeup)
+{
+ spin_lock(&ns->ns_hash_lock);
+ ldlm_namespace_put_nolock(ns, wakeup);
+ spin_unlock(&ns->ns_hash_lock);
+}
+
+/* Move @ns to the tail of the namespace list of side @client.  @ns must
+ * already be linked on a list.
+ * Should be called under ldlm_namespace_lock(client) taken */
+void ldlm_namespace_move(struct ldlm_namespace *ns, ldlm_side_t client)
+{
+ LASSERT(!list_empty(&ns->ns_list_chain));
+ LASSERT_SEM_LOCKED(ldlm_namespace_lock(client));
+ list_move_tail(&ns->ns_list_chain, ldlm_namespace_list(client));
+}
+
+/*
+ * Return the namespace at the head of the list of side @client.  The list
+ * must not be empty, and the caller must hold ldlm_namespace_lock(client);
+ * both conditions are asserted.
+ */
+struct ldlm_namespace *ldlm_namespace_first(ldlm_side_t client)
+{
+        struct list_head *head;
+
+        LASSERT_SEM_LOCKED(ldlm_namespace_lock(client));
+
+        head = ldlm_namespace_list(client);
+        LASSERT(!list_empty(head));
+
+        return container_of(head->next, struct ldlm_namespace, ns_list_chain);
+}
+static __u32 ldlm_hash_fn(struct ldlm_resource *parent,
+ const struct ldlm_res_id *name)
{
__u32 hash = 0;
int i;
for (i = 0; i < RES_NAME_SIZE; i++)
- hash += name[i];
+ hash += name->name[i];
hash += (__u32)((unsigned long)parent >> 4);
{
struct ldlm_resource *res;
- res = kmem_cache_alloc(ldlm_resource_slab, SLAB_KERNEL);
- if (res == NULL) {
- LBUG();
+ OBD_SLAB_ALLOC(res, ldlm_resource_slab, CFS_ALLOC_IO, sizeof *res);
+ if (res == NULL)
return NULL;
- }
- memset(res, 0, sizeof(*res));
- INIT_LIST_HEAD(&res->lr_children);
- INIT_LIST_HEAD(&res->lr_childof);
- INIT_LIST_HEAD(&res->lr_granted);
- INIT_LIST_HEAD(&res->lr_converting);
- INIT_LIST_HEAD(&res->lr_waiting);
+ memset(res, 0, sizeof(*res));
+ CFS_INIT_LIST_HEAD(&res->lr_children);
+ CFS_INIT_LIST_HEAD(&res->lr_childof);
+ CFS_INIT_LIST_HEAD(&res->lr_granted);
+ CFS_INIT_LIST_HEAD(&res->lr_converting);
+ CFS_INIT_LIST_HEAD(&res->lr_waiting);
atomic_set(&res->lr_refcount, 1);
+ spin_lock_init(&res->lr_lock);
+
+ /* one who creates the resource must unlock
+ * the semaphore after lvb initialization */
+ init_MUTEX_LOCKED(&res->lr_lvb_sem);
return res;
}
+/*
+ * Look up the resource called @name in hash bucket @hash of @ns.
+ * Returns the matching resource, or NULL when the bucket has none.  No
+ * reference is taken on the result.
+ *
+ * Must be called with ns_hash_lock held (asserted).
+ */
+static struct ldlm_resource *
+ldlm_resource_find(struct ldlm_namespace *ns, const struct ldlm_res_id *name,
+                   __u32 hash)
+{
+        struct list_head *head, *pos;
+
+        LASSERT_SPIN_LOCKED(&ns->ns_hash_lock);
+        head = ns->ns_hash + hash;
+
+        list_for_each(pos, head) {
+                struct ldlm_resource *cand;
+
+                cand = list_entry(pos, struct ldlm_resource, lr_hash);
+                if (memcmp(&cand->lr_name, name, sizeof(cand->lr_name)) != 0)
+                        continue;
+
+                return cand;
+        }
+
+        return NULL;
+}
+
/* Create a resource called @name of type @type and insert it into bucket
* @hash of @ns, or return the existing one if another thread inserted it
* first.
*
* Args: namespace; ns_hash_lock is taken and released in here, so the caller
* must not hold it (the old "locked namespace" wording predates that).
* Returns: referenced, unlocked resource, or NULL if allocation failed */
-static struct ldlm_resource *ldlm_resource_add(struct ldlm_namespace *ns,
- struct ldlm_resource *parent,
- __u64 *name, __u32 type)
+static struct ldlm_resource *
+ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent,
+ const struct ldlm_res_id *name, __u32 hash, ldlm_type_t type)
{
struct list_head *bucket;
- struct ldlm_resource *res;
+ struct ldlm_resource *res, *old_res;
ENTRY;
- if (type < LDLM_MIN_TYPE || type > LDLM_MAX_TYPE) {
- LBUG();
- RETURN(NULL);
- }
+ LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE,
+ "type: %d\n", type);
+ /* allocate before taking the hash lock */
res = ldlm_resource_new();
- if (!res) {
- LBUG();
+ if (!res)
RETURN(NULL);
- }
- memcpy(res->lr_name, name, sizeof(res->lr_name));
+ res->lr_name = *name;
res->lr_namespace = ns;
- ns->ns_refcount++;
-
res->lr_type = type;
res->lr_most_restr = LCK_NL;
- bucket = ns->ns_hash + ldlm_hash_fn(parent, name);
+ /* re-check under the lock: the resource may have been added since the
+ * caller's lookup */
+ spin_lock(&ns->ns_hash_lock);
+ old_res = ldlm_resource_find(ns, name, hash);
+ if (old_res) {
+ /* someone won the race and added the resource before */
+ ldlm_resource_getref(old_res);
+ spin_unlock(&ns->ns_hash_lock);
+ OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
+ /* synchronize WRT resource creation */
+ /* down/up on lr_lvb_sem blocks until the creator has released it
+ * after lvbo_init (see the end of this function) */
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
+ down(&old_res->lr_lvb_sem);
+ up(&old_res->lr_lvb_sem);
+ }
+ RETURN(old_res);
+ }
+
+ /* we won! let's add the resource */
+ bucket = ns->ns_hash + hash;
list_add(&res->lr_hash, bucket);
+ ns->ns_resources++;
+ /* every hashed resource holds one namespace reference */
+ ldlm_namespace_get_nolock(ns);
- if (parent == NULL)
+ if (parent == NULL) {
list_add(&res->lr_childof, &ns->ns_root_list);
- else {
+ } else {
res->lr_parent = parent;
list_add(&res->lr_childof, &parent->lr_children);
}
+ spin_unlock(&ns->ns_hash_lock);
+
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
+ int rc;
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2);
+ rc = ns->ns_lvbo->lvbo_init(res);
+ /* a failing lvbo_init is only logged; res is still returned */
+ if (rc)
+ CERROR("lvbo_init failed for resource "
+ LPU64": rc %d\n", name->name[0], rc);
+ /* we create resource with locked lr_lvb_sem */
+ up(&res->lr_lvb_sem);
+ }
RETURN(res);
}
/* Find the resource called @name in @ns and return it with an extra
* reference; when it does not exist and @create is non-zero, create it via
* ldlm_resource_add().
*
* Args: unlocked namespace
* Locks: takes and releases ns->ns_hash_lock; when the namespace has an
* lvbo_init method it also does a down/up on res->lr_lvb_sem
* Returns: referenced, unlocked ldlm_resource or NULL */
-struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
- struct ldlm_resource *parent,
- __u64 *name, __u32 type, int create)
+struct ldlm_resource *
+ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
+ const struct ldlm_res_id *name, ldlm_type_t type, int create)
{
- struct list_head *bucket;
- struct list_head *tmp = bucket;
+ __u32 hash = ldlm_hash_fn(parent, name);
struct ldlm_resource *res = NULL;
ENTRY;
- if (ns == NULL || ns->ns_hash == NULL) {
- LBUG();
- RETURN(NULL);
- }
-
- l_lock(&ns->ns_lock);
- bucket = ns->ns_hash + ldlm_hash_fn(parent, name);
-
- list_for_each(tmp, bucket) {
- struct ldlm_resource *chk;
- chk = list_entry(tmp, struct ldlm_resource, lr_hash);
-
- if (memcmp(chk->lr_name, name, sizeof(chk->lr_name)) == 0) {
- res = chk;
- atomic_inc(&res->lr_refcount);
- EXIT;
- break;
+ LASSERT(ns != NULL);
+ LASSERT(ns->ns_hash != NULL);
+ /* a resource name whose first word is zero is rejected */
+ LASSERT(name->name[0] != 0);
+
+ spin_lock(&ns->ns_hash_lock);
+ res = ldlm_resource_find(ns, name, hash);
+ if (res) {
+ ldlm_resource_getref(res);
+ spin_unlock(&ns->ns_hash_lock);
+ /* synchronize WRT resource creation */
+ if (ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
+ down(&res->lr_lvb_sem);
+ up(&res->lr_lvb_sem);
}
+ RETURN(res);
}
+ spin_unlock(&ns->ns_hash_lock);
- if (res == NULL && create)
- res = ldlm_resource_add(ns, parent, name, type);
- l_unlock(&ns->ns_lock);
+ if (create == 0)
+ RETURN(NULL);
+ /* the hash lock was dropped above; ldlm_resource_add() repeats the
+ * lookup under the lock before inserting */
+ res = ldlm_resource_add(ns, parent, name, hash, type);
RETURN(res);
}
+/* Take one reference on @res (which must be neither NULL nor the LP_POISON
+ * pattern) and return @res, logging the new count at D_INFO. */
struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res)
{
+ LASSERT(res != NULL);
+ LASSERT(res != LP_POISON);
atomic_inc(&res->lr_refcount);
+ CDEBUG(D_INFO, "getref res: %p count: %d\n", res,
+ atomic_read(&res->lr_refcount));
return res;
}
-/* Returns 1 if the resource was freed, 0 if it remains. */
-int ldlm_resource_put(struct ldlm_resource *res)
+/* Bookkeeping for a resource whose last reference is being dropped; the
+ * caller holds ns_hash_lock (asserted).
+ *
+ * Dumps the resource and LBUGs if any of its lock lists or its children
+ * list is still populated.  Otherwise drops the namespace reference held by
+ * the resource, unlinks it from the hash and child lists and decrements
+ * ns_resources, waking ns_waitq when that reaches zero.
+ *
+ * The resource memory is not freed here; both callers in this file do that
+ * themselves. */
+void __ldlm_resource_putref_final(struct ldlm_resource *res)
{
- int rc = 0;
+ struct ldlm_namespace *ns = res->lr_namespace;
- if (atomic_dec_and_test(&res->lr_refcount)) {
- struct ldlm_namespace *ns = res->lr_namespace;
- ENTRY;
+ LASSERT_SPIN_LOCKED(&ns->ns_hash_lock);
- l_lock(&ns->ns_lock);
+ if (!list_empty(&res->lr_granted)) {
+ ldlm_resource_dump(D_ERROR, res);
+ LBUG();
+ }
- if (atomic_read(&res->lr_refcount) != 0) {
- /* We lost the race. */
- l_unlock(&ns->ns_lock);
- goto out;
- }
+ if (!list_empty(&res->lr_converting)) {
+ ldlm_resource_dump(D_ERROR, res);
+ LBUG();
+ }
- if (!list_empty(&res->lr_granted))
- LBUG();
+ if (!list_empty(&res->lr_waiting)) {
+ ldlm_resource_dump(D_ERROR, res);
+ LBUG();
+ }
- if (!list_empty(&res->lr_converting))
- LBUG();
+ if (!list_empty(&res->lr_children)) {
+ ldlm_resource_dump(D_ERROR, res);
+ LBUG();
+ }
- if (!list_empty(&res->lr_waiting))
- LBUG();
+ /* Pass 0 here to not wake ->ns_waitq up yet, we will do it few
+ * lines below when all children are freed. */
+ ldlm_namespace_put_nolock(ns, 0);
+ list_del_init(&res->lr_hash);
+ list_del_init(&res->lr_childof);
- if (!list_empty(&res->lr_children))
- LBUG();
+ ns->ns_resources--;
+ if (ns->ns_resources == 0)
+ wake_up(&ns->ns_waitq);
+}
- ns->ns_refcount--;
- list_del(&res->lr_hash);
- list_del(&res->lr_childof);
+/* Drop one reference on @res; the caller must NOT hold ns_hash_lock.
+ * When the count reaches zero the resource is unhashed under ns_hash_lock
+ * and then freed together with its LVB data, if any.
+ * Returns 1 if the resource was freed, 0 if it remains. */
+int ldlm_resource_putref(struct ldlm_resource *res)
+{
+ struct ldlm_namespace *ns = res->lr_namespace;
+ int rc = 0;
+ ENTRY;
- kmem_cache_free(ldlm_resource_slab, res);
- l_unlock(&ns->ns_lock);
+ CDEBUG(D_INFO, "putref res: %p count: %d\n", res,
+ atomic_read(&res->lr_refcount) - 1);
+ LASSERTF(atomic_read(&res->lr_refcount) > 0, "%d",
+ atomic_read(&res->lr_refcount));
+ LASSERTF(atomic_read(&res->lr_refcount) < LI_POISON, "%d",
+ atomic_read(&res->lr_refcount));
+
+ /* the true branch is entered with ns_hash_lock held (it is released
+ * right after the final bookkeeping) */
+ if (atomic_dec_and_lock(&res->lr_refcount, &ns->ns_hash_lock)) {
+ __ldlm_resource_putref_final(res);
+ spin_unlock(&ns->ns_hash_lock);
+ if (res->lr_lvb_data)
+ OBD_FREE(res->lr_lvb_data, res->lr_lvb_len);
+ OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
+ rc = 1;
+ }
+
+ RETURN(rc);
+}
+
+/* Variant of ldlm_resource_putref() for callers that already hold
+ * ns_hash_lock (the callers visible in this file call it that way, and
+ * __ldlm_resource_putref_final() asserts the lock).  The lock stays held
+ * on return.
+ * Returns 1 if the resource was freed, 0 if it remains. */
+int ldlm_resource_putref_locked(struct ldlm_resource *res)
+{
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_INFO, "putref res: %p count: %d\n", res,
+ atomic_read(&res->lr_refcount) - 1);
+ LASSERT(atomic_read(&res->lr_refcount) > 0);
+ LASSERT(atomic_read(&res->lr_refcount) < LI_POISON);
+
+ /* NOTE(review): redundant, the "> 0" assertion above is stricter */
+ LASSERT(atomic_read(&res->lr_refcount) >= 0);
+ if (atomic_dec_and_test(&res->lr_refcount)) {
+ __ldlm_resource_putref_final(res);
+ if (res->lr_lvb_data)
+ OBD_FREE(res->lr_lvb_data, res->lr_lvb_len);
+ OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
rc = 1;
- } else {
- ENTRY;
- out:
- if (atomic_read(&res->lr_refcount) < 0)
- LBUG();
}
RETURN(rc);
+/* Append @lock to list @head of resource @res.  The resource lock must be
+ * held (checked by check_res_locked()).  A lock already marked destroyed is
+ * left alone; otherwise it must not be linked on any resource list yet. */
void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
struct ldlm_lock *lock)
{
- l_lock(&res->lr_namespace->ns_lock);
+ check_res_locked(res);
- ldlm_resource_dump(res);
- ldlm_lock_dump(lock);
+ ldlm_resource_dump(D_OTHER, res);
+ CDEBUG(D_OTHER, "About to add this lock:\n");
+ ldlm_lock_dump(D_OTHER, lock, 0);
- if (!list_empty(&lock->l_res_link))
- LBUG();
+ if (lock->l_destroyed) {
+ CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
+ return;
+ }
+
+ LASSERT(list_empty(&lock->l_res_link));
+
+ /* tail insertion: new locks go to the end of the list */
+ list_add_tail(&lock->l_res_link, head);
+}
+
+/*
+ * Link @new into the resource list of @original, directly behind
+ * @original.  The resource lock of @original's resource must be held
+ * (checked by check_res_locked()).  A lock already marked destroyed is left
+ * alone; otherwise @new must not be linked on any resource list yet.
+ */
+void ldlm_resource_insert_lock_after(struct ldlm_lock *original,
+                                     struct ldlm_lock *new)
+{
+        struct ldlm_resource *resource = original->l_resource;
+
+        check_res_locked(resource);
+
+        ldlm_resource_dump(D_OTHER, resource);
+        CDEBUG(D_OTHER, "About to insert this lock after %p:\n", original);
+        ldlm_lock_dump(D_OTHER, new, 0);
+
+        if (new->l_destroyed) {
+                CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
+                return;
+        }
-        list_add(&lock->l_res_link, head);
-        l_unlock(&res->lr_namespace->ns_lock);
+
+        LASSERT(list_empty(&new->l_res_link));
+        list_add(&new->l_res_link, &original->l_res_link);
 }
+/* Take @lock off its resource list (and its skiplist, through
+ * ldlm_unlink_lock_skiplist()).  The resource lock must be held (checked by
+ * check_res_locked()).  l_res_link is re-initialised, so the lock reads as
+ * unlinked afterwards. */
void ldlm_resource_unlink_lock(struct ldlm_lock *lock)
{
- l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ check_res_locked(lock->l_resource);
+ ldlm_unlink_lock_skiplist(lock);
list_del_init(&lock->l_res_link);
- l_unlock(&lock->l_resource->lr_namespace->ns_lock);
}
+/* Fill descriptor @desc from @res: the resource type and the resource name
+ * are copied. */
void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc)
{
desc->lr_type = res->lr_type;
- memcpy(desc->lr_name, res->lr_name, sizeof(desc->lr_name));
- memcpy(desc->lr_version, res->lr_version, sizeof(desc->lr_version));
+ desc->lr_name = res->lr_name;
}
-void ldlm_dump_all_namespaces(void)
+/* Dump every namespace on the list of side @client at debug level @level.
+ * Nothing is done when @level is selected neither by libcfs_debug nor by
+ * D_ERROR.  The per-side namespace semaphore is held for the whole walk. */
+void ldlm_dump_all_namespaces(ldlm_side_t client, int level)
{
struct list_head *tmp;
- spin_lock(&ldlm_namespace_lock);
+ if (!((libcfs_debug | D_ERROR) & level))
+ return;
+
+ mutex_down(ldlm_namespace_lock(client));
- list_for_each(tmp, &ldlm_namespace_list) {
+ list_for_each(tmp, ldlm_namespace_list(client)) {
struct ldlm_namespace *ns;
ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain);
- ldlm_namespace_dump(ns);
+ ldlm_namespace_dump(level, ns);
}
- spin_unlock(&ldlm_namespace_lock);
+ mutex_up(ldlm_namespace_lock(client));
}
-void ldlm_namespace_dump(struct ldlm_namespace *ns)
+/* Dump namespace @ns at debug level @level: a header line, then every
+ * resource on ns_root_list.  Nothing is done when @level is not enabled.
+ *
+ * The resource dump is rate limited: the header is always printed, but the
+ * resources are skipped while the current time is before ns_next_dump,
+ * which is pushed forward with cfs_time_shift(10) after each full dump.
+ *
+ * ns_hash_lock protects the list walk and is dropped around each
+ * per-resource dump, during which the resource is pinned by a reference. */
+void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
{
struct list_head *tmp;
- l_lock(&ns->ns_lock);
- CDEBUG(D_OTHER, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name,
- ns->ns_refcount, ns->ns_client);
+ if (!((libcfs_debug | D_ERROR) & level))
+ return;
+
+ CDEBUG(level, "--- Namespace: %s (rc: %d, side: %s)\n",
+ ns->ns_name, ns->ns_refcount,
+ ns_is_client(ns) ? "client" : "server");
+
+ if (cfs_time_before(cfs_time_current(), ns->ns_next_dump))
+ return;
- list_for_each(tmp, &ns->ns_root_list) {
+ spin_lock(&ns->ns_hash_lock);
+ tmp = ns->ns_root_list.next;
+ while (tmp != &ns->ns_root_list) {
struct ldlm_resource *res;
res = list_entry(tmp, struct ldlm_resource, lr_childof);
- /* Once we have resources with children, this should really dump
- * them recursively. */
- ldlm_resource_dump(res);
+ ldlm_resource_getref(res);
+ spin_unlock(&ns->ns_hash_lock);
+
+ lock_res(res);
+ ldlm_resource_dump(level, res);
+ unlock_res(res);
+
+ spin_lock(&ns->ns_hash_lock);
+ /* advance before the reference is dropped: the put below may
+ * unlink and free res */
+ tmp = tmp->next;
+ ldlm_resource_putref_locked(res);
}
- l_unlock(&ns->ns_lock);
+ ns->ns_next_dump = cfs_time_shift(10);
+ spin_unlock(&ns->ns_hash_lock);
}
-void ldlm_resource_dump(struct ldlm_resource *res)
+/* Dump resource @res at debug level @level: one header line with the four
+ * name words and the reference count, then each non-empty lock list
+ * (granted, converting, waiting) with its locks numbered from 1.
+ * Nothing is done when @level is not enabled.
+ *
+ * The lists are walked without taking any lock here.
+ * NOTE(review): ldlm_namespace_dump() calls this under lock_res(); whether
+ * every other caller does the same is not visible here -- confirm. */
+void ldlm_resource_dump(int level, struct ldlm_resource *res)
{
struct list_head *tmp;
- char name[256];
+ int pos;
- if (RES_NAME_SIZE != 3)
- LBUG();
+ /* the format string below prints exactly four name words */
+ CLASSERT(RES_NAME_SIZE == 4);
- snprintf(name, sizeof(name), "%Lx %Lx %Lx",
- (unsigned long long)res->lr_name[0],
- (unsigned long long)res->lr_name[1],
- (unsigned long long)res->lr_name[2]);
+ if (!((libcfs_debug | D_ERROR) & level))
+ return;
- CDEBUG(D_OTHER, "--- Resource: %p (%s) (rc: %d)\n", res, name,
+ CDEBUG(level, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64
+ ") (rc: %d)\n", res, res->lr_name.name[0], res->lr_name.name[1],
+ res->lr_name.name[2], res->lr_name.name[3],
atomic_read(&res->lr_refcount));
- CDEBUG(D_OTHER, "Namespace: %p (%s)\n", res->lr_namespace,
- res->lr_namespace->ns_name);
- CDEBUG(D_OTHER, "Parent: %p, root: %p\n", res->lr_parent, res->lr_root);
-
- CDEBUG(D_OTHER, "Granted locks:\n");
- list_for_each(tmp, &res->lr_granted) {
- struct ldlm_lock *lock;
- lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(lock);
- }
- CDEBUG(D_OTHER, "Converting locks:\n");
- list_for_each(tmp, &res->lr_converting) {
- struct ldlm_lock *lock;
- lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(lock);
+ /* empty lists are skipped entirely, heading included */
+ if (!list_empty(&res->lr_granted)) {
+ pos = 0;
+ CDEBUG(level, "Granted locks:\n");
+ list_for_each(tmp, &res->lr_granted) {
+ struct ldlm_lock *lock;
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+ ldlm_lock_dump(level, lock, ++pos);
+ }
}
-
- CDEBUG(D_OTHER, "Waiting locks:\n");
- list_for_each(tmp, &res->lr_waiting) {
- struct ldlm_lock *lock;
- lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(lock);
+ if (!list_empty(&res->lr_converting)) {
+ pos = 0;
+ CDEBUG(level, "Converting locks:\n");
+ list_for_each(tmp, &res->lr_converting) {
+ struct ldlm_lock *lock;
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+ ldlm_lock_dump(level, lock, ++pos);
+ }
+ }
+ if (!list_empty(&res->lr_waiting)) {
+ pos = 0;
+ CDEBUG(level, "Waiting locks:\n");
+ list_for_each(tmp, &res->lr_waiting) {
+ struct ldlm_lock *lock;
+ lock = list_entry(tmp, struct ldlm_lock, l_res_link);
+ ldlm_lock_dump(level, lock, ++pos);
+ }
}
}