1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
5 * Author: Phil Schwan <phil@clusterfs.com>
6 * Author: Peter Braam <braam@clusterfs.com>
8 * This file is part of the Lustre file system, http://www.lustre.org
9 * Lustre is a trademark of Cluster File Systems, Inc.
11 * You may have signed or agreed to another license before downloading
12 * this software. If so, you are bound by the terms and conditions
13 * of that agreement, and the following does not apply to you. See the
14 * LICENSE file included with this distribution for more information.
16 * If you did not agree to a different license, then this copy of Lustre
17 * is open source software; you can redistribute it and/or modify it
18 * under the terms of version 2 of the GNU General Public License as
19 * published by the Free Software Foundation.
21 * In either case, Lustre is distributed in the hope that it will be
22 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
23 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * license text for more details.
27 #define DEBUG_SUBSYSTEM S_LDLM
29 # include <linux/lustre_dlm.h>
31 # include <liblustre.h>
34 #include <linux/obd_class.h>
35 #include "ldlm_internal.h"
/* Slab caches for DLM objects.  ldlm_resource_slab is the cache used by
 * ldlm_resource_new() and ldlm_resource_putref(); ldlm_lock_slab is not
 * referenced in the visible part of this file. */
37 kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
/* Mutex taken (down/up) around every visible access to
 * ldlm_namespace_list. */
39 DECLARE_MUTEX(ldlm_namespace_lock);
/* Global list of namespaces, linked through ns_list_chain. */
40 struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list);
/* procfs directory handles: assigned by ldlm_proc_setup() and reset to NULL
 * by ldlm_proc_cleanup(). */
41 struct proc_dir_entry *ldlm_type_proc_dir = NULL;
42 struct proc_dir_entry *ldlm_ns_proc_dir = NULL;
43 struct proc_dir_entry *ldlm_svc_proc_dir = NULL;
/* Handler registered for the "dump_namespaces" proc entry (see
 * ldlm_proc_setup()): dumps every namespace at D_DLMTRACE level.  None of
 * the arguments is examined in the visible lines.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably a write handler, since the signature matches
 * lprocfs_write_lru_size() -- confirm. */
46 static int ldlm_proc_dump_ns(struct file *file, const char *buffer,
47 unsigned long count, void *data)
49 ldlm_dump_all_namespaces(D_DLMTRACE);
/* Create the DLM procfs directories and the "dump_namespaces" entry.
 *
 * Registers, in order, the OBD_LDLM_DEVICENAME directory, "namespaces" and
 * "services", storing the handles in ldlm_type_proc_dir, ldlm_ns_proc_dir
 * and ldlm_svc_proc_dir.  Each failure is logged and its error code taken
 * from PTR_ERR().
 *
 * NOTE(review): the parent arguments of the lprocfs_register() calls, the
 * jumps/labels of the error path and the return statements are not visible
 * in this listing. */
53 int ldlm_proc_setup(void)
56 struct lprocfs_vars list[] = {
57 { "dump_namespaces", NULL, ldlm_proc_dump_ns, NULL },
/* Asserts that the namespaces directory has not been registered yet. */
60 LASSERT(ldlm_ns_proc_dir == NULL);
62 ldlm_type_proc_dir = lprocfs_register(OBD_LDLM_DEVICENAME,
65 if (IS_ERR(ldlm_type_proc_dir)) {
66 CERROR("LProcFS failed in ldlm-init\n");
67 rc = PTR_ERR(ldlm_type_proc_dir);
71 ldlm_ns_proc_dir = lprocfs_register("namespaces",
74 if (IS_ERR(ldlm_ns_proc_dir)) {
75 CERROR("LProcFS failed in ldlm-init\n");
76 rc = PTR_ERR(ldlm_ns_proc_dir);
80 ldlm_svc_proc_dir = lprocfs_register("services",
83 if (IS_ERR(ldlm_svc_proc_dir)) {
84 CERROR("LProcFS failed in ldlm-init\n");
85 rc = PTR_ERR(ldlm_svc_proc_dir);
/* NOTE(review): whether this rc is checked before returning cannot be seen
 * here -- confirm that a failure to add the entry is not silently lost. */
89 rc = lprocfs_add_vars(ldlm_type_proc_dir, list, NULL);
/* Error unwinding: remove whatever was registered, then reset all three
 * handles to NULL so ldlm_proc_cleanup() will skip them. */
94 lprocfs_remove(ldlm_ns_proc_dir);
96 lprocfs_remove(ldlm_type_proc_dir);
98 ldlm_type_proc_dir = NULL;
99 ldlm_ns_proc_dir = NULL;
100 ldlm_svc_proc_dir = NULL;
/* Remove the procfs directories created by ldlm_proc_setup(), in the reverse
 * of their registration order (services, namespaces, then the type
 * directory).  Each handle is removed only if non-NULL and is then reset to
 * NULL, so calling this again removes nothing. */
104 void ldlm_proc_cleanup(void)
106 if (ldlm_svc_proc_dir) {
107 lprocfs_remove(ldlm_svc_proc_dir);
108 ldlm_svc_proc_dir = NULL;
111 if (ldlm_ns_proc_dir) {
112 lprocfs_remove(ldlm_ns_proc_dir);
113 ldlm_ns_proc_dir = NULL;
116 if (ldlm_type_proc_dir) {
117 lprocfs_remove(ldlm_type_proc_dir);
118 ldlm_type_proc_dir = NULL;
/* procfs read handler: formats the unsigned int that `data` points to as
 * "%u\n" into `page`, writing at most `count` bytes, and returns snprintf()'s
 * result.  `start`, `off` and `eof` are not used in the visible lines. */
122 static int lprocfs_uint_rd(char *page, char **start, off_t off,
123 int count, int *eof, void *data)
125 unsigned int *temp = (unsigned int *)data;
126 return snprintf(page, count, "%u\n", *temp);
/* procfs read handler for "<ns>/lru_size": `data` is the namespace, and the
 * formatting is delegated to lprocfs_uint_rd().
 * NOTE(review): the final argument of that call is not visible in this
 * listing; presumably &ns->ns_max_unused, the field the matching write
 * handler updates -- confirm. */
129 static int lprocfs_read_lru_size(char *page, char **start, off_t off,
130 int count, int *eof, void *data)
132 struct ldlm_namespace *ns = data;
133 return lprocfs_uint_rd(page, start, off, count, eof,
137 #define MAX_STRING_SIZE 128
/* procfs write handler for "<ns>/lru_size"; `data` is the namespace.
 *
 * Two inputs are recognised:
 *  - a 6-byte write starting with "clear": ns_max_unused is temporarily set
 *    to 0, ldlm_cancel_lru() is run with LDLM_SYNC, and the old limit is
 *    restored;
 *  - a number (simple_strtoul() with base 0): stored as the new
 *    ns_max_unused, followed by ldlm_cancel_lru() with LDLM_ASYNC.
 *
 * NOTE(review): the declaration of `tmp` and all return statements are not
 * visible in this listing. */
138 static int lprocfs_write_lru_size(struct file *file, const char *buffer,
139 unsigned long count, void *data)
141 struct ldlm_namespace *ns = data;
142 char dummy[MAX_STRING_SIZE + 1], *end;
/* Terminate at the very end of the buffer only.
 * NOTE(review): the copy below always asks for MAX_STRING_SIZE bytes and
 * never consults `count`, so it reads past the bytes the caller actually
 * wrote and leaves the string unterminated at `count`.  Copying
 * min(count, MAX_STRING_SIZE) bytes and terminating there looks like the
 * intended behaviour -- confirm and fix. */
145 dummy[MAX_STRING_SIZE] = '\0';
146 if (copy_from_user(dummy, buffer, MAX_STRING_SIZE))
/* count == 6 matches the five letters of "clear" plus one more byte
 * (presumably the trailing newline from echo -- confirm). */
149 if (count == 6 && memcmp(dummy, "clear", 5) == 0) {
151 "dropping all unused locks from namespace %s\n",
153 tmp = ns->ns_max_unused;
154 ns->ns_max_unused = 0;
155 ldlm_cancel_lru(ns, LDLM_SYNC);
156 ns->ns_max_unused = tmp;
/* Input is rejected only when it parses to 0 AND unparsed characters remain.
 * NOTE(review): a non-zero number followed by garbage ("12abc") therefore
 * passes this check and is accepted as 12. */
160 tmp = simple_strtoul(dummy, &end, 0);
161 if (tmp == 0 && *end) {
162 CERROR("invalid value written\n");
166 CDEBUG(D_DLMTRACE, "changing namespace %s max_unused from %u to %u\n",
167 ns->ns_name, ns->ns_max_unused, (unsigned int)tmp);
168 ns->ns_max_unused = (unsigned int)tmp;
170 ldlm_cancel_lru(ns, LDLM_ASYNC);
/* Publish per-namespace entries under ldlm_ns_proc_dir:
 *   <ns_name>/resource_count     ns_refcount   (lprocfs_rd_atomic)
 *   <ns_name>/lock_count         ns_locks      (lprocfs_rd_u64)
 *   <ns_name>/lock_unused_count  ns_nr_unused  (lprocfs_uint_rd)
 *   <ns_name>/lru_size           read/write via lprocfs_read_lru_size() and
 *                                lprocfs_write_lru_size()
 *
 * A single lprocfs_vars slot and a single name buffer are reused for all
 * four registrations.  The return values of lprocfs_add_vars() are ignored. */
175 void ldlm_proc_namespace(struct ldlm_namespace *ns)
177 struct lprocfs_vars lock_vars[2];
178 char lock_name[MAX_STRING_SIZE + 1];
181 LASSERT(ns->ns_name != NULL);
/* snprintf() below is limited to MAX_STRING_SIZE bytes, so this last byte
 * of the MAX_STRING_SIZE + 1 buffer stays a terminator. */
183 lock_name[MAX_STRING_SIZE] = '\0';
/* Zeroing leaves lock_vars[1] all-zero (presumably the list terminator
 * expected by lprocfs_add_vars() -- confirm) and write_fptr unset until the
 * lru_size entry assigns it. */
185 memset(lock_vars, 0, sizeof(lock_vars));
186 lock_vars[0].name = lock_name;
/* ns_refcount is incremented per resource in ldlm_resource_add(), hence
 * the "resource_count" name. */
188 snprintf(lock_name, MAX_STRING_SIZE, "%s/resource_count", ns->ns_name);
189 lock_vars[0].data = &ns->ns_refcount;
190 lock_vars[0].read_fptr = lprocfs_rd_atomic;
191 lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
193 snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name);
194 lock_vars[0].data = &ns->ns_locks;
195 lock_vars[0].read_fptr = lprocfs_rd_u64;
196 lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
199 snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_unused_count",
201 lock_vars[0].data = &ns->ns_nr_unused;
202 lock_vars[0].read_fptr = lprocfs_uint_rd;
203 lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
/* The only writable entry: its handlers receive the namespace itself. */
205 snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_size",
207 lock_vars[0].data = ns;
208 lock_vars[0].read_fptr = lprocfs_read_lru_size;
209 lock_vars[0].write_fptr = lprocfs_write_lru_size;
210 lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
/* MAX_STRING_SIZE is private to the procfs helpers above. */
213 #undef MAX_STRING_SIZE
/* No-op replacement for ldlm_proc_namespace().
 * NOTE(review): the preprocessor conditional that selects between the
 * function and this macro is not visible in this listing. */
215 #define ldlm_proc_namespace(ns) do {} while (0)
/* Allocate and initialise a lock namespace.
 *
 * name:   copied into a freshly allocated ns_name.
 * client: stored in ns_client.
 *
 * The new namespace is added to ldlm_namespace_list and its proc entries are
 * created via ldlm_proc_namespace().
 *
 * NOTE(review): the allocation checks, most error jumps/labels and the
 * return statements are not visible in this listing. */
218 struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
220 struct ldlm_namespace *ns = NULL;
221 struct list_head *bucket;
227 CERROR("ldlm_get_ref failed: %d\n", rc);
231 OBD_ALLOC(ns, sizeof(*ns));
/* The hash table holds RES_HASH_SIZE list heads. */
235 OBD_VMALLOC(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
239 OBD_ALLOC(ns->ns_name, strlen(name) + 1);
241 GOTO(out_hash, NULL);
/* The destination was sized as strlen(name) + 1 just above. */
243 strcpy(ns->ns_name, name);
245 INIT_LIST_HEAD(&ns->ns_root_list);
246 l_lock_init(&ns->ns_lock);
247 init_waitqueue_head(&ns->ns_refcount_waitq);
248 atomic_set(&ns->ns_refcount, 0);
249 ns->ns_client = client;
250 spin_lock_init(&ns->ns_counter_lock);
/* Initialise every hash bucket, walking from the last bucket down to the
 * first. */
253 for (bucket = ns->ns_hash + RES_HASH_SIZE - 1; bucket >= ns->ns_hash;
255 INIT_LIST_HEAD(bucket);
/* The unused-lock list starts empty, with the default limit. */
257 INIT_LIST_HEAD(&ns->ns_unused_list);
258 ns->ns_nr_unused = 0;
259 ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
/* Add the namespace to the global list under the list mutex. */
261 down(&ldlm_namespace_lock);
262 list_add(&ns->ns_list_chain, &ldlm_namespace_list);
263 up(&ldlm_namespace_lock);
264 ldlm_proc_namespace(ns);
/* Error unwinding: poison and free the hash table, then the namespace. */
268 POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
269 OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
271 OBD_FREE(ns, sizeof(*ns));
/* Declared here for this file; ldlm_lock_get() is not called in the visible
 * lines. */
277 extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
279 /* If flags contains FL_LOCAL_ONLY, don't try to tell the server, just cleanup.
280 * This is currently only used for recovery, and we make certain assumptions
281 * as a result--notably, that we shouldn't cancel locks with refs. -phil
283 * Called with the ns_lock held. */
/* Walks every lock on queue `q` of resource `res` and tears it down.
 * NOTE(review): the third parameter (used below as `flags`) and the
 * conditionals that choose between the paths below are not visible in this
 * listing. */
284 static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
287 struct list_head *tmp, *pos;
288 int rc = 0, client = res->lr_namespace->ns_client;
289 int local_only = (flags & LDLM_FL_LOCAL_ONLY);
/* The _safe iterator is used because locks are unlinked while walking. */
292 list_for_each_safe(tmp, pos, q) {
293 struct ldlm_lock *lock;
294 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
297 /* Set CBPENDING so nothing in the cancellation path
298 * can match this lock */
299 lock->l_flags |= LDLM_FL_CBPENDING;
300 lock->l_flags |= LDLM_FL_FAILED;
301 lock->l_flags |= flags;
/* Locks that still have readers or writers are only flagged here; the
 * completion AST, if one is set, is invoked with flags 0 and NULL data. */
303 if (local_only && (lock->l_readers || lock->l_writers)) {
304 /* This is a little bit gross, but much better than the
305 * alternative: pretend that we got a blocking AST from
306 * the server, so that when the lock is decref'd, it
307 * will go away ... */
308 /* ... without sending a CANCEL message. */
309 lock->l_flags |= LDLM_FL_LOCAL_ONLY;
310 LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
311 if (lock->l_completion_ast)
312 lock->l_completion_ast(lock, 0, NULL);
/* Cancel through the client path, using the lock's handle (presumably only
 * when `client` is set -- the selecting condition is not visible). */
318 struct lustre_handle lockh;
319 ldlm_lock2handle(lock, &lockh);
321 rc = ldlm_cli_cancel(&lockh);
323 CERROR("ldlm_cli_cancel: %d\n", rc);
325 /* Force local cleanup on errors, too. */
326 if (local_only || rc != ELDLM_OK)
327 ldlm_lock_cancel(lock);
/* Remaining path: the lock is unlinked from the resource and destroyed
 * directly. */
329 LDLM_DEBUG(lock, "Freeing a lock still held by a "
332 ldlm_resource_unlink_lock(lock);
333 ldlm_lock_destroy(lock);
/* Tear down every lock on every resource of `ns`.
 *
 * With ns_lock held, each hash bucket is walked and cleanup_resource() is
 * run on the granted, converting and waiting queues of each resource,
 * passing `flags` through.  A NULL namespace is logged and skipped.
 *
 * NOTE(review): the NULL test itself and the return statements are not
 * visible in this listing. */
340 int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags)
345 CDEBUG(D_INFO, "NULL ns, skipping cleanup\n");
349 l_lock(&ns->ns_lock);
350 for (i = 0; i < RES_HASH_SIZE; i++) {
351 struct list_head *tmp, *pos;
352 list_for_each_safe(tmp, pos, &(ns->ns_hash[i])) {
353 struct ldlm_resource *res;
354 res = list_entry(tmp, struct ldlm_resource, lr_hash);
/* Take a reference for the duration of the cleanup (presumably so the
 * resource cannot be freed while its queues are being emptied). */
355 ldlm_resource_getref(res);
357 cleanup_resource(res, &res->lr_granted, flags);
358 cleanup_resource(res, &res->lr_converting, flags);
359 cleanup_resource(res, &res->lr_waiting, flags);
/* ldlm_resource_putref() returns 0 when the resource was NOT freed, i.e.
 * something else still holds a reference after all locks were cleaned.
 * NOTE(review): the message says "forcing cleanup", but no forcing action
 * is visible in this listing. */
361 if (!ldlm_resource_putref(res)) {
362 CERROR("Namespace %s resource refcount %d "
363 "after lock cleanup; forcing cleanup.\n",
365 atomic_read(&res->lr_refcount));
369 l_unlock(&ns->ns_lock);
374 /* Cleanup, but also free, the namespace */
/* Steps, in order: unlink `ns` from ldlm_namespace_list, clean up all its
 * locks, look up its procfs directory, wait for ns_refcount to reach zero,
 * then poison/free the hash table, the name and the namespace itself.
 *
 * NOTE(review): `force` is not referenced in any visible line, and the
 * conditionals around the procfs lookup and the return statements are not
 * visible in this listing. */
375 int ldlm_namespace_free(struct ldlm_namespace *ns, int force)
380 down(&ldlm_namespace_lock);
381 list_del(&ns->ns_list_chain);
382 up(&ldlm_namespace_lock);
384 /* At shutdown time, don't call the cancellation callback */
385 ldlm_namespace_cleanup(ns, 0);
389 struct proc_dir_entry *dir;
390 dir = lprocfs_srch(ldlm_ns_proc_dir, ns->ns_name);
392 CERROR("dlm namespace %s has no procfs dir?\n",
/* Resources still referenced: sleep on ns_refcount_waitq, which
 * ldlm_resource_putref() wakes when the last resource goes away. */
400 if (atomic_read(&ns->ns_refcount) > 0) {
401 struct l_wait_info lwi = LWI_INTR(NULL, NULL);
404 "dlm namespace %s free waiting on refcount %d\n",
405 ns->ns_name, atomic_read(&ns->ns_refcount));
406 rc = l_wait_event(ns->ns_refcount_waitq,
407 atomic_read(&ns->ns_refcount) == 0, &lwi);
/* The wait can end with references outstanding (presumably when
 * interrupted -- confirm LWI_INTR semantics); this is reported on the
 * console. */
408 if (atomic_read(&ns->ns_refcount)) {
409 LCONSOLE_ERROR("Lock manager: wait for %s namespace "
410 "cleanup aborted with %d resources in "
411 "use. (%d)\nI'm going to try to clean "
412 "up anyway, but I might need a reboot "
413 "of this node.\n", ns->ns_name,
414 atomic_read(&ns->ns_refcount), rc);
417 "dlm namespace %s free done waiting\n", ns->ns_name);
/* Sizes mirror the allocations made in ldlm_namespace_new(). */
420 POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
421 OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
422 OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
423 OBD_FREE(ns, sizeof(*ns));
/* Compute the hash-bucket index for a resource name.
 *
 * Adds up all RES_NAME_SIZE components of `name`, mixes in the parent
 * pointer shifted right by 4, and masks the sum with RES_HASH_MASK.
 * NOTE(review): the declaration and initial value of `hash` are not visible
 * in this listing. */
430 static __u32 ldlm_hash_fn(struct ldlm_resource *parent, struct ldlm_res_id name)
435 for (i = 0; i < RES_NAME_SIZE; i++)
436 hash += name.name[i];
438 hash += (__u32)((unsigned long)parent >> 4);
440 return (hash & RES_HASH_MASK);
/* Allocate a resource from ldlm_resource_slab (SLAB_NOFS) and put it in its
 * initial state: zeroed, all list heads empty, lr_lvb_sem initialised to 1,
 * and a reference count of 1.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing. */
443 static struct ldlm_resource *ldlm_resource_new(void)
445 struct ldlm_resource *res;
447 OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_NOFS, sizeof *res);
451 memset(res, 0, sizeof(*res));
453 INIT_LIST_HEAD(&res->lr_children);
454 INIT_LIST_HEAD(&res->lr_childof);
455 INIT_LIST_HEAD(&res->lr_granted);
456 INIT_LIST_HEAD(&res->lr_converting);
457 INIT_LIST_HEAD(&res->lr_waiting);
458 sema_init(&res->lr_lvb_sem, 1);
/* The initial reference belongs to the caller. */
459 atomic_set(&res->lr_refcount, 1);
464 /* Args: locked namespace
465 * Returns: newly-allocated, referenced, unlocked resource */
/* Creates a resource named `name`, hashes it into `ns`, and links it either
 * on the namespace root list (no parent) or on the parent's child list.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing. */
466 static struct ldlm_resource *
467 ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent,
468 struct ldlm_res_id name, ldlm_type_t type)
470 struct list_head *bucket;
471 struct ldlm_resource *res;
/* LDLM_MAX_TYPE itself is excluded from the valid range. */
474 LASSERTF(type >= LDLM_MIN_TYPE && type < LDLM_MAX_TYPE,
477 res = ldlm_resource_new();
/* The only visible caller, ldlm_resource_get(), already holds ns_lock, so
 * l_lock is presumably re-entrant -- confirm. */
481 l_lock(&ns->ns_lock);
482 memcpy(&res->lr_name, &name, sizeof(res->lr_name));
483 res->lr_namespace = ns;
/* Every resource counts as one namespace reference; it is dropped again in
 * ldlm_resource_putref() when the resource is freed. */
484 atomic_inc(&ns->ns_refcount);
487 res->lr_most_restr = LCK_NL;
/* Same bucket computation as the lookup in ldlm_resource_get(). */
489 bucket = ns->ns_hash + ldlm_hash_fn(parent, name);
490 list_add(&res->lr_hash, bucket);
492 if (parent == NULL) {
493 list_add(&res->lr_childof, &ns->ns_root_list);
495 res->lr_parent = parent;
496 list_add(&res->lr_childof, &parent->lr_children);
498 l_unlock(&ns->ns_lock);
504 /* Args: unlocked namespace
505 * Locks: takes and releases ns->ns_lock and res->lr_lock
506 * Returns: referenced, unlocked ldlm_resource or NULL */
/* Looks `name` up in the namespace hash.  A match gets an extra reference;
 * otherwise a new resource is added via ldlm_resource_add() and, when the
 * namespace provides an lvbo_init hook, that hook is run on it.
 * NOTE(review): the conditionals around the add path (including how
 * `create` gates it) and the return statements are not visible in this
 * listing. */
507 struct ldlm_resource *
508 ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
509 struct ldlm_res_id name, ldlm_type_t type, int create)
511 struct list_head *bucket, *tmp;
512 struct ldlm_resource *res = NULL;
/* The first name component is asserted to be non-zero. */
516 LASSERT(ns->ns_hash != NULL);
517 LASSERT(name.name[0] != 0);
519 l_lock(&ns->ns_lock);
520 bucket = ns->ns_hash + ldlm_hash_fn(parent, name);
/* Names are compared bytewise over the whole lr_name structure. */
522 list_for_each(tmp, bucket) {
523 res = list_entry(tmp, struct ldlm_resource, lr_hash);
525 if (memcmp(&res->lr_name, &name, sizeof(res->lr_name)) == 0) {
526 ldlm_resource_getref(res);
527 l_unlock(&ns->ns_lock);
533 res = ldlm_resource_add(ns, parent, name, type);
540 if (create && ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
543 /* Although this is technically a lock inversion risk (lvb_sem
544 * should be taken before DLM lock), this resource was just
545 * created, so nobody else can take the lvb_sem yet. -p */
546 down(&res->lr_lvb_sem);
547 /* Drop the dlm lock, because lvbo_init can touch the disk */
548 l_unlock(&ns->ns_lock);
549 OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2);
550 rc = ns->ns_lvbo->lvbo_init(res);
551 up(&res->lr_lvb_sem);
/* Only the first two name components are included in the error message. */
553 CERROR("lvbo_init failed for resource "LPU64"/"LPU64
554 ": rc %d\n", name.name[0], name.name[1], rc);
557 l_unlock(&ns->ns_lock);
/* Take one reference on `res`.  Asserts that the pointer is neither NULL nor
 * the LP_POISON pattern, then logs the new count at D_INFO.
 * NOTE(review): the return statement is not visible in this listing. */
563 struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res)
565 LASSERT(res != NULL);
566 LASSERT(res != LP_POISON);
567 atomic_inc(&res->lr_refcount);
568 CDEBUG(D_INFO, "getref res: %p count: %d\n", res,
569 atomic_read(&res->lr_refcount));
573 /* Returns 1 if the resource was freed, 0 if it remains. */
/* Drops one reference.  When the count reaches zero the resource is
 * unhashed, unlinked from its parent/root list, its LVB data freed, and the
 * object returned to ldlm_resource_slab; the namespace's resource count is
 * then dropped and any waiter on ns_refcount_waitq woken.
 * NOTE(review): the return statements, and whatever follows each
 * ldlm_resource_dump() call below, are not visible in this listing. */
574 int ldlm_resource_putref(struct ldlm_resource *res)
/* The count is logged before the decrement, hence the "- 1". */
579 CDEBUG(D_INFO, "putref res: %p count: %d\n", res,
580 atomic_read(&res->lr_refcount) - 1);
581 LASSERT(atomic_read(&res->lr_refcount) > 0);
582 LASSERT(atomic_read(&res->lr_refcount) < LI_POISON);
584 if (atomic_dec_and_test(&res->lr_refcount)) {
585 struct ldlm_namespace *ns = res->lr_namespace;
/* The decrement happened without ns_lock, so re-check under the lock:
 * ldlm_resource_get() can still find the resource in the hash and take a
 * new reference in between. */
588 l_lock(&ns->ns_lock);
590 if (atomic_read(&res->lr_refcount) != 0) {
591 /* We lost the race. */
592 l_unlock(&ns->ns_lock);
/* A resource about to be freed should have no locks and no children; each
 * non-empty list is dumped at D_ERROR. */
596 if (!list_empty(&res->lr_granted)) {
597 ldlm_resource_dump(D_ERROR, res);
601 if (!list_empty(&res->lr_converting)) {
602 ldlm_resource_dump(D_ERROR, res);
606 if (!list_empty(&res->lr_waiting)) {
607 ldlm_resource_dump(D_ERROR, res);
611 if (!list_empty(&res->lr_children)) {
612 ldlm_resource_dump(D_ERROR, res);
616 list_del_init(&res->lr_hash);
617 list_del_init(&res->lr_childof);
618 if (res->lr_lvb_data)
619 OBD_FREE(res->lr_lvb_data, res->lr_lvb_len);
620 l_unlock(&ns->ns_lock);
622 OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
/* `ns` was captured before the free above.  Dropping the last resource
 * wakes ldlm_namespace_free(), which waits on this queue. */
624 if (atomic_dec_and_test(&ns->ns_refcount)) {
625 CDEBUG(D_DLMTRACE, "last ref on ns %s\n", ns->ns_name);
626 wake_up(&ns->ns_refcount_waitq);
/* Append `lock` to the tail of queue `head` of resource `res`, under the
 * namespace lock.  The resource and the lock are dumped at D_OTHER first.
 * The lock must not already be on a resource queue (asserted).
 * NOTE(review): how the "lock destroyed" case leaves the function (and
 * whether it releases ns_lock) is not visible in this listing. */
636 void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
637 struct ldlm_lock *lock)
639 l_lock(&res->lr_namespace->ns_lock);
641 ldlm_resource_dump(D_OTHER, res);
642 CDEBUG(D_OTHER, "About to add this lock:\n");
643 ldlm_lock_dump(D_OTHER, lock, 0);
645 if (lock->l_destroyed) {
646 CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
650 LASSERT(list_empty(&lock->l_res_link));
652 list_add_tail(&lock->l_res_link, head);
654 l_unlock(&res->lr_namespace->ns_lock);
/* Insert `new` immediately after `original` on whichever resource queue
 * `original` is linked into, under the namespace lock of original's
 * resource.  `new` must not already be on a queue (asserted).
 * NOTE(review): how the "lock destroyed" case leaves the function (and
 * whether it releases ns_lock) is not visible in this listing. */
657 void ldlm_resource_insert_lock_after(struct ldlm_lock *original,
658 struct ldlm_lock *new)
660 struct ldlm_resource *res = original->l_resource;
662 l_lock(&res->lr_namespace->ns_lock);
664 ldlm_resource_dump(D_OTHER, res);
665 CDEBUG(D_OTHER, "About to insert this lock after %p:\n", original);
666 ldlm_lock_dump(D_OTHER, new, 0);
668 if (new->l_destroyed) {
669 CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n");
673 LASSERT(list_empty(&new->l_res_link));
/* list_add() links the new entry right after the given one. */
675 list_add(&new->l_res_link, &original->l_res_link);
677 l_unlock(&res->lr_namespace->ns_lock);
/* Remove `lock` from its resource queue under the namespace lock.
 * list_del_init() re-initialises l_res_link, so list_empty() on it is true
 * afterwards (the add/insert helpers assert exactly that). */
680 void ldlm_resource_unlink_lock(struct ldlm_lock *lock)
682 l_lock(&lock->l_resource->lr_namespace->ns_lock);
683 list_del_init(&lock->l_res_link);
684 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
686 EXPORT_SYMBOL(ldlm_resource_unlink_lock);
/* Fill a resource descriptor from a resource: copies the type and the name.
 * The copy length is taken from the descriptor's lr_name field. */
688 void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc)
690 desc->lr_type = res->lr_type;
691 memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name));
/* Dump every namespace on ldlm_namespace_list at debug level `level`.  The
 * list mutex is held for the whole walk. */
694 void ldlm_dump_all_namespaces(int level)
696 struct list_head *tmp;
698 down(&ldlm_namespace_lock);
700 list_for_each(tmp, &ldlm_namespace_list) {
701 struct ldlm_namespace *ns;
702 ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain);
703 ldlm_namespace_dump(level, ns);
706 up(&ldlm_namespace_lock);
/* Log a one-line summary of `ns` (name, resource refcount, client flag) and,
 * at most once per 10 seconds per namespace, dump each resource on its root
 * list.  The summary line is always printed; only the per-resource dump is
 * rate limited. */
709 void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
711 struct list_head *tmp;
713 CDEBUG(level, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name,
714 atomic_read(&ns->ns_refcount), ns->ns_client);
716 l_lock(&ns->ns_lock);
/* ns_next_dump is the earliest jiffies value at which the next full dump is
 * allowed. */
717 if (time_after(jiffies, ns->ns_next_dump)) {
718 list_for_each(tmp, &ns->ns_root_list) {
719 struct ldlm_resource *res;
720 res = list_entry(tmp, struct ldlm_resource, lr_childof);
722 /* Once we have resources with children, this should
723 * really dump them recursively. */
724 ldlm_resource_dump(level, res);
726 ns->ns_next_dump = jiffies + 10 * HZ;
728 l_unlock(&ns->ns_lock);
/* Log a resource at debug level `level`: its address, four name components
 * and reference count, followed by every lock on its granted, converting
 * and waiting queues (empty queues are skipped).
 * NOTE(review): the declaration and initialisation of `pos` and the action
 * taken when RES_NAME_SIZE != 4 are not visible in this listing, and the
 * function may continue past the last line shown. */
731 void ldlm_resource_dump(int level, struct ldlm_resource *res)
733 struct list_head *tmp;
/* The format string below prints exactly four name components. */
736 if (RES_NAME_SIZE != 4)
739 CDEBUG(level, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64
740 ") (rc: %d)\n", res, res->lr_name.name[0], res->lr_name.name[1],
741 res->lr_name.name[2], res->lr_name.name[3],
742 atomic_read(&res->lr_refcount));
/* `pos` is incremented before each call, so every lock is dumped with its
 * position on the queue. */
744 if (!list_empty(&res->lr_granted)) {
746 CDEBUG(level, "Granted locks:\n");
747 list_for_each(tmp, &res->lr_granted) {
748 struct ldlm_lock *lock;
749 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
750 ldlm_lock_dump(level, lock, ++pos);
753 if (!list_empty(&res->lr_converting)) {
755 CDEBUG(level, "Converting locks:\n");
756 list_for_each(tmp, &res->lr_converting) {
757 struct ldlm_lock *lock;
758 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
759 ldlm_lock_dump(level, lock, ++pos);
762 if (!list_empty(&res->lr_waiting)) {
764 CDEBUG(level, "Waiting locks:\n");
765 list_for_each(tmp, &res->lr_waiting) {
766 struct ldlm_lock *lock;
767 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
768 ldlm_lock_dump(level, lock, ++pos);