1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * by Cluster File Systems, Inc.
10 * authors, Peter Braam <braam@clusterfs.com> &
11 * Phil Schwan <phil@clusterfs.com>
14 #define DEBUG_SUBSYSTEM S_LDLM
16 #include <linux/slab.h>
17 #include <linux/module.h>
18 #include <linux/lustre_dlm.h>
19 #include <linux/lustre_mds.h>
/* Slab cache that backs struct ldlm_lock (used by ldlm_lock_new() and
 * ldlm_lock_free() in this file); the definition lives in another file. */
21 extern kmem_cache_t *ldlm_lock_slab;
/* MDS entry points.  They start out NULL; ldlm_intent_policy() tests them
 * for NULL and calls inter_module_get_request() before invoking them. */
22 int (*mds_reint_p)(int offset, struct ptlrpc_request *req) = NULL;
23 int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req) = NULL;
/* Forward declarations of the static callbacks named in the tables below. */
25 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b);
26 static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie,
27 ldlm_mode_t mode, void *data);
/* Compatibility callbacks, indexed by resource type (lr_type); looked up in
 * _ldlm_lock_compat().
 * NOTE(review): the closing "};" of this initializer is not visible in this
 * extract. */
29 ldlm_res_compat ldlm_res_compat_table [] = {
30 [LDLM_PLAIN] ldlm_plain_compat,
31 [LDLM_EXTENT] ldlm_extent_compat,
32 [LDLM_MDSINTENT] ldlm_plain_compat
/* Policy callbacks, indexed by resource type (lr_type); looked up in
 * ldlm_local_lock_enqueue(), which skips the call when the entry is NULL.
 * NOTE(review): the LDLM_PLAIN entry and the closing "};" are not visible
 * in this extract. */
35 ldlm_res_policy ldlm_res_policy_table [] = {
37 [LDLM_EXTENT] ldlm_extent_policy,
38 [LDLM_MDSINTENT] ldlm_intent_policy
/* Policy callback registered for LDLM_MDSINTENT resources.
 *
 * req_cookie is used as the struct ptlrpc_request being serviced.  When the
 * request message carries more than one buffer, buffer 1 is read as a
 * struct ldlm_intent: a reply is packed, the intent is handed to the MDS via
 * mds_reint_p() or mds_getattr_name_p(), and the lock is re-pointed at the
 * resource named by the inode number in the MDS reply, returning
 * ELDLM_LOCK_CHANGED.  IT_UNLINK and IT_RMDIR intents return
 * ELDLM_LOCK_ABORTED instead.  Without an intent buffer only a single
 * struct ldlm_reply is packed.
 *
 * No use of `mode` or `data` is visible in this extract.
 * NOTE(review): many lines of this function (braces, the switch headers,
 * most case labels, error paths, final return) are missing from this
 * extract; restore them from the original file before relying on the
 * control flow described here. */
41 static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie,
42 ldlm_mode_t mode, void *data)
44 struct ptlrpc_request *req = req_cookie;
51 if (req->rq_reqmsg->bufcount > 1) {
52 /* an intent needs to be considered */
53 struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
54 struct mds_body *mds_rep;
55 struct ldlm_reply *rep;
56 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
57 __u32 type = lock->l_resource->lr_type;
58 __u64 new_resid[3] = {0, 0, 0}, old_res;
59 int bufcount, rc, size[3] = {sizeof(struct ldlm_reply),
60 sizeof(struct mds_body),
/* The opcode is converted in place with NTOH__u64 before being examined. */
63 it->opc = NTOH__u64(it->opc);
65 LDLM_DEBUG(lock, "intent policy, opc: %Ld", it->opc);
70 /* Note that in the negative case you may be returning
71 * a file and its obdo */
73 case IT_CREAT|IT_OPEN:
/* Reply buffer 1 is resized to hold a struct obdo on this path. */
84 size[1] = sizeof(struct obdo);
90 rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
93 rc = req->rq_status = -ENOMEM;
/* Reply buffer 0 is the ldlm_reply; lock_policy_res1 is set to 1 here. */
97 rep = lustre_msg_buf(req->rq_repmsg, 0);
98 rep->lock_policy_res1 = 1;
103 case IT_CREAT|IT_OPEN:
/* Look up the MDS reint entry point on first use (the assignment line is
 * not visible in this extract), then run it on the request. */
111 if (mds_reint_p == NULL)
113 inter_module_get_request
114 ("mds_reint", "mds");
115 if (IS_ERR(mds_reint_p)) {
116 CERROR("MDSINTENT locks require the MDS "
121 rc = mds_reint_p(2, req);
/* Same first-use lookup for the getattr-by-name entry point. */
129 if (mds_getattr_name_p == NULL)
131 inter_module_get_request
132 ("mds_getattr_name", "mds");
133 if (IS_ERR(mds_getattr_name_p)) {
134 CERROR("MDSINTENT locks require the MDS "
139 rc = mds_getattr_name_p(2, req);
145 case IT_READDIR|IT_OPEN:
149 CERROR("Unhandled intent\n");
/* Unlink and rmdir intents return ELDLM_LOCK_ABORTED. */
153 if (it->opc == IT_UNLINK || it->opc == IT_RMDIR)
154 RETURN(ELDLM_LOCK_ABORTED);
/* Reply buffer 1 is the MDS body; its inode number becomes the first
 * component of the new resource name, and the request status is copied
 * into lock_policy_res2. */
156 mds_rep = lustre_msg_buf(req->rq_repmsg, 1);
157 rep->lock_policy_res2 = req->rq_status;
158 new_resid[0] = mds_rep->ino;
159 old_res = lock->l_resource->lr_name[0];
/* NOTE(review): the two adjacent string literals concatenate to
 * "instead of%ld" with no separating space, and "%d" must match the type
 * of mds_rep->ino (not visible here) -- confirm both. */
161 CDEBUG(D_INFO, "remote intent: locking %d instead of"
162 "%ld\n", mds_rep->ino, (long)old_res);
/* Drop the reference on the old resource, then get (last argument 1) the
 * resource named by new_resid.  The assignment target of the get is on a
 * line missing from this extract; the NULL check below suggests it is
 * lock->l_resource. */
163 ldlm_resource_put(lock->l_resource);
166 ldlm_resource_get(ns, NULL, new_resid, type, 1);
167 if (lock->l_resource == NULL) {
171 LDLM_DEBUG(lock, "intent policy, old res %ld",
173 RETURN(ELDLM_LOCK_CHANGED);
/* No-intent path: pack a reply holding only a struct ldlm_reply. */
175 int size = sizeof(struct ldlm_reply);
176 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
179 CERROR("out of memory\n");
/* Compatibility callback used for LDLM_PLAIN and LDLM_MDSINTENT resources:
 * returns lockmode_compat() applied to the requested modes of the two
 * locks. */
187 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b)
189 return lockmode_compat(a->l_req_mode, b->l_req_mode);
/* Allocate and initialise a new lock attached to `resource`.
 *
 * The lock comes from ldlm_lock_slab (SLAB_KERNEL allocation), is zeroed,
 * gets its list heads, wait queue and spinlock initialised, and, when
 * `parent` is non-NULL, is linked onto parent->l_children while holding
 * parent->l_lock.
 * NOTE(review): the results of the NULL-resource check and of a failed
 * allocation, and the final return, are on lines missing from this
 * extract. */
192 /* Args: referenced, unlocked parent (or NULL)
193 * referenced, unlocked resource
194 * Locks: parent->l_lock */
195 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
196 struct ldlm_resource *resource)
198 struct ldlm_lock *lock;
200 if (resource == NULL)
203 lock = kmem_cache_alloc(ldlm_lock_slab, SLAB_KERNEL);
207 memset(lock, 0, sizeof(*lock));
208 lock->l_resource = resource;
209 INIT_LIST_HEAD(&lock->l_children);
210 INIT_LIST_HEAD(&lock->l_res_link);
211 init_waitqueue_head(&lock->l_waitq);
212 lock->l_lock = SPIN_LOCK_UNLOCKED;
/* Record the parent and add this lock to the parent's child list under the
 * parent's spinlock. */
214 if (parent != NULL) {
215 spin_lock(&parent->l_lock);
216 lock->l_parent = parent;
217 list_add(&lock->l_childof, &parent->l_children);
218 spin_unlock(&parent->l_lock);
/* Release a lock back to ldlm_lock_slab.
 *
 * Logs an error and dumps the lock if it still has children, logs at
 * D_INFO if readers or writers remain, drops the connection reference when
 * l_connection is set, then frees the lock.  The resource reference is not
 * dropped here (see the original comment below).
 * NOTE(review): whatever follows the children check (original lines
 * 233-235) is missing from this extract. */
224 /* Args: unreferenced, locked lock
226 * Caller must do its own ldlm_resource_put() on lock->l_resource */
227 void ldlm_lock_free(struct ldlm_lock *lock)
229 if (!list_empty(&lock->l_children)) {
230 CERROR("lock %p still has children (%p)!\n", lock,
231 lock->l_children.next);
232 ldlm_lock_dump(lock);
236 if (lock->l_readers || lock->l_writers)
237 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
238 "writers)\n", lock->l_readers, lock->l_writers);
240 if (lock->l_connection)
241 ptlrpc_put_connection(lock->l_connection);
242 kmem_cache_free(ldlm_lock_slab, lock);
/* Fill `desc` from `lock`: the resource description (via ldlm_res2desc()),
 * the requested and granted modes, the extent and the version array. */
245 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
247 ldlm_res2desc(lock->l_resource, &desc->l_resource);
248 desc->l_req_mode = lock->l_req_mode;
249 desc->l_granted_mode = lock->l_granted_mode;
250 memcpy(&desc->l_extent, &lock->l_extent, sizeof(desc->l_extent));
251 memcpy(desc->l_version, lock->l_version, sizeof(desc->l_version));
/* Take a usage reference on `lock` for the given mode, under lock->l_lock.
 * The code distinguishes LCK_NL / LCK_CR / LCK_PR from every other mode.
 * NOTE(review): the counter updates themselves are on lines missing from
 * this extract; presumably l_readers is bumped for the three listed modes
 * and l_writers otherwise -- confirm against the original. */
254 /* Args: unlocked lock */
255 void ldlm_lock_addref(struct ldlm_lock *lock, __u32 mode)
257 spin_lock(&lock->l_lock);
258 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
262 spin_unlock(&lock->l_lock);
/* Prepare a blocking AST for `lock` on behalf of the conflicting lock `new`.
 *
 * Under lock->l_lock: checks LDLM_FL_AST_SENT (the action taken when it is
 * already set is not visible in this extract), sets the flag, and calls the
 * lock's l_blocking_ast callback with &req so the callback can hand back a
 * request.  The request is then queued through rq_multi on the list that
 * lock->l_resource->lr_tmp points to; ldlm_reprocess_all() sets lr_tmp and
 * later sends the queued requests.
 * NOTE(review): the condition guarding the list_add (presumably a non-NULL
 * req) and the return statements are missing from this extract. */
265 int ldlm_send_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock *new)
267 struct ptlrpc_request *req = NULL;
270 spin_lock(&lock->l_lock);
271 if (lock->l_flags & LDLM_FL_AST_SENT) {
275 lock->l_flags |= LDLM_FL_AST_SENT;
277 lock->l_blocking_ast(lock, new, lock->l_data, lock->l_data_len, &req);
278 spin_unlock(&lock->l_lock);
280 struct list_head *list = lock->l_resource->lr_tmp;
281 list_add(&req->rq_multi, list);
/* Drop a usage reference on `lock` for the given mode, under lock->l_lock.
 *
 * Once both l_readers and l_writers are zero on a lock flagged
 * LDLM_FL_DYING, l_lock is released and the lock's l_blocking_ast callback
 * is invoked with a NULL `new` lock and a NULL request pointer.  An error is
 * logged if such a lock belongs to a namespace whose ns_client is unset.
 * NOTE(review): the counter decrements, any early exit after the CERROR and
 * the `else` joining the two unlock paths are on lines missing from this
 * extract. */
286 /* Args: unlocked lock */
287 void ldlm_lock_decref(struct ldlm_lock *lock, __u32 mode)
294 spin_lock(&lock->l_lock);
295 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
299 if (!lock->l_readers && !lock->l_writers &&
300 lock->l_flags & LDLM_FL_DYING) {
301 /* Read this lock its rights. */
302 if (!lock->l_resource->lr_namespace->ns_client) {
303 CERROR("LDLM_FL_DYING set on non-local lock!\n");
307 CDEBUG(D_INFO, "final decref done on dying lock, "
308 "calling callback.\n");
309 spin_unlock(&lock->l_lock);
310 /* This function pointer is unfortunately overloaded. This
311 * call will not result in an RPC. */
312 lock->l_blocking_ast(lock, NULL, lock->l_data,
313 lock->l_data_len, NULL);
315 spin_unlock(&lock->l_lock);
/* Check `lock` against every lock on `queue`.
 *
 * For each queued lock ("child") the per-type callback from
 * ldlm_res_compat_table is tried first, then lockmode_compat() on the
 * child's granted mode versus the new lock's requested mode.  If both fail
 * and send_cbs is set and the child has a blocking AST callback,
 * ldlm_send_blocking_ast(child, lock) is called.
 * NOTE(review): the `continue` statements, the result accumulation and the
 * return value are on lines missing from this extract; callers in this file
 * treat a non-zero result as "incompatible". */
319 /* Args: unlocked lock */
320 static int _ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs,
321 struct list_head *queue)
323 struct list_head *tmp, *pos;
326 list_for_each_safe(tmp, pos, queue) {
327 struct ldlm_lock *child;
328 ldlm_res_compat compat;
330 child = list_entry(tmp, struct ldlm_lock, l_res_link);
/* Type-specific compatibility test, selected by the child's resource type. */
334 compat = ldlm_res_compat_table[child->l_resource->lr_type];
335 if (compat(child, lock)) {
336 CDEBUG(D_OTHER, "compat function succeded, next.\n");
/* Fallback: plain mode compatibility of granted versus requested mode. */
339 if (lockmode_compat(child->l_granted_mode, lock->l_req_mode)) {
340 CDEBUG(D_OTHER, "lock modes are compatible, next.\n");
346 CDEBUG(D_OTHER, "compat function failed and lock modes incompat\n");
347 if (send_cbs && child->l_blocking_ast != NULL) {
348 CDEBUG(D_OTHER, "incompatible; sending blocking AST.\n");
349 /* It's very difficult to actually send the AST from
350 * here, because we'd have to drop the lock before going
351 * to sleep to wait for the reply. Instead we build the
352 * packet and send it later. */
353 ldlm_send_blocking_ast(child, lock);
/* Check `lock` against its resource's granted queue and converting queue,
 * OR-ing the two _ldlm_lock_compat() results into rc.  send_cbs is passed
 * straight through.
 * NOTE(review): the declaration of rc and the return are on lines missing
 * from this extract. */
360 /* Args: unlocked lock */
361 static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs)
366 rc = _ldlm_lock_compat(lock, send_cbs, &lock->l_resource->lr_granted);
367 /* FIXME: should we be sending ASTs to converting? */
368 rc |= _ldlm_lock_compat(lock, send_cbs,
369 &lock->l_resource->lr_converting);
/* Grant `lock` on `res`: add it to res->lr_granted, copy the requested mode
 * into l_granted_mode, lower res->lr_most_restr when the granted mode
 * compares less than it, and invoke the completion AST if one is set (with a
 * NULL second argument and a NULL request pointer). */
374 /* Args: locked lock, locked resource */
375 void ldlm_grant_lock(struct ldlm_resource *res, struct ldlm_lock *lock)
379 ldlm_resource_add_lock(res, &res->lr_granted, lock);
380 lock->l_granted_mode = lock->l_req_mode;
382 if (lock->l_granted_mode < res->lr_most_restr)
383 res->lr_most_restr = lock->l_granted_mode;
385 if (lock->l_completion_ast)
386 lock->l_completion_ast(lock, NULL, lock->l_data,
387 lock->l_data_len, NULL);
/* Scan `queue` for a lock usable in place of a new request.
 *
 * A candidate is tested for: the LDLM_FL_DYING flag, a requested mode
 * different from `mode`, and (LDLM_EXTENT resources only) an extent that
 * does not fully cover [extent->start, extent->end].  A lock that gets past
 * these tests is addref()ed for `mode` and its handle is written to `lockh`.
 * NOTE(review): the `continue` lines after each test and the return
 * statements are missing from this extract; ldlm_local_lock_match() treats
 * a non-zero result as "found". */
391 static int search_queue(struct list_head *queue, ldlm_mode_t mode,
392 struct ldlm_extent *extent, struct lustre_handle *lockh)
394 struct list_head *tmp;
396 list_for_each(tmp, queue) {
397 struct ldlm_lock *lock;
398 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
400 if (lock->l_flags & LDLM_FL_DYING)
403 /* lock_convert() takes the resource lock, so we're sure that
404 * req_mode, lr_type, and l_cookie won't change beneath us */
405 if (lock->l_req_mode != mode)
408 if (lock->l_resource->lr_type == LDLM_EXTENT &&
409 (lock->l_extent.start > extent->start ||
410 lock->l_extent.end < extent->end))
413 ldlm_lock_addref(lock, mode);
414 ldlm_object2handle(lock, lockh);
/* Look for an existing lock on resource `res_id` that satisfies `mode`.
 *
 * The resource is fetched with ldlm_resource_get() (last argument 0) and,
 * under res->lr_lock, the granted, converting and waiting queues are
 * searched in that order with search_queue().  `cookie` is passed to
 * search_queue() as its extent argument; `cookielen` has no visible use.
 * NOTE(review): the handling of a NULL resource, the result assignments
 * after each search and the final return are on lines missing from this
 * extract. */
421 /* Must be called with no resource or lock locks held.
423 * Returns 1 if it finds an already-existing lock that is compatible; in this
424 * case, lockh is filled in with an addref()ed lock */
425 int ldlm_local_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
426 void *cookie, int cookielen, ldlm_mode_t mode,
427 struct lustre_handle *lockh)
429 struct ldlm_resource *res;
433 res = ldlm_resource_get(ns, NULL, res_id, type, 0);
437 spin_lock(&res->lr_lock);
438 if (search_queue(&res->lr_granted, mode, cookie, lockh))
440 if (search_queue(&res->lr_converting, mode, cookie, lockh))
442 if (search_queue(&res->lr_waiting, mode, cookie, lockh))
/* NOTE(review): the reference is dropped before res->lr_lock is released,
 * so spin_unlock() touches res after the put; this is only safe if the put
 * cannot free res here -- verify against ldlm_resource_put(). */
447 ldlm_resource_put(res);
448 spin_unlock(&res->lr_lock);
/* Create a new lock on resource `res_id` and return its handle in `lockh`.
 *
 * The parent handle is resolved with lustre_handle2object(); the resource is
 * fetched with ldlm_resource_get() (last argument 1) using the parent's
 * resource as parent.  After ldlm_lock_new(), the requested mode and data
 * length are stored, one reference is taken with ldlm_lock_addref(), and the
 * handle is filled in.
 * NOTE(review): several parameters (original lines 457-459; `mode` and
 * `data_len` are used below), the guards around the parent dereference and
 * around the resource put (presumably the lock-allocation failure path), and
 * the return statements are on lines missing from this extract. */
452 /* Must be called without the resource lock held. Returns a referenced,
453 * unlocked ldlm_lock. */
454 ldlm_error_t ldlm_local_lock_create(struct ldlm_namespace *ns,
455 struct lustre_handle *parent_lock_handle,
456 __u64 *res_id, __u32 type,
460 struct lustre_handle *lockh)
462 struct ldlm_resource *res, *parent_res = NULL;
463 struct ldlm_lock *lock, *parent_lock;
465 parent_lock = lustre_handle2object(parent_lock_handle);
467 parent_res = parent_lock->l_resource;
469 res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
473 lock = ldlm_lock_new(parent_lock, res);
/* Drop the resource reference while holding res->lr_lock. */
475 spin_lock(&res->lr_lock);
476 ldlm_resource_put(res);
477 spin_unlock(&res->lr_lock);
481 lock->l_req_mode = mode;
483 lock->l_data_len = data_len;
484 ldlm_lock_addref(lock, mode);
486 ldlm_object2handle(lock, lockh);
/* Enqueue the lock named by `lockh` on its resource.
 *
 * Visible steps, all under res->lr_lock except while the policy runs:
 *  - store the blocking callback; for LDLM_EXTENT resources copy `cookie`
 *    into the lock's extent;
 *  - when the namespace's ns_client is unset and the resource type has a
 *    policy in ldlm_res_policy_table, drop lr_lock, run the policy, retake
 *    lr_lock; ELDLM_LOCK_CHANGED switches `res` to the lock's new resource
 *    and sets LDLM_FL_LOCK_CHANGED, ELDLM_LOCK_ABORTED puts the resource and
 *    frees the lock;
 *  - when ns_client is set, place the lock according to the LDLM_FL_BLOCK_*
 *    bits already in *flags, or grant it;
 *  - otherwise queue it on lr_waiting (setting LDLM_FL_BLOCK_CONV,
 *    LDLM_FL_BLOCK_WAIT or LDLM_FL_BLOCK_GRANTED) when the converting queue
 *    is non-empty, the waiting queue is non-empty, or ldlm_lock_compat()
 *    reports a conflict; else grant it;
 *  - finally store the completion callback and release lr_lock.
 * NOTE(review): the `flags` parameter (original line 493), the declaration
 * of rc, the reference taken before the policy call, the GOTO/label lines
 * and several braces/else lines are missing from this extract, and two of
 * the original comments below are truncated. */
490 /* Must be called with lock->l_lock and lock->l_resource->lr_lock not held */
491 ldlm_error_t ldlm_local_lock_enqueue(struct lustre_handle *lockh,
492 void *cookie, int cookie_len,
494 ldlm_lock_callback completion,
495 ldlm_lock_callback blocking)
497 struct ldlm_resource *res;
498 struct ldlm_lock *lock;
499 int incompat = 0, local;
500 ldlm_res_policy policy;
503 lock = lustre_handle2object(lockh);
504 res = lock->l_resource;
505 local = res->lr_namespace->ns_client;
506 spin_lock(&res->lr_lock);
508 lock->l_blocking_ast = blocking;
510 if (res->lr_type == LDLM_EXTENT)
511 memcpy(&lock->l_extent, cookie, sizeof(lock->l_extent));
513 /* policies are not executed on the client */
514 if (!local && (policy = ldlm_res_policy_table[res->lr_type])) {
517 /* We do this dancing with refcounts and locks because the
518 * policy function could send an RPC */
520 spin_unlock(&res->lr_lock);
522 rc = policy(lock, cookie, lock->l_req_mode, NULL);
524 spin_lock(&res->lr_lock);
525 ldlm_resource_put(res);
527 if (rc == ELDLM_LOCK_CHANGED) {
528 res = lock->l_resource;
529 *flags |= LDLM_FL_LOCK_CHANGED;
530 } else if (rc == ELDLM_LOCK_ABORTED) {
532 ldlm_resource_put(lock->l_resource);
533 ldlm_lock_free(lock);
538 lock->l_cookie = cookie;
539 lock->l_cookie_len = cookie_len;
541 if (local && lock->l_req_mode == lock->l_granted_mode) {
542 /* The server returned a blocked lock, but it was granted before
543 * we got a chance to actually enqueue it. We don't need to do
545 GOTO(out_noput, ELDLM_OK);
548 /* If this is a local resource, put it on the appropriate list. */
549 list_del_init(&lock->l_res_link);
551 if (*flags & LDLM_FL_BLOCK_CONV)
552 ldlm_resource_add_lock(res, res->lr_converting.prev,
554 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
555 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
557 ldlm_grant_lock(res, lock);
561 /* FIXME: We may want to optimize by checking lr_most_restr */
562 if (!list_empty(&res->lr_converting)) {
563 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
564 *flags |= LDLM_FL_BLOCK_CONV;
567 if (!list_empty(&res->lr_waiting)) {
568 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
569 *flags |= LDLM_FL_BLOCK_WAIT;
/* Conflict check without sending blocking ASTs (send_cbs == 0). */
572 incompat = ldlm_lock_compat(lock, 0);
574 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
575 *flags |= LDLM_FL_BLOCK_GRANTED;
579 ldlm_grant_lock(res, lock);
582 /* We're called with a lock that has a referenced resource and is not on
583 * any resource list. When we added it to a list, we incurred an extra
585 ldlm_resource_put(lock->l_resource);
587 /* Don't set 'completion_ast' until here so that if the lock is granted
588 * immediately we don't do an unnecessary completion call. */
589 lock->l_completion_ast = completion;
590 spin_unlock(&res->lr_lock);
/* Walk the queue passed as `converting` and grant locks from it.
 *
 * Each pending lock is checked with ldlm_lock_compat(pending, 1), which may
 * queue blocking ASTs.  A lock that gets past the check is removed from its
 * queue and granted; a reference is then taken for its requested mode and
 * one dropped for its granted mode (ldlm_grant_lock() has just made the two
 * modes equal).
 * NOTE(review): the statement executed when the compat check reports a
 * conflict, and the return value, are on lines missing from this extract. */
594 /* Must be called with resource->lr_lock taken. */
595 static int ldlm_reprocess_queue(struct ldlm_resource *res,
596 struct list_head *converting)
598 struct list_head *tmp, *pos;
601 list_for_each_safe(tmp, pos, converting) {
602 struct ldlm_lock *pending;
603 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
605 CDEBUG(D_INFO, "Reprocessing lock %p\n", pending);
607 /* the resource lock protects ldlm_lock_compat */
608 if (ldlm_lock_compat(pending, 1))
611 list_del_init(&pending->l_res_link);
612 ldlm_grant_lock(res, pending);
614 ldlm_lock_addref(pending, pending->l_req_mode);
615 ldlm_lock_decref(pending, pending->l_granted_mode);
/* Re-evaluate the converting and waiting queues of `res`, then send the
 * blocking callbacks that were queued along the way.
 *
 * Under res->lr_lock, res->lr_tmp is pointed at a local list so that
 * ldlm_send_blocking_ast() can collect requests on it.  The converting queue
 * is processed first; the waiting queue is processed only if the converting
 * queue is empty afterwards.  After the lock is released each collected
 * request is sent with ptlrpc_queue_wait(), its status checked, and the
 * request freed.
 * NOTE(review): the early exit for client namespaces, any reset of lr_tmp,
 * the declaration of rc and the condition guarding the CERROR are on lines
 * missing from this extract. */
621 /* Must be called with resource->lr_lock not taken. */
622 void ldlm_reprocess_all(struct ldlm_resource *res)
624 struct list_head rpc_list, *tmp, *pos;
626 INIT_LIST_HEAD(&rpc_list);
628 /* Local lock trees don't get reprocessed. */
629 if (res->lr_namespace->ns_client)
632 spin_lock(&res->lr_lock);
633 res->lr_tmp = &rpc_list;
635 ldlm_reprocess_queue(res, &res->lr_converting);
636 if (list_empty(&res->lr_converting))
637 ldlm_reprocess_queue(res, &res->lr_waiting);
640 spin_unlock(&res->lr_lock);
/* The queued callback requests are sent here, after lr_lock is released. */
642 list_for_each_safe(tmp, pos, &rpc_list) {
644 struct ptlrpc_request *req =
645 list_entry(tmp, struct ptlrpc_request, rq_multi);
647 CDEBUG(D_INFO, "Sending callback.\n");
649 rc = ptlrpc_queue_wait(req);
650 rc = ptlrpc_check_status(req, rc);
651 ptlrpc_free_req(req);
653 CERROR("Callback send failed: %d\n", rc);
/* Cancel `lock`: take res->lr_lock and lock->l_lock, log at D_INFO if
 * readers or writers remain, remove the lock from its resource with
 * ldlm_resource_del_lock(), and free the lock.  `res` is set to NULL when
 * ldlm_resource_del_lock() returns non-zero (resource freed).
 * NOTE(review): original line 674 is missing between the `res = NULL`
 * assignment and the spin_unlock(); it is presumably an `else`, otherwise
 * the unlock would dereference NULL -- confirm.  The return statement is
 * also not visible. */
657 /* Must be called with lock and lock->l_resource unlocked */
658 struct ldlm_resource *ldlm_local_lock_cancel(struct ldlm_lock *lock)
660 struct ldlm_resource *res;
663 res = lock->l_resource;
/* Lock order used here: resource lock first, then the lock's own spinlock. */
665 spin_lock(&res->lr_lock);
666 spin_lock(&lock->l_lock);
668 if (lock->l_readers || lock->l_writers)
669 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
670 "writers)\n", lock->l_readers, lock->l_writers);
672 if (ldlm_resource_del_lock(lock))
673 res = NULL; /* res was freed, nothing else to do. */
675 spin_unlock(&res->lr_lock);
676 ldlm_lock_free(lock);
/* Change the requested mode of the lock named by `lockh` to `new_mode`.
 *
 * Under res->lr_lock the new mode is stored and the lock is taken off its
 * current queue.  When the namespace's ns_client is set, the lock is placed
 * according to the LDLM_FL_BLOCK_* bits in *flags (converting queue, waiting
 * queue, or granted).  The remaining list_add() puts the lock at the tail of
 * the converting queue.
 * NOTE(review): the else lines separating the branches and the return
 * statement are missing from this extract; the list_add() is presumably the
 * non-client branch -- confirm. */
681 /* Must be called with lock and lock->l_resource unlocked */
682 struct ldlm_resource *ldlm_local_lock_convert(struct lustre_handle *lockh,
683 int new_mode, int *flags)
685 struct ldlm_lock *lock;
686 struct ldlm_resource *res;
689 lock = lustre_handle2object(lockh);
690 res = lock->l_resource;
692 spin_lock(&res->lr_lock);
694 lock->l_req_mode = new_mode;
695 list_del_init(&lock->l_res_link);
697 /* If this is a local resource, put it on the appropriate list. */
698 if (res->lr_namespace->ns_client) {
699 if (*flags & LDLM_FL_BLOCK_CONV)
700 ldlm_resource_add_lock(res, res->lr_converting.prev,
702 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
703 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
705 ldlm_grant_lock(res, lock);
707 list_add(&lock->l_res_link, res->lr_converting.prev);
710 spin_unlock(&res->lr_lock);
/* Emit a D_OTHER debug dump of `lock`: address and four version words,
 * parent, resource pointer and first name component, requested and granted
 * modes, reader/writer counts and, for LDLM_EXTENT resources, the extent.
 * NOTE(review): the declaration of `ver`, the statements following the
 * portal_debug and RES_VERSION_SIZE checks, and the condition guarding the
 * "NULL LDLM lock" message are on lines missing from this extract. */
715 void ldlm_lock_dump(struct ldlm_lock *lock)
719 if (!(portal_debug & D_OTHER))
/* The snprintf below formats exactly four l_version entries. */
722 if (RES_VERSION_SIZE != 4)
726 CDEBUG(D_OTHER, " NULL LDLM lock\n");
730 snprintf(ver, sizeof(ver), "%x %x %x %x",
731 lock->l_version[0], lock->l_version[1],
732 lock->l_version[2], lock->l_version[3]);
734 CDEBUG(D_OTHER, " -- Lock dump: %p (%s)\n", lock, ver);
735 CDEBUG(D_OTHER, " Parent: %p\n", lock->l_parent);
736 CDEBUG(D_OTHER, " Resource: %p (%Ld)\n", lock->l_resource,
737 lock->l_resource->lr_name[0]);
738 CDEBUG(D_OTHER, " Requested mode: %d, granted mode: %d\n",
739 (int)lock->l_req_mode, (int)lock->l_granted_mode);
740 CDEBUG(D_OTHER, " Readers: %u ; Writers; %u\n",
741 lock->l_readers, lock->l_writers);
742 if (lock->l_resource->lr_type == LDLM_EXTENT)
743 CDEBUG(D_OTHER, " Extent: %Lu -> %Lu\n",
744 (unsigned long long)lock->l_extent.start,
745 (unsigned long long)lock->l_extent.end);