1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2002 Cluster File Systems, Inc.
6 * This code is issued under the GNU General Public License.
7 * See the file COPYING in this distribution
9 * by Cluster File Systems, Inc.
10 * authors, Peter Braam <braam@clusterfs.com> &
11 * Phil Schwan <phil@clusterfs.com>
14 #define DEBUG_SUBSYSTEM S_LDLM
16 #include <linux/slab.h>
17 #include <linux/module.h>
18 #include <linux/lustre_dlm.h>
19 #include <linux/lustre_mds.h>
/* Slab cache for struct ldlm_lock; defined elsewhere.  ldlm_lock_new()
 * allocates from it and ldlm_lock_free() returns objects to it. */
21 extern kmem_cache_t *ldlm_lock_slab;
/* Entry points into the MDS module.  Both start out NULL;
 * ldlm_intent_policy() looks them up by name with
 * inter_module_get_request() when it finds them still NULL. */
22 int (*mds_reint_p)(int offset, struct ptlrpc_request *req) = NULL;
23 int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req) = NULL;
/* Forward declarations: both functions are named in the dispatch tables
 * that follow, ahead of their definitions further down the file. */
25 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b);
26 static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie,
27 ldlm_mode_t mode, void *data);
/* Compatibility callbacks indexed by resource type.  _ldlm_lock_compat()
 * selects the entry using the lr_type of the queued lock.  Plain and
 * MDS-intent resources share ldlm_plain_compat.  The entries use the
 * obsolete GNU designated-initializer spelling with no equals sign. */
29 ldlm_res_compat ldlm_res_compat_table [] = {
30 [LDLM_PLAIN] ldlm_plain_compat,
31 [LDLM_EXTENT] ldlm_extent_compat,
32 [LDLM_MDSINTENT] ldlm_plain_compat
/* Policy callbacks indexed by resource type.  ldlm_local_lock_enqueue()
 * runs the entry for res->lr_type when the namespace is not local and
 * the entry is non-NULL. */
35 ldlm_res_policy ldlm_res_policy_table [] = {
37 [LDLM_EXTENT] ldlm_extent_policy,
38 [LDLM_MDSINTENT] ldlm_intent_policy
/* Policy callback installed for LDLM_MDSINTENT resources.
 *
 * req_cookie is the ptlrpc_request that carried the enqueue.  When the
 * request message has more than one buffer, buffer 1 is read as a struct
 * ldlm_intent: a reply is packed, the intent is handed to the MDS through
 * mds_reint_p or mds_getattr_name_p, the request status is copied into
 * lock_policy_res2 of the lock reply, and the lock is moved onto the
 * resource named by the inode number found in the MDS reply.  That path
 * ends in RETURN(ELDLM_LOCK_CHANGED).  Otherwise a reply holding only a
 * struct ldlm_reply is packed.
 *
 * NOTE(review): the mode and data arguments appear to be unused --
 * confirm. */
41 static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie,
42 ldlm_mode_t mode, void *data)
44 struct ptlrpc_request *req = req_cookie;
51 if (req->rq_reqmsg->bufcount > 1) {
52 /* an intent needs to be considered */
53 struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
54 struct mds_body *mds_rep;
55 struct ldlm_reply *rep;
56 struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
57 __u32 type = lock->l_resource->lr_type;
58 __u64 new_resid[3] = {0, 0, 0};
59 int bufcount, rc, size[3] = {sizeof(struct ldlm_reply),
60 sizeof(struct mds_body),
/* Convert opc in place; presumably network-to-host byte order, going by
 * the NTOH name -- confirm. */
63 it->opc = NTOH__u64(it->opc);
67 /* Note that in the negative case you may be returning
68 * a file and its obdo */
70 case IT_CREAT|IT_OPEN:
/* Allocate the reply message.  The failure path records -ENOMEM in both
 * rc and req->rq_status. */
83 rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
86 rc = req->rq_status = -ENOMEM;
/* Buffer 0 of the reply is the struct ldlm_reply. */
90 rep = lustre_msg_buf(req->rq_repmsg, 0);
/* NOTE(review): the meaning of lock_policy_res1 == 1 is not evident
 * here; presumably it tells the client that an intent was processed --
 * confirm. */
91 rep->lock_policy_res1 = 1;
94 case IT_CREAT|IT_OPEN:
/* Resolve mds_reint in module mds if the pointer is still NULL. */
101 if (mds_reint_p == NULL)
103 inter_module_get_request
104 ("mds_reint", "mds");
105 if (IS_ERR(mds_reint_p)) {
106 CERROR("MDSINTENT locks require the MDS "
/* 2 is the offset argument of the MDS entry point; presumably the index
 * of the first intent-specific request buffer -- confirm. */
111 rc = mds_reint_p(2, req);
/* Same lookup for mds_getattr_name. */
119 if (mds_getattr_name_p == NULL)
121 inter_module_get_request
122 ("mds_getattr_name", "mds");
123 if (IS_ERR(mds_getattr_name_p)) {
124 CERROR("MDSINTENT locks require the MDS "
129 rc = mds_getattr_name_p(2, req);
133 case IT_READDIR|IT_OPEN:
137 CERROR("Unhandled intent\n");
/* Buffer 1 of the reply is read as a struct mds_body (presumably filled
 * in by the MDS call above -- confirm).  The request status is passed
 * back to the client in lock_policy_res2. */
141 mds_rep = lustre_msg_buf(req->rq_repmsg, 1);
142 rep->lock_policy_res2 = req->rq_status;
143 new_resid[0] = mds_rep->ino;
/* NOTE(review): the two adjacent literals below concatenate without a
 * space between the words and the second conversion, and the first
 * conversion is a plain int format while the type of mds_rep->ino is
 * not shown here -- confirm both. */
145 CDEBUG(D_INFO, "remote intent: locking %d instead of"
146 "%ld\n", mds_rep->ino,
147 (long)lock->l_resource->lr_name[0]);
/* Swap resources: drop the reference on the old resource, then get the
 * resource named new_resid in the same namespace and of the same type.
 * The final argument 1 presumably asks for creation if it is missing --
 * confirm. */
148 ldlm_resource_put(lock->l_resource);
151 ldlm_resource_get(ns, NULL, new_resid, type, 1);
152 if (lock->l_resource == NULL) {
/* The caller, ldlm_local_lock_enqueue(), re-reads lock->l_resource when
 * it sees this return code. */
156 RETURN(ELDLM_LOCK_CHANGED);
/* No intent buffer: the reply carries only the struct ldlm_reply. */
158 int size = sizeof(struct ldlm_reply);
159 rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
162 CERROR("out of memory\n");
/* Compatibility callback for plain resources (also installed for
 * LDLM_MDSINTENT in ldlm_res_compat_table).  The result is whatever
 * lockmode_compat() returns for the two requested modes; granted modes
 * are not looked at here. */
170 static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b)
172 return lockmode_compat(a->l_req_mode, b->l_req_mode);
/* Allocate a lock from ldlm_lock_slab and initialise it: the object is
 * zeroed, bound to @resource, and its child list, resource link, wait
 * queue and spinlock are set up.  When @parent is non-NULL the new lock
 * records it in l_parent and is linked onto parent->l_children while
 * parent->l_lock is held.  @resource is tested for NULL before anything
 * is allocated. */
175 /* Args: referenced, unlocked parent (or NULL)
176 * referenced, unlocked resource
177 * Locks: parent->l_lock */
178 static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
179 struct ldlm_resource *resource)
181 struct ldlm_lock *lock;
183 if (resource == NULL)
186 lock = kmem_cache_alloc(ldlm_lock_slab, SLAB_KERNEL);
/* Zero every field before the explicit initialisation that follows. */
190 memset(lock, 0, sizeof(*lock));
191 lock->l_resource = resource;
192 INIT_LIST_HEAD(&lock->l_children);
193 INIT_LIST_HEAD(&lock->l_res_link);
194 init_waitqueue_head(&lock->l_waitq);
195 lock->l_lock = SPIN_LOCK_UNLOCKED;
/* The child list of the parent is only modified under parent->l_lock. */
197 if (parent != NULL) {
198 spin_lock(&parent->l_lock);
199 lock->l_parent = parent;
200 list_add(&lock->l_childof, &parent->l_children);
201 spin_unlock(&parent->l_lock);
/* Return @lock to ldlm_lock_slab.  Before freeing, a non-empty child
 * list is reported with CERROR plus a lock dump, remaining reader or
 * writer references are logged at D_INFO, and the reference on
 * l_connection is dropped when one is set. */
207 /* Args: unreferenced, locked lock
209 * Caller must do its own ldlm_resource_put() on lock->l_resource */
210 void ldlm_lock_free(struct ldlm_lock *lock)
212 if (!list_empty(&lock->l_children)) {
213 CERROR("lock %p still has children (%p)!\n", lock,
214 lock->l_children.next);
215 ldlm_lock_dump(lock);
/* Outstanding references are only logged; the free below still runs. */
219 if (lock->l_readers || lock->l_writers)
220 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
221 "writers)\n", lock->l_readers, lock->l_writers);
223 if (lock->l_connection)
224 ptlrpc_put_connection(lock->l_connection);
225 kmem_cache_free(ldlm_lock_slab, lock);
/* Fill @desc from @lock: the resource part comes from ldlm_res2desc(),
 * the requested and granted modes are copied field by field, and the
 * extent and version are copied with memcpy sized by the destination. */
228 void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc)
230 ldlm_res2desc(lock->l_resource, &desc->l_resource);
231 desc->l_req_mode = lock->l_req_mode;
232 desc->l_granted_mode = lock->l_granted_mode;
233 memcpy(&desc->l_extent, &lock->l_extent, sizeof(desc->l_extent));
/* l_version is passed without an address-of operator, so it is an array
 * (ldlm_lock_dump() indexes it). */
234 memcpy(desc->l_version, lock->l_version, sizeof(desc->l_version));
/* Take one reference on @lock for @mode while holding lock->l_lock.
 * LCK_NL, LCK_CR and LCK_PR are grouped together by the mode test;
 * presumably they count against l_readers and every other mode against
 * l_writers -- confirm. */
237 /* Args: unlocked lock */
238 void ldlm_lock_addref(struct ldlm_lock *lock, __u32 mode)
240 spin_lock(&lock->l_lock);
241 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
245 spin_unlock(&lock->l_lock);
/* Run the blocking AST of @lock because @new conflicts with it.
 * LDLM_FL_AST_SENT is tested and set under lock->l_lock (presumably the
 * already-set case returns early so the callback runs once per lock --
 * confirm).  The callback itself is invoked after l_lock has been
 * released.  l_blocking_ast is not checked for NULL here; the caller in
 * _ldlm_lock_compat() performs that check. */
248 void ldlm_send_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock *new)
252 spin_lock(&lock->l_lock);
253 if (lock->l_flags & LDLM_FL_AST_SENT) {
258 lock->l_flags |= LDLM_FL_AST_SENT;
259 spin_unlock(&lock->l_lock);
261 lock->l_blocking_ast(lock, new, lock->l_data, lock->l_data_len);
/* Drop one reference on @lock for @mode while holding lock->l_lock; the
 * mode test matches the one in ldlm_lock_addref().  When no readers or
 * writers remain and LDLM_FL_DYING is set, the blocking AST is called
 * with a NULL second argument after l_lock has been released.  A DYING
 * flag on a lock whose namespace is not ns_local is reported with
 * CERROR. */
265 /* Args: unlocked lock */
266 void ldlm_lock_decref(struct ldlm_lock *lock, __u32 mode)
273 spin_lock(&lock->l_lock);
274 if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
/* Bitwise AND binds tighter than logical AND, so the flag test below is
 * evaluated as a unit even without parentheses. */
278 if (!lock->l_readers && !lock->l_writers &&
279 lock->l_flags & LDLM_FL_DYING) {
280 /* Read this lock its rights. */
281 if (!lock->l_resource->lr_namespace->ns_local) {
282 CERROR("LDLM_FL_DYING set on non-local lock!\n");
286 CDEBUG(D_INFO, "final decref done on dying lock, "
287 "calling callback.\n");
/* The callback is made without l_lock held. */
288 spin_unlock(&lock->l_lock);
289 lock->l_blocking_ast(lock, NULL, lock->l_data,
292 spin_unlock(&lock->l_lock);
/* Check @lock against every lock linked on @queue.  For each queued lock
 * the compat callback of its resource type is tried first, then
 * lockmode_compat() of the queued granted mode against the requested
 * mode of @lock.  When both fail and @send_cbs is set, the blocking AST
 * of the queued lock is sent, provided it has one.
 * The return value presumably is non-zero when a conflict was found (a
 * caller stores it in a variable named incompat) -- confirm. */
296 /* Args: locked lock */
297 static int _ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs,
298 struct list_head *queue)
300 struct list_head *tmp, *pos;
/* tmp is the cursor and pos holds the saved next entry; the names are
 * the reverse of the parameter names of the list macro. */
303 list_for_each_safe(tmp, pos, queue) {
304 struct ldlm_lock *child;
305 ldlm_res_compat compat;
307 child = list_entry(tmp, struct ldlm_lock, l_res_link);
/* Type-specific check, looked up by the resource type of the queued
 * lock. */
311 compat = ldlm_res_compat_table[child->l_resource->lr_type];
/* NOTE(review): succeded in the message below is a misspelling of
 * succeeded. */
312 if (compat(child, lock)) {
313 CDEBUG(D_OTHER, "compat function succeded, next.\n");
/* Fallback: compare what the queued lock actually holds with what @lock
 * is asking for. */
316 if (lockmode_compat(child->l_granted_mode, lock->l_req_mode)) {
317 CDEBUG(D_OTHER, "lock modes are compatible, next.\n");
323 CDEBUG(D_OTHER, "compat function failed and lock modes incompat\n");
324 if (send_cbs && child->l_blocking_ast != NULL) {
325 CDEBUG(D_OTHER, "incompatible; sending blocking AST.\n");
326 ldlm_send_blocking_ast(child, lock);
/* Check @lock against the granted queue and then the converting queue of
 * its resource, OR-ing the two results together.  @send_cbs is passed
 * through unchanged to both scans.
 * NOTE(review): the helper is documented as taking a locked lock while
 * this wrapper takes an unlocked one, and no l_lock acquisition is
 * visible between the two -- confirm which contract is intended. */
333 /* Args: unlocked lock */
334 static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs)
339 rc = _ldlm_lock_compat(lock, send_cbs, &lock->l_resource->lr_granted);
340 /* FIXME: should we be sending ASTs to converting? */
341 rc |= _ldlm_lock_compat(lock, send_cbs,
342 &lock->l_resource->lr_converting);
/* Grant @lock on @res: link it onto res->lr_granted, make the granted
 * mode equal to the requested mode, update res->lr_most_restr, and call
 * the completion AST of the lock if one is set. */
347 /* Args: locked lock, locked resource */
348 void ldlm_grant_lock(struct ldlm_resource *res, struct ldlm_lock *lock)
352 ldlm_resource_add_lock(res, &res->lr_granted, lock);
353 lock->l_granted_mode = lock->l_req_mode;
/* lr_most_restr tracks the numerically smallest granted mode seen;
 * presumably smaller mode values are more restrictive -- confirm. */
355 if (lock->l_granted_mode < res->lr_most_restr)
356 res->lr_most_restr = lock->l_granted_mode;
/* The second argument of the completion callback is always NULL here. */
358 if (lock->l_completion_ast)
359 lock->l_completion_ast(lock, NULL,
360 lock->l_data, lock->l_data_len);
/* Scan @queue for a lock that can satisfy a request for @mode.  Each
 * lock is tested for LDLM_FL_DYING, for a requested mode different from
 * @mode, and (LDLM_EXTENT resources only) for an extent that does not
 * fully cover @extent; presumably a lock failing any test is skipped --
 * confirm.  On a hit the lock gets a reference in @mode and @lockh is
 * filled in.  @extent is only dereferenced for LDLM_EXTENT resources. */
364 static int search_queue(struct list_head *queue, ldlm_mode_t mode,
365 struct ldlm_extent *extent, struct lustre_handle *lockh)
367 struct list_head *tmp;
369 list_for_each(tmp, queue) {
370 struct ldlm_lock *lock;
371 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
373 if (lock->l_flags & LDLM_FL_DYING)
376 /* lock_convert() takes the resource lock, so we're sure that
377 * req_mode, lr_type, and l_cookie won't change beneath us */
378 if (lock->l_req_mode != mode)
/* The condition is true when the queued extent starts after or ends
 * before the requested one, that is, when it does not contain it. */
381 if (lock->l_resource->lr_type == LDLM_EXTENT &&
382 (lock->l_extent.start > extent->start ||
383 lock->l_extent.end < extent->end))
/* ldlm_lock_addref() takes lock->l_lock, so l_lock nests inside the
 * resource lock held by the caller. */
386 ldlm_lock_addref(lock, mode);
387 ldlm_object2handle(lock, lockh);
/* Look for an existing lock on the resource named @res_id that matches
 * @mode.  The resource is looked up with a final argument of 0
 * (presumably meaning do not create -- confirm) and its granted,
 * converting and waiting queues are searched in that order under
 * res->lr_lock.  @cookie is handed to search_queue() as the extent to
 * match. */
394 /* Must be called with no resource or lock locks held.
396 * Returns 1 if it finds an already-existing lock that is compatible; in this
397 * case, lockh is filled in with a addref()ed lock */
398 int ldlm_local_lock_match(struct ldlm_namespace *ns, __u64 *res_id, __u32 type,
399 void *cookie, int cookielen, ldlm_mode_t mode,
400 struct lustre_handle *lockh)
402 struct ldlm_resource *res;
406 res = ldlm_resource_get(ns, NULL, res_id, type, 0);
410 spin_lock(&res->lr_lock);
411 if (search_queue(&res->lr_granted, mode, cookie, lockh))
413 if (search_queue(&res->lr_converting, mode, cookie, lockh))
415 if (search_queue(&res->lr_waiting, mode, cookie, lockh))
/* NOTE(review): the reference is dropped before lr_lock is released.
 * ldlm_local_lock_cancel() treats a non-zero return from
 * ldlm_resource_put() as the resource having been freed, in which case
 * the unlock below would touch freed memory -- confirm the ordering is
 * safe or swap the two calls. */
420 ldlm_resource_put(res);
421 spin_unlock(&res->lr_lock);
/* Create a new lock on the resource named @res_id and hand it back
 * through @lockh.  The parent lock is resolved from its handle and its
 * resource is passed to ldlm_resource_get() as the parent resource; the
 * lookup uses a final argument of 1 (presumably create if missing --
 * confirm).  The new lock gets its requested mode and data length set
 * and one reference in that mode before the handle is filled in. */
425 /* Must be called without the resource lock held. Returns a referenced,
426 * unlocked ldlm_lock. */
427 ldlm_error_t ldlm_local_lock_create(struct ldlm_namespace *ns,
428 struct lustre_handle *parent_lock_handle,
429 __u64 *res_id, __u32 type,
433 struct lustre_handle *lockh)
435 struct ldlm_resource *res, *parent_res = NULL;
436 struct ldlm_lock *lock, *parent_lock;
438 parent_lock = lustre_handle2object(parent_lock_handle);
440 parent_res = parent_lock->l_resource;
442 res = ldlm_resource_get(ns, parent_res, res_id, type, 1);
446 lock = ldlm_lock_new(parent_lock, res);
/* Presumably the cleanup for a failed ldlm_lock_new() -- confirm.
 * NOTE(review): the reference is dropped while lr_lock is held and the
 * unlock follows; if the put can free the resource the unlock touches
 * freed memory. */
448 spin_lock(&res->lr_lock);
449 ldlm_resource_put(res);
450 spin_unlock(&res->lr_lock);
454 lock->l_req_mode = mode;
456 lock->l_data_len = data_len;
457 ldlm_lock_addref(lock, mode);
459 ldlm_object2handle(lock, lockh);
/* Enqueue the lock named by @lockh on its resource.
 *
 * Works under res->lr_lock.  Stores the blocking callback, copies the
 * extent out of @cookie for LDLM_EXTENT resources, and on a non-local
 * namespace runs the policy of the resource type, which may move the
 * lock to a different resource (ELDLM_LOCK_CHANGED, reported to the
 * caller as LDLM_FL_LOCK_CHANGED in *flags).
 *
 * First queueing sequence (a local resource, per the existing comment):
 * the lock is queued according to the LDLM_FL_BLOCK_* bits already in
 * *flags, with ldlm_grant_lock() as the remaining case.
 * Second sequence: the lock is added to lr_waiting and a
 * LDLM_FL_BLOCK_* bit is set in *flags when the converting queue is
 * non-empty, when the waiting queue is non-empty, or (presumably when
 * ldlm_lock_compat() reports a conflict -- confirm); ldlm_grant_lock()
 * is the remaining case. */
463 /* Must be called with lock->l_lock and lock->l_resource->lr_lock not held */
464 ldlm_error_t ldlm_local_lock_enqueue(struct lustre_handle *lockh,
465 void *cookie, int cookie_len,
467 ldlm_lock_callback completion,
468 ldlm_lock_callback blocking)
470 struct ldlm_resource *res;
471 struct ldlm_lock *lock;
472 int incompat = 0, local;
473 ldlm_res_policy policy;
476 lock = lustre_handle2object(lockh);
477 res = lock->l_resource;
478 local = res->lr_namespace->ns_local;
479 spin_lock(&res->lr_lock);
481 lock->l_blocking_ast = blocking;
/* For extent resources the cookie is the requested extent. */
483 if (res->lr_type == LDLM_EXTENT)
484 memcpy(&lock->l_extent, cookie, sizeof(lock->l_extent));
486 /* policies are not executed on the client */
487 if (!local && (policy = ldlm_res_policy_table[res->lr_type])) {
488 int rc = policy(lock, cookie, lock->l_req_mode, NULL);
/* NOTE(review): res is re-read here while lr_lock of the previous
 * resource is still held, and the unlock at the end of the function
 * uses the new res.  Unless another line handles the hand-over, the old
 * lr_lock stays locked and a lock that was never taken is released --
 * confirm. */
489 if (rc == ELDLM_LOCK_CHANGED) {
490 res = lock->l_resource;
491 *flags |= LDLM_FL_LOCK_CHANGED;
/* Only the pointer is stored; the cookie data is not copied. */
495 lock->l_cookie = cookie;
496 lock->l_cookie_len = cookie_len;
498 if (local && lock->l_req_mode == lock->l_granted_mode) {
499 /* The server returned a blocked lock, but it was granted before
500 * we got a chance to actually enqueue it. We don't need to do
505 /* If this is a local resource, put it on the appropriate list. */
506 list_del_init(&lock->l_res_link);
/* Passing the prev pointer of a queue head as the insertion point
 * presumably appends at the tail of that queue -- confirm against
 * ldlm_resource_add_lock(). */
508 if (*flags & LDLM_FL_BLOCK_CONV)
509 ldlm_resource_add_lock(res, res->lr_converting.prev,
511 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
512 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
514 ldlm_grant_lock(res, lock);
518 /* FIXME: We may want to optimize by checking lr_most_restr */
/* A non-empty converting queue blocks the new lock; note that the lock
 * itself is still placed on the waiting queue. */
519 if (!list_empty(&res->lr_converting)) {
520 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
521 *flags |= LDLM_FL_BLOCK_CONV;
524 if (!list_empty(&res->lr_waiting)) {
525 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
526 *flags |= LDLM_FL_BLOCK_WAIT;
/* send_cbs is 0 here, so this check sends no blocking ASTs. */
529 incompat = ldlm_lock_compat(lock, 0);
531 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
532 *flags |= LDLM_FL_BLOCK_GRANTED;
536 ldlm_grant_lock(res, lock);
539 /* Don't set 'completion_ast' until here so that if the lock is granted
540 * immediately we don't do an unnecessary completion call. */
541 lock->l_completion_ast = completion;
542 spin_unlock(&res->lr_lock);
/* Walk the queue @converting in list order and grant what can be
 * granted; ldlm_reprocess_all() also passes lr_waiting through the same
 * parameter.  Each lock is checked with ldlm_lock_compat() with blocking
 * ASTs enabled.  Presumably the walk stops at the first lock that still
 * conflicts -- confirm.  A lock that gets past the check is unlinked
 * from the queue and granted. */
546 /* Must be called with resource->lr_lock taken. */
547 static int ldlm_reprocess_queue(struct ldlm_resource *res,
548 struct list_head *converting)
550 struct list_head *tmp, *pos;
/* tmp is the cursor, pos the saved next entry, so unlinking the current
 * entry inside the loop is safe. */
553 list_for_each_safe(tmp, pos, converting) {
554 struct ldlm_lock *pending;
555 pending = list_entry(tmp, struct ldlm_lock, l_res_link);
557 /* the resource lock protects ldlm_lock_compat */
558 if (ldlm_lock_compat(pending, 1))
561 list_del_init(&pending->l_res_link);
562 ldlm_grant_lock(res, pending);
/* ldlm_grant_lock() has just made l_granted_mode equal to l_req_mode,
 * so this pair takes and drops a reference of the same mode.  The
 * decref can run the blocking AST when the lock is LDLM_FL_DYING and
 * has no other references.  NOTE(review): the purpose of the pair is
 * not stated -- confirm. */
564 ldlm_lock_addref(pending, pending->l_req_mode);
565 ldlm_lock_decref(pending, pending->l_granted_mode);
/* Re-run granting for @res after its lock set has changed.  Resources in
 * an ns_local namespace are left alone.  Under res->lr_lock the
 * converting queue is reprocessed first; the waiting queue is only
 * reprocessed when the converting queue is empty afterwards.  The return
 * values of ldlm_reprocess_queue() are ignored. */
571 /* Must be called with resource->lr_lock not taken. */
572 void ldlm_reprocess_all(struct ldlm_resource *res)
574 /* Local lock trees don't get reprocessed. */
575 if (res->lr_namespace->ns_local)
578 spin_lock(&res->lr_lock);
579 ldlm_reprocess_queue(res, &res->lr_converting);
580 if (list_empty(&res->lr_converting))
581 ldlm_reprocess_queue(res, &res->lr_waiting);
582 spin_unlock(&res->lr_lock);
/* Remove @lock from its resource and free it.  Takes res->lr_lock and
 * then lock->l_lock, logs any outstanding reader or writer references,
 * unlinks the lock with ldlm_resource_del_lock(), drops the reference
 * the lock held on the resource and finally calls ldlm_lock_free().
 * res is set to NULL when ldlm_resource_put() reports that the resource
 * was freed; presumably res is what gets returned -- confirm. */
585 /* Must be called with lock and lock->l_resource unlocked */
586 struct ldlm_resource *ldlm_local_lock_cancel(struct ldlm_lock *lock)
588 struct ldlm_resource *res;
591 res = lock->l_resource;
/* Lock order: resource lock first, then the lock itself. */
593 spin_lock(&res->lr_lock);
594 spin_lock(&lock->l_lock);
596 if (lock->l_readers || lock->l_writers)
597 CDEBUG(D_INFO, "lock still has references (%d readers, %d "
598 "writers)\n", lock->l_readers, lock->l_writers);
600 ldlm_resource_del_lock(lock);
601 if (ldlm_resource_put(res))
602 res = NULL; /* res was freed, nothing else to do. */
/* NOTE(review): this unlock must only run when the resource was not
 * freed, since res is NULL otherwise -- confirm it sits in the else
 * branch of the test above. */
604 spin_unlock(&res->lr_lock);
/* l_lock is still held at this point; ldlm_lock_free() is documented as
 * taking a locked lock. */
605 ldlm_lock_free(lock);
/* Change the requested mode of the lock named by @lockh to @new_mode.
 * Under res->lr_lock the lock is unlinked from whatever queue it is on
 * and re-queued.  In an ns_local namespace the LDLM_FL_BLOCK_* bits in
 * *flags select the converting or waiting queue, with ldlm_grant_lock()
 * as the remaining case.  The list_add onto the converting queue is
 * presumably the branch for a namespace that is not local -- confirm. */
610 /* Must be called with lock and lock->l_resource unlocked */
611 struct ldlm_resource *ldlm_local_lock_convert(struct lustre_handle *lockh,
612 int new_mode, int *flags)
614 struct ldlm_lock *lock;
615 struct ldlm_resource *res;
618 lock = lustre_handle2object(lockh);
619 res = lock->l_resource;
621 spin_lock(&res->lr_lock);
/* l_granted_mode is left as it was; only the request changes here. */
623 lock->l_req_mode = new_mode;
624 list_del_init(&lock->l_res_link);
626 /* If this is a local resource, put it on the appropriate list. */
627 if (res->lr_namespace->ns_local) {
628 if (*flags & LDLM_FL_BLOCK_CONV)
629 ldlm_resource_add_lock(res, res->lr_converting.prev,
631 else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
632 ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
634 ldlm_grant_lock(res, lock);
/* Inserting after the prev pointer of the head appends at the tail of
 * the converting queue.  This uses list_add directly instead of
 * ldlm_resource_add_lock(). */
636 list_add(&lock->l_res_link, res->lr_converting.prev);
639 spin_unlock(&res->lr_lock);
/* Log a description of @lock at D_OTHER: version words, parent and
 * resource pointers, requested and granted modes, reader and writer
 * counts, and the extent when the resource type is LDLM_EXTENT.
 * The function tests whether D_OTHER is enabled in portal_debug before
 * doing any formatting, and prints a dedicated message for a NULL lock
 * (the NULL test itself is presumably just above that message --
 * confirm). */
644 void ldlm_lock_dump(struct ldlm_lock *lock)
648 if (!(portal_debug & D_OTHER))
/* The snprintf below formats exactly four version words, hence this
 * guard on RES_VERSION_SIZE. */
651 if (RES_VERSION_SIZE != 4)
655 CDEBUG(D_OTHER, " NULL LDLM lock\n");
659 snprintf(ver, sizeof(ver), "%x %x %x %x",
660 lock->l_version[0], lock->l_version[1],
661 lock->l_version[2], lock->l_version[3]);
663 CDEBUG(D_OTHER, " -- Lock dump: %p (%s)\n", lock, ver);
664 CDEBUG(D_OTHER, " Parent: %p\n", lock->l_parent);
665 CDEBUG(D_OTHER, " Resource: %p\n", lock->l_resource);
666 CDEBUG(D_OTHER, " Requested mode: %d, granted mode: %d\n",
667 (int)lock->l_req_mode, (int)lock->l_granted_mode);
/* NOTE(review): the Writers label below is followed by a semicolon where
 * the Readers label has a colon; looks like a slip in the message. */
668 CDEBUG(D_OTHER, " Readers: %u ; Writers; %u\n",
669 lock->l_readers, lock->l_writers);
/* The extent fields are cast to unsigned long long to match the
 * conversion used in the format. */
670 if (lock->l_resource->lr_type == LDLM_EXTENT)
671 CDEBUG(D_OTHER, " Extent: %Lu -> %Lu\n",
672 (unsigned long long)lock->l_extent.start,
673 (unsigned long long)lock->l_extent.end);