/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ldlm/ldlm_extent.c
 *
 * Author: Peter Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LDLM
#ifndef __KERNEL__
# include <liblustre.h>
#else
# include <libcfs/libcfs.h>
#endif

#include <lustre_dlm.h>
#include <obd_support.h>
#include <obd.h>
#include <obd_class.h>
#include <lustre_lib.h>

#include "ldlm_internal.h"
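
/* Grown PW/CW extents are capped so that they end no more than
 * LDLM_MAX_GROWN_EXTENT bytes past the requested start when many
 * conflicting locks exist; see ldlm_extent_internal_policy_fixup(). */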
#define LDLM_MAX_GROWN_EXTENT (32 * 1024 * 1024 - 1)

/* fixup the ldlm_extent after expanding */
static void ldlm_extent_internal_policy_fixup(struct ldlm_lock *req,
                                              struct ldlm_extent *new_ex,
                                              int conflicting)
{
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        __u64 req_align, mask;

        if (conflicting > 32 && (req_mode == LCK_PW || req_mode == LCK_CW)) {
                if (req_end < req_start + LDLM_MAX_GROWN_EXTENT)
                        new_ex->end = min(req_start + LDLM_MAX_GROWN_EXTENT,
                                          new_ex->end);
        }

        if (new_ex->start == 0 && new_ex->end == OBD_OBJECT_EOF) {
                EXIT;
                return;
        }

        /* We need to ensure that the lock extent is properly aligned to what
         * the client requested.  We align it to the lowest common denominator
         * of the client's requested lock start and end alignment. */
        mask = 0x1000ULL;
        req_align = (req_end + 1) | req_start;
        if (req_align != 0) {
                while ((req_align & mask) == 0)
                        mask <<= 1;
        }
        mask -= 1;

        /* We can only shrink the lock, not grow it.
         * This should never cause the lock to be smaller than requested,
         * since the requested lock was already aligned on these boundaries. */
        new_ex->start = ((new_ex->start - 1) | mask) + 1;
        new_ex->end = ((new_ex->end + 1) & ~mask) - 1;
        LASSERTF(new_ex->start <= req_start,
                 "mask "LPX64" grant start "LPU64" req start "LPU64"\n",
                 mask, new_ex->start, req_start);
        LASSERTF(new_ex->end >= req_end,
                 "mask "LPX64" grant end "LPU64" req end "LPU64"\n",
                 mask, new_ex->end, req_end);
}

/* The purpose of this function is to return:
 * - the maximum extent
 * - containing the requested extent
 * - and not overlapping existing conflicting extents outside the requested one
 *
 * Use the interval tree to expand the lock extent for a granted lock.
 */
static void ldlm_extent_internal_policy_granted(struct ldlm_lock *req,
                                                struct ldlm_extent *new_ex)
{
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        struct ldlm_interval_tree *tree;
        struct interval_node_extent limiter = { new_ex->start, new_ex->end };
        int conflicting = 0;
        int idx;
        ENTRY;

        lockmode_verify(req_mode);

        /* Use the interval tree to handle the granted extent locks. */
        for (idx = 0; idx < LCK_MODE_NUM; idx++) {
                struct interval_node_extent ext = { req_start, req_end };

                tree = &res->lr_itree[idx];
                if (lockmode_compat(tree->lit_mode, req_mode))
                        continue;

                conflicting += tree->lit_size;
                if (conflicting > 4)
                        limiter.start = req_start;

                if (interval_is_overlapped(tree->lit_root, &ext))
                        CDEBUG(D_INFO,
                               "req_mode = %d, tree->lit_mode = %d, "
                               "tree->lit_size = %d\n",
                               req_mode, tree->lit_mode, tree->lit_size);
                interval_expand(tree->lit_root, &ext, &limiter);
                limiter.start = max(limiter.start, ext.start);
                limiter.end = min(limiter.end, ext.end);
                if (limiter.start == req_start && limiter.end == req_end)
                        break;
        }

        new_ex->start = limiter.start;
        new_ex->end = limiter.end;
        LASSERT(new_ex->start <= req_start);
        LASSERT(new_ex->end >= req_end);

        ldlm_extent_internal_policy_fixup(req, new_ex, conflicting);
        EXIT;
}

/* The purpose of this function is to return:
 * - the maximum extent
 * - containing the requested extent
 * - and not overlapping existing conflicting extents outside the requested one
 */
static void
ldlm_extent_internal_policy_waiting(struct ldlm_lock *req,
                                    struct ldlm_extent *new_ex)
{
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        cfs_list_t *tmp;
        int conflicting = 0;
        ENTRY;

        lockmode_verify(req_mode);

        /* for waiting locks */
        cfs_list_for_each(tmp, &res->lr_waiting) {
                struct ldlm_lock *lock;
                struct ldlm_extent *l_extent;

                lock = cfs_list_entry(tmp, struct ldlm_lock, l_res_link);
                l_extent = &lock->l_policy_data.l_extent;

                /* We already hit the minimum requested size, search no more */
                if (new_ex->start == req_start && new_ex->end == req_end) {
                        EXIT;
                        return;
                }

                /* Don't conflict with ourselves */
                if (req == lock)
                        continue;

                /* Locks are compatible, overlap doesn't matter */
                /* Until bug 20 is fixed, try to avoid granting overlapping
                 * locks on one client (they take a long time to cancel) */
                if (lockmode_compat(lock->l_req_mode, req_mode) &&
                    lock->l_export != req->l_export)
                        continue;

                /* If this is a high-traffic lock, don't grow downwards at all
                 * or grow upwards too much */
                ++conflicting;
                if (conflicting > 4)
                        new_ex->start = req_start;

                /* If the lock doesn't overlap new_ex, skip it. */
                if (!ldlm_extent_overlap(l_extent, new_ex))
                        continue;

                /* The locks conflict in their requested extents and we can't
                 * satisfy both, so ignore this one.  Either we will ping-pong
                 * this extent (we would regardless of what extent we granted)
                 * or the lock is unused and it shouldn't limit our extent
                 * growth. */
                if (ldlm_extent_overlap(&lock->l_req_extent, &req->l_req_extent))
                        continue;

                /* We grow extents downwards only as far as they don't overlap
                 * with already-granted locks, on the assumption that clients
                 * will be writing beyond the initial requested end and would
                 * then need to enqueue a new lock beyond the previous request.
                 * l_req_extent->end strictly < req_start, checked above. */
                if (l_extent->start < req_start && new_ex->start != req_start) {
                        if (l_extent->end >= req_start)
                                new_ex->start = req_start;
                        else
                                new_ex->start = min(l_extent->end + 1, req_start);
                }

                /* If we need to cancel this lock anyway because our request
                 * overlaps the granted lock, we grow up to its requested
                 * extent start instead of limiting this extent, assuming that
                 * clients are writing forwards and the lock had overgrown
                 * its extent downwards before we enqueued our request. */
                if (l_extent->end > req_end) {
                        if (l_extent->start <= req_end)
                                new_ex->end = max(lock->l_req_extent.start - 1,
                                                  req_end);
                        else
                                new_ex->end = max(l_extent->start - 1, req_end);
                }
        }

        ldlm_extent_internal_policy_fixup(req, new_ex, conflicting);
        EXIT;
}

/* In order to determine the largest possible extent we can grant, we need
 * to scan all of the queues. */
static void ldlm_extent_policy(struct ldlm_resource *res,
                               struct ldlm_lock *lock, int *flags)
{
        struct ldlm_extent new_ex = { .start = 0, .end = OBD_OBJECT_EOF };

        if (lock->l_export == NULL)
                /*
                 * This is a local lock taken by the server (e.g., as a part
                 * of OST-side locking, or unlink handling).  Expansion doesn't
                 * make a lot of sense for local locks, because they are
                 * dropped immediately on operation completion and would only
                 * conflict with other threads.
                 */
                return;

        if (lock->l_policy_data.l_extent.start == 0 &&
            lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF)
                /* fast-path whole file locks */
                return;

        ldlm_extent_internal_policy_granted(lock, &new_ex);
        ldlm_extent_internal_policy_waiting(lock, &new_ex);

        if (new_ex.start != lock->l_policy_data.l_extent.start ||
            new_ex.end != lock->l_policy_data.l_extent.end) {
                *flags |= LDLM_FL_LOCK_CHANGED;
                lock->l_policy_data.l_extent.start = new_ex.start;
                lock->l_policy_data.l_extent.end = new_ex.end;
        }
}
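
/* Decide whether the resource of @lock should be considered contended: if
 * the number of conflicting locks exceeds ns_contended_locks, refresh the
 * resource's contention timestamp, and report the resource as contended
 * until ns_contention_time seconds have passed since that timestamp. */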
static int ldlm_check_contention(struct ldlm_lock *lock, int contended_locks)
{
        struct ldlm_resource *res = lock->l_resource;
        cfs_time_t now = cfs_time_current();

        if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_SET_CONTENTION))
                return 1;

        CDEBUG(D_DLMTRACE, "contended locks = %d\n", contended_locks);
        if (contended_locks > res->lr_namespace->ns_contended_locks)
                res->lr_contention_time = now;
        return cfs_time_before(now, cfs_time_add(res->lr_contention_time,
                cfs_time_seconds(res->lr_namespace->ns_contention_time)));
}
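
/* Context handed to ldlm_extent_compat_cb() while walking a granted-lock
 * interval tree: the blocking-AST work list, the lock being enqueued, the
 * tree's lock mode and a pointer to the contended-lock counter. */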
struct ldlm_extent_compat_args {
        cfs_list_t *work_list;
        struct ldlm_lock *lock;
        ldlm_mode_t mode;
        int *locks;
};

static enum interval_iter ldlm_extent_compat_cb(struct interval_node *n,
                                                void *data)
{
        struct ldlm_extent_compat_args *priv = data;
        struct ldlm_interval *node = to_ldlm_interval(n);
        struct ldlm_extent *extent;
        cfs_list_t *work_list = priv->work_list;
        struct ldlm_lock *lock, *enq = priv->lock;
        ldlm_mode_t mode = priv->mode;
        int count = 0;
        ENTRY;

        LASSERT(!cfs_list_empty(&node->li_group));

        cfs_list_for_each_entry(lock, &node->li_group, l_sl_policy) {
                /* the interval tree only holds granted locks */
                LASSERTF(mode == lock->l_granted_mode,
                         "mode = %s, lock->l_granted_mode = %s\n",
                         ldlm_lockname[mode],
                         ldlm_lockname[lock->l_granted_mode]);
                count++;
                if (lock->l_blocking_ast)
                        ldlm_add_ast_work_item(lock, enq, work_list);
        }

        /* don't count conflicting glimpse locks */
        extent = ldlm_interval_extent(node);
        if (!(mode == LCK_PR &&
              extent->start == 0 && extent->end == OBD_OBJECT_EOF))
                *priv->locks += count;

        RETURN(INTERVAL_ITER_CONT);
}

/* Determine if the lock is compatible with all locks on the queue.
 * We stop walking the queue if we hit ourselves so we don't take
 * conflicting locks enqueued after us into account, or we'd wait forever.
 *
 * 0 if the lock is not compatible
 * 1 if the lock is compatible
 * 2 if this group lock is compatible and requires no further checking
 * negative error, such as EWOULDBLOCK for group locks
 */
static int
ldlm_extent_compat_queue(cfs_list_t *queue, struct ldlm_lock *req,
                         int *flags, ldlm_error_t *err,
                         cfs_list_t *work_list, int *contended_locks)
{
        cfs_list_t *tmp;
        struct ldlm_lock *lock;
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        int compat = 1;
        int scan = 0;
        int check_contention;
        ENTRY;

        lockmode_verify(req_mode);

        /* Using interval tree for granted lock */
        if (queue == &res->lr_granted) {
                struct ldlm_interval_tree *tree;
                struct ldlm_extent_compat_args data = {.work_list = work_list,
                                                       .lock = req,
                                                       .locks = contended_locks };
                struct interval_node_extent ex = { .start = req_start,
                                                   .end = req_end };
                int idx, rc;

                for (idx = 0; idx < LCK_MODE_NUM; idx++) {
                        tree = &res->lr_itree[idx];
                        if (tree->lit_root == NULL) /* empty tree, skipped */
                                continue;

                        data.mode = tree->lit_mode;
                        if (lockmode_compat(req_mode, tree->lit_mode)) {
                                struct ldlm_interval *node;
                                struct ldlm_extent *extent;

                                if (req_mode != LCK_GROUP)
                                        continue;

                                /* group lock, grant it immediately if
                                 * compatible */
                                node = to_ldlm_interval(tree->lit_root);
                                extent = ldlm_interval_extent(node);
                                if (req->l_policy_data.l_extent.gid ==
                                    extent->gid)
                                        RETURN(2);
                        }

                        if (tree->lit_mode == LCK_GROUP) {
                                if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                        compat = -EWOULDBLOCK;
                                        goto destroylock;
                                }

                                *flags |= LDLM_FL_NO_TIMEOUT;
                                if (!work_list)
                                        RETURN(0);

                                /* if the work list is not NULL, add all
                                 * locks in the tree to the work list */
                                compat = 0;
                                interval_iterate(tree->lit_root,
                                                 ldlm_extent_compat_cb, &data);
                                continue;
                        }

                        if (!work_list) {
                                rc = interval_is_overlapped(tree->lit_root, &ex);
                                if (rc)
                                        RETURN(0);
                        } else {
                                interval_search(tree->lit_root, &ex,
                                                ldlm_extent_compat_cb, &data);
                                if (!cfs_list_empty(work_list) && compat)
                                        compat = 0;
                        }
                }
        } else { /* for waiting queue */
                cfs_list_for_each(tmp, queue) {
                        check_contention = 1;

                        lock = cfs_list_entry(tmp, struct ldlm_lock,
                                              l_res_link);

                        if (req == lock)
                                break;

                        if (unlikely(scan)) {
                                /* We only get here if we are queueing a GROUP
                                 * lock and have met an incompatible one.  The
                                 * main idea is to insert the GROUP lock after
                                 * the last compatible GROUP lock in the
                                 * waiting queue or, if there is none, in
                                 * front of the first non-GROUP lock. */
                                if (lock->l_req_mode != LCK_GROUP) {
                                        /* Ok, we hit a non-GROUP lock; there
                                         * should be no more GROUP locks later
                                         * on, so queue in front of the first
                                         * non-GROUP lock. */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        cfs_list_del_init(&lock->l_res_link);
                                        ldlm_resource_insert_lock_after(req, lock);
                                        break;
                                }
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* found it */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        break;
                                }
                                continue;
                        }

                        /* locks are compatible, overlap doesn't matter */
                        if (lockmode_compat(lock->l_req_mode, req_mode)) {
                                if (req_mode == LCK_PR &&
                                    ((lock->l_policy_data.l_extent.start <=
                                      req->l_policy_data.l_extent.start) &&
                                     (lock->l_policy_data.l_extent.end >=
                                      req->l_policy_data.l_extent.end))) {
                                        /* If we met a PR lock just like us or
                                           wider, and nobody down the list
                                           conflicted with it, we can skip
                                           processing the rest of the list and
                                           safely place ourselves at the end of
                                           the list, or grant (depending on
                                           whether we met conflicting locks
                                           earlier in the list).
                                           In the first-enqueue case only, we
                                           keep traversing if there is
                                           something conflicting further down
                                           the list, because we need to make
                                           sure that something is marked as
                                           AST_SENT as well; with an empty work
                                           list we would exit on the first
                                           conflict met. */
                                        /* There IS a case where such a flag is
                                           not set for a lock, yet it blocks
                                           something.  Luckily for us this is
                                           only during destroy, so the lock is
                                           exclusive.  So here we are safe. */
                                        if (!(lock->l_flags & LDLM_FL_AST_SENT)) {
                                                RETURN(compat);
                                        }
                                }

                                /* non-group locks are compatible, overlap
                                   doesn't matter */
                                if (likely(req_mode != LCK_GROUP))
                                        continue;

                                /* If we are trying to get a GROUP lock and
                                   there is another one of this kind, we need
                                   to compare gids. */
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* If an existing lock with a matching
                                           gid is granted, we grant the new one
                                           too. */
                                        if (lock->l_req_mode == lock->l_granted_mode)
                                                RETURN(2);

                                        /* Otherwise we are scanning the queue
                                         * of waiting locks and it means the
                                         * current request would block along
                                         * with the existing lock (which is
                                         * already blocked).
                                         * If we are in nonblocking mode,
                                         * return immediately. */
                                        if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                                compat = -EWOULDBLOCK;
                                                goto destroylock;
                                        }
                                        /* If this group lock is compatible
                                         * with another group lock on the
                                         * waiting list, they must be together
                                         * in the list, so they can be granted
                                         * at the same time.  Otherwise the
                                         * later lock can get stuck behind
                                         * another, incompatible, lock. */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        /* Because 'lock' is not granted, we
                                         * can stop processing this queue and
                                         * return immediately.  There is no
                                         * need to check the rest of the
                                         * list. */
                                        RETURN(0);
                                }
                        }

                        if (unlikely(req_mode == LCK_GROUP &&
                                     (lock->l_req_mode != lock->l_granted_mode))) {
                                scan = 1;
                                compat = 0;
                                if (lock->l_req_mode != LCK_GROUP) {
                                        /* Ok, we hit a non-GROUP lock; there
                                           should be no more GROUP locks later
                                           on, so queue in front of the first
                                           non-GROUP lock. */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        cfs_list_del_init(&lock->l_res_link);
                                        ldlm_resource_insert_lock_after(req, lock);
                                        break;
                                }
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* found it */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        break;
                                }
                                continue;
                        }

                        if (unlikely(lock->l_req_mode == LCK_GROUP)) {
                                /* If the compared lock is GROUP, then the
                                 * requested one is PR/PW so this is not
                                 * compatible; the extent range does not
                                 * matter */
                                if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                        compat = -EWOULDBLOCK;
                                        goto destroylock;
                                } else {
                                        *flags |= LDLM_FL_NO_TIMEOUT;
                                }
                        } else if (lock->l_policy_data.l_extent.end < req_start ||
                                   lock->l_policy_data.l_extent.start > req_end) {
                                /* if a non-group lock doesn't overlap, skip it */
                                continue;
                        } else if (lock->l_req_extent.end < req_start ||
                                   lock->l_req_extent.start > req_end) {
                                /* false contention, the requested extents
                                 * don't really overlap */
                                check_contention = 0;
                        }

                        if (!work_list)
                                RETURN(0);

                        /* don't count conflicting glimpse locks */
                        if (lock->l_req_mode == LCK_PR &&
                            lock->l_policy_data.l_extent.start == 0 &&
                            lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF)
                                check_contention = 0;

                        *contended_locks += check_contention;

                        compat = 0;
                        if (lock->l_blocking_ast)
                                ldlm_add_ast_work_item(lock, req, work_list);
                }
        }

        if (ldlm_check_contention(req, *contended_locks) &&
            compat == 0 &&
            (*flags & LDLM_FL_DENY_ON_CONTENTION) &&
            req->l_req_mode != LCK_GROUP &&
            req_end - req_start <=
            req->l_resource->lr_namespace->ns_max_nolock_size)
                GOTO(destroylock, compat = -EUSERS);

        RETURN(compat);
destroylock:
        cfs_list_del_init(&req->l_res_link);
        ldlm_lock_destroy_nolock(req);
        *err = compat;
        RETURN(compat);
}
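
/* Drop all blocking-AST work items queued on @bl_list: clear LDLM_FL_AST_SENT,
 * release the reference on each blocking lock and on the lock itself. */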
static void discard_bl_list(cfs_list_t *bl_list)
{
        cfs_list_t *tmp, *pos;
        ENTRY;

        cfs_list_for_each_safe(pos, tmp, bl_list) {
                struct ldlm_lock *lock =
                        cfs_list_entry(pos, struct ldlm_lock, l_bl_ast);

                cfs_list_del_init(&lock->l_bl_ast);
                LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
                lock->l_flags &= ~LDLM_FL_AST_SENT;
                LASSERT(lock->l_bl_ast_run == 0);
                LASSERT(lock->l_blocking_lock);
                LDLM_LOCK_RELEASE(lock->l_blocking_lock);
                lock->l_blocking_lock = NULL;
                LDLM_LOCK_RELEASE(lock);
        }
        EXIT;
}

/* If first_enq is 0 (i.e., called from ldlm_reprocess_queue):
 *   - blocking ASTs have already been sent
 *   - must call this function with the ns lock held
 *
 * If first_enq is 1 (i.e., called from ldlm_lock_enqueue):
 *   - blocking ASTs have not been sent
 *   - must call this function with the ns lock held once */
int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                             ldlm_error_t *err, cfs_list_t *work_list)
{
        struct ldlm_resource *res = lock->l_resource;
        CFS_LIST_HEAD(rpc_list);
        int rc, rc2;
        int contended_locks = 0;
        ENTRY;

        LASSERT(cfs_list_empty(&res->lr_converting));
        LASSERT(!(*flags & LDLM_FL_DENY_ON_CONTENTION) ||
                !(lock->l_flags & LDLM_AST_DISCARD_DATA));
        check_res_locked(res);
        *err = ELDLM_OK;

        if (!first_enq) {
                /* Careful observers will note that we don't handle -EWOULDBLOCK
                 * here, but it's ok for a non-obvious reason -- compat_queue
                 * can only return -EWOULDBLOCK if (flags & BLOCK_NOWAIT).
                 * flags should always be zero here, and if that ever stops
                 * being true, we want to find out. */
                LASSERT(*flags == 0);
                rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags,
                                              err, NULL, &contended_locks);
                if (rc == 1) {
                        rc = ldlm_extent_compat_queue(&res->lr_waiting, lock,
                                                      flags, err, NULL,
                                                      &contended_locks);
                }
                if (rc == 0)
                        RETURN(LDLM_ITER_STOP);

                ldlm_resource_unlink_lock(lock);

                if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_EVICT_RACE))
                        ldlm_extent_policy(res, lock, flags);
                ldlm_grant_lock(lock, work_list);
                RETURN(LDLM_ITER_CONTINUE);
        }

 restart:
        contended_locks = 0;
        rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags, err,
                                      &rpc_list, &contended_locks);
        if (rc < 0)
                GOTO(out, rc); /* lock was destroyed */
        if (rc == 2)
                goto grant;

        rc2 = ldlm_extent_compat_queue(&res->lr_waiting, lock, flags, err,
                                       &rpc_list, &contended_locks);
        if (rc2 < 0)
                GOTO(out, rc = rc2); /* lock was destroyed */

        if (rc + rc2 == 2) {
 grant:
                ldlm_extent_policy(res, lock, flags);
                ldlm_resource_unlink_lock(lock);
                ldlm_grant_lock(lock, NULL);
        } else {
                /* If either of the compat_queue()s returned failure, then we
                 * have ASTs to send and must go onto the waiting list.
                 *
                 * bug 2322: we used to unlink and re-add here, which was a
                 * terrible folly -- if we goto restart, we could get
                 * re-ordered!  Causes deadlock, because ASTs aren't sent! */
                if (cfs_list_empty(&lock->l_res_link))
                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                unlock_res(res);
                rc = ldlm_run_ast_work(&rpc_list, LDLM_WORK_BL_AST);

                if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_OST_FAIL_RACE) &&
                    !ns_is_client(res->lr_namespace))
                        class_fail_export(lock->l_export);

                lock_res(res);

                if (rc == -ERESTART) {

                        /* 15715: The lock was granted and destroyed after
                         * the resource lock was dropped.  The interval node
                         * was freed in ldlm_lock_destroy.  Anyway, this always
                         * happens when a client is being evicted, so it is
                         * ok to return an error. -jay */
                        if (lock->l_destroyed) {
                                *err = -EAGAIN;
                                GOTO(out, rc = -EAGAIN);
                        }

                        /* lock was granted while resource was unlocked. */
                        if (lock->l_granted_mode == lock->l_req_mode) {
                                /* bug 11300: if the lock has been granted,
                                 * break out earlier because otherwise we will
                                 * go to restart and ldlm_resource_unlink will
                                 * be called and it causes the interval node to
                                 * be freed.  Then we will fail at
                                 * ldlm_extent_add_lock() */
                                *flags &= ~(LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV |
                                            LDLM_FL_BLOCK_WAIT);
                                GOTO(out, rc = 0);
                        }

                        GOTO(restart, -ERESTART);
                }

                *flags |= LDLM_FL_BLOCK_GRANTED;
                /* this way we force the client to wait for the lock
                 * endlessly once the lock is enqueued -bzzz */
                *flags |= LDLM_FL_NO_TIMEOUT;
        }
        RETURN(0);
out:
        if (!cfs_list_empty(&rpc_list)) {
                LASSERT(!(lock->l_flags & LDLM_AST_DISCARD_DATA));
                discard_bl_list(&rpc_list);
        }
        RETURN(rc);
}

/* When a lock is cancelled by a client, the KMS may undergo change if this
 * is the "highest lock".  This function returns the new KMS value.
 * Caller must hold ns_lock already.
 *
 * NB: A lock on [x,y] protects a KMS of up to y + 1 bytes! */
__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms)
{
        struct ldlm_resource *res = lock->l_resource;
        cfs_list_t *tmp;
        struct ldlm_lock *lck;
        __u64 kms = 0;
        ENTRY;

        /* don't let another thread in ldlm_extent_shift_kms race in
         * just after we finish and take our lock into account in its
         * calculation of the kms */
        lock->l_flags |= LDLM_FL_KMS_IGNORE;

        cfs_list_for_each(tmp, &res->lr_granted) {
                lck = cfs_list_entry(tmp, struct ldlm_lock, l_res_link);

                if (lck->l_flags & LDLM_FL_KMS_IGNORE)
                        continue;

                if (lck->l_policy_data.l_extent.end >= old_kms)
                        RETURN(old_kms);

                /* This extent _has_ to be smaller than old_kms (checked above)
                 * so kms can only ever be smaller or the same as old_kms. */
                if (lck->l_policy_data.l_extent.end + 1 > kms)
                        kms = lck->l_policy_data.l_extent.end + 1;
        }
        LASSERTF(kms <= old_kms, "kms "LPU64" old_kms "LPU64"\n", kms, old_kms);

        RETURN(kms);
}
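
/* Interval-node management: every granted extent lock is attached to a
 * struct ldlm_interval in the per-mode interval trees of its resource;
 * locks covering an identical extent share a single node via li_group. */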
cfs_mem_cache_t *ldlm_interval_slab;
struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock)
{
        struct ldlm_interval *node;
        ENTRY;

        LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
        OBD_SLAB_ALLOC_PTR_GFP(node, ldlm_interval_slab, CFS_ALLOC_IO);
        if (node == NULL)
                RETURN(NULL);

        CFS_INIT_LIST_HEAD(&node->li_group);
        ldlm_interval_attach(node, lock);
        RETURN(node);
}
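
/* Free an interval node; it must no longer be attached to any lock or
 * linked into a tree. */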
void ldlm_interval_free(struct ldlm_interval *node)
{
        if (node) {
                LASSERT(cfs_list_empty(&node->li_group));
                LASSERT(!interval_is_intree(&node->li_node));
                OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
        }
}

/* interval tree, for LDLM_EXTENT. */
void ldlm_interval_attach(struct ldlm_interval *n,
                          struct ldlm_lock *l)
{
        LASSERT(l->l_tree_node == NULL);
        LASSERT(l->l_resource->lr_type == LDLM_EXTENT);

        cfs_list_add_tail(&l->l_sl_policy, &n->li_group);
        l->l_tree_node = n;
}
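
/* Detach a lock from its interval node.  Returns the node if this was the
 * last lock attached to it (so the caller can erase and free it), or NULL. */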
struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l)
{
        struct ldlm_interval *n = l->l_tree_node;

        if (n == NULL)
                return NULL;

        LASSERT(!cfs_list_empty(&n->li_group));
        l->l_tree_node = NULL;
        cfs_list_del_init(&l->l_sl_policy);

        return (cfs_list_empty(&n->li_group) ? n : NULL);
}
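
/* Map a (power-of-two) lock mode to its index in the resource's lr_itree[]
 * array of per-mode interval trees. */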
static inline int lock_mode_to_index(ldlm_mode_t mode)
{
        int index;

        LASSERT(mode != 0);
        LASSERT(IS_PO2(mode));
        for (index = -1; mode; index++, mode >>= 1) ;
        LASSERT(index < LCK_MODE_NUM);
        return index;
}
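
/* Insert a granted extent lock into the interval tree that matches its
 * granted mode; locks sharing the same extent share one interval node. */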
void ldlm_extent_add_lock(struct ldlm_resource *res,
                          struct ldlm_lock *lock)
{
        struct interval_node *found, **root;
        struct ldlm_interval *node;
        struct ldlm_extent *extent;
        int idx;

        LASSERT(lock->l_granted_mode == lock->l_req_mode);

        node = lock->l_tree_node;
        LASSERT(node != NULL);
        LASSERT(!interval_is_intree(&node->li_node));

        idx = lock_mode_to_index(lock->l_granted_mode);
        LASSERT(lock->l_granted_mode == 1 << idx);
        LASSERT(lock->l_granted_mode == res->lr_itree[idx].lit_mode);

        /* initialize the node's extent */
        extent = &lock->l_policy_data.l_extent;
        interval_set(&node->li_node, extent->start, extent->end);

        root = &res->lr_itree[idx].lit_root;
        found = interval_insert(&node->li_node, root);
        if (found) { /* The same policy group already exists. */
                struct ldlm_interval *tmp = ldlm_interval_detach(lock);
                LASSERT(tmp != NULL);
                ldlm_interval_free(tmp);
                ldlm_interval_attach(to_ldlm_interval(found), lock);
        }
        res->lr_itree[idx].lit_size++;

        /* Even though we use an interval tree to manage extent locks, we also
         * add them to the granted list, for debugging purposes etc. */
        ldlm_resource_add_lock(res, &res->lr_granted, lock);
}
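
/* Remove a granted extent lock from its per-mode interval tree, freeing
 * the interval node if this was the last lock attached to it. */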
void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
{
        struct ldlm_resource *res = lock->l_resource;
        struct ldlm_interval *node = lock->l_tree_node;
        struct ldlm_interval_tree *tree;
        int idx;

        if (!node || !interval_is_intree(&node->li_node)) /* duplicate unlink */
                return;

        idx = lock_mode_to_index(lock->l_granted_mode);
        LASSERT(lock->l_granted_mode == 1 << idx);
        tree = &res->lr_itree[idx];

        LASSERT(tree->lit_root != NULL); /* assure the tree is not null */

        tree->lit_size--;
        node = ldlm_interval_detach(lock);
        if (node) {
                interval_erase(&node->li_node, &tree->lit_root);
                ldlm_interval_free(node);
        }
}