/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
 * Author: Peter Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 *
 * This file is part of the Lustre file system, http://www.lustre.org
 * Lustre is a trademark of Cluster File Systems, Inc.
 *
 * You may have signed or agreed to another license before downloading
 * this software.  If so, you are bound by the terms and conditions
 * of that agreement, and the following does not apply to you.  See the
 * LICENSE file included with this distribution for more information.
 *
 * If you did not agree to a different license, then this copy of Lustre
 * is open source software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * In either case, Lustre is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * license text for more details.
 */
#define DEBUG_SUBSYSTEM S_LDLM
#ifndef __KERNEL__
# include <liblustre.h>
#else
# include <libcfs/libcfs.h>
# include <libcfs/kp30.h>
#endif

#include <lustre_dlm.h>
#include <obd_support.h>
#include <lustre_lib.h>

#include "ldlm_internal.h"
#define LDLM_MAX_GROWN_EXTENT (32 * 1024 * 1024 - 1)
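
/* Note (added for clarity, not in the original source): when a PW/CW request
 * sees more than 32 conflicting locks, ldlm_extent_internal_policy_fixup()
 * below clamps the expanded extent to at most 32MB past the requested start,
 * rather than letting it grow arbitrarily. */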
/* fixup the ldlm_extent after expanding */
static void ldlm_extent_internal_policy_fixup(struct ldlm_lock *req,
                                              struct ldlm_extent *new_ex,
                                              int conflicting)
{
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        __u64 req_align, mask;
        if (conflicting > 32 && (req_mode == LCK_PW || req_mode == LCK_CW)) {
                if (req_end < req_start + LDLM_MAX_GROWN_EXTENT)
                        new_ex->end = min(req_start + LDLM_MAX_GROWN_EXTENT,
                                          new_ex->end);
        }

        if (new_ex->start == 0 && new_ex->end == OBD_OBJECT_EOF) {
                EXIT;
                return;
        }
        /* we need to ensure that the lock extent is properly aligned to what
         * the client requested.  We align it to the lowest-common denominator
         * of the client's requested lock start and end alignment. */
        mask = 0x1000ULL;
        req_align = (req_end + 1) | req_start;
        if (req_align != 0) {
                while ((req_align & mask) == 0)
                        mask <<= 1;
        }
        mask -= 1;
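
        /* Hypothetical example (not in the original source): for
         * req_start = 0x1000 and req_end = 0x2fff, req_align = 0x3000 | 0x1000
         * = 0x3000, the loop stops at mask = 0x1000, and after mask -= 1 the
         * code below trims the expanded extent inward to 4K boundaries. */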
        /* We can only shrink the lock, not grow it.
         * This should never cause lock to be smaller than requested,
         * since requested lock was already aligned on these boundaries. */
        new_ex->start = ((new_ex->start - 1) | mask) + 1;
        new_ex->end = ((new_ex->end + 1) & ~mask) - 1;
        LASSERTF(new_ex->start <= req_start,
                 "mask "LPX64" grant start "LPU64" req start "LPU64"\n",
                 mask, new_ex->start, req_start);
        LASSERTF(new_ex->end >= req_end,
                 "mask "LPX64" grant end "LPU64" req end "LPU64"\n",
                 mask, new_ex->end, req_end);
        EXIT;
}
/* The purpose of this function is to return:
 *  - the maximum extent
 *  - containing the requested extent
 *  - and not overlapping existing conflicting extents outside the requested one
 *
 * Use the interval tree to expand the lock extent for granted locks.
 */
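/* Hypothetical illustration (not in the original source): with granted PW
 * locks on [0, 99] and [200, 299] and a request for [120, 150], the
 * interval-tree expansion below is limited to the free gap [100, 199]. */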
static void ldlm_extent_internal_policy_granted(struct ldlm_lock *req,
                                                struct ldlm_extent *new_ex)
{
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        struct ldlm_interval_tree *tree;
        struct interval_node_extent limiter = { new_ex->start, new_ex->end };
        int conflicting = 0;
        int idx;
        ENTRY;

        lockmode_verify(req_mode);
        /* using interval tree to handle the ldlm extent granted locks */
        for (idx = 0; idx < LCK_MODE_NUM; idx++) {
                struct interval_node_extent ext = { req_start, req_end };

                tree = &res->lr_itree[idx];
                if (lockmode_compat(tree->lit_mode, req_mode))
                        continue;

                conflicting += tree->lit_size;
                if (conflicting > 4)
                        limiter.start = req_start;

                if (interval_is_overlapped(tree->lit_root, &ext))
                        printk("req_mode = %d, tree->lit_mode = %d, "
                               "tree->lit_size = %d\n",
                               req_mode, tree->lit_mode, tree->lit_size);
                interval_expand(tree->lit_root, &ext, &limiter);
                limiter.start = max(limiter.start, ext.start);
                limiter.end = min(limiter.end, ext.end);
                if (limiter.start == req_start && limiter.end == req_end)
                        break;
        }
        new_ex->start = limiter.start;
        new_ex->end = limiter.end;
        LASSERT(new_ex->start <= req_start);
        LASSERT(new_ex->end >= req_end);

        ldlm_extent_internal_policy_fixup(req, new_ex, conflicting);
        EXIT;
}
/* The purpose of this function is to return:
 *  - the maximum extent
 *  - containing the requested extent
 *  - and not overlapping existing conflicting extents outside the requested one
 */
static void
ldlm_extent_internal_policy_waiting(struct ldlm_lock *req,
                                    struct ldlm_extent *new_ex)
{
        struct list_head *tmp;
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        int conflicting = 0;
        ENTRY;

        lockmode_verify(req_mode);
        /* for waiting locks */
        list_for_each(tmp, &res->lr_waiting) {
                struct ldlm_lock *lock;
                struct ldlm_extent *l_extent;

                lock = list_entry(tmp, struct ldlm_lock, l_res_link);
                l_extent = &lock->l_policy_data.l_extent;

                /* We already hit the minimum requested size, search no more */
                if (new_ex->start == req_start && new_ex->end == req_end) {
                        EXIT;
                        return;
                }

                /* Don't conflict with ourselves */
                if (req == lock)
                        continue;
                /* Locks are compatible, overlap doesn't matter */
                /* Until bug 20 is fixed, try to avoid granting overlapping
                 * locks on one client (they take a long time to cancel) */
                if (lockmode_compat(lock->l_req_mode, req_mode) &&
                    lock->l_export != req->l_export)
                        continue;
                /* If this is a high-traffic lock, don't grow downwards at all
                 * or grow upwards too much */
                ++conflicting;
                if (conflicting > 4)
                        new_ex->start = req_start;
                /* If lock doesn't overlap new_ex, skip it. */
                if (!ldlm_extent_overlap(l_extent, new_ex))
                        continue;
                /* Locks conflicting in requested extents and we can't satisfy
                 * both locks, so ignore it.  Either we will ping-pong this
                 * extent (we would regardless of what extent we granted) or
                 * lock is unused and it shouldn't limit our extent growth. */
                if (ldlm_extent_overlap(&lock->l_req_extent, &req->l_req_extent))
                        continue;
                /* We grow extents downwards only as far as they don't overlap
                 * with already-granted locks, on the assumption that clients
                 * will be writing beyond the initial requested end and would
                 * then need to enqueue a new lock beyond the previous request.
                 * l_req_extent->end strictly < req_start, checked above. */
                if (l_extent->start < req_start && new_ex->start != req_start) {
                        if (l_extent->end >= req_start)
                                new_ex->start = req_start;
                        else
                                new_ex->start = min(l_extent->end + 1,
                                                    req_start);
                }
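
                /* Hypothetical example (not in the original source): with
                 * another lock on [0, 49] and our request on [100, 199],
                 * new_ex->start is pulled up from 0 to 50 so the grant stays
                 * clear of the other lock's extent. */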
                /* If we need to cancel this lock anyway because our request
                 * overlaps the granted lock, we grow up to its requested
                 * extent start instead of limiting this extent, assuming that
                 * clients are writing forwards and the lock had overgrown
                 * its extent downwards before we enqueued our request. */
                if (l_extent->end > req_end) {
                        if (l_extent->start <= req_end)
                                new_ex->end = max(lock->l_req_extent.start - 1,
                                                  req_end);
                        else
                                new_ex->end = max(l_extent->start - 1, req_end);
                }
        }
        ldlm_extent_internal_policy_fixup(req, new_ex, conflicting);
        EXIT;
}
/* In order to determine the largest possible extent we can grant, we need
 * to scan all of the queues. */
static void ldlm_extent_policy(struct ldlm_resource *res,
                               struct ldlm_lock *lock, int *flags)
{
        struct ldlm_extent new_ex = { .start = 0, .end = OBD_OBJECT_EOF };
        if (lock->l_export == NULL)
                /*
                 * this is a local lock taken by the server (e.g., as a part
                 * of OST-side locking, or unlink handling).  Expansion doesn't
                 * make a lot of sense for local locks, because they are
                 * dropped immediately on operation completion and would only
                 * conflict with other threads.
                 */
                return;

        if (lock->l_policy_data.l_extent.start == 0 &&
            lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF)
                /* fast-path whole file locks */
                return;
        ldlm_extent_internal_policy_granted(lock, &new_ex);
        ldlm_extent_internal_policy_waiting(lock, &new_ex);

        if (new_ex.start != lock->l_policy_data.l_extent.start ||
            new_ex.end != lock->l_policy_data.l_extent.end) {
                *flags |= LDLM_FL_LOCK_CHANGED;
                lock->l_policy_data.l_extent.start = new_ex.start;
                lock->l_policy_data.l_extent.end = new_ex.end;
        }
}
static int ldlm_check_contention(struct ldlm_lock *lock, int contended_locks)
{
        struct ldlm_resource *res = lock->l_resource;
        cfs_time_t now = cfs_time_current();

        CDEBUG(D_DLMTRACE, "contended locks = %d\n", contended_locks);
        if (contended_locks > res->lr_namespace->ns_contended_locks)
                res->lr_contention_time = now;
        return cfs_time_before(now, cfs_time_add(res->lr_contention_time,
                cfs_time_seconds(res->lr_namespace->ns_contention_time)));
}
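
/* Note (added for clarity, not in the original source): once more than
 * ns_contended_locks conflicting locks have been seen on a resource, the
 * resource is treated as contended for the next ns_contention_time seconds;
 * small (<= ns_max_nolock_size) non-GROUP requests arriving with
 * LDLM_FL_DENY_ON_CONTENTION during that window are denied with -EUSERS in
 * ldlm_extent_compat_queue() below. */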
struct ldlm_extent_compat_args {
        struct list_head *work_list;
        struct ldlm_lock *lock;
        ldlm_mode_t mode;
        int *locks;
        int *compat;
};
static enum interval_iter ldlm_extent_compat_cb(struct interval_node *n,
                                                void *data)
{
        struct ldlm_extent_compat_args *priv = data;
        struct ldlm_interval *node = to_ldlm_interval(n);
        struct ldlm_extent *extent;
        struct list_head *work_list = priv->work_list;
        struct ldlm_lock *lock, *enq = priv->lock;
        ldlm_mode_t mode = priv->mode;
        int count = 0;
        ENTRY;

        LASSERT(!list_empty(&node->li_group));
        list_for_each_entry(lock, &node->li_group, l_sl_policy) {
                /* interval tree is for granted lock */
                LASSERTF(mode == lock->l_granted_mode,
                         "mode = %s, lock->l_granted_mode = %s\n",
                         ldlm_lockname[mode],
                         ldlm_lockname[lock->l_granted_mode]);
                count++;
                if (lock->l_blocking_ast)
                        ldlm_add_ast_work_item(lock, enq, work_list);
        }

        /* don't count conflicting glimpse locks */
        extent = ldlm_interval_extent(node);
        if (!(mode == LCK_PR &&
              extent->start == 0 && extent->end == OBD_OBJECT_EOF))
                *priv->locks += count;

        if (priv->compat)
                *priv->compat = 0;

        RETURN(INTERVAL_ITER_CONT);
}
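
/* Note (added for clarity, not in the original source): this callback runs
 * for each granted interval visited by interval_search()/interval_iterate();
 * it queues blocking ASTs for the locks in the interval's policy group and,
 * via priv->compat, flags the request as incompatible, since intervals
 * visited on an incompatible-mode tree overlap the request by construction. */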
/* Determine if the lock is compatible with all locks on the queue.
 * We stop walking the queue if we hit ourselves so we don't take
 * conflicting locks enqueued after us into account, or we'd wait forever.
 *
 * Returns:
 *   0 if the lock is not compatible
 *   1 if the lock is compatible
 *   2 if this group lock is compatible and requires no further checking
 *   negative error, such as -EWOULDBLOCK for group locks
 */
static int
ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                         int *flags, ldlm_error_t *err,
                         struct list_head *work_list, int *contended_locks)
{
        struct list_head *tmp;
        struct ldlm_lock *lock;
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        int compat = 1;
        int scan = 0;
        int check_contention;
        ENTRY;

        lockmode_verify(req_mode);
        /* Using interval tree for granted lock */
        if (queue == &res->lr_granted) {
                struct ldlm_interval_tree *tree;
                struct ldlm_extent_compat_args data = {.work_list = work_list,
                                                       .lock = req,
                                                       .locks = contended_locks,
                                                       .compat = &compat };
                struct interval_node_extent ex = { .start = req_start,
                                                   .end = req_end };
                int idx, rc;
                for (idx = 0; idx < LCK_MODE_NUM; idx++) {
                        tree = &res->lr_itree[idx];
                        if (tree->lit_root == NULL) /* empty tree, skipped */
                                continue;

                        data.mode = tree->lit_mode;
                        if (lockmode_compat(req_mode, tree->lit_mode)) {
                                struct ldlm_interval *node;
                                struct ldlm_extent *extent;

                                if (req_mode != LCK_GROUP)
                                        continue;

                                /* group lock, grant it immediately if
                                 * compatible */
                                node = to_ldlm_interval(tree->lit_root);
                                extent = ldlm_interval_extent(node);
                                if (req->l_policy_data.l_extent.gid ==
                                    extent->gid)
                                        RETURN(2);
                        }

                        if (tree->lit_mode == LCK_GROUP) {
                                if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                        compat = -EWOULDBLOCK;
                                        goto destroylock;
                                }

                                *flags |= LDLM_FL_NO_TIMEOUT;
                                if (!work_list)
                                        RETURN(0);

                                /* if work list is not NULL, add all
                                 * locks in the tree to work list */
                                compat = 0;
                                interval_iterate(tree->lit_root,
                                                 ldlm_extent_compat_cb, &data);
                                continue;
                        }

                        if (!work_list) {
                                rc = interval_is_overlapped(tree->lit_root,
                                                            &ex);
                                if (rc)
                                        RETURN(0);
                        } else {
                                interval_search(tree->lit_root, &ex,
                                                ldlm_extent_compat_cb, &data);
                                if (!list_empty(work_list) && compat)
                                        compat = 0;
                        }
                }
        } else { /* for waiting queue */
                list_for_each(tmp, queue) {
                        check_contention = 1;

                        lock = list_entry(tmp, struct ldlm_lock, l_res_link);

                        if (req == lock)
                                break;
                        if (unlikely(scan)) {
                                /* We only get here if we are queuing a GROUP
                                 * lock and met some incompatible one.  The
                                 * main idea of this code is to insert the
                                 * GROUP lock past a compatible GROUP lock in
                                 * the waiting queue or, if there is not any,
                                 * then in front of the first non-GROUP lock */
                                if (lock->l_req_mode != LCK_GROUP) {
                                        /* Ok, we hit non-GROUP lock, there
                                         * should be no more GROUP locks later
                                         * on, queue in front of first
                                         * non-GROUP lock */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        list_del_init(&lock->l_res_link);
                                        ldlm_resource_insert_lock_after(req, lock);
                                        break;
                                }
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* found it */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        break;
                                }
                                continue;
                        }
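
                        /* Note (added for clarity, not in the original
                         * source): the insert-after / del / insert-after
                         * sequence above places req immediately before the
                         * first non-GROUP lock, since the resource list has
                         * no insert-before helper. */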
                        /* locks are compatible, overlap doesn't matter */
                        if (lockmode_compat(lock->l_req_mode, req_mode)) {
                                if (req_mode == LCK_PR &&
                                    ((lock->l_policy_data.l_extent.start <=
                                      req->l_policy_data.l_extent.start) &&
                                     (lock->l_policy_data.l_extent.end >=
                                      req->l_policy_data.l_extent.end))) {
                                        /* If we met a PR lock just like us or
                                         * wider, and nobody down the list
                                         * conflicted with it, that means we
                                         * can skip processing of the rest of
                                         * the list and safely place ourselves
                                         * at the end of the list, or grant
                                         * (depending on whether we met a
                                         * conflicting lock later or not).
                                         * In case of 1st enqueue only we
                                         * continue traversing if there is
                                         * something conflicting down the list
                                         * because we need to make sure that
                                         * something is marked as AST_SENT as
                                         * well; in case of an empty worklist
                                         * we would exit on the first conflict
                                         * met. */
                                        /* There IS a case where such flag is
                                         * not set for a lock, yet it blocks
                                         * something.  Luckily for us this is
                                         * only during destroy, so lock is
                                         * exclusive.  So here we are safe */
                                        if (!(lock->l_flags & LDLM_FL_AST_SENT)) {
                                                RETURN(compat);
                                        }
                                }
                                /* non-group locks are compatible, overlap
                                 * doesn't matter */
                                if (likely(req_mode != LCK_GROUP))
                                        continue;
                                /* If we are trying to get a GROUP lock and
                                 * there is another one of this kind, we need
                                 * to compare gid */
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* If existing lock with matched gid
                                         * is granted, we grant new one too. */
                                        if (lock->l_req_mode == lock->l_granted_mode)
                                                RETURN(2);

                                        /* Otherwise we are scanning queue of
                                         * waiting locks and it means current
                                         * request would block along with
                                         * existing lock (that is already
                                         * blocked).
                                         * If we are in nonblocking mode -
                                         * return immediately */
                                        if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                                compat = -EWOULDBLOCK;
                                                goto destroylock;
                                        }
                                        /* If this group lock is compatible
                                         * with another group lock on the
                                         * waiting list, they must be together
                                         * in the list, so they can be granted
                                         * at the same time.  Otherwise the
                                         * later lock can get stuck behind
                                         * another, incompatible, lock. */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        /* Because 'lock' is not granted, we
                                         * can stop processing this queue and
                                         * return immediately.  There is no
                                         * need to check the rest of the
                                         * list. */
                                        RETURN(0);
                                }
                        }
                        if (unlikely(req_mode == LCK_GROUP &&
                                     (lock->l_req_mode != lock->l_granted_mode))) {
                                scan = 1;
                                compat = 0;
                                if (lock->l_req_mode != LCK_GROUP) {
                                        /* Ok, we hit non-GROUP lock, there
                                         * should be no more GROUP locks later
                                         * on, queue in front of first
                                         * non-GROUP lock */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        list_del_init(&lock->l_res_link);
                                        ldlm_resource_insert_lock_after(req, lock);
                                        break;
                                }
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* found it */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        break;
                                }
                                continue;
                        }
                        if (unlikely(lock->l_req_mode == LCK_GROUP)) {
                                /* If compared lock is GROUP, then requested is
                                 * PR/PW, so this is not compatible; extent
                                 * range does not matter */
                                if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                        compat = -EWOULDBLOCK;
                                        goto destroylock;
                                } else {
                                        *flags |= LDLM_FL_NO_TIMEOUT;
                                }
                        } else if (lock->l_policy_data.l_extent.end < req_start ||
                                   lock->l_policy_data.l_extent.start > req_end) {
                                /* if a non-group lock doesn't overlap, skip it */
                                continue;
                        } else if (lock->l_req_extent.end < req_start ||
                                   lock->l_req_extent.start > req_end) {
                                /* false contention, the requests don't really
                                 * overlap */
                                check_contention = 0;
                        }
                        if (!work_list)
                                RETURN(0);

                        /* don't count conflicting glimpse locks */
                        if (lock->l_req_mode == LCK_PR &&
                            lock->l_policy_data.l_extent.start == 0 &&
                            lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF)
                                check_contention = 0;

                        *contended_locks += check_contention;

                        compat = 0;
                        if (lock->l_blocking_ast)
                                ldlm_add_ast_work_item(lock, req, work_list);
                }
        }
        if (ldlm_check_contention(req, *contended_locks) &&
            compat == 0 &&
            (*flags & LDLM_FL_DENY_ON_CONTENTION) &&
            req->l_req_mode != LCK_GROUP &&
            req_end - req_start <=
            req->l_resource->lr_namespace->ns_max_nolock_size)
                GOTO(destroylock, compat = -EUSERS);

        RETURN(compat);
destroylock:
        list_del_init(&req->l_res_link);
        ldlm_lock_destroy_nolock(req);
        *err = compat;
        RETURN(compat);
}
static void discard_bl_list(struct list_head *bl_list)
{
        struct list_head *tmp, *pos;
        ENTRY;

        list_for_each_safe(pos, tmp, bl_list) {
                struct ldlm_lock *lock =
                        list_entry(pos, struct ldlm_lock, l_bl_ast);

                list_del_init(&lock->l_bl_ast);
                LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
                lock->l_flags &= ~LDLM_FL_AST_SENT;
                LASSERT(lock->l_bl_ast_run == 0);
                LASSERT(lock->l_blocking_lock);
                LDLM_LOCK_PUT(lock->l_blocking_lock);
                lock->l_blocking_lock = NULL;
                LDLM_LOCK_PUT(lock);
        }
        EXIT;
}
/* If first_enq is 0 (ie, called from ldlm_reprocess_queue):
 *   - blocking ASTs have already been sent
 *   - must call this function with the ns lock held
 *
 * If first_enq is 1 (ie, called from ldlm_lock_enqueue):
 *   - blocking ASTs have not been sent
 *   - must call this function with the ns lock held once */
int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                             ldlm_error_t *err, struct list_head *work_list)
{
        struct ldlm_resource *res = lock->l_resource;
        CFS_LIST_HEAD(rpc_list);
        int rc, rc2;
        int contended_locks = 0;
        ENTRY;

        LASSERT(list_empty(&res->lr_converting));
        LASSERT(!(*flags & LDLM_FL_DENY_ON_CONTENTION) ||
                !(lock->l_flags & LDLM_AST_DISCARD_DATA));
        check_res_locked(res);
        *err = ELDLM_OK;

        if (!first_enq) {
                /* Careful observers will note that we don't handle -EWOULDBLOCK
                 * here, but it's ok for a non-obvious reason -- compat_queue
                 * can only return -EWOULDBLOCK if (flags & BLOCK_NOWAIT).
                 * flags should always be zero here, and if that ever stops
                 * being true, we want to find out. */
                LASSERT(*flags == 0);
                rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags,
                                              err, NULL, &contended_locks);
                if (rc == 1) {
                        rc = ldlm_extent_compat_queue(&res->lr_waiting, lock,
                                                      flags, err, NULL,
                                                      &contended_locks);
                }
                if (rc == 0)
                        RETURN(LDLM_ITER_STOP);
                ldlm_resource_unlink_lock(lock);

                if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_EVICT_RACE))
                        ldlm_extent_policy(res, lock, flags);
                ldlm_grant_lock(lock, work_list);
                RETURN(LDLM_ITER_CONTINUE);
        }
restart:
        contended_locks = 0;
        rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags, err,
                                      &rpc_list, &contended_locks);
        if (rc < 0)
                GOTO(out, rc); /* lock was destroyed */
        if (rc == 2)
                goto grant;

        rc2 = ldlm_extent_compat_queue(&res->lr_waiting, lock, flags, err,
                                       &rpc_list, &contended_locks);
        if (rc2 < 0)
                GOTO(out, rc = rc2); /* lock was destroyed */

        if (rc + rc2 == 2) {
grant:
                ldlm_extent_policy(res, lock, flags);
                ldlm_resource_unlink_lock(lock);
                ldlm_grant_lock(lock, NULL);
        } else {
                /* If either of the compat_queue()s returned failure, then we
                 * have ASTs to send and must go onto the waiting list.
                 *
                 * bug 2322: we used to unlink and re-add here, which was a
                 * terrible folly -- if we goto restart, we could get
                 * re-ordered!  Causes deadlock, because ASTs aren't sent! */
                if (list_empty(&lock->l_res_link))
                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                unlock_res(res);
                rc = ldlm_run_ast_work(&rpc_list, LDLM_WORK_BL_AST);
                lock_res(res);
                if (rc == -ERESTART) {
                        /* lock was granted while resource was unlocked. */
                        if (lock->l_granted_mode == lock->l_req_mode) {
                                /* bug 11300: if the lock has been granted,
                                 * break earlier because otherwise, we will go
                                 * to restart and ldlm_resource_unlink will be
                                 * called and it causes the interval node to be
                                 * freed.  Then we will fail at
                                 * ldlm_extent_add_lock() */
                                *flags &= ~(LDLM_FL_BLOCK_GRANTED |
                                            LDLM_FL_BLOCK_CONV |
                                            LDLM_FL_BLOCK_WAIT);
                                GOTO(out, rc = 0);
                        }

                        GOTO(restart, -ERESTART);
                }

                *flags |= LDLM_FL_BLOCK_GRANTED;
                /* this way we force client to wait for the lock
                 * endlessly once the lock is enqueued -bzzz */
                *flags |= LDLM_FL_NO_TIMEOUT;
        }
        rc = 0;
out:
        if (!list_empty(&rpc_list)) {
                LASSERT(!(lock->l_flags & LDLM_AST_DISCARD_DATA));
                discard_bl_list(&rpc_list);
        }
        RETURN(rc);
}
/* When a lock is cancelled by a client, the KMS may undergo change if this
 * is the "highest lock".  This function returns the new KMS value.
 * Caller must hold ns_lock already.
 *
 * NB: A lock on [x,y] protects a KMS of up to y + 1 bytes! */
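/* Hypothetical example (not in the original source): if a PW lock on
 * [0, 4095] is cancelled and the widest remaining granted lock covers
 * [0, 1023], the KMS drops from 4096 to 1024. */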
__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms)
{
        struct ldlm_resource *res = lock->l_resource;
        struct list_head *tmp;
        struct ldlm_lock *lck;
        __u64 kms = 0;
        ENTRY;

        /* don't let another thread in ldlm_extent_shift_kms race in
         * just after we finish and take our lock into account in its
         * calculation of the kms */
        lock->l_flags |= LDLM_FL_KMS_IGNORE;
        list_for_each(tmp, &res->lr_granted) {
                lck = list_entry(tmp, struct ldlm_lock, l_res_link);

                if (lck->l_flags & LDLM_FL_KMS_IGNORE)
                        continue;

                if (lck->l_policy_data.l_extent.end >= old_kms)
                        RETURN(old_kms);

                /* This extent _has_ to be smaller than old_kms (checked above)
                 * so kms can only ever be smaller or the same as old_kms. */
                if (lck->l_policy_data.l_extent.end + 1 > kms)
                        kms = lck->l_policy_data.l_extent.end + 1;
        }
        LASSERTF(kms <= old_kms, "kms "LPU64" old_kms "LPU64"\n", kms, old_kms);

        RETURN(kms);
}
cfs_mem_cache_t *ldlm_interval_slab;
struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock)
{
        struct ldlm_interval *node;
        ENTRY;

        LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
        OBD_SLAB_ALLOC(node, ldlm_interval_slab, CFS_ALLOC_IO, sizeof(*node));
        if (node == NULL)
                RETURN(NULL);

        CFS_INIT_LIST_HEAD(&node->li_group);
        ldlm_interval_attach(node, lock);
        RETURN(node);
}
void ldlm_interval_free(struct ldlm_interval *node)
{
        if (node) {
                LASSERT(list_empty(&node->li_group));
                OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
        }
}
/* interval tree, for LDLM_EXTENT. */
void ldlm_interval_attach(struct ldlm_interval *n,
                          struct ldlm_lock *l)
{
        LASSERT(l->l_tree_node == NULL);
        LASSERT(l->l_resource->lr_type == LDLM_EXTENT);

        list_add_tail(&l->l_sl_policy, &n->li_group);
        l->l_tree_node = n;
}
struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l)
{
        struct ldlm_interval *n = l->l_tree_node;

        if (n == NULL)
                return NULL;

        LASSERT(!list_empty(&n->li_group));
        l->l_tree_node = NULL;
        list_del_init(&l->l_sl_policy);

        return (list_empty(&n->li_group) ? n : NULL);
}
static inline int lock_mode_to_index(ldlm_mode_t mode)
{
        int index;

        LASSERT(mode != 0);
        LASSERT(IS_PO2(mode));
        for (index = -1; mode; index++, mode >>= 1)
                ;
        LASSERT(index < LCK_MODE_NUM);
        return index;
}
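
/* Hypothetical example (not in the original source): lock modes are
 * power-of-two bits, so LCK_EX (1) maps to index 0, LCK_PW (2) to 1,
 * LCK_PR (4) to 2, and so on; the index selects the per-mode interval
 * tree in res->lr_itree[]. */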
void ldlm_extent_add_lock(struct ldlm_resource *res,
                          struct ldlm_lock *lock)
{
        struct interval_node *found, **root;
        struct ldlm_interval *node;
        struct ldlm_extent *extent;
        int idx;

        LASSERT(lock->l_granted_mode == lock->l_req_mode);

        node = lock->l_tree_node;
        LASSERT(node != NULL);

        idx = lock_mode_to_index(lock->l_granted_mode);
        LASSERT(lock->l_granted_mode == 1 << idx);
        LASSERT(lock->l_granted_mode == res->lr_itree[idx].lit_mode);

        /* node extent initialize */
        extent = &lock->l_policy_data.l_extent;
        interval_set(&node->li_node, extent->start, extent->end);

        root = &res->lr_itree[idx].lit_root;
        found = interval_insert(&node->li_node, root);
        if (found) { /* The policy group found. */
                struct ldlm_interval *tmp = ldlm_interval_detach(lock);
                LASSERT(tmp != NULL);
                ldlm_interval_free(tmp);
                ldlm_interval_attach(to_ldlm_interval(found), lock);
        }
        res->lr_itree[idx].lit_size++;

        /* even though we use interval tree to manage the extent lock, we also
         * add the locks into grant list, for debug purpose, .. */
        ldlm_resource_add_lock(res, &res->lr_granted, lock);
}
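
/* Note (added for clarity, not in the original source): locks with an
 * identical extent share a single interval node; interval_insert() returns
 * the existing node in that case, the new lock's own node is freed, and the
 * lock is attached to the existing node's li_group list instead. */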
void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
{
        struct ldlm_resource *res = lock->l_resource;
        struct ldlm_interval *node;
        struct ldlm_interval_tree *tree;
        int idx;

        if (lock->l_granted_mode != lock->l_req_mode)
                return;

        LASSERT(lock->l_tree_node != NULL);
        idx = lock_mode_to_index(lock->l_granted_mode);
        LASSERT(lock->l_granted_mode == 1 << idx);
        tree = &res->lr_itree[idx];

        LASSERT(tree->lit_root != NULL); /* assure the tree is not null */

        tree->lit_size--;
        node = ldlm_interval_detach(lock);
        if (node) {
                interval_erase(&node->li_node, &tree->lit_root);
                ldlm_interval_free(node);
        }
}