/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
 * Use is subject to license terms.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ldlm/ldlm_extent.c
 *
 * Author: Peter Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 */

#define DEBUG_SUBSYSTEM S_LDLM
#ifndef __KERNEL__
# include <liblustre.h>
#else
# include <libcfs/libcfs.h>
#endif

#include <lustre_dlm.h>
#include <obd_support.h>
#include <obd.h>
#include <lustre_lib.h>

#include "ldlm_internal.h"

#define LDLM_MAX_GROWN_EXTENT (32 * 1024 * 1024 - 1)
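
/* A heavily contended PW/CW lock is never grown more than
 * LDLM_MAX_GROWN_EXTENT (32MB - 1) bytes past its requested start; see
 * ldlm_extent_internal_policy_fixup() below. */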

/* fixup the ldlm_extent after expanding */
static void ldlm_extent_internal_policy_fixup(struct ldlm_lock *req,
                                              struct ldlm_extent *new_ex,
                                              int conflicting)
{
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        __u64 req_align, mask;

        if (conflicting > 32 && (req_mode == LCK_PW || req_mode == LCK_CW)) {
                if (req_end < req_start + LDLM_MAX_GROWN_EXTENT)
                        new_ex->end = min(req_start + LDLM_MAX_GROWN_EXTENT,
                                          new_ex->end);
        }

        if (new_ex->start == 0 && new_ex->end == OBD_OBJECT_EOF)
                return;

        /* we need to ensure that the lock extent is properly aligned to what
         * the client requested.  We align it to the lowest common denominator
         * of the client's requested lock start and end alignment. */
        mask = 0x1000ULL;
        req_align = (req_end + 1) | req_start;
        if (req_align != 0) {
                while ((req_align & mask) == 0)
                        mask <<= 1;
        }
        mask -= 1;
        /* We can only shrink the lock, not grow it.
         * This should never cause the lock to be smaller than requested,
         * since the requested lock was already aligned on these boundaries. */
        new_ex->start = ((new_ex->start - 1) | mask) + 1;
        new_ex->end = ((new_ex->end + 1) & ~mask) - 1;
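        /* Worked example (illustrative values): req_start = 0x6000 and
         * req_end = 0x7fff give req_align = 0x8000 | 0x6000 = 0xe000, so
         * mask grows from 0x1000 to 0x2000 and becomes 0x1fff.  A tentative
         * grant of [0x1234, 0x9876] is then shrunk to [0x2000, 0x7fff],
         * which still covers the requested range. */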
        LASSERTF(new_ex->start <= req_start,
                 "mask "LPX64" grant start "LPU64" req start "LPU64"\n",
                 mask, new_ex->start, req_start);
        LASSERTF(new_ex->end >= req_end,
                 "mask "LPX64" grant end "LPU64" req end "LPU64"\n",
                 mask, new_ex->end, req_end);
}

/* The purpose of this function is to return:
 * - the maximum extent
 * - containing the requested extent
 * - and not overlapping existing conflicting extents outside the requested one
 *
 * Use the interval tree to expand the lock extent for granted locks.
 */
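/* For example (illustrative values): if the requested extent is [0, 0xfff]
 * and the only conflicting granted extent on the resource is
 * [0x5000, 0x5fff], the lock can be expanded to [0, 0x4fff] without
 * creating a new conflict. */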
static void ldlm_extent_internal_policy_granted(struct ldlm_lock *req,
                                                struct ldlm_extent *new_ex)
{
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        struct ldlm_interval_tree *tree;
        struct interval_node_extent limiter = { new_ex->start, new_ex->end };
        int conflicting = 0;
        int idx;
        ENTRY;

        lockmode_verify(req_mode);

        /* using the interval tree to handle the ldlm extent granted locks */
        for (idx = 0; idx < LCK_MODE_NUM; idx++) {
                struct interval_node_extent ext = { req_start, req_end };

                tree = &res->lr_itree[idx];
                if (lockmode_compat(tree->lit_mode, req_mode))
                        continue;

                conflicting += tree->lit_size;
                if (conflicting > 4)
                        limiter.start = req_start;

                if (interval_is_overlapped(tree->lit_root, &ext))
                        CDEBUG(D_INFO,
                               "req_mode = %d, tree->lit_mode = %d, "
                               "tree->lit_size = %d\n",
                               req_mode, tree->lit_mode, tree->lit_size);
                interval_expand(tree->lit_root, &ext, &limiter);
                limiter.start = max(limiter.start, ext.start);
                limiter.end = min(limiter.end, ext.end);
                if (limiter.start == req_start && limiter.end == req_end)
                        break;
        }

        new_ex->start = limiter.start;
        new_ex->end = limiter.end;
        LASSERT(new_ex->start <= req_start);
        LASSERT(new_ex->end >= req_end);

        ldlm_extent_internal_policy_fixup(req, new_ex, conflicting);
        EXIT;
}

/* The purpose of this function is to return:
 * - the maximum extent
 * - containing the requested extent
 * - and not overlapping existing conflicting extents outside the requested one
 */
static void
ldlm_extent_internal_policy_waiting(struct ldlm_lock *req,
                                    struct ldlm_extent *new_ex)
{
        struct list_head *tmp;
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        int conflicting = 0;
        ENTRY;

        lockmode_verify(req_mode);

        /* for waiting locks */
        list_for_each(tmp, &res->lr_waiting) {
                struct ldlm_lock *lock;
                struct ldlm_extent *l_extent;

                lock = list_entry(tmp, struct ldlm_lock, l_res_link);
                l_extent = &lock->l_policy_data.l_extent;

                /* We already hit the minimum requested size, search no more */
                if (new_ex->start == req_start && new_ex->end == req_end) {
                        EXIT;
                        return;
                }

                /* Don't conflict with ourselves */
                if (req == lock)
                        continue;

                /* Locks are compatible, overlap doesn't matter */
                /* Until bug 20 is fixed, try to avoid granting overlapping
                 * locks on one client (they take a long time to cancel) */
                if (lockmode_compat(lock->l_req_mode, req_mode) &&
                    lock->l_export != req->l_export)
                        continue;

                /* If this is a high-traffic lock, don't grow downwards at all
                 * or grow upwards too much */
                ++conflicting;
                if (conflicting > 4)
                        new_ex->start = req_start;

                /* If lock doesn't overlap new_ex, skip it. */
                if (!ldlm_extent_overlap(l_extent, new_ex))
                        continue;

                /* Locks conflicting in requested extents and we can't satisfy
                 * both locks, so ignore it.  Either we will ping-pong this
                 * extent (we would regardless of what extent we granted) or
                 * lock is unused and it shouldn't limit our extent growth. */
                if (ldlm_extent_overlap(&lock->l_req_extent,&req->l_req_extent))
                        continue;

                /* We grow extents downwards only as far as they don't overlap
                 * with already-granted locks, on the assumption that clients
                 * will be writing beyond the initial requested end and would
                 * then need to enqueue a new lock beyond the previous request.
                 * l_req_extent->end strictly < req_start, checked above. */
                if (l_extent->start < req_start && new_ex->start != req_start) {
                        if (l_extent->end >= req_start)
                                new_ex->start = req_start;
                        else
                                new_ex->start = min(l_extent->end + 1,
                                                    req_start);
                }
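                /* e.g. (illustrative values): a lock whose extent is
                 * [0, 4095] below a request starting at 8192 limits downward
                 * growth to new_ex->start = min(4095 + 1, 8192) = 4096. */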

                /* If we need to cancel this lock anyways because our request
                 * overlaps the granted lock, we grow up to its requested
                 * extent start instead of limiting this extent, assuming that
                 * clients are writing forwards and the lock had overgrown
                 * its extent downwards before we enqueued our request. */
                if (l_extent->end > req_end) {
                        if (l_extent->start <= req_end)
                                new_ex->end = max(lock->l_req_extent.start - 1,
                                                  req_end);
                        else
                                new_ex->end = max(l_extent->start - 1, req_end);
                }
        }

        ldlm_extent_internal_policy_fixup(req, new_ex, conflicting);
        EXIT;
}

/* In order to determine the largest possible extent we can grant, we need
 * to scan all of the queues. */
static void ldlm_extent_policy(struct ldlm_resource *res,
                               struct ldlm_lock *lock, int *flags)
{
        struct ldlm_extent new_ex = { .start = 0, .end = OBD_OBJECT_EOF };

        if (lock->l_export == NULL)
                /*
                 * this is a local lock taken by the server (e.g., as a part
                 * of OST-side locking, or unlink handling).  Expansion
                 * doesn't make a lot of sense for local locks, because they
                 * are dropped immediately on operation completion and would
                 * only conflict with other threads.
                 */
                return;

        if (lock->l_policy_data.l_extent.start == 0 &&
            lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF)
                /* fast-path whole file locks */
                return;

        ldlm_extent_internal_policy_granted(lock, &new_ex);
        ldlm_extent_internal_policy_waiting(lock, &new_ex);

        if (new_ex.start != lock->l_policy_data.l_extent.start ||
            new_ex.end != lock->l_policy_data.l_extent.end) {
                *flags |= LDLM_FL_LOCK_CHANGED;
                lock->l_policy_data.l_extent.start = new_ex.start;
                lock->l_policy_data.l_extent.end = new_ex.end;
        }
}

static int ldlm_check_contention(struct ldlm_lock *lock, int contended_locks)
{
        struct ldlm_resource *res = lock->l_resource;
        cfs_time_t now = cfs_time_current();

        CDEBUG(D_DLMTRACE, "contended locks = %d\n", contended_locks);
        if (contended_locks > res->lr_namespace->ns_contended_locks)
                res->lr_contention_time = now;
        return cfs_time_before(now, cfs_time_add(res->lr_contention_time,
                cfs_time_seconds(res->lr_namespace->ns_contention_time)));
}
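
/* e.g. (illustrative values): with ns_contended_locks = 32 and
 * ns_contention_time = 2, a scan that finds more than 32 conflicting locks
 * stamps the resource as contended, and ldlm_check_contention() then keeps
 * returning true for the following 2 seconds. */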

struct ldlm_extent_compat_args {
        struct list_head *work_list;
        struct ldlm_lock *lock;
        ldlm_mode_t mode;
        int *locks;
        int *compat;
};

static enum interval_iter ldlm_extent_compat_cb(struct interval_node *n,
                                                void *data)
{
        struct ldlm_extent_compat_args *priv = data;
        struct ldlm_interval *node = to_ldlm_interval(n);
        struct ldlm_extent *extent;
        struct list_head *work_list = priv->work_list;
        struct ldlm_lock *lock, *enq = priv->lock;
        ldlm_mode_t mode = priv->mode;
        int count = 0;
        ENTRY;

        LASSERT(!list_empty(&node->li_group));

        list_for_each_entry(lock, &node->li_group, l_sl_policy) {
                /* the interval tree only contains granted locks */
                LASSERTF(mode == lock->l_granted_mode,
                         "mode = %s, lock->l_granted_mode = %s\n",
                         ldlm_lockname[mode],
                         ldlm_lockname[lock->l_granted_mode]);
                count++;
                if (lock->l_blocking_ast)
                        ldlm_add_ast_work_item(lock, enq, work_list);
        }

        /* don't count conflicting glimpse locks */
        extent = ldlm_interval_extent(node);
        if (!(mode == LCK_PR &&
              extent->start == 0 && extent->end == OBD_OBJECT_EOF))
                *priv->locks += count;

        if (priv->compat)
                *priv->compat = 0;

        RETURN(INTERVAL_ITER_CONT);
}

/* Determine if the lock is compatible with all locks on the queue.
 * We stop walking the queue if we hit ourselves so we don't take
 * conflicting locks enqueued after us into account, or we'd wait forever.
 *
 * Returns:
 *   0 if the lock is not compatible
 *   1 if the lock is compatible
 *   2 if this group lock is compatible and requires no further checking
 *   negative error, such as EWOULDBLOCK for group locks
 */
static int
ldlm_extent_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                         int *flags, ldlm_error_t *err,
                         struct list_head *work_list, int *contended_locks)
{
        struct list_head *tmp;
        struct ldlm_lock *lock;
        struct ldlm_resource *res = req->l_resource;
        ldlm_mode_t req_mode = req->l_req_mode;
        __u64 req_start = req->l_req_extent.start;
        __u64 req_end = req->l_req_extent.end;
        int compat = 1;
        int scan = 0;
        int check_contention;
        ENTRY;

        lockmode_verify(req_mode);

        /* Using the interval tree for the granted queue */
        if (queue == &res->lr_granted) {
                struct ldlm_interval_tree *tree;
                struct ldlm_extent_compat_args data = {.work_list = work_list,
                                                       .lock = req,
                                                       .locks = contended_locks,
                                                       .compat = &compat };
                struct interval_node_extent ex = { .start = req_start,
                                                   .end = req_end };
                int idx, rc;

                for (idx = 0; idx < LCK_MODE_NUM; idx++) {
                        tree = &res->lr_itree[idx];
                        if (tree->lit_root == NULL) /* empty tree, skipped */
                                continue;

                        data.mode = tree->lit_mode;
                        if (lockmode_compat(req_mode, tree->lit_mode)) {
                                struct ldlm_interval *node;
                                struct ldlm_extent *extent;

                                if (req_mode != LCK_GROUP)
                                        continue;

                                /* group lock, grant it immediately if
                                 * compatible */
                                node = to_ldlm_interval(tree->lit_root);
                                extent = ldlm_interval_extent(node);
                                if (req->l_policy_data.l_extent.gid ==
                                    extent->gid)
                                        RETURN(2);
                        }

                        if (tree->lit_mode == LCK_GROUP) {
                                if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                        compat = -EWOULDBLOCK;
                                        goto destroylock;
                                }

                                *flags |= LDLM_FL_NO_TIMEOUT;
                                if (!work_list)
                                        RETURN(0);

                                /* if the work list is not NULL, add all
                                 * locks in the tree to the work list */
                                compat = 0;
                                interval_iterate(tree->lit_root,
                                                 ldlm_extent_compat_cb, &data);
                                continue;
                        }

                        if (!work_list) {
                                rc = interval_is_overlapped(tree->lit_root,&ex);
                                if (rc)
                                        RETURN(0);
                        } else {
                                interval_search(tree->lit_root, &ex,
                                                ldlm_extent_compat_cb, &data);
                                if (!list_empty(work_list) && compat)
                                        compat = 0;
                        }
                }
        } else { /* for waiting queue */
                list_for_each(tmp, queue) {
                        check_contention = 1;

                        lock = list_entry(tmp, struct ldlm_lock, l_res_link);

                        if (req == lock)
                                break;

                        if (unlikely(scan)) {
                                /* We only get here if we are queuing a GROUP
                                 * lock and have met an incompatible one.  The
                                 * main idea of this code is to insert a GROUP
                                 * lock past compatible GROUP locks in the
                                 * waiting queue, or if there are none, in
                                 * front of the first non-GROUP lock. */
                                if (lock->l_req_mode != LCK_GROUP) {
                                        /* Ok, we hit a non-GROUP lock; there
                                         * should be no more GROUP locks later
                                         * on, so queue in front of the first
                                         * non-GROUP lock */

                                        ldlm_resource_insert_lock_after(lock, req);
                                        list_del_init(&lock->l_res_link);
                                        ldlm_resource_insert_lock_after(req, lock);
                                        break;
                                }
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* found it */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        break;
                                }
                                continue;
                        }

                        /* locks are compatible, overlap doesn't matter */
                        if (lockmode_compat(lock->l_req_mode, req_mode)) {
                                if (req_mode == LCK_PR &&
                                    ((lock->l_policy_data.l_extent.start <=
                                      req->l_policy_data.l_extent.start) &&
                                     (lock->l_policy_data.l_extent.end >=
                                      req->l_policy_data.l_extent.end))) {
                                        /* If we met a PR lock just like us or
                                         * wider, and nobody down the list
                                         * conflicted with it, that means we
                                         * can skip processing of the rest of
                                         * the list and safely place ourselves
                                         * at the end of the list, or grant
                                         * (depending on whether we met a
                                         * conflicting lock later or not).
                                         * In case of 1st enqueue only, we
                                         * continue traversing if there is
                                         * something conflicting down the list
                                         * because we need to make sure that
                                         * something is marked as AST_SENT as
                                         * well; in case of an empty work list
                                         * we would exit on the first conflict
                                         * met. */
                                        /* There IS a case where such flag is
                                         * not set for a lock, yet it blocks
                                         * something.  Luckily for us this is
                                         * only during destroy, so lock is
                                         * exclusive.  So here we are safe */
                                        if (!(lock->l_flags & LDLM_FL_AST_SENT)) {
                                                RETURN(compat);
                                        }
                                }

                                /* non-group locks are compatible, overlap
                                 * doesn't matter */
                                if (likely(req_mode != LCK_GROUP))
                                        continue;

                                /* If we are trying to get a GROUP lock and
                                 * there is another one of this kind, we need
                                 * to compare gids */
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* If the existing lock with matched
                                         * gid is granted, we grant the new one
                                         * too. */
                                        if (lock->l_req_mode == lock->l_granted_mode)
                                                RETURN(2);

                                        /* Otherwise we are scanning the queue
                                         * of waiting locks and it means the
                                         * current request would block along
                                         * with the existing lock (that is
                                         * already blocked).
                                         * If we are in nonblocking mode -
                                         * return immediately */
                                        if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                                compat = -EWOULDBLOCK;
                                                goto destroylock;
                                        }
                                        /* If this group lock is compatible with
                                         * another group lock on the waiting
                                         * list, they must be together in the
                                         * list, so they can be granted at the
                                         * same time.  Otherwise the later lock
                                         * can get stuck behind another,
                                         * incompatible, lock. */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        /* Because 'lock' is not granted, we can
                                         * stop processing this queue and return
                                         * immediately.  There is no need to
                                         * check the rest of the list. */
                                        RETURN(0);
                                }
                        }

                        if (unlikely(req_mode == LCK_GROUP &&
                                     (lock->l_req_mode != lock->l_granted_mode))) {
                                scan = 1;
                                compat = 0;
                                if (lock->l_req_mode != LCK_GROUP) {
                                        /* Ok, we hit a non-GROUP lock; there
                                         * should be no more GROUP locks later
                                         * on, so queue in front of the first
                                         * non-GROUP lock */

                                        ldlm_resource_insert_lock_after(lock, req);
                                        list_del_init(&lock->l_res_link);
                                        ldlm_resource_insert_lock_after(req, lock);
                                        break;
                                }
                                if (req->l_policy_data.l_extent.gid ==
                                    lock->l_policy_data.l_extent.gid) {
                                        /* found it */
                                        ldlm_resource_insert_lock_after(lock, req);
                                        break;
                                }
                                continue;
                        }

                        if (unlikely(lock->l_req_mode == LCK_GROUP)) {
                                /* If the compared lock is GROUP, then the
                                 * requested one is PR/PW so they are not
                                 * compatible; the extent range does not
                                 * matter */
                                if (*flags & LDLM_FL_BLOCK_NOWAIT) {
                                        compat = -EWOULDBLOCK;
                                        goto destroylock;
                                } else {
                                        *flags |= LDLM_FL_NO_TIMEOUT;
                                }
                        } else if (lock->l_policy_data.l_extent.end < req_start ||
                                   lock->l_policy_data.l_extent.start > req_end) {
                                /* if a non-group lock doesn't overlap, skip it */
                                continue;
                        } else if (lock->l_req_extent.end < req_start ||
                                   lock->l_req_extent.start > req_end) {
                                /* false contention, the requests don't really
                                 * overlap */
                                check_contention = 0;
                        }

                        if (!work_list)
                                RETURN(0);

                        /* don't count conflicting glimpse locks */
                        if (lock->l_req_mode == LCK_PR &&
                            lock->l_policy_data.l_extent.start == 0 &&
                            lock->l_policy_data.l_extent.end == OBD_OBJECT_EOF)
                                check_contention = 0;

                        *contended_locks += check_contention;

                        compat = 0;
                        if (lock->l_blocking_ast)
                                ldlm_add_ast_work_item(lock, req, work_list);
                }
        }

        if (ldlm_check_contention(req, *contended_locks) &&
            compat == 0 &&
            (*flags & LDLM_FL_DENY_ON_CONTENTION) &&
            req->l_req_mode != LCK_GROUP &&
            req_end - req_start <=
            req->l_resource->lr_namespace->ns_max_nolock_size)
                GOTO(destroylock, compat = -EUSERS);

        RETURN(compat);

destroylock:
        list_del_init(&req->l_res_link);
        ldlm_lock_destroy_nolock(req);
        *err = ELDLM_LOCK_ABORTED;
        RETURN(compat);
}
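
/* Drop the blocking-AST work items queued on bl_list without sending them,
 * releasing the references that were taken when the items were queued. */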
static void discard_bl_list(struct list_head *bl_list)
{
        struct list_head *tmp, *pos;
        ENTRY;

        list_for_each_safe(pos, tmp, bl_list) {
                struct ldlm_lock *lock =
                        list_entry(pos, struct ldlm_lock, l_bl_ast);

                list_del_init(&lock->l_bl_ast);
                LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
                lock->l_flags &= ~LDLM_FL_AST_SENT;
                LASSERT(lock->l_bl_ast_run == 0);
                LASSERT(lock->l_blocking_lock);
                LDLM_LOCK_PUT(lock->l_blocking_lock);
                lock->l_blocking_lock = NULL;
                LDLM_LOCK_PUT(lock);
        }
        EXIT;
}

/* If first_enq is 0 (ie, called from ldlm_reprocess_queue):
 *  - blocking ASTs have already been sent
 *  - must call this function with the ns lock held
 *
 * If first_enq is 1 (ie, called from ldlm_lock_enqueue):
 *  - blocking ASTs have not been sent
 *  - must call this function with the ns lock held once */
int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                             ldlm_error_t *err, struct list_head *work_list)
{
        struct ldlm_resource *res = lock->l_resource;
        CFS_LIST_HEAD(rpc_list);
        int rc, rc2;
        int contended_locks = 0;
        ENTRY;

        LASSERT(list_empty(&res->lr_converting));
        LASSERT(!(*flags & LDLM_FL_DENY_ON_CONTENTION) ||
                !(lock->l_flags & LDLM_AST_DISCARD_DATA));
        check_res_locked(res);
        *err = ELDLM_OK;

        if (!first_enq) {
                /* Careful observers will note that we don't handle -EWOULDBLOCK
                 * here, but it's ok for a non-obvious reason -- compat_queue
                 * can only return -EWOULDBLOCK if (flags & BLOCK_NOWAIT).
                 * flags should always be zero here, and if that ever stops
                 * being true, we want to find out. */
                LASSERT(*flags == 0);
                rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags,
                                              err, NULL, &contended_locks);
                if (rc == 1) {
                        rc = ldlm_extent_compat_queue(&res->lr_waiting, lock,
                                                      flags, err, NULL,
                                                      &contended_locks);
                }
                if (rc == 0)
                        RETURN(LDLM_ITER_STOP);

                ldlm_resource_unlink_lock(lock);

                if (!OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_EVICT_RACE))
                        ldlm_extent_policy(res, lock, flags);
                ldlm_grant_lock(lock, work_list);
                RETURN(LDLM_ITER_CONTINUE);
        }

 restart:
        contended_locks = 0;
        rc = ldlm_extent_compat_queue(&res->lr_granted, lock, flags, err,
                                      &rpc_list, &contended_locks);
        if (rc < 0)
                GOTO(out, rc); /* lock was destroyed */
        if (rc == 2)
                goto grant;

        rc2 = ldlm_extent_compat_queue(&res->lr_waiting, lock, flags, err,
                                       &rpc_list, &contended_locks);
        if (rc2 < 0)
                GOTO(out, rc = rc2); /* lock was destroyed */

        if (rc + rc2 == 2) {
 grant:
                ldlm_extent_policy(res, lock, flags);
                ldlm_resource_unlink_lock(lock);
                ldlm_grant_lock(lock, NULL);
        } else {
                /* If either of the compat_queue()s returned failure, then we
                 * have ASTs to send and must go onto the waiting list.
                 *
                 * bug 2322: we used to unlink and re-add here, which was a
                 * terrible folly -- if we goto restart, we could get
                 * re-ordered!  Causes deadlock, because ASTs aren't sent! */
                if (list_empty(&lock->l_res_link))
                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                unlock_res(res);
                rc = ldlm_run_ast_work(&rpc_list, LDLM_WORK_BL_AST);
                lock_res(res);

                if (rc == -ERESTART) {
                        /* lock was granted while resource was unlocked. */
                        if (lock->l_granted_mode == lock->l_req_mode) {
                                /* bug 11300: if the lock has been granted,
                                 * break earlier because otherwise, we will go
                                 * to restart and ldlm_resource_unlink will be
                                 * called and it causes the interval node to be
                                 * freed.  Then we will fail at
                                 * ldlm_extent_add_lock() */
                                *flags &= ~(LDLM_FL_BLOCK_GRANTED |
                                            LDLM_FL_BLOCK_CONV |
                                            LDLM_FL_BLOCK_WAIT);
                                GOTO(out, rc = 0);
                        }

                        GOTO(restart, -ERESTART);
                }

                *flags |= LDLM_FL_BLOCK_GRANTED;
                /* this way we force the client to wait for the lock
                 * endlessly once the lock is enqueued -bzzz */
                *flags |= LDLM_FL_NO_TIMEOUT;
        }
        RETURN(0);

out:
        if (!list_empty(&rpc_list)) {
                LASSERT(!(lock->l_flags & LDLM_AST_DISCARD_DATA));
                discard_bl_list(&rpc_list);
        }
        RETURN(rc);
}

/* When a lock is cancelled by a client, the KMS may undergo change if this
 * is the "highest lock".  This function returns the new KMS value.
 * Caller must hold ns_lock already.
 *
 * NB: A lock on [x,y] protects a KMS of up to y + 1 bytes! */
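/* e.g. (illustrative values): with granted extents [0, 4095] and [0, 8191]
 * and old_kms = 8192, cancelling the [0, 8191] lock leaves [0, 4095] as the
 * highest remaining extent, so the new KMS is 4095 + 1 = 4096. */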
__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms)
{
        struct ldlm_resource *res = lock->l_resource;
        struct list_head *tmp;
        struct ldlm_lock *lck;
        __u64 kms = 0;
        ENTRY;

        /* don't let another thread in ldlm_extent_shift_kms race in
         * just after we finish and take our lock into account in its
         * calculation of the kms */
        lock->l_flags |= LDLM_FL_KMS_IGNORE;

        list_for_each(tmp, &res->lr_granted) {
                lck = list_entry(tmp, struct ldlm_lock, l_res_link);

                if (lck->l_flags & LDLM_FL_KMS_IGNORE)
                        continue;

                if (lck->l_policy_data.l_extent.end >= old_kms)
                        RETURN(old_kms);

                /* This extent _has_ to be smaller than old_kms (checked above)
                 * so kms can only ever be smaller or the same as old_kms. */
                if (lck->l_policy_data.l_extent.end + 1 > kms)
                        kms = lck->l_policy_data.l_extent.end + 1;
        }
        LASSERTF(kms <= old_kms, "kms "LPU64" old_kms "LPU64"\n",
                 kms, old_kms);

        RETURN(kms);
}

cfs_mem_cache_t *ldlm_interval_slab;

struct ldlm_interval *ldlm_interval_alloc(struct ldlm_lock *lock)
{
        struct ldlm_interval *node;
        ENTRY;

        LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
        OBD_SLAB_ALLOC(node, ldlm_interval_slab, CFS_ALLOC_IO, sizeof(*node));
        if (node == NULL)
                RETURN(NULL);

        CFS_INIT_LIST_HEAD(&node->li_group);
        ldlm_interval_attach(node, lock);
        RETURN(node);
}

void ldlm_interval_free(struct ldlm_interval *node)
{
        if (node) {
                LASSERT(list_empty(&node->li_group));
                OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
        }
}

/* interval tree, for LDLM_EXTENT. */
void ldlm_interval_attach(struct ldlm_interval *n,
                          struct ldlm_lock *l)
{
        LASSERT(l->l_tree_node == NULL);
        LASSERT(l->l_resource->lr_type == LDLM_EXTENT);

        list_add_tail(&l->l_sl_policy, &n->li_group);
        l->l_tree_node = n;
}
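
/* Detach the lock from its interval node.  Returns the node if this lock was
 * the last one attached to it (so the caller can free it), NULL otherwise. */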
struct ldlm_interval *ldlm_interval_detach(struct ldlm_lock *l)
{
        struct ldlm_interval *n = l->l_tree_node;

        if (n == NULL)
                return NULL;

        LASSERT(!list_empty(&n->li_group));
        l->l_tree_node = NULL;
        list_del_init(&l->l_sl_policy);

        return (list_empty(&n->li_group) ? n : NULL);
}

static inline int lock_mode_to_index(ldlm_mode_t mode)
{
        int index;

        LASSERT(mode != 0);
        LASSERT(IS_PO2(mode));
        for (index = -1; mode; index++, mode >>= 1) ;
        LASSERT(index < LCK_MODE_NUM);
        return index;
}
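
/* e.g. lock_mode_to_index(LCK_EX) == 0 (LCK_EX == 1) and
 * lock_mode_to_index(LCK_PR) == 2 (LCK_PR == 4): the loop computes log2 of
 * the power-of-two mode bit, which indexes the per-mode interval trees. */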

void ldlm_extent_add_lock(struct ldlm_resource *res,
                          struct ldlm_lock *lock)
{
        struct interval_node *found, **root;
        struct ldlm_interval *node;
        struct ldlm_extent *extent;
        int idx;

        LASSERT(lock->l_granted_mode == lock->l_req_mode);

        node = lock->l_tree_node;
        LASSERT(node != NULL);

        idx = lock_mode_to_index(lock->l_granted_mode);
        LASSERT(lock->l_granted_mode == 1 << idx);
        LASSERT(lock->l_granted_mode == res->lr_itree[idx].lit_mode);

        /* initialize the node's extent */
        extent = &lock->l_policy_data.l_extent;
        interval_set(&node->li_node, extent->start, extent->end);

        root = &res->lr_itree[idx].lit_root;
        found = interval_insert(&node->li_node, root);
        if (found) { /* The policy group was found. */
                struct ldlm_interval *tmp = ldlm_interval_detach(lock);
                LASSERT(tmp != NULL);
                ldlm_interval_free(tmp);
                ldlm_interval_attach(to_ldlm_interval(found), lock);
        }
        res->lr_itree[idx].lit_size++;

        /* even though we use an interval tree to manage the extent locks, we
         * also add the locks into the granted list for debugging purposes */
        ldlm_resource_add_lock(res, &res->lr_granted, lock);
}

void ldlm_extent_unlink_lock(struct ldlm_lock *lock)
{
        struct ldlm_resource *res = lock->l_resource;
        struct ldlm_interval *node;
        struct ldlm_interval_tree *tree;
        int idx;

        if (lock->l_granted_mode != lock->l_req_mode)
                return;

        LASSERT(lock->l_tree_node != NULL);
        idx = lock_mode_to_index(lock->l_granted_mode);
        LASSERT(lock->l_granted_mode == 1 << idx);
        tree = &res->lr_itree[idx];

        LASSERT(tree->lit_root != NULL); /* assure the tree is not null */

        tree->lit_size--;
        node = ldlm_interval_detach(lock);
        if (node) {
                interval_erase(&node->li_node, &tree->lit_root);
                ldlm_interval_free(node);
        }
}