Whamcloud - gitweb
b=16098
[fs/lustre-release.git] / lustre / ldlm / interval_tree.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see [sun.com URL with a
20  * copy of GPLv2].
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ldlm/interval_tree.c
37  *
38  * Interval tree library used by ldlm extent lock code
39  *
40  * Author: Huang Wei <huangwei@clusterfs.com>
41  * Author: Jay Xiong <jinshan.xiong@sun.com>
42  */
43 #ifdef __KERNEL__
44 # include <lustre_dlm.h>
45 #else
46 # include <liblustre.h>
47 #endif
48 #include <obd_support.h>
49 #include <interval_tree.h>
50
/* Node colors used by the red-black balancing of the interval tree. */
enum {
        INTERVAL_RED   = 0,     /* red node */
        INTERVAL_BLACK = 1,     /* black node */
};
55
56 static inline int node_is_left_child(struct interval_node *node)
57 {
58         LASSERT(node->in_parent != NULL);
59         return node == node->in_parent->in_left;
60 }
61
62 static inline int node_is_right_child(struct interval_node *node)
63 {
64         LASSERT(node->in_parent != NULL);
65         return node == node->in_parent->in_right;
66 }
67
68 static inline int node_is_red(struct interval_node *node)
69 {
70         return node->in_color == INTERVAL_RED;
71 }
72
73 static inline int node_is_black(struct interval_node *node)
74 {
75         return node->in_color == INTERVAL_BLACK;
76 }
77
78 static inline int extent_compare(struct interval_node_extent *e1,
79                                  struct interval_node_extent *e2)
80 {
81         int rc;
82         if (e1->start == e2->start) {
83                 if (e1->end < e2->end)
84                         rc = -1;
85                 else if (e1->end > e2->end)
86                         rc = 1;
87                 else
88                         rc = 0;
89         } else {
90                 if (e1->start < e2->start)
91                         rc = -1;
92                 else
93                         rc = 1;
94         }
95         return rc;
96 }
97
98 static inline int extent_equal(struct interval_node_extent *e1,
99                                struct interval_node_extent *e2)
100 {
101         return (e1->start == e2->start) && (e1->end == e2->end);
102 }
103
104 static inline int extent_overlapped(struct interval_node_extent *e1, 
105                                     struct interval_node_extent *e2)
106 {
107         return (e1->start <= e2->end) && (e2->start <= e1->end);
108 }
109
110 static inline int node_compare(struct interval_node *n1,
111                                struct interval_node *n2)
112 {
113         return extent_compare(&n1->in_extent, &n2->in_extent);
114 }
115
116 static inline int node_equal(struct interval_node *n1,
117                              struct interval_node *n2)
118 {
119         return extent_equal(&n1->in_extent, &n2->in_extent);
120 }
121
/* Returns the larger of two unsigned 64-bit values. */
static inline __u64 max_u64(__u64 x, __u64 y)
{
        if (x > y)
                return x;
        return y;
}
126
/* Returns the smaller of two unsigned 64-bit values. */
static inline __u64 min_u64(__u64 x, __u64 y)
{
        if (x < y)
                return x;
        return y;
}
131
/* Visit every node of the tree rooted at @root via interval_first/next. */
#define interval_for_each(node, root)                           \
        for ((node) = interval_first(root);                     \
             (node) != NULL;                                    \
             (node) = interval_next(node))
135
/* Visit every node of the tree rooted at @root via interval_last/prev. */
#define interval_for_each_reverse(node, root)                   \
        for ((node) = interval_last(root);                      \
             (node) != NULL;                                    \
             (node) = interval_prev(node))
139
140 static struct interval_node *interval_first(struct interval_node *node)
141 {
142         ENTRY;
143
144         if (!node)
145                 RETURN(NULL);
146         while (node->in_left)
147                 node = node->in_left;
148         RETURN(node);
149 }
150
151 static struct interval_node *interval_last(struct interval_node *node)
152 {
153         ENTRY;
154
155         if (!node)
156                 RETURN(NULL);
157         while (node->in_right)
158                 node = node->in_right;
159         RETURN(node);
160 }
161
162 static struct interval_node *interval_next(struct interval_node *node)
163 {
164         ENTRY;
165
166         if (!node)
167                 RETURN(NULL);
168         if (node->in_right)
169                 RETURN(interval_first(node->in_right));
170         while (node->in_parent && node_is_right_child(node))
171                 node = node->in_parent;
172         RETURN(node->in_parent);
173 }
174
175 static struct interval_node *interval_prev(struct interval_node *node)
176 {
177         ENTRY;
178
179         if (!node)
180                 RETURN(NULL);
181
182         if (node->in_left)
183                 RETURN(interval_last(node->in_left));
184
185         while (node->in_parent && node_is_left_child(node))
186                 node = node->in_parent;
187
188         RETURN(node->in_parent);
189 }
190
191 enum interval_iter interval_iterate(struct interval_node *root,
192                                     interval_callback_t func,
193                                     void *data)
194 {
195         struct interval_node *node;
196         enum interval_iter rc = INTERVAL_ITER_CONT;
197         ENTRY;
198         
199         interval_for_each(node, root) {
200                 rc = func(node, data);
201                 if (rc == INTERVAL_ITER_STOP)
202                         break;
203         }
204
205         RETURN(rc);
206 }
207 EXPORT_SYMBOL(interval_iterate);
208
209 enum interval_iter interval_iterate_reverse(struct interval_node *root,
210                                             interval_callback_t func,
211                                             void *data)
212 {
213         struct interval_node *node;
214         enum interval_iter rc = INTERVAL_ITER_CONT;
215         ENTRY;
216         
217         interval_for_each_reverse(node, root) {
218                 rc = func(node, data);
219                 if (rc == INTERVAL_ITER_STOP)
220                         break;
221         }
222
223         RETURN(rc);
224 }
225 EXPORT_SYMBOL(interval_iterate_reverse);
226
227 /* try to find a node with same interval in the tree,
228  * if found, return the pointer to the node, otherwise return NULL*/
229 struct interval_node *interval_find(struct interval_node *root,
230                                     struct interval_node_extent *ex)
231 {
232         struct interval_node *walk = root;
233         int rc;
234         ENTRY;
235
236         while (walk) {
237                 rc = extent_compare(ex, &walk->in_extent);
238                 if (rc == 0)
239                         break;
240                 else if (rc < 0)
241                         walk = walk->in_left;
242                 else
243                         walk = walk->in_right;
244         }
245
246         RETURN(walk);
247 }
248 EXPORT_SYMBOL(interval_find);
249
250 static void __rotate_change_maxhigh(struct interval_node *node,
251                                     struct interval_node *rotate)
252 {
253         __u64 left_max, right_max;
254
255         rotate->in_max_high = node->in_max_high;
256         left_max = node->in_left ? node->in_left->in_max_high : 0;
257         right_max = node->in_right ? node->in_right->in_max_high : 0;
258         node->in_max_high  = max_u64(interval_high(node),
259                                      max_u64(left_max,right_max));
260 }
261
262 /* The left rotation "pivots" around the link from node to node->right, and
263  * - node will be linked to node->right's left child, and
264  * - node->right's left child will be linked to node's right child.  */
265 static void __rotate_left(struct interval_node *node,
266                           struct interval_node **root)
267 {
268         struct interval_node *right = node->in_right;
269         struct interval_node *parent = node->in_parent;
270
271         node->in_right = right->in_left;
272         if (node->in_right)
273                 right->in_left->in_parent = node;
274
275         right->in_left = node;
276         right->in_parent = parent;
277         if (parent) {
278                 if (node_is_left_child(node))
279                         parent->in_left = right;
280                 else
281                         parent->in_right = right;
282         } else {
283                 *root = right;
284         }
285         node->in_parent = right;
286
287         /* update max_high for node and right */
288         __rotate_change_maxhigh(node, right);
289 }
290
291 /* The right rotation "pivots" around the link from node to node->left, and
292  * - node will be linked to node->left's right child, and
293  * - node->left's right child will be linked to node's left child.  */
294 static void __rotate_right(struct interval_node *node,
295                            struct interval_node **root)
296 {
297         struct interval_node *left = node->in_left;
298         struct interval_node *parent = node->in_parent;
299
300         node->in_left = left->in_right;
301         if (node->in_left)
302                 left->in_right->in_parent = node;
303         left->in_right = node;
304
305         left->in_parent = parent;
306         if (parent) {
307                 if (node_is_right_child(node))
308                         parent->in_right = left;
309                 else
310                         parent->in_left = left;
311         } else {
312                 *root = left;
313         }
314         node->in_parent = left;
315
316         /* update max_high for node and left */
317         __rotate_change_maxhigh(node, left);
318 }
319
/*
 * Exchange two 'struct interval_node *' lvalues.
 *
 * The arguments are parenthesized and the temporary has a name that is
 * unlikely to clash with caller variables: with a temporary called 'c',
 * interval_swap(c, x) would silently operate on the macro's own local
 * instead of the caller's 'c'.
 */
#define interval_swap(a, b) do {                                \
        struct interval_node *interval_swap_tmp_ = (a);         \
        (a) = (b);                                              \
        (b) = interval_swap_tmp_;                               \
} while (0)
323
/*
 * Operations INSERT and DELETE, when run on a tree with n keys,
 * take O(logN) time. Because they modify the tree, the result
 * may violate the red-black properties. To restore these properties,
 * we must change the colors of some of the nodes in the tree
 * and also change the pointer structure.
 *
 * interval_insert_color() is the fix-up step run after @node has been
 * linked into the tree as a red node.  It recolors and rotates on the way
 * up for as long as @node has a red parent, and finally forces *@root to
 * be black.
 *
 * @node: the node that was just linked in
 * @root: address of the tree root pointer; rewritten when a rotation
 *        changes the root
 */
static void interval_insert_color(struct interval_node *node,
                                  struct interval_node **root)
{
        struct interval_node *parent, *gparent;
        ENTRY;

        /* loop while there is a red node with a red parent */
        while ((parent = node->in_parent) && node_is_red(parent)) {
                gparent = parent->in_parent;
                /* Parent is RED, so gparent must not be NULL */
                if (node_is_left_child(parent)) {
                        struct interval_node *uncle;
                        uncle = gparent->in_right;
                        if (uncle && node_is_red(uncle)) {
                                /* red uncle: recolor and continue the
                                 * fix-up from the grandparent */
                                uncle->in_color = INTERVAL_BLACK;
                                parent->in_color = INTERVAL_BLACK;
                                gparent->in_color = INTERVAL_RED;
                                node = gparent;
                                continue;
                        }

                        if (parent->in_right == node) {
                                /* node is an inner (right) child: rotate it
                                 * above parent, then swap the two local
                                 * pointers so that "parent" again names the
                                 * upper node of the pair */
                                __rotate_left(parent, root);
                                interval_swap(node, parent);
                        }

                        /* recolor, then rotate the grandparent to the right */
                        parent->in_color = INTERVAL_BLACK;
                        gparent->in_color = INTERVAL_RED;
                        __rotate_right(gparent, root);
                } else {
                        /* mirror image: parent is the right child */
                        struct interval_node *uncle;
                        uncle = gparent->in_left;
                        if (uncle && node_is_red(uncle)) {
                                uncle->in_color = INTERVAL_BLACK;
                                parent->in_color = INTERVAL_BLACK;
                                gparent->in_color = INTERVAL_RED;
                                node = gparent;
                                continue;
                        }

                        if (node_is_left_child(node)) {
                                /* node is an inner (left) child: rotate it
                                 * above parent and swap the local pointers */
                                __rotate_right(parent, root);
                                interval_swap(node, parent);
                        }

                        parent->in_color = INTERVAL_BLACK;
                        gparent->in_color = INTERVAL_RED;
                        __rotate_left(gparent, root);
                }
        }

        /* the root is always black */
        (*root)->in_color = INTERVAL_BLACK;
        EXIT;
}
384
385 struct interval_node *interval_insert(struct interval_node *node,
386                                       struct interval_node **root)
387                      
388 {
389         struct interval_node **p, *parent = NULL;
390         ENTRY;
391
392         p = root;
393         while (*p) {
394                 parent = *p;
395                 if (node_equal(parent, node))
396                         RETURN(parent);
397
398                 /* max_high field must be updated after each iteration */
399                 if (parent->in_max_high < interval_high(node))
400                         parent->in_max_high = interval_high(node);
401
402                 if (node_compare(node, parent) < 0)
403                         p = &parent->in_left;
404                 else 
405                         p = &parent->in_right;
406         }
407
408         /* link node into the tree */
409         node->in_parent = parent;
410         node->in_color = INTERVAL_RED;
411         node->in_left = node->in_right = NULL;
412         *p = node;
413
414         interval_insert_color(node, root);
415
416         RETURN(NULL);
417 }
418 EXPORT_SYMBOL(interval_insert);
419
420 static inline int node_is_black_or_0(struct interval_node *node)
421 {
422         return !node || node_is_black(node);
423 }
424
/*
 * Rebalancing step run by interval_erase() after a black node has been
 * unlinked.
 *
 * @node:   the child that took the removed node's place; may be NULL
 * @parent: parent of @node (needed because @node may be NULL)
 * @root:   address of the tree root pointer; rotations may rewrite it
 *
 * The loop runs while @node is black (or NULL) and is not the root.  On
 * each pass "tmp" is @node's sibling.
 * NOTE(review): tmp is dereferenced without a NULL check; presumably the
 * red-black invariants guarantee that the sibling exists here -- confirm.
 */
static void interval_erase_color(struct interval_node *node,
                                 struct interval_node *parent,
                                 struct interval_node **root)
{
        struct interval_node *tmp;
        ENTRY;

        while (node_is_black_or_0(node) && node != *root) {
                if (parent->in_left == node) {
                        tmp = parent->in_right;
                        if (node_is_red(tmp)) {
                                /* red sibling: recolor and rotate so that
                                 * node gets a new sibling */
                                tmp->in_color = INTERVAL_BLACK;
                                parent->in_color = INTERVAL_RED;
                                __rotate_left(parent, root);
                                tmp = parent->in_right;
                        }
                        if (node_is_black_or_0(tmp->in_left) &&
                            node_is_black_or_0(tmp->in_right)) {
                                /* sibling has no red child: color it red
                                 * and move the problem one level up */
                                tmp->in_color = INTERVAL_RED;
                                node = parent;
                                parent = node->in_parent;
                        } else {
                                if (node_is_black_or_0(tmp->in_right)) {
                                        /* only the sibling's left child is
                                         * red: rotate it into the far
                                         * (right) position first */
                                        struct interval_node *o_left;
                                        if ((o_left = tmp->in_left))
                                             o_left->in_color = INTERVAL_BLACK;
                                        tmp->in_color = INTERVAL_RED;
                                        __rotate_right(tmp, root);
                                        tmp = parent->in_right;
                                }
                                /* sibling's right child is red: recolor,
                                 * rotate parent left and finish */
                                tmp->in_color = parent->in_color;
                                parent->in_color = INTERVAL_BLACK;
                                if (tmp->in_right)
                                    tmp->in_right->in_color = INTERVAL_BLACK;
                                __rotate_left(parent, root);
                                node = *root;
                                break;
                        }
                } else {
                        /* mirror image: node is the right child */
                        tmp = parent->in_left;
                        if (node_is_red(tmp)) {
                                tmp->in_color = INTERVAL_BLACK;
                                parent->in_color = INTERVAL_RED;
                                __rotate_right(parent, root);
                                tmp = parent->in_left;
                        }
                        if (node_is_black_or_0(tmp->in_left) &&
                            node_is_black_or_0(tmp->in_right)) {
                                tmp->in_color = INTERVAL_RED;
                                node = parent;
                                parent = node->in_parent;
                        } else {
                                if (node_is_black_or_0(tmp->in_left)) {
                                        struct interval_node *o_right;
                                        if ((o_right = tmp->in_right))
                                            o_right->in_color = INTERVAL_BLACK;
                                        tmp->in_color = INTERVAL_RED;
                                        __rotate_left(tmp, root);
                                        tmp = parent->in_left;
                                }
                                tmp->in_color = parent->in_color;
                                parent->in_color = INTERVAL_BLACK;
                                if (tmp->in_left)
                                        tmp->in_left->in_color = INTERVAL_BLACK;
                                __rotate_right(parent, root);
                                node = *root;
                                break;
                        }
                }
        }
        /* node is now either a red node, the root, or NULL */
        if (node)
                node->in_color = INTERVAL_BLACK;
        EXIT;
}
499
500 /* 
501  * if the @max_high value of @node is changed, this function traverse  a path 
502  * from node  up to the root to update max_high for the whole tree.
503  */
504 static void update_maxhigh(struct interval_node *node,
505                            __u64  old_maxhigh)
506 {
507         __u64 left_max, right_max;
508         ENTRY;
509
510         while (node) {
511                 left_max = node->in_left ? node->in_left->in_max_high : 0;
512                 right_max = node->in_right ? node->in_right->in_max_high : 0;
513                 node->in_max_high = max_u64(interval_high(node),
514                                             max_u64(left_max, right_max));
515
516                 if (node->in_max_high >= old_maxhigh)
517                         break;
518                 node = node->in_parent;
519         }
520         EXIT;
521 }
522
523 void interval_erase(struct interval_node *node,
524                     struct interval_node **root)
525 {
526         struct interval_node *child, *parent;
527         int color;
528         ENTRY;
529
530         if (!node->in_left) {
531                 child = node->in_right;
532         } else if (!node->in_right) {
533                 child = node->in_left;
534         } else { /* Both left and right child are not NULL */
535                 struct interval_node *old = node;
536
537                 node = interval_next(node);
538                 child = node->in_right;
539                 parent = node->in_parent;
540                 color = node->in_color;
541
542                 if (child)
543                         child->in_parent = parent;
544                 if (parent == old) {
545                         parent->in_right = child;
546                         parent = node;
547                 } else {
548                         parent->in_left = child;
549                 }
550
551                 node->in_color = old->in_color;
552                 node->in_right = old->in_right;
553                 node->in_left = old->in_left;
554                 node->in_parent = old->in_parent;
555
556                 if (old->in_parent) {
557                         if (node_is_left_child(old))
558                                 old->in_parent->in_left = node;
559                         else
560                                 old->in_parent->in_right = node;
561                 } else {
562                         *root = node;
563                 }
564
565                 old->in_left->in_parent = node;
566                 if (old->in_right)
567                         old->in_right->in_parent = node;
568                 update_maxhigh(child, node->in_max_high);
569                 update_maxhigh(node, old->in_max_high);
570                 goto color;
571         }
572         parent = node->in_parent;
573         color = node->in_color;
574
575         if (child)
576                 child->in_parent = parent;
577         if (parent) {
578                 if (node_is_left_child(node))
579                         parent->in_left = child;
580                 else
581                         parent->in_right = child;
582         } else {
583                 *root = child;
584         }
585
586         update_maxhigh(child, node->in_max_high);
587
588 color:
589         if (color == INTERVAL_BLACK)
590                 interval_erase_color(child, parent, root);
591         EXIT;
592 }
593 EXPORT_SYMBOL(interval_erase);
594
595 static inline int interval_may_overlap(struct interval_node *node,
596                                           struct interval_node_extent *ext)
597 {
598         return (ext->start <= node->in_max_high &&
599                 ext->end >= interval_low(node));
600 }
601
602 /*
603  * This function finds all intervals that overlap interval ext,
604  * and calls func to handle the resulting intervals one by one.
605  * In Lustre, this function will find all conflicting locks in
606  * the granted queue and add these locks to the AST work list.
607  *
608  * {
609  *       if (node == NULL)
610  *               return 0;
611  *       if (ext->end < interval_low(node)) {
612  *               interval_search(node->in_left, ext, func, data);
613  *       } else if (interval_may_overlap(node, ext)) {
614  *               if (extent_overlapped(ext, &node->in_extent))
615  *                       func(node, data);
616  *               interval_search(node->in_left, ext, func, data);
617  *               interval_search(node->in_right, ext, func, data);
618  *       }
619  *       return 0;
620  * }
621  *
622  */
/*
 * Non-recursive search: call @func(node, @data) for every visited node
 * whose extent overlaps @ext.  The walk descends through in_left/in_right
 * and backtracks through in_parent links, so no explicit stack is used.
 *
 * @node: node to start from (the walk climbs through in_parent, so this
 *        is presumably meant to be the tree root -- confirm with callers)
 * @ext:  extent to test against; must not be NULL
 * @func: callback; returning INTERVAL_ITER_STOP ends the search at once
 * @data: opaque pointer handed to @func
 *
 * Returns the value of the last @func call, or INTERVAL_ITER_CONT when
 * @func was never called.
 */
enum interval_iter interval_search(struct interval_node *node,
                                   struct interval_node_extent *ext,
                                   interval_callback_t func,
                                   void *data)
{
        struct interval_node *parent;
        enum interval_iter rc = INTERVAL_ITER_CONT;

        LASSERT(ext != NULL);
        LASSERT(func != NULL);

        while (node) {
                if (ext->end < interval_low(node)) {
                        /* ext ends before this node starts: only the left
                         * subtree is descended into */
                        if (node->in_left) {
                                node = node->in_left;
                                continue;
                        }
                } else if (interval_may_overlap(node, ext)) {
                        if (extent_overlapped(ext, &node->in_extent)) {
                                rc = func(node, data);
                                if (rc == INTERVAL_ITER_STOP)
                                        break;
                        }

                        /* descend left first, otherwise right */
                        if (node->in_left) {
                                node = node->in_left;
                                continue;
                        }
                        if (node->in_right) {
                                node = node->in_right;
                                continue;
                        }
                }

                /* nothing more below this node: climb until we come up
                 * from a left child whose parent has a right subtree */
                parent = node->in_parent;
                while (parent) {
                        if (node_is_left_child(node) &&
                            parent->in_right) {
                                /* If we ever got the left, it means that the
                                 * parent met ext->end<interval_low(parent), or
                                 * may_overlap(parent). If the former is true,
                                 * we needn't go back. So stop early and check
                                 * may_overlap(parent) after this loop.  */
                                node = parent->in_right;
                                break;
                        }
                        node = parent;
                        parent = parent->in_parent;
                }
                /* stop when we climbed past the top, or when the parent
                 * whose right subtree was selected fails the pruning test */
                if (parent == NULL || !interval_may_overlap(parent, ext))
                        break;
        }

        return rc;
}
EXPORT_SYMBOL(interval_search);
679
680 static enum interval_iter interval_overlap_cb(struct interval_node *n,
681                                               void *args)
682 {
683         *(int *)args = 1;
684         return INTERVAL_ITER_STOP;
685 }
686
687 int interval_is_overlapped(struct interval_node *root,
688                            struct interval_node_extent *ext)
689 {
690         int has = 0;
691         (void)interval_search(root, ext, interval_overlap_cb, &has);
692         return has;
693 }
694 EXPORT_SYMBOL(interval_is_overlapped);
695
696 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
697  * some extents, because programs seldom do IO backward.
698  *
699  * The recursive algorithm of expanding low:
700  * expand_low {
701  *        struct interval_node *tmp;
702  *        static __u64 res = 0;
703  *
704  *        if (root == NULL)
705  *                return res;
706  *        if (root->in_max_high < low) {
707  *                res = max_u64(root->in_max_high + 1, res);
708  *                return res;
709  *        } else if (low < interval_low(root)) {
710  *                interval_expand_low(root->in_left, low);
711  *                return res;
712  *        }
713  *
714  *        if (interval_high(root) < low)
715  *                res = max_u64(interval_high(root) + 1, res);
716  *        interval_expand_low(root->in_left, low);
717  *        interval_expand_low(root->in_right, low);
718  *
719  *        return res;
720  * }
721  *
722  * It is easy to eliminate the recursion; see interval_search for
723  * an example. -jay
724  */
725 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
726 {
727         /* we only concern the empty tree right now. */
728         if (root == NULL)
729                 return 0;
730         return low;
731 }
732
733 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
734 {
735         __u64 result = ~0;
736
737         while (node != NULL) {
738                 if (node->in_max_high < high)
739                         break;
740                         
741                 if (interval_low(node) > high) {
742                         result = interval_low(node) - 1;
743                         node = node->in_left;
744                 } else {
745                         node = node->in_right;
746                 }
747         }
748
749         return result;
750 }
751
752 /* expanding the extent based on @ext. */
753 void interval_expand(struct interval_node *root,
754                      struct interval_node_extent *ext,
755                      struct interval_node_extent *limiter)
756 {
757         /* The assertion of interval_is_overlapped is expensive because we may
758          * travel many nodes to find the overlapped node. */
759         LASSERT(interval_is_overlapped(root, ext) == 0);
760         if (!limiter || limiter->start < ext->start)
761                 ext->start = interval_expand_low(root, ext->start);
762         if (!limiter || limiter->end > ext->end)
763                 ext->end = interval_expand_high(root, ext->end);
764         LASSERT(interval_is_overlapped(root, ext) == 0);
765 }
766 EXPORT_SYMBOL(interval_expand);