Whamcloud - gitweb
262f2590cbb0ee26ab0698990ec1246b7717c86d
[fs/lustre-release.git] / lustre / ldlm / interval_tree.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * lustre/ldlm/interval_tree.c
35  *
36  * Interval tree library used by ldlm extent lock code
37  *
38  * Author: Huang Wei <huangwei@clusterfs.com>
39  * Author: Jay Xiong <jinshan.xiong@sun.com>
40  */
41 #ifdef __KERNEL__
42 # include <lustre_dlm.h>
43 #else
44 # include <libcfs/libcfs.h>
45 #endif
46 #include <interval_tree.h>
47
/* Colours stored in interval_node::in_color by the balancing code below. */
enum {
        INTERVAL_RED,           /* == 0 */
        INTERVAL_BLACK          /* == 1 */
};
52
53 static inline int node_is_left_child(struct interval_node *node)
54 {
55         LASSERT(node->in_parent != NULL);
56         return node == node->in_parent->in_left;
57 }
58
59 static inline int node_is_right_child(struct interval_node *node)
60 {
61         LASSERT(node->in_parent != NULL);
62         return node == node->in_parent->in_right;
63 }
64
65 static inline int node_is_red(struct interval_node *node)
66 {
67         return node->in_color == INTERVAL_RED;
68 }
69
70 static inline int node_is_black(struct interval_node *node)
71 {
72         return node->in_color == INTERVAL_BLACK;
73 }
74
75 static inline int extent_compare(struct interval_node_extent *e1,
76                                  struct interval_node_extent *e2)
77 {
78         int rc;
79         if (e1->start == e2->start) {
80                 if (e1->end < e2->end)
81                         rc = -1;
82                 else if (e1->end > e2->end)
83                         rc = 1;
84                 else
85                         rc = 0;
86         } else {
87                 if (e1->start < e2->start)
88                         rc = -1;
89                 else
90                         rc = 1;
91         }
92         return rc;
93 }
94
95 static inline int extent_equal(struct interval_node_extent *e1,
96                                struct interval_node_extent *e2)
97 {
98         return (e1->start == e2->start) && (e1->end == e2->end);
99 }
100
101 static inline int extent_overlapped(struct interval_node_extent *e1,
102                                     struct interval_node_extent *e2)
103 {
104         return (e1->start <= e2->end) && (e2->start <= e1->end);
105 }
106
107 static inline int node_compare(struct interval_node *n1,
108                                struct interval_node *n2)
109 {
110         return extent_compare(&n1->in_extent, &n2->in_extent);
111 }
112
113 int node_equal(struct interval_node *n1, struct interval_node *n2)
114 {
115         return extent_equal(&n1->in_extent, &n2->in_extent);
116 }
117 EXPORT_SYMBOL(node_equal);
118
/* Larger of two unsigned 64-bit values. */
static inline __u64 max_u64(__u64 x, __u64 y)
{
        if (x > y)
                return x;
        return y;
}
123
/* Smaller of two unsigned 64-bit values. */
static inline __u64 min_u64(__u64 x, __u64 y)
{
        if (x < y)
                return x;
        return y;
}
128
/*
 * Walk every node of the tree rooted at @root, using @node as the cursor.
 * interval_for_each() goes from interval_first() forward via
 * interval_next(); interval_for_each_reverse() goes from interval_last()
 * backward via interval_prev().
 *
 * @node is evaluated several times and assigned to, so it must be a plain
 * lvalue without side effects.  It is parenthesised in the expansion so
 * that any lvalue expression expands safely.
 */
#define interval_for_each(node, root)                           \
for ((node) = interval_first(root); (node) != NULL;             \
     (node) = interval_next(node))

#define interval_for_each_reverse(node, root)                   \
for ((node) = interval_last(root); (node) != NULL;              \
     (node) = interval_prev(node))
136
137 static struct interval_node *interval_first(struct interval_node *node)
138 {
139         ENTRY;
140
141         if (!node)
142                 RETURN(NULL);
143         while (node->in_left)
144                 node = node->in_left;
145         RETURN(node);
146 }
147
148 static struct interval_node *interval_last(struct interval_node *node)
149 {
150         ENTRY;
151
152         if (!node)
153                 RETURN(NULL);
154         while (node->in_right)
155                 node = node->in_right;
156         RETURN(node);
157 }
158
159 static struct interval_node *interval_next(struct interval_node *node)
160 {
161         ENTRY;
162
163         if (!node)
164                 RETURN(NULL);
165         if (node->in_right)
166                 RETURN(interval_first(node->in_right));
167         while (node->in_parent && node_is_right_child(node))
168                 node = node->in_parent;
169         RETURN(node->in_parent);
170 }
171
172 static struct interval_node *interval_prev(struct interval_node *node)
173 {
174         ENTRY;
175
176         if (!node)
177                 RETURN(NULL);
178
179         if (node->in_left)
180                 RETURN(interval_last(node->in_left));
181
182         while (node->in_parent && node_is_left_child(node))
183                 node = node->in_parent;
184
185         RETURN(node->in_parent);
186 }
187
188 enum interval_iter interval_iterate(struct interval_node *root,
189                                     interval_callback_t func,
190                                     void *data)
191 {
192         struct interval_node *node;
193         enum interval_iter rc = INTERVAL_ITER_CONT;
194         ENTRY;
195         
196         interval_for_each(node, root) {
197                 rc = func(node, data);
198                 if (rc == INTERVAL_ITER_STOP)
199                         break;
200         }
201
202         RETURN(rc);
203 }
204 EXPORT_SYMBOL(interval_iterate);
205
206 enum interval_iter interval_iterate_reverse(struct interval_node *root,
207                                             interval_callback_t func,
208                                             void *data)
209 {
210         struct interval_node *node;
211         enum interval_iter rc = INTERVAL_ITER_CONT;
212         ENTRY;
213         
214         interval_for_each_reverse(node, root) {
215                 rc = func(node, data);
216                 if (rc == INTERVAL_ITER_STOP)
217                         break;
218         }
219
220         RETURN(rc);
221 }
222 EXPORT_SYMBOL(interval_iterate_reverse);
223
224 /* try to find a node with same interval in the tree,
225  * if found, return the pointer to the node, otherwise return NULL*/
226 struct interval_node *interval_find(struct interval_node *root,
227                                     struct interval_node_extent *ex)
228 {
229         struct interval_node *walk = root;
230         int rc;
231         ENTRY;
232
233         while (walk) {
234                 rc = extent_compare(ex, &walk->in_extent);
235                 if (rc == 0)
236                         break;
237                 else if (rc < 0)
238                         walk = walk->in_left;
239                 else
240                         walk = walk->in_right;
241         }
242
243         RETURN(walk);
244 }
245 EXPORT_SYMBOL(interval_find);
246
247 static void __rotate_change_maxhigh(struct interval_node *node,
248                                     struct interval_node *rotate)
249 {
250         __u64 left_max, right_max;
251
252         rotate->in_max_high = node->in_max_high;
253         left_max = node->in_left ? node->in_left->in_max_high : 0;
254         right_max = node->in_right ? node->in_right->in_max_high : 0;
255         node->in_max_high  = max_u64(interval_high(node),
256                                      max_u64(left_max,right_max));
257 }
258
259 /* The left rotation "pivots" around the link from node to node->right, and
260  * - node will be linked to node->right's left child, and
261  * - node->right's left child will be linked to node's right child.  */
262 static void __rotate_left(struct interval_node *node,
263                           struct interval_node **root)
264 {
265         struct interval_node *right = node->in_right;
266         struct interval_node *parent = node->in_parent;
267
268         node->in_right = right->in_left;
269         if (node->in_right)
270                 right->in_left->in_parent = node;
271
272         right->in_left = node;
273         right->in_parent = parent;
274         if (parent) {
275                 if (node_is_left_child(node))
276                         parent->in_left = right;
277                 else
278                         parent->in_right = right;
279         } else {
280                 *root = right;
281         }
282         node->in_parent = right;
283
284         /* update max_high for node and right */
285         __rotate_change_maxhigh(node, right);
286 }
287
288 /* The right rotation "pivots" around the link from node to node->left, and
289  * - node will be linked to node->left's right child, and
290  * - node->left's right child will be linked to node's left child.  */
291 static void __rotate_right(struct interval_node *node,
292                            struct interval_node **root)
293 {
294         struct interval_node *left = node->in_left;
295         struct interval_node *parent = node->in_parent;
296
297         node->in_left = left->in_right;
298         if (node->in_left)
299                 left->in_right->in_parent = node;
300         left->in_right = node;
301
302         left->in_parent = parent;
303         if (parent) {
304                 if (node_is_right_child(node))
305                         parent->in_right = left;
306                 else
307                         parent->in_left = left;
308         } else {
309                 *root = left;
310         }
311         node->in_parent = left;
312
313         /* update max_high for node and left */
314         __rotate_change_maxhigh(node, left);
315 }
316
/*
 * Exchange two `struct interval_node *` lvalues.
 *
 * The temporary has a name that callers will not use, so invoking the
 * macro on a variable called `c` no longer makes the temporary initialise
 * itself and leave both operands unchanged.  Arguments are parenthesised;
 * each is still evaluated twice, so they must be side-effect free.
 */
#define interval_swap(a, b) do {                                \
        struct interval_node *__interval_swap_tmp = (a);        \
        (a) = (b);                                              \
        (b) = __interval_swap_tmp;                              \
} while (0)
320
321 /*
322  * Operations INSERT and DELETE, when run on a tree with n keys, 
323  * take O(logN) time.Because they modify the tree, the result 
324  * may violate the red-black properties.To restore these properties, 
325  * we must change the colors of some of the nodes in the tree 
326  * and also change the pointer structure.
327  */
328 static void interval_insert_color(struct interval_node *node,
329                                   struct interval_node **root)
330 {
331         struct interval_node *parent, *gparent;
332         ENTRY;
333
334         while ((parent = node->in_parent) && node_is_red(parent)) {
335                 gparent = parent->in_parent;
336                 /* Parent is RED, so gparent must not be NULL */
337                 if (node_is_left_child(parent)) {
338                         struct interval_node *uncle;
339                         uncle = gparent->in_right;
340                         if (uncle && node_is_red(uncle)) {
341                                 uncle->in_color = INTERVAL_BLACK;
342                                 parent->in_color = INTERVAL_BLACK;
343                                 gparent->in_color = INTERVAL_RED;
344                                 node = gparent;
345                                 continue;
346                         }
347
348                         if (parent->in_right == node) {
349                                 __rotate_left(parent, root);
350                                 interval_swap(node, parent);
351                         }
352
353                         parent->in_color = INTERVAL_BLACK;
354                         gparent->in_color = INTERVAL_RED;
355                         __rotate_right(gparent, root);
356                 } else {
357                         struct interval_node *uncle;
358                         uncle = gparent->in_left;
359                         if (uncle && node_is_red(uncle)) {
360                                 uncle->in_color = INTERVAL_BLACK;
361                                 parent->in_color = INTERVAL_BLACK;
362                                 gparent->in_color = INTERVAL_RED;
363                                 node = gparent;
364                                 continue;
365                         }
366
367                         if (node_is_left_child(node)) {
368                                 __rotate_right(parent, root);
369                                 interval_swap(node, parent);
370                         }
371
372                         parent->in_color = INTERVAL_BLACK;
373                         gparent->in_color = INTERVAL_RED;
374                         __rotate_left(gparent, root);
375                 }
376         }
377
378         (*root)->in_color = INTERVAL_BLACK;
379         EXIT;
380 }
381
382 struct interval_node *interval_insert(struct interval_node *node,
383                                       struct interval_node **root)
384                      
385 {
386         struct interval_node **p, *parent = NULL;
387         ENTRY;
388
389         LASSERT(!interval_is_intree(node));
390         p = root;
391         while (*p) {
392                 parent = *p;
393                 if (node_equal(parent, node))
394                         RETURN(parent);
395
396                 /* max_high field must be updated after each iteration */
397                 if (parent->in_max_high < interval_high(node))
398                         parent->in_max_high = interval_high(node);
399
400                 if (node_compare(node, parent) < 0)
401                         p = &parent->in_left;
402                 else 
403                         p = &parent->in_right;
404         }
405
406         /* link node into the tree */
407         node->in_parent = parent;
408         node->in_color = INTERVAL_RED;
409         node->in_left = node->in_right = NULL;
410         *p = node;
411
412         interval_insert_color(node, root);
413         node->in_intree = 1;
414
415         RETURN(NULL);
416 }
417 EXPORT_SYMBOL(interval_insert);
418
419 static inline int node_is_black_or_0(struct interval_node *node)
420 {
421         return !node || node_is_black(node);
422 }
423
424 static void interval_erase_color(struct interval_node *node,
425                                  struct interval_node *parent,
426                                  struct interval_node **root)
427 {
428         struct interval_node *tmp;
429         ENTRY;
430
431         while (node_is_black_or_0(node) && node != *root) {
432                 if (parent->in_left == node) {
433                         tmp = parent->in_right;
434                         if (node_is_red(tmp)) {
435                                 tmp->in_color = INTERVAL_BLACK;
436                                 parent->in_color = INTERVAL_RED;
437                                 __rotate_left(parent, root);
438                                 tmp = parent->in_right;
439                         }
440                         if (node_is_black_or_0(tmp->in_left) &&
441                             node_is_black_or_0(tmp->in_right)) {
442                                 tmp->in_color = INTERVAL_RED;
443                                 node = parent;
444                                 parent = node->in_parent;
445                         } else {
446                                 if (node_is_black_or_0(tmp->in_right)) {
447                                         struct interval_node *o_left;
448                                         if ((o_left = tmp->in_left))
449                                              o_left->in_color = INTERVAL_BLACK;
450                                         tmp->in_color = INTERVAL_RED;
451                                         __rotate_right(tmp, root);
452                                         tmp = parent->in_right;
453                                 }
454                                 tmp->in_color = parent->in_color;
455                                 parent->in_color = INTERVAL_BLACK;
456                                 if (tmp->in_right)
457                                     tmp->in_right->in_color = INTERVAL_BLACK;
458                                 __rotate_left(parent, root);
459                                 node = *root;
460                                 break;
461                         }
462                 } else {
463                         tmp = parent->in_left;
464                         if (node_is_red(tmp)) {
465                                 tmp->in_color = INTERVAL_BLACK;
466                                 parent->in_color = INTERVAL_RED;
467                                 __rotate_right(parent, root);
468                                 tmp = parent->in_left;
469                         }
470                         if (node_is_black_or_0(tmp->in_left) &&
471                             node_is_black_or_0(tmp->in_right)) {
472                                 tmp->in_color = INTERVAL_RED;
473                                 node = parent;
474                                 parent = node->in_parent;
475                         } else {
476                                 if (node_is_black_or_0(tmp->in_left)) {
477                                         struct interval_node *o_right;
478                                         if ((o_right = tmp->in_right))
479                                             o_right->in_color = INTERVAL_BLACK;
480                                         tmp->in_color = INTERVAL_RED;
481                                         __rotate_left(tmp, root);
482                                         tmp = parent->in_left;
483                                 }
484                                 tmp->in_color = parent->in_color;
485                                 parent->in_color = INTERVAL_BLACK;
486                                 if (tmp->in_left)
487                                         tmp->in_left->in_color = INTERVAL_BLACK;
488                                 __rotate_right(parent, root);
489                                 node = *root;
490                                 break;
491                         }
492                 }
493         }
494         if (node)
495                 node->in_color = INTERVAL_BLACK;
496         EXIT;
497 }
498
499 /* 
500  * if the @max_high value of @node is changed, this function traverse  a path 
501  * from node  up to the root to update max_high for the whole tree.
502  */
503 static void update_maxhigh(struct interval_node *node,
504                            __u64  old_maxhigh)
505 {
506         __u64 left_max, right_max;
507         ENTRY;
508
509         while (node) {
510                 left_max = node->in_left ? node->in_left->in_max_high : 0;
511                 right_max = node->in_right ? node->in_right->in_max_high : 0;
512                 node->in_max_high = max_u64(interval_high(node),
513                                             max_u64(left_max, right_max));
514
515                 if (node->in_max_high >= old_maxhigh)
516                         break;
517                 node = node->in_parent;
518         }
519         EXIT;
520 }
521
522 void interval_erase(struct interval_node *node,
523                     struct interval_node **root)
524 {
525         struct interval_node *child, *parent;
526         int color;
527         ENTRY;
528
529         LASSERT(interval_is_intree(node));
530         node->in_intree = 0;
531         if (!node->in_left) {
532                 child = node->in_right;
533         } else if (!node->in_right) {
534                 child = node->in_left;
535         } else { /* Both left and right child are not NULL */
536                 struct interval_node *old = node;
537
538                 node = interval_next(node);
539                 child = node->in_right;
540                 parent = node->in_parent;
541                 color = node->in_color;
542
543                 if (child)
544                         child->in_parent = parent;
545                 if (parent == old)
546                         parent->in_right = child;
547                 else
548                         parent->in_left = child;
549
550                 node->in_color = old->in_color;
551                 node->in_right = old->in_right;
552                 node->in_left = old->in_left;
553                 node->in_parent = old->in_parent;
554
555                 if (old->in_parent) {
556                         if (node_is_left_child(old))
557                                 old->in_parent->in_left = node;
558                         else
559                                 old->in_parent->in_right = node;
560                 } else {
561                         *root = node;
562                 }
563
564                 old->in_left->in_parent = node;
565                 if (old->in_right)
566                         old->in_right->in_parent = node;
567                 update_maxhigh(child ? : parent, node->in_max_high);
568                 update_maxhigh(node, old->in_max_high);
569                 if (parent == old)
570                          parent = node;
571                 goto color;
572         }
573         parent = node->in_parent;
574         color = node->in_color;
575
576         if (child)
577                 child->in_parent = parent;
578         if (parent) {
579                 if (node_is_left_child(node))
580                         parent->in_left = child;
581                 else
582                         parent->in_right = child;
583         } else {
584                 *root = child;
585         }
586
587         update_maxhigh(child ? : parent, node->in_max_high);
588
589 color:
590         if (color == INTERVAL_BLACK)
591                 interval_erase_color(child, parent, root);
592         EXIT;
593 }
594 EXPORT_SYMBOL(interval_erase);
595
596 static inline int interval_may_overlap(struct interval_node *node,
597                                           struct interval_node_extent *ext)
598 {
599         return (ext->start <= node->in_max_high &&
600                 ext->end >= interval_low(node));
601 }
602
603 /*
604  * This function finds all intervals that overlap interval ext,
605  * and calls func to handle resulted intervals one by one.
606  * in lustre, this function will find all conflicting locks in
607  * the granted queue and add these locks to the ast work list.
608  *
609  * {
610  *       if (node == NULL)
611  *               return 0;
612  *       if (ext->end < interval_low(node)) {
613  *               interval_search(node->in_left, ext, func, data);
614  *       } else if (interval_may_overlap(node, ext)) {
615  *               if (extent_overlapped(ext, &node->in_extent))
616  *                       func(node, data);
617  *               interval_search(node->in_left, ext, func, data);
618  *               interval_search(node->in_right, ext, func, data);
619  *       }
620  *       return 0;
621  * }
622  *
623  */
624 enum interval_iter interval_search(struct interval_node *node,
625                                    struct interval_node_extent *ext,
626                                    interval_callback_t func,
627                                    void *data)
628 {
629         struct interval_node *parent;
630         enum interval_iter rc = INTERVAL_ITER_CONT;
631
632         ENTRY;
633
634         LASSERT(ext != NULL);
635         LASSERT(func != NULL);
636
637         while (node) {
638                 if (ext->end < interval_low(node)) {
639                         if (node->in_left) {
640                                 node = node->in_left;
641                                 continue;
642                         }
643                 } else if (interval_may_overlap(node, ext)) {
644                         if (extent_overlapped(ext, &node->in_extent)) {
645                                 rc = func(node, data);
646                                 if (rc == INTERVAL_ITER_STOP)
647                                         break;
648                         }
649
650                         if (node->in_left) {
651                                 node = node->in_left;
652                                 continue;
653                         }
654                         if (node->in_right) {
655                                 node = node->in_right;
656                                 continue;
657                         }
658                 }
659
660                 parent = node->in_parent;
661                 while (parent) {
662                         if (node_is_left_child(node) &&
663                             parent->in_right) {
664                                 /* If we ever got the left, it means that the
665                                  * parent met ext->end<interval_low(parent), or
666                                  * may_overlap(parent). If the former is true,
667                                  * we needn't go back. So stop early and check
668                                  * may_overlap(parent) after this loop.  */
669                                 node = parent->in_right;
670                                 break;
671                         }
672                         node = parent;
673                         parent = parent->in_parent;
674                 }
675                 if (parent == NULL || !interval_may_overlap(parent, ext))
676                         break;
677         }
678
679         RETURN(rc);
680 }
681 EXPORT_SYMBOL(interval_search);
682
683 static enum interval_iter interval_overlap_cb(struct interval_node *n,
684                                               void *args)
685 {
686         *(int *)args = 1;
687         return INTERVAL_ITER_STOP;
688 }
689
690 int interval_is_overlapped(struct interval_node *root,
691                            struct interval_node_extent *ext)
692 {
693         int has = 0;
694         (void)interval_search(root, ext, interval_overlap_cb, &has);
695         return has;
696 }
697 EXPORT_SYMBOL(interval_is_overlapped);
698
699 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
700  * some extents, because programs seldom do IO backward.
701  *
702  * The recursive algorithm of expanding low:
703  * expand_low {
704  *        struct interval_node *tmp;
705  *        static __u64 res = 0;
706  *
707  *        if (root == NULL)
708  *                return res;
709  *        if (root->in_max_high < low) {
710  *                res = max_u64(root->in_max_high + 1, res);
711  *                return res;
712  *        } else if (low < interval_low(root)) {
713  *                interval_expand_low(root->in_left, low);
714  *                return res;
715  *        }
716  *
717  *        if (interval_high(root) < low)
718  *                res = max_u64(interval_high(root) + 1, res);
719  *        interval_expand_low(root->in_left, low);
720  *        interval_expand_low(root->in_right, low);
721  *
722  *        return res;
723  * }
724  *
725  * It's much easy to eliminate the recursion, see interval_search for 
726  * an example. -jay
727  */
728 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
729 {
730         /* we only concern the empty tree right now. */
731         if (root == NULL)
732                 return 0;
733         return low;
734 }
735
736 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
737 {
738         __u64 result = ~0;
739
740         while (node != NULL) {
741                 if (node->in_max_high < high)
742                         break;
743                         
744                 if (interval_low(node) > high) {
745                         result = interval_low(node) - 1;
746                         node = node->in_left;
747                 } else {
748                         node = node->in_right;
749                 }
750         }
751
752         return result;
753 }
754
755 /* expanding the extent based on @ext. */
756 void interval_expand(struct interval_node *root,
757                      struct interval_node_extent *ext,
758                      struct interval_node_extent *limiter)
759 {
760         /* The assertion of interval_is_overlapped is expensive because we may
761          * travel many nodes to find the overlapped node. */
762         LASSERT(interval_is_overlapped(root, ext) == 0);
763         if (!limiter || limiter->start < ext->start)
764                 ext->start = interval_expand_low(root, ext->start);
765         if (!limiter || limiter->end > ext->end)
766                 ext->end = interval_expand_high(root, ext->end);
767         LASSERT(interval_is_overlapped(root, ext) == 0);
768 }
769 EXPORT_SYMBOL(interval_expand);