Whamcloud - gitweb
b=11300
[fs/lustre-release.git] / lustre / ldlm / interval_tree.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Interval tree library used by ldlm extent lock code
5  *
6  *  Copyright (c) 2007 Cluster File Systems, Inc.
7  *   Author: Huang Wei <huangwei@clusterfs.com>
8  *   Author: Jay Xiong <jinshan.xiong@sun.com>
9  *
10  *   This file is part of the Lustre file system, http://www.lustre.org
11  *   Lustre is a trademark of Cluster File Systems, Inc.
12  *
13  *   You may have signed or agreed to another license before downloading
14  *   this software.  If so, you are bound by the terms and conditions
15  *   of that agreement, and the following does not apply to you.  See the
16  *   LICENSE file included with this distribution for more information.
17  *
18  *   If you did not agree to a different license, then this copy of Lustre
19  *   is open source software; you can redistribute it and/or modify it
20  *   under the terms of version 2 of the GNU General Public License as
21  *   published by the Free Software Foundation.
22  *
23  *   In either case, Lustre is distributed in the hope that it will be
24  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
25  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26  *   license text for more details.
27  */
28 #ifdef __KERNEL__
29 # include <lustre_dlm.h>
30 #else
31 # include <liblustre.h>
32 # include <libcfs/kp30.h>
33 #endif
34 #include <obd_support.h>
35 #include <interval_tree.h>
36
/* Node colours of the red-black tree that backs the interval tree. */
enum {
        INTERVAL_RED,           /* == 0 */
        INTERVAL_BLACK          /* == 1 */
};
41
42 static inline int node_is_left_child(struct interval_node *node)
43 {
44         LASSERT(node->in_parent != NULL);
45         return node == node->in_parent->in_left;
46 }
47
48 static inline int node_is_right_child(struct interval_node *node)
49 {
50         LASSERT(node->in_parent != NULL);
51         return node == node->in_parent->in_right;
52 }
53
54 static inline int node_is_red(struct interval_node *node)
55 {
56         return node->in_color == INTERVAL_RED;
57 }
58
59 static inline int node_is_black(struct interval_node *node)
60 {
61         return node->in_color == INTERVAL_BLACK;
62 }
63
64 static inline int extent_compare(struct interval_node_extent *e1,
65                                  struct interval_node_extent *e2)
66 {
67         int rc;
68         if (e1->start == e2->start) {
69                 if (e1->end < e2->end)
70                         rc = -1;
71                 else if (e1->end > e2->end)
72                         rc = 1;
73                 else
74                         rc = 0;
75         } else {
76                 if (e1->start < e2->start)
77                         rc = -1;
78                 else
79                         rc = 1;
80         }
81         return rc;
82 }
83
84 static inline int extent_equal(struct interval_node_extent *e1,
85                                struct interval_node_extent *e2)
86 {
87         return (e1->start == e2->start) && (e1->end == e2->end);
88 }
89
90 static inline int extent_overlapped(struct interval_node_extent *e1, 
91                                     struct interval_node_extent *e2)
92 {
93         return (e1->start <= e2->end) && (e2->start <= e1->end);
94 }
95
96 static inline int node_compare(struct interval_node *n1,
97                                struct interval_node *n2)
98 {
99         return extent_compare(&n1->in_extent, &n2->in_extent);
100 }
101
102 static inline int node_equal(struct interval_node *n1,
103                              struct interval_node *n2)
104 {
105         return extent_equal(&n1->in_extent, &n2->in_extent);
106 }
107
108 static inline __u64 max_u64(__u64 x, __u64 y)
109 {
110         return x > y ? x : y;
111 }
112
113 static inline __u64 min_u64(__u64 x, __u64 y)
114 {
115         return x < y ? x : y;
116 }
117
/* Visit every node of the tree rooted at @root in ascending order;
 * @node is the loop cursor and is NULL once the loop has run to the end. */
#define interval_for_each(node, root)                   \
for ((node) = interval_first(root);                     \
     (node) != NULL;                                    \
     (node) = interval_next(node))
121
/* Visit every node of the tree rooted at @root in descending order;
 * @node is the loop cursor and is NULL once the loop has run to the end. */
#define interval_for_each_reverse(node, root)           \
for ((node) = interval_last(root);                      \
     (node) != NULL;                                    \
     (node) = interval_prev(node))
125
126 static struct interval_node *interval_first(struct interval_node *node)
127 {
128         ENTRY;
129
130         if (!node)
131                 RETURN(NULL);
132         while (node->in_left)
133                 node = node->in_left;
134         RETURN(node);
135 }
136
137 static struct interval_node *interval_last(struct interval_node *node)
138 {
139         ENTRY;
140
141         if (!node)
142                 RETURN(NULL);
143         while (node->in_right)
144                 node = node->in_right;
145         RETURN(node);
146 }
147
148 static struct interval_node *interval_next(struct interval_node *node)
149 {
150         ENTRY;
151
152         if (!node)
153                 RETURN(NULL);
154         if (node->in_right)
155                 RETURN(interval_first(node->in_right));
156         while (node->in_parent && node_is_right_child(node))
157                 node = node->in_parent;
158         RETURN(node->in_parent);
159 }
160
161 static struct interval_node *interval_prev(struct interval_node *node)
162 {
163         ENTRY;
164
165         if (!node)
166                 RETURN(NULL);
167
168         if (node->in_left)
169                 RETURN(interval_last(node->in_left));
170
171         while (node->in_parent && node_is_left_child(node))
172                 node = node->in_parent;
173
174         RETURN(node->in_parent);
175 }
176
177 enum interval_iter interval_iterate(struct interval_node *root,
178                                     interval_callback_t func,
179                                     void *data)
180 {
181         struct interval_node *node;
182         enum interval_iter rc = INTERVAL_ITER_CONT;
183         ENTRY;
184         
185         interval_for_each(node, root) {
186                 rc = func(node, data);
187                 if (rc == INTERVAL_ITER_STOP)
188                         break;
189         }
190
191         RETURN(rc);
192 }
193 EXPORT_SYMBOL(interval_iterate);
194
195 enum interval_iter interval_iterate_reverse(struct interval_node *root,
196                                             interval_callback_t func,
197                                             void *data)
198 {
199         struct interval_node *node;
200         enum interval_iter rc = INTERVAL_ITER_CONT;
201         ENTRY;
202         
203         interval_for_each_reverse(node, root) {
204                 rc = func(node, data);
205                 if (rc == INTERVAL_ITER_STOP)
206                         break;
207         }
208
209         RETURN(rc);
210 }
211 EXPORT_SYMBOL(interval_iterate_reverse);
212
213 /* try to find a node with same interval in the tree,
214  * if found, return the pointer to the node, otherwise return NULL*/
215 struct interval_node *interval_find(struct interval_node *root,
216                                     struct interval_node_extent *ex)
217 {
218         struct interval_node *walk = root;
219         int rc;
220         ENTRY;
221
222         while (walk) {
223                 rc = extent_compare(ex, &walk->in_extent);
224                 if (rc == 0)
225                         break;
226                 else if (rc < 0)
227                         walk = walk->in_left;
228                 else
229                         walk = walk->in_right;
230         }
231
232         RETURN(walk);
233 }
234 EXPORT_SYMBOL(interval_find);
235
236 static void __rotate_change_maxhigh(struct interval_node *node,
237                                     struct interval_node *rotate)
238 {
239         __u64 left_max, right_max;
240
241         rotate->in_max_high = node->in_max_high;
242         left_max = node->in_left ? node->in_left->in_max_high : 0;
243         right_max = node->in_right ? node->in_right->in_max_high : 0;
244         node->in_max_high  = max_u64(interval_high(node),
245                                      max_u64(left_max,right_max));
246 }
247
248 /* The left rotation "pivots" around the link from node to node->right, and
249  * - node will be linked to node->right's left child, and
250  * - node->right's left child will be linked to node's right child.  */
251 static void __rotate_left(struct interval_node *node,
252                           struct interval_node **root)
253 {
254         struct interval_node *right = node->in_right;
255         struct interval_node *parent = node->in_parent;
256
257         node->in_right = right->in_left;
258         if (node->in_right)
259                 right->in_left->in_parent = node;
260
261         right->in_left = node;
262         right->in_parent = parent;
263         if (parent) {
264                 if (node_is_left_child(node))
265                         parent->in_left = right;
266                 else
267                         parent->in_right = right;
268         } else {
269                 *root = right;
270         }
271         node->in_parent = right;
272
273         /* update max_high for node and right */
274         __rotate_change_maxhigh(node, right);
275 }
276
277 /* The right rotation "pivots" around the link from node to node->left, and
278  * - node will be linked to node->left's right child, and
279  * - node->left's right child will be linked to node's left child.  */
280 static void __rotate_right(struct interval_node *node,
281                            struct interval_node **root)
282 {
283         struct interval_node *left = node->in_left;
284         struct interval_node *parent = node->in_parent;
285
286         node->in_left = left->in_right;
287         if (node->in_left)
288                 left->in_right->in_parent = node;
289         left->in_right = node;
290
291         left->in_parent = parent;
292         if (parent) {
293                 if (node_is_right_child(node))
294                         parent->in_right = left;
295                 else
296                         parent->in_left = left;
297         } else {
298                 *root = left;
299         }
300         node->in_parent = left;
301
302         /* update max_high for node and left */
303         __rotate_change_maxhigh(node, left);
304 }
305
/* Exchange the two `struct interval_node *' lvalues @a and @b. */
#define interval_swap(a, b) do {                        \
        struct interval_node *swap_tmp = (a);           \
        (a) = (b);                                      \
        (b) = swap_tmp;                                 \
} while (0)
309
310 /*
311  * Operations INSERT and DELETE, when run on a tree with n keys, 
312  * take O(logN) time.Because they modify the tree, the result 
313  * may violate the red-black properties.To restore these properties, 
314  * we must change the colors of some of the nodes in the tree 
315  * and also change the pointer structure.
316  */
317 static void interval_insert_color(struct interval_node *node,
318                                   struct interval_node **root)
319 {
320         struct interval_node *parent, *gparent;
321         ENTRY;
322
323         while ((parent = node->in_parent) && node_is_red(parent)) {
324                 gparent = parent->in_parent;
325                 /* Parent is RED, so gparent must not be NULL */
326                 if (node_is_left_child(parent)) {
327                         struct interval_node *uncle;
328                         uncle = gparent->in_right;
329                         if (uncle && node_is_red(uncle)) {
330                                 uncle->in_color = INTERVAL_BLACK;
331                                 parent->in_color = INTERVAL_BLACK;
332                                 gparent->in_color = INTERVAL_RED;
333                                 node = gparent;
334                                 continue;
335                         }
336
337                         if (parent->in_right == node) {
338                                 __rotate_left(parent, root);
339                                 interval_swap(node, parent);
340                         }
341
342                         parent->in_color = INTERVAL_BLACK;
343                         gparent->in_color = INTERVAL_RED;
344                         __rotate_right(gparent, root);
345                 } else {
346                         struct interval_node *uncle;
347                         uncle = gparent->in_left;
348                         if (uncle && node_is_red(uncle)) {
349                                 uncle->in_color = INTERVAL_BLACK;
350                                 parent->in_color = INTERVAL_BLACK;
351                                 gparent->in_color = INTERVAL_RED;
352                                 node = gparent;
353                                 continue;
354                         }
355
356                         if (node_is_left_child(node)) {
357                                 __rotate_right(parent, root);
358                                 interval_swap(node, parent);
359                         }
360
361                         parent->in_color = INTERVAL_BLACK;
362                         gparent->in_color = INTERVAL_RED;
363                         __rotate_left(gparent, root);
364                 }
365         }
366
367         (*root)->in_color = INTERVAL_BLACK;
368         EXIT;
369 }
370
371 struct interval_node *interval_insert(struct interval_node *node,
372                                       struct interval_node **root)
373                      
374 {
375         struct interval_node **p, *parent = NULL;
376         ENTRY;
377
378         p = root;
379         while (*p) {
380                 parent = *p;
381                 if (node_equal(parent, node))
382                         RETURN(parent);
383
384                 /* max_high field must be updated after each iteration */
385                 if (parent->in_max_high < interval_high(node))
386                         parent->in_max_high = interval_high(node);
387
388                 if (node_compare(node, parent) < 0)
389                         p = &parent->in_left;
390                 else 
391                         p = &parent->in_right;
392         }
393
394         /* link node into the tree */
395         node->in_parent = parent;
396         node->in_color = INTERVAL_RED;
397         node->in_left = node->in_right = NULL;
398         *p = node;
399
400         interval_insert_color(node, root);
401
402         RETURN(NULL);
403 }
404 EXPORT_SYMBOL(interval_insert);
405
406 static inline int node_is_black_or_0(struct interval_node *node)
407 {
408         return !node || node_is_black(node);
409 }
410
411 static void interval_erase_color(struct interval_node *node,
412                                  struct interval_node *parent,
413                                  struct interval_node **root)
414 {
415         struct interval_node *tmp;
416         ENTRY;
417
418         while (node_is_black_or_0(node) && node != *root) {
419                 if (parent->in_left == node) {
420                         tmp = parent->in_right;
421                         if (node_is_red(tmp)) {
422                                 tmp->in_color = INTERVAL_BLACK;
423                                 parent->in_color = INTERVAL_RED;
424                                 __rotate_left(parent, root);
425                                 tmp = parent->in_right;
426                         }
427                         if (node_is_black_or_0(tmp->in_left) &&
428                             node_is_black_or_0(tmp->in_right)) {
429                                 tmp->in_color = INTERVAL_RED;
430                                 node = parent;
431                                 parent = node->in_parent;
432                         } else {
433                                 if (node_is_black_or_0(tmp->in_right)) {
434                                         struct interval_node *o_left;
435                                         if ((o_left = tmp->in_left))
436                                              o_left->in_color = INTERVAL_BLACK;
437                                         tmp->in_color = INTERVAL_RED;
438                                         __rotate_right(tmp, root);
439                                         tmp = parent->in_right;
440                                 }
441                                 tmp->in_color = parent->in_color;
442                                 parent->in_color = INTERVAL_BLACK;
443                                 if (tmp->in_right)
444                                     tmp->in_right->in_color = INTERVAL_BLACK;
445                                 __rotate_left(parent, root);
446                                 node = *root;
447                                 break;
448                         }
449                 } else {
450                         tmp = parent->in_left;
451                         if (node_is_red(tmp)) {
452                                 tmp->in_color = INTERVAL_BLACK;
453                                 parent->in_color = INTERVAL_RED;
454                                 __rotate_right(parent, root);
455                                 tmp = parent->in_left;
456                         }
457                         if (node_is_black_or_0(tmp->in_left) &&
458                             node_is_black_or_0(tmp->in_right)) {
459                                 tmp->in_color = INTERVAL_RED;
460                                 node = parent;
461                                 parent = node->in_parent;
462                         } else {
463                                 if (node_is_black_or_0(tmp->in_left)) {
464                                         struct interval_node *o_right;
465                                         if ((o_right = tmp->in_right))
466                                             o_right->in_color = INTERVAL_BLACK;
467                                         tmp->in_color = INTERVAL_RED;
468                                         __rotate_left(tmp, root);
469                                         tmp = parent->in_left;
470                                 }
471                                 tmp->in_color = parent->in_color;
472                                 parent->in_color = INTERVAL_BLACK;
473                                 if (tmp->in_left)
474                                         tmp->in_left->in_color = INTERVAL_BLACK;
475                                 __rotate_right(parent, root);
476                                 node = *root;
477                                 break;
478                         }
479                 }
480         }
481         if (node)
482                 node->in_color = INTERVAL_BLACK;
483         EXIT;
484 }
485
486 /* 
487  * if the @max_high value of @node is changed, this function traverse  a path 
488  * from node  up to the root to update max_high for the whole tree.
489  */
490 static void update_maxhigh(struct interval_node *node,
491                            __u64  old_maxhigh)
492 {
493         __u64 left_max, right_max;
494         ENTRY;
495
496         while (node) {
497                 left_max = node->in_left ? node->in_left->in_max_high : 0;
498                 right_max = node->in_right ? node->in_right->in_max_high : 0;
499                 node->in_max_high = max_u64(interval_high(node),
500                                             max_u64(left_max, right_max));
501
502                 if (node->in_max_high >= old_maxhigh)
503                         break;
504                 node = node->in_parent;
505         }
506         EXIT;
507 }
508
509 void interval_erase(struct interval_node *node,
510                     struct interval_node **root)
511 {
512         struct interval_node *child, *parent;
513         int color;
514         ENTRY;
515
516         if (!node->in_left) {
517                 child = node->in_right;
518         } else if (!node->in_right) {
519                 child = node->in_left;
520         } else { /* Both left and right child are not NULL */
521                 struct interval_node *old = node;
522
523                 node = interval_next(node);
524                 child = node->in_right;
525                 parent = node->in_parent;
526                 color = node->in_color;
527
528                 if (child)
529                         child->in_parent = parent;
530                 if (parent == old) {
531                         parent->in_right = child;
532                         parent = node;
533                 } else {
534                         parent->in_left = child;
535                 }
536
537                 node->in_color = old->in_color;
538                 node->in_right = old->in_right;
539                 node->in_left = old->in_left;
540                 node->in_parent = old->in_parent;
541
542                 if (old->in_parent) {
543                         if (node_is_left_child(old))
544                                 old->in_parent->in_left = node;
545                         else
546                                 old->in_parent->in_right = node;
547                 } else {
548                         *root = node;
549                 }
550
551                 old->in_left->in_parent = node;
552                 if (old->in_right)
553                         old->in_right->in_parent = node;
554                 update_maxhigh(child, node->in_max_high);
555                 update_maxhigh(node, old->in_max_high);
556                 goto color;
557         }
558         parent = node->in_parent;
559         color = node->in_color;
560
561         if (child)
562                 child->in_parent = parent;
563         if (parent) {
564                 if (node_is_left_child(node))
565                         parent->in_left = child;
566                 else
567                         parent->in_right = child;
568         } else {
569                 *root = child;
570         }
571
572         update_maxhigh(child, node->in_max_high);
573
574 color:
575         if (color == INTERVAL_BLACK)
576                 interval_erase_color(child, parent, root);
577         EXIT;
578 }
579 EXPORT_SYMBOL(interval_erase);
580
581 static inline int interval_may_overlap(struct interval_node *node,
582                                           struct interval_node_extent *ext)
583 {
584         return (ext->start <= node->in_max_high &&
585                 ext->end >= interval_low(node));
586 }
587
588 /*
589  * This function finds all intervals that overlap interval ext,
590  * and calls func to handle resulted intervals one by one.
591  * in lustre, this function will find all conflicting locks in
592  * the granted queue and add these locks to the ast work list.
593  *
594  * {
595  *       if (node == NULL)
596  *               return 0;
597  *       if (ext->end < interval_low(node)) {
598  *               interval_search(node->in_left, ext, func, data);
599  *       } else if (interval_may_overlap(node, ext)) {
600  *               if (extent_overlapped(ext, &node->in_extent))
601  *                       func(node, data);
602  *               interval_search(node->in_left, ext, func, data);
603  *               interval_search(node->in_right, ext, func, data);
604  *       }
605  *       return 0;
606  * }
607  *
608  */
609 enum interval_iter interval_search(struct interval_node *node,
610                                    struct interval_node_extent *ext,
611                                    interval_callback_t func,
612                                    void *data)
613 {
614         struct interval_node *parent;
615         enum interval_iter rc = INTERVAL_ITER_CONT;
616
617         LASSERT(ext != NULL);
618         LASSERT(func != NULL);
619
620         while (node) {
621                 if (ext->end < interval_low(node)) {
622                         if (node->in_left) {
623                                 node = node->in_left;
624                                 continue;
625                         }
626                 } else if (interval_may_overlap(node, ext)) {
627                         if (extent_overlapped(ext, &node->in_extent)) {
628                                 rc = func(node, data);
629                                 if (rc == INTERVAL_ITER_STOP)
630                                         break;
631                         }
632
633                         if (node->in_left) {
634                                 node = node->in_left;
635                                 continue;
636                         }
637                         if (node->in_right) {
638                                 node = node->in_right;
639                                 continue;
640                         }
641                 } 
642
643                 parent = node->in_parent;
644                 while (parent) {
645                         if (node_is_left_child(node) &&
646                             parent->in_right) {
647                                 /* If we ever got the left, it means that the 
648                                  * parent met ext->end<interval_low(parent), or
649                                  * may_overlap(parent). If the former is true,
650                                  * we needn't go back. So stop early and check
651                                  * may_overlap(parent) after this loop.  */
652                                 node = parent->in_right;
653                                 break;
654                         }
655                         node = parent;
656                         parent = parent->in_parent;
657                 }
658                 if (parent == NULL || !interval_may_overlap(parent, ext))
659                         break;
660         }
661
662         return rc;
663 }
664 EXPORT_SYMBOL(interval_search);
665
666 static enum interval_iter interval_overlap_cb(struct interval_node *n,
667                                               void *args)
668 {
669         *(int *)args = 1;
670         return INTERVAL_ITER_STOP;
671 }
672
673 int interval_is_overlapped(struct interval_node *root,
674                            struct interval_node_extent *ext)
675 {
676         int has = 0;
677         (void)interval_search(root, ext, interval_overlap_cb, &has);
678         return has;
679 }
680 EXPORT_SYMBOL(interval_is_overlapped);
681
682 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
683  * some extents, because programs seldom do IO backward.
684  *
685  * The recursive algorithm of expanding low:
686  * expand_low {
687  *        struct interval_node *tmp;
688  *        static __u64 res = 0;
689  *
690  *        if (root == NULL)
691  *                return res;
692  *        if (root->in_max_high < low) {
693  *                res = max_u64(root->in_max_high + 1, res);
694  *                return res;
695  *        } else if (low < interval_low(root)) {
696  *                interval_expand_low(root->in_left, low);
697  *                return res;
698  *        }
699  *
700  *        if (interval_high(root) < low)
701  *                res = max_u64(interval_high(root) + 1, res);
702  *        interval_expand_low(root->in_left, low);
703  *        interval_expand_low(root->in_right, low);
704  *
705  *        return res;
706  * }
707  *
708  * It's much easy to eliminate the recursion, see interval_search for 
709  * an example. -jay
710  */
711 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
712 {
713         /* we only concern the empty tree right now. */
714         if (root == NULL)
715                 return 0;
716         return low;
717 }
718
719 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
720 {
721         __u64 result = ~0;
722
723         while (node != NULL) {
724                 if (node->in_max_high < high)
725                         break;
726                         
727                 if (interval_low(node) > high) {
728                         result = interval_low(node) - 1;
729                         node = node->in_left;
730                 } else {
731                         node = node->in_right;
732                 }
733         }
734
735         return result;
736 }
737
738 /* expanding the extent based on @ext. */
739 void interval_expand(struct interval_node *root,
740                      struct interval_node_extent *ext,
741                      struct interval_node_extent *limiter)
742 {
743         /* The assertion of interval_is_overlapped is expensive because we may
744          * travel many nodes to find the overlapped node. */
745         LASSERT(interval_is_overlapped(root, ext) == 0);
746         if (!limiter || limiter->start < ext->start)
747                 ext->start = interval_expand_low(root, ext->start);
748         if (!limiter || limiter->end > ext->end)
749                 ext->end = interval_expand_high(root, ext->end);
750         LASSERT(interval_is_overlapped(root, ext) == 0);
751 }
752 EXPORT_SYMBOL(interval_expand);