Whamcloud - gitweb
branch: HEAD
[fs/lustre-release.git] / lustre / ldlm / interval_tree.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Interval tree library used by ldlm extent lock code
5  *
6  *  Copyright (c) 2007 Cluster File Systems, Inc.
7  *   Author: Huang Wei <huangwei@clusterfs.com>
8  *   Author: Jay Xiong <jinshan.xiong@sun.com>
9  *
10  *   This file is part of the Lustre file system, http://www.lustre.org
11  *   Lustre is a trademark of Cluster File Systems, Inc.
12  *
13  *   You may have signed or agreed to another license before downloading
14  *   this software.  If so, you are bound by the terms and conditions
15  *   of that agreement, and the following does not apply to you.  See the
16  *   LICENSE file included with this distribution for more information.
17  *
18  *   If you did not agree to a different license, then this copy of Lustre
19  *   is open source software; you can redistribute it and/or modify it
20  *   under the terms of version 2 of the GNU General Public License as
21  *   published by the Free Software Foundation.
22  *
23  *   In either case, Lustre is distributed in the hope that it will be
24  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
25  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26  *   license text for more details.
27  */
28 #ifdef __KERNEL__
29 # include <lustre_dlm.h>
30 #else
31 # include <liblustre.h>
32 #endif
33 #include <obd_support.h>
34 #include <interval_tree.h>
35
/* Node colors used by the red-black balancing code in this file. */
enum {
        INTERVAL_RED,           /* == 0 */
        INTERVAL_BLACK          /* == 1 */
};
40
41 static inline int node_is_left_child(struct interval_node *node)
42 {
43         LASSERT(node->in_parent != NULL);
44         return node == node->in_parent->in_left;
45 }
46
47 static inline int node_is_right_child(struct interval_node *node)
48 {
49         LASSERT(node->in_parent != NULL);
50         return node == node->in_parent->in_right;
51 }
52
53 static inline int node_is_red(struct interval_node *node)
54 {
55         return node->in_color == INTERVAL_RED;
56 }
57
58 static inline int node_is_black(struct interval_node *node)
59 {
60         return node->in_color == INTERVAL_BLACK;
61 }
62
63 static inline int extent_compare(struct interval_node_extent *e1,
64                                  struct interval_node_extent *e2)
65 {
66         int rc;
67         if (e1->start == e2->start) {
68                 if (e1->end < e2->end)
69                         rc = -1;
70                 else if (e1->end > e2->end)
71                         rc = 1;
72                 else
73                         rc = 0;
74         } else {
75                 if (e1->start < e2->start)
76                         rc = -1;
77                 else
78                         rc = 1;
79         }
80         return rc;
81 }
82
83 static inline int extent_equal(struct interval_node_extent *e1,
84                                struct interval_node_extent *e2)
85 {
86         return (e1->start == e2->start) && (e1->end == e2->end);
87 }
88
89 static inline int extent_overlapped(struct interval_node_extent *e1, 
90                                     struct interval_node_extent *e2)
91 {
92         return (e1->start <= e2->end) && (e2->start <= e1->end);
93 }
94
95 static inline int node_compare(struct interval_node *n1,
96                                struct interval_node *n2)
97 {
98         return extent_compare(&n1->in_extent, &n2->in_extent);
99 }
100
101 static inline int node_equal(struct interval_node *n1,
102                              struct interval_node *n2)
103 {
104         return extent_equal(&n1->in_extent, &n2->in_extent);
105 }
106
107 static inline __u64 max_u64(__u64 x, __u64 y)
108 {
109         return x > y ? x : y;
110 }
111
112 static inline __u64 min_u64(__u64 x, __u64 y)
113 {
114         return x < y ? x : y;
115 }
116
/* Walk the tree rooted at @root in order, starting at the leftmost node.
 * @node is the loop cursor; it is NULL once the walk has run to completion.
 * The cursor argument is parenthesized so that any lvalue expression can be
 * passed safely. */
#define interval_for_each(node, root)                           \
for ((node) = interval_first(root); (node) != NULL;             \
     (node) = interval_next(node))
120
/* Walk the tree rooted at @root in reverse order, starting at the rightmost
 * node.  @node is the loop cursor; it is NULL once the walk has run to
 * completion.  The cursor argument is parenthesized so that any lvalue
 * expression can be passed safely. */
#define interval_for_each_reverse(node, root)                   \
for ((node) = interval_last(root); (node) != NULL;              \
     (node) = interval_prev(node))
124
125 static struct interval_node *interval_first(struct interval_node *node)
126 {
127         ENTRY;
128
129         if (!node)
130                 RETURN(NULL);
131         while (node->in_left)
132                 node = node->in_left;
133         RETURN(node);
134 }
135
136 static struct interval_node *interval_last(struct interval_node *node)
137 {
138         ENTRY;
139
140         if (!node)
141                 RETURN(NULL);
142         while (node->in_right)
143                 node = node->in_right;
144         RETURN(node);
145 }
146
147 static struct interval_node *interval_next(struct interval_node *node)
148 {
149         ENTRY;
150
151         if (!node)
152                 RETURN(NULL);
153         if (node->in_right)
154                 RETURN(interval_first(node->in_right));
155         while (node->in_parent && node_is_right_child(node))
156                 node = node->in_parent;
157         RETURN(node->in_parent);
158 }
159
160 static struct interval_node *interval_prev(struct interval_node *node)
161 {
162         ENTRY;
163
164         if (!node)
165                 RETURN(NULL);
166
167         if (node->in_left)
168                 RETURN(interval_last(node->in_left));
169
170         while (node->in_parent && node_is_left_child(node))
171                 node = node->in_parent;
172
173         RETURN(node->in_parent);
174 }
175
176 enum interval_iter interval_iterate(struct interval_node *root,
177                                     interval_callback_t func,
178                                     void *data)
179 {
180         struct interval_node *node;
181         enum interval_iter rc = INTERVAL_ITER_CONT;
182         ENTRY;
183         
184         interval_for_each(node, root) {
185                 rc = func(node, data);
186                 if (rc == INTERVAL_ITER_STOP)
187                         break;
188         }
189
190         RETURN(rc);
191 }
192 EXPORT_SYMBOL(interval_iterate);
193
194 enum interval_iter interval_iterate_reverse(struct interval_node *root,
195                                             interval_callback_t func,
196                                             void *data)
197 {
198         struct interval_node *node;
199         enum interval_iter rc = INTERVAL_ITER_CONT;
200         ENTRY;
201         
202         interval_for_each_reverse(node, root) {
203                 rc = func(node, data);
204                 if (rc == INTERVAL_ITER_STOP)
205                         break;
206         }
207
208         RETURN(rc);
209 }
210 EXPORT_SYMBOL(interval_iterate_reverse);
211
212 /* try to find a node with same interval in the tree,
213  * if found, return the pointer to the node, otherwise return NULL*/
214 struct interval_node *interval_find(struct interval_node *root,
215                                     struct interval_node_extent *ex)
216 {
217         struct interval_node *walk = root;
218         int rc;
219         ENTRY;
220
221         while (walk) {
222                 rc = extent_compare(ex, &walk->in_extent);
223                 if (rc == 0)
224                         break;
225                 else if (rc < 0)
226                         walk = walk->in_left;
227                 else
228                         walk = walk->in_right;
229         }
230
231         RETURN(walk);
232 }
233 EXPORT_SYMBOL(interval_find);
234
235 static void __rotate_change_maxhigh(struct interval_node *node,
236                                     struct interval_node *rotate)
237 {
238         __u64 left_max, right_max;
239
240         rotate->in_max_high = node->in_max_high;
241         left_max = node->in_left ? node->in_left->in_max_high : 0;
242         right_max = node->in_right ? node->in_right->in_max_high : 0;
243         node->in_max_high  = max_u64(interval_high(node),
244                                      max_u64(left_max,right_max));
245 }
246
247 /* The left rotation "pivots" around the link from node to node->right, and
248  * - node will be linked to node->right's left child, and
249  * - node->right's left child will be linked to node's right child.  */
250 static void __rotate_left(struct interval_node *node,
251                           struct interval_node **root)
252 {
253         struct interval_node *right = node->in_right;
254         struct interval_node *parent = node->in_parent;
255
256         node->in_right = right->in_left;
257         if (node->in_right)
258                 right->in_left->in_parent = node;
259
260         right->in_left = node;
261         right->in_parent = parent;
262         if (parent) {
263                 if (node_is_left_child(node))
264                         parent->in_left = right;
265                 else
266                         parent->in_right = right;
267         } else {
268                 *root = right;
269         }
270         node->in_parent = right;
271
272         /* update max_high for node and right */
273         __rotate_change_maxhigh(node, right);
274 }
275
276 /* The right rotation "pivots" around the link from node to node->left, and
277  * - node will be linked to node->left's right child, and
278  * - node->left's right child will be linked to node's left child.  */
279 static void __rotate_right(struct interval_node *node,
280                            struct interval_node **root)
281 {
282         struct interval_node *left = node->in_left;
283         struct interval_node *parent = node->in_parent;
284
285         node->in_left = left->in_right;
286         if (node->in_left)
287                 left->in_right->in_parent = node;
288         left->in_right = node;
289
290         left->in_parent = parent;
291         if (parent) {
292                 if (node_is_right_child(node))
293                         parent->in_right = left;
294                 else
295                         parent->in_left = left;
296         } else {
297                 *root = left;
298         }
299         node->in_parent = left;
300
301         /* update max_high for node and left */
302         __rotate_change_maxhigh(node, left);
303 }
304
/* Exchange two struct interval_node pointers.  @a and @b must be lvalues
 * and each is evaluated twice.  The temporary has a name callers are
 * unlikely to use, so passing a variable called "c" (which the old
 * temporary shadowed) now works, and both arguments are parenthesized. */
#define interval_swap(a, b) do {                                \
        struct interval_node *interval_swap_tmp_ = (a);         \
        (a) = (b);                                              \
        (b) = interval_swap_tmp_;                               \
} while (0)
308
309 /*
310  * Operations INSERT and DELETE, when run on a tree with n keys, 
311  * take O(logN) time.Because they modify the tree, the result 
312  * may violate the red-black properties.To restore these properties, 
313  * we must change the colors of some of the nodes in the tree 
314  * and also change the pointer structure.
315  */
316 static void interval_insert_color(struct interval_node *node,
317                                   struct interval_node **root)
318 {
319         struct interval_node *parent, *gparent;
320         ENTRY;
321
322         while ((parent = node->in_parent) && node_is_red(parent)) {
323                 gparent = parent->in_parent;
324                 /* Parent is RED, so gparent must not be NULL */
325                 if (node_is_left_child(parent)) {
326                         struct interval_node *uncle;
327                         uncle = gparent->in_right;
328                         if (uncle && node_is_red(uncle)) {
329                                 uncle->in_color = INTERVAL_BLACK;
330                                 parent->in_color = INTERVAL_BLACK;
331                                 gparent->in_color = INTERVAL_RED;
332                                 node = gparent;
333                                 continue;
334                         }
335
336                         if (parent->in_right == node) {
337                                 __rotate_left(parent, root);
338                                 interval_swap(node, parent);
339                         }
340
341                         parent->in_color = INTERVAL_BLACK;
342                         gparent->in_color = INTERVAL_RED;
343                         __rotate_right(gparent, root);
344                 } else {
345                         struct interval_node *uncle;
346                         uncle = gparent->in_left;
347                         if (uncle && node_is_red(uncle)) {
348                                 uncle->in_color = INTERVAL_BLACK;
349                                 parent->in_color = INTERVAL_BLACK;
350                                 gparent->in_color = INTERVAL_RED;
351                                 node = gparent;
352                                 continue;
353                         }
354
355                         if (node_is_left_child(node)) {
356                                 __rotate_right(parent, root);
357                                 interval_swap(node, parent);
358                         }
359
360                         parent->in_color = INTERVAL_BLACK;
361                         gparent->in_color = INTERVAL_RED;
362                         __rotate_left(gparent, root);
363                 }
364         }
365
366         (*root)->in_color = INTERVAL_BLACK;
367         EXIT;
368 }
369
370 struct interval_node *interval_insert(struct interval_node *node,
371                                       struct interval_node **root)
372                      
373 {
374         struct interval_node **p, *parent = NULL;
375         ENTRY;
376
377         p = root;
378         while (*p) {
379                 parent = *p;
380                 if (node_equal(parent, node))
381                         RETURN(parent);
382
383                 /* max_high field must be updated after each iteration */
384                 if (parent->in_max_high < interval_high(node))
385                         parent->in_max_high = interval_high(node);
386
387                 if (node_compare(node, parent) < 0)
388                         p = &parent->in_left;
389                 else 
390                         p = &parent->in_right;
391         }
392
393         /* link node into the tree */
394         node->in_parent = parent;
395         node->in_color = INTERVAL_RED;
396         node->in_left = node->in_right = NULL;
397         *p = node;
398
399         interval_insert_color(node, root);
400
401         RETURN(NULL);
402 }
403 EXPORT_SYMBOL(interval_insert);
404
/* Return non-zero when @node is NULL or colored black; a missing child
 * counts as black for the rebalancing code. */
static inline int node_is_black_or_0(struct interval_node *node)
{
        if (node == NULL)
                return 1;

        return node_is_black(node);
}
409
410 static void interval_erase_color(struct interval_node *node,
411                                  struct interval_node *parent,
412                                  struct interval_node **root)
413 {
414         struct interval_node *tmp;
415         ENTRY;
416
417         while (node_is_black_or_0(node) && node != *root) {
418                 if (parent->in_left == node) {
419                         tmp = parent->in_right;
420                         if (node_is_red(tmp)) {
421                                 tmp->in_color = INTERVAL_BLACK;
422                                 parent->in_color = INTERVAL_RED;
423                                 __rotate_left(parent, root);
424                                 tmp = parent->in_right;
425                         }
426                         if (node_is_black_or_0(tmp->in_left) &&
427                             node_is_black_or_0(tmp->in_right)) {
428                                 tmp->in_color = INTERVAL_RED;
429                                 node = parent;
430                                 parent = node->in_parent;
431                         } else {
432                                 if (node_is_black_or_0(tmp->in_right)) {
433                                         struct interval_node *o_left;
434                                         if ((o_left = tmp->in_left))
435                                              o_left->in_color = INTERVAL_BLACK;
436                                         tmp->in_color = INTERVAL_RED;
437                                         __rotate_right(tmp, root);
438                                         tmp = parent->in_right;
439                                 }
440                                 tmp->in_color = parent->in_color;
441                                 parent->in_color = INTERVAL_BLACK;
442                                 if (tmp->in_right)
443                                     tmp->in_right->in_color = INTERVAL_BLACK;
444                                 __rotate_left(parent, root);
445                                 node = *root;
446                                 break;
447                         }
448                 } else {
449                         tmp = parent->in_left;
450                         if (node_is_red(tmp)) {
451                                 tmp->in_color = INTERVAL_BLACK;
452                                 parent->in_color = INTERVAL_RED;
453                                 __rotate_right(parent, root);
454                                 tmp = parent->in_left;
455                         }
456                         if (node_is_black_or_0(tmp->in_left) &&
457                             node_is_black_or_0(tmp->in_right)) {
458                                 tmp->in_color = INTERVAL_RED;
459                                 node = parent;
460                                 parent = node->in_parent;
461                         } else {
462                                 if (node_is_black_or_0(tmp->in_left)) {
463                                         struct interval_node *o_right;
464                                         if ((o_right = tmp->in_right))
465                                             o_right->in_color = INTERVAL_BLACK;
466                                         tmp->in_color = INTERVAL_RED;
467                                         __rotate_left(tmp, root);
468                                         tmp = parent->in_left;
469                                 }
470                                 tmp->in_color = parent->in_color;
471                                 parent->in_color = INTERVAL_BLACK;
472                                 if (tmp->in_left)
473                                         tmp->in_left->in_color = INTERVAL_BLACK;
474                                 __rotate_right(parent, root);
475                                 node = *root;
476                                 break;
477                         }
478                 }
479         }
480         if (node)
481                 node->in_color = INTERVAL_BLACK;
482         EXIT;
483 }
484
485 /* 
486  * if the @max_high value of @node is changed, this function traverse  a path 
487  * from node  up to the root to update max_high for the whole tree.
488  */
489 static void update_maxhigh(struct interval_node *node,
490                            __u64  old_maxhigh)
491 {
492         __u64 left_max, right_max;
493         ENTRY;
494
495         while (node) {
496                 left_max = node->in_left ? node->in_left->in_max_high : 0;
497                 right_max = node->in_right ? node->in_right->in_max_high : 0;
498                 node->in_max_high = max_u64(interval_high(node),
499                                             max_u64(left_max, right_max));
500
501                 if (node->in_max_high >= old_maxhigh)
502                         break;
503                 node = node->in_parent;
504         }
505         EXIT;
506 }
507
508 void interval_erase(struct interval_node *node,
509                     struct interval_node **root)
510 {
511         struct interval_node *child, *parent;
512         int color;
513         ENTRY;
514
515         if (!node->in_left) {
516                 child = node->in_right;
517         } else if (!node->in_right) {
518                 child = node->in_left;
519         } else { /* Both left and right child are not NULL */
520                 struct interval_node *old = node;
521
522                 node = interval_next(node);
523                 child = node->in_right;
524                 parent = node->in_parent;
525                 color = node->in_color;
526
527                 if (child)
528                         child->in_parent = parent;
529                 if (parent == old) {
530                         parent->in_right = child;
531                         parent = node;
532                 } else {
533                         parent->in_left = child;
534                 }
535
536                 node->in_color = old->in_color;
537                 node->in_right = old->in_right;
538                 node->in_left = old->in_left;
539                 node->in_parent = old->in_parent;
540
541                 if (old->in_parent) {
542                         if (node_is_left_child(old))
543                                 old->in_parent->in_left = node;
544                         else
545                                 old->in_parent->in_right = node;
546                 } else {
547                         *root = node;
548                 }
549
550                 old->in_left->in_parent = node;
551                 if (old->in_right)
552                         old->in_right->in_parent = node;
553                 update_maxhigh(child, node->in_max_high);
554                 update_maxhigh(node, old->in_max_high);
555                 goto color;
556         }
557         parent = node->in_parent;
558         color = node->in_color;
559
560         if (child)
561                 child->in_parent = parent;
562         if (parent) {
563                 if (node_is_left_child(node))
564                         parent->in_left = child;
565                 else
566                         parent->in_right = child;
567         } else {
568                 *root = child;
569         }
570
571         update_maxhigh(child, node->in_max_high);
572
573 color:
574         if (color == INTERVAL_BLACK)
575                 interval_erase_color(child, parent, root);
576         EXIT;
577 }
578 EXPORT_SYMBOL(interval_erase);
579
580 static inline int interval_may_overlap(struct interval_node *node,
581                                           struct interval_node_extent *ext)
582 {
583         return (ext->start <= node->in_max_high &&
584                 ext->end >= interval_low(node));
585 }
586
587 /*
588  * This function finds all intervals that overlap interval ext,
589  * and calls func to handle resulted intervals one by one.
590  * in lustre, this function will find all conflicting locks in
591  * the granted queue and add these locks to the ast work list.
592  *
593  * {
594  *       if (node == NULL)
595  *               return 0;
596  *       if (ext->end < interval_low(node)) {
597  *               interval_search(node->in_left, ext, func, data);
598  *       } else if (interval_may_overlap(node, ext)) {
599  *               if (extent_overlapped(ext, &node->in_extent))
600  *                       func(node, data);
601  *               interval_search(node->in_left, ext, func, data);
602  *               interval_search(node->in_right, ext, func, data);
603  *       }
604  *       return 0;
605  * }
606  *
607  */
608 enum interval_iter interval_search(struct interval_node *node,
609                                    struct interval_node_extent *ext,
610                                    interval_callback_t func,
611                                    void *data)
612 {
613         struct interval_node *parent;
614         enum interval_iter rc = INTERVAL_ITER_CONT;
615
616         LASSERT(ext != NULL);
617         LASSERT(func != NULL);
618
619         while (node) {
620                 if (ext->end < interval_low(node)) {
621                         if (node->in_left) {
622                                 node = node->in_left;
623                                 continue;
624                         }
625                 } else if (interval_may_overlap(node, ext)) {
626                         if (extent_overlapped(ext, &node->in_extent)) {
627                                 rc = func(node, data);
628                                 if (rc == INTERVAL_ITER_STOP)
629                                         break;
630                         }
631
632                         if (node->in_left) {
633                                 node = node->in_left;
634                                 continue;
635                         }
636                         if (node->in_right) {
637                                 node = node->in_right;
638                                 continue;
639                         }
640                 } 
641
642                 parent = node->in_parent;
643                 while (parent) {
644                         if (node_is_left_child(node) &&
645                             parent->in_right) {
646                                 /* If we ever got the left, it means that the 
647                                  * parent met ext->end<interval_low(parent), or
648                                  * may_overlap(parent). If the former is true,
649                                  * we needn't go back. So stop early and check
650                                  * may_overlap(parent) after this loop.  */
651                                 node = parent->in_right;
652                                 break;
653                         }
654                         node = parent;
655                         parent = parent->in_parent;
656                 }
657                 if (parent == NULL || !interval_may_overlap(parent, ext))
658                         break;
659         }
660
661         return rc;
662 }
663 EXPORT_SYMBOL(interval_search);
664
665 static enum interval_iter interval_overlap_cb(struct interval_node *n,
666                                               void *args)
667 {
668         *(int *)args = 1;
669         return INTERVAL_ITER_STOP;
670 }
671
672 int interval_is_overlapped(struct interval_node *root,
673                            struct interval_node_extent *ext)
674 {
675         int has = 0;
676         (void)interval_search(root, ext, interval_overlap_cb, &has);
677         return has;
678 }
679 EXPORT_SYMBOL(interval_is_overlapped);
680
681 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
682  * some extents, because programs seldom do IO backward.
683  *
684  * The recursive algorithm of expanding low:
685  * expand_low {
686  *        struct interval_node *tmp;
687  *        static __u64 res = 0;
688  *
689  *        if (root == NULL)
690  *                return res;
691  *        if (root->in_max_high < low) {
692  *                res = max_u64(root->in_max_high + 1, res);
693  *                return res;
694  *        } else if (low < interval_low(root)) {
695  *                interval_expand_low(root->in_left, low);
696  *                return res;
697  *        }
698  *
699  *        if (interval_high(root) < low)
700  *                res = max_u64(interval_high(root) + 1, res);
701  *        interval_expand_low(root->in_left, low);
702  *        interval_expand_low(root->in_right, low);
703  *
704  *        return res;
705  * }
706  *
707  * It's much easy to eliminate the recursion, see interval_search for 
708  * an example. -jay
709  */
710 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
711 {
712         /* we only concern the empty tree right now. */
713         if (root == NULL)
714                 return 0;
715         return low;
716 }
717
718 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
719 {
720         __u64 result = ~0;
721
722         while (node != NULL) {
723                 if (node->in_max_high < high)
724                         break;
725                         
726                 if (interval_low(node) > high) {
727                         result = interval_low(node) - 1;
728                         node = node->in_left;
729                 } else {
730                         node = node->in_right;
731                 }
732         }
733
734         return result;
735 }
736
737 /* expanding the extent based on @ext. */
738 void interval_expand(struct interval_node *root,
739                      struct interval_node_extent *ext,
740                      struct interval_node_extent *limiter)
741 {
742         /* The assertion of interval_is_overlapped is expensive because we may
743          * travel many nodes to find the overlapped node. */
744         LASSERT(interval_is_overlapped(root, ext) == 0);
745         if (!limiter || limiter->start < ext->start)
746                 ext->start = interval_expand_low(root, ext->start);
747         if (!limiter || limiter->end > ext->end)
748                 ext->end = interval_expand_high(root, ext->end);
749         LASSERT(interval_is_overlapped(root, ext) == 0);
750 }
751 EXPORT_SYMBOL(interval_expand);