Whamcloud - gitweb
LU-1669 llite: Replace write mutex with range lock
[fs/lustre-release.git] / lustre / ldlm / interval_tree.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * lustre/ldlm/interval_tree.c
35  *
36  * Interval tree library used by ldlm extent lock code
37  *
38  * Author: Huang Wei <huangwei@clusterfs.com>
39  * Author: Jay Xiong <jinshan.xiong@sun.com>
40  */
41 #ifdef __KERNEL__
42 # include <lustre_dlm.h>
43 #else
44 # include <libcfs/libcfs.h>
45 #endif
46 #include <obd_support.h>
47 #include <interval_tree.h>
48
49 enum {
50         INTERVAL_RED = 0,
51         INTERVAL_BLACK = 1
52 };
53
54 static inline int node_is_left_child(struct interval_node *node)
55 {
56         LASSERT(node->in_parent != NULL);
57         return node == node->in_parent->in_left;
58 }
59
60 static inline int node_is_right_child(struct interval_node *node)
61 {
62         LASSERT(node->in_parent != NULL);
63         return node == node->in_parent->in_right;
64 }
65
66 static inline int node_is_red(struct interval_node *node)
67 {
68         return node->in_color == INTERVAL_RED;
69 }
70
71 static inline int node_is_black(struct interval_node *node)
72 {
73         return node->in_color == INTERVAL_BLACK;
74 }
75
76 static inline int extent_compare(struct interval_node_extent *e1,
77                                  struct interval_node_extent *e2)
78 {
79         int rc;
80         if (e1->start == e2->start) {
81                 if (e1->end < e2->end)
82                         rc = -1;
83                 else if (e1->end > e2->end)
84                         rc = 1;
85                 else
86                         rc = 0;
87         } else {
88                 if (e1->start < e2->start)
89                         rc = -1;
90                 else
91                         rc = 1;
92         }
93         return rc;
94 }
95
96 static inline int extent_equal(struct interval_node_extent *e1,
97                                struct interval_node_extent *e2)
98 {
99         return (e1->start == e2->start) && (e1->end == e2->end);
100 }
101
102 static inline int extent_overlapped(struct interval_node_extent *e1,
103                                     struct interval_node_extent *e2)
104 {
105         return (e1->start <= e2->end) && (e2->start <= e1->end);
106 }
107
108 static inline int node_compare(struct interval_node *n1,
109                                struct interval_node *n2)
110 {
111         return extent_compare(&n1->in_extent, &n2->in_extent);
112 }
113
114 int node_equal(struct interval_node *n1, struct interval_node *n2)
115 {
116         return extent_equal(&n1->in_extent, &n2->in_extent);
117 }
118 EXPORT_SYMBOL(node_equal);
119
120 static inline __u64 max_u64(__u64 x, __u64 y)
121 {
122         return x > y ? x : y;
123 }
124
125 static inline __u64 min_u64(__u64 x, __u64 y)
126 {
127         return x < y ? x : y;
128 }
129
130 #define interval_for_each(node, root)                   \
131 for (node = interval_first(root); node != NULL;         \
132      node = interval_next(node))
133
134 #define interval_for_each_reverse(node, root)           \
135 for (node = interval_last(root); node != NULL;          \
136      node = interval_prev(node))
137
138 static struct interval_node *interval_first(struct interval_node *node)
139 {
140         ENTRY;
141
142         if (!node)
143                 RETURN(NULL);
144         while (node->in_left)
145                 node = node->in_left;
146         RETURN(node);
147 }
148
149 static struct interval_node *interval_last(struct interval_node *node)
150 {
151         ENTRY;
152
153         if (!node)
154                 RETURN(NULL);
155         while (node->in_right)
156                 node = node->in_right;
157         RETURN(node);
158 }
159
160 static struct interval_node *interval_next(struct interval_node *node)
161 {
162         ENTRY;
163
164         if (!node)
165                 RETURN(NULL);
166         if (node->in_right)
167                 RETURN(interval_first(node->in_right));
168         while (node->in_parent && node_is_right_child(node))
169                 node = node->in_parent;
170         RETURN(node->in_parent);
171 }
172
173 static struct interval_node *interval_prev(struct interval_node *node)
174 {
175         ENTRY;
176
177         if (!node)
178                 RETURN(NULL);
179
180         if (node->in_left)
181                 RETURN(interval_last(node->in_left));
182
183         while (node->in_parent && node_is_left_child(node))
184                 node = node->in_parent;
185
186         RETURN(node->in_parent);
187 }
188
189 enum interval_iter interval_iterate(struct interval_node *root,
190                                     interval_callback_t func,
191                                     void *data)
192 {
193         struct interval_node *node;
194         enum interval_iter rc = INTERVAL_ITER_CONT;
195         ENTRY;
196         
197         interval_for_each(node, root) {
198                 rc = func(node, data);
199                 if (rc == INTERVAL_ITER_STOP)
200                         break;
201         }
202
203         RETURN(rc);
204 }
205 EXPORT_SYMBOL(interval_iterate);
206
207 enum interval_iter interval_iterate_reverse(struct interval_node *root,
208                                             interval_callback_t func,
209                                             void *data)
210 {
211         struct interval_node *node;
212         enum interval_iter rc = INTERVAL_ITER_CONT;
213         ENTRY;
214         
215         interval_for_each_reverse(node, root) {
216                 rc = func(node, data);
217                 if (rc == INTERVAL_ITER_STOP)
218                         break;
219         }
220
221         RETURN(rc);
222 }
223 EXPORT_SYMBOL(interval_iterate_reverse);
224
225 /* try to find a node with same interval in the tree,
226  * if found, return the pointer to the node, otherwise return NULL*/
227 struct interval_node *interval_find(struct interval_node *root,
228                                     struct interval_node_extent *ex)
229 {
230         struct interval_node *walk = root;
231         int rc;
232         ENTRY;
233
234         while (walk) {
235                 rc = extent_compare(ex, &walk->in_extent);
236                 if (rc == 0)
237                         break;
238                 else if (rc < 0)
239                         walk = walk->in_left;
240                 else
241                         walk = walk->in_right;
242         }
243
244         RETURN(walk);
245 }
246 EXPORT_SYMBOL(interval_find);
247
248 static void __rotate_change_maxhigh(struct interval_node *node,
249                                     struct interval_node *rotate)
250 {
251         __u64 left_max, right_max;
252
253         rotate->in_max_high = node->in_max_high;
254         left_max = node->in_left ? node->in_left->in_max_high : 0;
255         right_max = node->in_right ? node->in_right->in_max_high : 0;
256         node->in_max_high  = max_u64(interval_high(node),
257                                      max_u64(left_max,right_max));
258 }
259
260 /* The left rotation "pivots" around the link from node to node->right, and
261  * - node will be linked to node->right's left child, and
262  * - node->right's left child will be linked to node's right child.  */
263 static void __rotate_left(struct interval_node *node,
264                           struct interval_node **root)
265 {
266         struct interval_node *right = node->in_right;
267         struct interval_node *parent = node->in_parent;
268
269         node->in_right = right->in_left;
270         if (node->in_right)
271                 right->in_left->in_parent = node;
272
273         right->in_left = node;
274         right->in_parent = parent;
275         if (parent) {
276                 if (node_is_left_child(node))
277                         parent->in_left = right;
278                 else
279                         parent->in_right = right;
280         } else {
281                 *root = right;
282         }
283         node->in_parent = right;
284
285         /* update max_high for node and right */
286         __rotate_change_maxhigh(node, right);
287 }
288
289 /* The right rotation "pivots" around the link from node to node->left, and
290  * - node will be linked to node->left's right child, and
291  * - node->left's right child will be linked to node's left child.  */
292 static void __rotate_right(struct interval_node *node,
293                            struct interval_node **root)
294 {
295         struct interval_node *left = node->in_left;
296         struct interval_node *parent = node->in_parent;
297
298         node->in_left = left->in_right;
299         if (node->in_left)
300                 left->in_right->in_parent = node;
301         left->in_right = node;
302
303         left->in_parent = parent;
304         if (parent) {
305                 if (node_is_right_child(node))
306                         parent->in_right = left;
307                 else
308                         parent->in_left = left;
309         } else {
310                 *root = left;
311         }
312         node->in_parent = left;
313
314         /* update max_high for node and left */
315         __rotate_change_maxhigh(node, left);
316 }
317
318 #define interval_swap(a, b) do {                        \
319         struct interval_node *c = a; a = b; b = c;      \
320 } while (0)
321
322 /*
323  * Operations INSERT and DELETE, when run on a tree with n keys, 
324  * take O(logN) time.Because they modify the tree, the result 
325  * may violate the red-black properties.To restore these properties, 
326  * we must change the colors of some of the nodes in the tree 
327  * and also change the pointer structure.
328  */
329 static void interval_insert_color(struct interval_node *node,
330                                   struct interval_node **root)
331 {
332         struct interval_node *parent, *gparent;
333         ENTRY;
334
335         while ((parent = node->in_parent) && node_is_red(parent)) {
336                 gparent = parent->in_parent;
337                 /* Parent is RED, so gparent must not be NULL */
338                 if (node_is_left_child(parent)) {
339                         struct interval_node *uncle;
340                         uncle = gparent->in_right;
341                         if (uncle && node_is_red(uncle)) {
342                                 uncle->in_color = INTERVAL_BLACK;
343                                 parent->in_color = INTERVAL_BLACK;
344                                 gparent->in_color = INTERVAL_RED;
345                                 node = gparent;
346                                 continue;
347                         }
348
349                         if (parent->in_right == node) {
350                                 __rotate_left(parent, root);
351                                 interval_swap(node, parent);
352                         }
353
354                         parent->in_color = INTERVAL_BLACK;
355                         gparent->in_color = INTERVAL_RED;
356                         __rotate_right(gparent, root);
357                 } else {
358                         struct interval_node *uncle;
359                         uncle = gparent->in_left;
360                         if (uncle && node_is_red(uncle)) {
361                                 uncle->in_color = INTERVAL_BLACK;
362                                 parent->in_color = INTERVAL_BLACK;
363                                 gparent->in_color = INTERVAL_RED;
364                                 node = gparent;
365                                 continue;
366                         }
367
368                         if (node_is_left_child(node)) {
369                                 __rotate_right(parent, root);
370                                 interval_swap(node, parent);
371                         }
372
373                         parent->in_color = INTERVAL_BLACK;
374                         gparent->in_color = INTERVAL_RED;
375                         __rotate_left(gparent, root);
376                 }
377         }
378
379         (*root)->in_color = INTERVAL_BLACK;
380         EXIT;
381 }
382
383 struct interval_node *interval_insert(struct interval_node *node,
384                                       struct interval_node **root)
385                      
386 {
387         struct interval_node **p, *parent = NULL;
388         ENTRY;
389
390         LASSERT(!interval_is_intree(node));
391         p = root;
392         while (*p) {
393                 parent = *p;
394                 if (node_equal(parent, node))
395                         RETURN(parent);
396
397                 /* max_high field must be updated after each iteration */
398                 if (parent->in_max_high < interval_high(node))
399                         parent->in_max_high = interval_high(node);
400
401                 if (node_compare(node, parent) < 0)
402                         p = &parent->in_left;
403                 else 
404                         p = &parent->in_right;
405         }
406
407         /* link node into the tree */
408         node->in_parent = parent;
409         node->in_color = INTERVAL_RED;
410         node->in_left = node->in_right = NULL;
411         *p = node;
412
413         interval_insert_color(node, root);
414         node->in_intree = 1;
415
416         RETURN(NULL);
417 }
418 EXPORT_SYMBOL(interval_insert);
419
420 static inline int node_is_black_or_0(struct interval_node *node)
421 {
422         return !node || node_is_black(node);
423 }
424
425 static void interval_erase_color(struct interval_node *node,
426                                  struct interval_node *parent,
427                                  struct interval_node **root)
428 {
429         struct interval_node *tmp;
430         ENTRY;
431
432         while (node_is_black_or_0(node) && node != *root) {
433                 if (parent->in_left == node) {
434                         tmp = parent->in_right;
435                         if (node_is_red(tmp)) {
436                                 tmp->in_color = INTERVAL_BLACK;
437                                 parent->in_color = INTERVAL_RED;
438                                 __rotate_left(parent, root);
439                                 tmp = parent->in_right;
440                         }
441                         if (node_is_black_or_0(tmp->in_left) &&
442                             node_is_black_or_0(tmp->in_right)) {
443                                 tmp->in_color = INTERVAL_RED;
444                                 node = parent;
445                                 parent = node->in_parent;
446                         } else {
447                                 if (node_is_black_or_0(tmp->in_right)) {
448                                         struct interval_node *o_left;
449                                         if ((o_left = tmp->in_left))
450                                              o_left->in_color = INTERVAL_BLACK;
451                                         tmp->in_color = INTERVAL_RED;
452                                         __rotate_right(tmp, root);
453                                         tmp = parent->in_right;
454                                 }
455                                 tmp->in_color = parent->in_color;
456                                 parent->in_color = INTERVAL_BLACK;
457                                 if (tmp->in_right)
458                                     tmp->in_right->in_color = INTERVAL_BLACK;
459                                 __rotate_left(parent, root);
460                                 node = *root;
461                                 break;
462                         }
463                 } else {
464                         tmp = parent->in_left;
465                         if (node_is_red(tmp)) {
466                                 tmp->in_color = INTERVAL_BLACK;
467                                 parent->in_color = INTERVAL_RED;
468                                 __rotate_right(parent, root);
469                                 tmp = parent->in_left;
470                         }
471                         if (node_is_black_or_0(tmp->in_left) &&
472                             node_is_black_or_0(tmp->in_right)) {
473                                 tmp->in_color = INTERVAL_RED;
474                                 node = parent;
475                                 parent = node->in_parent;
476                         } else {
477                                 if (node_is_black_or_0(tmp->in_left)) {
478                                         struct interval_node *o_right;
479                                         if ((o_right = tmp->in_right))
480                                             o_right->in_color = INTERVAL_BLACK;
481                                         tmp->in_color = INTERVAL_RED;
482                                         __rotate_left(tmp, root);
483                                         tmp = parent->in_left;
484                                 }
485                                 tmp->in_color = parent->in_color;
486                                 parent->in_color = INTERVAL_BLACK;
487                                 if (tmp->in_left)
488                                         tmp->in_left->in_color = INTERVAL_BLACK;
489                                 __rotate_right(parent, root);
490                                 node = *root;
491                                 break;
492                         }
493                 }
494         }
495         if (node)
496                 node->in_color = INTERVAL_BLACK;
497         EXIT;
498 }
499
500 /* 
501  * if the @max_high value of @node is changed, this function traverse  a path 
502  * from node  up to the root to update max_high for the whole tree.
503  */
504 static void update_maxhigh(struct interval_node *node,
505                            __u64  old_maxhigh)
506 {
507         __u64 left_max, right_max;
508         ENTRY;
509
510         while (node) {
511                 left_max = node->in_left ? node->in_left->in_max_high : 0;
512                 right_max = node->in_right ? node->in_right->in_max_high : 0;
513                 node->in_max_high = max_u64(interval_high(node),
514                                             max_u64(left_max, right_max));
515
516                 if (node->in_max_high >= old_maxhigh)
517                         break;
518                 node = node->in_parent;
519         }
520         EXIT;
521 }
522
523 void interval_erase(struct interval_node *node,
524                     struct interval_node **root)
525 {
526         struct interval_node *child, *parent;
527         int color;
528         ENTRY;
529
530         LASSERT(interval_is_intree(node));
531         node->in_intree = 0;
532         if (!node->in_left) {
533                 child = node->in_right;
534         } else if (!node->in_right) {
535                 child = node->in_left;
536         } else { /* Both left and right child are not NULL */
537                 struct interval_node *old = node;
538
539                 node = interval_next(node);
540                 child = node->in_right;
541                 parent = node->in_parent;
542                 color = node->in_color;
543
544                 if (child)
545                         child->in_parent = parent;
546                 if (parent == old)
547                         parent->in_right = child;
548                 else
549                         parent->in_left = child;
550
551                 node->in_color = old->in_color;
552                 node->in_right = old->in_right;
553                 node->in_left = old->in_left;
554                 node->in_parent = old->in_parent;
555
556                 if (old->in_parent) {
557                         if (node_is_left_child(old))
558                                 old->in_parent->in_left = node;
559                         else
560                                 old->in_parent->in_right = node;
561                 } else {
562                         *root = node;
563                 }
564
565                 old->in_left->in_parent = node;
566                 if (old->in_right)
567                         old->in_right->in_parent = node;
568                 update_maxhigh(child ? : parent, node->in_max_high);
569                 update_maxhigh(node, old->in_max_high);
570                 if (parent == old)
571                          parent = node;
572                 goto color;
573         }
574         parent = node->in_parent;
575         color = node->in_color;
576
577         if (child)
578                 child->in_parent = parent;
579         if (parent) {
580                 if (node_is_left_child(node))
581                         parent->in_left = child;
582                 else
583                         parent->in_right = child;
584         } else {
585                 *root = child;
586         }
587
588         update_maxhigh(child ? : parent, node->in_max_high);
589
590 color:
591         if (color == INTERVAL_BLACK)
592                 interval_erase_color(child, parent, root);
593         EXIT;
594 }
595 EXPORT_SYMBOL(interval_erase);
596
597 static inline int interval_may_overlap(struct interval_node *node,
598                                           struct interval_node_extent *ext)
599 {
600         return (ext->start <= node->in_max_high &&
601                 ext->end >= interval_low(node));
602 }
603
604 /*
605  * This function finds all intervals that overlap interval ext,
606  * and calls func to handle resulted intervals one by one.
607  * in lustre, this function will find all conflicting locks in
608  * the granted queue and add these locks to the ast work list.
609  *
610  * {
611  *       if (node == NULL)
612  *               return 0;
613  *       if (ext->end < interval_low(node)) {
614  *               interval_search(node->in_left, ext, func, data);
615  *       } else if (interval_may_overlap(node, ext)) {
616  *               if (extent_overlapped(ext, &node->in_extent))
617  *                       func(node, data);
618  *               interval_search(node->in_left, ext, func, data);
619  *               interval_search(node->in_right, ext, func, data);
620  *       }
621  *       return 0;
622  * }
623  *
624  */
625 enum interval_iter interval_search(struct interval_node *node,
626                                    struct interval_node_extent *ext,
627                                    interval_callback_t func,
628                                    void *data)
629 {
630         struct interval_node *parent;
631         enum interval_iter rc = INTERVAL_ITER_CONT;
632
633         ENTRY;
634
635         LASSERT(ext != NULL);
636         LASSERT(func != NULL);
637
638         while (node) {
639                 if (ext->end < interval_low(node)) {
640                         if (node->in_left) {
641                                 node = node->in_left;
642                                 continue;
643                         }
644                 } else if (interval_may_overlap(node, ext)) {
645                         if (extent_overlapped(ext, &node->in_extent)) {
646                                 rc = func(node, data);
647                                 if (rc == INTERVAL_ITER_STOP)
648                                         break;
649                         }
650
651                         if (node->in_left) {
652                                 node = node->in_left;
653                                 continue;
654                         }
655                         if (node->in_right) {
656                                 node = node->in_right;
657                                 continue;
658                         }
659                 }
660
661                 parent = node->in_parent;
662                 while (parent) {
663                         if (node_is_left_child(node) &&
664                             parent->in_right) {
665                                 /* If we ever got the left, it means that the
666                                  * parent met ext->end<interval_low(parent), or
667                                  * may_overlap(parent). If the former is true,
668                                  * we needn't go back. So stop early and check
669                                  * may_overlap(parent) after this loop.  */
670                                 node = parent->in_right;
671                                 break;
672                         }
673                         node = parent;
674                         parent = parent->in_parent;
675                 }
676                 if (parent == NULL || !interval_may_overlap(parent, ext))
677                         break;
678         }
679
680         RETURN(rc);
681 }
682 EXPORT_SYMBOL(interval_search);
683
684 static enum interval_iter interval_overlap_cb(struct interval_node *n,
685                                               void *args)
686 {
687         *(int *)args = 1;
688         return INTERVAL_ITER_STOP;
689 }
690
691 int interval_is_overlapped(struct interval_node *root,
692                            struct interval_node_extent *ext)
693 {
694         int has = 0;
695         (void)interval_search(root, ext, interval_overlap_cb, &has);
696         return has;
697 }
698 EXPORT_SYMBOL(interval_is_overlapped);
699
700 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
701  * some extents, because programs seldom do IO backward.
702  *
703  * The recursive algorithm of expanding low:
704  * expand_low {
705  *        struct interval_node *tmp;
706  *        static __u64 res = 0;
707  *
708  *        if (root == NULL)
709  *                return res;
710  *        if (root->in_max_high < low) {
711  *                res = max_u64(root->in_max_high + 1, res);
712  *                return res;
713  *        } else if (low < interval_low(root)) {
714  *                interval_expand_low(root->in_left, low);
715  *                return res;
716  *        }
717  *
718  *        if (interval_high(root) < low)
719  *                res = max_u64(interval_high(root) + 1, res);
720  *        interval_expand_low(root->in_left, low);
721  *        interval_expand_low(root->in_right, low);
722  *
723  *        return res;
724  * }
725  *
726  * It's much easy to eliminate the recursion, see interval_search for 
727  * an example. -jay
728  */
729 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
730 {
731         /* we only concern the empty tree right now. */
732         if (root == NULL)
733                 return 0;
734         return low;
735 }
736
737 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
738 {
739         __u64 result = ~0;
740
741         while (node != NULL) {
742                 if (node->in_max_high < high)
743                         break;
744                         
745                 if (interval_low(node) > high) {
746                         result = interval_low(node) - 1;
747                         node = node->in_left;
748                 } else {
749                         node = node->in_right;
750                 }
751         }
752
753         return result;
754 }
755
756 /* expanding the extent based on @ext. */
757 void interval_expand(struct interval_node *root,
758                      struct interval_node_extent *ext,
759                      struct interval_node_extent *limiter)
760 {
761         /* The assertion of interval_is_overlapped is expensive because we may
762          * travel many nodes to find the overlapped node. */
763         LASSERT(interval_is_overlapped(root, ext) == 0);
764         if (!limiter || limiter->start < ext->start)
765                 ext->start = interval_expand_low(root, ext->start);
766         if (!limiter || limiter->end > ext->end)
767                 ext->end = interval_expand_high(root, ext->end);
768         LASSERT(interval_is_overlapped(root, ext) == 0);
769 }
770 EXPORT_SYMBOL(interval_expand);