Whamcloud - gitweb
LU-12616 obclass: fix MDS start/stop race
[fs/lustre-release.git] / lustre / ldlm / interval_tree.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2014, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * lustre/ldlm/interval_tree.c
33  *
34  * Interval tree library used by ldlm extent lock code
35  *
36  * Author: Huang Wei <huangwei@clusterfs.com>
37  * Author: Jay Xiong <jinshan.xiong@sun.com>
38  */
39
40 #include <lustre_dlm.h>
41 #include <interval_tree.h>
42
43 enum {
44         INTERVAL_RED = 0,
45         INTERVAL_BLACK = 1
46 };
47
48 static inline int node_is_left_child(struct interval_node *node)
49 {
50         LASSERT(node->in_parent != NULL);
51         return node == node->in_parent->in_left;
52 }
53
54 static inline int node_is_right_child(struct interval_node *node)
55 {
56         LASSERT(node->in_parent != NULL);
57         return node == node->in_parent->in_right;
58 }
59
60 static inline int node_is_red(struct interval_node *node)
61 {
62         return node->in_color == INTERVAL_RED;
63 }
64
65 static inline int node_is_black(struct interval_node *node)
66 {
67         return node->in_color == INTERVAL_BLACK;
68 }
69
70 static inline int extent_compare(struct interval_node_extent *e1,
71                                  struct interval_node_extent *e2)
72 {
73         int rc;
74
75         if (e1->start == e2->start) {
76                 if (e1->end < e2->end)
77                         rc = -1;
78                 else if (e1->end > e2->end)
79                         rc = 1;
80                 else
81                         rc = 0;
82         } else {
83                 if (e1->start < e2->start)
84                         rc = -1;
85                 else
86                         rc = 1;
87         }
88         return rc;
89 }
90
91 static inline int extent_equal(struct interval_node_extent *e1,
92                                struct interval_node_extent *e2)
93 {
94         return (e1->start == e2->start) && (e1->end == e2->end);
95 }
96
97 static inline int extent_overlapped(struct interval_node_extent *e1,
98                                     struct interval_node_extent *e2)
99 {
100         return (e1->start <= e2->end) && (e2->start <= e1->end);
101 }
102
103 static inline int node_compare(struct interval_node *n1,
104                                struct interval_node *n2)
105 {
106         return extent_compare(&n1->in_extent, &n2->in_extent);
107 }
108
109 int node_equal(struct interval_node *n1, struct interval_node *n2)
110 {
111         return extent_equal(&n1->in_extent, &n2->in_extent);
112 }
113
114 static inline __u64 max_u64(__u64 x, __u64 y)
115 {
116         return x > y ? x : y;
117 }
118
119 static inline __u64 min_u64(__u64 x, __u64 y)
120 {
121         return x < y ? x : y;
122 }
123
124 #define interval_for_each(node, root)                   \
125 for (node = interval_first(root); node != NULL;         \
126         node = interval_next(node))
127
128 #define interval_for_each_reverse(node, root)           \
129 for (node = interval_last(root); node != NULL;          \
130         node = interval_prev(node))
131
132 static struct interval_node *interval_first(struct interval_node *node)
133 {
134         ENTRY;
135
136         if (!node)
137                 RETURN(NULL);
138         while (node->in_left)
139                 node = node->in_left;
140         RETURN(node);
141 }
142
143 static struct interval_node *interval_last(struct interval_node *node)
144 {
145         ENTRY;
146
147         if (!node)
148                 RETURN(NULL);
149         while (node->in_right)
150                 node = node->in_right;
151         RETURN(node);
152 }
153
154 static struct interval_node *interval_next(struct interval_node *node)
155 {
156         ENTRY;
157
158         if (!node)
159                 RETURN(NULL);
160         if (node->in_right)
161                 RETURN(interval_first(node->in_right));
162         while (node->in_parent && node_is_right_child(node))
163                 node = node->in_parent;
164         RETURN(node->in_parent);
165 }
166
167 static struct interval_node *interval_prev(struct interval_node *node)
168 {
169         ENTRY;
170
171         if (!node)
172                 RETURN(NULL);
173
174         if (node->in_left)
175                 RETURN(interval_last(node->in_left));
176
177         while (node->in_parent && node_is_left_child(node))
178                 node = node->in_parent;
179
180         RETURN(node->in_parent);
181 }
182
183 enum interval_iter interval_iterate(struct interval_node *root,
184                                     interval_callback_t func,
185                                     void *data)
186 {
187         struct interval_node *node;
188         enum interval_iter rc = INTERVAL_ITER_CONT;
189
190         ENTRY;
191
192         interval_for_each(node, root) {
193                 rc = func(node, data);
194                 if (rc == INTERVAL_ITER_STOP)
195                         break;
196         }
197
198         RETURN(rc);
199 }
200 EXPORT_SYMBOL(interval_iterate);
201
202 enum interval_iter interval_iterate_reverse(struct interval_node *root,
203                                             interval_callback_t func,
204                                             void *data)
205 {
206         struct interval_node *node;
207         enum interval_iter rc = INTERVAL_ITER_CONT;
208
209         ENTRY;
210
211         interval_for_each_reverse(node, root) {
212                 rc = func(node, data);
213                 if (rc == INTERVAL_ITER_STOP)
214                         break;
215         }
216
217         RETURN(rc);
218 }
219 EXPORT_SYMBOL(interval_iterate_reverse);
220
221 /* try to find a node with same interval in the tree,
222  * if found, return the pointer to the node, otherwise return NULL
223  */
224 struct interval_node *interval_find(struct interval_node *root,
225                                     struct interval_node_extent *ex)
226 {
227         struct interval_node *walk = root;
228         int rc;
229
230         ENTRY;
231
232         while (walk) {
233                 rc = extent_compare(ex, &walk->in_extent);
234                 if (rc == 0)
235                         break;
236                 else if (rc < 0)
237                         walk = walk->in_left;
238                 else
239                         walk = walk->in_right;
240         }
241
242         RETURN(walk);
243 }
244 EXPORT_SYMBOL(interval_find);
245
246 static void __rotate_change_maxhigh(struct interval_node *node,
247                                     struct interval_node *rotate)
248 {
249         __u64 left_max, right_max;
250
251         rotate->in_max_high = node->in_max_high;
252         left_max = node->in_left ? node->in_left->in_max_high : 0;
253         right_max = node->in_right ? node->in_right->in_max_high : 0;
254         node->in_max_high  = max_u64(interval_high(node),
255                                      max_u64(left_max, right_max));
256 }
257
258 /* The left rotation "pivots" around the link from node to node->right, and
259  * - node will be linked to node->right's left child, and
260  * - node->right's left child will be linked to node's right child.
261  */
262 static void __rotate_left(struct interval_node *node,
263                           struct interval_node **root)
264 {
265         struct interval_node *right = node->in_right;
266         struct interval_node *parent = node->in_parent;
267
268         node->in_right = right->in_left;
269         if (node->in_right)
270                 right->in_left->in_parent = node;
271
272         right->in_left = node;
273         right->in_parent = parent;
274         if (parent) {
275                 if (node_is_left_child(node))
276                         parent->in_left = right;
277                 else
278                         parent->in_right = right;
279         } else {
280                 *root = right;
281         }
282         node->in_parent = right;
283
284         /* update max_high for node and right */
285         __rotate_change_maxhigh(node, right);
286 }
287
288 /* The right rotation "pivots" around the link from node to node->left, and
289  * - node will be linked to node->left's right child, and
290  * - node->left's right child will be linked to node's left child.
291  */
292 static void __rotate_right(struct interval_node *node,
293                            struct interval_node **root)
294 {
295         struct interval_node *left = node->in_left;
296         struct interval_node *parent = node->in_parent;
297
298         node->in_left = left->in_right;
299         if (node->in_left)
300                 left->in_right->in_parent = node;
301         left->in_right = node;
302
303         left->in_parent = parent;
304         if (parent) {
305                 if (node_is_right_child(node))
306                         parent->in_right = left;
307                 else
308                         parent->in_left = left;
309         } else {
310                 *root = left;
311         }
312         node->in_parent = left;
313
314         /* update max_high for node and left */
315         __rotate_change_maxhigh(node, left);
316 }
317
318 #define interval_swap(a, b) do {                        \
319         struct interval_node *c = a; a = b; b = c;      \
320 } while (0)
321
322 /*
323  * Operations INSERT and DELETE, when run on a tree with n keys,
324  * take O(logN) time.Because they modify the tree, the result
325  * may violate the red-black properties.To restore these properties,
326  * we must change the colors of some of the nodes in the tree
327  * and also change the pointer structure.
328  */
329 static void interval_insert_color(struct interval_node *node,
330                                   struct interval_node **root)
331 {
332         struct interval_node *parent, *gparent;
333
334         ENTRY;
335
336         while ((parent = node->in_parent) && node_is_red(parent)) {
337                 gparent = parent->in_parent;
338                 /* Parent is RED, so gparent must not be NULL */
339                 if (node_is_left_child(parent)) {
340                         struct interval_node *uncle;
341
342                         uncle = gparent->in_right;
343                         if (uncle && node_is_red(uncle)) {
344                                 uncle->in_color = INTERVAL_BLACK;
345                                 parent->in_color = INTERVAL_BLACK;
346                                 gparent->in_color = INTERVAL_RED;
347                                 node = gparent;
348                                 continue;
349                         }
350
351                         if (parent->in_right == node) {
352                                 __rotate_left(parent, root);
353                                 interval_swap(node, parent);
354                         }
355
356                         parent->in_color = INTERVAL_BLACK;
357                         gparent->in_color = INTERVAL_RED;
358                         __rotate_right(gparent, root);
359                 } else {
360                         struct interval_node *uncle;
361
362                         uncle = gparent->in_left;
363                         if (uncle && node_is_red(uncle)) {
364                                 uncle->in_color = INTERVAL_BLACK;
365                                 parent->in_color = INTERVAL_BLACK;
366                                 gparent->in_color = INTERVAL_RED;
367                                 node = gparent;
368                                 continue;
369                         }
370
371                         if (node_is_left_child(node)) {
372                                 __rotate_right(parent, root);
373                                 interval_swap(node, parent);
374                         }
375
376                         parent->in_color = INTERVAL_BLACK;
377                         gparent->in_color = INTERVAL_RED;
378                         __rotate_left(gparent, root);
379                 }
380         }
381
382         (*root)->in_color = INTERVAL_BLACK;
383         EXIT;
384 }
385
386 struct interval_node *interval_insert(struct interval_node *node,
387                                       struct interval_node **root)
388 {
389         struct interval_node **p, *parent = NULL;
390
391         ENTRY;
392
393         LASSERT(!interval_is_intree(node));
394         p = root;
395         while (*p) {
396                 parent = *p;
397                 if (node_equal(parent, node))
398                         RETURN(parent);
399
400                 /* max_high field must be updated after each iteration */
401                 if (parent->in_max_high < interval_high(node))
402                         parent->in_max_high = interval_high(node);
403
404                 if (node_compare(node, parent) < 0)
405                         p = &parent->in_left;
406                 else 
407                         p = &parent->in_right;
408         }
409
410         /* link node into the tree */
411         node->in_parent = parent;
412         node->in_color = INTERVAL_RED;
413         node->in_left = node->in_right = NULL;
414         *p = node;
415
416         interval_insert_color(node, root);
417         node->in_intree = 1;
418
419         RETURN(NULL);
420 }
421 EXPORT_SYMBOL(interval_insert);
422
423 static inline int node_is_black_or_0(struct interval_node *node)
424 {
425         return !node || node_is_black(node);
426 }
427
428 static void interval_erase_color(struct interval_node *node,
429                                  struct interval_node *parent,
430                                  struct interval_node **root)
431 {
432         struct interval_node *tmp;
433
434         ENTRY;
435
436         while (node_is_black_or_0(node) && node != *root) {
437                 if (parent->in_left == node) {
438                         tmp = parent->in_right;
439                         if (node_is_red(tmp)) {
440                                 tmp->in_color = INTERVAL_BLACK;
441                                 parent->in_color = INTERVAL_RED;
442                                 __rotate_left(parent, root);
443                                 tmp = parent->in_right;
444                         }
445                         if (node_is_black_or_0(tmp->in_left) &&
446                             node_is_black_or_0(tmp->in_right)) {
447                                 tmp->in_color = INTERVAL_RED;
448                                 node = parent;
449                                 parent = node->in_parent;
450                         } else {
451                                 if (node_is_black_or_0(tmp->in_right)) {
452                                         struct interval_node *o_left;
453
454                                         if ((o_left = tmp->in_left))
455                                                 o_left->in_color =
456                                                         INTERVAL_BLACK;
457                                         tmp->in_color = INTERVAL_RED;
458                                         __rotate_right(tmp, root);
459                                         tmp = parent->in_right;
460                                 }
461                                 tmp->in_color = parent->in_color;
462                                 parent->in_color = INTERVAL_BLACK;
463                                 if (tmp->in_right)
464                                         tmp->in_right->in_color =
465                                                 INTERVAL_BLACK;
466                                 __rotate_left(parent, root);
467                                 node = *root;
468                                 break;
469                         }
470                 } else {
471                         tmp = parent->in_left;
472                         if (node_is_red(tmp)) {
473                                 tmp->in_color = INTERVAL_BLACK;
474                                 parent->in_color = INTERVAL_RED;
475                                 __rotate_right(parent, root);
476                                 tmp = parent->in_left;
477                         }
478                         if (node_is_black_or_0(tmp->in_left) &&
479                             node_is_black_or_0(tmp->in_right)) {
480                                 tmp->in_color = INTERVAL_RED;
481                                 node = parent;
482                                 parent = node->in_parent;
483                         } else {
484                                 if (node_is_black_or_0(tmp->in_left)) {
485                                         struct interval_node *o_right;
486
487                                         if ((o_right = tmp->in_right))
488                                                 o_right->in_color =
489                                                         INTERVAL_BLACK;
490                                         tmp->in_color = INTERVAL_RED;
491                                         __rotate_left(tmp, root);
492                                         tmp = parent->in_left;
493                                 }
494                                 tmp->in_color = parent->in_color;
495                                 parent->in_color = INTERVAL_BLACK;
496                                 if (tmp->in_left)
497                                         tmp->in_left->in_color = INTERVAL_BLACK;
498                                 __rotate_right(parent, root);
499                                 node = *root;
500                                 break;
501                         }
502                 }
503         }
504         if (node)
505                 node->in_color = INTERVAL_BLACK;
506         EXIT;
507 }
508
509 /*
510  * if the @max_high value of @node is changed, this function traverse a path
511  * from node  up to the root to update max_high for the whole tree.
512  */
513 static void update_maxhigh(struct interval_node *node,
514                            __u64  old_maxhigh)
515 {
516         __u64 left_max, right_max;
517
518         ENTRY;
519
520         while (node) {
521                 left_max = node->in_left ? node->in_left->in_max_high : 0;
522                 right_max = node->in_right ? node->in_right->in_max_high : 0;
523                 node->in_max_high = max_u64(interval_high(node),
524                                             max_u64(left_max, right_max));
525
526                 if (node->in_max_high >= old_maxhigh)
527                         break;
528                 node = node->in_parent;
529         }
530         EXIT;
531 }
532
533 void interval_erase(struct interval_node *node,
534                     struct interval_node **root)
535 {
536         struct interval_node *child, *parent;
537         int color;
538
539         ENTRY;
540
541         LASSERT(interval_is_intree(node));
542         node->in_intree = 0;
543         if (!node->in_left) {
544                 child = node->in_right;
545         } else if (!node->in_right) {
546                 child = node->in_left;
547         } else { /* Both left and right child are not NULL */
548                 struct interval_node *old = node;
549
550                 node = interval_next(node);
551                 child = node->in_right;
552                 parent = node->in_parent;
553                 color = node->in_color;
554
555                 if (child)
556                         child->in_parent = parent;
557                 if (parent == old)
558                         parent->in_right = child;
559                 else
560                         parent->in_left = child;
561
562                 node->in_color = old->in_color;
563                 node->in_right = old->in_right;
564                 node->in_left = old->in_left;
565                 node->in_parent = old->in_parent;
566
567                 if (old->in_parent) {
568                         if (node_is_left_child(old))
569                                 old->in_parent->in_left = node;
570                         else
571                                 old->in_parent->in_right = node;
572                 } else {
573                         *root = node;
574                 }
575
576                 old->in_left->in_parent = node;
577                 if (old->in_right)
578                         old->in_right->in_parent = node;
579                 update_maxhigh(child ? : parent, node->in_max_high);
580                 update_maxhigh(node, old->in_max_high);
581                 if (parent == old)
582                         parent = node;
583                 goto color;
584         }
585         parent = node->in_parent;
586         color = node->in_color;
587
588         if (child)
589                 child->in_parent = parent;
590         if (parent) {
591                 if (node_is_left_child(node))
592                         parent->in_left = child;
593                 else
594                         parent->in_right = child;
595         } else {
596                 *root = child;
597         }
598
599         update_maxhigh(child ? : parent, node->in_max_high);
600
601 color:
602         if (color == INTERVAL_BLACK)
603                 interval_erase_color(child, parent, root);
604         EXIT;
605 }
606 EXPORT_SYMBOL(interval_erase);
607
608 static inline int interval_may_overlap(struct interval_node *node,
609                                        struct interval_node_extent *ext)
610 {
611         return (ext->start <= node->in_max_high &&
612                 ext->end >= interval_low(node));
613 }
614
615 /*
616  * This function finds all intervals that overlap interval ext,
617  * and calls func to handle resulted intervals one by one.
618  * in lustre, this function will find all conflicting locks in
619  * the granted queue and add these locks to the ast work list.
620  *
621  * {
622  *       if (node == NULL)
623  *               return 0;
624  *       if (ext->end < interval_low(node)) {
625  *               interval_search(node->in_left, ext, func, data);
626  *       } else if (interval_may_overlap(node, ext)) {
627  *               if (extent_overlapped(ext, &node->in_extent))
628  *                       func(node, data);
629  *               interval_search(node->in_left, ext, func, data);
630  *               interval_search(node->in_right, ext, func, data);
631  *       }
632  *       return 0;
633  * }
634  *
635  */
636 enum interval_iter interval_search(struct interval_node *node,
637                                    struct interval_node_extent *ext,
638                                    interval_callback_t func,
639                                    void *data)
640 {
641         struct interval_node *parent;
642         enum interval_iter rc = INTERVAL_ITER_CONT;
643
644         ENTRY;
645
646         LASSERT(ext != NULL);
647         LASSERT(func != NULL);
648
649         while (node) {
650                 if (ext->end < interval_low(node)) {
651                         if (node->in_left) {
652                                 node = node->in_left;
653                                 continue;
654                         }
655                 } else if (interval_may_overlap(node, ext)) {
656                         if (extent_overlapped(ext, &node->in_extent)) {
657                                 rc = func(node, data);
658                                 if (rc == INTERVAL_ITER_STOP)
659                                         break;
660                         }
661
662                         if (node->in_left) {
663                                 node = node->in_left;
664                                 continue;
665                         }
666                         if (node->in_right) {
667                                 node = node->in_right;
668                                 continue;
669                         }
670                 }
671
672                 parent = node->in_parent;
673                 while (parent) {
674                         if (node_is_left_child(node) &&
675                             parent->in_right) {
676                                 /* If we ever got the left, it means that the
677                                  * parent met ext->end<interval_low(parent), or
678                                  * may_overlap(parent). If the former is true,
679                                  * we needn't go back. So stop early and check
680                                  * may_overlap(parent) after this loop.
681                                  */
682                                 node = parent->in_right;
683                                 break;
684                         }
685                         node = parent;
686                         parent = parent->in_parent;
687                 }
688                 if (parent == NULL || !interval_may_overlap(parent, ext))
689                         break;
690         }
691
692         RETURN(rc);
693 }
694 EXPORT_SYMBOL(interval_search);
695
696 static enum interval_iter interval_overlap_cb(struct interval_node *n,
697                                               void *args)
698 {
699         *(int *)args = 1;
700         return INTERVAL_ITER_STOP;
701 }
702
703 int interval_is_overlapped(struct interval_node *root,
704                            struct interval_node_extent *ext)
705 {
706         int has = 0;
707         (void)interval_search(root, ext, interval_overlap_cb, &has);
708         return has;
709 }
710 EXPORT_SYMBOL(interval_is_overlapped);
711
712 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
713  * some extents, because programs seldom do IO backward.
714  *
715  * The recursive algorithm of expanding low:
716  * expand_low {
717  *        struct interval_node *tmp;
718  *        static __u64 res = 0;
719  *
720  *        if (root == NULL)
721  *                return res;
722  *        if (root->in_max_high < low) {
723  *                res = max_u64(root->in_max_high + 1, res);
724  *                return res;
725  *        } else if (low < interval_low(root)) {
726  *                interval_expand_low(root->in_left, low);
727  *                return res;
728  *        }
729  *
730  *        if (interval_high(root) < low)
731  *                res = max_u64(interval_high(root) + 1, res);
732  *        interval_expand_low(root->in_left, low);
733  *        interval_expand_low(root->in_right, low);
734  *
735  *        return res;
736  * }
737  *
738  * It's much easy to eliminate the recursion, see interval_search for
739  * an example. -jay
740  */
741 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
742 {
743         /* we only concern the empty tree right now. */
744         if (root == NULL)
745                 return 0;
746         return low;
747 }
748
749 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
750 {
751         __u64 result = ~0;
752
753         while (node != NULL) {
754                 if (node->in_max_high < high)
755                         break;
756
757                 if (interval_low(node) > high) {
758                         result = interval_low(node) - 1;
759                         node = node->in_left;
760                 } else {
761                         node = node->in_right;
762                 }
763         }
764
765         return result;
766 }
767
768 /* expanding the extent based on @ext. */
769 void interval_expand(struct interval_node *root,
770                      struct interval_node_extent *ext,
771                      struct interval_node_extent *limiter)
772 {
773         /* The assertion of interval_is_overlapped is expensive because we may
774          * travel many nodes to find the overlapped node.
775          */
776         LASSERT(interval_is_overlapped(root, ext) == 0);
777         if (!limiter || limiter->start < ext->start)
778                 ext->start = interval_expand_low(root, ext->start);
779         if (!limiter || limiter->end > ext->end)
780                 ext->end = interval_expand_high(root, ext->end);
781         LASSERT(interval_is_overlapped(root, ext) == 0);
782 }