Whamcloud - gitweb
LU-6075 osd: race for check/chance od_dirent_journal
[fs/lustre-release.git] / lustre / ldlm / interval_tree.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2014, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ldlm/interval_tree.c
37  *
38  * Interval tree library used by ldlm extent lock code
39  *
40  * Author: Huang Wei <huangwei@clusterfs.com>
41  * Author: Jay Xiong <jinshan.xiong@sun.com>
42  */
43 #ifdef __KERNEL__
44 # include <lustre_dlm.h>
45 #else
46 # include <libcfs/libcfs.h>
47 #endif
48 #include <interval_tree.h>
49
50 enum {
51         INTERVAL_RED = 0,
52         INTERVAL_BLACK = 1
53 };
54
55 static inline int node_is_left_child(struct interval_node *node)
56 {
57         LASSERT(node->in_parent != NULL);
58         return node == node->in_parent->in_left;
59 }
60
61 static inline int node_is_right_child(struct interval_node *node)
62 {
63         LASSERT(node->in_parent != NULL);
64         return node == node->in_parent->in_right;
65 }
66
67 static inline int node_is_red(struct interval_node *node)
68 {
69         return node->in_color == INTERVAL_RED;
70 }
71
72 static inline int node_is_black(struct interval_node *node)
73 {
74         return node->in_color == INTERVAL_BLACK;
75 }
76
77 static inline int extent_compare(struct interval_node_extent *e1,
78                                  struct interval_node_extent *e2)
79 {
80         int rc;
81         if (e1->start == e2->start) {
82                 if (e1->end < e2->end)
83                         rc = -1;
84                 else if (e1->end > e2->end)
85                         rc = 1;
86                 else
87                         rc = 0;
88         } else {
89                 if (e1->start < e2->start)
90                         rc = -1;
91                 else
92                         rc = 1;
93         }
94         return rc;
95 }
96
97 static inline int extent_equal(struct interval_node_extent *e1,
98                                struct interval_node_extent *e2)
99 {
100         return (e1->start == e2->start) && (e1->end == e2->end);
101 }
102
103 static inline int extent_overlapped(struct interval_node_extent *e1,
104                                     struct interval_node_extent *e2)
105 {
106         return (e1->start <= e2->end) && (e2->start <= e1->end);
107 }
108
109 static inline int node_compare(struct interval_node *n1,
110                                struct interval_node *n2)
111 {
112         return extent_compare(&n1->in_extent, &n2->in_extent);
113 }
114
115 int node_equal(struct interval_node *n1, struct interval_node *n2)
116 {
117         return extent_equal(&n1->in_extent, &n2->in_extent);
118 }
119 EXPORT_SYMBOL(node_equal);
120
121 static inline __u64 max_u64(__u64 x, __u64 y)
122 {
123         return x > y ? x : y;
124 }
125
126 static inline __u64 min_u64(__u64 x, __u64 y)
127 {
128         return x < y ? x : y;
129 }
130
131 #define interval_for_each(node, root)                   \
132 for (node = interval_first(root); node != NULL;         \
133      node = interval_next(node))
134
135 #define interval_for_each_reverse(node, root)           \
136 for (node = interval_last(root); node != NULL;          \
137      node = interval_prev(node))
138
139 static struct interval_node *interval_first(struct interval_node *node)
140 {
141         ENTRY;
142
143         if (!node)
144                 RETURN(NULL);
145         while (node->in_left)
146                 node = node->in_left;
147         RETURN(node);
148 }
149
150 static struct interval_node *interval_last(struct interval_node *node)
151 {
152         ENTRY;
153
154         if (!node)
155                 RETURN(NULL);
156         while (node->in_right)
157                 node = node->in_right;
158         RETURN(node);
159 }
160
161 static struct interval_node *interval_next(struct interval_node *node)
162 {
163         ENTRY;
164
165         if (!node)
166                 RETURN(NULL);
167         if (node->in_right)
168                 RETURN(interval_first(node->in_right));
169         while (node->in_parent && node_is_right_child(node))
170                 node = node->in_parent;
171         RETURN(node->in_parent);
172 }
173
174 static struct interval_node *interval_prev(struct interval_node *node)
175 {
176         ENTRY;
177
178         if (!node)
179                 RETURN(NULL);
180
181         if (node->in_left)
182                 RETURN(interval_last(node->in_left));
183
184         while (node->in_parent && node_is_left_child(node))
185                 node = node->in_parent;
186
187         RETURN(node->in_parent);
188 }
189
190 enum interval_iter interval_iterate(struct interval_node *root,
191                                     interval_callback_t func,
192                                     void *data)
193 {
194         struct interval_node *node;
195         enum interval_iter rc = INTERVAL_ITER_CONT;
196         ENTRY;
197         
198         interval_for_each(node, root) {
199                 rc = func(node, data);
200                 if (rc == INTERVAL_ITER_STOP)
201                         break;
202         }
203
204         RETURN(rc);
205 }
206 EXPORT_SYMBOL(interval_iterate);
207
208 enum interval_iter interval_iterate_reverse(struct interval_node *root,
209                                             interval_callback_t func,
210                                             void *data)
211 {
212         struct interval_node *node;
213         enum interval_iter rc = INTERVAL_ITER_CONT;
214         ENTRY;
215         
216         interval_for_each_reverse(node, root) {
217                 rc = func(node, data);
218                 if (rc == INTERVAL_ITER_STOP)
219                         break;
220         }
221
222         RETURN(rc);
223 }
224 EXPORT_SYMBOL(interval_iterate_reverse);
225
226 /* try to find a node with same interval in the tree,
227  * if found, return the pointer to the node, otherwise return NULL*/
228 struct interval_node *interval_find(struct interval_node *root,
229                                     struct interval_node_extent *ex)
230 {
231         struct interval_node *walk = root;
232         int rc;
233         ENTRY;
234
235         while (walk) {
236                 rc = extent_compare(ex, &walk->in_extent);
237                 if (rc == 0)
238                         break;
239                 else if (rc < 0)
240                         walk = walk->in_left;
241                 else
242                         walk = walk->in_right;
243         }
244
245         RETURN(walk);
246 }
247 EXPORT_SYMBOL(interval_find);
248
249 static void __rotate_change_maxhigh(struct interval_node *node,
250                                     struct interval_node *rotate)
251 {
252         __u64 left_max, right_max;
253
254         rotate->in_max_high = node->in_max_high;
255         left_max = node->in_left ? node->in_left->in_max_high : 0;
256         right_max = node->in_right ? node->in_right->in_max_high : 0;
257         node->in_max_high  = max_u64(interval_high(node),
258                                      max_u64(left_max,right_max));
259 }
260
261 /* The left rotation "pivots" around the link from node to node->right, and
262  * - node will be linked to node->right's left child, and
263  * - node->right's left child will be linked to node's right child.  */
264 static void __rotate_left(struct interval_node *node,
265                           struct interval_node **root)
266 {
267         struct interval_node *right = node->in_right;
268         struct interval_node *parent = node->in_parent;
269
270         node->in_right = right->in_left;
271         if (node->in_right)
272                 right->in_left->in_parent = node;
273
274         right->in_left = node;
275         right->in_parent = parent;
276         if (parent) {
277                 if (node_is_left_child(node))
278                         parent->in_left = right;
279                 else
280                         parent->in_right = right;
281         } else {
282                 *root = right;
283         }
284         node->in_parent = right;
285
286         /* update max_high for node and right */
287         __rotate_change_maxhigh(node, right);
288 }
289
290 /* The right rotation "pivots" around the link from node to node->left, and
291  * - node will be linked to node->left's right child, and
292  * - node->left's right child will be linked to node's left child.  */
293 static void __rotate_right(struct interval_node *node,
294                            struct interval_node **root)
295 {
296         struct interval_node *left = node->in_left;
297         struct interval_node *parent = node->in_parent;
298
299         node->in_left = left->in_right;
300         if (node->in_left)
301                 left->in_right->in_parent = node;
302         left->in_right = node;
303
304         left->in_parent = parent;
305         if (parent) {
306                 if (node_is_right_child(node))
307                         parent->in_right = left;
308                 else
309                         parent->in_left = left;
310         } else {
311                 *root = left;
312         }
313         node->in_parent = left;
314
315         /* update max_high for node and left */
316         __rotate_change_maxhigh(node, left);
317 }
318
319 #define interval_swap(a, b) do {                        \
320         struct interval_node *c = a; a = b; b = c;      \
321 } while (0)
322
323 /*
324  * Operations INSERT and DELETE, when run on a tree with n keys, 
325  * take O(logN) time.Because they modify the tree, the result 
326  * may violate the red-black properties.To restore these properties, 
327  * we must change the colors of some of the nodes in the tree 
328  * and also change the pointer structure.
329  */
330 static void interval_insert_color(struct interval_node *node,
331                                   struct interval_node **root)
332 {
333         struct interval_node *parent, *gparent;
334         ENTRY;
335
336         while ((parent = node->in_parent) && node_is_red(parent)) {
337                 gparent = parent->in_parent;
338                 /* Parent is RED, so gparent must not be NULL */
339                 if (node_is_left_child(parent)) {
340                         struct interval_node *uncle;
341                         uncle = gparent->in_right;
342                         if (uncle && node_is_red(uncle)) {
343                                 uncle->in_color = INTERVAL_BLACK;
344                                 parent->in_color = INTERVAL_BLACK;
345                                 gparent->in_color = INTERVAL_RED;
346                                 node = gparent;
347                                 continue;
348                         }
349
350                         if (parent->in_right == node) {
351                                 __rotate_left(parent, root);
352                                 interval_swap(node, parent);
353                         }
354
355                         parent->in_color = INTERVAL_BLACK;
356                         gparent->in_color = INTERVAL_RED;
357                         __rotate_right(gparent, root);
358                 } else {
359                         struct interval_node *uncle;
360                         uncle = gparent->in_left;
361                         if (uncle && node_is_red(uncle)) {
362                                 uncle->in_color = INTERVAL_BLACK;
363                                 parent->in_color = INTERVAL_BLACK;
364                                 gparent->in_color = INTERVAL_RED;
365                                 node = gparent;
366                                 continue;
367                         }
368
369                         if (node_is_left_child(node)) {
370                                 __rotate_right(parent, root);
371                                 interval_swap(node, parent);
372                         }
373
374                         parent->in_color = INTERVAL_BLACK;
375                         gparent->in_color = INTERVAL_RED;
376                         __rotate_left(gparent, root);
377                 }
378         }
379
380         (*root)->in_color = INTERVAL_BLACK;
381         EXIT;
382 }
383
384 struct interval_node *interval_insert(struct interval_node *node,
385                                       struct interval_node **root)
386                      
387 {
388         struct interval_node **p, *parent = NULL;
389         ENTRY;
390
391         LASSERT(!interval_is_intree(node));
392         p = root;
393         while (*p) {
394                 parent = *p;
395                 if (node_equal(parent, node))
396                         RETURN(parent);
397
398                 /* max_high field must be updated after each iteration */
399                 if (parent->in_max_high < interval_high(node))
400                         parent->in_max_high = interval_high(node);
401
402                 if (node_compare(node, parent) < 0)
403                         p = &parent->in_left;
404                 else 
405                         p = &parent->in_right;
406         }
407
408         /* link node into the tree */
409         node->in_parent = parent;
410         node->in_color = INTERVAL_RED;
411         node->in_left = node->in_right = NULL;
412         *p = node;
413
414         interval_insert_color(node, root);
415         node->in_intree = 1;
416
417         RETURN(NULL);
418 }
419 EXPORT_SYMBOL(interval_insert);
420
421 static inline int node_is_black_or_0(struct interval_node *node)
422 {
423         return !node || node_is_black(node);
424 }
425
426 static void interval_erase_color(struct interval_node *node,
427                                  struct interval_node *parent,
428                                  struct interval_node **root)
429 {
430         struct interval_node *tmp;
431         ENTRY;
432
433         while (node_is_black_or_0(node) && node != *root) {
434                 if (parent->in_left == node) {
435                         tmp = parent->in_right;
436                         if (node_is_red(tmp)) {
437                                 tmp->in_color = INTERVAL_BLACK;
438                                 parent->in_color = INTERVAL_RED;
439                                 __rotate_left(parent, root);
440                                 tmp = parent->in_right;
441                         }
442                         if (node_is_black_or_0(tmp->in_left) &&
443                             node_is_black_or_0(tmp->in_right)) {
444                                 tmp->in_color = INTERVAL_RED;
445                                 node = parent;
446                                 parent = node->in_parent;
447                         } else {
448                                 if (node_is_black_or_0(tmp->in_right)) {
449                                         struct interval_node *o_left;
450                                         if ((o_left = tmp->in_left))
451                                              o_left->in_color = INTERVAL_BLACK;
452                                         tmp->in_color = INTERVAL_RED;
453                                         __rotate_right(tmp, root);
454                                         tmp = parent->in_right;
455                                 }
456                                 tmp->in_color = parent->in_color;
457                                 parent->in_color = INTERVAL_BLACK;
458                                 if (tmp->in_right)
459                                     tmp->in_right->in_color = INTERVAL_BLACK;
460                                 __rotate_left(parent, root);
461                                 node = *root;
462                                 break;
463                         }
464                 } else {
465                         tmp = parent->in_left;
466                         if (node_is_red(tmp)) {
467                                 tmp->in_color = INTERVAL_BLACK;
468                                 parent->in_color = INTERVAL_RED;
469                                 __rotate_right(parent, root);
470                                 tmp = parent->in_left;
471                         }
472                         if (node_is_black_or_0(tmp->in_left) &&
473                             node_is_black_or_0(tmp->in_right)) {
474                                 tmp->in_color = INTERVAL_RED;
475                                 node = parent;
476                                 parent = node->in_parent;
477                         } else {
478                                 if (node_is_black_or_0(tmp->in_left)) {
479                                         struct interval_node *o_right;
480                                         if ((o_right = tmp->in_right))
481                                             o_right->in_color = INTERVAL_BLACK;
482                                         tmp->in_color = INTERVAL_RED;
483                                         __rotate_left(tmp, root);
484                                         tmp = parent->in_left;
485                                 }
486                                 tmp->in_color = parent->in_color;
487                                 parent->in_color = INTERVAL_BLACK;
488                                 if (tmp->in_left)
489                                         tmp->in_left->in_color = INTERVAL_BLACK;
490                                 __rotate_right(parent, root);
491                                 node = *root;
492                                 break;
493                         }
494                 }
495         }
496         if (node)
497                 node->in_color = INTERVAL_BLACK;
498         EXIT;
499 }
500
501 /* 
502  * if the @max_high value of @node is changed, this function traverse  a path 
503  * from node  up to the root to update max_high for the whole tree.
504  */
505 static void update_maxhigh(struct interval_node *node,
506                            __u64  old_maxhigh)
507 {
508         __u64 left_max, right_max;
509         ENTRY;
510
511         while (node) {
512                 left_max = node->in_left ? node->in_left->in_max_high : 0;
513                 right_max = node->in_right ? node->in_right->in_max_high : 0;
514                 node->in_max_high = max_u64(interval_high(node),
515                                             max_u64(left_max, right_max));
516
517                 if (node->in_max_high >= old_maxhigh)
518                         break;
519                 node = node->in_parent;
520         }
521         EXIT;
522 }
523
524 void interval_erase(struct interval_node *node,
525                     struct interval_node **root)
526 {
527         struct interval_node *child, *parent;
528         int color;
529         ENTRY;
530
531         LASSERT(interval_is_intree(node));
532         node->in_intree = 0;
533         if (!node->in_left) {
534                 child = node->in_right;
535         } else if (!node->in_right) {
536                 child = node->in_left;
537         } else { /* Both left and right child are not NULL */
538                 struct interval_node *old = node;
539
540                 node = interval_next(node);
541                 child = node->in_right;
542                 parent = node->in_parent;
543                 color = node->in_color;
544
545                 if (child)
546                         child->in_parent = parent;
547                 if (parent == old)
548                         parent->in_right = child;
549                 else
550                         parent->in_left = child;
551
552                 node->in_color = old->in_color;
553                 node->in_right = old->in_right;
554                 node->in_left = old->in_left;
555                 node->in_parent = old->in_parent;
556
557                 if (old->in_parent) {
558                         if (node_is_left_child(old))
559                                 old->in_parent->in_left = node;
560                         else
561                                 old->in_parent->in_right = node;
562                 } else {
563                         *root = node;
564                 }
565
566                 old->in_left->in_parent = node;
567                 if (old->in_right)
568                         old->in_right->in_parent = node;
569                 update_maxhigh(child ? : parent, node->in_max_high);
570                 update_maxhigh(node, old->in_max_high);
571                 if (parent == old)
572                          parent = node;
573                 goto color;
574         }
575         parent = node->in_parent;
576         color = node->in_color;
577
578         if (child)
579                 child->in_parent = parent;
580         if (parent) {
581                 if (node_is_left_child(node))
582                         parent->in_left = child;
583                 else
584                         parent->in_right = child;
585         } else {
586                 *root = child;
587         }
588
589         update_maxhigh(child ? : parent, node->in_max_high);
590
591 color:
592         if (color == INTERVAL_BLACK)
593                 interval_erase_color(child, parent, root);
594         EXIT;
595 }
596 EXPORT_SYMBOL(interval_erase);
597
598 static inline int interval_may_overlap(struct interval_node *node,
599                                           struct interval_node_extent *ext)
600 {
601         return (ext->start <= node->in_max_high &&
602                 ext->end >= interval_low(node));
603 }
604
605 /*
606  * This function finds all intervals that overlap interval ext,
607  * and calls func to handle resulted intervals one by one.
608  * in lustre, this function will find all conflicting locks in
609  * the granted queue and add these locks to the ast work list.
610  *
611  * {
612  *       if (node == NULL)
613  *               return 0;
614  *       if (ext->end < interval_low(node)) {
615  *               interval_search(node->in_left, ext, func, data);
616  *       } else if (interval_may_overlap(node, ext)) {
617  *               if (extent_overlapped(ext, &node->in_extent))
618  *                       func(node, data);
619  *               interval_search(node->in_left, ext, func, data);
620  *               interval_search(node->in_right, ext, func, data);
621  *       }
622  *       return 0;
623  * }
624  *
625  */
626 enum interval_iter interval_search(struct interval_node *node,
627                                    struct interval_node_extent *ext,
628                                    interval_callback_t func,
629                                    void *data)
630 {
631         struct interval_node *parent;
632         enum interval_iter rc = INTERVAL_ITER_CONT;
633
634         ENTRY;
635
636         LASSERT(ext != NULL);
637         LASSERT(func != NULL);
638
639         while (node) {
640                 if (ext->end < interval_low(node)) {
641                         if (node->in_left) {
642                                 node = node->in_left;
643                                 continue;
644                         }
645                 } else if (interval_may_overlap(node, ext)) {
646                         if (extent_overlapped(ext, &node->in_extent)) {
647                                 rc = func(node, data);
648                                 if (rc == INTERVAL_ITER_STOP)
649                                         break;
650                         }
651
652                         if (node->in_left) {
653                                 node = node->in_left;
654                                 continue;
655                         }
656                         if (node->in_right) {
657                                 node = node->in_right;
658                                 continue;
659                         }
660                 }
661
662                 parent = node->in_parent;
663                 while (parent) {
664                         if (node_is_left_child(node) &&
665                             parent->in_right) {
666                                 /* If we ever got the left, it means that the
667                                  * parent met ext->end<interval_low(parent), or
668                                  * may_overlap(parent). If the former is true,
669                                  * we needn't go back. So stop early and check
670                                  * may_overlap(parent) after this loop.  */
671                                 node = parent->in_right;
672                                 break;
673                         }
674                         node = parent;
675                         parent = parent->in_parent;
676                 }
677                 if (parent == NULL || !interval_may_overlap(parent, ext))
678                         break;
679         }
680
681         RETURN(rc);
682 }
683 EXPORT_SYMBOL(interval_search);
684
685 static enum interval_iter interval_overlap_cb(struct interval_node *n,
686                                               void *args)
687 {
688         *(int *)args = 1;
689         return INTERVAL_ITER_STOP;
690 }
691
692 int interval_is_overlapped(struct interval_node *root,
693                            struct interval_node_extent *ext)
694 {
695         int has = 0;
696         (void)interval_search(root, ext, interval_overlap_cb, &has);
697         return has;
698 }
699 EXPORT_SYMBOL(interval_is_overlapped);
700
701 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
702  * some extents, because programs seldom do IO backward.
703  *
704  * The recursive algorithm of expanding low:
705  * expand_low {
706  *        struct interval_node *tmp;
707  *        static __u64 res = 0;
708  *
709  *        if (root == NULL)
710  *                return res;
711  *        if (root->in_max_high < low) {
712  *                res = max_u64(root->in_max_high + 1, res);
713  *                return res;
714  *        } else if (low < interval_low(root)) {
715  *                interval_expand_low(root->in_left, low);
716  *                return res;
717  *        }
718  *
719  *        if (interval_high(root) < low)
720  *                res = max_u64(interval_high(root) + 1, res);
721  *        interval_expand_low(root->in_left, low);
722  *        interval_expand_low(root->in_right, low);
723  *
724  *        return res;
725  * }
726  *
727  * It's much easy to eliminate the recursion, see interval_search for 
728  * an example. -jay
729  */
730 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
731 {
732         /* we only concern the empty tree right now. */
733         if (root == NULL)
734                 return 0;
735         return low;
736 }
737
738 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
739 {
740         __u64 result = ~0;
741
742         while (node != NULL) {
743                 if (node->in_max_high < high)
744                         break;
745                         
746                 if (interval_low(node) > high) {
747                         result = interval_low(node) - 1;
748                         node = node->in_left;
749                 } else {
750                         node = node->in_right;
751                 }
752         }
753
754         return result;
755 }
756
757 /* expanding the extent based on @ext. */
758 void interval_expand(struct interval_node *root,
759                      struct interval_node_extent *ext,
760                      struct interval_node_extent *limiter)
761 {
762         /* The assertion of interval_is_overlapped is expensive because we may
763          * travel many nodes to find the overlapped node. */
764         LASSERT(interval_is_overlapped(root, ext) == 0);
765         if (!limiter || limiter->start < ext->start)
766                 ext->start = interval_expand_low(root, ext->start);
767         if (!limiter || limiter->end > ext->end)
768                 ext->end = interval_expand_high(root, ext->end);
769         LASSERT(interval_is_overlapped(root, ext) == 0);
770 }
771 EXPORT_SYMBOL(interval_expand);