Whamcloud - gitweb
LU-1520 ldlm: improve ldlm_pools_shrink algorithm
[fs/lustre-release.git] / lustre / ldlm / interval_tree.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/ldlm/interval_tree.c
37  *
38  * Interval tree library used by ldlm extent lock code
39  *
40  * Author: Huang Wei <huangwei@clusterfs.com>
41  * Author: Jay Xiong <jinshan.xiong@sun.com>
42  */
43 #ifdef __KERNEL__
44 # include <lustre_dlm.h>
45 #else
46 # include <liblustre.h>
47 # include <libcfs/kp30.h>
48 #endif
49 #include <obd_support.h>
50 #include <interval_tree.h>
51
52 enum {
53         INTERVAL_RED = 0,
54         INTERVAL_BLACK = 1
55 };
56
57 static inline int node_is_left_child(struct interval_node *node)
58 {
59         LASSERT(node->in_parent != NULL);
60         return node == node->in_parent->in_left;
61 }
62
63 static inline int node_is_right_child(struct interval_node *node)
64 {
65         LASSERT(node->in_parent != NULL);
66         return node == node->in_parent->in_right;
67 }
68
69 static inline int node_is_red(struct interval_node *node)
70 {
71         return node->in_color == INTERVAL_RED;
72 }
73
74 static inline int node_is_black(struct interval_node *node)
75 {
76         return node->in_color == INTERVAL_BLACK;
77 }
78
79 static inline int extent_compare(struct interval_node_extent *e1,
80                                  struct interval_node_extent *e2)
81 {
82         int rc;
83         if (e1->start == e2->start) {
84                 if (e1->end < e2->end)
85                         rc = -1;
86                 else if (e1->end > e2->end)
87                         rc = 1;
88                 else
89                         rc = 0;
90         } else {
91                 if (e1->start < e2->start)
92                         rc = -1;
93                 else
94                         rc = 1;
95         }
96         return rc;
97 }
98
99 static inline int extent_equal(struct interval_node_extent *e1,
100                                struct interval_node_extent *e2)
101 {
102         return (e1->start == e2->start) && (e1->end == e2->end);
103 }
104
105 static inline int extent_overlapped(struct interval_node_extent *e1,
106                                     struct interval_node_extent *e2)
107 {
108         return (e1->start <= e2->end) && (e2->start <= e1->end);
109 }
110
111 static inline int node_compare(struct interval_node *n1,
112                                struct interval_node *n2)
113 {
114         return extent_compare(&n1->in_extent, &n2->in_extent);
115 }
116
117 static inline int node_equal(struct interval_node *n1,
118                              struct interval_node *n2)
119 {
120         return extent_equal(&n1->in_extent, &n2->in_extent);
121 }
122
123 static inline __u64 max_u64(__u64 x, __u64 y)
124 {
125         return x > y ? x : y;
126 }
127
128 static inline __u64 min_u64(__u64 x, __u64 y)
129 {
130         return x < y ? x : y;
131 }
132
133 #define interval_for_each(node, root)                   \
134 for (node = interval_first(root); node != NULL;         \
135      node = interval_next(node))
136
137 #define interval_for_each_reverse(node, root)           \
138 for (node = interval_last(root); node != NULL;          \
139      node = interval_prev(node))
140
141 static struct interval_node *interval_first(struct interval_node *node)
142 {
143         ENTRY;
144
145         if (!node)
146                 RETURN(NULL);
147         while (node->in_left)
148                 node = node->in_left;
149         RETURN(node);
150 }
151
152 static struct interval_node *interval_last(struct interval_node *node)
153 {
154         ENTRY;
155
156         if (!node)
157                 RETURN(NULL);
158         while (node->in_right)
159                 node = node->in_right;
160         RETURN(node);
161 }
162
163 static struct interval_node *interval_next(struct interval_node *node)
164 {
165         ENTRY;
166
167         if (!node)
168                 RETURN(NULL);
169         if (node->in_right)
170                 RETURN(interval_first(node->in_right));
171         while (node->in_parent && node_is_right_child(node))
172                 node = node->in_parent;
173         RETURN(node->in_parent);
174 }
175
176 static struct interval_node *interval_prev(struct interval_node *node)
177 {
178         ENTRY;
179
180         if (!node)
181                 RETURN(NULL);
182
183         if (node->in_left)
184                 RETURN(interval_last(node->in_left));
185
186         while (node->in_parent && node_is_left_child(node))
187                 node = node->in_parent;
188
189         RETURN(node->in_parent);
190 }
191
192 enum interval_iter interval_iterate(struct interval_node *root,
193                                     interval_callback_t func,
194                                     void *data)
195 {
196         struct interval_node *node;
197         enum interval_iter rc = INTERVAL_ITER_CONT;
198         ENTRY;
199
200         interval_for_each(node, root) {
201                 rc = func(node, data);
202                 if (rc == INTERVAL_ITER_STOP)
203                         break;
204         }
205
206         RETURN(rc);
207 }
208 EXPORT_SYMBOL(interval_iterate);
209
210 enum interval_iter interval_iterate_reverse(struct interval_node *root,
211                                             interval_callback_t func,
212                                             void *data)
213 {
214         struct interval_node *node;
215         enum interval_iter rc = INTERVAL_ITER_CONT;
216         ENTRY;
217
218         interval_for_each_reverse(node, root) {
219                 rc = func(node, data);
220                 if (rc == INTERVAL_ITER_STOP)
221                         break;
222         }
223
224         RETURN(rc);
225 }
226 EXPORT_SYMBOL(interval_iterate_reverse);
227
228 /* try to find a node with same interval in the tree,
229  * if found, return the pointer to the node, otherwise return NULL*/
230 struct interval_node *interval_find(struct interval_node *root,
231                                     struct interval_node_extent *ex)
232 {
233         struct interval_node *walk = root;
234         int rc;
235         ENTRY;
236
237         while (walk) {
238                 rc = extent_compare(ex, &walk->in_extent);
239                 if (rc == 0)
240                         break;
241                 else if (rc < 0)
242                         walk = walk->in_left;
243                 else
244                         walk = walk->in_right;
245         }
246
247         RETURN(walk);
248 }
249 EXPORT_SYMBOL(interval_find);
250
251 static void __rotate_change_maxhigh(struct interval_node *node,
252                                     struct interval_node *rotate)
253 {
254         __u64 left_max, right_max;
255
256         rotate->in_max_high = node->in_max_high;
257         left_max = node->in_left ? node->in_left->in_max_high : 0;
258         right_max = node->in_right ? node->in_right->in_max_high : 0;
259         node->in_max_high  = max_u64(interval_high(node),
260                                      max_u64(left_max,right_max));
261 }
262
263 /* The left rotation "pivots" around the link from node to node->right, and
264  * - node will be linked to node->right's left child, and
265  * - node->right's left child will be linked to node's right child.  */
266 static void __rotate_left(struct interval_node *node,
267                           struct interval_node **root)
268 {
269         struct interval_node *right = node->in_right;
270         struct interval_node *parent = node->in_parent;
271
272         node->in_right = right->in_left;
273         if (node->in_right)
274                 right->in_left->in_parent = node;
275
276         right->in_left = node;
277         right->in_parent = parent;
278         if (parent) {
279                 if (node_is_left_child(node))
280                         parent->in_left = right;
281                 else
282                         parent->in_right = right;
283         } else {
284                 *root = right;
285         }
286         node->in_parent = right;
287
288         /* update max_high for node and right */
289         __rotate_change_maxhigh(node, right);
290 }
291
292 /* The right rotation "pivots" around the link from node to node->left, and
293  * - node will be linked to node->left's right child, and
294  * - node->left's right child will be linked to node's left child.  */
295 static void __rotate_right(struct interval_node *node,
296                            struct interval_node **root)
297 {
298         struct interval_node *left = node->in_left;
299         struct interval_node *parent = node->in_parent;
300
301         node->in_left = left->in_right;
302         if (node->in_left)
303                 left->in_right->in_parent = node;
304         left->in_right = node;
305
306         left->in_parent = parent;
307         if (parent) {
308                 if (node_is_right_child(node))
309                         parent->in_right = left;
310                 else
311                         parent->in_left = left;
312         } else {
313                 *root = left;
314         }
315         node->in_parent = left;
316
317         /* update max_high for node and left */
318         __rotate_change_maxhigh(node, left);
319 }
320
321 #define interval_swap(a, b) do {                        \
322         struct interval_node *c = a; a = b; b = c;      \
323 } while (0)
324
325 /*
326  * Operations INSERT and DELETE, when run on a tree with n keys,
327  * take O(logN) time.Because they modify the tree, the result
328  * may violate the red-black properties.To restore these properties,
329  * we must change the colors of some of the nodes in the tree
330  * and also change the pointer structure.
331  */
332 static void interval_insert_color(struct interval_node *node,
333                                   struct interval_node **root)
334 {
335         struct interval_node *parent, *gparent;
336         ENTRY;
337
338         while ((parent = node->in_parent) && node_is_red(parent)) {
339                 gparent = parent->in_parent;
340                 /* Parent is RED, so gparent must not be NULL */
341                 if (node_is_left_child(parent)) {
342                         struct interval_node *uncle;
343                         uncle = gparent->in_right;
344                         if (uncle && node_is_red(uncle)) {
345                                 uncle->in_color = INTERVAL_BLACK;
346                                 parent->in_color = INTERVAL_BLACK;
347                                 gparent->in_color = INTERVAL_RED;
348                                 node = gparent;
349                                 continue;
350                         }
351
352                         if (parent->in_right == node) {
353                                 __rotate_left(parent, root);
354                                 interval_swap(node, parent);
355                         }
356
357                         parent->in_color = INTERVAL_BLACK;
358                         gparent->in_color = INTERVAL_RED;
359                         __rotate_right(gparent, root);
360                 } else {
361                         struct interval_node *uncle;
362                         uncle = gparent->in_left;
363                         if (uncle && node_is_red(uncle)) {
364                                 uncle->in_color = INTERVAL_BLACK;
365                                 parent->in_color = INTERVAL_BLACK;
366                                 gparent->in_color = INTERVAL_RED;
367                                 node = gparent;
368                                 continue;
369                         }
370
371                         if (node_is_left_child(node)) {
372                                 __rotate_right(parent, root);
373                                 interval_swap(node, parent);
374                         }
375
376                         parent->in_color = INTERVAL_BLACK;
377                         gparent->in_color = INTERVAL_RED;
378                         __rotate_left(gparent, root);
379                 }
380         }
381
382         (*root)->in_color = INTERVAL_BLACK;
383         EXIT;
384 }
385
386 struct interval_node *interval_insert(struct interval_node *node,
387                                       struct interval_node **root)
388 {
389         struct interval_node **p, *parent = NULL;
390         ENTRY;
391
392         LASSERT(!interval_is_intree(node));
393         p = root;
394         while (*p) {
395                 parent = *p;
396                 if (node_equal(parent, node))
397                         RETURN(parent);
398
399                 /* max_high field must be updated after each iteration */
400                 if (parent->in_max_high < interval_high(node))
401                         parent->in_max_high = interval_high(node);
402
403                 if (node_compare(node, parent) < 0)
404                         p = &parent->in_left;
405                 else
406                         p = &parent->in_right;
407         }
408
409         /* link node into the tree */
410         node->in_parent = parent;
411         node->in_color = INTERVAL_RED;
412         node->in_left = node->in_right = NULL;
413         *p = node;
414
415         interval_insert_color(node, root);
416         node->in_intree = 1;
417
418         RETURN(NULL);
419 }
420 EXPORT_SYMBOL(interval_insert);
421
422 static inline int node_is_black_or_0(struct interval_node *node)
423 {
424         return !node || node_is_black(node);
425 }
426
427 static void interval_erase_color(struct interval_node *node,
428                                  struct interval_node *parent,
429                                  struct interval_node **root)
430 {
431         struct interval_node *tmp;
432         ENTRY;
433
434         while (node_is_black_or_0(node) && node != *root) {
435                 if (parent->in_left == node) {
436                         tmp = parent->in_right;
437                         if (node_is_red(tmp)) {
438                                 tmp->in_color = INTERVAL_BLACK;
439                                 parent->in_color = INTERVAL_RED;
440                                 __rotate_left(parent, root);
441                                 tmp = parent->in_right;
442                         }
443                         if (node_is_black_or_0(tmp->in_left) &&
444                             node_is_black_or_0(tmp->in_right)) {
445                                 tmp->in_color = INTERVAL_RED;
446                                 node = parent;
447                                 parent = node->in_parent;
448                         } else {
449                                 if (node_is_black_or_0(tmp->in_right)) {
450                                         struct interval_node *o_left;
451                                         if ((o_left = tmp->in_left))
452                                              o_left->in_color = INTERVAL_BLACK;
453                                         tmp->in_color = INTERVAL_RED;
454                                         __rotate_right(tmp, root);
455                                         tmp = parent->in_right;
456                                 }
457                                 tmp->in_color = parent->in_color;
458                                 parent->in_color = INTERVAL_BLACK;
459                                 if (tmp->in_right)
460                                     tmp->in_right->in_color = INTERVAL_BLACK;
461                                 __rotate_left(parent, root);
462                                 node = *root;
463                                 break;
464                         }
465                 } else {
466                         tmp = parent->in_left;
467                         if (node_is_red(tmp)) {
468                                 tmp->in_color = INTERVAL_BLACK;
469                                 parent->in_color = INTERVAL_RED;
470                                 __rotate_right(parent, root);
471                                 tmp = parent->in_left;
472                         }
473                         if (node_is_black_or_0(tmp->in_left) &&
474                             node_is_black_or_0(tmp->in_right)) {
475                                 tmp->in_color = INTERVAL_RED;
476                                 node = parent;
477                                 parent = node->in_parent;
478                         } else {
479                                 if (node_is_black_or_0(tmp->in_left)) {
480                                         struct interval_node *o_right;
481                                         if ((o_right = tmp->in_right))
482                                             o_right->in_color = INTERVAL_BLACK;
483                                         tmp->in_color = INTERVAL_RED;
484                                         __rotate_left(tmp, root);
485                                         tmp = parent->in_left;
486                                 }
487                                 tmp->in_color = parent->in_color;
488                                 parent->in_color = INTERVAL_BLACK;
489                                 if (tmp->in_left)
490                                         tmp->in_left->in_color = INTERVAL_BLACK;
491                                 __rotate_right(parent, root);
492                                 node = *root;
493                                 break;
494                         }
495                 }
496         }
497         if (node)
498                 node->in_color = INTERVAL_BLACK;
499         EXIT;
500 }
501
502 /*
503  * if the @max_high value of @node is changed, this function traverse  a path
504  * from node  up to the root to update max_high for the whole tree.
505  */
506 static void update_maxhigh(struct interval_node *node,
507                            __u64  old_maxhigh)
508 {
509         __u64 left_max, right_max;
510         ENTRY;
511
512         while (node) {
513                 left_max = node->in_left ? node->in_left->in_max_high : 0;
514                 right_max = node->in_right ? node->in_right->in_max_high : 0;
515                 node->in_max_high = max_u64(interval_high(node),
516                                             max_u64(left_max, right_max));
517
518                 if (node->in_max_high >= old_maxhigh)
519                         break;
520                 node = node->in_parent;
521         }
522         EXIT;
523 }
524
525 void interval_erase(struct interval_node *node,
526                     struct interval_node **root)
527 {
528         struct interval_node *child, *parent;
529         int color;
530         ENTRY;
531
532         LASSERT(interval_is_intree(node));
533         node->in_intree = 0;
534         if (!node->in_left) {
535                 child = node->in_right;
536         } else if (!node->in_right) {
537                 child = node->in_left;
538         } else { /* Both left and right child are not NULL */
539                 struct interval_node *old = node;
540
541                 node = interval_next(node);
542                 child = node->in_right;
543                 parent = node->in_parent;
544                 color = node->in_color;
545
546                 if (child)
547                         child->in_parent = parent;
548                 if (parent == old)
549                         parent->in_right = child;
550                 else
551                         parent->in_left = child;
552
553                 node->in_color = old->in_color;
554                 node->in_right = old->in_right;
555                 node->in_left = old->in_left;
556                 node->in_parent = old->in_parent;
557
558                 if (old->in_parent) {
559                         if (node_is_left_child(old))
560                                 old->in_parent->in_left = node;
561                         else
562                                 old->in_parent->in_right = node;
563                 } else {
564                         *root = node;
565                 }
566
567                 old->in_left->in_parent = node;
568                 if (old->in_right)
569                         old->in_right->in_parent = node;
570                 update_maxhigh(child ? : parent, node->in_max_high);
571                 update_maxhigh(node, old->in_max_high);
572                 if (parent == old)
573                          parent = node;
574                 goto color;
575         }
576         parent = node->in_parent;
577         color = node->in_color;
578
579         if (child)
580                 child->in_parent = parent;
581         if (parent) {
582                 if (node_is_left_child(node))
583                         parent->in_left = child;
584                 else
585                         parent->in_right = child;
586         } else {
587                 *root = child;
588         }
589
590         update_maxhigh(child ? : parent, node->in_max_high);
591
592 color:
593         if (color == INTERVAL_BLACK)
594                 interval_erase_color(child, parent, root);
595         EXIT;
596 }
597 EXPORT_SYMBOL(interval_erase);
598
599 static inline int interval_may_overlap(struct interval_node *node,
600                                           struct interval_node_extent *ext)
601 {
602         return (ext->start <= node->in_max_high &&
603                 ext->end >= interval_low(node));
604 }
605
606 /*
607  * This function finds all intervals that overlap interval ext,
608  * and calls func to handle resulted intervals one by one.
609  * in lustre, this function will find all conflicting locks in
610  * the granted queue and add these locks to the ast work list.
611  *
612  * {
613  *       if (node == NULL)
614  *               return 0;
615  *       if (ext->end < interval_low(node)) {
616  *               interval_search(node->in_left, ext, func, data);
617  *       } else if (interval_may_overlap(node, ext)) {
618  *               if (extent_overlapped(ext, &node->in_extent))
619  *                       func(node, data);
620  *               interval_search(node->in_left, ext, func, data);
621  *               interval_search(node->in_right, ext, func, data);
622  *       }
623  *       return 0;
624  * }
625  *
626  */
627 enum interval_iter interval_search(struct interval_node *node,
628                                    struct interval_node_extent *ext,
629                                    interval_callback_t func,
630                                    void *data)
631 {
632         struct interval_node *parent;
633         enum interval_iter rc = INTERVAL_ITER_CONT;
634
635         LASSERT(ext != NULL);
636         LASSERT(func != NULL);
637
638         while (node) {
639                 if (ext->end < interval_low(node)) {
640                         if (node->in_left) {
641                                 node = node->in_left;
642                                 continue;
643                         }
644                 } else if (interval_may_overlap(node, ext)) {
645                         if (extent_overlapped(ext, &node->in_extent)) {
646                                 rc = func(node, data);
647                                 if (rc == INTERVAL_ITER_STOP)
648                                         break;
649                         }
650
651                         if (node->in_left) {
652                                 node = node->in_left;
653                                 continue;
654                         }
655                         if (node->in_right) {
656                                 node = node->in_right;
657                                 continue;
658                         }
659                 }
660
661                 parent = node->in_parent;
662                 while (parent) {
663                         if (node_is_left_child(node) &&
664                             parent->in_right) {
665                                 /* If we ever got the left, it means that the
666                                  * parent met ext->end<interval_low(parent), or
667                                  * may_overlap(parent). If the former is true,
668                                  * we needn't go back. So stop early and check
669                                  * may_overlap(parent) after this loop.  */
670                                 node = parent->in_right;
671                                 break;
672                         }
673                         node = parent;
674                         parent = parent->in_parent;
675                 }
676                 if (parent == NULL || !interval_may_overlap(parent, ext))
677                         break;
678         }
679
680         return rc;
681 }
682 EXPORT_SYMBOL(interval_search);
683
684 static enum interval_iter interval_overlap_cb(struct interval_node *n,
685                                               void *args)
686 {
687         *(int *)args = 1;
688         return INTERVAL_ITER_STOP;
689 }
690
691 int interval_is_overlapped(struct interval_node *root,
692                            struct interval_node_extent *ext)
693 {
694         int has = 0;
695         (void)interval_search(root, ext, interval_overlap_cb, &has);
696         return has;
697 }
698 EXPORT_SYMBOL(interval_is_overlapped);
699
700 /* Don't expand to low. Expanding downwards is expensive, and meaningless to
701  * some extents, because programs seldom do IO backward.
702  *
703  * The recursive algorithm of expanding low:
704  * interval_expand_low {
705  *        struct interval_node *tmp;
706  *        static __u64 res = 0;
707  *
708  *        if (root == NULL)
709  *                return res;
710  *        if (root->in_max_high < low) {
711  *                res = max_u64(root->in_max_high + 1, res);
712  *                return res;
713  *        } else if (low < interval_low(root)) {
714  *                interval_expand_low(root->in_left, low);
715  *                return res;
716  *        }
717  *
718  *        if (interval_high(root) < low)
719  *                res = max_u64(interval_high(root) + 1, res);
720  *        interval_expand_low(root->in_left, low);
721  *        interval_expand_low(root->in_right, low);
722  *
723  *        return res;
724  * }
725  *
726  * It's much easy to eliminate the recursion, see interval_search for
727  * an example. -jay
728  */
729 static inline __u64 interval_expand_low(struct interval_node *root, __u64 low)
730 {
731         /* we only concern the empty tree right now. */
732         if (root == NULL)
733                 return 0;
734         return low;
735 }
736
737 static inline __u64 interval_expand_high(struct interval_node *node, __u64 high)
738 {
739         __u64 result = ~0;
740
741         while (node != NULL) {
742                 if (node->in_max_high < high)
743                         break;
744
745                 if (interval_low(node) > high) {
746                         result = interval_low(node) - 1;
747                         node = node->in_left;
748                 } else {
749                         node = node->in_right;
750                 }
751         }
752
753         return result;
754 }
755
756 /* expanding the extent based on @ext. */
757 void interval_expand(struct interval_node *root,
758                      struct interval_node_extent *ext,
759                      struct interval_node_extent *limiter)
760 {
761         /* The assertion of interval_is_overlapped is expensive because we may
762          * travel many nodes to find the overlapped node. */
763         LASSERT(interval_is_overlapped(root, ext) == 0);
764         if (!limiter || limiter->start < ext->start)
765                 ext->start = interval_expand_low(root, ext->start);
766         if (!limiter || limiter->end > ext->end)
767                 ext->end = interval_expand_high(root, ext->end);
768         LASSERT(interval_is_overlapped(root, ext) == 0);
769 }
770 EXPORT_SYMBOL(interval_expand);