1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 #include "qemu/osdep.h"
4 #include "qemu/interval-tree.h"
5 #include "qemu/atomic.h"
10 * For now, don't expose Linux Red-Black Trees separately, but retain the
11 * separate type definitions to keep the implementation sane, and allow
12 * the possibility of separating them later.
14 * Derived from include/linux/rbtree_augmented.h and its dependencies.
18 * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree
20 * 1) A node is either red or black
21 * 2) The root is black
22 * 3) All leaves (NULL) are black
23 * 4) Both children of every red node are black
24 * 5) Every simple path from root to leaves contains the same number of black nodes.
27 * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
28 * consecutive red nodes in a path and every red node is therefore followed by
29 * a black. So if B is the number of black nodes on every simple path (as per
30 * 5), then the longest possible path due to 4 is 2B.
32 * We shall indicate color with case, where black nodes are uppercase and red
33 * nodes will be lowercase. Unknown color nodes shall be drawn as red within
34 * parentheses and have some accompanying text comment.
36 * Notes on lockless lookups:
38 * All stores to the tree structure (rb_left and rb_right) must be done using
39 * WRITE_ONCE [qatomic_set for QEMU]. And we must not inadvertently cause
40 * (temporary) loops in the tree structure as seen in program order.
42 * These two requirements will allow lockless iteration of the tree -- not
43 * correct iteration mind you, tree rotations are not atomic so a lookup might
44 * miss entire subtrees.
46 * But they do guarantee that any such traversal will only see valid elements
47 * and that it will indeed complete -- does not get stuck in a loop.
49 * It also guarantees that if the lookup returns an element it is the 'correct'
50 * one. But not returning an element does _NOT_ mean it's not present.
54 * Stores to __rb_parent_color are not important for simple lookups so those
55 * are left undone as of now. Nor did I check for loops involving parent pointers.
65 typedef struct RBAugmentCallbacks {
66 void (*propagate)(RBNode *node, RBNode *stop);
67 void (*copy)(RBNode *old, RBNode *new);
68 void (*rotate)(RBNode *old, RBNode *new);
71 static inline RBNode *rb_parent(const RBNode *n)
73 return (RBNode *)(n->rb_parent_color & ~1);
76 static inline RBNode *rb_red_parent(const RBNode *n)
78 return (RBNode *)n->rb_parent_color;
81 static inline RBColor pc_color(uintptr_t pc)
83 return (RBColor)(pc & 1);
86 static inline bool pc_is_red(uintptr_t pc)
88 return pc_color(pc) == RB_RED;
/* True when the parent/color word @pc does not encode RB_RED. */
static inline bool pc_is_black(uintptr_t pc)
{
    bool red = pc_is_red(pc);

    return !red;
}
96 static inline RBColor rb_color(const RBNode *n)
98 return pc_color(n->rb_parent_color);
101 static inline bool rb_is_red(const RBNode *n)
103 return pc_is_red(n->rb_parent_color);
106 static inline bool rb_is_black(const RBNode *n)
108 return pc_is_black(n->rb_parent_color);
111 static inline void rb_set_black(RBNode *n)
113 n->rb_parent_color |= RB_BLACK;
116 static inline void rb_set_parent_color(RBNode *n, RBNode *p, RBColor color)
118 n->rb_parent_color = (uintptr_t)p | color;
121 static inline void rb_set_parent(RBNode *n, RBNode *p)
123 rb_set_parent_color(n, p, rb_color(n));
126 static inline void rb_link_node(RBNode *node, RBNode *parent, RBNode **rb_link)
128 node->rb_parent_color = (uintptr_t)parent;
129 node->rb_left = node->rb_right = NULL;
131 qatomic_set(rb_link, node);
134 static RBNode *rb_next(RBNode *node)
138 /* OMIT: if empty node, return null. */
141 * If we have a right-hand child, go down and then left as far as we can.
143 if (node->rb_right) {
144 node = node->rb_right;
145 while (node->rb_left) {
146 node = node->rb_left;
152 * No right-hand children. Everything down and left is smaller than us,
153 * so any 'next' node must be in the general direction of our parent.
154 * Go up the tree; any time the ancestor is a right-hand child of its
155 * parent, keep going up. First time it's a left-hand child of its
156 * parent, said parent is our 'next' node.
158 while ((parent = rb_parent(node)) && node == parent->rb_right) {
165 static inline void rb_change_child(RBNode *old, RBNode *new,
166 RBNode *parent, RBRoot *root)
169 qatomic_set(&root->rb_node, new);
170 } else if (parent->rb_left == old) {
171 qatomic_set(&parent->rb_left, new);
173 qatomic_set(&parent->rb_right, new);
177 static inline void rb_rotate_set_parents(RBNode *old, RBNode *new,
178 RBRoot *root, RBColor color)
180 RBNode *parent = rb_parent(old);
182 new->rb_parent_color = old->rb_parent_color;
183 rb_set_parent_color(old, new, color);
184 rb_change_child(old, new, parent, root);
/*
 * Rebalance the tree after @node has been linked in (interval_tree_insert
 * calls rb_link_node first), recoloring and rotating as described in the
 * per-case notes below and calling augment->rotate(old, new) after every
 * rotation.
 *
 * NOTE(review): the enclosing loop header and the break/continue
 * statements that separate the cases are not visible in this excerpt.
 */
187 static void rb_insert_augmented(RBNode *node, RBRoot *root,
188 const RBAugmentCallbacks *augment)
/* The parent word is read unmasked; the loop invariant says node is red. */
190 RBNode *parent = rb_red_parent(node), *gparent, *tmp;
194 * Loop invariant: node is red.
196 if (unlikely(!parent)) {
198 * The inserted node is root. Either this is the first node, or
199 * we recursed at Case 1 below and are no longer violating 4).
201 rb_set_parent_color(node, NULL, RB_BLACK);
206 * If there is a black parent, we are done. Otherwise, take some
207 * corrective action as, per 4), we don't want a red root or two
208 * consecutive red nodes.
210 if (rb_is_black(parent)) {
/* parent was not black above, so its parent word is also read unmasked. */
214 gparent = rb_red_parent(parent);
/* tmp is gparent's right child: the uncle when parent is the left child. */
216 tmp = gparent->rb_right;
217 if (parent != tmp) { /* parent == gparent->rb_left */
218 if (tmp && rb_is_red(tmp)) {
220 * Case 1 - node's uncle is red (color flips).
228 * However, since g's parent might be red, and 4) does not
229 * allow this, we need to recurse at g.
/* Uncle and parent turn black; node's new position is then recolored red. */
231 rb_set_parent_color(tmp, gparent, RB_BLACK);
232 rb_set_parent_color(parent, gparent, RB_BLACK);
234 parent = rb_parent(node);
235 rb_set_parent_color(node, parent, RB_RED);
239 tmp = parent->rb_right;
242 * Case 2 - node's uncle is black and node is
243 * the parent's right child (left rotate at parent).
251 * This still leaves us in violation of 4), the
252 * continuation into Case 3 will fix that.
/* Child links are updated with qatomic_set (see the lockless-lookup notes). */
255 qatomic_set(&parent->rb_right, tmp);
256 qatomic_set(&node->rb_left, parent);
258 rb_set_parent_color(tmp, parent, RB_BLACK);
260 rb_set_parent_color(parent, node, RB_RED);
261 augment->rotate(parent, node);
263 tmp = node->rb_right;
267 * Case 3 - node's uncle is black and node is
268 * the parent's left child (right rotate at gparent).
276 qatomic_set(&gparent->rb_left, tmp); /* == parent->rb_right */
277 qatomic_set(&parent->rb_right, gparent);
279 rb_set_parent_color(tmp, gparent, RB_BLACK);
/* parent takes gparent's place and color; gparent becomes its red child. */
281 rb_rotate_set_parents(gparent, parent, root, RB_RED);
282 augment->rotate(gparent, parent);
/* Mirror image of the cases above, with left and right swapped. */
285 tmp = gparent->rb_left;
286 if (tmp && rb_is_red(tmp)) {
287 /* Case 1 - color flips */
288 rb_set_parent_color(tmp, gparent, RB_BLACK);
289 rb_set_parent_color(parent, gparent, RB_BLACK);
291 parent = rb_parent(node);
292 rb_set_parent_color(node, parent, RB_RED);
296 tmp = parent->rb_left;
298 /* Case 2 - right rotate at parent */
299 tmp = node->rb_right;
300 qatomic_set(&parent->rb_left, tmp);
301 qatomic_set(&node->rb_right, parent);
303 rb_set_parent_color(tmp, parent, RB_BLACK);
305 rb_set_parent_color(parent, node, RB_RED);
306 augment->rotate(parent, node);
311 /* Case 3 - left rotate at gparent */
312 qatomic_set(&gparent->rb_right, tmp); /* == parent->rb_left */
313 qatomic_set(&parent->rb_left, gparent);
315 rb_set_parent_color(tmp, gparent, RB_BLACK);
317 rb_rotate_set_parents(gparent, parent, root, RB_RED);
318 augment->rotate(gparent, parent);
324 static void rb_insert_augmented_cached(RBNode *node,
325 RBRootLeftCached *root, bool newleft,
326 const RBAugmentCallbacks *augment)
329 root->rb_leftmost = node;
331 rb_insert_augmented(node, &root->rb_root, augment);
/*
 * Rebalance after an erase left the paths through @parent one black node
 * short (see the loop invariant below).  rb_erase_augmented() calls this
 * with the node it computed as 'rebalance'.  augment->rotate(old, new) is
 * called after every rotation.
 *
 * NOTE(review): the enclosing loop header and the break/continue
 * statements that separate the cases are not visible in this excerpt.
 */
334 static void rb_erase_color(RBNode *parent, RBRoot *root,
335 const RBAugmentCallbacks *augment)
/* node starts as NULL: on the first pass the deficient child is an empty slot. */
337 RBNode *node = NULL, *sibling, *tmp1, *tmp2;
342 * - node is black (or NULL on first iteration)
343 * - node is not the root (parent is not NULL)
344 * - All leaf paths going through parent and node have a
345 * black node count that is 1 lower than other leaf paths.
347 sibling = parent->rb_right;
348 if (node != sibling) { /* node == parent->rb_left */
/* sibling is dereferenced without a NULL check here. */
349 if (rb_is_red(sibling)) {
351 * Case 1 - left rotate at parent
/*
 * tmp1 is written through unconditionally below, so it must be non-NULL.
 * NOTE(review): presumably guaranteed because a red sibling has black,
 * non-leaf children under the invariant above; confirm.
 */
359 tmp1 = sibling->rb_left;
360 qatomic_set(&parent->rb_right, tmp1);
361 qatomic_set(&sibling->rb_left, parent);
362 rb_set_parent_color(tmp1, parent, RB_BLACK);
363 rb_rotate_set_parents(parent, sibling, root, RB_RED);
364 augment->rotate(parent, sibling);
/* A NULL child is treated the same as a black child in the tests below. */
367 tmp1 = sibling->rb_right;
368 if (!tmp1 || rb_is_black(tmp1)) {
369 tmp2 = sibling->rb_left;
370 if (!tmp2 || rb_is_black(tmp2)) {
372 * Case 2 - sibling color flip
373 * (p could be either color here)
381 * This leaves us violating 5) which
382 * can be fixed by flipping p to black
383 * if it was red, or by recursing at p.
384 * p is red when coming from Case 1.
386 rb_set_parent_color(sibling, parent, RB_RED);
387 if (rb_is_red(parent)) {
388 rb_set_black(parent);
391 parent = rb_parent(node);
399 * Case 3 - right rotate at sibling
400 * (p could be either color here)
410 * Note: p might be red, and then bot
411 * p and sl are red after rotation (which
412 * breaks property 4). This is fixed in
413 * Case 4 (in rb_rotate_set_parents()
414 * which set sl the color of p
415 * and set p RB_BLACK)
/* tmp2 (sibling's left child) is rotated up between parent and sibling. */
425 tmp1 = tmp2->rb_right;
426 qatomic_set(&sibling->rb_left, tmp1);
427 qatomic_set(&tmp2->rb_right, sibling);
428 qatomic_set(&parent->rb_right, tmp2);
430 rb_set_parent_color(tmp1, sibling, RB_BLACK);
432 augment->rotate(sibling, tmp2);
437 * Case 4 - left rotate at parent + color flips
438 * (p and sl could be either color here.
439 * After rotation, p becomes black, s acquires
440 * p's color, and sl keeps its color)
448 tmp2 = sibling->rb_left;
449 qatomic_set(&parent->rb_right, tmp2);
450 qatomic_set(&sibling->rb_left, parent);
451 rb_set_parent_color(tmp1, sibling, RB_BLACK);
/* rb_set_parent keeps tmp2's existing color while re-parenting it. */
453 rb_set_parent(tmp2, parent);
455 rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
456 augment->rotate(parent, sibling);
/* Mirror image of the cases above, with left and right swapped. */
459 sibling = parent->rb_left;
460 if (rb_is_red(sibling)) {
461 /* Case 1 - right rotate at parent */
462 tmp1 = sibling->rb_right;
463 qatomic_set(&parent->rb_left, tmp1);
464 qatomic_set(&sibling->rb_right, parent);
465 rb_set_parent_color(tmp1, parent, RB_BLACK);
466 rb_rotate_set_parents(parent, sibling, root, RB_RED);
467 augment->rotate(parent, sibling);
470 tmp1 = sibling->rb_left;
471 if (!tmp1 || rb_is_black(tmp1)) {
472 tmp2 = sibling->rb_right;
473 if (!tmp2 || rb_is_black(tmp2)) {
474 /* Case 2 - sibling color flip */
475 rb_set_parent_color(sibling, parent, RB_RED);
476 if (rb_is_red(parent)) {
477 rb_set_black(parent);
480 parent = rb_parent(node);
487 /* Case 3 - left rotate at sibling */
488 tmp1 = tmp2->rb_left;
489 qatomic_set(&sibling->rb_right, tmp1);
490 qatomic_set(&tmp2->rb_left, sibling);
491 qatomic_set(&parent->rb_left, tmp2);
493 rb_set_parent_color(tmp1, sibling, RB_BLACK);
495 augment->rotate(sibling, tmp2);
499 /* Case 4 - right rotate at parent + color flips */
500 tmp2 = sibling->rb_right;
501 qatomic_set(&parent->rb_left, tmp2);
502 qatomic_set(&sibling->rb_right, parent);
503 rb_set_parent_color(tmp1, sibling, RB_BLACK);
505 rb_set_parent(tmp2, parent);
507 rb_rotate_set_parents(parent, sibling, root, RB_BLACK);
508 augment->rotate(parent, sibling);
/*
 * Unlink @node from the tree rooted at @root, keeping augmented data
 * current through augment->copy() and augment->propagate(), and hand the
 * resulting 'rebalance' node to rb_erase_color().
 *
 * NOTE(review): the branch headers (which child is missing), several
 * assignments and the guard around the final rb_erase_color() call are
 * not visible in this excerpt.
 */
514 static void rb_erase_augmented(RBNode *node, RBRoot *root,
515 const RBAugmentCallbacks *augment)
517 RBNode *child = node->rb_right;
518 RBNode *tmp = node->rb_left;
519 RBNode *parent, *rebalance;
524 * Case 1: node to erase has no more than 1 child (easy!)
526 * Note that if there is one child it must be red due to 5)
527 * and node must be black due to 4). We adjust colors locally
528 * so as to bypass rb_erase_color() later on.
/* pc keeps node's parent pointer and color so the replacement can inherit both. */
530 pc = node->rb_parent_color;
531 parent = rb_parent(node);
532 rb_change_child(node, child, parent, root);
534 child->rb_parent_color = pc;
/* Rebalancing at parent is requested only when the erased node was black. */
537 rebalance = pc_is_black(pc) ? parent : NULL;
541 /* Still case 1, but this time the child is node->rb_left */
542 pc = node->rb_parent_color;
543 parent = rb_parent(node);
544 tmp->rb_parent_color = pc;
545 rb_change_child(node, tmp, parent, root);
/* Two children: node is replaced by its in-order successor. */
549 RBNode *successor = child, *child2;
550 tmp = child->rb_left;
553 * Case 2: node's successor is its right child
562 child2 = successor->rb_right;
564 augment->copy(node, successor);
567 * Case 3: node's successor is leftmost under
568 * node's right child subtree
/* The successor's right child takes the successor's old slot under parent. */
585 child2 = successor->rb_right;
586 qatomic_set(&parent->rb_left, child2);
587 qatomic_set(&successor->rb_right, child);
588 rb_set_parent(child, successor);
590 augment->copy(node, successor);
591 augment->propagate(parent, successor);
/* The successor adopts a left subtree (tmp) and is linked under node's parent. */
595 qatomic_set(&successor->rb_left, tmp);
596 rb_set_parent(tmp, successor);
598 pc = node->rb_parent_color;
599 tmp = rb_parent(node);
600 rb_change_child(node, successor, tmp, root);
603 rb_set_parent_color(child2, parent, RB_BLACK);
/* The successor's own color is tested before it is overwritten with pc. */
606 rebalance = rb_is_black(successor) ? parent : NULL;
608 successor->rb_parent_color = pc;
/* A NULL stop node is passed, so propagation is not cut short at a given node. */
612 augment->propagate(tmp, NULL);
615 rb_erase_color(rebalance, root, augment);
619 static void rb_erase_augmented_cached(RBNode *node, RBRootLeftCached *root,
620 const RBAugmentCallbacks *augment)
622 if (root->rb_leftmost == node) {
623 root->rb_leftmost = rb_next(node);
625 rb_erase_augmented(node, &root->rb_root, augment);
632 * Derived from lib/interval_tree.c and its dependencies,
633 * especially include/linux/interval_tree_generic.h.
/* Map a pointer to the embedded 'rb' member back to its IntervalTreeNode. */
636 #define rb_to_itree(N) container_of(N, IntervalTreeNode, rb)
638 static bool interval_tree_compute_max(IntervalTreeNode *node, bool exit)
640 IntervalTreeNode *child;
641 uint64_t max = node->last;
643 if (node->rb.rb_left) {
644 child = rb_to_itree(node->rb.rb_left);
645 if (child->subtree_last > max) {
646 max = child->subtree_last;
649 if (node->rb.rb_right) {
650 child = rb_to_itree(node->rb.rb_right);
651 if (child->subtree_last > max) {
652 max = child->subtree_last;
655 if (exit && node->subtree_last == max) {
658 node->subtree_last = max;
662 static void interval_tree_propagate(RBNode *rb, RBNode *stop)
665 IntervalTreeNode *node = rb_to_itree(rb);
666 if (interval_tree_compute_max(node, true)) {
669 rb = rb_parent(&node->rb);
673 static void interval_tree_copy(RBNode *rb_old, RBNode *rb_new)
675 IntervalTreeNode *old = rb_to_itree(rb_old);
676 IntervalTreeNode *new = rb_to_itree(rb_new);
678 new->subtree_last = old->subtree_last;
681 static void interval_tree_rotate(RBNode *rb_old, RBNode *rb_new)
683 IntervalTreeNode *old = rb_to_itree(rb_old);
684 IntervalTreeNode *new = rb_to_itree(rb_new);
686 new->subtree_last = old->subtree_last;
687 interval_tree_compute_max(old, false);
690 static const RBAugmentCallbacks interval_tree_augment = {
691 .propagate = interval_tree_propagate,
692 .copy = interval_tree_copy,
693 .rotate = interval_tree_rotate,
696 /* Insert / remove interval nodes from the tree */
697 void interval_tree_insert(IntervalTreeNode *node, IntervalTreeRoot *root)
699 RBNode **link = &root->rb_root.rb_node, *rb_parent = NULL;
700 uint64_t start = node->start, last = node->last;
701 IntervalTreeNode *parent;
702 bool leftmost = true;
706 parent = rb_to_itree(rb_parent);
708 if (parent->subtree_last < last) {
709 parent->subtree_last = last;
711 if (start < parent->start) {
712 link = &parent->rb.rb_left;
714 link = &parent->rb.rb_right;
719 node->subtree_last = last;
720 rb_link_node(&node->rb, rb_parent, link);
721 rb_insert_augmented_cached(&node->rb, root, leftmost,
722 &interval_tree_augment);
725 void interval_tree_remove(IntervalTreeNode *node, IntervalTreeRoot *root)
727 rb_erase_augmented_cached(&node->rb, root, &interval_tree_augment);
731 * Iterate over intervals intersecting [start;last]
733 * Note that a node's interval intersects [start;last] iff:
734 * Cond1: node->start <= last
736 * Cond2: start <= node->last
739 static IntervalTreeNode *interval_tree_subtree_search(IntervalTreeNode *node,
745 * Loop invariant: start <= node->subtree_last
746 * (Cond2 is satisfied by one of the subtree nodes)
748 if (node->rb.rb_left) {
749 IntervalTreeNode *left = rb_to_itree(node->rb.rb_left);
751 if (start <= left->subtree_last) {
753 * Some nodes in left subtree satisfy Cond2.
754 * Iterate to find the leftmost such node N.
755 * If it also satisfies Cond1, that's the
756 * match we are looking for. Otherwise, there
757 * is no matching interval as nodes to the
758 * right of N can't satisfy Cond1 either.
764 if (node->start <= last) { /* Cond1 */
765 if (start <= node->last) { /* Cond2 */
766 return node; /* node is leftmost match */
768 if (node->rb.rb_right) {
769 node = rb_to_itree(node->rb.rb_right);
770 if (start <= node->subtree_last) {
775 return NULL; /* no match */
779 IntervalTreeNode *interval_tree_iter_first(IntervalTreeRoot *root,
780 uint64_t start, uint64_t last)
782 IntervalTreeNode *node, *leftmost;
784 if (!root->rb_root.rb_node) {
789 * Fastpath range intersection/overlap between A: [a0, a1] and
790 * B: [b0, b1] is given by:
792 * a0 <= b1 && b0 <= a1
794 * ... where A holds the lock range and B holds the smallest
795 * 'start' and largest 'last' in the tree. For the later, we
796 * rely on the root node, which by augmented interval tree
797 * property, holds the largest value in its last-in-subtree.
798 * This allows mitigating some of the tree walk overhead for
799 * for non-intersecting ranges, maintained and consulted in O(1).
801 node = rb_to_itree(root->rb_root.rb_node);
802 if (node->subtree_last < start) {
806 leftmost = rb_to_itree(root->rb_leftmost);
807 if (leftmost->start > last) {
811 return interval_tree_subtree_search(node, start, last);
814 IntervalTreeNode *interval_tree_iter_next(IntervalTreeNode *node,
815 uint64_t start, uint64_t last)
817 RBNode *rb = node->rb.rb_right, *prev;
822 * Cond1: node->start <= last
823 * rb == node->rb.rb_right
825 * First, search right subtree if suitable
828 IntervalTreeNode *right = rb_to_itree(rb);
830 if (start <= right->subtree_last) {
831 return interval_tree_subtree_search(right, start, last);
835 /* Move up the tree until we come from a node's left child */
837 rb = rb_parent(&node->rb);
842 node = rb_to_itree(rb);
843 rb = node->rb.rb_right;
844 } while (prev == rb);
846 /* Check if the node intersects [start;last] */
847 if (last < node->start) { /* !Cond1 */
850 if (start <= node->last) { /* Cond2 */
856 /* Occasionally useful for calling from within the debugger. */
858 static void debug_interval_tree_int(IntervalTreeNode *node,
859 const char *dir, int level)
861 printf("%4d %*s %s [%" PRIu64 ",%" PRIu64 "] subtree_last:%" PRIu64 "\n",
862 level, level + 1, dir, rb_is_red(&node->rb) ? "r" : "b",
863 node->start, node->last, node->subtree_last);
865 if (node->rb.rb_left) {
866 debug_interval_tree_int(rb_to_itree(node->rb.rb_left), "<", level + 1);
868 if (node->rb.rb_right) {
869 debug_interval_tree_int(rb_to_itree(node->rb.rb_right), ">", level + 1);
873 void debug_interval_tree(IntervalTreeNode *node);
874 void debug_interval_tree(IntervalTreeNode *node)
877 debug_interval_tree_int(node, "*", 0);