// SPDX-License-Identifier: GPL-2.0-only
/*
 * Sparse bit array
 *
 * Copyright (C) 2018, Google LLC.
 * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
 *
 * This library provides functions to support a memory efficient bit array,
 * with an index size of 2^64.  A sparsebit array is allocated through
 * the use of sparsebit_alloc() and freed via sparsebit_free(),
 * such as in the following:
 *
 *   struct sparsebit *s;
 *   s = sparsebit_alloc();
 *   sparsebit_free(&s);
 *
 * The struct sparsebit type is opaque to callers; its members are only
 * used within this file.  Note that sparsebit_free() takes a pointer
 * to the caller's sparsebit pointer.  This is so that sparsebit_free()
 * is able to poison that pointer (e.g. set it to NULL) before
 * returning to the caller.
 *
 * Between the return of sparsebit_alloc() and the call of
 * sparsebit_free(), there are multiple query and modifying operations
 * that can be performed on the allocated sparsebit array.  All of
 * these operations take as a parameter the value returned from
 * sparsebit_alloc() and most also take a bit index.  Frequently
 * used routines include:
 *
 *  ---- Query Operations
 *  sparsebit_is_set(s, idx)
 *  sparsebit_is_clear(s, idx)
 *  sparsebit_any_set(s)
 *  sparsebit_first_set(s)
 *  sparsebit_next_set(s, prev_idx)
 *
 *  ---- Modifying Operations
 *  sparsebit_set(s, idx)
 *  sparsebit_clear(s, idx)
 *  sparsebit_set_num(s, idx, num);
 *  sparsebit_clear_num(s, idx, num);
 *
 * A common operation is to iterate over all the bits set in a
 * sparsebit array.  This can be done via code with the following structure:
 *
 *   sparsebit_idx_t idx;
 *   if (sparsebit_any_set(s)) {
 *     idx = sparsebit_first_set(s);
 *     do {
 *       ...
 *       idx = sparsebit_next_set(s, idx);
 *     } while (idx != 0);
 *   }
 *
 * The index of the first set bit needs to be obtained via
 * sparsebit_first_set(), because sparsebit_next_set() needs
 * the index of the previously found set bit.  The sparsebit_idx_t type is
 * unsigned, so there is no previous index before 0 that is available.
 * Also, the call to sparsebit_first_set() is not made unless there
 * is at least 1 bit in the array set.  This is because sparsebit_first_set()
 * aborts if it is called with no bits set.
 * It is the caller's responsibility to assure that the
 * sparsebit array has at least a single bit set before calling
 * sparsebit_first_set().
 *
 * ==== Implementation Overview ====
 * For the most part the internal implementation of sparsebit is
 * opaque to the caller.  One important implementation detail that the
 * caller may need to be aware of is the spatial complexity of the
 * implementation.  This implementation of a sparsebit array is not
 * only sparse, in that it uses memory proportional to the number of bits
 * set; it is also efficient in memory usage when most of the bits are
 * set.
 *
 * At a high level the state of the bit settings is maintained through
 * the use of a binary-search tree, where each node contains at least
 * the following members:
 *
 *   typedef uint64_t sparsebit_idx_t;
 *   typedef uint64_t sparsebit_num_t;
 *
 *   sparsebit_idx_t idx;
 *   uint32_t mask;
 *   sparsebit_num_t num_after;
 *
 * The idx member contains the bit index of the first bit described by this
 * node, while the mask member stores the setting of the first 32 bits.
 * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
 * mask member at 1 << n.
 *
 * Nodes are sorted by idx and the bits described by two nodes will never
 * overlap.  The idx member is always aligned to the mask size, i.e. a
 * multiple of 32.
 *
 * Beyond a typical implementation, the nodes in this implementation also
 * contain a member named num_after.  The num_after member holds the
 * number of bits immediately after the mask bits that are contiguously set.
 * The use of the num_after member allows this implementation to efficiently
 * represent cases where most bits are set.  For example, the case of all
 * but the last two bits set is represented by the following two nodes:
 *
 *   node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
 *   node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
 *
 * ==== Invariants ====
 * This implementation uses the following invariants:
 *
 *   + Nodes are only used to represent bits that are set.
 *     Nodes with a mask of 0 and num_after of 0 are not allowed.
 *
 *   + The sum of bits set in all the nodes is equal to the value of
 *     the struct sparsebit num_set member.
 *
 *   + The setting of at least one bit is always described in a node's
 *     mask (mask >= 1).
 *
 *   + A node with all mask bits set only occurs when the last bit
 *     described by the previous node is not equal to this node's
 *     starting index - 1.  All such occurrences of this condition are
 *     avoided by moving the setting of the node's mask bits into
 *     the previous node's num_after setting.
 *
 *   + A node's starting index is evenly divisible by the number of bits
 *     within a node's mask member.
 *
 *   + Nodes never represent a range of bits that wraps around the
 *     highest supported index.
 *
 *      (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1
 *
 *     As a consequence of the above, the num_after member of a node
 *     will always be <=:
 *
 *       maximum_index - nodes_starting_index - number_of_mask_bits
 *
 *   + Nodes within the binary search tree are sorted based on each
 *     node's starting index.
 *
 *   + The ranges of bits described by any two nodes do not overlap.  The
 *     range of bits described by a single node is:
 *
 *      start: node->idx
 *      end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
 *
 * Note, at times these invariants are temporarily violated for a
 * specific portion of the code.  For example, when setting a mask
 * bit, there is a small delay between when the mask bit is set and the
 * value in the struct sparsebit num_set member is updated.  Other
 * temporary violations occur when node_split() is called with a specified
 * index to assure that a node exists whose mask describes the bit
 * at the specified index.  At times, to do this, node_split()
 * must split an existing node into two nodes or create a node that
 * has no bits set.  Such temporary violations must be corrected before
 * returning to the caller.  These corrections are typically performed
 * by the local function node_reduce().
 */
157
158#include "test_util.h"
159#include "sparsebit.h"
160#include <limits.h>
161#include <assert.h>
162
163#define DUMP_LINE_MAX 100 /* Does not include indent amount */
164
165typedef uint32_t mask_t;
166#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
167
168struct node {
169 struct node *parent;
170 struct node *left;
171 struct node *right;
172 sparsebit_idx_t idx; /* index of least-significant bit in mask */
173 sparsebit_num_t num_after; /* num contiguously set after mask */
174 mask_t mask;
175};
176
177struct sparsebit {
178 /*
179 * Points to root node of the binary search
180 * tree. Equal to NULL when no bits are set in
181 * the entire sparsebit array.
182 */
183 struct node *root;
184
185 /*
186 * A redundant count of the total number of bits set. Used for
187 * diagnostic purposes and to change the time complexity of
188 * sparsebit_num_set() from O(n) to O(1).
189 * Note: Due to overflow, a value of 0 means none or all set.
190 */
191 sparsebit_num_t num_set;
192};
193
194/* Returns the number of set bits described by the settings
195 * of the node pointed to by nodep.
196 */
197static sparsebit_num_t node_num_set(struct node *nodep)
198{
199 return nodep->num_after + __builtin_popcount(nodep->mask);
200}
201
202/* Returns a pointer to the node that describes the
203 * lowest bit index.
204 */
205static struct node *node_first(struct sparsebit *s)
206{
207 struct node *nodep;
208
209 for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
210 ;
211
212 return nodep;
213}
214
215/* Returns a pointer to the node that describes the
216 * lowest bit index > the index of the node pointed to by np.
217 * Returns NULL if no node with a higher index exists.
218 */
219static struct node *node_next(struct sparsebit *s, struct node *np)
220{
221 struct node *nodep = np;
222
223 /*
224 * If current node has a right child, next node is the left-most
225 * of the right child.
226 */
227 if (nodep->right) {
228 for (nodep = nodep->right; nodep->left; nodep = nodep->left)
229 ;
230 return nodep;
231 }
232
233 /*
234 * No right child. Go up until node is left child of a parent.
235 * That parent is then the next node.
236 */
237 while (nodep->parent && nodep == nodep->parent->right)
238 nodep = nodep->parent;
239
240 return nodep->parent;
241}
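
/*
 * Usage sketch (illustrative, not part of the API): node_first() together
 * with node_next() visits the nodes in ascending idx order.  The same
 * walk is used later in this file, e.g. by sparsebit_dump() and
 * sparsebit_validate_internal():
 *
 *	struct node *nodep;
 *
 *	for (nodep = node_first(s); nodep; nodep = node_next(s, nodep))
 *		examine(nodep);	   // examine() is a hypothetical helper
 */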
242
243/* Searches for and returns a pointer to the node that describes the
244 * highest index < the index of the node pointed to by np.
245 * Returns NULL if no node with a lower index exists.
246 */
247static struct node *node_prev(struct sparsebit *s, struct node *np)
248{
249 struct node *nodep = np;
250
	/*
	 * If the current node has a left child, the previous node is the
	 * right-most node of the left child.
	 */
	if (nodep->left) {
		for (nodep = nodep->left; nodep->right; nodep = nodep->right)
			;
		return nodep;
	}

	/*
	 * No left child.  Go up until the node is the right child of a
	 * parent.  That parent is then the previous node.
	 */
	while (nodep->parent && nodep == nodep->parent->left)
		nodep = nodep->parent;

	return nodep->parent;
}
270
271
272/* Allocates space to hold a copy of the node sub-tree pointed to by
273 * subtree and duplicates the bit settings to the newly allocated nodes.
274 * Returns the newly allocated copy of subtree.
275 */
276static struct node *node_copy_subtree(struct node *subtree)
277{
278 struct node *root;
279
280 /* Duplicate the node at the root of the subtree */
281 root = calloc(1, sizeof(*root));
282 if (!root) {
283 perror("calloc");
284 abort();
285 }
286
287 root->idx = subtree->idx;
288 root->mask = subtree->mask;
289 root->num_after = subtree->num_after;
290
291 /* As needed, recursively duplicate the left and right subtrees */
292 if (subtree->left) {
293 root->left = node_copy_subtree(subtree->left);
294 root->left->parent = root;
295 }
296
297 if (subtree->right) {
298 root->right = node_copy_subtree(subtree->right);
299 root->right->parent = root;
300 }
301
302 return root;
303}
304
305/* Searches for and returns a pointer to the node that describes the setting
306 * of the bit given by idx. A node describes the setting of a bit if its
307 * index is within the bits described by the mask bits or the number of
308 * contiguous bits set after the mask. Returns NULL if there is no such node.
309 */
310static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
311{
312 struct node *nodep;
313
314 /* Find the node that describes the setting of the bit at idx */
315 for (nodep = s->root; nodep;
316 nodep = nodep->idx > idx ? nodep->left : nodep->right) {
317 if (idx >= nodep->idx &&
318 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
319 break;
320 }
321
322 return nodep;
323}
324
325/* Entry Requirements:
326 * + A node that describes the setting of idx is not already present.
327 *
328 * Adds a new node to describe the setting of the bit at the index given
329 * by idx. Returns a pointer to the newly added node.
330 *
 * TODO(lhuemill): Degenerate cases cause the tree to become unbalanced.
332 */
333static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
334{
335 struct node *nodep, *parentp, *prev;
336
337 /* Allocate and initialize the new node. */
338 nodep = calloc(1, sizeof(*nodep));
339 if (!nodep) {
340 perror("calloc");
341 abort();
342 }
343
344 nodep->idx = idx & -MASK_BITS;
345
346 /* If no nodes, set it up as the root node. */
347 if (!s->root) {
348 s->root = nodep;
349 return nodep;
350 }
351
352 /*
353 * Find the parent where the new node should be attached
354 * and add the node there.
355 */
356 parentp = s->root;
357 while (true) {
358 if (idx < parentp->idx) {
359 if (!parentp->left) {
360 parentp->left = nodep;
361 nodep->parent = parentp;
362 break;
363 }
364 parentp = parentp->left;
365 } else {
366 assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
367 if (!parentp->right) {
368 parentp->right = nodep;
369 nodep->parent = parentp;
370 break;
371 }
372 parentp = parentp->right;
373 }
374 }
375
	/*
	 * Do the num_after bits of the previous node overlap with the mask
	 * of the new node?  If so, set those bits in the new node's mask
	 * and reduce the previous node's num_after accordingly.
	 */
381 prev = node_prev(s, nodep);
382 while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
383 unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
384 - nodep->idx;
385 assert(prev->num_after > 0);
386 assert(n1 < MASK_BITS);
387 assert(!(nodep->mask & (1 << n1)));
388 nodep->mask |= (1 << n1);
389 prev->num_after--;
390 }
391
392 return nodep;
393}
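
/*
 * Worked example (illustrative values): node_add(s, 0x47) stores
 * nodep->idx = 0x47 & -MASK_BITS = 0x40, i.e. idx rounded down to a
 * multiple of MASK_BITS (32), so the bit at index 0x47 lands at mask
 * bit position 0x47 - 0x40 = 7 of the new node once a caller sets it.
 * The trailing loop handles the case where the previous node's num_after
 * range reaches into the new node's mask: those overlapping bits are
 * converted into mask bits of the new node so that no two nodes ever
 * describe the same bit.
 */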
394
395/* Returns whether all the bits in the sparsebit array are set. */
396bool sparsebit_all_set(struct sparsebit *s)
397{
398 /*
	 * If there are any nodes, at least one bit is set.  The only case
	 * where a bit is set and the total num_set is 0 is when all bits
	 * are set.
402 */
403 return s->root && s->num_set == 0;
404}
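
/*
 * Example of the overflow convention (sketch): after sparsebit_set_all(s),
 * all 2^64 bits are set, which cannot be represented by the 64-bit
 * num_set counter, so num_set wraps to 0 while root remains non-NULL.
 * Hence "root && num_set == 0" is exactly the all-bits-set case:
 *
 *	sparsebit_set_all(s);
 *	assert(sparsebit_all_set(s));	   // root != NULL, num_set == 0
 */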
405
406/* Clears all bits described by the node pointed to by nodep, then
407 * removes the node.
408 */
409static void node_rm(struct sparsebit *s, struct node *nodep)
410{
411 struct node *tmp;
412 sparsebit_num_t num_set;
413
414 num_set = node_num_set(nodep);
415 assert(s->num_set >= num_set || sparsebit_all_set(s));
416 s->num_set -= node_num_set(nodep);
417
418 /* Have both left and right child */
419 if (nodep->left && nodep->right) {
420 /*
421 * Move left children to the leftmost leaf node
422 * of the right child.
423 */
424 for (tmp = nodep->right; tmp->left; tmp = tmp->left)
425 ;
426 tmp->left = nodep->left;
427 nodep->left = NULL;
428 tmp->left->parent = tmp;
429 }
430
431 /* Left only child */
432 if (nodep->left) {
433 if (!nodep->parent) {
434 s->root = nodep->left;
435 nodep->left->parent = NULL;
436 } else {
437 nodep->left->parent = nodep->parent;
438 if (nodep == nodep->parent->left)
439 nodep->parent->left = nodep->left;
440 else {
441 assert(nodep == nodep->parent->right);
442 nodep->parent->right = nodep->left;
443 }
444 }
445
446 nodep->parent = nodep->left = nodep->right = NULL;
447 free(nodep);
448
449 return;
450 }
451
452
453 /* Right only child */
454 if (nodep->right) {
455 if (!nodep->parent) {
456 s->root = nodep->right;
457 nodep->right->parent = NULL;
458 } else {
459 nodep->right->parent = nodep->parent;
460 if (nodep == nodep->parent->left)
461 nodep->parent->left = nodep->right;
462 else {
463 assert(nodep == nodep->parent->right);
464 nodep->parent->right = nodep->right;
465 }
466 }
467
468 nodep->parent = nodep->left = nodep->right = NULL;
469 free(nodep);
470
471 return;
472 }
473
474 /* Leaf Node */
475 if (!nodep->parent) {
476 s->root = NULL;
477 } else {
478 if (nodep->parent->left == nodep)
479 nodep->parent->left = NULL;
480 else {
481 assert(nodep == nodep->parent->right);
482 nodep->parent->right = NULL;
483 }
484 }
485
486 nodep->parent = nodep->left = nodep->right = NULL;
487 free(nodep);
488
489 return;
490}
491
/* Splits the node containing the bit at idx so that there is a node
 * that starts at the specified index.  If no such node exists, a new
 * node at the specified index is created.  Returns the new node.
 *
 * idx must be aligned to a mask boundary (a multiple of MASK_BITS).
 */
498static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
499{
500 struct node *nodep1, *nodep2;
501 sparsebit_idx_t offset;
502 sparsebit_num_t orig_num_after;
503
504 assert(!(idx % MASK_BITS));
505
506 /*
507 * Is there a node that describes the setting of idx?
508 * If not, add it.
509 */
510 nodep1 = node_find(s, idx);
511 if (!nodep1)
512 return node_add(s, idx);
513
514 /*
515 * All done if the starting index of the node is where the
516 * split should occur.
517 */
518 if (nodep1->idx == idx)
519 return nodep1;
520
521 /*
522 * Split point not at start of mask, so it must be part of
523 * bits described by num_after.
524 */
525
526 /*
527 * Calculate offset within num_after for where the split is
528 * to occur.
529 */
530 offset = idx - (nodep1->idx + MASK_BITS);
531 orig_num_after = nodep1->num_after;
532
533 /*
534 * Add a new node to describe the bits starting at
535 * the split point.
536 */
537 nodep1->num_after = offset;
538 nodep2 = node_add(s, idx);
539
540 /* Move bits after the split point into the new node */
541 nodep2->num_after = orig_num_after - offset;
542 if (nodep2->num_after >= MASK_BITS) {
543 nodep2->mask = ~(mask_t) 0;
544 nodep2->num_after -= MASK_BITS;
545 } else {
546 nodep2->mask = (1 << nodep2->num_after) - 1;
547 nodep2->num_after = 0;
548 }
549
550 return nodep2;
551}
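
/*
 * Worked example (illustrative values): suppose a single node describes
 * bits 0x0 - 0x7f, i.e. idx = 0x0, mask = 0xffffffff, num_after = 0x60.
 * node_split(s, 0x40) computes offset = 0x40 - (0x0 + MASK_BITS) = 0x20,
 * truncates the existing node to num_after = 0x20 (bits 0x0 - 0x3f), and
 * adds a second node with idx = 0x40 that takes over the remaining bits
 * as mask = 0xffffffff, num_after = 0x20 (bits 0x40 - 0x7f).
 * node_reduce() can later merge the two nodes back together.
 */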
552
/* Iteratively reduces the node pointed to by nodep and its adjacent
 * nodes into a more compact form.  For example, a node whose mask has
 * all bits set and which is adjacent to the previous node will get
 * combined into a single node with an increased num_after setting.
 *
 * After each reduction, a further check is made to see if additional
 * reductions are possible with the new previous and next nodes.  Note,
 * a search for a reduction is only done across the nodes nearest nodep
 * and those that became part of a reduction.  Reductions beyond nodep
 * and the adjacent nodes that are reduced are not discovered.  It is the
 * responsibility of the caller to pass a nodep that is within one node
 * of each possible reduction.
 *
 * This function does not fix every kind of temporary invariant violation.
 * For example, it does not fix the case where the bit settings described
 * by two or more nodes overlap.  Such a violation introduces the potential
 * complication of a bit setting for a specific index having different settings
 * in different nodes.  This would then introduce the further complication
 * of which node has the correct setting of the bit and thus such conditions
 * are not allowed.
 *
 * This function is designed to fix invariant violations that are introduced
 * by node_split() and by changes to a node's mask or num_after members.
 * For example, when setting a bit within a node's mask, the function that
 * sets the bit doesn't have to worry about whether the setting of that
 * bit caused the mask to have leading only or trailing only bits set.
 * Instead, the function can call node_reduce(), with nodep equal to the
 * node address that it set a mask bit in, and node_reduce() will notice
 * the cases of leading or trailing only bits and that there is an
 * adjacent node that the bit settings could be merged into.
 *
 * This implementation specifically detects and corrects violations of the
 * following invariants:
 *
 *   + Nodes are only used to represent bits that are set.
 *     Nodes with a mask of 0 and num_after of 0 are not allowed.
 *
 *   + The setting of at least one bit is always described in a node's
 *     mask (mask >= 1).
 *
 *   + A node with all mask bits set only occurs when the last bit
 *     described by the previous node is not equal to this node's
 *     starting index - 1.  All such occurrences of this condition are
 *     avoided by moving the setting of the node's mask bits into
 *     the previous node's num_after setting.
 */
599static void node_reduce(struct sparsebit *s, struct node *nodep)
600{
601 bool reduction_performed;
602
603 do {
604 reduction_performed = false;
605 struct node *prev, *next, *tmp;
606
607 /* 1) Potential reductions within the current node. */
608
609 /* Nodes with all bits cleared may be removed. */
610 if (nodep->mask == 0 && nodep->num_after == 0) {
611 /*
612 * About to remove the node pointed to by
613 * nodep, which normally would cause a problem
614 * for the next pass through the reduction loop,
615 * because the node at the starting point no longer
616 * exists. This potential problem is handled
617 * by first remembering the location of the next
618 * or previous nodes. Doesn't matter which, because
619 * once the node at nodep is removed, there will be
620 * no other nodes between prev and next.
621 *
			 * Note, the checks performed on nodep against
			 * both prev and next look for an adjacent node
			 * that can be reduced into a single node.  As
			 * such, after removing the node at nodep, it
			 * doesn't matter whether the nodep for the next
			 * pass through the loop is the previous pass's
			 * prev or next node.  Either way, on the next
			 * pass the one not selected will become either
			 * the prev or next node.
631 */
632 tmp = node_next(s, nodep);
633 if (!tmp)
634 tmp = node_prev(s, nodep);
635
			node_rm(s, nodep);
			nodep = tmp;
640 reduction_performed = true;
641 continue;
642 }
643
644 /*
645 * When the mask is 0, can reduce the amount of num_after
646 * bits by moving the initial num_after bits into the mask.
647 */
648 if (nodep->mask == 0) {
649 assert(nodep->num_after != 0);
650 assert(nodep->idx + MASK_BITS > nodep->idx);
651
652 nodep->idx += MASK_BITS;
653
654 if (nodep->num_after >= MASK_BITS) {
655 nodep->mask = ~0;
656 nodep->num_after -= MASK_BITS;
657 } else {
658 nodep->mask = (1u << nodep->num_after) - 1;
659 nodep->num_after = 0;
660 }
661
662 reduction_performed = true;
663 continue;
664 }
665
666 /*
667 * 2) Potential reductions between the current and
668 * previous nodes.
669 */
670 prev = node_prev(s, nodep);
671 if (prev) {
672 sparsebit_idx_t prev_highest_bit;
673
674 /* Nodes with no bits set can be removed. */
675 if (prev->mask == 0 && prev->num_after == 0) {
676 node_rm(s, prev);
677
678 reduction_performed = true;
679 continue;
680 }
681
682 /*
683 * All mask bits set and previous node has
684 * adjacent index.
685 */
686 if (nodep->mask + 1 == 0 &&
687 prev->idx + MASK_BITS == nodep->idx) {
688 prev->num_after += MASK_BITS + nodep->num_after;
689 nodep->mask = 0;
690 nodep->num_after = 0;
691
692 reduction_performed = true;
693 continue;
694 }
695
696 /*
697 * Is node adjacent to previous node and the node
698 * contains a single contiguous range of bits
699 * starting from the beginning of the mask?
700 */
701 prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
702 if (prev_highest_bit + 1 == nodep->idx &&
703 (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
704 /*
705 * How many contiguous bits are there?
706 * Is equal to the total number of set
707 * bits, due to an earlier check that
708 * there is a single contiguous range of
709 * set bits.
710 */
711 unsigned int num_contiguous
712 = __builtin_popcount(nodep->mask);
713 assert((num_contiguous > 0) &&
714 ((1ULL << num_contiguous) - 1) == nodep->mask);
715
716 prev->num_after += num_contiguous;
717 nodep->mask = 0;
718
719 /*
720 * For predictable performance, handle special
721 * case where all mask bits are set and there
722 * is a non-zero num_after setting. This code
723 * is functionally correct without the following
724 * conditionalized statements, but without them
725 * the value of num_after is only reduced by
726 * the number of mask bits per pass. There are
727 * cases where num_after can be close to 2^64.
728 * Without this code it could take nearly
729 * (2^64) / 32 passes to perform the full
730 * reduction.
731 */
732 if (num_contiguous == MASK_BITS) {
733 prev->num_after += nodep->num_after;
734 nodep->num_after = 0;
735 }
736
737 reduction_performed = true;
738 continue;
739 }
740 }
741
742 /*
743 * 3) Potential reductions between the current and
744 * next nodes.
745 */
746 next = node_next(s, nodep);
747 if (next) {
748 /* Nodes with no bits set can be removed. */
749 if (next->mask == 0 && next->num_after == 0) {
750 node_rm(s, next);
751 reduction_performed = true;
752 continue;
753 }
754
755 /*
756 * Is next node index adjacent to current node
757 * and has a mask with all bits set?
758 */
759 if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
760 next->mask == ~(mask_t) 0) {
761 nodep->num_after += MASK_BITS;
762 next->mask = 0;
763 nodep->num_after += next->num_after;
764 next->num_after = 0;
765
766 node_rm(s, next);
767 next = NULL;
768
769 reduction_performed = true;
770 continue;
771 }
772 }
773 } while (nodep && reduction_performed);
774}
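
/*
 * Worked example (illustrative values): given the two nodes produced by
 * the node_split() example above,
 *
 *	node A - idx: 0x0  mask: 0xffffffff num_after: 0x20
 *	node B - idx: 0x40 mask: 0xffffffff num_after: 0x20
 *
 * node_reduce(s, B) notices that B starts right after A's last bit and
 * holds a single contiguous run of set bits, so it folds B's bits into
 * A's num_after and removes the now-empty B, leaving
 *
 *	node A - idx: 0x0 mask: 0xffffffff num_after: 0x60
 */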
775
776/* Returns whether the bit at the index given by idx, within the
777 * sparsebit array is set or not.
778 */
779bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
780{
781 struct node *nodep;
782
783 /* Find the node that describes the setting of the bit at idx */
784 for (nodep = s->root; nodep;
785 nodep = nodep->idx > idx ? nodep->left : nodep->right)
786 if (idx >= nodep->idx &&
787 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
788 goto have_node;
789
790 return false;
791
792have_node:
793 /* Bit is set if it is any of the bits described by num_after */
794 if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
795 return true;
796
797 /* Is the corresponding mask bit set */
798 assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
799 return !!(nodep->mask & (1 << (idx - nodep->idx)));
800}
801
802/* Within the sparsebit array pointed to by s, sets the bit
803 * at the index given by idx.
804 */
805static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
806{
807 struct node *nodep;
808
809 /* Skip bits that are already set */
810 if (sparsebit_is_set(s, idx))
811 return;
812
813 /*
814 * Get a node where the bit at idx is described by the mask.
	 * The node_split() call will also create a node if there isn't
	 * already a node that describes the setting of the bit.
817 */
818 nodep = node_split(s, idx & -MASK_BITS);
819
820 /* Set the bit within the nodes mask */
821 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
822 assert(!(nodep->mask & (1 << (idx - nodep->idx))));
823 nodep->mask |= 1 << (idx - nodep->idx);
824 s->num_set++;
825
826 node_reduce(s, nodep);
827}
828
829/* Within the sparsebit array pointed to by s, clears the bit
830 * at the index given by idx.
831 */
832static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
833{
834 struct node *nodep;
835
836 /* Skip bits that are already cleared */
837 if (!sparsebit_is_set(s, idx))
838 return;
839
840 /* Is there a node that describes the setting of this bit? */
841 nodep = node_find(s, idx);
842 if (!nodep)
843 return;
844
845 /*
846 * If a num_after bit, split the node, so that the bit is
847 * part of a node mask.
848 */
849 if (idx >= nodep->idx + MASK_BITS)
850 nodep = node_split(s, idx & -MASK_BITS);
851
852 /*
853 * After node_split above, bit at idx should be within the mask.
854 * Clear that bit.
855 */
856 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
857 assert(nodep->mask & (1 << (idx - nodep->idx)));
858 nodep->mask &= ~(1 << (idx - nodep->idx));
859 assert(s->num_set > 0 || sparsebit_all_set(s));
860 s->num_set--;
861
862 node_reduce(s, nodep);
863}
864
865/* Recursively dumps to the FILE stream given by stream the contents
866 * of the sub-tree of nodes pointed to by nodep. Each line of output
867 * is prefixed by the number of spaces given by indent. On each
868 * recursion, the indent amount is increased by 2. This causes nodes
869 * at each level deeper into the binary search tree to be displayed
870 * with a greater indent.
871 */
872static void dump_nodes(FILE *stream, struct node *nodep,
873 unsigned int indent)
874{
875 char *node_type;
876
877 /* Dump contents of node */
878 if (!nodep->parent)
879 node_type = "root";
880 else if (nodep == nodep->parent->left)
881 node_type = "left";
882 else {
883 assert(nodep == nodep->parent->right);
884 node_type = "right";
885 }
886 fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
887 fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
888 nodep->parent, nodep->left, nodep->right);
889 fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
890 indent, "", nodep->idx, nodep->mask, nodep->num_after);
891
892 /* If present, dump contents of left child nodes */
893 if (nodep->left)
894 dump_nodes(stream, nodep->left, indent + 2);
895
896 /* If present, dump contents of right child nodes */
897 if (nodep->right)
898 dump_nodes(stream, nodep->right, indent + 2);
899}
900
901static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
902{
903 mask_t leading = (mask_t)1 << start;
904 int n1 = __builtin_ctz(nodep->mask & -leading);
905
906 return nodep->idx + n1;
907}
908
909static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
910{
911 mask_t leading = (mask_t)1 << start;
912 int n1 = __builtin_ctz(~nodep->mask & -leading);
913
914 return nodep->idx + n1;
915}
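
/*
 * Worked example (illustrative values): with nodep->idx = 0x40 and
 * nodep->mask = 0x000000f0, node_first_set(nodep, 2) computes
 * leading = 1 << 2 = 0x4, mask & -leading = 0xf0 (only mask bits at
 * position 2 or above survive), __builtin_ctz(0xf0) = 4, and returns
 * 0x44, the lowest set bit at or after mask position 2.  Callers must
 * ensure such a bit exists, since __builtin_ctz(0) is undefined.
 * node_first_clear() applies the same trick to the inverted mask.
 */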
916
917/* Dumps to the FILE stream specified by stream, the implementation dependent
918 * internal state of s. Each line of output is prefixed with the number
919 * of spaces given by indent. The output is completely implementation
920 * dependent and subject to change. Output from this function should only
921 * be used for diagnostic purposes. For example, this function can be
922 * used by test cases after they detect an unexpected condition, as a means
923 * to capture diagnostic information.
924 */
925static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
926 unsigned int indent)
927{
928 /* Dump the contents of s */
929 fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
930 fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
931
932 if (s->root)
933 dump_nodes(stream, s->root, indent);
934}
935
936/* Allocates and returns a new sparsebit array. The initial state
937 * of the newly allocated sparsebit array has all bits cleared.
938 */
939struct sparsebit *sparsebit_alloc(void)
940{
941 struct sparsebit *s;
942
943 /* Allocate top level structure. */
944 s = calloc(1, sizeof(*s));
945 if (!s) {
946 perror("calloc");
947 abort();
948 }
949
950 return s;
951}
952
953/* Frees the implementation dependent data for the sparsebit array
954 * pointed to by s and poisons the pointer to that data.
955 */
956void sparsebit_free(struct sparsebit **sbitp)
957{
958 struct sparsebit *s = *sbitp;
959
960 if (!s)
961 return;
962
963 sparsebit_clear_all(s);
964 free(s);
965 *sbitp = NULL;
966}
967
/* Makes a copy of the sparsebit array given by s into the sparsebit
 * array given by d.  Note, d must have already been allocated via
 * sparsebit_alloc().  Any bits already set in d are cleared before
 * the copy is made.
 */
973void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
974{
975 /* First clear any bits already set in the destination */
976 sparsebit_clear_all(d);
977
978 if (s->root) {
979 d->root = node_copy_subtree(s->root);
980 d->num_set = s->num_set;
981 }
982}
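
/*
 * Usage sketch (illustrative): duplicating an array and then diverging.
 *
 *	struct sparsebit *src = sparsebit_alloc();
 *	struct sparsebit *dst = sparsebit_alloc();
 *
 *	sparsebit_set_num(src, 0x1000, 16);
 *	sparsebit_copy(dst, src);	   // dst now equals src
 *	sparsebit_clear(dst, 0x1000);	   // src is unaffected
 *
 *	sparsebit_free(&src);
 *	sparsebit_free(&dst);
 */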
983
984/* Returns whether num consecutive bits starting at idx are all set. */
985bool sparsebit_is_set_num(struct sparsebit *s,
986 sparsebit_idx_t idx, sparsebit_num_t num)
987{
988 sparsebit_idx_t next_cleared;
989
990 assert(num > 0);
991 assert(idx + num - 1 >= idx);
992
993 /* With num > 0, the first bit must be set. */
994 if (!sparsebit_is_set(s, idx))
995 return false;
996
997 /* Find the next cleared bit */
998 next_cleared = sparsebit_next_clear(s, idx);
999
1000 /*
1001 * If no cleared bits beyond idx, then there are at least num
1002 * set bits. idx + num doesn't wrap. Otherwise check if
1003 * there are enough set bits between idx and the next cleared bit.
1004 */
1005 return next_cleared == 0 || next_cleared - idx >= num;
1006}
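
/*
 * Worked example (illustrative values): with bits 0x10 - 0x1f set and
 * bit 0x20 clear, sparsebit_is_set_num(s, 0x10, 0x10) finds
 * next_cleared = 0x20, and 0x20 - 0x10 >= 0x10, so it returns true;
 * asking for 0x11 bits returns false.  A next_cleared of 0 means no
 * cleared bit exists after idx, which covers any num that passes the
 * no-wrap assert above.
 */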
1007
/* Returns whether the bit at the index given by idx is cleared. */
1009bool sparsebit_is_clear(struct sparsebit *s,
1010 sparsebit_idx_t idx)
1011{
1012 return !sparsebit_is_set(s, idx);
1013}
1014
1015/* Returns whether num consecutive bits starting at idx are all cleared. */
1016bool sparsebit_is_clear_num(struct sparsebit *s,
1017 sparsebit_idx_t idx, sparsebit_num_t num)
1018{
1019 sparsebit_idx_t next_set;
1020
1021 assert(num > 0);
1022 assert(idx + num - 1 >= idx);
1023
1024 /* With num > 0, the first bit must be cleared. */
1025 if (!sparsebit_is_clear(s, idx))
1026 return false;
1027
1028 /* Find the next set bit */
1029 next_set = sparsebit_next_set(s, idx);
1030
1031 /*
1032 * If no set bits beyond idx, then there are at least num
1033 * cleared bits. idx + num doesn't wrap. Otherwise check if
1034 * there are enough cleared bits between idx and the next set bit.
1035 */
1036 return next_set == 0 || next_set - idx >= num;
1037}
1038
1039/* Returns the total number of bits set. Note: 0 is also returned for
1040 * the case of all bits set. This is because with all bits set, there
1041 * is 1 additional bit set beyond what can be represented in the return
1042 * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
1043 * to determine if the sparsebit array has any bits set.
1044 */
1045sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
1046{
1047 return s->num_set;
1048}
1049
1050/* Returns whether any bit is set in the sparsebit array. */
1051bool sparsebit_any_set(struct sparsebit *s)
1052{
1053 /*
	 * Nodes only describe set bits.  If there are any nodes, then
	 * at least 1 bit is set.
1056 */
1057 if (!s->root)
1058 return false;
1059
1060 /*
1061 * Every node should have a non-zero mask. For now will
1062 * just assure that the root node has a non-zero mask,
1063 * which is a quick check that at least 1 bit is set.
1064 */
1065 assert(s->root->mask != 0);
1066 assert(s->num_set > 0 ||
1067 (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
1068 s->root->mask == ~(mask_t) 0));
1069
1070 return true;
1071}
1072
1073/* Returns whether all the bits in the sparsebit array are cleared. */
1074bool sparsebit_all_clear(struct sparsebit *s)
1075{
1076 return !sparsebit_any_set(s);
1077}
1078
/* Returns whether any bit in the sparsebit array is cleared. */
1080bool sparsebit_any_clear(struct sparsebit *s)
1081{
1082 return !sparsebit_all_set(s);
1083}
1084
1085/* Returns the index of the first set bit. Abort if no bits are set.
1086 */
1087sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
1088{
1089 struct node *nodep;
1090
1091 /* Validate at least 1 bit is set */
1092 assert(sparsebit_any_set(s));
1093
1094 nodep = node_first(s);
1095 return node_first_set(nodep, 0);
1096}
1097
1098/* Returns the index of the first cleared bit. Abort if
1099 * no bits are cleared.
1100 */
1101sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
1102{
1103 struct node *nodep1, *nodep2;
1104
1105 /* Validate at least 1 bit is cleared. */
1106 assert(sparsebit_any_clear(s));
1107
1108 /* If no nodes or first node index > 0 then lowest cleared is 0 */
1109 nodep1 = node_first(s);
1110 if (!nodep1 || nodep1->idx > 0)
1111 return 0;
1112
1113 /* Does the mask in the first node contain any cleared bits. */
1114 if (nodep1->mask != ~(mask_t) 0)
1115 return node_first_clear(nodep1, 0);
1116
1117 /*
1118 * All mask bits set in first node. If there isn't a second node
1119 * then the first cleared bit is the first bit after the bits
1120 * described by the first node.
1121 */
1122 nodep2 = node_next(s, nodep1);
1123 if (!nodep2) {
1124 /*
1125 * No second node. First cleared bit is first bit beyond
1126 * bits described by first node.
1127 */
1128 assert(nodep1->mask == ~(mask_t) 0);
1129 assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
1130 return nodep1->idx + MASK_BITS + nodep1->num_after;
1131 }
1132
1133 /*
1134 * There is a second node.
1135 * If it is not adjacent to the first node, then there is a gap
1136 * of cleared bits between the nodes, and the first cleared bit
1137 * is the first bit within the gap.
1138 */
1139 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
1140 return nodep1->idx + MASK_BITS + nodep1->num_after;
1141
1142 /*
1143 * Second node is adjacent to the first node.
1144 * Because it is adjacent, its mask should be non-zero. If all
1145 * its mask bits are set, then with it being adjacent, it should
1146 * have had the mask bits moved into the num_after setting of the
1147 * previous node.
1148 */
1149 return node_first_clear(nodep2, 0);
1150}
1151
1152/* Returns index of next bit set within s after the index given by prev.
1153 * Returns 0 if there are no bits after prev that are set.
1154 */
1155sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
1156 sparsebit_idx_t prev)
1157{
1158 sparsebit_idx_t lowest_possible = prev + 1;
1159 sparsebit_idx_t start;
1160 struct node *nodep;
1161
1162 /* A bit after the highest index can't be set. */
1163 if (lowest_possible == 0)
1164 return 0;
1165
1166 /*
1167 * Find the leftmost 'candidate' overlapping or to the right
1168 * of lowest_possible.
1169 */
1170 struct node *candidate = NULL;
1171
1172 /* True iff lowest_possible is within candidate */
1173 bool contains = false;
1174
1175 /*
1176 * Find node that describes setting of bit at lowest_possible.
1177 * If such a node doesn't exist, find the node with the lowest
1178 * starting index that is > lowest_possible.
1179 */
1180 for (nodep = s->root; nodep;) {
1181 if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
1182 >= lowest_possible) {
1183 candidate = nodep;
1184 if (candidate->idx <= lowest_possible) {
1185 contains = true;
1186 break;
1187 }
1188 nodep = nodep->left;
1189 } else {
1190 nodep = nodep->right;
1191 }
1192 }
1193 if (!candidate)
1194 return 0;
1195
1196 assert(candidate->mask != 0);
1197
1198 /* Does the candidate node describe the setting of lowest_possible? */
1199 if (!contains) {
1200 /*
1201 * Candidate doesn't describe setting of bit at lowest_possible.
1202 * Candidate points to the first node with a starting index
1203 * > lowest_possible.
1204 */
1205 assert(candidate->idx > lowest_possible);
1206
1207 return node_first_set(candidate, 0);
1208 }
1209
1210 /*
1211 * Candidate describes setting of bit at lowest_possible.
	 * Note: although the node describes the setting of the bit
	 * at lowest_possible, it's possible that its setting and the
	 * settings of all later bits described by this node are 0.
1215 * For now, just handle the cases where this node describes
1216 * a bit at or after an index of lowest_possible that is set.
1217 */
1218 start = lowest_possible - candidate->idx;
1219
1220 if (start < MASK_BITS && candidate->mask >= (1 << start))
1221 return node_first_set(candidate, start);
1222
1223 if (candidate->num_after) {
1224 sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
1225
1226 return lowest_possible < first_num_after_idx
1227 ? first_num_after_idx : lowest_possible;
1228 }
1229
1230 /*
1231 * Although candidate node describes setting of bit at
1232 * the index of lowest_possible, all bits at that index and
	 * later that are described by candidate are cleared.  With
1234 * this, the next bit is the first bit in the next node, if
1235 * such a node exists. If a next node doesn't exist, then
1236 * there is no next set bit.
1237 */
1238 candidate = node_next(s, candidate);
1239 if (!candidate)
1240 return 0;
1241
1242 return node_first_set(candidate, 0);
1243}
1244
1245/* Returns index of next bit cleared within s after the index given by prev.
1246 * Returns 0 if there are no bits after prev that are cleared.
1247 */
1248sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
1249 sparsebit_idx_t prev)
1250{
1251 sparsebit_idx_t lowest_possible = prev + 1;
1252 sparsebit_idx_t idx;
1253 struct node *nodep1, *nodep2;
1254
	/* There are no bits after the highest index. */
1256 if (lowest_possible == 0)
1257 return 0;
1258
1259 /*
1260 * Does a node describing the setting of lowest_possible exist?
1261 * If not, the bit at lowest_possible is cleared.
1262 */
1263 nodep1 = node_find(s, lowest_possible);
1264 if (!nodep1)
1265 return lowest_possible;
1266
1267 /* Does a mask bit in node 1 describe the next cleared bit. */
1268 for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
1269 if (!(nodep1->mask & (1 << idx)))
1270 return nodep1->idx + idx;
1271
1272 /*
1273 * Next cleared bit is not described by node 1. If there
1274 * isn't a next node, then next cleared bit is described
1275 * by bit after the bits described by the first node.
1276 */
1277 nodep2 = node_next(s, nodep1);
1278 if (!nodep2)
1279 return nodep1->idx + MASK_BITS + nodep1->num_after;
1280
1281 /*
1282 * There is a second node.
1283 * If it is not adjacent to the first node, then there is a gap
1284 * of cleared bits between the nodes, and the next cleared bit
1285 * is the first bit within the gap.
1286 */
1287 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
1288 return nodep1->idx + MASK_BITS + nodep1->num_after;
1289
1290 /*
1291 * Second node is adjacent to the first node.
1292 * Because it is adjacent, its mask should be non-zero. If all
1293 * its mask bits are set, then with it being adjacent, it should
1294 * have had the mask bits moved into the num_after setting of the
1295 * previous node.
1296 */
1297 return node_first_clear(nodep2, 0);
1298}
1299
/* Starting with the index 1 greater than the index given by start, finds
 * and returns the index of the first sequence of num consecutively set
 * bits.  Returns a value of 0 if no such sequence exists.
 */
1304sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
1305 sparsebit_idx_t start, sparsebit_num_t num)
1306{
1307 sparsebit_idx_t idx;
1308
1309 assert(num >= 1);
1310
1311 for (idx = sparsebit_next_set(s, start);
1312 idx != 0 && idx + num - 1 >= idx;
1313 idx = sparsebit_next_set(s, idx)) {
1314 assert(sparsebit_is_set(s, idx));
1315
1316 /*
1317 * Does the sequence of bits starting at idx consist of
1318 * num set bits?
1319 */
1320 if (sparsebit_is_set_num(s, idx, num))
1321 return idx;
1322
1323 /*
1324 * Sequence of set bits at idx isn't large enough.
1325 * Skip this entire sequence of set bits.
1326 */
1327 idx = sparsebit_next_clear(s, idx);
1328 if (idx == 0)
1329 return 0;
1330 }
1331
1332 return 0;
1333}
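
/*
 * Usage sketch (illustrative): treating set bits as "available" slots,
 * find and claim a run of 4 consecutive slots somewhere above index 0x100.
 *
 *	sparsebit_idx_t idx = sparsebit_next_set_num(s, 0x100, 4);
 *
 *	if (idx)	   // 0 means no such run exists after 0x100
 *		sparsebit_clear_num(s, idx, 4);
 *
 * Note the search begins at index 0x101; a run starting exactly at 0x100
 * is not considered.
 */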
1334
/* Starting with the index 1 greater than the index given by start, finds
 * and returns the index of the first sequence of num consecutively cleared
 * bits.  Returns a value of 0 if no such sequence exists.
 */
1339sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
1340 sparsebit_idx_t start, sparsebit_num_t num)
1341{
1342 sparsebit_idx_t idx;
1343
1344 assert(num >= 1);
1345
1346 for (idx = sparsebit_next_clear(s, start);
1347 idx != 0 && idx + num - 1 >= idx;
1348 idx = sparsebit_next_clear(s, idx)) {
1349 assert(sparsebit_is_clear(s, idx));
1350
1351 /*
1352 * Does the sequence of bits starting at idx consist of
1353 * num cleared bits?
1354 */
1355 if (sparsebit_is_clear_num(s, idx, num))
1356 return idx;
1357
1358 /*
1359 * Sequence of cleared bits at idx isn't large enough.
1360 * Skip this entire sequence of cleared bits.
1361 */
1362 idx = sparsebit_next_set(s, idx);
1363 if (idx == 0)
1364 return 0;
1365 }
1366
1367 return 0;
1368}
1369
/* Sets the bits in the inclusive range start through start + num - 1. */
1371void sparsebit_set_num(struct sparsebit *s,
1372 sparsebit_idx_t start, sparsebit_num_t num)
1373{
1374 struct node *nodep, *next;
1375 unsigned int n1;
1376 sparsebit_idx_t idx;
1377 sparsebit_num_t n;
1378 sparsebit_idx_t middle_start, middle_end;
1379
1380 assert(num > 0);
1381 assert(start + num - 1 >= start);
1382
1383 /*
1384 * Leading - bits before first mask boundary.
1385 *
1386 * TODO(lhuemill): With some effort it may be possible to
1387 * replace the following loop with a sequential sequence
1388 * of statements. High level sequence would be:
1389 *
1390 * 1. Use node_split() to force node that describes setting
1391 * of idx to be within the mask portion of a node.
1392 * 2. Form mask of bits to be set.
1393 * 3. Determine number of mask bits already set in the node
1394 * and store in a local variable named num_already_set.
1395 * 4. Set the appropriate mask bits within the node.
1396 * 5. Increment struct sparsebit_pvt num_set member
1397 * by the number of bits that were actually set.
1398 * Exclude from the counts bits that were already set.
1399 * 6. Before returning to the caller, use node_reduce() to
1400 * handle the multiple corner cases that this method
1401 * introduces.
1402 */
1403 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
1404 bit_set(s, idx);
1405
1406 /* Middle - bits spanning one or more entire mask */
1407 middle_start = idx;
1408 middle_end = middle_start + (n & -MASK_BITS) - 1;
1409 if (n >= MASK_BITS) {
1410 nodep = node_split(s, middle_start);
1411
1412 /*
1413 * As needed, split just after end of middle bits.
1414 * No split needed if end of middle bits is at highest
1415 * supported bit index.
1416 */
1417 if (middle_end + 1 > middle_end)
1418 (void) node_split(s, middle_end + 1);
1419
1420 /* Delete nodes that only describe bits within the middle. */
1421 for (next = node_next(s, nodep);
1422 next && (next->idx < middle_end);
1423 next = node_next(s, nodep)) {
1424 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
1425 node_rm(s, next);
1426 next = NULL;
1427 }
1428
1429 /* As needed set each of the mask bits */
1430 for (n1 = 0; n1 < MASK_BITS; n1++) {
1431 if (!(nodep->mask & (1 << n1))) {
1432 nodep->mask |= 1 << n1;
1433 s->num_set++;
1434 }
1435 }
1436
1437 s->num_set -= nodep->num_after;
1438 nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
1439 s->num_set += nodep->num_after;
1440
1441 node_reduce(s, nodep);
1442 }
1443 idx = middle_end + 1;
1444 n -= middle_end - middle_start + 1;
1445
1446 /* Trailing - bits at and beyond last mask boundary */
1447 assert(n < MASK_BITS);
1448 for (; n > 0; idx++, n--)
1449 bit_set(s, idx);
1450}
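
/*
 * Worked example (illustrative values): sparsebit_set_num(s, 0x5, 100)
 * sets bits 0x5 - 0x68 in three phases:
 *
 *	leading  - bits 0x5 - 0x1f (27 bits), set one at a time by bit_set()
 *	middle   - bits 0x20 - 0x5f (64 bits, two full masks), set in bulk
 *	           on the node returned by node_split(s, 0x20)
 *	trailing - bits 0x60 - 0x68 (9 bits), set one at a time by bit_set()
 */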
1451
/* Clears the bits in the inclusive range start through start + num - 1. */
1453void sparsebit_clear_num(struct sparsebit *s,
1454 sparsebit_idx_t start, sparsebit_num_t num)
1455{
1456 struct node *nodep, *next;
1457 unsigned int n1;
1458 sparsebit_idx_t idx;
1459 sparsebit_num_t n;
1460 sparsebit_idx_t middle_start, middle_end;
1461
1462 assert(num > 0);
1463 assert(start + num - 1 >= start);
1464
1465 /* Leading - bits before first mask boundary */
1466 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
1467 bit_clear(s, idx);
1468
1469 /* Middle - bits spanning one or more entire mask */
1470 middle_start = idx;
1471 middle_end = middle_start + (n & -MASK_BITS) - 1;
1472 if (n >= MASK_BITS) {
1473 nodep = node_split(s, middle_start);
1474
1475 /*
1476 * As needed, split just after end of middle bits.
1477 * No split needed if end of middle bits is at highest
1478 * supported bit index.
1479 */
1480 if (middle_end + 1 > middle_end)
1481 (void) node_split(s, middle_end + 1);
1482
1483 /* Delete nodes that only describe bits within the middle. */
1484 for (next = node_next(s, nodep);
1485 next && (next->idx < middle_end);
1486 next = node_next(s, nodep)) {
1487 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
1488 node_rm(s, next);
1489 next = NULL;
1490 }
1491
1492 /* As needed clear each of the mask bits */
1493 for (n1 = 0; n1 < MASK_BITS; n1++) {
1494 if (nodep->mask & (1 << n1)) {
1495 nodep->mask &= ~(1 << n1);
1496 s->num_set--;
1497 }
1498 }
1499
1500 /* Clear any bits described by num_after */
1501 s->num_set -= nodep->num_after;
1502 nodep->num_after = 0;
1503
1504 /*
1505 * Delete the node that describes the beginning of
1506 * the middle bits and perform any allowed reductions
1507 * with the nodes prev or next of nodep.
1508 */
1509 node_reduce(s, nodep);
1510 nodep = NULL;
1511 }
1512 idx = middle_end + 1;
1513 n -= middle_end - middle_start + 1;
1514
1515 /* Trailing - bits at and beyond last mask boundary */
1516 assert(n < MASK_BITS);
1517 for (; n > 0; idx++, n--)
1518 bit_clear(s, idx);
1519}
1520
1521/* Sets the bit at the index given by idx. */
1522void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
1523{
1524 sparsebit_set_num(s, idx, 1);
1525}
1526
1527/* Clears the bit at the index given by idx. */
1528void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
1529{
1530 sparsebit_clear_num(s, idx, 1);
1531}
1532
1533/* Sets the bits in the entire addressable range of the sparsebit array. */
1534void sparsebit_set_all(struct sparsebit *s)
1535{
1536 sparsebit_set(s, 0);
1537 sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
1538 assert(sparsebit_all_set(s));
1539}
1540
1541/* Clears the bits in the entire addressable range of the sparsebit array. */
1542void sparsebit_clear_all(struct sparsebit *s)
1543{
1544 sparsebit_clear(s, 0);
1545 sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
1546 assert(!sparsebit_any_set(s));
1547}
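
/*
 * Implementation note (sketch): the full index space holds 2^64 bits,
 * which sparsebit_num_t cannot represent, so the two "all" operations
 * above are expressed as bit 0 plus the remaining ~(sparsebit_idx_t) 0
 * bits, e.g.:
 *
 *	sparsebit_set_all(s);
 *	assert(sparsebit_all_set(s) && sparsebit_num_set(s) == 0);
 */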
1548
1549static size_t display_range(FILE *stream, sparsebit_idx_t low,
1550 sparsebit_idx_t high, bool prepend_comma_space)
1551{
1552 char *fmt_str;
1553 size_t sz;
1554
1555 /* Determine the printf format string */
1556 if (low == high)
1557 fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
1558 else
1559 fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
1560
1561 /*
1562 * When stream is NULL, just determine the size of what would
1563 * have been printed, else print the range.
1564 */
1565 if (!stream)
1566 sz = snprintf(NULL, 0, fmt_str, low, high);
1567 else
1568 sz = fprintf(stream, fmt_str, low, high);
1569
1570 return sz;
1571}
1572
1573
1574/* Dumps to the FILE stream given by stream, the bit settings
1575 * of s. Each line of output is prefixed with the number of
1576 * spaces given by indent. The length of each line is implementation
1577 * dependent and does not depend on the indent amount. The following
1578 * is an example output of a sparsebit array that has bits:
1579 *
1580 * 0x5, 0x8, 0xa:0xe, 0x12
1581 *
1582 * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
1583 * are set. Note that a ':', instead of a '-' is used to specify a range of
1584 * contiguous bits. This is done because '-' is used to specify command-line
1585 * options, and sometimes ranges are specified as command-line arguments.
1586 */
1587void sparsebit_dump(FILE *stream, struct sparsebit *s,
1588 unsigned int indent)
1589{
1590 size_t current_line_len = 0;
1591 size_t sz;
1592 struct node *nodep;
1593
1594 if (!sparsebit_any_set(s))
1595 return;
1596
1597 /* Display initial indent */
1598 fprintf(stream, "%*s", indent, "");
1599
1600 /* For each node */
1601 for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
1602 unsigned int n1;
1603 sparsebit_idx_t low, high;
1604
1605 /* For each group of bits in the mask */
1606 for (n1 = 0; n1 < MASK_BITS; n1++) {
1607 if (nodep->mask & (1 << n1)) {
1608 low = high = nodep->idx + n1;
1609
1610 for (; n1 < MASK_BITS; n1++) {
1611 if (nodep->mask & (1 << n1))
1612 high = nodep->idx + n1;
1613 else
1614 break;
1615 }
1616
1617 if ((n1 == MASK_BITS) && nodep->num_after)
1618 high += nodep->num_after;
1619
1620 /*
1621 * How much room will it take to display
1622 * this range.
1623 */
1624 sz = display_range(NULL, low, high,
1625 current_line_len != 0);
1626
1627 /*
1628 * If there is not enough room, display
1629 * a newline plus the indent of the next
1630 * line.
1631 */
1632 if (current_line_len + sz > DUMP_LINE_MAX) {
1633 fputs("\n", stream);
1634 fprintf(stream, "%*s", indent, "");
1635 current_line_len = 0;
1636 }
1637
1638 /* Display the range */
1639 sz = display_range(stream, low, high,
1640 current_line_len != 0);
1641 current_line_len += sz;
1642 }
1643 }
1644
1645 /*
1646 * If num_after and most significant-bit of mask is not
1647 * set, then still need to display a range for the bits
1648 * described by num_after.
1649 */
1650 if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
1651 low = nodep->idx + MASK_BITS;
1652 high = nodep->idx + MASK_BITS + nodep->num_after - 1;
1653
1654 /*
1655 * How much room will it take to display
1656 * this range.
1657 */
1658 sz = display_range(NULL, low, high,
1659 current_line_len != 0);
1660
1661 /*
1662 * If there is not enough room, display
1663 * a newline plus the indent of the next
1664 * line.
1665 */
1666 if (current_line_len + sz > DUMP_LINE_MAX) {
1667 fputs("\n", stream);
1668 fprintf(stream, "%*s", indent, "");
1669 current_line_len = 0;
1670 }
1671
1672 /* Display the range */
1673 sz = display_range(stream, low, high,
1674 current_line_len != 0);
1675 current_line_len += sz;
1676 }
1677 }
1678 fputs("\n", stream);
1679}
1680
1681/* Validates the internal state of the sparsebit array given by
1682 * s. On error, diagnostic information is printed to stderr and
1683 * abort is called.
1684 */
1685void sparsebit_validate_internal(struct sparsebit *s)
1686{
1687 bool error_detected = false;
1688 struct node *nodep, *prev = NULL;
1689 sparsebit_num_t total_bits_set = 0;
1690 unsigned int n1;
1691
1692 /* For each node */
1693 for (nodep = node_first(s); nodep;
1694 prev = nodep, nodep = node_next(s, nodep)) {
1695
1696 /*
1697 * Increase total bits set by the number of bits set
1698 * in this node.
1699 */
1700 for (n1 = 0; n1 < MASK_BITS; n1++)
1701 if (nodep->mask & (1 << n1))
1702 total_bits_set++;
1703
1704 total_bits_set += nodep->num_after;
1705
1706 /*
1707 * Arbitrary choice as to whether a mask of 0 is allowed
1708 * or not. For diagnostic purposes it is beneficial to
1709 * have only one valid means to represent a set of bits.
1710 * To support this an arbitrary choice has been made
1711 * to not allow a mask of zero.
1712 */
		if (nodep->mask == 0) {
			fprintf(stderr, "Node mask of zero, "
				"nodep: %p nodep->mask: 0x%x",
				nodep, nodep->mask);
			error_detected = true;
			break;
		}

		/*
		 * Validate num_after is not greater than the max index
		 * - the number of mask bits. The num_after member
		 * uses 0-based indexing and thus has no value that
		 * represents all bits set. This limitation is handled
		 * by requiring a non-zero mask. With a non-zero mask,
		 * MASK_BITS worth of bits are described by the mask,
		 * which makes the largest needed num_after equal to:
		 *
		 *   (~(sparsebit_num_t) 0) - MASK_BITS + 1
		 */
		if (nodep->num_after
		    > (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
			fprintf(stderr, "num_after too large, "
				"nodep: %p nodep->num_after: 0x%lx",
				nodep, nodep->num_after);
			error_detected = true;
			break;
		}

		/* Validate node index is divisible by the mask size */
		if (nodep->idx % MASK_BITS) {
			fprintf(stderr, "Node index not divisible by "
				"mask size,\n"
				"  nodep: %p nodep->idx: 0x%lx "
				"MASK_BITS: %lu\n",
				nodep, nodep->idx, MASK_BITS);
			error_detected = true;
			break;
		}

		/*
		 * Validate bits described by node don't wrap beyond the
		 * highest supported index.
		 */
		if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
			fprintf(stderr, "Bits described by node wrap "
				"beyond highest supported index,\n"
				"  nodep: %p nodep->idx: 0x%lx\n"
				"  MASK_BITS: %lu nodep->num_after: 0x%lx",
				nodep, nodep->idx, MASK_BITS, nodep->num_after);
			error_detected = true;
			break;
		}

		/* Check parent pointers. */
		if (nodep->left) {
			if (nodep->left->parent != nodep) {
				fprintf(stderr, "Left child parent pointer "
					"doesn't point to this node,\n"
					"  nodep: %p nodep->left: %p "
					"nodep->left->parent: %p",
					nodep, nodep->left,
					nodep->left->parent);
				error_detected = true;
				break;
			}
		}

		if (nodep->right) {
			if (nodep->right->parent != nodep) {
				fprintf(stderr, "Right child parent pointer "
					"doesn't point to this node,\n"
					"  nodep: %p nodep->right: %p "
					"nodep->right->parent: %p",
					nodep, nodep->right,
					nodep->right->parent);
				error_detected = true;
				break;
			}
		}

		if (!nodep->parent) {
			if (s->root != nodep) {
				fprintf(stderr, "Unexpected root node, "
					"s->root: %p nodep: %p",
					s->root, nodep);
				error_detected = true;
				break;
			}
		}

		if (prev) {
			/*
			 * Is index of previous node before index of
			 * current node?
			 */
			if (prev->idx >= nodep->idx) {
				fprintf(stderr, "Previous node index "
					">= current node index,\n"
					"  prev: %p prev->idx: 0x%lx\n"
					"  nodep: %p nodep->idx: 0x%lx",
					prev, prev->idx, nodep, nodep->idx);
				error_detected = true;
				break;
			}

			/*
			 * Nodes occur in ascending order, based on each
			 * node's starting index.
			 */
			if ((prev->idx + MASK_BITS + prev->num_after - 1)
			    >= nodep->idx) {
				fprintf(stderr, "Previous node bit range "
					"overlaps with current node bit range,\n"
					"  prev: %p prev->idx: 0x%lx "
					"prev->num_after: 0x%lx\n"
					"  nodep: %p nodep->idx: 0x%lx "
					"nodep->num_after: 0x%lx\n"
					"  MASK_BITS: %lu",
					prev, prev->idx, prev->num_after,
					nodep, nodep->idx, nodep->num_after,
					MASK_BITS);
				error_detected = true;
				break;
			}

			/*
			 * When the node has all mask bits set, it shouldn't
			 * be adjacent to the last bit described by the
			 * previous node.
			 */
			if (nodep->mask == ~(mask_t) 0 &&
			    prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
				fprintf(stderr, "Current node has mask with "
					"all bits set and is adjacent to the "
					"previous node,\n"
					"  prev: %p prev->idx: 0x%lx "
					"prev->num_after: 0x%lx\n"
					"  nodep: %p nodep->idx: 0x%lx "
					"nodep->num_after: 0x%lx\n"
					"  MASK_BITS: %lu",
					prev, prev->idx, prev->num_after,
					nodep, nodep->idx, nodep->num_after,
					MASK_BITS);

				error_detected = true;
				break;
			}
		}
	}

	if (!error_detected) {
		/*
		 * Is sum of bits set in each node equal to the count
		 * of total bits set.
		 */
		if (s->num_set != total_bits_set) {
			fprintf(stderr, "Number of bits set mismatch,\n"
				"  s->num_set: 0x%lx total_bits_set: 0x%lx",
				s->num_set, total_bits_set);

			error_detected = true;
		}
	}

	if (error_detected) {
		fputs("  dump_internal:\n", stderr);
		sparsebit_dump_internal(stderr, s, 4);
		abort();
	}
}


#ifdef FUZZ
/* A simple but effective fuzzing driver. Look for bugs with the help
 * of some invariants and of a trivial representation of sparsebit.
 * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
 * afl-fuzz do the magic. :)
 */

#include <stdlib.h>

struct range {
	sparsebit_idx_t first, last;
	bool set;
};

struct sparsebit *s;
struct range ranges[1000];
int num_ranges;

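/* Returns the expected setting of the bit at idx according to the
 * trivial ranges[] model. Later entries take precedence over earlier
 * ones; bits covered by no entry are considered cleared.
 */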
static bool get_value(sparsebit_idx_t idx)
{
	int i;

	for (i = num_ranges; --i >= 0; )
		if (ranges[i].first <= idx && idx <= ranges[i].last)
			return ranges[i].set;

	return false;
}

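/* Performs one fuzz operation: normalizes first/last into an ordered
 * inclusive range, applies the operation selected by code to the
 * sparsebit array, cross-checks the result against the ranges[] model
 * with asserts, and records modifying operations in the model.
 */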
static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
{
	sparsebit_num_t num;
	sparsebit_idx_t next;

	if (first < last) {
		num = last - first + 1;
	} else {
		num = first - last + 1;
		first = last;
		last = first + num - 1;
	}

	switch (code) {
	case 0:
		sparsebit_set(s, first);
		assert(sparsebit_is_set(s, first));
		assert(!sparsebit_is_clear(s, first));
		assert(sparsebit_any_set(s));
		assert(!sparsebit_all_clear(s));
		if (get_value(first))
			return;
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = first, .set = true };
		break;
	case 1:
		sparsebit_clear(s, first);
		assert(!sparsebit_is_set(s, first));
		assert(sparsebit_is_clear(s, first));
		assert(sparsebit_any_clear(s));
		assert(!sparsebit_all_set(s));
		if (!get_value(first))
			return;
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = first, .set = false };
		break;
	case 2:
		assert(sparsebit_is_set(s, first) == get_value(first));
		assert(sparsebit_is_clear(s, first) == !get_value(first));
		break;
	case 3:
		if (sparsebit_any_set(s))
			assert(get_value(sparsebit_first_set(s)));
		if (sparsebit_any_clear(s))
			assert(!get_value(sparsebit_first_clear(s)));
		sparsebit_set_all(s);
		assert(!sparsebit_any_clear(s));
		assert(sparsebit_all_set(s));
		num_ranges = 0;
		ranges[num_ranges++] = (struct range)
			{ .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
		break;
	case 4:
		if (sparsebit_any_set(s))
			assert(get_value(sparsebit_first_set(s)));
		if (sparsebit_any_clear(s))
			assert(!get_value(sparsebit_first_clear(s)));
		sparsebit_clear_all(s);
		assert(!sparsebit_any_set(s));
		assert(sparsebit_all_clear(s));
		num_ranges = 0;
		break;
	case 5:
		next = sparsebit_next_set(s, first);
		assert(next == 0 || next > first);
		assert(next == 0 || get_value(next));
		break;
	case 6:
		next = sparsebit_next_clear(s, first);
		assert(next == 0 || next > first);
		assert(next == 0 || !get_value(next));
		break;
	case 7:
		next = sparsebit_next_clear(s, first);
		if (sparsebit_is_set_num(s, first, num)) {
			assert(next == 0 || next > last);
			if (first)
				next = sparsebit_next_set(s, first - 1);
			else if (sparsebit_any_set(s))
				next = sparsebit_first_set(s);
			else
				return;
			assert(next == first);
		} else {
			assert(sparsebit_is_clear(s, first) || next <= last);
		}
		break;
	case 8:
		next = sparsebit_next_set(s, first);
		if (sparsebit_is_clear_num(s, first, num)) {
			assert(next == 0 || next > last);
			if (first)
				next = sparsebit_next_clear(s, first - 1);
			else if (sparsebit_any_clear(s))
				next = sparsebit_first_clear(s);
			else
				return;
			assert(next == first);
		} else {
			assert(sparsebit_is_set(s, first) || next <= last);
		}
		break;
	case 9:
		sparsebit_set_num(s, first, num);
		assert(sparsebit_is_set_num(s, first, num));
		assert(!sparsebit_is_clear_num(s, first, num));
		assert(sparsebit_any_set(s));
		assert(!sparsebit_all_clear(s));
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = last, .set = true };
		break;
	case 10:
		sparsebit_clear_num(s, first, num);
		assert(!sparsebit_is_set_num(s, first, num));
		assert(sparsebit_is_clear_num(s, first, num));
		assert(sparsebit_any_clear(s));
		assert(!sparsebit_all_set(s));
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = last, .set = false };
		break;
	case 11:
		sparsebit_validate_internal(s);
		break;
	default:
		break;
	}
}

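/* Returns the next byte of fuzz input from stdin, exiting cleanly once
 * the input is exhausted.
 */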
unsigned char get8(void)
{
	int ch;

	ch = getchar();
	if (ch == EOF)
		exit(0);
	return ch;
}

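/* Assembles the next eight bytes of fuzz input, big-endian, into a
 * 64-bit value.
 */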
uint64_t get64(void)
{
	uint64_t x;

	x = get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	return (x << 8) | get8();
}

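/* Reads (op, first, last) triples from stdin and feeds them to
 * operate() until the fuzz input runs out.
 */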
int main(void)
{
	s = sparsebit_alloc();
	for (;;) {
		uint8_t op = get8() & 0xf;
		uint64_t first = get64();
		uint64_t last = get64();

		operate(op, first, last);
	}
}
#endif
1/*
2 * Sparse bit array
3 *
4 * Copyright (C) 2018, Google LLC.
5 * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2.
8 *
9 * This library provides functions to support a memory efficient bit array,
10 * with an index size of 2^64. A sparsebit array is allocated through
11 * the use sparsebit_alloc() and free'd via sparsebit_free(),
12 * such as in the following:
13 *
14 * struct sparsebit *s;
15 * s = sparsebit_alloc();
16 * sparsebit_free(&s);
17 *
18 * The struct sparsebit type resolves down to a struct sparsebit.
19 * Note that, sparsebit_free() takes a pointer to the sparsebit
20 * structure. This is so that sparsebit_free() is able to poison
21 * the pointer (e.g. set it to NULL) to the struct sparsebit before
22 * returning to the caller.
23 *
24 * Between the return of sparsebit_alloc() and the call of
25 * sparsebit_free(), there are multiple query and modifying operations
26 * that can be performed on the allocated sparsebit array. All of
27 * these operations take as a parameter the value returned from
28 * sparsebit_alloc() and most also take a bit index. Frequently
29 * used routines include:
30 *
31 * ---- Query Operations
32 * sparsebit_is_set(s, idx)
33 * sparsebit_is_clear(s, idx)
34 * sparsebit_any_set(s)
35 * sparsebit_first_set(s)
36 * sparsebit_next_set(s, prev_idx)
37 *
38 * ---- Modifying Operations
39 * sparsebit_set(s, idx)
40 * sparsebit_clear(s, idx)
41 * sparsebit_set_num(s, idx, num);
42 * sparsebit_clear_num(s, idx, num);
43 *
44 * A common operation, is to itterate over all the bits set in a test
45 * sparsebit array. This can be done via code with the following structure:
46 *
47 * sparsebit_idx_t idx;
48 * if (sparsebit_any_set(s)) {
49 * idx = sparsebit_first_set(s);
50 * do {
51 * ...
52 * idx = sparsebit_next_set(s, idx);
53 * } while (idx != 0);
54 * }
55 *
56 * The index of the first bit set needs to be obtained via
57 * sparsebit_first_set(), because sparsebit_next_set(), needs
58 * the index of the previously set. The sparsebit_idx_t type is
59 * unsigned, so there is no previous index before 0 that is available.
60 * Also, the call to sparsebit_first_set() is not made unless there
61 * is at least 1 bit in the array set. This is because sparsebit_first_set()
62 * aborts if sparsebit_first_set() is called with no bits set.
63 * It is the callers responsibility to assure that the
64 * sparsebit array has at least a single bit set before calling
65 * sparsebit_first_set().
66 *
67 * ==== Implementation Overview ====
68 * For the most part the internal implementation of sparsebit is
69 * opaque to the caller. One important implementation detail that the
70 * caller may need to be aware of is the spatial complexity of the
71 * implementation. This implementation of a sparsebit array is not
72 * only sparse, in that it uses memory proportional to the number of bits
73 * set. It is also efficient in memory usage when most of the bits are
74 * set.
75 *
76 * At a high-level the state of the bit settings are maintained through
77 * the use of a binary-search tree, where each node contains at least
78 * the following members:
79 *
80 * typedef uint64_t sparsebit_idx_t;
81 * typedef uint64_t sparsebit_num_t;
82 *
83 * sparsebit_idx_t idx;
84 * uint32_t mask;
85 * sparsebit_num_t num_after;
86 *
87 * The idx member contains the bit index of the first bit described by this
88 * node, while the mask member stores the setting of the first 32-bits.
89 * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
90 * mask member at 1 << n.
91 *
92 * Nodes are sorted by idx and the bits described by two nodes will never
93 * overlap. The idx member is always aligned to the mask size, i.e. a
94 * multiple of 32.
95 *
96 * Beyond a typical implementation, the nodes in this implementation also
97 * contains a member named num_after. The num_after member holds the
98 * number of bits immediately after the mask bits that are contiguously set.
99 * The use of the num_after member allows this implementation to efficiently
100 * represent cases where most bits are set. For example, the case of all
101 * but the last two bits set, is represented by the following two nodes:
102 *
103 * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
104 * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
105 *
106 * ==== Invariants ====
107 * This implementation usses the following invariants:
108 *
109 * + Node are only used to represent bits that are set.
110 * Nodes with a mask of 0 and num_after of 0 are not allowed.
111 *
112 * + Sum of bits set in all the nodes is equal to the value of
113 * the struct sparsebit_pvt num_set member.
114 *
115 * + The setting of at least one bit is always described in a nodes
116 * mask (mask >= 1).
117 *
118 * + A node with all mask bits set only occurs when the last bit
119 * described by the previous node is not equal to this nodes
120 * starting index - 1. All such occurences of this condition are
121 * avoided by moving the setting of the nodes mask bits into
122 * the previous nodes num_after setting.
123 *
124 * + Node starting index is evenly divisible by the number of bits
125 * within a nodes mask member.
126 *
127 * + Nodes never represent a range of bits that wrap around the
128 * highest supported index.
129 *
130 * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1)
131 *
132 * As a consequence of the above, the num_after member of a node
133 * will always be <=:
134 *
135 * maximum_index - nodes_starting_index - number_of_mask_bits
136 *
137 * + Nodes within the binary search tree are sorted based on each
138 * nodes starting index.
139 *
140 * + The range of bits described by any two nodes do not overlap. The
141 * range of bits described by a single node is:
142 *
143 * start: node->idx
144 * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
145 *
146 * Note, at times these invariants are temporarily violated for a
147 * specific portion of the code. For example, when setting a mask
148 * bit, there is a small delay between when the mask bit is set and the
149 * value in the struct sparsebit_pvt num_set member is updated. Other
150 * temporary violations occur when node_split() is called with a specified
151 * index and assures that a node where its mask represents the bit
152 * at the specified index exists. At times to do this node_split()
153 * must split an existing node into two nodes or create a node that
154 * has no bits set. Such temporary violations must be corrected before
155 * returning to the caller. These corrections are typically performed
156 * by the local function node_reduce().
157 */
158
159#include "test_util.h"
160#include "sparsebit.h"
161#include <limits.h>
162#include <assert.h>
163
164#define DUMP_LINE_MAX 100 /* Does not include indent amount */
165
166typedef uint32_t mask_t;
167#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
168
169struct node {
170 struct node *parent;
171 struct node *left;
172 struct node *right;
173 sparsebit_idx_t idx; /* index of least-significant bit in mask */
174 sparsebit_num_t num_after; /* num contiguously set after mask */
175 mask_t mask;
176};
177
178struct sparsebit {
179 /*
180 * Points to root node of the binary search
181 * tree. Equal to NULL when no bits are set in
182 * the entire sparsebit array.
183 */
184 struct node *root;
185
186 /*
187 * A redundant count of the total number of bits set. Used for
188 * diagnostic purposes and to change the time complexity of
189 * sparsebit_num_set() from O(n) to O(1).
190 * Note: Due to overflow, a value of 0 means none or all set.
191 */
192 sparsebit_num_t num_set;
193};
194
195/* Returns the number of set bits described by the settings
196 * of the node pointed to by nodep.
197 */
198static sparsebit_num_t node_num_set(struct node *nodep)
199{
200 return nodep->num_after + __builtin_popcount(nodep->mask);
201}
202
203/* Returns a pointer to the node that describes the
204 * lowest bit index.
205 */
206static struct node *node_first(struct sparsebit *s)
207{
208 struct node *nodep;
209
210 for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
211 ;
212
213 return nodep;
214}
215
216/* Returns a pointer to the node that describes the
217 * lowest bit index > the index of the node pointed to by np.
218 * Returns NULL if no node with a higher index exists.
219 */
220static struct node *node_next(struct sparsebit *s, struct node *np)
221{
222 struct node *nodep = np;
223
224 /*
225 * If current node has a right child, next node is the left-most
226 * of the right child.
227 */
228 if (nodep->right) {
229 for (nodep = nodep->right; nodep->left; nodep = nodep->left)
230 ;
231 return nodep;
232 }
233
234 /*
235 * No right child. Go up until node is left child of a parent.
236 * That parent is then the next node.
237 */
238 while (nodep->parent && nodep == nodep->parent->right)
239 nodep = nodep->parent;
240
241 return nodep->parent;
242}
243
244/* Searches for and returns a pointer to the node that describes the
245 * highest index < the index of the node pointed to by np.
246 * Returns NULL if no node with a lower index exists.
247 */
248static struct node *node_prev(struct sparsebit *s, struct node *np)
249{
250 struct node *nodep = np;
251
252 /*
253 * If current node has a left child, next node is the right-most
254 * of the left child.
255 */
256 if (nodep->left) {
257 for (nodep = nodep->left; nodep->right; nodep = nodep->right)
258 ;
259 return (struct node *) nodep;
260 }
261
262 /*
263 * No left child. Go up until node is right child of a parent.
264 * That parent is then the next node.
265 */
266 while (nodep->parent && nodep == nodep->parent->left)
267 nodep = nodep->parent;
268
269 return (struct node *) nodep->parent;
270}
271
272
273/* Allocates space to hold a copy of the node sub-tree pointed to by
274 * subtree and duplicates the bit settings to the newly allocated nodes.
275 * Returns the newly allocated copy of subtree.
276 */
277static struct node *node_copy_subtree(struct node *subtree)
278{
279 struct node *root;
280
281 /* Duplicate the node at the root of the subtree */
282 root = calloc(1, sizeof(*root));
283 if (!root) {
284 perror("calloc");
285 abort();
286 }
287
288 root->idx = subtree->idx;
289 root->mask = subtree->mask;
290 root->num_after = subtree->num_after;
291
292 /* As needed, recursively duplicate the left and right subtrees */
293 if (subtree->left) {
294 root->left = node_copy_subtree(subtree->left);
295 root->left->parent = root;
296 }
297
298 if (subtree->right) {
299 root->right = node_copy_subtree(subtree->right);
300 root->right->parent = root;
301 }
302
303 return root;
304}
305
306/* Searches for and returns a pointer to the node that describes the setting
307 * of the bit given by idx. A node describes the setting of a bit if its
308 * index is within the bits described by the mask bits or the number of
309 * contiguous bits set after the mask. Returns NULL if there is no such node.
310 */
311static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
312{
313 struct node *nodep;
314
315 /* Find the node that describes the setting of the bit at idx */
316 for (nodep = s->root; nodep;
317 nodep = nodep->idx > idx ? nodep->left : nodep->right) {
318 if (idx >= nodep->idx &&
319 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
320 break;
321 }
322
323 return nodep;
324}
325
326/* Entry Requirements:
327 * + A node that describes the setting of idx is not already present.
328 *
329 * Adds a new node to describe the setting of the bit at the index given
330 * by idx. Returns a pointer to the newly added node.
331 *
332 * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.
333 */
334static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
335{
336 struct node *nodep, *parentp, *prev;
337
338 /* Allocate and initialize the new node. */
339 nodep = calloc(1, sizeof(*nodep));
340 if (!nodep) {
341 perror("calloc");
342 abort();
343 }
344
345 nodep->idx = idx & -MASK_BITS;
346
347 /* If no nodes, set it up as the root node. */
348 if (!s->root) {
349 s->root = nodep;
350 return nodep;
351 }
352
353 /*
354 * Find the parent where the new node should be attached
355 * and add the node there.
356 */
357 parentp = s->root;
358 while (true) {
359 if (idx < parentp->idx) {
360 if (!parentp->left) {
361 parentp->left = nodep;
362 nodep->parent = parentp;
363 break;
364 }
365 parentp = parentp->left;
366 } else {
367 assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
368 if (!parentp->right) {
369 parentp->right = nodep;
370 nodep->parent = parentp;
371 break;
372 }
373 parentp = parentp->right;
374 }
375 }
376
377 /*
378 * Does num_after bits of previous node overlap with the mask
379 * of the new node? If so set the bits in the new nodes mask
380 * and reduce the previous nodes num_after.
381 */
382 prev = node_prev(s, nodep);
383 while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
384 unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
385 - nodep->idx;
386 assert(prev->num_after > 0);
387 assert(n1 < MASK_BITS);
388 assert(!(nodep->mask & (1 << n1)));
389 nodep->mask |= (1 << n1);
390 prev->num_after--;
391 }
392
393 return nodep;
394}
395
396/* Returns whether all the bits in the sparsebit array are set. */
397bool sparsebit_all_set(struct sparsebit *s)
398{
399 /*
400 * If any nodes there must be at least one bit set. Only case
401 * where a bit is set and total num set is 0, is when all bits
402 * are set.
403 */
404 return s->root && s->num_set == 0;
405}
406
407/* Clears all bits described by the node pointed to by nodep, then
408 * removes the node.
409 */
410static void node_rm(struct sparsebit *s, struct node *nodep)
411{
412 struct node *tmp;
413 sparsebit_num_t num_set;
414
415 num_set = node_num_set(nodep);
416 assert(s->num_set >= num_set || sparsebit_all_set(s));
417 s->num_set -= node_num_set(nodep);
418
419 /* Have both left and right child */
420 if (nodep->left && nodep->right) {
421 /*
422 * Move left children to the leftmost leaf node
423 * of the right child.
424 */
425 for (tmp = nodep->right; tmp->left; tmp = tmp->left)
426 ;
427 tmp->left = nodep->left;
428 nodep->left = NULL;
429 tmp->left->parent = tmp;
430 }
431
432 /* Left only child */
433 if (nodep->left) {
434 if (!nodep->parent) {
435 s->root = nodep->left;
436 nodep->left->parent = NULL;
437 } else {
438 nodep->left->parent = nodep->parent;
439 if (nodep == nodep->parent->left)
440 nodep->parent->left = nodep->left;
441 else {
442 assert(nodep == nodep->parent->right);
443 nodep->parent->right = nodep->left;
444 }
445 }
446
447 nodep->parent = nodep->left = nodep->right = NULL;
448 free(nodep);
449
450 return;
451 }
452
453
454 /* Right only child */
455 if (nodep->right) {
456 if (!nodep->parent) {
457 s->root = nodep->right;
458 nodep->right->parent = NULL;
459 } else {
460 nodep->right->parent = nodep->parent;
461 if (nodep == nodep->parent->left)
462 nodep->parent->left = nodep->right;
463 else {
464 assert(nodep == nodep->parent->right);
465 nodep->parent->right = nodep->right;
466 }
467 }
468
469 nodep->parent = nodep->left = nodep->right = NULL;
470 free(nodep);
471
472 return;
473 }
474
475 /* Leaf Node */
476 if (!nodep->parent) {
477 s->root = NULL;
478 } else {
479 if (nodep->parent->left == nodep)
480 nodep->parent->left = NULL;
481 else {
482 assert(nodep == nodep->parent->right);
483 nodep->parent->right = NULL;
484 }
485 }
486
487 nodep->parent = nodep->left = nodep->right = NULL;
488 free(nodep);
489
490 return;
491}
492
493/* Splits the node containing the bit at idx so that there is a node
494 * that starts at the specified index. If no such node exists, a new
495 * node at the specified index is created. Returns the new node.
496 *
497 * idx must start of a mask boundary.
498 */
499static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
500{
501 struct node *nodep1, *nodep2;
502 sparsebit_idx_t offset;
503 sparsebit_num_t orig_num_after;
504
505 assert(!(idx % MASK_BITS));
506
507 /*
508 * Is there a node that describes the setting of idx?
509 * If not, add it.
510 */
511 nodep1 = node_find(s, idx);
512 if (!nodep1)
513 return node_add(s, idx);
514
515 /*
516 * All done if the starting index of the node is where the
517 * split should occur.
518 */
519 if (nodep1->idx == idx)
520 return nodep1;
521
522 /*
523 * Split point not at start of mask, so it must be part of
524 * bits described by num_after.
525 */
526
527 /*
528 * Calculate offset within num_after for where the split is
529 * to occur.
530 */
531 offset = idx - (nodep1->idx + MASK_BITS);
532 orig_num_after = nodep1->num_after;
533
534 /*
535 * Add a new node to describe the bits starting at
536 * the split point.
537 */
538 nodep1->num_after = offset;
539 nodep2 = node_add(s, idx);
540
541 /* Move bits after the split point into the new node */
542 nodep2->num_after = orig_num_after - offset;
543 if (nodep2->num_after >= MASK_BITS) {
544 nodep2->mask = ~(mask_t) 0;
545 nodep2->num_after -= MASK_BITS;
546 } else {
547 nodep2->mask = (1 << nodep2->num_after) - 1;
548 nodep2->num_after = 0;
549 }
550
551 return nodep2;
552}
553
554/* Iteratively reduces the node pointed to by nodep and its adjacent
555 * nodes into a more compact form. For example, a node with a mask with
556 * all bits set adjacent to a previous node, will get combined into a
557 * single node with an increased num_after setting.
558 *
559 * After each reduction, a further check is made to see if additional
560 * reductions are possible with the new previous and next nodes. Note,
561 * a search for a reduction is only done across the nodes nearest nodep
562 * and those that became part of a reduction. Reductions beyond nodep
563 * and the adjacent nodes that are reduced are not discovered. It is the
564 * responsibility of the caller to pass a nodep that is within one node
565 * of each possible reduction.
566 *
567 * This function does not fix the temporary violation of all invariants.
568 * For example it does not fix the case where the bit settings described
569 * by two or more nodes overlap. Such a violation introduces the potential
570 * complication of a bit setting for a specific index having different settings
571 * in different nodes. This would then introduce the further complication
572 * of which node has the correct setting of the bit and thus such conditions
573 * are not allowed.
574 *
575 * This function is designed to fix invariant violations that are introduced
576 * by node_split() and by changes to the nodes mask or num_after members.
577 * For example, when setting a bit within a nodes mask, the function that
578 * sets the bit doesn't have to worry about whether the setting of that
579 * bit caused the mask to have leading only or trailing only bits set.
580 * Instead, the function can call node_reduce(), with nodep equal to the
581 * node address that it set a mask bit in, and node_reduce() will notice
582 * the cases of leading or trailing only bits and that there is an
583 * adjacent node that the bit settings could be merged into.
584 *
585 * This implementation specifically detects and corrects violation of the
586 * following invariants:
587 *
588 * + Node are only used to represent bits that are set.
589 * Nodes with a mask of 0 and num_after of 0 are not allowed.
590 *
591 * + The setting of at least one bit is always described in a nodes
592 * mask (mask >= 1).
593 *
594 * + A node with all mask bits set only occurs when the last bit
595 * described by the previous node is not equal to this nodes
596 * starting index - 1. All such occurences of this condition are
597 * avoided by moving the setting of the nodes mask bits into
598 * the previous nodes num_after setting.
599 */
600static void node_reduce(struct sparsebit *s, struct node *nodep)
601{
602 bool reduction_performed;
603
604 do {
605 reduction_performed = false;
606 struct node *prev, *next, *tmp;
607
608 /* 1) Potential reductions within the current node. */
609
610 /* Nodes with all bits cleared may be removed. */
611 if (nodep->mask == 0 && nodep->num_after == 0) {
612 /*
613 * About to remove the node pointed to by
614 * nodep, which normally would cause a problem
615 * for the next pass through the reduction loop,
616 * because the node at the starting point no longer
617 * exists. This potential problem is handled
618 * by first remembering the location of the next
619 * or previous nodes. Doesn't matter which, because
620 * once the node at nodep is removed, there will be
621 * no other nodes between prev and next.
622 *
623 * Note, the checks performed on nodep against both
624 * both prev and next both check for an adjacent
625 * node that can be reduced into a single node. As
626 * such, after removing the node at nodep, doesn't
627 * matter whether the nodep for the next pass
628 * through the loop is equal to the previous pass
629 * prev or next node. Either way, on the next pass
630 * the one not selected will become either the
631 * prev or next node.
632 */
633 tmp = node_next(s, nodep);
634 if (!tmp)
635 tmp = node_prev(s, nodep);
636
637 node_rm(s, nodep);
638 nodep = NULL;
639
640 nodep = tmp;
641 reduction_performed = true;
642 continue;
643 }
644
645 /*
646 * When the mask is 0, can reduce the amount of num_after
647 * bits by moving the initial num_after bits into the mask.
648 */
649 if (nodep->mask == 0) {
650 assert(nodep->num_after != 0);
651 assert(nodep->idx + MASK_BITS > nodep->idx);
652
653 nodep->idx += MASK_BITS;
654
655 if (nodep->num_after >= MASK_BITS) {
656 nodep->mask = ~0;
657 nodep->num_after -= MASK_BITS;
658 } else {
659 nodep->mask = (1u << nodep->num_after) - 1;
660 nodep->num_after = 0;
661 }
662
663 reduction_performed = true;
664 continue;
665 }
666
667 /*
668 * 2) Potential reductions between the current and
669 * previous nodes.
670 */
671 prev = node_prev(s, nodep);
672 if (prev) {
673 sparsebit_idx_t prev_highest_bit;
674
675 /* Nodes with no bits set can be removed. */
676 if (prev->mask == 0 && prev->num_after == 0) {
677 node_rm(s, prev);
678
679 reduction_performed = true;
680 continue;
681 }
682
683 /*
684 * All mask bits set and previous node has
685 * adjacent index.
686 */
687 if (nodep->mask + 1 == 0 &&
688 prev->idx + MASK_BITS == nodep->idx) {
689 prev->num_after += MASK_BITS + nodep->num_after;
690 nodep->mask = 0;
691 nodep->num_after = 0;
692
693 reduction_performed = true;
694 continue;
695 }
696
697 /*
698 * Is node adjacent to previous node and the node
699 * contains a single contiguous range of bits
700 * starting from the beginning of the mask?
701 */
702 prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
703 if (prev_highest_bit + 1 == nodep->idx &&
704 (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
705 /*
706 * How many contiguous bits are there?
707 * Is equal to the total number of set
708 * bits, due to an earlier check that
709 * there is a single contiguous range of
710 * set bits.
711 */
712 unsigned int num_contiguous
713 = __builtin_popcount(nodep->mask);
714 assert((num_contiguous > 0) &&
715 ((1ULL << num_contiguous) - 1) == nodep->mask);
716
717 prev->num_after += num_contiguous;
718 nodep->mask = 0;
719
720 /*
721 * For predictable performance, handle special
722 * case where all mask bits are set and there
723 * is a non-zero num_after setting. This code
724 * is functionally correct without the following
725 * conditionalized statements, but without them
726 * the value of num_after is only reduced by
727 * the number of mask bits per pass. There are
728 * cases where num_after can be close to 2^64.
729 * Without this code it could take nearly
730 * (2^64) / 32 passes to perform the full
731 * reduction.
732 */
733 if (num_contiguous == MASK_BITS) {
734 prev->num_after += nodep->num_after;
735 nodep->num_after = 0;
736 }
737
738 reduction_performed = true;
739 continue;
740 }
741 }
742
743 /*
744 * 3) Potential reductions between the current and
745 * next nodes.
746 */
747 next = node_next(s, nodep);
748 if (next) {
749 /* Nodes with no bits set can be removed. */
750 if (next->mask == 0 && next->num_after == 0) {
751 node_rm(s, next);
752 reduction_performed = true;
753 continue;
754 }
755
756 /*
757 * Is next node index adjacent to current node
758 * and has a mask with all bits set?
759 */
760 if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
761 next->mask == ~(mask_t) 0) {
762 nodep->num_after += MASK_BITS;
763 next->mask = 0;
764 nodep->num_after += next->num_after;
765 next->num_after = 0;
766
767 node_rm(s, next);
768 next = NULL;
769
770 reduction_performed = true;
771 continue;
772 }
773 }
774 } while (nodep && reduction_performed);
775}
776
777/* Returns whether the bit at the index given by idx, within the
778 * sparsebit array is set or not.
779 */
780bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
781{
782 struct node *nodep;
783
784 /* Find the node that describes the setting of the bit at idx */
785 for (nodep = s->root; nodep;
786 nodep = nodep->idx > idx ? nodep->left : nodep->right)
787 if (idx >= nodep->idx &&
788 idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
789 goto have_node;
790
791 return false;
792
793have_node:
794 /* Bit is set if it is any of the bits described by num_after */
795 if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
796 return true;
797
798 /* Is the corresponding mask bit set */
799 assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
800 return !!(nodep->mask & (1 << (idx - nodep->idx)));
801}
802
803/* Within the sparsebit array pointed to by s, sets the bit
804 * at the index given by idx.
805 */
806static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
807{
808 struct node *nodep;
809
810 /* Skip bits that are already set */
811 if (sparsebit_is_set(s, idx))
812 return;
813
814 /*
815 * Get a node where the bit at idx is described by the mask.
816 * The node_split will also create a node, if there isn't
817 * already a node that describes the setting of bit.
818 */
819 nodep = node_split(s, idx & -MASK_BITS);
820
821 /* Set the bit within the nodes mask */
822 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
823 assert(!(nodep->mask & (1 << (idx - nodep->idx))));
824 nodep->mask |= 1 << (idx - nodep->idx);
825 s->num_set++;
826
827 node_reduce(s, nodep);
828}
829
830/* Within the sparsebit array pointed to by s, clears the bit
831 * at the index given by idx.
832 */
833static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
834{
835 struct node *nodep;
836
837 /* Skip bits that are already cleared */
838 if (!sparsebit_is_set(s, idx))
839 return;
840
841 /* Is there a node that describes the setting of this bit? */
842 nodep = node_find(s, idx);
843 if (!nodep)
844 return;
845
846 /*
847 * If a num_after bit, split the node, so that the bit is
848 * part of a node mask.
849 */
850 if (idx >= nodep->idx + MASK_BITS)
851 nodep = node_split(s, idx & -MASK_BITS);
852
853 /*
854 * After node_split above, bit at idx should be within the mask.
855 * Clear that bit.
856 */
857 assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
858 assert(nodep->mask & (1 << (idx - nodep->idx)));
859 nodep->mask &= ~(1 << (idx - nodep->idx));
860 assert(s->num_set > 0 || sparsebit_all_set(s));
861 s->num_set--;
862
863 node_reduce(s, nodep);
864}
865
866/* Recursively dumps to the FILE stream given by stream the contents
867 * of the sub-tree of nodes pointed to by nodep. Each line of output
868 * is prefixed by the number of spaces given by indent. On each
869 * recursion, the indent amount is increased by 2. This causes nodes
870 * at each level deeper into the binary search tree to be displayed
871 * with a greater indent.
872 */
873static void dump_nodes(FILE *stream, struct node *nodep,
874 unsigned int indent)
875{
876 char *node_type;
877
878 /* Dump contents of node */
879 if (!nodep->parent)
880 node_type = "root";
881 else if (nodep == nodep->parent->left)
882 node_type = "left";
883 else {
884 assert(nodep == nodep->parent->right);
885 node_type = "right";
886 }
887 fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
888 fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
889 nodep->parent, nodep->left, nodep->right);
890 fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
891 indent, "", nodep->idx, nodep->mask, nodep->num_after);
892
893 /* If present, dump contents of left child nodes */
894 if (nodep->left)
895 dump_nodes(stream, nodep->left, indent + 2);
896
897 /* If present, dump contents of right child nodes */
898 if (nodep->right)
899 dump_nodes(stream, nodep->right, indent + 2);
900}
901
902static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
903{
904 mask_t leading = (mask_t)1 << start;
905 int n1 = __builtin_ctz(nodep->mask & -leading);
906
907 return nodep->idx + n1;
908}
909
910static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
911{
912 mask_t leading = (mask_t)1 << start;
913 int n1 = __builtin_ctz(~nodep->mask & -leading);
914
915 return nodep->idx + n1;
916}
917
918/* Dumps to the FILE stream specified by stream, the implementation dependent
919 * internal state of s. Each line of output is prefixed with the number
920 * of spaces given by indent. The output is completely implementation
921 * dependent and subject to change. Output from this function should only
922 * be used for diagnostic purposes. For example, this function can be
923 * used by test cases after they detect an unexpected condition, as a means
924 * to capture diagnostic information.
925 */
926static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
927 unsigned int indent)
928{
929 /* Dump the contents of s */
930 fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
931 fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
932
933 if (s->root)
934 dump_nodes(stream, s->root, indent);
935}
936
937/* Allocates and returns a new sparsebit array. The initial state
938 * of the newly allocated sparsebit array has all bits cleared.
939 */
940struct sparsebit *sparsebit_alloc(void)
941{
942 struct sparsebit *s;
943
944 /* Allocate top level structure. */
945 s = calloc(1, sizeof(*s));
946 if (!s) {
947 perror("calloc");
948 abort();
949 }
950
951 return s;
952}
953
954/* Frees the implementation dependent data for the sparsebit array
955 * pointed to by s and poisons the pointer to that data.
956 */
957void sparsebit_free(struct sparsebit **sbitp)
958{
959 struct sparsebit *s = *sbitp;
960
961 if (!s)
962 return;
963
964 sparsebit_clear_all(s);
965 free(s);
966 *sbitp = NULL;
967}
968
969/* Makes a copy of the sparsebit array given by s, to the sparsebit
970 * array given by d. Note, d must have already been allocated via
971 * sparsebit_alloc(). It can though already have bits set, which
972 * if different from src will be cleared.
973 */
974void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
975{
976 /* First clear any bits already set in the destination */
977 sparsebit_clear_all(d);
978
979 if (s->root) {
980 d->root = node_copy_subtree(s->root);
981 d->num_set = s->num_set;
982 }
983}
984
985/* Returns whether num consecutive bits starting at idx are all set. */
986bool sparsebit_is_set_num(struct sparsebit *s,
987 sparsebit_idx_t idx, sparsebit_num_t num)
988{
989 sparsebit_idx_t next_cleared;
990
991 assert(num > 0);
992 assert(idx + num - 1 >= idx);
993
994 /* With num > 0, the first bit must be set. */
995 if (!sparsebit_is_set(s, idx))
996 return false;
997
998 /* Find the next cleared bit */
999 next_cleared = sparsebit_next_clear(s, idx);
1000
1001 /*
1002 * If no cleared bits beyond idx, then there are at least num
1003 * set bits. idx + num doesn't wrap. Otherwise check if
1004 * there are enough set bits between idx and the next cleared bit.
1005 */
1006 return next_cleared == 0 || next_cleared - idx >= num;
1007}
1008
1009/* Returns whether the bit at the index given by idx. */
1010bool sparsebit_is_clear(struct sparsebit *s,
1011 sparsebit_idx_t idx)
1012{
1013 return !sparsebit_is_set(s, idx);
1014}
1015
1016/* Returns whether num consecutive bits starting at idx are all cleared. */
1017bool sparsebit_is_clear_num(struct sparsebit *s,
1018 sparsebit_idx_t idx, sparsebit_num_t num)
1019{
1020 sparsebit_idx_t next_set;
1021
1022 assert(num > 0);
1023 assert(idx + num - 1 >= idx);
1024
1025 /* With num > 0, the first bit must be cleared. */
1026 if (!sparsebit_is_clear(s, idx))
1027 return false;
1028
1029 /* Find the next set bit */
1030 next_set = sparsebit_next_set(s, idx);
1031
1032 /*
1033 * If no set bits beyond idx, then there are at least num
1034 * cleared bits. idx + num doesn't wrap. Otherwise check if
1035 * there are enough cleared bits between idx and the next set bit.
1036 */
1037 return next_set == 0 || next_set - idx >= num;
1038}
1039
1040/* Returns the total number of bits set. Note: 0 is also returned for
1041 * the case of all bits set. This is because with all bits set, there
1042 * is 1 additional bit set beyond what can be represented in the return
1043 * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
1044 * to determine if the sparsebit array has any bits set.
1045 */
1046sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
1047{
1048 return s->num_set;
1049}
1050
1051/* Returns whether any bit is set in the sparsebit array. */
1052bool sparsebit_any_set(struct sparsebit *s)
1053{
1054 /*
1055 * Nodes only describe set bits. If any nodes then there
1056 * is at least 1 bit set.
1057 */
1058 if (!s->root)
1059 return false;
1060
1061 /*
1062 * Every node should have a non-zero mask. For now will
1063 * just assure that the root node has a non-zero mask,
1064 * which is a quick check that at least 1 bit is set.
1065 */
1066 assert(s->root->mask != 0);
1067 assert(s->num_set > 0 ||
1068 (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
1069 s->root->mask == ~(mask_t) 0));
1070
1071 return true;
1072}
1073
1074/* Returns whether all the bits in the sparsebit array are cleared. */
1075bool sparsebit_all_clear(struct sparsebit *s)
1076{
1077 return !sparsebit_any_set(s);
1078}
1079
1080/* Returns whether all the bits in the sparsebit array are set. */
1081bool sparsebit_any_clear(struct sparsebit *s)
1082{
1083 return !sparsebit_all_set(s);
1084}
1085
1086/* Returns the index of the first set bit. Abort if no bits are set.
1087 */
1088sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
1089{
1090 struct node *nodep;
1091
1092 /* Validate at least 1 bit is set */
1093 assert(sparsebit_any_set(s));
1094
1095 nodep = node_first(s);
1096 return node_first_set(nodep, 0);
1097}
1098
1099/* Returns the index of the first cleared bit. Abort if
1100 * no bits are cleared.
1101 */
1102sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
1103{
1104 struct node *nodep1, *nodep2;
1105
1106 /* Validate at least 1 bit is cleared. */
1107 assert(sparsebit_any_clear(s));
1108
1109 /* If no nodes or first node index > 0 then lowest cleared is 0 */
1110 nodep1 = node_first(s);
1111 if (!nodep1 || nodep1->idx > 0)
1112 return 0;
1113
1114 /* Does the mask in the first node contain any cleared bits. */
1115 if (nodep1->mask != ~(mask_t) 0)
1116 return node_first_clear(nodep1, 0);
1117
1118 /*
1119 * All mask bits set in first node. If there isn't a second node
1120 * then the first cleared bit is the first bit after the bits
1121 * described by the first node.
1122 */
1123 nodep2 = node_next(s, nodep1);
1124 if (!nodep2) {
1125 /*
1126 * No second node. First cleared bit is first bit beyond
1127 * bits described by first node.
1128 */
1129 assert(nodep1->mask == ~(mask_t) 0);
1130 assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
1131 return nodep1->idx + MASK_BITS + nodep1->num_after;
1132 }
1133
1134 /*
1135 * There is a second node.
1136 * If it is not adjacent to the first node, then there is a gap
1137 * of cleared bits between the nodes, and the first cleared bit
1138 * is the first bit within the gap.
1139 */
1140 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
1141 return nodep1->idx + MASK_BITS + nodep1->num_after;
1142
1143 /*
1144 * Second node is adjacent to the first node.
1145 * Because it is adjacent, its mask should be non-zero. If all
1146 * its mask bits are set, then with it being adjacent, it should
1147 * have had the mask bits moved into the num_after setting of the
1148 * previous node.
1149 */
1150 return node_first_clear(nodep2, 0);
1151}
1152
1153/* Returns index of next bit set within s after the index given by prev.
1154 * Returns 0 if there are no bits after prev that are set.
1155 */
1156sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
1157 sparsebit_idx_t prev)
1158{
1159 sparsebit_idx_t lowest_possible = prev + 1;
1160 sparsebit_idx_t start;
1161 struct node *nodep;
1162
1163 /* A bit after the highest index can't be set. */
1164 if (lowest_possible == 0)
1165 return 0;
1166
1167 /*
1168 * Find the leftmost 'candidate' overlapping or to the right
1169 * of lowest_possible.
1170 */
1171 struct node *candidate = NULL;
1172
1173 /* True iff lowest_possible is within candidate */
1174 bool contains = false;
1175
1176 /*
1177 * Find node that describes setting of bit at lowest_possible.
1178 * If such a node doesn't exist, find the node with the lowest
1179 * starting index that is > lowest_possible.
1180 */
1181 for (nodep = s->root; nodep;) {
1182 if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
1183 >= lowest_possible) {
1184 candidate = nodep;
1185 if (candidate->idx <= lowest_possible) {
1186 contains = true;
1187 break;
1188 }
1189 nodep = nodep->left;
1190 } else {
1191 nodep = nodep->right;
1192 }
1193 }
1194 if (!candidate)
1195 return 0;
1196
1197 assert(candidate->mask != 0);
1198
1199 /* Does the candidate node describe the setting of lowest_possible? */
1200 if (!contains) {
1201 /*
1202 * Candidate doesn't describe setting of bit at lowest_possible.
1203 * Candidate points to the first node with a starting index
1204 * > lowest_possible.
1205 */
1206 assert(candidate->idx > lowest_possible);
1207
1208 return node_first_set(candidate, 0);
1209 }
1210
1211 /*
1212 * Candidate describes setting of bit at lowest_possible.
1213 * Note: although the node describes the setting of the bit
1214 * at lowest_possible, its possible that its setting and the
1215 * setting of all latter bits described by this node are 0.
1216 * For now, just handle the cases where this node describes
1217 * a bit at or after an index of lowest_possible that is set.
1218 */
1219 start = lowest_possible - candidate->idx;
1220
1221 if (start < MASK_BITS && candidate->mask >= (1 << start))
1222 return node_first_set(candidate, start);
1223
1224 if (candidate->num_after) {
1225 sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
1226
1227 return lowest_possible < first_num_after_idx
1228 ? first_num_after_idx : lowest_possible;
1229 }
1230
1231 /*
1232 * Although candidate node describes setting of bit at
1233 * the index of lowest_possible, all bits at that index and
1234 * latter that are described by candidate are cleared. With
1235 * this, the next bit is the first bit in the next node, if
1236 * such a node exists. If a next node doesn't exist, then
1237 * there is no next set bit.
1238 */
1239 candidate = node_next(s, candidate);
1240 if (!candidate)
1241 return 0;
1242
1243 return node_first_set(candidate, 0);
1244}
1245
1246/* Returns index of next bit cleared within s after the index given by prev.
1247 * Returns 0 if there are no bits after prev that are cleared.
1248 */
1249sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
1250 sparsebit_idx_t prev)
1251{
1252 sparsebit_idx_t lowest_possible = prev + 1;
1253 sparsebit_idx_t idx;
1254 struct node *nodep1, *nodep2;
1255
1256 /* A bit after the highest index can't be set. */
1257 if (lowest_possible == 0)
1258 return 0;
1259
1260 /*
1261 * Does a node describing the setting of lowest_possible exist?
1262 * If not, the bit at lowest_possible is cleared.
1263 */
1264 nodep1 = node_find(s, lowest_possible);
1265 if (!nodep1)
1266 return lowest_possible;
1267
1268 /* Does a mask bit in node 1 describe the next cleared bit. */
1269 for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
1270 if (!(nodep1->mask & (1 << idx)))
1271 return nodep1->idx + idx;
1272
1273 /*
1274 * Next cleared bit is not described by node 1. If there
1275 * isn't a next node, then next cleared bit is described
1276 * by bit after the bits described by the first node.
1277 */
1278 nodep2 = node_next(s, nodep1);
1279 if (!nodep2)
1280 return nodep1->idx + MASK_BITS + nodep1->num_after;
1281
1282 /*
1283 * There is a second node.
1284 * If it is not adjacent to the first node, then there is a gap
1285 * of cleared bits between the nodes, and the next cleared bit
1286 * is the first bit within the gap.
1287 */
1288 if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
1289 return nodep1->idx + MASK_BITS + nodep1->num_after;
1290
1291 /*
1292 * Second node is adjacent to the first node.
1293 * Because it is adjacent, its mask should be non-zero. If all
1294 * its mask bits are set, then with it being adjacent, it should
1295 * have had the mask bits moved into the num_after setting of the
1296 * previous node.
1297 */
1298 return node_first_clear(nodep2, 0);
1299}
1300
1301/* Starting with the index 1 greater than the index given by start, finds
1302 * and returns the index of the first sequence of num consecutively set
1303 * bits. Returns a value of 0 of no such sequence exists.
1304 */
1305sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
1306 sparsebit_idx_t start, sparsebit_num_t num)
1307{
1308 sparsebit_idx_t idx;
1309
1310 assert(num >= 1);
1311
1312 for (idx = sparsebit_next_set(s, start);
1313 idx != 0 && idx + num - 1 >= idx;
1314 idx = sparsebit_next_set(s, idx)) {
1315 assert(sparsebit_is_set(s, idx));
1316
1317 /*
1318 * Does the sequence of bits starting at idx consist of
1319 * num set bits?
1320 */
1321 if (sparsebit_is_set_num(s, idx, num))
1322 return idx;
1323
1324 /*
1325 * Sequence of set bits at idx isn't large enough.
1326 * Skip this entire sequence of set bits.
1327 */
1328 idx = sparsebit_next_clear(s, idx);
1329 if (idx == 0)
1330 return 0;
1331 }
1332
1333 return 0;
1334}
1335
1336/* Starting with the index 1 greater than the index given by start, finds
1337 * and returns the index of the first sequence of num consecutively cleared
1338 * bits. Returns a value of 0 of no such sequence exists.
1339 */
1340sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
1341 sparsebit_idx_t start, sparsebit_num_t num)
1342{
1343 sparsebit_idx_t idx;
1344
1345 assert(num >= 1);
1346
1347 for (idx = sparsebit_next_clear(s, start);
1348 idx != 0 && idx + num - 1 >= idx;
1349 idx = sparsebit_next_clear(s, idx)) {
1350 assert(sparsebit_is_clear(s, idx));
1351
1352 /*
1353 * Does the sequence of bits starting at idx consist of
1354 * num cleared bits?
1355 */
1356 if (sparsebit_is_clear_num(s, idx, num))
1357 return idx;
1358
1359 /*
1360 * Sequence of cleared bits at idx isn't large enough.
1361 * Skip this entire sequence of cleared bits.
1362 */
1363 idx = sparsebit_next_set(s, idx);
1364 if (idx == 0)
1365 return 0;
1366 }
1367
1368 return 0;
1369}
1370
1371/* Sets the bits * in the inclusive range idx through idx + num - 1. */
1372void sparsebit_set_num(struct sparsebit *s,
1373 sparsebit_idx_t start, sparsebit_num_t num)
1374{
1375 struct node *nodep, *next;
1376 unsigned int n1;
1377 sparsebit_idx_t idx;
1378 sparsebit_num_t n;
1379 sparsebit_idx_t middle_start, middle_end;
1380
1381 assert(num > 0);
1382 assert(start + num - 1 >= start);
1383
1384 /*
1385 * Leading - bits before first mask boundary.
1386 *
1387 * TODO(lhuemill): With some effort it may be possible to
1388 * replace the following loop with a sequential sequence
1389 * of statements. High level sequence would be:
1390 *
1391 * 1. Use node_split() to force node that describes setting
1392 * of idx to be within the mask portion of a node.
1393 * 2. Form mask of bits to be set.
1394 * 3. Determine number of mask bits already set in the node
1395 * and store in a local variable named num_already_set.
1396 * 4. Set the appropriate mask bits within the node.
1397 * 5. Increment struct sparsebit_pvt num_set member
1398 * by the number of bits that were actually set.
1399 * Exclude from the counts bits that were already set.
1400 * 6. Before returning to the caller, use node_reduce() to
1401 * handle the multiple corner cases that this method
1402 * introduces.
1403 */
1404 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
1405 bit_set(s, idx);
1406
1407 /* Middle - bits spanning one or more entire mask */
1408 middle_start = idx;
1409 middle_end = middle_start + (n & -MASK_BITS) - 1;
1410 if (n >= MASK_BITS) {
1411 nodep = node_split(s, middle_start);
1412
1413 /*
1414 * As needed, split just after end of middle bits.
1415 * No split needed if end of middle bits is at highest
1416 * supported bit index.
1417 */
1418 if (middle_end + 1 > middle_end)
1419 (void) node_split(s, middle_end + 1);
1420
1421 /* Delete nodes that only describe bits within the middle. */
1422 for (next = node_next(s, nodep);
1423 next && (next->idx < middle_end);
1424 next = node_next(s, nodep)) {
1425 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
1426 node_rm(s, next);
1427 next = NULL;
1428 }
1429
1430 /* As needed set each of the mask bits */
1431 for (n1 = 0; n1 < MASK_BITS; n1++) {
1432 if (!(nodep->mask & (1 << n1))) {
1433 nodep->mask |= 1 << n1;
1434 s->num_set++;
1435 }
1436 }
1437
1438 s->num_set -= nodep->num_after;
1439 nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
1440 s->num_set += nodep->num_after;
1441
1442 node_reduce(s, nodep);
1443 }
1444 idx = middle_end + 1;
1445 n -= middle_end - middle_start + 1;
1446
1447 /* Trailing - bits at and beyond last mask boundary */
1448 assert(n < MASK_BITS);
1449 for (; n > 0; idx++, n--)
1450 bit_set(s, idx);
1451}
1452
1453/* Clears the bits * in the inclusive range idx through idx + num - 1. */
1454void sparsebit_clear_num(struct sparsebit *s,
1455 sparsebit_idx_t start, sparsebit_num_t num)
1456{
1457 struct node *nodep, *next;
1458 unsigned int n1;
1459 sparsebit_idx_t idx;
1460 sparsebit_num_t n;
1461 sparsebit_idx_t middle_start, middle_end;
1462
1463 assert(num > 0);
1464 assert(start + num - 1 >= start);
1465
1466 /* Leading - bits before first mask boundary */
1467 for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
1468 bit_clear(s, idx);
1469
1470 /* Middle - bits spanning one or more entire mask */
1471 middle_start = idx;
1472 middle_end = middle_start + (n & -MASK_BITS) - 1;
1473 if (n >= MASK_BITS) {
1474 nodep = node_split(s, middle_start);
1475
1476 /*
1477 * As needed, split just after end of middle bits.
1478 * No split needed if end of middle bits is at highest
1479 * supported bit index.
1480 */
1481 if (middle_end + 1 > middle_end)
1482 (void) node_split(s, middle_end + 1);
1483
1484 /* Delete nodes that only describe bits within the middle. */
1485 for (next = node_next(s, nodep);
1486 next && (next->idx < middle_end);
1487 next = node_next(s, nodep)) {
1488 assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
1489 node_rm(s, next);
1490 next = NULL;
1491 }
1492
1493 /* As needed clear each of the mask bits */
1494 for (n1 = 0; n1 < MASK_BITS; n1++) {
1495 if (nodep->mask & (1 << n1)) {
1496 nodep->mask &= ~(1 << n1);
1497 s->num_set--;
1498 }
1499 }
1500
1501 /* Clear any bits described by num_after */
1502 s->num_set -= nodep->num_after;
1503 nodep->num_after = 0;
1504
1505 /*
1506 * Delete the node that describes the beginning of
1507 * the middle bits and perform any allowed reductions
1508 * with the nodes prev or next of nodep.
1509 */
1510 node_reduce(s, nodep);
1511 nodep = NULL;
1512 }
1513 idx = middle_end + 1;
1514 n -= middle_end - middle_start + 1;
1515
1516 /* Trailing - bits at and beyond last mask boundary */
1517 assert(n < MASK_BITS);
1518 for (; n > 0; idx++, n--)
1519 bit_clear(s, idx);
1520}
1521
1522/* Sets the bit at the index given by idx. */
1523void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
1524{
1525 sparsebit_set_num(s, idx, 1);
1526}
1527
1528/* Clears the bit at the index given by idx. */
1529void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
1530{
1531 sparsebit_clear_num(s, idx, 1);
1532}
1533
1534/* Sets the bits in the entire addressable range of the sparsebit array. */
1535void sparsebit_set_all(struct sparsebit *s)
1536{
1537 sparsebit_set(s, 0);
1538 sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
1539 assert(sparsebit_all_set(s));
1540}
1541
1542/* Clears the bits in the entire addressable range of the sparsebit array. */
1543void sparsebit_clear_all(struct sparsebit *s)
1544{
	sparsebit_clear(s, 0);
	sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
	assert(!sparsebit_any_set(s));
}

static size_t display_range(FILE *stream, sparsebit_idx_t low,
	sparsebit_idx_t high, bool prepend_comma_space)
{
	char *fmt_str;
	size_t sz;

	/* Determine the printf format string */
	if (low == high)
		fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
	else
		fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";

	/*
	 * When stream is NULL, just determine the size of what would
	 * have been printed, else print the range.
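	 * (snprintf() with a NULL buffer and size 0 returns the number
	 * of characters that would have been written.)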
	 */
	if (!stream)
		sz = snprintf(NULL, 0, fmt_str, low, high);
	else
		sz = fprintf(stream, fmt_str, low, high);

	return sz;
}

/* Dumps the bit settings of s to the FILE stream given by stream.
 * Each line of output is prefixed with the number of spaces given
 * by indent. The length of each line is implementation dependent
 * and does not depend on the indent amount. The following is an
 * example output of a sparsebit array that has bits:
 *
 *	0x5, 0x8, 0xa:0xe, 0x12
 *
 * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
 * are set. Note that a ':', instead of a '-', is used to specify a range of
 * contiguous bits. This is done because '-' is used to specify command-line
 * options, and sometimes ranges are specified as command-line arguments.
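 *
 * A sketch of how such a state might be built and dumped, assuming s
 * came from sparsebit_alloc() and starts out with no bits set:
 *
 *	sparsebit_set(s, 0x5);
 *	sparsebit_set(s, 0x8);
 *	sparsebit_set_num(s, 0xa, 5);
 *	sparsebit_set(s, 0x12);
 *	sparsebit_dump(stdout, s, 0);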
 */
void sparsebit_dump(FILE *stream, struct sparsebit *s,
	unsigned int indent)
{
	size_t current_line_len = 0;
	size_t sz;
	struct node *nodep;

	if (!sparsebit_any_set(s))
		return;

	/* Display initial indent */
	fprintf(stream, "%*s", indent, "");

	/* For each node */
	for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
		unsigned int n1;
		sparsebit_idx_t low, high;

		/* For each group of bits in the mask */
		for (n1 = 0; n1 < MASK_BITS; n1++) {
			if (nodep->mask & (1 << n1)) {
				low = high = nodep->idx + n1;

				for (; n1 < MASK_BITS; n1++) {
					if (nodep->mask & (1 << n1))
						high = nodep->idx + n1;
					else
						break;
				}

				if ((n1 == MASK_BITS) && nodep->num_after)
					high += nodep->num_after;

				/*
				 * How much room will it take to display
				 * this range.
				 */
				sz = display_range(NULL, low, high,
					current_line_len != 0);

				/*
				 * If there is not enough room, display
				 * a newline plus the indent of the next
				 * line.
				 */
				if (current_line_len + sz > DUMP_LINE_MAX) {
					fputs("\n", stream);
					fprintf(stream, "%*s", indent, "");
					current_line_len = 0;
				}

				/* Display the range */
				sz = display_range(stream, low, high,
					current_line_len != 0);
				current_line_len += sz;
			}
		}

		/*
		 * If num_after is non-zero and the most significant bit of
		 * the mask is not set, then a range still needs to be
		 * displayed for the bits described by num_after.
		 */
		if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
			low = nodep->idx + MASK_BITS;
			high = nodep->idx + MASK_BITS + nodep->num_after - 1;

			/*
			 * How much room will it take to display
			 * this range.
			 */
			sz = display_range(NULL, low, high,
				current_line_len != 0);

			/*
			 * If there is not enough room, display
			 * a newline plus the indent of the next
			 * line.
			 */
			if (current_line_len + sz > DUMP_LINE_MAX) {
				fputs("\n", stream);
				fprintf(stream, "%*s", indent, "");
				current_line_len = 0;
			}

			/* Display the range */
			sz = display_range(stream, low, high,
				current_line_len != 0);
			current_line_len += sz;
		}
	}
	fputs("\n", stream);
}

/* Validates the internal state of the sparsebit array given by
 * s. On error, diagnostic information is printed to stderr and
 * abort is called.
 */
void sparsebit_validate_internal(struct sparsebit *s)
{
	bool error_detected = false;
	struct node *nodep, *prev = NULL;
	sparsebit_num_t total_bits_set = 0;
	unsigned int n1;

	/* For each node */
	for (nodep = node_first(s); nodep;
		prev = nodep, nodep = node_next(s, nodep)) {

		/*
		 * Increase total bits set by the number of bits set
		 * in this node.
		 */
		for (n1 = 0; n1 < MASK_BITS; n1++)
			if (nodep->mask & (1 << n1))
				total_bits_set++;

		total_bits_set += nodep->num_after;

		/*
		 * Arbitrary choice as to whether a mask of 0 is allowed
		 * or not. For diagnostic purposes it is beneficial to
		 * have only one valid means to represent a set of bits.
		 * To support this an arbitrary choice has been made
		 * to not allow a mask of zero.
		 */
		if (nodep->mask == 0) {
			fprintf(stderr, "Node mask of zero, "
				"nodep: %p nodep->mask: 0x%x",
				nodep, nodep->mask);
			error_detected = true;
			break;
		}

		/*
		 * Validate that num_after is not greater than the maximum
		 * index minus the number of mask bits. The num_after member
		 * uses 0-based indexing and thus has no value that
		 * represents all bits set. This limitation is handled
		 * by requiring a non-zero mask. With a non-zero mask,
		 * MASK_BITS worth of bits are described by the mask,
		 * which makes the largest needed num_after equal to:
		 *
		 *	(~(sparsebit_num_t) 0) - MASK_BITS + 1
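		 *
		 * With a 32-bit mask (MASK_BITS == 32) this works out to
		 * 2^64 - 32, i.e. 0xffffffffffffffe0.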
		 */
		if (nodep->num_after
			> (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
			fprintf(stderr, "num_after too large, "
				"nodep: %p nodep->num_after: 0x%lx",
				nodep, nodep->num_after);
			error_detected = true;
			break;
		}

		/* Validate node index is divisible by the mask size */
		if (nodep->idx % MASK_BITS) {
			fprintf(stderr, "Node index not divisible by "
				"mask size,\n"
				"  nodep: %p nodep->idx: 0x%lx "
				"MASK_BITS: %lu\n",
				nodep, nodep->idx, MASK_BITS);
			error_detected = true;
			break;
		}

		/*
		 * Validate bits described by node don't wrap beyond the
		 * highest supported index.
		 */
		if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
			fprintf(stderr, "Bits described by node wrap "
				"beyond highest supported index,\n"
				"  nodep: %p nodep->idx: 0x%lx\n"
				"  MASK_BITS: %lu nodep->num_after: 0x%lx",
				nodep, nodep->idx, MASK_BITS, nodep->num_after);
			error_detected = true;
			break;
		}

		/* Check parent pointers. */
		if (nodep->left) {
			if (nodep->left->parent != nodep) {
				fprintf(stderr, "Left child parent pointer "
					"doesn't point to this node,\n"
					"  nodep: %p nodep->left: %p "
					"nodep->left->parent: %p",
					nodep, nodep->left,
					nodep->left->parent);
				error_detected = true;
				break;
			}
		}

		if (nodep->right) {
			if (nodep->right->parent != nodep) {
				fprintf(stderr, "Right child parent pointer "
					"doesn't point to this node,\n"
					"  nodep: %p nodep->right: %p "
					"nodep->right->parent: %p",
					nodep, nodep->right,
					nodep->right->parent);
				error_detected = true;
				break;
			}
		}

		if (!nodep->parent) {
			if (s->root != nodep) {
				fprintf(stderr, "Unexpected root node, "
					"s->root: %p nodep: %p",
					s->root, nodep);
				error_detected = true;
				break;
			}
		}

		if (prev) {
			/*
			 * Is index of previous node before index of
			 * current node?
			 */
			if (prev->idx >= nodep->idx) {
				fprintf(stderr, "Previous node index "
					">= current node index,\n"
					"  prev: %p prev->idx: 0x%lx\n"
					"  nodep: %p nodep->idx: 0x%lx",
					prev, prev->idx, nodep, nodep->idx);
				error_detected = true;
				break;
			}

			/*
			 * Nodes occur in ascending order, based on each
			 * node's starting index.
			 */
			if ((prev->idx + MASK_BITS + prev->num_after - 1)
				>= nodep->idx) {
				fprintf(stderr, "Previous node bit range "
					"overlap with current node bit range,\n"
					"  prev: %p prev->idx: 0x%lx "
					"prev->num_after: 0x%lx\n"
					"  nodep: %p nodep->idx: 0x%lx "
					"nodep->num_after: 0x%lx\n"
					"  MASK_BITS: %lu",
					prev, prev->idx, prev->num_after,
					nodep, nodep->idx, nodep->num_after,
					MASK_BITS);
				error_detected = true;
				break;
			}

			/*
			 * When the node has all mask bits set, it shouldn't
			 * be adjacent to the last bit described by the
			 * previous node.
			 */
			if (nodep->mask == ~(mask_t) 0 &&
			    prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
				fprintf(stderr, "Current node has mask with "
					"all bits set and is adjacent to the "
					"previous node,\n"
					"  prev: %p prev->idx: 0x%lx "
					"prev->num_after: 0x%lx\n"
					"  nodep: %p nodep->idx: 0x%lx "
					"nodep->num_after: 0x%lx\n"
					"  MASK_BITS: %lu",
					prev, prev->idx, prev->num_after,
					nodep, nodep->idx, nodep->num_after,
					MASK_BITS);

				error_detected = true;
				break;
			}
		}
	}

	if (!error_detected) {
		/*
		 * Is the sum of bits set in each node equal to the total
		 * count of bits set?
		 */
		if (s->num_set != total_bits_set) {
			fprintf(stderr, "Number of bits set mismatch,\n"
				"  s->num_set: 0x%lx total_bits_set: 0x%lx",
				s->num_set, total_bits_set);

			error_detected = true;
		}
	}

	if (error_detected) {
		fputs("  dump_internal:\n", stderr);
		sparsebit_dump_internal(stderr, s, 4);
		abort();
	}
}

#ifdef FUZZ
/* A simple but effective fuzzing driver. Look for bugs with the help
 * of some invariants and of a trivial representation of sparsebit.
 * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
 * afl-fuzz do the magic. :)
 */
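
/*
 * For example, one possible way to build and run this driver under AFL
 * (the compiler wrapper, include flags, and paths below are illustrative
 * assumptions; adjust for the local installation and source layout):
 *
 *	afl-gcc -DFUZZ -o sparsebit-fuzz sparsebit.c
 *	mkdir -p in
 *	dd if=/dev/zero of=in/zero bs=512 count=1
 *	dd if=/dev/urandom of=in/rand bs=512 count=1
 *	afl-fuzz -i in -o out -- ./sparsebit-fuzz
 */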

#include <stdlib.h>
#include <assert.h>

struct range {
	sparsebit_idx_t first, last;
	bool set;
};

struct sparsebit *s;
struct range ranges[1000];
int num_ranges;

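/*
 * Returns the reference-model value of bit idx: the most recently
 * recorded range containing idx wins, and bits never recorded are
 * treated as clear.
 */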
static bool get_value(sparsebit_idx_t idx)
{
	int i;

	for (i = num_ranges; --i >= 0; )
		if (ranges[i].first <= idx && idx <= ranges[i].last)
			return ranges[i].set;

	return false;
}

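/*
 * Performs one fuzz operation selected by code: normalizes (first, last)
 * so that first <= last, applies the corresponding sparsebit call(s), and
 * cross-checks the result against the ranges[] reference model.
 */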
static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
{
	sparsebit_num_t num;
	sparsebit_idx_t next;

	if (first < last) {
		num = last - first + 1;
	} else {
		num = first - last + 1;
		first = last;
		last = first + num - 1;
	}

	switch (code) {
	case 0:
		sparsebit_set(s, first);
		assert(sparsebit_is_set(s, first));
		assert(!sparsebit_is_clear(s, first));
		assert(sparsebit_any_set(s));
		assert(!sparsebit_all_clear(s));
		if (get_value(first))
			return;
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = first, .set = true };
		break;
	case 1:
		sparsebit_clear(s, first);
		assert(!sparsebit_is_set(s, first));
		assert(sparsebit_is_clear(s, first));
		assert(sparsebit_any_clear(s));
		assert(!sparsebit_all_set(s));
		if (!get_value(first))
			return;
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = first, .set = false };
		break;
	case 2:
		assert(sparsebit_is_set(s, first) == get_value(first));
		assert(sparsebit_is_clear(s, first) == !get_value(first));
		break;
	case 3:
		if (sparsebit_any_set(s))
			assert(get_value(sparsebit_first_set(s)));
		if (sparsebit_any_clear(s))
			assert(!get_value(sparsebit_first_clear(s)));
		sparsebit_set_all(s);
		assert(!sparsebit_any_clear(s));
		assert(sparsebit_all_set(s));
		num_ranges = 0;
		ranges[num_ranges++] = (struct range)
			{ .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
		break;
	case 4:
		if (sparsebit_any_set(s))
			assert(get_value(sparsebit_first_set(s)));
		if (sparsebit_any_clear(s))
			assert(!get_value(sparsebit_first_clear(s)));
		sparsebit_clear_all(s);
		assert(!sparsebit_any_set(s));
		assert(sparsebit_all_clear(s));
		num_ranges = 0;
		break;
	case 5:
		next = sparsebit_next_set(s, first);
		assert(next == 0 || next > first);
		assert(next == 0 || get_value(next));
		break;
	case 6:
		next = sparsebit_next_clear(s, first);
		assert(next == 0 || next > first);
		assert(next == 0 || !get_value(next));
		break;
	case 7:
		next = sparsebit_next_clear(s, first);
		if (sparsebit_is_set_num(s, first, num)) {
			assert(next == 0 || next > last);
			if (first)
				next = sparsebit_next_set(s, first - 1);
			else if (sparsebit_any_set(s))
				next = sparsebit_first_set(s);
			else
				return;
			assert(next == first);
		} else {
			assert(sparsebit_is_clear(s, first) || next <= last);
		}
		break;
	case 8:
		next = sparsebit_next_set(s, first);
		if (sparsebit_is_clear_num(s, first, num)) {
			assert(next == 0 || next > last);
			if (first)
				next = sparsebit_next_clear(s, first - 1);
			else if (sparsebit_any_clear(s))
				next = sparsebit_first_clear(s);
			else
				return;
			assert(next == first);
		} else {
			assert(sparsebit_is_set(s, first) || next <= last);
		}
		break;
	case 9:
		sparsebit_set_num(s, first, num);
		assert(sparsebit_is_set_num(s, first, num));
		assert(!sparsebit_is_clear_num(s, first, num));
		assert(sparsebit_any_set(s));
		assert(!sparsebit_all_clear(s));
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = last, .set = true };
		break;
	case 10:
		sparsebit_clear_num(s, first, num);
		assert(!sparsebit_is_set_num(s, first, num));
		assert(sparsebit_is_clear_num(s, first, num));
		assert(sparsebit_any_clear(s));
		assert(!sparsebit_all_set(s));
		if (num_ranges == 1000)
			exit(0);
		ranges[num_ranges++] = (struct range)
			{ .first = first, .last = last, .set = false };
		break;
	case 11:
		sparsebit_validate_internal(s);
		break;
	default:
		break;
	}
}

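/* Reads one byte of fuzz input; terminates the run cleanly at EOF. */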
unsigned char get8(void)
{
	int ch;

	ch = getchar();
	if (ch == EOF)
		exit(0);
	return ch;
}

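/* Reads eight bytes of fuzz input as a big-endian 64-bit value. */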
uint64_t get64(void)
{
	uint64_t x;

	x = get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	x = (x << 8) | get8();
	return (x << 8) | get8();
}

int main(void)
{
	s = sparsebit_alloc();
	for (;;) {
		uint8_t op = get8() & 0xf;
		uint64_t first = get64();
		uint64_t last = get64();

		operate(op, first, last);
	}
}
#endif