Search in sources :

Example 1 with IntList

use of io.druid.extendedset.utilities.IntList in project druid by druid-io.

the class ImmutableConciseSet method complement.

/**
 * Computes the complement of {@code set} over the bit positions {@code [0, length)}.
 *
 * @param set    the set to invert; {@code null} or an empty set is handled as a set with no bits
 * @param length number of bit positions to cover; a value {@code <= 0} produces an empty set
 * @return the complemented set, or an empty set when no words remain after trimming
 */
public static ImmutableConciseSet complement(ImmutableConciseSet set, int length) {
    if (length <= 0) {
        return new ImmutableConciseSet();
    }
    // Nothing is set in the input: the answer is simply `length` consecutive ones.
    final boolean nothingSet = (set == null) || set.isEmpty();
    if (nothingSet) {
        final int fullBlocks = length / 31;
        final int remainderBits = length % 31;
        final boolean needsFill = fullBlocks > 0;
        final boolean needsTail = remainderBits > 0;
        final int[] onesWords = new int[(needsFill ? 1 : 0) + (needsTail ? 1 : 0)];
        int slot = 0;
        if (needsFill) {
            // one-sequence word; the count field holds the number of blocks minus one
            onesWords[slot++] = ConciseSetUtils.SEQUENCE_BIT | (fullBlocks - 1);
        }
        if (needsTail) {
            onesWords[slot] = ConciseSetUtils.onesUntil(remainderBits);
        }
        return ImmutableConciseSet.newImmutableFromMutable(new ConciseSet(onesWords, false));
    }

    // Flip every word: literals are bit-inverted, sequence words get their sequence bit toggled.
    final IntList flipped = new IntList();
    final int finalIndex = length - 1;
    int walked = 0;
    final WordIterator source = set.newWordIterator();
    while (source.hasNext()) {
        final int current = source.next();
        walked = source.wordsWalked;
        final int inverted = ConciseSetUtils.isLiteral(current)
                ? (ConciseSetUtils.ALL_ZEROS_LITERAL | ~current)
                : (ConciseSetUtils.SEQUENCE_BIT ^ current);
        flipped.add(inverted);
    }

    // Work out how far the requested range reaches beyond the end of the word holding the last set bit.
    final int lastSet = set.getLast();
    final int offsetInWord = ConciseSetUtils.maxLiteralLengthModulus(lastSet);
    final int bitsToWordEnd = ConciseSetUtils.MAX_LITERAL_LENGTH - offsetInWord - 1;
    final int coveredUpTo;
    if (lastSet < 0) {
        coveredUpTo = 0;
    } else {
        coveredUpTo = lastSet + bitsToWordEnd;
    }
    final int uncovered = finalIndex - coveredUpTo;

    // Extend with ones only when the requested range goes past the words already emitted.
    if (uncovered > 0) {
        if (uncovered > ConciseSetUtils.MAX_LITERAL_LENGTH) {
            // the gap does not fit in one literal: emit a one-fill (block count minus one) first
            final int finalWordIndex = ConciseSetUtils.maxLiteralLengthDivision(finalIndex);
            flipped.add(ConciseSetUtils.SEQUENCE_BIT | (finalWordIndex - walked - 1));
        }
        flipped.add(ConciseSetUtils.ALL_ONES_LITERAL);
    }

    // Drop any bits of the trailing literal that lie past the requested end index.
    final int tailPosition = flipped.length() - 1;
    final int tailWord = flipped.get(tailPosition);
    final int cleanedTail = ConciseSetUtils.isLiteral(tailWord)
            ? ConciseSetUtils.clearBitsAfterInLastWord(tailWord, ConciseSetUtils.maxLiteralLengthModulus(finalIndex))
            : tailWord;
    flipped.set(tailPosition, cleanedTail);

    trimZeros(flipped);
    if (flipped.isEmpty()) {
        return new ImmutableConciseSet();
    }
    final ImmutableConciseSet uncompacted = new ImmutableConciseSet(IntBuffer.wrap(flipped.toArray()));
    return compact(uncompacted);
}
Also used : IntList(io.druid.extendedset.utilities.IntList)

Example 2 with IntList

use of io.druid.extendedset.utilities.IntList in project druid by druid-io.

the class ImmutableConciseSet method doComplement.

/**
 * Complements {@code set} word by word, without extending past its last element.
 *
 * @param set the set to invert; {@code null} or empty yields an empty set
 * @return the complemented set, or an empty set when no words remain after trimming
 */
public static ImmutableConciseSet doComplement(ImmutableConciseSet set) {
    final boolean nothingToFlip = (set == null) || set.isEmpty();
    if (nothingToFlip) {
        return new ImmutableConciseSet();
    }

    // Literals are bit-inverted; sequence words have their sequence bit toggled.
    final IntList flipped = new IntList();
    for (WordIterator source = set.newWordIterator(); source.hasNext(); ) {
        final int current = source.next();
        final int inverted = ConciseSetUtils.isLiteral(current)
                ? (ConciseSetUtils.ALL_ZEROS_LITERAL | ~current)
                : (ConciseSetUtils.SEQUENCE_BIT ^ current);
        flipped.add(inverted);
    }

    // Bits after the original last element must stay unset in the result.
    final int tailPosition = flipped.length() - 1;
    final int tailWord = flipped.get(tailPosition);
    final int cleanedTail = ConciseSetUtils.isLiteral(tailWord)
            ? ConciseSetUtils.clearBitsAfterInLastWord(tailWord, ConciseSetUtils.maxLiteralLengthModulus(set.getLast()))
            : tailWord;
    flipped.set(tailPosition, cleanedTail);

    trimZeros(flipped);
    if (flipped.isEmpty()) {
        return new ImmutableConciseSet();
    }
    final int[] rawWords = flipped.toArray();
    return new ImmutableConciseSet(IntBuffer.wrap(rawWords));
}
Also used : IntList(io.druid.extendedset.utilities.IntList)

Example 3 with IntList

use of io.druid.extendedset.utilities.IntList in project druid by druid-io.

the class ImmutableConciseSet method compact.

/**
 * Rebuilds {@code set} by feeding each of its words through {@code addAndCompact}.
 *
 * @param set the set whose words are re-added one by one
 * @return a new set wrapping the resulting word array
 */
public static ImmutableConciseSet compact(ImmutableConciseSet set) {
    final IntList compacted = new IntList();
    for (WordIterator source = set.newWordIterator(); source.hasNext(); ) {
        addAndCompact(compacted, source.next());
    }
    final int[] rawWords = compacted.toArray();
    return new ImmutableConciseSet(IntBuffer.wrap(rawWords));
}
Also used : IntList(io.druid.extendedset.utilities.IntList)

Example 4 with IntList

use of io.druid.extendedset.utilities.IntList in project druid by druid-io.

the class ImmutableConciseSet method doIntersection.

/**
 * Intersects all sets supplied by {@code sets}, merging their words through a small sorted array of
 * word iterators.
 *
 * @param sets the sets to intersect; encountering a {@code null} or empty set returns an empty result at once
 * @return the intersection, or an empty set when no words were produced
 */
public static ImmutableConciseSet doIntersection(Iterator<ImmutableConciseSet> sets) {
    IntList retVal = new IntList();
    ArrayList<WordIterator> iterators = new ArrayList<>();
    // collect one word iterator per input set, each primed with its first word
    while (sets.hasNext()) {
        ImmutableConciseSet set = sets.next();
        // an empty operand makes the whole intersection empty
        if (set == null || set.isEmpty()) {
            return new ImmutableConciseSet();
        }
        WordIterator itr = set.newWordIterator();
        itr.word = itr.next();
        iterators.add(itr);
    }
    // Keep iterators in a sorted array, because usually only a few bitsets are intersected, very rarely - a few dozens.
    // Sorted array approach was benchmarked and proven to be faster than PriorityQueue (as in doUnion()) up to 100
    // bitsets.
    WordIterator[] theQ = iterators.toArray(new WordIterator[0]);
    int qSize = theQ.length;
    partialSort(theQ, qSize - 1, qSize, INTERSECTION_COMPARATOR);
    // word position up to which output has been emitted
    int currIndex = 0;
    // smallest wordsWalked among iterators that ran out of words; MAX_VALUE until one does
    int wordsWalkedAtSequenceEnd = Integer.MAX_VALUE;
    while (qSize > 0) {
        // highest array slot whose iterator changed in this pass; -1 means no re-sort is needed
        int maxChangedIndex = -1;
        // grab the top element from the sorted array
        WordIterator itr = theQ[0];
        int word = itr.getWord();
        // if a sequence has ended, we can break out because of Boolean logic
        if (itr.startIndex >= wordsWalkedAtSequenceEnd) {
            break;
        }
        // to fill the space
        if (currIndex < itr.startIndex) {
            // number of 31 bit blocks that comprise the fill minus one
            addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (itr.startIndex - currIndex - 1)));
            currIndex = itr.startIndex;
        }
        if (ConciseSetUtils.isLiteral(word)) {
            // advance all other literals
            int qIndex = 1;
            while (qIndex < qSize && theQ[qIndex].startIndex == itr.startIndex) {
                WordIterator i = theQ[qIndex];
                int w = i.getWord();
                // if we still have one fills with flipped bits, AND them here
                if (ConciseSetUtils.isLiteral(w)) {
                    word &= w;
                } else {
                    int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                    if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                        word &= flipBitLiteral;
                        i.advanceTo(itr.wordsWalked);
                    }
                }
                if (i.hasNext()) {
                    i.word = i.next();
                    maxChangedIndex = qIndex;
                    qIndex++;
                } else {
                    // iterator exhausted: drop it and remember where it ended (qIndex is not advanced here)
                    removeElement(theQ, qIndex, qSize);
                    qSize--;
                    wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
                }
            }
            // emit the combined literal, then advance the iterator that supplied it
            addAndCompact(retVal, word);
            currIndex++;
            if (itr.hasNext()) {
                itr.word = itr.next();
                maxChangedIndex = Math.max(maxChangedIndex, 0);
            } else {
                removeElement(theQ, 0, qSize);
                qSize--;
                wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
            }
        } else if (ConciseSetUtils.isZeroSequence(word)) {
            // extract a literal from the flip bits of the zero sequence
            int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
            // advance everything past the longest zero sequence
            int qIndex = 1;
            while (qIndex < qSize && theQ[qIndex].startIndex < itr.wordsWalked) {
                WordIterator i = theQ[qIndex];
                int w = i.getWord();
                if (i.startIndex == itr.startIndex) {
                    // same position
                    if (ConciseSetUtils.isLiteral(w)) {
                        flipBitLiteral &= w;
                    } else if (ConciseSetUtils.isZeroSequence(w)) {
                        flipBitLiteral &= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                    } else {
                        assert ConciseSetUtils.isOneSequence(w);
                        flipBitLiteral &= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                    }
                }
                i.advanceTo(itr.wordsWalked);
                if (i.hasNext()) {
                    i.word = i.next();
                    maxChangedIndex = qIndex;
                    qIndex++;
                } else {
                    removeElement(theQ, qIndex, qSize);
                    qSize--;
                    wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
                }
            }
            // emit the zero sequence; the 0xC1FFFFFF mask clears bits 25..29 of the word
            // if a flip bit is still needed, put it in the correct position
            int newWord = word & 0xC1FFFFFF;
            if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                // store (index of the lowest set bit + 1) into the cleared bits, starting at bit 25
                int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
                newWord = (word & 0xC1FFFFFF) | (position << 25);
            }
            addAndCompact(retVal, newWord);
            currIndex = itr.wordsWalked;
            if (itr.hasNext()) {
                itr.word = itr.next();
                maxChangedIndex = Math.max(maxChangedIndex, 0);
            } else {
                removeElement(theQ, 0, qSize);
                qSize--;
                wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
            }
        } else {
            assert ConciseSetUtils.isOneSequence(word);
            // nothing is emitted in this branch; iterators are only replaced by a literal or advanced
            int flipBitLiteral;
            int qIndex = 1;
            while (qIndex < qSize && theQ[qIndex].startIndex == itr.startIndex) {
                // check if literal can be created flip bits of other one sequences
                WordIterator i = theQ[qIndex];
                int w = i.getWord();
                flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                    // swap the iterator's current word for the derived literal
                    i.word = flipBitLiteral;
                    maxChangedIndex = qIndex;
                    qIndex++;
                } else if (i.hasNext()) {
                    i.word = i.next();
                    maxChangedIndex = qIndex;
                    qIndex++;
                } else {
                    removeElement(theQ, qIndex, qSize);
                    qSize--;
                    wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
                }
            }
            // check if a literal needs to be created from the flipped bits of this sequence
            flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);
            if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                itr.word = flipBitLiteral;
                maxChangedIndex = Math.max(maxChangedIndex, 0);
            } else if (itr.hasNext()) {
                itr.word = itr.next();
                maxChangedIndex = Math.max(maxChangedIndex, 0);
            } else {
                removeElement(theQ, 0, qSize);
                qSize--;
                wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
            }
        }
        // re-sort the array slots up to the highest one that changed
        if (maxChangedIndex >= 0) {
            partialSort(theQ, maxChangedIndex, qSize, INTERSECTION_COMPARATOR);
        }
    }
    // fill in any missing one sequences
    if (currIndex < wordsWalkedAtSequenceEnd) {
        addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (wordsWalkedAtSequenceEnd - currIndex - 1)));
    }
    if (retVal.isEmpty()) {
        return new ImmutableConciseSet();
    }
    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
}
Also used : ArrayList(java.util.ArrayList) IntList(io.druid.extendedset.utilities.IntList)

Example 5 with IntList

use of io.druid.extendedset.utilities.IntList in project druid by druid-io.

the class ImmutableConciseSet method doUnion.

/**
 * Unions all sets supplied by {@code sets}, merging their words through a priority queue of word
 * iterators ordered by {@code UNION_COMPARATOR}.
 *
 * @param sets the sets to union; {@code null} and empty sets are skipped
 * @return the union, or an empty set when no words were produced
 */
private static ImmutableConciseSet doUnion(Iterator<ImmutableConciseSet> sets) {
    IntList retVal = new IntList();
    // Use PriorityQueue, because sometimes as much as 20k of bitsets are unified, and the asymptotic complexity of
    // keeping bitsets in a sorted array (n^2), as in doIntersection(), becomes more important factor than PriorityQueue
    // inefficiency.
    // Need to specify initial capacity because JDK 7 doesn't have Comparator-only constructor of PriorityQueue
    PriorityQueue<WordIterator> theQ = new PriorityQueue<>(11, UNION_COMPARATOR);
    // populate priority queue with one iterator per non-empty set, each primed with its first word
    while (sets.hasNext()) {
        ImmutableConciseSet set = sets.next();
        if (set != null && !set.isEmpty()) {
            WordIterator itr = set.newWordIterator();
            itr.word = itr.next();
            theQ.add(itr);
        }
    }
    // word position up to which output has been emitted
    int currIndex = 0;
    // iterators polled during a pass that still have a word to offer; re-added at the end of the pass
    List<WordIterator> changedIterators = new ArrayList<>();
    while (!theQ.isEmpty()) {
        // grab the top element from the priority queue
        WordIterator itr = theQ.poll();
        int word = itr.getWord();
        // to fill the space (the word is just the block count minus one, without SEQUENCE_BIT)
        if (currIndex < itr.startIndex) {
            addAndCompact(retVal, itr.startIndex - currIndex - 1);
            currIndex = itr.startIndex;
        }
        if (ConciseSetUtils.isLiteral(word)) {
            // advance all other literals
            while (!theQ.isEmpty() && theQ.peek().startIndex == itr.startIndex) {
                WordIterator i = theQ.poll();
                int w = i.getWord();
                // if we still have zero fills with flipped bits, OR them here
                if (ConciseSetUtils.isLiteral(w)) {
                    word |= w;
                } else {
                    int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                    if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                        word |= flipBitLiteral;
                        i.advanceTo(itr.wordsWalked);
                    }
                }
                // an iterator with no further words is simply not re-queued
                if (i.hasNext()) {
                    i.word = i.next();
                    changedIterators.add(i);
                }
            }
            // emit the combined literal, then advance the iterator that supplied it
            addAndCompact(retVal, word);
            currIndex++;
            if (itr.hasNext()) {
                itr.word = itr.next();
                changedIterators.add(itr);
            }
        } else if (ConciseSetUtils.isZeroSequence(word)) {
            // nothing is emitted in this branch; iterators are only replaced by a literal or advanced
            int flipBitLiteral;
            while (!theQ.isEmpty() && theQ.peek().startIndex == itr.startIndex) {
                WordIterator i = theQ.poll();
                int w = i.getWord();
                flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                    // swap the iterator's current word for the derived literal
                    i.word = flipBitLiteral;
                    changedIterators.add(i);
                } else if (i.hasNext()) {
                    i.word = i.next();
                    changedIterators.add(i);
                }
            }
            // check if a literal needs to be created from the flipped bits of this sequence
            flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
            if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
                itr.word = flipBitLiteral;
                changedIterators.add(itr);
            } else if (itr.hasNext()) {
                itr.word = itr.next();
                changedIterators.add(itr);
            }
        } else {
            assert ConciseSetUtils.isOneSequence(word);
            // extract a literal from the flip bits of the one sequence
            int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);
            // advance everything past the longest ones sequence
            while (!theQ.isEmpty() && theQ.peek().startIndex < itr.wordsWalked) {
                WordIterator i = theQ.poll();
                int w = i.getWord();
                if (i.startIndex == itr.startIndex) {
                    // same position: OR in the other iterator's first block
                    if (ConciseSetUtils.isLiteral(w)) {
                        flipBitLiteral |= w;
                    } else if (ConciseSetUtils.isZeroSequence(w)) {
                        flipBitLiteral |= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
                    } else {
                        assert ConciseSetUtils.isOneSequence(w);
                        flipBitLiteral |= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
                    }
                }
                i.advanceTo(itr.wordsWalked);
                if (i.hasNext()) {
                    i.word = i.next();
                    changedIterators.add(i);
                }
            }
            // emit the one sequence; the 0xC1FFFFFF mask clears bits 25..29 of the word
            // if a flip bit is still needed, put it in the correct position
            int newWord = word & 0xC1FFFFFF;
            if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
                // XOR with all-ones, then store (index of the lowest set bit + 1) starting at bit 25
                flipBitLiteral ^= ConciseSetUtils.ALL_ONES_LITERAL;
                int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
                newWord |= (position << 25);
            }
            addAndCompact(retVal, newWord);
            currIndex = itr.wordsWalked;
            if (itr.hasNext()) {
                itr.word = itr.next();
                changedIterators.add(itr);
            }
        }
        // re-queue every iterator touched in this pass
        theQ.addAll(changedIterators);
        changedIterators.clear();
    }
    if (retVal.isEmpty()) {
        return new ImmutableConciseSet();
    }
    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
}
Also used : ArrayList(java.util.ArrayList) PriorityQueue(java.util.PriorityQueue) IntList(io.druid.extendedset.utilities.IntList)

Aggregations

IntList (io.druid.extendedset.utilities.IntList): 5, ArrayList (java.util.ArrayList): 2, PriorityQueue (java.util.PriorityQueue): 1