use of io.druid.extendedset.utilities.IntList in project druid by druid-io.
the class ImmutableConciseSet method complement.
/**
 * Computes the complement of {@code set} with respect to the first {@code length} positions.
 *
 * <p>A non-positive {@code length} yields an empty set. A {@code null} or empty input yields a
 * set built from at most two words: a {@code SEQUENCE_BIT} word carrying {@code length / 31 - 1}
 * and a {@code onesUntil(length % 31)} literal. Otherwise every word of the input is inverted,
 * the result is padded out to {@code length - 1} when the input ends earlier, the final literal
 * is cleared past that index, trailing zeros are trimmed, and the words are compacted.
 *
 * @param set    the set to complement; may be {@code null}
 * @param length number of positions the complement should span
 * @return the complemented set, or an empty set when nothing remains
 */
public static ImmutableConciseSet complement(ImmutableConciseSet set, int length) {
  if (length <= 0) {
    return new ImmutableConciseSet();
  }

  // Nothing to invert: the answer is simply "length" ones.
  if (set == null || set.isEmpty()) {
    final int fullBlocks = length / 31;
    final int remainderBits = length % 31;
    final boolean hasFill = fullBlocks > 0;
    final boolean hasTail = remainderBits > 0;

    final int[] onesWords = new int[(hasFill ? 1 : 0) + (hasTail ? 1 : 0)];
    int slot = 0;
    if (hasFill) {
      onesWords[slot++] = ConciseSetUtils.SEQUENCE_BIT | (fullBlocks - 1);
    }
    if (hasTail) {
      onesWords[slot] = ConciseSetUtils.onesUntil(remainderBits);
    }
    return ImmutableConciseSet.newImmutableFromMutable(new ConciseSet(onesWords, false));
  }

  final IntList flipped = new IntList();
  final int endIndex = length - 1;
  int blocksSeen = 0;

  // Invert each word: literals get their bits negated, sequences get their type bit toggled.
  final WordIterator source = set.newWordIterator();
  while (source.hasNext()) {
    final int current = source.next();
    blocksSeen = source.wordsWalked;
    final int inverted = ConciseSetUtils.isLiteral(current)
        ? ConciseSetUtils.ALL_ZEROS_LITERAL | ~current
        : ConciseSetUtils.SEQUENCE_BIT ^ current;
    flipped.add(inverted);
  }

  // Work out how far the requested end lies beyond the word that holds the last set bit.
  final int lastSetBit = set.getLast();
  final int offsetInWord = ConciseSetUtils.maxLiteralLengthModulus(lastSetBit);
  final int bitsToWordEnd = ConciseSetUtils.MAX_LITERAL_LENGTH - offsetInWord - 1;
  final int coveredUpTo = (lastSetBit < 0) ? 0 : lastSetBit + bitsToWordEnd;
  final int uncovered = endIndex - coveredUpTo;

  // Only pad when the end index is beyond the current word.
  if (uncovered > 0) {
    if (uncovered > ConciseSetUtils.MAX_LITERAL_LENGTH) {
      // The gap does not fit in one literal: emit a fill for the 31-bit blocks (count minus one).
      final int endIndexWordCount = ConciseSetUtils.maxLiteralLengthDivision(endIndex);
      flipped.add(ConciseSetUtils.SEQUENCE_BIT | (endIndexWordCount - blocksSeen - 1));
    }
    flipped.add(ConciseSetUtils.ALL_ONES_LITERAL);
  }

  // Clear the bits of the final literal that lie past the end index.
  final int tailIndex = flipped.length() - 1;
  final int tailWord = flipped.get(tailIndex);
  final int trimmedTail = ConciseSetUtils.isLiteral(tailWord)
      ? ConciseSetUtils.clearBitsAfterInLastWord(tailWord, ConciseSetUtils.maxLiteralLengthModulus(endIndex))
      : tailWord;
  flipped.set(tailIndex, trimmedTail);

  trimZeros(flipped);
  return flipped.isEmpty()
      ? new ImmutableConciseSet()
      : compact(new ImmutableConciseSet(IntBuffer.wrap(flipped.toArray())));
}
use of io.druid.extendedset.utilities.IntList in project druid by druid-io.
the class ImmutableConciseSet method doComplement.
/**
 * Inverts every word of {@code set} without extending the result past the set's last element.
 *
 * <p>Literal words have their bits negated and sequence words have their {@code SEQUENCE_BIT}
 * toggled. If the final resulting word is a literal, the bits after {@code set.getLast()} are
 * cleared. Trailing zeros are then trimmed.
 *
 * @param set the set to complement; may be {@code null}
 * @return the complemented set, or an empty set when the input is {@code null}/empty or nothing
 *         remains after trimming
 */
public static ImmutableConciseSet doComplement(ImmutableConciseSet set) {
  if (set == null || set.isEmpty()) {
    return new ImmutableConciseSet();
  }

  final IntList inverted = new IntList();
  for (WordIterator source = set.newWordIterator(); source.hasNext(); ) {
    final int current = source.next();
    final int flippedWord = ConciseSetUtils.isLiteral(current)
        ? ConciseSetUtils.ALL_ZEROS_LITERAL | ~current
        : ConciseSetUtils.SEQUENCE_BIT ^ current;
    inverted.add(flippedWord);
  }

  // Do not complement after the last element: mask the tail literal at that position.
  final int tailIndex = inverted.length() - 1;
  int tailWord = inverted.get(tailIndex);
  if (ConciseSetUtils.isLiteral(tailWord)) {
    final int lastBitOffset = ConciseSetUtils.maxLiteralLengthModulus(set.getLast());
    tailWord = ConciseSetUtils.clearBitsAfterInLastWord(tailWord, lastBitOffset);
  }
  inverted.set(tailIndex, tailWord);

  trimZeros(inverted);
  return inverted.isEmpty()
      ? new ImmutableConciseSet()
      : new ImmutableConciseSet(IntBuffer.wrap(inverted.toArray()));
}
use of io.druid.extendedset.utilities.IntList in project druid by druid-io.
the class ImmutableConciseSet method compact.
/**
 * Rebuilds {@code set} by feeding each of its words, in iteration order, through
 * {@code addAndCompact} and wrapping the resulting word list in a new set.
 *
 * @param set the set whose words are to be re-added
 * @return a new set backed by the re-added words
 */
public static ImmutableConciseSet compact(ImmutableConciseSet set) {
  final IntList compacted = new IntList();
  for (WordIterator source = set.newWordIterator(); source.hasNext(); ) {
    addAndCompact(compacted, source.next());
  }
  final int[] rawWords = compacted.toArray();
  return new ImmutableConciseSet(IntBuffer.wrap(rawWords));
}
use of io.druid.extendedset.utilities.IntList in project druid by druid-io.
the class ImmutableConciseSet method doIntersection.
/**
 * Intersects all sets produced by {@code sets} by walking their words in parallel.
 *
 * <p>If any input set is {@code null} or empty, an empty set is returned immediately. Otherwise
 * one {@code WordIterator} per set is kept in an array ordered by {@code INTERSECTION_COMPARATOR};
 * the front iterator is processed each round according to whether its current word is a literal,
 * a zero sequence or a one sequence, and output words are appended through {@code addAndCompact}.
 * Whenever an iterator runs out, its {@code wordsWalked} is folded into
 * {@code wordsWalkedAtSequenceEnd}; the main loop stops once the front iterator starts at or
 * beyond that point.
 *
 * <p>NOTE(review): when {@code sets} yields no elements at all, {@code wordsWalkedAtSequenceEnd}
 * stays at {@code Integer.MAX_VALUE} and the trailing "fill" step still appends a word - confirm
 * that callers never pass an empty iterator, or that this result is intended.
 *
 * @param sets the sets to intersect
 * @return the intersection, or an empty set when any input is {@code null}/empty or no words
 *         were produced
 */
public static ImmutableConciseSet doIntersection(Iterator<ImmutableConciseSet> sets) {
IntList retVal = new IntList();
ArrayList<WordIterator> iterators = new ArrayList<>();
// collect one primed iterator per set (its first word is read into itr.word)
while (sets.hasNext()) {
ImmutableConciseSet set = sets.next();
// an empty operand makes the whole intersection empty
if (set == null || set.isEmpty()) {
return new ImmutableConciseSet();
}
WordIterator itr = set.newWordIterator();
itr.word = itr.next();
iterators.add(itr);
}
// Keep iterators in a sorted array, because usually only a few bitsets are intersected, very rarely - a few dozens.
// Sorted array approach was benchmarked and proven to be faster than PriorityQueue (as in doUnion()) up to 100
// bitsets.
WordIterator[] theQ = iterators.toArray(new WordIterator[0]);
int qSize = theQ.length;
partialSort(theQ, qSize - 1, qSize, INTERSECTION_COMPARATOR);
// number of 31-bit blocks already written to retVal
int currIndex = 0;
// smallest wordsWalked among iterators that have been exhausted so far
int wordsWalkedAtSequenceEnd = Integer.MAX_VALUE;
while (qSize > 0) {
// highest queue index whose iterator was modified this round; -1 means nothing needs re-sorting
int maxChangedIndex = -1;
// grab the top element from the priority queue
WordIterator itr = theQ[0];
int word = itr.getWord();
// if a sequence has ended, we can break out because of Boolean logic
if (itr.startIndex >= wordsWalkedAtSequenceEnd) {
break;
}
// to fill the space
if (currIndex < itr.startIndex) {
// number of 31 bit blocks that comprise the fill minus one
addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (itr.startIndex - currIndex - 1)));
currIndex = itr.startIndex;
}
if (ConciseSetUtils.isLiteral(word)) {
// advance all other literals
int qIndex = 1;
while (qIndex < qSize && theQ[qIndex].startIndex == itr.startIndex) {
WordIterator i = theQ[qIndex];
int w = i.getWord();
// if we still have one fills with flipped bits, AND them here
if (ConciseSetUtils.isLiteral(w)) {
word &= w;
} else {
int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
word &= flipBitLiteral;
i.advanceTo(itr.wordsWalked);
}
}
// move this iterator on, or drop it from the queue when it is exhausted
if (i.hasNext()) {
i.word = i.next();
maxChangedIndex = qIndex;
qIndex++;
} else {
// qIndex is deliberately not incremented: the next element now sits at this index
removeElement(theQ, qIndex, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
}
}
// advance the set with the current literal forward and push result back to priority queue
addAndCompact(retVal, word);
currIndex++;
if (itr.hasNext()) {
itr.word = itr.next();
maxChangedIndex = Math.max(maxChangedIndex, 0);
} else {
removeElement(theQ, 0, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
}
} else if (ConciseSetUtils.isZeroSequence(word)) {
// extract a literal from the flip bits of the zero sequence
int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
// advance everything past the longest zero sequence
int qIndex = 1;
while (qIndex < qSize && theQ[qIndex].startIndex < itr.wordsWalked) {
WordIterator i = theQ[qIndex];
int w = i.getWord();
if (i.startIndex == itr.startIndex) {
// same position: this iterator's first block is ANDed into the flip-bit literal
if (ConciseSetUtils.isLiteral(w)) {
flipBitLiteral &= w;
} else if (ConciseSetUtils.isZeroSequence(w)) {
flipBitLiteral &= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
} else {
assert ConciseSetUtils.isOneSequence(w);
flipBitLiteral &= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
}
}
i.advanceTo(itr.wordsWalked);
if (i.hasNext()) {
i.word = i.next();
maxChangedIndex = qIndex;
qIndex++;
} else {
removeElement(theQ, qIndex, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
}
}
// advance longest zero literal forward and push result back to priority queue
// if a flip bit is still needed, put it in the correct position
// 0xC1FFFFFF clears bits 25-29 of the word; the flip position is written back into them below
int newWord = word & 0xC1FFFFFF;
if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
newWord = (word & 0xC1FFFFFF) | (position << 25);
}
addAndCompact(retVal, newWord);
currIndex = itr.wordsWalked;
if (itr.hasNext()) {
itr.word = itr.next();
maxChangedIndex = Math.max(maxChangedIndex, 0);
} else {
removeElement(theQ, 0, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
}
} else {
assert ConciseSetUtils.isOneSequence(word);
// this branch emits no output word and leaves currIndex unchanged; it only rewrites or advances iterators
int flipBitLiteral;
int qIndex = 1;
while (qIndex < qSize && theQ[qIndex].startIndex == itr.startIndex) {
// check if literal can be created flip bits of other one sequences
WordIterator i = theQ[qIndex];
int w = i.getWord();
flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
// replace the iterator's current word with the literal derived from its flip bit
i.word = flipBitLiteral;
maxChangedIndex = qIndex;
qIndex++;
} else if (i.hasNext()) {
i.word = i.next();
maxChangedIndex = qIndex;
qIndex++;
} else {
removeElement(theQ, qIndex, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
}
}
// check if a literal needs to be created from the flipped bits of this sequence
flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);
if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
itr.word = flipBitLiteral;
maxChangedIndex = Math.max(maxChangedIndex, 0);
} else if (itr.hasNext()) {
itr.word = itr.next();
maxChangedIndex = Math.max(maxChangedIndex, 0);
} else {
removeElement(theQ, 0, qSize);
qSize--;
wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
}
}
// restore queue order for the slots [0, maxChangedIndex] that were touched this round
if (maxChangedIndex >= 0) {
partialSort(theQ, maxChangedIndex, qSize, INTERSECTION_COMPARATOR);
}
}
// fill in any missing one sequences
if (currIndex < wordsWalkedAtSequenceEnd) {
addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (wordsWalkedAtSequenceEnd - currIndex - 1)));
}
if (retVal.isEmpty()) {
return new ImmutableConciseSet();
}
return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
}
use of io.druid.extendedset.utilities.IntList in project druid by druid-io.
the class ImmutableConciseSet method doUnion.
/**
 * Unions all sets produced by {@code sets} by walking their words in parallel.
 *
 * <p>{@code null} and empty input sets are skipped. One {@code WordIterator} per remaining set is
 * held in a {@code PriorityQueue} ordered by {@code UNION_COMPARATOR}. Each round polls the head
 * iterator and handles it according to whether its current word is a literal, a zero sequence or
 * a one sequence; output words are appended through {@code addAndCompact}. Iterators that were
 * advanced or rewritten during a round are collected in {@code changedIterators} and re-added to
 * the queue at the end of that round; exhausted iterators are simply not re-added.
 *
 * @param sets the sets to union
 * @return the union, or an empty set when no words were produced
 */
private static ImmutableConciseSet doUnion(Iterator<ImmutableConciseSet> sets) {
IntList retVal = new IntList();
// Use PriorityQueue, because sometimes as much as 20k of bitsets are unified, and the asymptotic complexity of
// keeping bitsets in a sorted array (n^2), as in doIntersection(), becomes more important factor than PriorityQueue
// inefficiency.
// Need to specify initial capacity because JDK 7 doesn't have Comparator-only constructor of PriorityQueue
PriorityQueue<WordIterator> theQ = new PriorityQueue<>(11, UNION_COMPARATOR);
// populate priority queue
while (sets.hasNext()) {
ImmutableConciseSet set = sets.next();
// null/empty operands contribute nothing to a union, so they are skipped
if (set != null && !set.isEmpty()) {
WordIterator itr = set.newWordIterator();
itr.word = itr.next();
theQ.add(itr);
}
}
// number of 31-bit blocks already written to retVal
int currIndex = 0;
// iterators polled and modified during the current round, re-queued at the end of the round
List<WordIterator> changedIterators = new ArrayList<>();
while (!theQ.isEmpty()) {
// grab the top element from the priority queue
WordIterator itr = theQ.poll();
int word = itr.getWord();
// to fill the space
if (currIndex < itr.startIndex) {
// gap word is the block count minus one with no SEQUENCE_BIT set; presumably a zero fill - confirm
addAndCompact(retVal, itr.startIndex - currIndex - 1);
currIndex = itr.startIndex;
}
if (ConciseSetUtils.isLiteral(word)) {
// advance all other literals
while (!theQ.isEmpty() && theQ.peek().startIndex == itr.startIndex) {
WordIterator i = theQ.poll();
int w = i.getWord();
// if we still have zero fills with flipped bits, OR them here
if (ConciseSetUtils.isLiteral(w)) {
word |= w;
} else {
int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
word |= flipBitLiteral;
i.advanceTo(itr.wordsWalked);
}
}
// an exhausted iterator is dropped by not re-queuing it
if (i.hasNext()) {
i.word = i.next();
changedIterators.add(i);
}
}
// advance the set with the current literal forward and push result back to priority queue
addAndCompact(retVal, word);
currIndex++;
if (itr.hasNext()) {
itr.word = itr.next();
changedIterators.add(itr);
}
} else if (ConciseSetUtils.isZeroSequence(word)) {
// this branch emits no output word and leaves currIndex unchanged; it only rewrites or advances iterators
int flipBitLiteral;
while (!theQ.isEmpty() && theQ.peek().startIndex == itr.startIndex) {
WordIterator i = theQ.poll();
int w = i.getWord();
flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
// replace the iterator's current word with the literal derived from its flip bit
i.word = flipBitLiteral;
changedIterators.add(i);
} else if (i.hasNext()) {
i.word = i.next();
changedIterators.add(i);
}
}
// check if a literal needs to be created from the flipped bits of this sequence
flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
itr.word = flipBitLiteral;
changedIterators.add(itr);
} else if (itr.hasNext()) {
itr.word = itr.next();
changedIterators.add(itr);
}
} else {
assert ConciseSetUtils.isOneSequence(word);
// extract a literal from the flip bits of the one sequence
int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);
// advance everything past the longest ones sequence
while (!theQ.isEmpty() && theQ.peek().startIndex < itr.wordsWalked) {
WordIterator i = theQ.poll();
int w = i.getWord();
if (i.startIndex == itr.startIndex) {
// same position: this iterator's first block is ORed into the flip-bit literal
if (ConciseSetUtils.isLiteral(w)) {
flipBitLiteral |= w;
} else if (ConciseSetUtils.isZeroSequence(w)) {
flipBitLiteral |= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
} else {
assert ConciseSetUtils.isOneSequence(w);
flipBitLiteral |= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
}
}
i.advanceTo(itr.wordsWalked);
if (i.hasNext()) {
i.word = i.next();
changedIterators.add(i);
}
}
// advance longest one literal forward and push result back to priority queue
// if a flip bit is still needed, put it in the correct position
// 0xC1FFFFFF clears bits 25-29 of the word; the flip position is written back into them below
int newWord = word & 0xC1FFFFFF;
if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
// invert against ALL_ONES_LITERAL so the bits that differed from all-ones become set bits
flipBitLiteral ^= ConciseSetUtils.ALL_ONES_LITERAL;
int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
newWord |= (position << 25);
}
addAndCompact(retVal, newWord);
currIndex = itr.wordsWalked;
if (itr.hasNext()) {
itr.word = itr.next();
changedIterators.add(itr);
}
}
// re-queue everything touched this round so the queue re-orders it by its new state
theQ.addAll(changedIterators);
changedIterators.clear();
}
if (retVal.isEmpty()) {
return new ImmutableConciseSet();
}
return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
}
Aggregations