use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
the class LogicalPart method getPages.
/**
 * Applies the provided indexes to work out which pages of this logical part may contain matching rows.
 * <p>
 * The indexes are consulted in this order:
 * <ol>
 * <li>min-max: if, for any column, every range lies outside the recorded min/max of that column,
 * an empty list is returned;</li>
 * <li>bloom filter: if, for any column, every range is a single value rejected by that column's
 * bloom filter, an empty list is returned;</li>
 * <li>sparse: the pages whose indices the sparse index yields for the first entry of
 * {@code sparseChannelsToRangesMap} are returned (only one sort column is supported).</li>
 * </ol>
 * In the min-max and sparse steps, a lookup value that is not {@link Comparable} (e.g. null) means the
 * index cannot be used, and the result of the no-argument {@code getPages()} is returned unfiltered.
 * <p>
 * This method assumes that the indexes exist for the provided mappings,
 * i.e. map.contains(channelNum) should've been done earlier or an NPE may occur.
 * An additional check is not done to reduce unnecessary lookups.
 *
 * @param minmaxChannelsToRangesMap column channel to the ranges checked against the min-max index
 * @param bloomChannelsToRangesMap column channel to the ranges checked against the bloom filter index
 * @param sparseChannelsToRangesMap column channel to the ranges looked up in the sparse index
 * @return an empty list when the min-max or bloom index rules this logical part out, the pages selected
 * by the sparse index when a sparse mapping is given, otherwise the result of {@code getPages()}
 */
List<Page> getPages(Map<Integer, List<Range>> minmaxChannelsToRangesMap, Map<Integer, List<Range>> bloomChannelsToRangesMap, Map<Integer, List<Range>> sparseChannelsToRangesMap) {
    // min-max index
    // predicates are AND'd together, so one column without any matching range filters the logical part
    for (Map.Entry<Integer, List<Range>> e : minmaxChannelsToRangesMap.entrySet()) {
        int expressionColumnIndex = e.getKey();
        List<Range> ranges = e.getValue();
        // only filter using minmax if all ranges do not match
        int noMatches = 0;
        for (Range range : ranges) {
            if (range.isSingleValue()) {
                Object lookupValue = getNativeValue(range.getSingleValue());
                if (!(lookupValue instanceof Comparable)) {
                    // the lookup value isn't comparable, we can't do filtering, e.g. if it's null
                    LOG.warn("Lookup value is not Comparable. MinMax index could not be used.");
                    return getPages();
                }
                Comparable comparableLookupValue = (Comparable) lookupValue;
                // assumes minMaxIdx map will contain the entry since the check should've been done earlier
                Map.Entry<Comparable, Comparable> columnMinMaxEntry = minMaxIdx.get(expressionColumnIndex);
                Comparable min = columnMinMaxEntry.getKey();
                Comparable max = columnMinMaxEntry.getValue();
                if (comparableLookupValue.compareTo(min) < 0 || comparableLookupValue.compareTo(max) > 0) {
                    // lookup value is outside minmax range, skip logical part
                    noMatches++;
                }
                continue;
            }

            // <, <=, >=, >, BETWEEN
            boolean highBoundless = range.getHigh().isUpperUnbounded();
            boolean lowBoundless = range.getLow().isLowerUnbounded();
            Map.Entry<Comparable, Comparable> columnMinMaxEntry = minMaxIdx.get(expressionColumnIndex);
            Comparable min = columnMinMaxEntry.getKey();
            Comparable max = columnMinMaxEntry.getValue();
            if (highBoundless && !lowBoundless) {
                // >= or >
                Object lowLookupValue = getNativeValue(range.getLow().getValue());
                if (!(lowLookupValue instanceof Comparable)) {
                    LOG.warn("Lookup value is not Comparable. MinMax index could not be used.");
                    return getPages();
                }
                boolean inclusive = range.getLow().getBound().equals(Marker.Bound.EXACTLY);
                int lowVsMax = ((Comparable) lowLookupValue).compareTo(max);
                // ">= v" misses when v > max; "> v" already misses when v == max
                if (inclusive ? lowVsMax > 0 : lowVsMax >= 0) {
                    noMatches++;
                }
            }
            else if (!highBoundless && lowBoundless) {
                // <= or <
                Object highLookupValue = getNativeValue(range.getHigh().getValue());
                if (!(highLookupValue instanceof Comparable)) {
                    LOG.warn("Lookup value is not Comparable. MinMax index could not be used.");
                    return getPages();
                }
                boolean inclusive = range.getHigh().getBound().equals(Marker.Bound.EXACTLY);
                int highVsMin = ((Comparable) highLookupValue).compareTo(min);
                // "<= v" misses when v < min; "< v" already misses when v == min
                if (inclusive ? highVsMin < 0 : highVsMin <= 0) {
                    noMatches++;
                }
            }
            else if (!highBoundless && !lowBoundless) {
                // BETWEEN
                Object lowLookupValue = getNativeValue(range.getLow().getValue());
                Object highLookupValue = getNativeValue(range.getHigh().getValue());
                if (!(lowLookupValue instanceof Comparable && highLookupValue instanceof Comparable)) {
                    LOG.warn("Lookup value is not Comparable. MinMax index could not be used.");
                    return getPages();
                }
                // bound exclusivity is ignored here, which only makes the check more conservative
                if (((Comparable) lowLookupValue).compareTo(max) > 0 || ((Comparable) highLookupValue).compareTo(min) < 0) {
                    noMatches++;
                }
            }
            // a range unbounded on both sides can never be excluded, so it is not counted
        }
        // if all ranges for this column had no match, filter this logical part
        if (noMatches == ranges.size()) {
            return Collections.emptyList();
        }
    }

    // bloom filter index
    // if any column has no range match, the whole logical part can be filtered since it is assumed all column
    // predicates are AND'd together
    for (Map.Entry<Integer, List<Range>> e : bloomChannelsToRangesMap.entrySet()) {
        int expressionColumnIndex = e.getKey();
        List<Range> ranges = e.getValue();
        // only filter using bloom if all values in range do not match
        int falseCount = 0;
        for (Range range : ranges) {
            if (range.isSingleValue()) {
                Object lookupValue = getNativeValue(range.getSingleValue());
                // assumes bloomIdx map will contain the entry since the check should've been done earlier
                BloomFilter filter = bloomIdx.get(expressionColumnIndex);
                if (!testFilter(filter, lookupValue)) {
                    falseCount++;
                }
            }
        }
        // if all ranges for this column had no match, filter this logical part
        if (falseCount == ranges.size()) {
            return Collections.emptyList();
        }
    }

    // sparse index
    // TODO: currently only one sort column is supported so if there's a matching sparse index it will automatically be on that column
    // (hence the loop body always returns during its first iteration)
    for (Map.Entry<Integer, List<Range>> e : sparseChannelsToRangesMap.entrySet()) {
        List<Range> ranges = e.getValue();
        Set<Integer> result = new HashSet<>();
        for (Range range : ranges) {
            if (range.isSingleValue()) {
                // unique value (for example: id=1, id in (1); the IN operator has multiple singleValue ranges), bound: EXACTLY
                Object lookupValue = getNativeValue(range.getSingleValue());
                if (!(lookupValue instanceof Comparable)) {
                    LOG.warn("Lookup value is not Comparable. Sparse index could not be queried.");
                    return getPages();
                }
                SparseValue exactMatch = sparseIdx.get(lookupValue);
                if (exactMatch != null) {
                    result.addAll(exactMatch.getPageIndices());
                }
                Integer additionalPageIdx = getLowerPageIndex((Comparable) lookupValue, (Comparable) lookupValue, true, null, false);
                if (additionalPageIdx != null) {
                    result.add(additionalPageIdx);
                }
                continue;
            }

            // <, <=, >=, >, BETWEEN
            boolean highBoundless = range.getHigh().isUpperUnbounded();
            boolean lowBoundless = range.getLow().isLowerUnbounded();
            boolean fromInclusive = range.getLow().getBound().equals(Marker.Bound.EXACTLY);
            boolean toInclusive = range.getHigh().getBound().equals(Marker.Bound.EXACTLY);
            NavigableMap<Comparable, SparseValue> navigableMap;
            Comparable low;
            Comparable high;
            if (highBoundless && !lowBoundless) {
                // >= or >
                // check the native value, since that is the object actually cast and compared
                Object lowValue = getNativeValue(range.getLow().getValue());
                if (!(lowValue instanceof Comparable)) {
                    LOG.warn("Lookup value is not Comparable. Sparse index could not be queried.");
                    return getPages();
                }
                low = (Comparable) lowValue;
                high = sparseIdx.lastKey();
                // subMap rejects fromKey > toKey, so handle that case explicitly
                if (low.compareTo(high) > 0) {
                    navigableMap = Collections.emptyNavigableMap();
                }
                else {
                    navigableMap = sparseIdx.subMap(low, fromInclusive, high, true);
                }
            }
            else if (!highBoundless && lowBoundless) {
                // <= or <
                Object highValue = getNativeValue(range.getHigh().getValue());
                if (!(highValue instanceof Comparable)) {
                    LOG.warn("Lookup value is not Comparable. Sparse index could not be queried.");
                    return getPages();
                }
                low = sparseIdx.firstKey();
                high = (Comparable) highValue;
                if (low.compareTo(high) > 0) {
                    navigableMap = Collections.emptyNavigableMap();
                }
                else {
                    navigableMap = sparseIdx.subMap(low, true, high, toInclusive);
                }
            }
            else if (!highBoundless && !lowBoundless) {
                // BETWEEN, non-inclusive range < && >
                Object lowValue = getNativeValue(range.getLow().getValue());
                Object highValue = getNativeValue(range.getHigh().getValue());
                // both bounds are cast below, so both must be Comparable
                if (!(lowValue instanceof Comparable && highValue instanceof Comparable)) {
                    LOG.warn("Lookup value is not Comparable. Sparse index could not be queried.");
                    return getPages();
                }
                low = min((Comparable) highValue, (Comparable) lowValue);
                high = max((Comparable) highValue, (Comparable) lowValue);
                navigableMap = sparseIdx.subMap(low, fromInclusive, high, toInclusive);
            }
            else {
                // unbounded on both sides: nothing to narrow down
                return getPages();
            }

            for (SparseValue sparseValue : navigableMap.values()) {
                result.addAll(sparseValue.getPageIndices());
            }
            if (!lowBoundless) {
                Comparable lowestSparseIdxInDom = null;
                if (!navigableMap.isEmpty()) {
                    lowestSparseIdxInDom = navigableMap.firstKey();
                }
                Integer additionalResultIdx = getLowerPageIndex(lowestSparseIdxInDom, low, fromInclusive, high, toInclusive);
                if (additionalResultIdx != null) {
                    result.add(additionalResultIdx);
                }
            }
        }

        List<Page> resultPageList = new ArrayList<>(result.size());
        if (!result.isEmpty()) {
            // fetch the page list once instead of once per selected index
            List<Page> allPages = getPages();
            for (Integer idx : result) {
                resultPageList.add(allPages.get(idx));
            }
        }
        return resultPageList;
    }
    return getPages();
}
use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
the class ByteArrayBlockTest method testFilter.
/**
 * Cross-checks {@code ByteArrayBlock.filter} against calling {@code BloomFilter.test} on every
 * position individually, and prints the accumulated nanoseconds spent in each approach.
 *
 * @param size argument forwarded to {@code getBf} when building the two bloom filters
 * @throws RuntimeException with message {@code "error" + position} on the first position where
 * the two approaches disagree
 */
public void testFilter(int size) {
    final int positions = 1024;
    final int rounds = 100;
    ByteArrayBlock perValueBlock = new ByteArrayBlock(positions, Optional.empty(), getValues(positions));
    ByteArrayBlock batchBlock = new ByteArrayBlock(positions, Optional.empty(), getValues(positions));
    BloomFilter perValueFilter = getBf(size);
    BloomFilter batchFilter = getBf(size);
    long perValueNanos = 0;
    long batchNanos = 0;

    for (int round = 0; round < rounds; round++) {
        boolean[] expected = new boolean[positions];
        boolean[] actual = new boolean[positions];
        Arrays.fill(expected, true);
        Arrays.fill(actual, true);

        // reference path: one bloom filter probe per position
        long begin = System.nanoTime();
        for (int pos = 0; pos < positions; pos++) {
            byte current = perValueBlock.getByte(pos, 0);
            expected[pos] = perValueFilter.test(current);
        }
        perValueNanos += System.nanoTime() - begin;

        // path under test: the block filters all positions in one call
        begin = System.nanoTime();
        batchBlock.filter(batchFilter, actual);
        batchNanos += System.nanoTime() - begin;

        for (int pos = 0; pos < positions; pos++) {
            if (expected[pos] != actual[pos]) {
                throw new RuntimeException("error" + pos);
            }
        }
    }

    System.out.println("bfsize: " + size + " origi: " + perValueNanos);
    System.out.println("bfsize: " + size + " block: " + batchNanos);
}
use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
the class Int128ArrayBlockTest method testFilter.
/**
 * Verifies that {@code Int128ArrayBlock.filter} produces the same per-position result as probing
 * the bloom filter with a {@code Slice} built from the two longs of each position, and prints the
 * accumulated nanoseconds of both approaches.
 *
 * @param size argument forwarded to {@code getBf} when building the two bloom filters
 * @throws RuntimeException with message {@code "error" + position} on the first mismatch
 */
public void testFilter(int size) {
    final int entryCount = 1024;
    Int128ArrayBlock sliceProbeBlock = new Int128ArrayBlock(entryCount, Optional.empty(), getValues(entryCount * 2));
    Int128ArrayBlock bulkBlock = new Int128ArrayBlock(entryCount, Optional.empty(), getValues(entryCount * 2));
    BloomFilter sliceProbeFilter = getBf(size);
    BloomFilter bulkFilter = getBf(size);
    long sliceProbeElapsed = 0;
    long bulkElapsed = 0;

    int iteration = 0;
    while (iteration < 100) {
        boolean[] reference = new boolean[entryCount];
        boolean[] filtered = new boolean[entryCount];
        Arrays.fill(reference, true);
        Arrays.fill(filtered, true);

        // reference path: wrap both 64-bit halves of each entry in a Slice and probe it
        long startedAt = System.nanoTime();
        for (int idx = 0; idx < entryCount; idx++) {
            Slice wrapped = Slices.wrappedLongArray(sliceProbeBlock.getLong(idx, 0), sliceProbeBlock.getLong(idx, SIZE_OF_LONG));
            reference[idx] = sliceProbeFilter.test(wrapped);
        }
        sliceProbeElapsed += System.nanoTime() - startedAt;

        // path under test: single bulk filter call on the block
        startedAt = System.nanoTime();
        bulkBlock.filter(bulkFilter, filtered);
        bulkElapsed += System.nanoTime() - startedAt;

        for (int idx = 0; idx < entryCount; idx++) {
            if (reference[idx] != filtered[idx]) {
                throw new RuntimeException("error" + idx);
            }
        }
        iteration++;
    }

    System.out.println("bfsize: " + size + " origi: " + sliceProbeElapsed);
    System.out.println("bfsize: " + size + " block: " + bulkElapsed);
}
use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
the class LongArrayBlockTest method testFilter.
/**
 * Cross-checks {@code LongArrayBlock.filter} against calling {@code BloomFilter.test} on every
 * position individually, and prints the accumulated nanoseconds spent in each approach.
 * <p>
 * The per-element read is no longer timed separately: that figure was accumulated but never
 * reported, and its two extra {@code System.nanoTime()} calls per element ran inside the timed
 * region, inflating the "origi" total it is compared against.
 *
 * @param size argument forwarded to {@code getBf} when building the two bloom filters
 * @throws RuntimeException with message {@code "error" + position} on the first position where
 * the two approaches disagree
 */
public void testFilter(int size) {
    int count = 1024;
    LongArrayBlock block1 = new LongArrayBlock(count, Optional.empty(), getValues(count));
    LongArrayBlock block2 = new LongArrayBlock(count, Optional.empty(), getValues(count));
    BloomFilter bf1 = getBf(size);
    BloomFilter bf2 = getBf(size);
    long total1 = 0;
    long total2 = 0;
    for (int j = 0; j < 100; j++) {
        boolean[] result1 = new boolean[count];
        boolean[] result2 = new boolean[count];
        Arrays.fill(result1, true);
        Arrays.fill(result2, true);

        // reference path: one bloom filter probe per position
        long start = System.nanoTime();
        for (int i = 0; i < count; i++) {
            long value = block1.get(i);
            result1[i] = bf1.test(value);
        }
        total1 += System.nanoTime() - start;

        // path under test: the block filters all positions in one call
        start = System.nanoTime();
        block2.filter(bf2, result2);
        total2 += System.nanoTime() - start;

        for (int i = 0; i < count; i++) {
            if (result1[i] != result2[i]) {
                throw new RuntimeException("error" + i);
            }
        }
    }
    System.out.println("bfsize: " + size + " origi: " + total1);
    System.out.println("bfsize: " + size + " block: " + total2);
}
use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.
the class ShortArrayBlockTest method testFilter.
/**
 * Checks that {@code ShortArrayBlock.filter} agrees, position by position, with probing the
 * bloom filter using each short read from the block, and prints the accumulated nanoseconds of
 * both approaches.
 *
 * @param size argument forwarded to {@code getBf} when building the two bloom filters
 * @throws RuntimeException with message {@code "error" + position} on the first mismatch
 */
public void testFilter(int size) {
    final int length = 1024;
    final int repetitions = 100;
    ShortArrayBlock scalarBlock = new ShortArrayBlock(length, Optional.empty(), getValues(length));
    ShortArrayBlock vectorBlock = new ShortArrayBlock(length, Optional.empty(), getValues(length));
    BloomFilter scalarFilter = getBf(size);
    BloomFilter vectorFilter = getBf(size);
    long scalarTime = 0;
    long vectorTime = 0;

    for (int rep = 0; rep < repetitions; rep++) {
        boolean[] scalarHits = new boolean[length];
        boolean[] vectorHits = new boolean[length];
        Arrays.fill(scalarHits, true);
        Arrays.fill(vectorHits, true);

        // reference path: read each short and probe the filter with it
        long t0 = System.nanoTime();
        for (int k = 0; k < length; k++) {
            short element = scalarBlock.getShort(k, 0);
            scalarHits[k] = scalarFilter.test(element);
        }
        scalarTime += System.nanoTime() - t0;

        // path under test: let the block apply the filter to every position
        t0 = System.nanoTime();
        vectorBlock.filter(vectorFilter, vectorHits);
        vectorTime += System.nanoTime() - t0;

        for (int k = 0; k < length; k++) {
            if (scalarHits[k] != vectorHits[k]) {
                throw new RuntimeException("error" + k);
            }
        }
    }

    System.out.println("bfsize: " + size + " origi: " + scalarTime);
    System.out.println("bfsize: " + size + " block: " + vectorTime);
}
Aggregations