Search in sources :

Example 1 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class LogicalPart method getPages.

/**
 * Returns the pages of this logical part that may satisfy the given predicates, pruning with the
 * minmax, bloom and sparse indexes in that order.
 * <p>
 * This method assumes that the indexes exist for the provided mappings,
 * i.e. map.contains(channelNum) should've been done earlier or an NPE may occur.
 * An additional check is not done to reduce unnecessary lookups.
 * <p>
 * Predicates on different columns are assumed to be AND'd together, so a single column for which
 * none of its ranges can match is enough to rule out the whole logical part.
 *
 * @param minmaxChannelsToRangesMap ranges per column index, checked against the column's min/max entry
 * @param bloomChannelsToRangesMap ranges per column index, checked against the column's bloom filter
 *         (only single-value ranges are tested)
 * @param sparseChannelsToRangesMap ranges per column index, resolved through the sparse index; only the
 *         first entry is consulted because a single sort column is supported
 * @return an empty list when the minmax or bloom index proves that nothing can match; the pages selected
 *         through the sparse index when a sparse mapping is provided; otherwise all pages. All pages are
 *         also returned whenever a lookup value is not {@link Comparable} and an index cannot be used.
 */
List<Page> getPages(Map<Integer, List<Range>> minmaxChannelsToRangesMap, Map<Integer, List<Range>> bloomChannelsToRangesMap, Map<Integer, List<Range>> sparseChannelsToRangesMap) {
    // minmax index
    // predicates are AND'd together
    for (Map.Entry<Integer, List<Range>> e : minmaxChannelsToRangesMap.entrySet()) {
        int expressionColumnIndex = e.getKey();
        List<Range> ranges = e.getValue();
        // only filter using minmax if all ranges do not match
        int noMatches = 0;
        for (Range range : ranges) {
            if (range.isSingleValue()) {
                Object lookupValue = getNativeValue(range.getSingleValue());
                if (lookupValue instanceof Comparable) {
                    Comparable comparableLookupValue = (Comparable) lookupValue;
                    // assumes minMaxIdx map will contain the entry since the check should've been done earlier
                    Map.Entry<Comparable, Comparable> columnMinMaxEntry = minMaxIdx.get(expressionColumnIndex);
                    Comparable min = columnMinMaxEntry.getKey();
                    Comparable max = columnMinMaxEntry.getValue();
                    if (comparableLookupValue.compareTo(min) < 0 || comparableLookupValue.compareTo(max) > 0) {
                        // lookup value is outside minmax range, skip logicalpart
                        noMatches++;
                    }
                } else {
                    // the lookup value isn't comparable, we can't do filtering, e.g. if it's null
                    LOG.warn("Lookup value is not Comparable. MinMax index could not be used.");
                    return getPages();
                }
            } else {
                // <, <=, >=, >, BETWEEN
                boolean highBoundless = range.getHigh().isUpperUnbounded();
                boolean lowBoundless = range.getLow().isLowerUnbounded();
                Map.Entry<Comparable, Comparable> columnMinMaxEntry = minMaxIdx.get(expressionColumnIndex);
                Comparable min = columnMinMaxEntry.getKey();
                Comparable max = columnMinMaxEntry.getValue();
                if (highBoundless && !lowBoundless) {
                    // >= or >
                    Object lowLookupValue = getNativeValue(range.getLow().getValue());
                    if (lowLookupValue instanceof Comparable) {
                        Comparable lowComparableLookupValue = (Comparable) lowLookupValue;
                        boolean inclusive = range.getLow().getBound().equals(Marker.Bound.EXACTLY);
                        if (inclusive) {
                            if (lowComparableLookupValue.compareTo(max) > 0) {
                                // lookup value is outside minmax range, skip logicalpart
                                noMatches++;
                            }
                        } else {
                            // exclusive bound: a lower bound equal to max cannot match either
                            if (lowComparableLookupValue.compareTo(max) >= 0) {
                                // lookup value is outside minmax range, skip logicalpart
                                noMatches++;
                            }
                        }
                    } else {
                        // the lookup value isn't comparable, we can't do filtering, e.g. if it's null
                        LOG.warn("Lookup value is not Comparable. MinMax index could not be used.");
                        return getPages();
                    }
                } else if (!highBoundless && lowBoundless) {
                    // <= or <
                    Object highLookupValue = getNativeValue(range.getHigh().getValue());
                    if (highLookupValue instanceof Comparable) {
                        Comparable highComparableLookupValue = (Comparable) highLookupValue;
                        boolean inclusive = range.getHigh().getBound().equals(Marker.Bound.EXACTLY);
                        if (inclusive) {
                            if (highComparableLookupValue.compareTo(min) < 0) {
                                // lookup value is outside minmax range, skip logicalpart
                                noMatches++;
                            }
                        } else {
                            // exclusive bound: an upper bound equal to min cannot match either
                            if (highComparableLookupValue.compareTo(min) <= 0) {
                                // lookup value is outside minmax range, skip logicalpart
                                noMatches++;
                            }
                        }
                    } else {
                        // the lookup value isn't comparable, we can't do filtering, e.g. if it's null
                        LOG.warn("Lookup value is not Comparable. MinMax index could not be used.");
                        return getPages();
                    }
                } else if (!highBoundless && !lowBoundless) {
                    // BETWEEN
                    // bound inclusivity is ignored here, which can only keep a part that could have
                    // been skipped, never drop one that matches
                    Object lowLookupValue = getNativeValue(range.getLow().getValue());
                    Object highLookupValue = getNativeValue(range.getHigh().getValue());
                    if (lowLookupValue instanceof Comparable && highLookupValue instanceof Comparable) {
                        Comparable lowComparableLookupValue = (Comparable) lowLookupValue;
                        Comparable highComparableLookupValue = (Comparable) highLookupValue;
                        if (lowComparableLookupValue.compareTo(max) > 0 || highComparableLookupValue.compareTo(min) < 0) {
                            // lookup value is outside minmax range, skip logicalpart
                            noMatches++;
                        }
                    } else {
                        // the lookup value isn't comparable, we can't do filtering, e.g. if it's null
                        LOG.warn("Lookup value is not Comparable. MinMax index could not be used.");
                        return getPages();
                    }
                }
            }
        }
        // if all ranges for this column had no match, filter this logipart
        if (noMatches == ranges.size()) {
            return Collections.emptyList();
        }
    }
    // bloom filter index
    // if any column has no range match, the whole logipart can be filtered since it is assumed all column
    // predicates are AND'd together
    for (Map.Entry<Integer, List<Range>> e : bloomChannelsToRangesMap.entrySet()) {
        int expressionColumnIndex = e.getKey();
        List<Range> ranges = e.getValue();
        // assumes bloomIdx map will contain the entry since the check should've been done earlier
        BloomFilter filter = bloomIdx.get(expressionColumnIndex);
        // only filter using bloom if all values in range do not match;
        // non-single-value ranges are never counted, so their presence keeps the logipart
        int falseCount = 0;
        for (Range range : ranges) {
            if (range.isSingleValue()) {
                Object lookupValue = getNativeValue(range.getSingleValue());
                if (!testFilter(filter, lookupValue)) {
                    falseCount++;
                }
            }
        }
        // if all ranges for this column had no match, filter this logipart
        if (falseCount == ranges.size()) {
            return Collections.emptyList();
        }
    }
    // TODO: currently only one sort column is supported so if there's an matching sparse index it will automatically be on that column
    // the loop body always returns, so only the first entry of the map is ever processed
    for (Map.Entry<Integer, List<Range>> e : sparseChannelsToRangesMap.entrySet()) {
        List<Range> ranges = e.getValue();
        Set<Integer> result = new HashSet<>();
        for (Range range : ranges) {
            if (range.isSingleValue()) {
                // unique value(for example: id=1, id in (1) (IN operator has multiple singleValue ranges), bound: EXACTLY
                Object lookupValue = getNativeValue(range.getSingleValue());
                if (!(lookupValue instanceof Comparable)) {
                    LOG.warn("Lookup value is not Comparable. Sparse index could not be queried.");
                    return getPages();
                }
                if (sparseIdx.containsKey(lookupValue)) {
                    result.addAll(sparseIdx.get(lookupValue).getPageIndices());
                }
                Integer additionalPageIdx = getLowerPageIndex((Comparable) lookupValue, (Comparable) lookupValue, true, null, false);
                if (additionalPageIdx != null) {
                    result.add(additionalPageIdx);
                }
            } else {
                // <, <=, >=, >, BETWEEN
                boolean highBoundless = range.getHigh().isUpperUnbounded();
                boolean lowBoundless = range.getLow().isLowerUnbounded();
                boolean fromInclusive = range.getLow().getBound().equals(Marker.Bound.EXACTLY);
                boolean toInclusive = range.getHigh().getBound().equals(Marker.Bound.EXACTLY);
                NavigableMap<Comparable, SparseValue> navigableMap;
                Comparable low;
                Comparable high;
                if (highBoundless && !lowBoundless) {
                    // >= or >
                    // check the native value because that is the object that gets cast below
                    Object lowLookupValue = getNativeValue(range.getLow().getValue());
                    if (!(lowLookupValue instanceof Comparable)) {
                        LOG.warn("Lookup value is not Comparable. Sparse index could not be queried.");
                        return getPages();
                    }
                    low = (Comparable) lowLookupValue;
                    high = sparseIdx.lastKey();
                    if (low.compareTo(high) > 0) {
                        // subMap would reject fromKey > toKey, and nothing can match anyway
                        navigableMap = Collections.emptyNavigableMap();
                    } else {
                        navigableMap = sparseIdx.subMap(low, fromInclusive, high, true);
                    }
                } else if (!highBoundless && lowBoundless) {
                    // <= or <
                    Object highLookupValue = getNativeValue(range.getHigh().getValue());
                    if (!(highLookupValue instanceof Comparable)) {
                        LOG.warn("Lookup value is not Comparable. Sparse index could not be queried.");
                        return getPages();
                    }
                    low = sparseIdx.firstKey();
                    high = (Comparable) highLookupValue;
                    if (low.compareTo(high) > 0) {
                        // subMap would reject fromKey > toKey, and nothing can match anyway
                        navigableMap = Collections.emptyNavigableMap();
                    } else {
                        navigableMap = sparseIdx.subMap(low, true, high, toInclusive);
                    }
                } else if (!highBoundless && !lowBoundless) {
                    // BETWEEN, non-inclusive range < && >
                    // both bounds are cast below, so both must be Comparable to use the index
                    Object lowLookupValue = getNativeValue(range.getLow().getValue());
                    Object highLookupValue = getNativeValue(range.getHigh().getValue());
                    if (!(lowLookupValue instanceof Comparable && highLookupValue instanceof Comparable)) {
                        LOG.warn("Lookup value is not Comparable. Sparse index could not be queried.");
                        return getPages();
                    }
                    Comparable lowComparableLookupValue = (Comparable) lowLookupValue;
                    Comparable highComparableLookupValue = (Comparable) highLookupValue;
                    // normalise the bounds so that subMap never sees fromKey > toKey
                    low = min(highComparableLookupValue, lowComparableLookupValue);
                    high = max(highComparableLookupValue, lowComparableLookupValue);
                    navigableMap = sparseIdx.subMap(low, fromInclusive, high, toInclusive);
                } else {
                    // unbounded on both sides: nothing to prune with
                    return getPages();
                }
                for (Map.Entry<Comparable, SparseValue> entry : navigableMap.entrySet()) {
                    result.addAll(entry.getValue().getPageIndices());
                }
                if (!lowBoundless) {
                    Comparable lowestSparseIdxInDom = null;
                    if (!navigableMap.isEmpty()) {
                        lowestSparseIdxInDom = navigableMap.firstKey();
                    }
                    Integer additionalResultIdx = getLowerPageIndex(lowestSparseIdxInDom, low, fromInclusive, high, toInclusive);
                    if (additionalResultIdx != null) {
                        result.add(additionalResultIdx);
                    }
                }
            }
        }
        // resolve the collected page indices against the page list once
        List<Page> allPages = getPages();
        List<Page> resultPageList = new ArrayList<>(result.size());
        for (Integer idx : result) {
            resultPageList.add(allPages.get(idx));
        }
        return resultPageList;
    }
    return getPages();
}
Also used : ArrayList(java.util.ArrayList) Page(io.prestosql.spi.Page) Range(io.prestosql.spi.predicate.Range) BloomFilter(io.prestosql.spi.util.BloomFilter) List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap) AbstractMap(java.util.AbstractMap) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet)

Example 2 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class ByteArrayBlockTest method testFilter.

/**
 * Runs a bloom filter over a byte block element by element and through the block's own
 * {@code filter} method, fails on the first position where the two result masks differ,
 * and prints the accumulated time of each approach.
 *
 * @param size value handed to {@code getBf} to build the bloom filters
 */
public void testFilter(int size) {
    final int positionCount = 1024;
    ByteArrayBlock scalarBlock = new ByteArrayBlock(positionCount, Optional.empty(), getValues(positionCount));
    ByteArrayBlock bulkBlock = new ByteArrayBlock(positionCount, Optional.empty(), getValues(positionCount));
    BloomFilter scalarFilter = getBf(size);
    BloomFilter bulkFilter = getBf(size);
    long scalarNanos = 0;
    long bulkNanos = 0;
    int round = 0;
    while (round < 100) {
        // fresh masks every round, initialised to "all positions pass"
        boolean[] scalarResult = new boolean[positionCount];
        boolean[] bulkResult = new boolean[positionCount];
        Arrays.fill(scalarResult, true);
        Arrays.fill(bulkResult, true);

        // per-element path
        long begin = System.nanoTime();
        for (int position = 0; position < positionCount; position++) {
            byte current = scalarBlock.getByte(position, 0);
            scalarResult[position] = scalarFilter.test(current);
        }
        scalarNanos += System.nanoTime() - begin;

        // block-level path
        begin = System.nanoTime();
        bulkBlock.filter(bulkFilter, bulkResult);
        bulkNanos += System.nanoTime() - begin;

        // both paths must agree at every position
        for (int position = 0; position < positionCount; position++) {
            if (scalarResult[position] != bulkResult[position]) {
                throw new RuntimeException("error" + position);
            }
        }
        round++;
    }
    System.out.println("bfsize: " + size + "  origi: " + scalarNanos);
    System.out.println("bfsize: " + size + "  block: " + bulkNanos);
}
Also used : BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 3 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class Int128ArrayBlockTest method testFilter.

/**
 * Runs a bloom filter over an int128 block element by element and through the block's own
 * {@code filter} method, fails on the first position where the two result masks differ,
 * and prints the accumulated time of each approach.
 *
 * @param size value handed to {@code getBf} to build the bloom filters
 */
public void testFilter(int size) {
    final int positionCount = 1024;
    // two longs back every position, hence twice as many values as positions
    Int128ArrayBlock scalarBlock = new Int128ArrayBlock(positionCount, Optional.empty(), getValues(positionCount * 2));
    Int128ArrayBlock bulkBlock = new Int128ArrayBlock(positionCount, Optional.empty(), getValues(positionCount * 2));
    BloomFilter scalarFilter = getBf(size);
    BloomFilter bulkFilter = getBf(size);
    long scalarNanos = 0;
    long bulkNanos = 0;
    int round = 0;
    while (round < 100) {
        // fresh masks every round, initialised to "all positions pass"
        boolean[] scalarResult = new boolean[positionCount];
        boolean[] bulkResult = new boolean[positionCount];
        Arrays.fill(scalarResult, true);
        Arrays.fill(bulkResult, true);

        // per-element path: wrap both longs of the position in a slice and probe the filter
        long begin = System.nanoTime();
        for (int position = 0; position < positionCount; position++) {
            long firstLong = scalarBlock.getLong(position, 0);
            long secondLong = scalarBlock.getLong(position, SIZE_OF_LONG);
            Slice current = Slices.wrappedLongArray(firstLong, secondLong);
            scalarResult[position] = scalarFilter.test(current);
        }
        scalarNanos += System.nanoTime() - begin;

        // block-level path
        begin = System.nanoTime();
        bulkBlock.filter(bulkFilter, bulkResult);
        bulkNanos += System.nanoTime() - begin;

        // both paths must agree at every position
        for (int position = 0; position < positionCount; position++) {
            if (scalarResult[position] != bulkResult[position]) {
                throw new RuntimeException("error" + position);
            }
        }
        round++;
    }
    System.out.println("bfsize: " + size + "  origi: " + scalarNanos);
    System.out.println("bfsize: " + size + "  block: " + bulkNanos);
}
Also used : Slice(io.airlift.slice.Slice) BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 4 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class LongArrayBlockTest method testFilter.

/**
 * Runs a bloom filter over a long block element by element and through the block's own
 * {@code filter} method, fails on the first position where the two result masks differ,
 * and prints the accumulated time of each approach.
 *
 * @param size value handed to {@code getBf} to build the bloom filters
 */
public void testFilter(int size) {
    int count = 1024;
    LongArrayBlock block1 = new LongArrayBlock(count, Optional.empty(), getValues(count));
    LongArrayBlock block2 = new LongArrayBlock(count, Optional.empty(), getValues(count));
    BloomFilter bf1 = getBf(size);
    BloomFilter bf2 = getBf(size);
    long total1 = 0;
    long total2 = 0;
    for (int j = 0; j < 100; j++) {
        // fresh masks every round, initialised to "all positions pass"
        boolean[] result1 = new boolean[count];
        boolean[] result2 = new boolean[count];
        Arrays.fill(result1, Boolean.TRUE);
        Arrays.fill(result2, Boolean.TRUE);
        // per-element path; no extra clock reads inside the loop so that the measured
        // time covers only the value fetch and the filter probe
        long start = System.nanoTime();
        for (int i = 0; i < count; i++) {
            long value = block1.get(i);
            result1[i] = bf1.test(value);
        }
        total1 += System.nanoTime() - start;
        // block-level path
        start = System.nanoTime();
        block2.filter(bf2, result2);
        total2 += System.nanoTime() - start;
        // both paths must agree at every position
        for (int i = 0; i < count; i++) {
            if (result1[i] != result2[i]) {
                throw new RuntimeException("error" + i);
            }
        }
    }
    System.out.println("bfsize: " + size + "  origi: " + total1);
    System.out.println("bfsize: " + size + "  block: " + total2);
}
Also used : BloomFilter(io.prestosql.spi.util.BloomFilter)

Example 5 with BloomFilter

use of io.prestosql.spi.util.BloomFilter in project hetu-core by openlookeng.

the class ShortArrayBlockTest method testFilter.

/**
 * Runs a bloom filter over a short block element by element and through the block's own
 * {@code filter} method, fails on the first position where the two result masks differ,
 * and prints the accumulated time of each approach.
 *
 * @param size value handed to {@code getBf} to build the bloom filters
 */
public void testFilter(int size) {
    final int positionCount = 1024;
    ShortArrayBlock scalarBlock = new ShortArrayBlock(positionCount, Optional.empty(), getValues(positionCount));
    ShortArrayBlock bulkBlock = new ShortArrayBlock(positionCount, Optional.empty(), getValues(positionCount));
    BloomFilter scalarFilter = getBf(size);
    BloomFilter bulkFilter = getBf(size);
    long scalarNanos = 0;
    long bulkNanos = 0;
    int round = 0;
    while (round < 100) {
        // fresh masks every round, initialised to "all positions pass"
        boolean[] scalarResult = new boolean[positionCount];
        boolean[] bulkResult = new boolean[positionCount];
        Arrays.fill(scalarResult, true);
        Arrays.fill(bulkResult, true);

        // per-element path
        long begin = System.nanoTime();
        for (int position = 0; position < positionCount; position++) {
            short current = scalarBlock.getShort(position, 0);
            scalarResult[position] = scalarFilter.test(current);
        }
        scalarNanos += System.nanoTime() - begin;

        // block-level path
        begin = System.nanoTime();
        bulkBlock.filter(bulkFilter, bulkResult);
        bulkNanos += System.nanoTime() - begin;

        // both paths must agree at every position
        for (int position = 0; position < positionCount; position++) {
            if (scalarResult[position] != bulkResult[position]) {
                throw new RuntimeException("error" + position);
            }
        }
        round++;
    }
    System.out.println("bfsize: " + size + "  origi: " + scalarNanos);
    System.out.println("bfsize: " + size + "  block: " + bulkNanos);
}
Also used : BloomFilter(io.prestosql.spi.util.BloomFilter)

Aggregations

BloomFilter (io.prestosql.spi.util.BloomFilter)23 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)6 DynamicFilter (io.prestosql.spi.dynamicfilter.DynamicFilter)6 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)6 HashSet (java.util.HashSet)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 Map (java.util.Map)5 Test (org.testng.annotations.Test)5 Page (io.prestosql.spi.Page)4 BloomFilterDynamicFilter (io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter)4 IOException (java.io.IOException)4 Symbol (io.prestosql.spi.plan.Symbol)3 StateSet (io.prestosql.spi.statestore.StateSet)3 List (java.util.List)3 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 Block (io.prestosql.spi.block.Block)2 RowExpression (io.prestosql.spi.relation.RowExpression)2 StateMap (io.prestosql.spi.statestore.StateMap)2