Search in sources :

Example 1 with IndexLookUpException

use of io.prestosql.spi.heuristicindex.IndexLookUpException in project hetu-core by openlookeng.

the class TestBTreeIndex method testLessThanEqualTo.

/**
 * Builds a BTreeIndex with keys 100..199 mapped to "value0".."value99", round-trips it through a
 * file, and checks that a {@code <= 120} lookup on the deserialized index yields exactly
 * "value0".."value20" in sorted string order.
 */
@Test
public void testLessThanEqualTo() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    try {
        for (int i = 0; i < 100; i++) {
            List<Pair> pairs = new ArrayList<>();
            Long key = Long.valueOf(100 + i);
            String value = "value" + i;
            pairs.add(new Pair(key, value));
            Pair pair = new Pair("dummyCol", pairs);
            index.addKeyValues(Collections.singletonList(pair));
        }
        File file = getFile();
        // Close the stream deterministically so the file is fully flushed before it is read back.
        try (FileOutputStream out = new FileOutputStream(file)) {
            index.serialize(out);
        }
        BTreeIndex readIndex = new BTreeIndex();
        try {
            try (FileInputStream in = new FileInputStream(file)) {
                readIndex.deserialize(in);
            }
            RowExpression comparisonExpression = simplePredicate(OperatorType.LESS_THAN_OR_EQUAL, "dummyCol", BIGINT, 120L);
            Iterator<String> result = readIndex.lookUp(comparisonExpression);
            assertNotNull(result, "Result shouldn't be null");
            assertTrue(result.hasNext());
            // Keys 100..120 satisfy the predicate, i.e. value0..value20; compare in sorted string order.
            String[] expected = IntStream.rangeClosed(0, 20).mapToObj(i -> "value" + i).toArray(String[]::new);
            Arrays.sort(expected);
            for (String expectedValue : expected) {
                // TestNG's argument order is (actual, expected).
                assertEquals(result.next(), expectedValue);
            }
            assertFalse(result.hasNext());
        } finally {
            readIndex.close();
        }
    } finally {
        // Release the index even when an assertion above fails.
        index.close();
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) ConstantExpression(io.prestosql.spi.relation.ConstantExpression) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) ArrayList(java.util.ArrayList) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) OperatorType(io.prestosql.spi.function.OperatorType) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) SpecialForm(io.prestosql.spi.relation.SpecialForm) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Assert.assertFalse(org.testng.Assert.assertFalse) Iterator(java.util.Iterator) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) Pair(io.prestosql.spi.heuristicindex.Pair) File(java.io.File) List(java.util.List) HeuristicIndexTestUtils.simplePredicate(io.hetu.core.HeuristicIndexTestUtils.simplePredicate) RowExpression(io.prestosql.spi.relation.RowExpression) Assert.assertTrue(org.testng.Assert.assertTrue) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) Index(io.prestosql.spi.heuristicindex.Index) Collections(java.util.Collections) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 2 with IndexLookUpException

use of io.prestosql.spi.heuristicindex.IndexLookUpException in project hetu-core by openlookeng.

the class SplitFiltering method filterUsingInvertedIndex.

/**
 * Narrows {@code inputSplits} using the inverted indices available for the referenced columns.
 * <p>
 * Splits are grouped by the partition key derived from their file path. A split is kept when its
 * partition has no usable index, when the split was modified after the index, or when the index
 * lookup reports a stripe range accepted by {@code rangeSearch} for the split's start/end range.
 * <p>
 * This is a best-effort optimisation: on any failure the original splits are returned unfiltered.
 *
 * @param expression predicate to look up in the indices
 * @param inputSplits splits to filter
 * @param fullQualifiedTableName table whose indices are consulted
 * @param referencedColumns columns for which indices are loaded
 * @param indexRecordKeyToRecordMap index records passed through to the index cache
 * @param indexerManager provides the index client and the index filter
 * @return the filtered splits, or {@code inputSplits} when filtering is not possible
 */
private static List<Split> filterUsingInvertedIndex(RowExpression expression, List<Split> inputSplits, String fullQualifiedTableName, Set<String> referencedColumns, Map<String, IndexRecord> indexRecordKeyToRecordMap, HeuristicIndexerManager indexerManager) {
    try {
        Map<String, Long> inputMaxLastUpdated = new HashMap<>();
        Map<String, Long> indexMaxLastUpdated = new HashMap<>();
        Map<String, List<Split>> partitionSplitMap = new HashMap<>();
        for (Split split : inputSplits) {
            String filePathStr = split.getConnectorSplit().getFilePath();
            String indexKey = getPartitionKeyOrElse(filePathStr, TABLE_LEVEL_KEY);
            long lastUpdated = split.getConnectorSplit().getLastModifiedTime();
            // Track the newest modification time seen for each partition.
            inputMaxLastUpdated.merge(indexKey, lastUpdated, Long::max);
            partitionSplitMap.computeIfAbsent(indexKey, key -> new ArrayList<>()).add(split);
        }
        // No split could be grouped (e.g. no input splits): nothing to filter, return the input as is
        if (partitionSplitMap.isEmpty()) {
            return inputSplits;
        }
        // Newest modification time across all input splits; invariant for every index request below.
        Long maxInputLastUpdated = Collections.max(inputMaxLastUpdated.values());
        // col -> list of all indices on this column (all partitions)
        Map<String, List<IndexMetadata>> allIndices = new HashMap<>();
        // index loading and verification
        for (String column : referencedColumns) {
            List<IndexMetadata> indexMetadataList = new ArrayList<>();
            for (String indexType : INVERTED_INDEX) {
                indexMetadataList.addAll(getCache(indexerManager.getIndexClient()).getIndices(fullQualifiedTableName, column, indexType, partitionSplitMap.keySet(), maxInputLastUpdated, indexRecordKeyToRecordMap));
            }
            // Remember each index's max-modified time per partition; it is compared against the
            // split's modification time during filtering to detect data newer than the index.
            for (IndexMetadata index : indexMetadataList) {
                String partitionKey = getPartitionKeyOrElse(index.getUri(), TABLE_LEVEL_KEY);
                long lastModifiedTime = Long.parseLong(index.getIndex().getProperties().getProperty(MAX_MODIFIED_TIME));
                indexMaxLastUpdated.put(partitionKey, lastModifiedTime);
            }
            allIndices.put(column, indexMetadataList);
        }
        // lookup index
        IndexFilter filter = indexerManager.getIndexFilter(allIndices);
        Iterator<String> iterator = filter.lookUp(expression);
        if (iterator == null) {
            throw new IndexLookUpException();
        }
        // all positions looked up from index, organized by file path
        Map<String, List<Pair<Long, Long>>> lookUpResults = new HashMap<>();
        while (iterator.hasNext()) {
            SerializationUtils.LookUpResult parsedLookUpResult = deserializeStripeSymbol(iterator.next());
            lookUpResults.computeIfAbsent(parsedLookUpResult.filepath, key -> new ArrayList<>()).add(parsedLookUpResult.stripe);
        }
        // Sort the matching stripes of every file once, e.g. (5,10), (18,25), (30,35), (35,40),
        // instead of re-sorting the same list for each split of that file.
        for (List<Pair<Long, Long>> stripes : lookUpResults.values()) {
            stripes.sort(Comparator.comparingLong(Pair::getFirst));
        }
        // the only index present is a table-level one, so it covers every partition
        boolean partitionHasTableLevelIndex = indexMaxLastUpdated.size() == 1 && indexMaxLastUpdated.containsKey(TABLE_LEVEL_KEY);
        // filtering
        List<Split> filteredSplits = new ArrayList<>();
        for (Map.Entry<String, List<Split>> entry : partitionSplitMap.entrySet()) {
            String partitionKey = entry.getKey();
            // the partition is indexed by its own partition's index
            boolean partitionHasOwnIndex = indexMaxLastUpdated.containsKey(partitionKey);
            if (!partitionHasOwnIndex && !partitionHasTableLevelIndex) {
                // no index applies to this partition: keep all of its splits
                filteredSplits.addAll(entry.getValue());
                continue;
            }
            long indexLastModifiedTimeOfThisPartition = partitionHasOwnIndex
                    ? indexMaxLastUpdated.get(partitionKey)
                    : indexMaxLastUpdated.get(TABLE_LEVEL_KEY);
            for (Split split : entry.getValue()) {
                String filePathStr = new URI(split.getConnectorSplit().getFilePath()).getPath();
                if (split.getConnectorSplit().getLastModifiedTime() > indexLastModifiedTimeOfThisPartition) {
                    // data is newer than the index, so the index cannot be trusted for this split
                    filteredSplits.add(split);
                } else if (lookUpResults.containsKey(filePathStr)) {
                    Pair<Long, Long> targetRange = new Pair<>(split.getConnectorSplit().getStartIndex(), split.getConnectorSplit().getEndIndex());
                    // stripe matching: keep the split when rangeSearch accepts [targetStart, targetEnd]
                    // against the (already sorted) matching stripes of this file
                    if (rangeSearch(lookUpResults.get(filePathStr), targetRange)) {
                        filteredSplits.add(split);
                    }
                }
            }
        }
        return filteredSplits;
    } catch (Throwable e) {
        // Deliberately broad: index filtering is an optimisation and must never fail the query.
        LOG.debug("Exception occurred while filtering. Returning original splits", e);
        return inputSplits;
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SerializationUtils(io.prestosql.spi.heuristicindex.SerializationUtils) URI(java.net.URI) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) Pair(io.prestosql.spi.heuristicindex.Pair) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) AtomicLong(java.util.concurrent.atomic.AtomicLong) IndexFilter(io.prestosql.spi.heuristicindex.IndexFilter) Split(io.prestosql.metadata.Split) Map(java.util.Map) HashMap(java.util.HashMap)

Example 3 with IndexLookUpException

use of io.prestosql.spi.heuristicindex.IndexLookUpException in project hetu-core by openlookeng.

the class AbstractOrcRecordReader method filterStripeUsingIndex.

/**
 * Decides whether a stripe can be skipped based on the heuristic indices registered for it.
 * <p>
 * For AND predicates, the stripe is skipped as soon as one index reports no match; row-level
 * lookup results, when available, are intersected and stored in {@code stripeMatchingRows}.
 * OR predicates are only consulted when no AND predicate has a usable index; the stripe is
 * skipped only if none of the indexed OR predicates match.
 *
 * @param stripe the stripe under consideration
 * @param stripeOffsetToIndex indices keyed by stripe offset
 * @param and column name to domain for conjunctive predicates
 * @param or column name to domains for disjunctive predicates (only the first domain is used)
 * @return {@code true} if the stripe should be filtered out, {@code false} to keep it
 */
private boolean filterStripeUsingIndex(StripeInformation stripe, Map<Long, List<IndexMetadata>> stripeOffsetToIndex, Map<String, Domain> and, Map<String, List<Domain>> or) {
    if (stripeOffsetToIndex.isEmpty()) {
        return false;
    }
    List<IndexMetadata> stripeIndex = stripeOffsetToIndex.get(Long.valueOf(stripe.getOffset()));
    if (stripeIndex == null) {
        // No index was registered for this stripe's offset; without one we cannot rule it out.
        return false;
    }
    Map<Index, Domain> andDomainMap = new HashMap<>();
    Map<Index, Domain> orDomainMap = new HashMap<>();
    for (Map.Entry<String, Domain> domainEntry : and.entrySet()) {
        String columnName = domainEntry.getKey();
        Domain columnDomain = domainEntry.getValue();
        // if the index exists, there should only be one index for this column within this stripe
        List<IndexMetadata> indexMetadata = stripeIndex.stream().filter(p -> p.getColumns()[0].equalsIgnoreCase(columnName)).collect(Collectors.toList());
        if (indexMetadata.size() != 1) {
            continue;
        }
        Index index = indexMetadata.get(0).getIndex();
        andDomainMap.put(index, columnDomain);
    }
    for (Map.Entry<String, List<Domain>> domainEntry : or.entrySet()) {
        String columnName = domainEntry.getKey();
        List<Domain> columnDomain = domainEntry.getValue();
        // if the index exists, there should only be one index for this column within this stripe
        List<IndexMetadata> indexMetadata = stripeIndex.stream().filter(p -> p.getColumns()[0].equalsIgnoreCase(columnName)).collect(Collectors.toList());
        if (indexMetadata.size() != 1) {
            continue;
        }
        Index index = indexMetadata.get(0).getIndex();
        orDomainMap.put(index, columnDomain.get(0));
    }
    if (!andDomainMap.isEmpty()) {
        List<Iterator<Integer>> matchings = new ArrayList<>(andDomainMap.size());
        for (Map.Entry<Index, Domain> e : andDomainMap.entrySet()) {
            try {
                Iterator<Integer> lookUpRes = e.getKey().lookUp(e.getValue());
                if (lookUpRes != null) {
                    matchings.add(lookUpRes);
                } else if (!e.getKey().matches(e.getValue())) {
                    // one conjunct cannot match, so the whole stripe is irrelevant
                    return true;
                }
            } catch (UnsupportedOperationException | IndexLookUpException uoe2) {
                // the index cannot answer this predicate: be conservative and keep the stripe
                return false;
            }
        }
        if (!matchings.isEmpty()) {
            // remember the rows matched by every conjunct for this stripe
            Iterator<Integer> thisStripeMatchingRows = SequenceUtils.intersect(matchings);
            PeekingIterator<Integer> peekingIterator = Iterators.peekingIterator(thisStripeMatchingRows);
            this.stripeMatchingRows.put(stripe, peekingIterator);
        }
        return false;
    }
    if (!orDomainMap.isEmpty()) {
        for (Map.Entry<Index, Domain> e : orDomainMap.entrySet()) {
            try {
                Iterator<Integer> thisStripeMatchingRows = e.getKey().lookUp(e.getValue());
                if (thisStripeMatchingRows != null) {
                    if (thisStripeMatchingRows.hasNext()) {
                        /* any one matched; then include the stripe */
                        return false;
                    }
                } else if (e.getKey().matches(e.getValue())) {
                    return false;
                }
            } catch (UnsupportedOperationException | IndexLookUpException uoe2) {
                // the index cannot answer this predicate: be conservative and keep the stripe
                return false;
            }
        }
        // none of the indexed disjuncts matched
        return true;
    }
    return false;
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) PeekingIterator(com.google.common.collect.PeekingIterator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Slices(io.airlift.slice.Slices) FixedWidthType(io.prestosql.spi.type.FixedWidthType) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) OrcDataSourceUtils.mergeAdjacentDiskRanges(io.prestosql.orc.OrcDataSourceUtils.mergeAdjacentDiskRanges) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Math.min(java.lang.Math.min) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) ResultCachingSelectiveColumnReader(io.prestosql.orc.reader.ResultCachingSelectiveColumnReader) DataSize(io.airlift.units.DataSize) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) Comparator.comparingLong(java.util.Comparator.comparingLong) Domain(io.prestosql.spi.predicate.Domain) Optional(java.util.Optional) Math.max(java.lang.Math.max) WriteChecksumBuilder.createWriteChecksumBuilder(io.prestosql.orc.OrcWriteValidation.WriteChecksumBuilder.createWriteChecksumBuilder) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) HashMap(java.util.HashMap) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) MAX_BATCH_SIZE(io.prestosql.orc.OrcReader.MAX_BATCH_SIZE) 
ImmutableList(com.google.common.collect.ImmutableList) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) Closer(com.google.common.io.Closer) SequenceUtils(io.hetu.core.common.algorithm.SequenceUtils) Objects.requireNonNull(java.util.Objects.requireNonNull) Math.toIntExact(java.lang.Math.toIntExact) LinkedList(java.util.LinkedList) BATCH_SIZE_GROWTH_FACTOR(io.prestosql.orc.OrcReader.BATCH_SIZE_GROWTH_FACTOR) Block(io.prestosql.spi.block.Block) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader) Iterator(java.util.Iterator) ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) OrcType(io.prestosql.orc.metadata.OrcType) DataCachingSelectiveColumnReader(io.prestosql.orc.reader.DataCachingSelectiveColumnReader) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Closeable(java.io.Closeable) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) Index(io.prestosql.spi.heuristicindex.Index) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Index(io.prestosql.spi.heuristicindex.Index) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) PeekingIterator(com.google.common.collect.PeekingIterator) Iterator(java.util.Iterator) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) Domain(io.prestosql.spi.predicate.Domain) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 4 with IndexLookUpException

use of io.prestosql.spi.heuristicindex.IndexLookUpException in project hetu-core by openlookeng.

the class HeuristicIndexFilter method lookUp.

/**
 * Evaluates {@code expression} against the available indices.
 * <p>
 * Call expressions and IN/BETWEEN forms are looked up directly. AND intersects the results of
 * its first two arguments, treating a {@code null} side as "no restriction". OR unions them and
 * fails if either side is {@code null}.
 *
 * @param expression the predicate to evaluate
 * @return matching values, or {@code null} when no side of the expression yields a result
 * @throws IndexLookUpException if the expression is of an unsupported kind, or an OR operand
 *         produced no result
 */
@Override
public <I extends Comparable<I>> Iterator<I> lookUp(Object expression) throws IndexLookUpException {
    if (expression instanceof CallExpression) {
        return lookUpAll((RowExpression) expression);
    }
    if (!(expression instanceof SpecialForm)) {
        throw new IndexLookUpException();
    }
    SpecialForm form = (SpecialForm) expression;
    switch (form.getForm()) {
        case IN:
        case BETWEEN:
            return lookUpAll((RowExpression) expression);
        case AND: {
            Iterator<I> left = lookUp(form.getArguments().get(0));
            Iterator<I> right = lookUp(form.getArguments().get(1));
            if (left == null) {
                // also covers the case where both sides are null
                return right;
            }
            if (right == null) {
                return left;
            }
            return SequenceUtils.intersect(left, right);
        }
        case OR: {
            Iterator<I> left = lookUp(form.getArguments().get(0));
            Iterator<I> right = lookUp(form.getArguments().get(1));
            if (left != null && right != null) {
                return SequenceUtils.union(left, right);
            }
            throw new IndexLookUpException();
        }
        default:
            throw new IndexLookUpException();
    }
}
Also used : CallExpression(io.prestosql.spi.relation.CallExpression) SpecialForm(io.prestosql.spi.relation.SpecialForm) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException)

Example 5 with IndexLookUpException

use of io.prestosql.spi.heuristicindex.IndexLookUpException in project hetu-core by openlookeng.

the class HeuristicIndexFilter method lookUpAll.

/**
 * Looks up every index selected for the expression and unions the individual results.
 * <p>
 * A {@code null} return value is the special "universe" result U: for any other result A,
 * U \and A == A and U \or A == U. It is returned when the first argument of the expression
 * is not a variable reference, when no index is selected for that variable, or when any
 * runtime failure occurs during lookup (including an {@code IndexLookUpException} thrown by
 * an index).
 *
 * @param expression a call expression, or a BETWEEN / IN special form
 * @return the union of all index lookup results, or {@code null} for the universe result
 */
private <T extends Comparable<T>> Iterator<T> lookUpAll(RowExpression expression) {
    RowExpression target = null;
    if (expression instanceof CallExpression) {
        target = ((CallExpression) expression).getArguments().get(0);
    }
    if (expression instanceof SpecialForm) {
        SpecialForm.Form form = ((SpecialForm) expression).getForm();
        if (form == SpecialForm.Form.BETWEEN || form == SpecialForm.Form.IN) {
            target = ((SpecialForm) expression).getArguments().get(0);
        }
    }
    if (!(target instanceof VariableReferenceExpression)) {
        return null;
    }

    String columnName = ((VariableReferenceExpression) target).getName();
    List<IndexMetadata> candidates = HeuristicIndexSelector.select(expression, indices.get(columnName));
    if (candidates.isEmpty()) {
        return null;
    }

    try {
        List<Iterator<T>> perIndexResults = candidates.parallelStream()
                .map(metadata -> {
                    try {
                        return (Iterator<T>) metadata.getIndex().lookUp(expression);
                    } catch (IndexLookUpException lookUpFailure) {
                        // checked exceptions cannot escape the lambda; unwrap happens in the outer catch
                        throw new RuntimeException(lookUpFailure);
                    }
                })
                .collect(Collectors.toList());
        return SequenceUtils.union(perIndexResults);
    } catch (RuntimeException failure) {
        // any failed lookup degrades to the universe result
        return null;
    }
}
Also used : Iterator(java.util.Iterator) IndexFilter(io.prestosql.spi.heuristicindex.IndexFilter) BuiltInFunctionHandle(io.prestosql.spi.function.BuiltInFunctionHandle) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) Collectors(java.util.stream.Collectors) CallExpression(io.prestosql.spi.relation.CallExpression) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) SequenceUtils(io.hetu.core.common.algorithm.SequenceUtils) OperatorType(io.prestosql.spi.function.OperatorType) Map(java.util.Map) RowExpression(io.prestosql.spi.relation.RowExpression) Optional(java.util.Optional) Signature(io.prestosql.spi.function.Signature) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) SpecialForm(io.prestosql.spi.relation.SpecialForm) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) Iterator(java.util.Iterator) RowExpression(io.prestosql.spi.relation.RowExpression) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) CallExpression(io.prestosql.spi.relation.CallExpression) SpecialForm(io.prestosql.spi.relation.SpecialForm) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException)

Aggregations

IndexLookUpException (io.prestosql.spi.heuristicindex.IndexLookUpException)6 List (java.util.List)5 SpecialForm (io.prestosql.spi.relation.SpecialForm)4 ArrayList (java.util.ArrayList)4 Iterator (java.util.Iterator)4 ImmutableList (com.google.common.collect.ImmutableList)3 OperatorType (io.prestosql.spi.function.OperatorType)3 Index (io.prestosql.spi.heuristicindex.Index)3 IndexMetadata (io.prestosql.spi.heuristicindex.IndexMetadata)3 RowExpression (io.prestosql.spi.relation.RowExpression)3 VariableReferenceExpression (io.prestosql.spi.relation.VariableReferenceExpression)3 IOException (java.io.IOException)3 Map (java.util.Map)3 HeuristicIndexTestUtils.simplePredicate (io.hetu.core.HeuristicIndexTestUtils.simplePredicate)2 SequenceUtils (io.hetu.core.common.algorithm.SequenceUtils)2 Pair (io.prestosql.spi.heuristicindex.Pair)2 CallExpression (io.prestosql.spi.relation.CallExpression)2 Optional (java.util.Optional)2 Collectors (java.util.stream.Collectors)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1