Search in sources :

Example 11 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class TestIndexCacheRemoval method testExpiredCacheIndices.

/**
 * Checks that an index served from the cache stops being returned once the
 * split reports a last-modified time different from the one the index was
 * stubbed with.
 *
 * <p>The first lookup is expected to return nothing; after waiting
 * {@code loadDelay + 2000} ms the same lookup is expected to return
 * {@code numberOfIndexTypes} entries (presumably because the cache loads in
 * the background - confirm against IndexCache).
 */
@Test
public void testExpiredCacheIndices() throws Exception {
    synchronized (this) {
        // Split stub that reports the fixture path and modification time.
        HiveSplit split = mock(HiveSplit.class);
        when(split.getPath()).thenReturn(testPath);
        when(split.getLastModifiedTime()).thenReturn(testLastModifiedTime);

        // A 1 KB index wrapped in metadata carrying the same modification time as the split.
        Index stubIndex = mock(Index.class);
        when(stubIndex.getMemoryUsage()).thenReturn(new DataSize(1, KILOBYTE).toBytes());
        IndexMetadata metadata = mock(IndexMetadata.class);
        when(metadata.getLastModifiedTime()).thenReturn(testLastModifiedTime);
        when(metadata.getIndex()).then(new Returns(stubIndex));

        // Loader stub that always hands back the single metadata entry.
        List<IndexMetadata> loaderResult = new LinkedList<>();
        loaderResult.add(metadata);
        IndexCacheLoader loader = mock(IndexCacheLoader.class);
        when(loader.load(any())).then(new Returns(loaderResult));

        IndexCache cache = new IndexCache(loader, loadDelay, new NoOpIndexClient());

        // First request: nothing is available yet.
        List<IndexMetadata> fetched = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(fetched.size(), 0);

        // Wait past the load delay, then the indices should be served.
        Thread.sleep(loadDelay + 2000);
        fetched = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(fetched.size(), numberOfIndexTypes);

        // The index is cached now; a different last-modified time on the split must invalidate it.
        when(split.getLastModifiedTime()).thenReturn(testLastModifiedTime + 1);
        fetched = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(fetched.size(), 0);
    }
}
Also used : Returns(org.mockito.internal.stubbing.answers.Returns) HiveSplit(io.prestosql.plugin.hive.HiveSplit) DataSize(io.airlift.units.DataSize) Index(io.prestosql.spi.heuristicindex.Index) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) NoOpIndexClient(io.prestosql.testing.NoOpIndexClient) LinkedList(java.util.LinkedList) Test(org.testng.annotations.Test)

Example 12 with Index

use of io.prestosql.spi.heuristicindex.Index in project boostkit-bigdata by kunpengcompute.

the class TestIndexCachePartition method testIndexCacheWithPartitions.

/**
 * Checks that indices are still served when the effective predicate contains
 * a partition column in addition to the regular test column.
 *
 * <p>The partition column handle is stubbed to throw from {@code getName()},
 * so the test fails if the cache ever asks for that column's name.
 */
@Test
public void testIndexCacheWithPartitions() throws Exception {
    synchronized (this) {
        // Split stub that reports the fixture path and modification time.
        HiveSplit split = mock(HiveSplit.class);
        when(split.getPath()).thenReturn(testPath);
        when(split.getLastModifiedTime()).thenReturn(testLastModifiedTime);

        // The partition column should be filtered out before its name is read,
        // so getName() must never be invoked on this handle.
        HiveColumnHandle partitionColumn = mock(HiveColumnHandle.class);
        when(partitionColumn.getName()).thenThrow(Exception.class);
        TupleDomain<HiveColumnHandle> predicateWithPartition =
                TupleDomain.withColumnDomains(ImmutableMap.of(testColumnHandle, domain, partitionColumn, domain));
        List<HiveColumnHandle> partitionColumns = ImmutableList.of(partitionColumn);

        // A 1 KB index wrapped in metadata carrying the same modification time as the split.
        Index stubIndex = mock(Index.class);
        when(stubIndex.getMemoryUsage()).thenReturn(new DataSize(1, KILOBYTE).toBytes());
        IndexMetadata metadata = mock(IndexMetadata.class);
        when(metadata.getLastModifiedTime()).thenReturn(testLastModifiedTime);
        when(metadata.getIndex()).then(new Returns(stubIndex));

        // Loader stub that always hands back the single metadata entry.
        List<IndexMetadata> loaderResult = new LinkedList<>();
        loaderResult.add(metadata);
        IndexCacheLoader loader = mock(IndexCacheLoader.class);
        when(loader.load(any())).then(new Returns(loaderResult));

        IndexCache cache = new IndexCache(loader, loadDelay, new NoOpIndexClient());

        // First request: nothing is available yet.
        List<IndexMetadata> fetched = cache.getIndices(catalog, table, split, predicateWithPartition, partitionColumns);
        assertEquals(fetched.size(), 0);

        // Wait past the load delay, then the indices should be served.
        Thread.sleep(loadDelay + 2000);
        fetched = cache.getIndices(catalog, table, split, predicateWithPartition, partitionColumns);
        assertEquals(fetched.size(), numberOfIndexTypes);
    }
}
Also used : Index(io.prestosql.spi.heuristicindex.Index) LinkedList(java.util.LinkedList) Returns(org.mockito.internal.stubbing.answers.Returns) HiveSplit(io.prestosql.plugin.hive.HiveSplit) DataSize(io.airlift.units.DataSize) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) NoOpIndexClient(io.prestosql.testing.NoOpIndexClient) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) Test(org.testng.annotations.Test)

Example 13 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class AbstractOrcRecordReader method filterStripeUsingIndex.

/**
 * Decides whether a stripe can be skipped by consulting the heuristic indices
 * registered for it.
 *
 * <p>Conjunctive ({@code and}) predicates take precedence: when at least one
 * of them has a usable index, the disjunctive ({@code or}) predicates are not
 * consulted. As a side effect, when the conjunctive indices yield row
 * iterators, their intersection is stored in {@code stripeMatchingRows} for
 * this stripe.
 *
 * @param stripe the stripe being considered
 * @param stripeOffsetToIndex indices keyed by stripe offset
 * @param and column name to domain for conjunctive predicates
 * @param or column name to domains for disjunctive predicates; only the first
 *        domain of each list is evaluated
 *        (NOTE(review): confirm that ignoring the remaining domains is intended)
 * @return {@code true} if the stripe should be filtered out, {@code false} if
 *         it must be read (including whenever the indices cannot decide)
 */
private boolean filterStripeUsingIndex(StripeInformation stripe, Map<Long, List<IndexMetadata>> stripeOffsetToIndex, Map<String, Domain> and, Map<String, List<Domain>> or) {
    if (stripeOffsetToIndex.isEmpty()) {
        return false;
    }
    List<IndexMetadata> stripeIndex = stripeOffsetToIndex.get(Long.valueOf(stripe.getOffset()));
    if (stripeIndex == null || stripeIndex.isEmpty()) {
        // Other stripes have indices but this one does not: nothing proves the
        // stripe irrelevant, so keep it instead of failing on a null list.
        return false;
    }
    Map<Index, Domain> andDomainMap = new HashMap<>();
    Map<Index, Domain> orDomainMap = new HashMap<>();
    for (Map.Entry<String, Domain> domainEntry : and.entrySet()) {
        findUniqueColumnIndex(stripeIndex, domainEntry.getKey())
                .ifPresent(metadata -> andDomainMap.put(metadata.getIndex(), domainEntry.getValue()));
    }
    for (Map.Entry<String, List<Domain>> domainEntry : or.entrySet()) {
        findUniqueColumnIndex(stripeIndex, domainEntry.getKey())
                .ifPresent(metadata -> orDomainMap.put(metadata.getIndex(), domainEntry.getValue().get(0)));
    }
    if (!andDomainMap.isEmpty()) {
        List<Iterator<Integer>> matchings = new ArrayList<>(andDomainMap.size());
        for (Map.Entry<Index, Domain> e : andDomainMap.entrySet()) {
            try {
                Iterator<Integer> lookUpRes = e.getKey().lookUp(e.getValue());
                if (lookUpRes != null) {
                    matchings.add(lookUpRes);
                } else if (!e.getKey().matches(e.getValue())) {
                    // one conjunct definitely does not match: the whole stripe can be skipped
                    return true;
                }
            } catch (UnsupportedOperationException | IndexLookUpException uoe2) {
                // the index cannot answer this predicate: fall back to reading the stripe
                return false;
            }
        }
        if (!matchings.isEmpty()) {
            Iterator<Integer> thisStripeMatchingRows = SequenceUtils.intersect(matchings);
            PeekingIterator<Integer> peekingIterator = Iterators.peekingIterator(thisStripeMatchingRows);
            this.stripeMatchingRows.put(stripe, peekingIterator);
        }
        return false;
    }
    if (!orDomainMap.isEmpty()) {
        for (Map.Entry<Index, Domain> e : orDomainMap.entrySet()) {
            try {
                Iterator<Integer> thisStripeMatchingRows = e.getKey().lookUp(e.getValue());
                if (thisStripeMatchingRows != null) {
                    if (thisStripeMatchingRows.hasNext()) {
                        /* any one matched; then include the stripe */
                        return false;
                    }
                } else if (e.getKey().matches(e.getValue())) {
                    return false;
                }
            } catch (UnsupportedOperationException | IndexLookUpException uoe2) {
                // the index cannot answer this predicate: fall back to reading the stripe
                return false;
            }
        }
        // none of the disjuncts matched
        return true;
    }
    return false;
}

/**
 * Returns the metadata of the only index in {@code stripeIndex} whose first
 * column equals {@code columnName} (ignoring case), or an empty Optional when
 * there is no such index or more than one.
 */
private static Optional<IndexMetadata> findUniqueColumnIndex(List<IndexMetadata> stripeIndex, String columnName) {
    // if the index exists, there should only be one index for this column within this stripe
    List<IndexMetadata> matches = stripeIndex.stream()
            .filter(p -> p.getColumns()[0].equalsIgnoreCase(columnName))
            .collect(Collectors.toList());
    return matches.size() == 1 ? Optional.of(matches.get(0)) : Optional.empty();
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) PeekingIterator(com.google.common.collect.PeekingIterator) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Slices(io.airlift.slice.Slices) FixedWidthType(io.prestosql.spi.type.FixedWidthType) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) OrcDataSourceUtils.mergeAdjacentDiskRanges(io.prestosql.orc.OrcDataSourceUtils.mergeAdjacentDiskRanges) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Math.min(java.lang.Math.min) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) ResultCachingSelectiveColumnReader(io.prestosql.orc.reader.ResultCachingSelectiveColumnReader) DataSize(io.airlift.units.DataSize) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) Comparator.comparingLong(java.util.Comparator.comparingLong) Domain(io.prestosql.spi.predicate.Domain) Optional(java.util.Optional) Math.max(java.lang.Math.max) WriteChecksumBuilder.createWriteChecksumBuilder(io.prestosql.orc.OrcWriteValidation.WriteChecksumBuilder.createWriteChecksumBuilder) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) HashMap(java.util.HashMap) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) MAX_BATCH_SIZE(io.prestosql.orc.OrcReader.MAX_BATCH_SIZE) 
ImmutableList(com.google.common.collect.ImmutableList) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) Closer(com.google.common.io.Closer) SequenceUtils(io.hetu.core.common.algorithm.SequenceUtils) Objects.requireNonNull(java.util.Objects.requireNonNull) Math.toIntExact(java.lang.Math.toIntExact) LinkedList(java.util.LinkedList) BATCH_SIZE_GROWTH_FACTOR(io.prestosql.orc.OrcReader.BATCH_SIZE_GROWTH_FACTOR) Block(io.prestosql.spi.block.Block) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader) Iterator(java.util.Iterator) ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) OrcType(io.prestosql.orc.metadata.OrcType) DataCachingSelectiveColumnReader(io.prestosql.orc.reader.DataCachingSelectiveColumnReader) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) Closeable(java.io.Closeable) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) Index(io.prestosql.spi.heuristicindex.Index) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Index(io.prestosql.spi.heuristicindex.Index) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) PeekingIterator(com.google.common.collect.PeekingIterator) Iterator(java.util.Iterator) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) Domain(io.prestosql.spi.predicate.Domain) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 14 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class TestIndexCacheFetch method testIndexCacheGetIndices.

/**
 * Checks the basic fetch path of the index cache: the first lookup returns
 * nothing, and after waiting {@code loadDelay + 2000} ms the same lookup
 * returns {@code numberOfIndexTypes} entries, the first of which is the
 * metadata object supplied by the stubbed loader.
 */
@Test
public void testIndexCacheGetIndices() throws Exception {
    synchronized (this) {
        // Split stub that reports the fixture path and modification time.
        HiveSplit split = mock(HiveSplit.class);
        when(split.getPath()).thenReturn(testPath);
        when(split.getLastModifiedTime()).thenReturn(testLastModifiedTime);

        // A 1 KB index wrapped in metadata carrying the same modification time as the split.
        Index stubIndex = mock(Index.class);
        when(stubIndex.getMemoryUsage()).thenReturn(new DataSize(1, KILOBYTE).toBytes());
        IndexMetadata metadata = mock(IndexMetadata.class);
        when(metadata.getLastModifiedTime()).thenReturn(testLastModifiedTime);
        when(metadata.getIndex()).then(new Returns(stubIndex));

        // Loader stub that always hands back the single metadata entry.
        List<IndexMetadata> loaderResult = new LinkedList<>();
        loaderResult.add(metadata);
        IndexCacheLoader loader = mock(IndexCacheLoader.class);
        when(loader.load(any())).then(new Returns(loaderResult));

        // This test uses the constructor without an explicit load delay.
        IndexCache cache = new IndexCache(loader, new NoOpIndexClient());

        // First request: nothing is available yet.
        List<IndexMetadata> fetched = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(fetched.size(), 0);

        // Wait, then the indices should be served and match what the loader produced.
        Thread.sleep(loadDelay + 2000);
        fetched = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(fetched.size(), numberOfIndexTypes);
        assertEquals(fetched.get(0), loaderResult.get(0));
    }
}
Also used : Returns(org.mockito.internal.stubbing.answers.Returns) HiveSplit(io.prestosql.plugin.hive.HiveSplit) DataSize(io.airlift.units.DataSize) Index(io.prestosql.spi.heuristicindex.Index) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) NoOpIndexClient(io.prestosql.testing.NoOpIndexClient) LinkedList(java.util.LinkedList) Test(org.testng.annotations.Test)

Example 15 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class TestIndexCachePartition method testIndexCacheWithPartitions.

/**
 * Checks that the cache serves indices for a predicate that mixes a regular
 * column with a partition column.
 *
 * <p>{@code getName()} on the partition column handle is stubbed to throw, so
 * any attempt by the cache to read that column's name makes the test fail.
 */
@Test
public void testIndexCacheWithPartitions() throws Exception {
    synchronized (this) {
        // Split stub that reports the fixture path and modification time.
        HiveSplit hiveSplit = mock(HiveSplit.class);
        when(hiveSplit.getPath()).thenReturn(testPath);
        when(hiveSplit.getLastModifiedTime()).thenReturn(testLastModifiedTime);

        // The partition column should be filtered out before its name is read,
        // so getName() must never be invoked on this handle.
        HiveColumnHandle partitionHandle = mock(HiveColumnHandle.class);
        when(partitionHandle.getName()).thenThrow(Exception.class);
        List<HiveColumnHandle> partitionColumns = ImmutableList.of(partitionHandle);
        TupleDomain<HiveColumnHandle> partitionedPredicate =
                TupleDomain.withColumnDomains(ImmutableMap.of(testColumnHandle, domain, partitionHandle, domain));

        // Metadata stub exposing a 1 KB index with the split's modification time.
        IndexMetadata cachedMetadata = mock(IndexMetadata.class);
        Index cachedIndex = mock(Index.class);
        when(cachedMetadata.getLastModifiedTime()).thenReturn(testLastModifiedTime);
        when(cachedMetadata.getIndex()).then(new Returns(cachedIndex));
        when(cachedIndex.getMemoryUsage()).thenReturn(new DataSize(1, KILOBYTE).toBytes());

        // Loader stub that always returns the single metadata entry.
        List<IndexMetadata> loaded = new LinkedList<>();
        loaded.add(cachedMetadata);
        IndexCacheLoader cacheLoader = mock(IndexCacheLoader.class);
        when(cacheLoader.load(any())).then(new Returns(loaded));

        IndexCache indexCache = new IndexCache(cacheLoader, loadDelay, new NoOpIndexClient());

        // Nothing is expected on the very first request.
        List<IndexMetadata> result = indexCache.getIndices(catalog, table, hiveSplit, partitionedPredicate, partitionColumns);
        assertEquals(result.size(), 0);

        // After the load delay has passed the indices should be available.
        Thread.sleep(loadDelay + 2000);
        result = indexCache.getIndices(catalog, table, hiveSplit, partitionedPredicate, partitionColumns);
        assertEquals(result.size(), numberOfIndexTypes);
    }
}
Also used : Index(io.prestosql.spi.heuristicindex.Index) LinkedList(java.util.LinkedList) Returns(org.mockito.internal.stubbing.answers.Returns) HiveSplit(io.prestosql.plugin.hive.HiveSplit) DataSize(io.airlift.units.DataSize) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) NoOpIndexClient(io.prestosql.testing.NoOpIndexClient) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) Test(org.testng.annotations.Test)

Aggregations

Index (io.prestosql.spi.heuristicindex.Index)24 LinkedList (java.util.LinkedList)17 IndexMetadata (io.prestosql.spi.heuristicindex.IndexMetadata)15 Test (org.testng.annotations.Test)14 DataSize (io.airlift.units.DataSize)12 NoOpIndexClient (io.prestosql.testing.NoOpIndexClient)11 Returns (org.mockito.internal.stubbing.answers.Returns)11 ArrayList (java.util.ArrayList)10 IOException (java.io.IOException)9 List (java.util.List)9 HiveSplit (io.prestosql.plugin.hive.HiveSplit)8 Pair (io.prestosql.spi.heuristicindex.Pair)8 Collections (java.util.Collections)7 Map (java.util.Map)7 CreateIndexMetadata (io.prestosql.spi.connector.CreateIndexMetadata)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 Logger (io.airlift.log.Logger)5 File (java.io.File)5 FileInputStream (java.io.FileInputStream)5 FileOutputStream (java.io.FileOutputStream)5