Search in sources :

Example 11 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

In the class OrcSelectiveRecordReader, the method createColumnReaders:

/**
 * Creates the selective column readers for this record reader and sorts every
 * participating column index into one of the filter bookkeeping sets
 * ({@code colReaderWithFilter}, {@code colReaderWithORFilter},
 * {@code colReaderWithoutFilter}), all three of which are reset here.
 *
 * <p>A reader is created only for field indexes in {@code [0, fieldCount)} that are
 * keys of {@code includedColumns}; every other slot of the returned array stays
 * {@code null}. Included indexes that have no reader (negative indexes and the
 * entries of {@code missingColumns}) are only recorded in the filter sets.
 *
 * @param fileColumns columns of the file, looked up by field index
 * @param systemMemoryContext memory context handed to the created readers
 * @param blockFactory factory used to derive the nested block factory for cached readers
 * @param orcCacheStore provides the row data cache
 * @param orcCacheProperties tells whether the row data cache is enabled
 * @param predicate predicate passed to the result-caching wrapper
 * @param filters conjunctive filter per column index; {@code null} entry means no filter
 * @param hiveStorageTimeZone time zone forwarded to the selective reader factory
 * @param outputColumns indexes of the columns that must be produced as output
 * @param includedColumns type per column index that takes part in projection or filter
 * @param orcTypes ORC type metadata, indexed by column id
 * @param useDataCache whether the data-caching reader should be used when the cache is enabled
 * @return array of size {@code fieldCount} holding the created readers
 * @throws OrcCorruptionException declared by this method; raised by the reader factories
 */
public SelectiveColumnReader[] createColumnReaders(List<OrcColumn> fileColumns, AggregatedMemoryContext systemMemoryContext, OrcBlockFactory blockFactory, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, OrcPredicate predicate, Map<Integer, TupleDomainFilter> filters, DateTimeZone hiveStorageTimeZone, List<Integer> outputColumns, Map<Integer, Type> includedColumns, ColumnMetadata<OrcType> orcTypes, boolean useDataCache) throws OrcCorruptionException {
    int fieldCount = orcTypes.get(OrcColumnId.ROOT_COLUMN).getFieldCount();
    SelectiveColumnReader[] readers = new SelectiveColumnReader[fieldCount];

    // Start from a clean classification on every call.
    colReaderWithFilter = new IntArraySet();
    colReaderWithORFilter = new IntArraySet();
    colReaderWithoutFilter = new IntArraySet();

    // Included indexes that have not been matched to a file field yet.
    IntArraySet unresolved = new IntArraySet();
    unresolved.addAll(includedColumns.keySet());

    for (int i = 0; i < fieldCount; i++) {
        // Readers are only needed for columns that are part of the projection or the filter.
        if (!includedColumns.containsKey(i)) {
            continue;
        }
        // Separate variable so the lambda below can capture it.
        int columnIndex = i;
        OrcColumn column = fileColumns.get(columnIndex);
        boolean outputRequired = outputColumns.contains(i);

        SelectiveColumnReader reader;
        if (useDataCache && orcCacheProperties.isRowDataCacheEnabled()) {
            // Plain reader wrapped so that it is served through the row data cache.
            ColumnReader plainReader = ColumnReaders.createColumnReader(includedColumns.get(i), column, systemMemoryContext, blockFactory.createNestedBlockFactory(block -> blockLoaded(columnIndex, block)));
            reader = SelectiveColumnReaders.wrapWithDataCachingStreamReader(plainReader, column, orcCacheStore.getRowDataCache());
        } else {
            // The output type is passed only when the column must actually be produced.
            Optional<Type> outputType;
            if (outputRequired) {
                outputType = Optional.of(includedColumns.get(i));
            } else {
                outputType = Optional.empty();
            }
            reader = createColumnReader(orcTypes.get(column.getColumnId()), column, Optional.ofNullable(filters.get(i)), outputType, hiveStorageTimeZone, systemMemoryContext);
            if (orcCacheProperties.isRowDataCacheEnabled()) {
                reader = SelectiveColumnReaders.wrapWithResultCachingStreamReader(reader, column, predicate, orcCacheStore.getRowDataCache());
            }
        }
        readers[columnIndex] = reader;

        // A conjunctive filter takes precedence over a non-empty disjunctive filter list.
        if (filters.get(i) != null) {
            colReaderWithFilter.add(columnIndex);
        } else if (disjuctFilters.get(i) != null && disjuctFilters.get(i).size() > 0) {
            colReaderWithORFilter.add(columnIndex);
        } else {
            colReaderWithoutFilter.add(columnIndex);
        }
        unresolved.remove(columnIndex);
    }

    // Missing columns get their own pass below; what is left here should only be negative indexes.
    unresolved.removeAll(missingColumns);
    for (Integer col : unresolved) {
        if (col >= 0) {
            // Not expected to happen; such an index is left unclassified.
            continue;
        }
        if (filters.get(col) != null) {
            colReaderWithFilter.add(col);
        } else if (disjuctFilters.get(col) != null && disjuctFilters.get(col).size() > 0) {
            colReaderWithORFilter.add(col);
        }
    }

    // Columns absent from the file (the "alter table add column" case) are classified by filter only.
    for (int absentColumn : missingColumns) {
        if (filters.get(absentColumn) != null) {
            colReaderWithFilter.add(absentColumn);
        } else if (disjuctFilters.get(absentColumn) != null && disjuctFilters.get(absentColumn).size() > 0) {
            colReaderWithORFilter.add(absentColumn);
        }
    }
    return readers;
}
Also used : IntStream(java.util.stream.IntStream) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) TypeNotFoundException(io.prestosql.spi.type.TypeNotFoundException) PeekingIterator(com.google.common.collect.PeekingIterator) Function(java.util.function.Function) PostScript(io.prestosql.orc.metadata.PostScript) ArrayList(java.util.ArrayList) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) Math.toIntExact(java.lang.Math.toIntExact) SelectiveColumnReaders(io.prestosql.orc.reader.SelectiveColumnReaders) Block(io.prestosql.spi.block.Block) ColumnReaders(io.prestosql.orc.reader.ColumnReaders) SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) OrcType(io.prestosql.orc.metadata.OrcType) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet) Set(java.util.Set) Page(io.prestosql.spi.Page) IOException(java.io.IOException) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) ColumnReader(io.prestosql.orc.reader.ColumnReader) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) DataSize(io.airlift.units.DataSize) List(java.util.List) SelectiveColumnReaders.createColumnReader(io.prestosql.orc.reader.SelectiveColumnReaders.createColumnReader) Domain(io.prestosql.spi.predicate.Domain) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) Optional(java.util.Optional) BitSet(java.util.BitSet) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) 
SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) IntArraySet(it.unimi.dsi.fastutil.ints.IntArraySet) SelectiveColumnReader(io.prestosql.orc.reader.SelectiveColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) SelectiveColumnReaders.createColumnReader(io.prestosql.orc.reader.SelectiveColumnReaders.createColumnReader)

Example 12 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

In the class TestAbstractNumbericColumnReader, the method testTypeCoercionInteger:

/**
 * Checks that {@code ColumnReaders.createColumnReader} returns, for an ORC INT column,
 * a reader whose string form equals that of a directly constructed
 * {@code IntegerColumnReader}.
 */
@Test
public void testTypeCoercionInteger() throws OrcCorruptionException {
    // The same location is used as the column path and as the data source id.
    String location = "hdfs://hacluster/user/hive/warehouse/tpcds_orc_hive_1000.db/catalog_sales/cs_sold_date_sk=2452268/000896_0";
    OrcColumn column = new OrcColumn(location, new OrcColumnId(3), "cs_order_number", OrcType.OrcTypeKind.INT, new OrcDataSourceId(location), ImmutableList.of());

    // Reader produced by the factory under test.
    AggregatedMemoryContext factoryContext = AggregatedMemoryContext.newSimpleAggregatedMemoryContext();
    ColumnReader fromFactory = ColumnReaders.createColumnReader(type, column, factoryContext, null);

    // Reference reader built by hand with an independent memory context.
    AggregatedMemoryContext referenceContext = AggregatedMemoryContext.newSimpleAggregatedMemoryContext();
    IntegerColumnReader reference = new IntegerColumnReader(type, column, referenceContext.newLocalMemoryContext(ColumnReaders.class.getSimpleName()));

    assertEquals(fromFactory.toString(), reference.toString());
}
Also used : OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) IntegerColumnReader(io.prestosql.orc.reader.IntegerColumnReader) ShortColumnReader(io.prestosql.orc.reader.ShortColumnReader) DateColumnReader(io.prestosql.orc.reader.DateColumnReader) LongColumnReader(io.prestosql.orc.reader.LongColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) IntegerColumnReader(io.prestosql.orc.reader.IntegerColumnReader) Test(org.testng.annotations.Test)

Example 13 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

In the class TestAbstractNumbericColumnReader, the method testTypeCoercionShort:

/**
 * Checks that {@code ColumnReaders.createColumnReader} returns, for an ORC SHORT column,
 * a reader whose string form equals that of a directly constructed
 * {@code ShortColumnReader}.
 */
@Test
public void testTypeCoercionShort() throws OrcCorruptionException {
    // The same location is used as the column path and as the data source id.
    String location = "hdfs://hacluster/user/hive/warehouse/tpcds_orc_hive_1000.db/catalog_sales/cs_sold_date_sk=2452268/000896_0";
    OrcColumn column = new OrcColumn(location, new OrcColumnId(3), "cs_order_number", OrcType.OrcTypeKind.SHORT, new OrcDataSourceId(location), ImmutableList.of());

    // Reader produced by the factory under test.
    AggregatedMemoryContext factoryContext = AggregatedMemoryContext.newSimpleAggregatedMemoryContext();
    ColumnReader fromFactory = ColumnReaders.createColumnReader(type, column, factoryContext, null);

    // Reference reader built by hand with an independent memory context.
    AggregatedMemoryContext referenceContext = AggregatedMemoryContext.newSimpleAggregatedMemoryContext();
    ShortColumnReader reference = new ShortColumnReader(type, column, referenceContext.newLocalMemoryContext(ColumnReaders.class.getSimpleName()));

    assertEquals(fromFactory.toString(), reference.toString());
}
Also used : OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) ShortColumnReader(io.prestosql.orc.reader.ShortColumnReader) ShortColumnReader(io.prestosql.orc.reader.ShortColumnReader) DateColumnReader(io.prestosql.orc.reader.DateColumnReader) LongColumnReader(io.prestosql.orc.reader.LongColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) IntegerColumnReader(io.prestosql.orc.reader.IntegerColumnReader) Test(org.testng.annotations.Test)

Example 14 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

In the class TestCachingColumnReader, the method testBlockCachedOnStartStripe:

/**
 * Verifies that {@code CachingColumnReader.startStripe} forwards the call, with the
 * same arguments, to the wrapped {@code ColumnReader}.
 */
@Test
public void testBlockCachedOnStartStripe() throws IOException {
    // Wrapped reader and cache backing the reader under test.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    // Arguments for startStripe; the stripe is an unstubbed mock, so its getters
    // return whatever Mockito's defaults are.
    InputStreamSources sources = mock(InputStreamSources.class);
    Stripe mockedStripe = mock(Stripe.class);
    ZoneId zone = mockedStripe.getFileTimeZone();
    ColumnMetadata<ColumnEncoding> encodings = mockedStripe.getColumnEncodings();

    readerUnderTest.startStripe(zone, sources, encodings);

    verify(delegate, atLeastOnce()).startStripe(eq(zone), eq(sources), eq(encodings));
}
Also used : ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) ZoneId(java.time.ZoneId) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Test(org.testng.annotations.Test)

Example 15 with ColumnReader

use of io.prestosql.orc.reader.ColumnReader in project hetu-core by openlookeng.

In the class TestCachingColumnReader, the method testCacheLoaderThrowsInterruptedException:

/**
 * Expects {@code startRowGroup} on a {@code CachingColumnReader} to surface a
 * {@code PrestoException} whose message contains "Read interrupted" when the wrapped
 * reader's {@code readBlock} interrupts the current thread and throws.
 */
@Test(expectedExceptions = PrestoException.class, expectedExceptionsMessageRegExp = ".*Read interrupted.*")
public void testCacheLoaderThrowsInterruptedException() throws IOException {
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    // Stream sources that report a data source id of "2".
    InputStreamSources sources = mock(InputStreamSources.class);
    StreamSourceMeta sourceMeta = new StreamSourceMeta();
    sourceMeta.setDataSourceId(new OrcDataSourceId("2"));
    when(sources.getStreamSourceMeta()).thenReturn(sourceMeta);

    // Reading a block sets the interrupt flag and then fails.
    when(delegate.readBlock()).then((Answer<Block>) invocation -> {
        Thread.currentThread().interrupt();
        throw new PrestoException(StandardErrorCode.GENERIC_INTERNAL_ERROR, "Read interrupted");
    });

    try {
        readerUnderTest.startRowGroup(sources);
    } catch (IOException e) {
        // NOTE(review): the test expects PrestoException; if that type is not an
        // IOException this handler never runs and the checks below are skipped - confirm.
        verify(delegate, atLeastOnce()).startRowGroup(eq(sources));
        verify(delegate, times(1)).readBlock();
        assertEquals(blockCache.size(), 0);
        throw e;
    } finally {
        // Reset the interrupt flag so it does not leak out of this test.
        Thread.interrupted();
    }
}
Also used : Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) Callable(java.util.concurrent.Callable) OrcColumnId(io.prestosql.orc.metadata.OrcColumnId) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Mockito.spy(org.mockito.Mockito.spy) Answer(org.mockito.stubbing.Answer) ImmutableList(com.google.common.collect.ImmutableList) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) Block(io.prestosql.spi.block.Block) PrestoException(io.prestosql.spi.PrestoException) InOrder(org.mockito.InOrder) ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) OrcType(io.prestosql.orc.metadata.OrcType) Mockito.atLeastOnce(org.mockito.Mockito.atLeastOnce) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) ZoneId(java.time.ZoneId) ColumnReader(io.prestosql.orc.reader.ColumnReader) Mockito.verify(org.mockito.Mockito.verify) Matchers.any(org.mockito.Matchers.any) Mockito.inOrder(org.mockito.Mockito.inOrder) CacheBuilder(com.google.common.cache.CacheBuilder) Cache(com.google.common.cache.Cache) StandardErrorCode(io.prestosql.spi.StandardErrorCode) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Block(io.prestosql.spi.block.Block) PrestoException(io.prestosql.spi.PrestoException) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) IOException(java.io.IOException) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Test(org.testng.annotations.Test)

Aggregations

ColumnReader (io.prestosql.orc.reader.ColumnReader)15 Block (io.prestosql.spi.block.Block)11 Test (org.testng.annotations.Test)11 CachingColumnReader (io.prestosql.orc.reader.CachingColumnReader)7 OrcColumnId (io.prestosql.orc.metadata.OrcColumnId)6 InputStreamSources (io.prestosql.orc.stream.InputStreamSources)5 IOException (java.io.IOException)5 ColumnMetadata (io.prestosql.orc.metadata.ColumnMetadata)4 OrcType (io.prestosql.orc.metadata.OrcType)4 DateColumnReader (io.prestosql.orc.reader.DateColumnReader)4 IntegerColumnReader (io.prestosql.orc.reader.IntegerColumnReader)4 LongColumnReader (io.prestosql.orc.reader.LongColumnReader)4 ShortColumnReader (io.prestosql.orc.reader.ShortColumnReader)4 StreamSourceMeta (io.prestosql.orc.stream.StreamSourceMeta)4 Page (io.prestosql.spi.Page)4 PeekingIterator (com.google.common.collect.PeekingIterator)3 Logger (io.airlift.log.Logger)3 Slice (io.airlift.slice.Slice)3 DataSize (io.airlift.units.DataSize)3 AggregatedMemoryContext (io.prestosql.memory.context.AggregatedMemoryContext)3