Search in sources:

Example 1 with CachingColumnReader

use of io.prestosql.orc.reader.CachingColumnReader in project hetu-core by openlookeng.

the class AbstractOrcRecordReader method advanceToNextRowGroup.

/**
 * Moves the reader onto the next row group, advancing through stripes while the
 * current row-group iterator is exhausted and stripes remain.
 *
 * <p>If a row group was already active and statistics validation is enabled, the
 * statistics collected so far are validated against the write validation and the
 * collector is reset. Position and batch-size bookkeeping is then updated from the new
 * row group, and every non-null column reader is started on its stream sources. For the
 * caching reader variants, stream source metadata is attached to the stream sources
 * before the reader is started.
 *
 * @return {@code true} when a row group was opened; {@code false} when no row group is
 *         left (in which case {@code currentGroupRowCount} is set to 0)
 * @throws IOException propagated from the calls made while advancing; this method does
 *         not throw it directly
 */
private boolean advanceToNextRowGroup() throws IOException {
    nextRowInGroup = 0;

    // Validate the statistics of the row group being left behind, if there was one.
    if (currentRowGroup >= 0 && rowGroupStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation validation = rowGroupStatisticsValidation.get();
        long stripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), stripeOffset, currentRowGroup, validation.build().get());
        validation.reset();
    }

    // Keep moving to the next stripe until one yields row groups or stripes run out.
    // NOTE(review): presumably advanceToNextStripe() refreshes rowGroups and currentStripe - confirm.
    while (!rowGroups.hasNext() && currentStripe < stripes.size()) {
        advanceToNextStripe();
        currentRowGroup = -1;
    }

    if (!rowGroups.hasNext()) {
        currentGroupRowCount = 0;
        return false;
    }

    currentRowGroup++;
    RowGroup group = rowGroups.next();
    currentGroupRowCount = group.getRowCount();

    // Shrink the batch size so a batch stays within maxBlockBytes, but never below one row.
    if (group.getMinAverageRowBytes() > 0) {
        long rowsPerBlock = max(1, maxBlockBytes / group.getMinAverageRowBytes());
        maxBatchSize = toIntExact(min(maxBatchSize, rowsPerBlock));
    }

    currentPosition = currentStripePosition + group.getRowOffset();
    filePosition = stripeFilePositions.get(currentStripe) + group.getRowOffset();

    // give reader data streams from row group
    InputStreamSources streamSources = group.getStreamSources();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }

        boolean isCachingReader = reader instanceof CachingColumnReader
                || reader instanceof ResultCachingSelectiveColumnReader
                || reader instanceof DataCachingSelectiveColumnReader;
        if (isCachingReader) {
            // Describe the data source, stripe and row group on the stream sources
            // before handing them to a caching reader.
            StreamSourceMeta meta = new StreamSourceMeta();
            meta.setDataSourceId(orcDataSource.getId());
            meta.setLastModifiedTime(orcDataSource.getLastModifiedTime());
            meta.setStripeOffset(stripes.get(currentStripe).getOffset());
            meta.setRowGroupOffset(group.getRowOffset());
            meta.setRowCount(group.getRowCount());
            streamSources.setStreamSourceMeta(meta);
        }
        reader.startRowGroup(streamSources);
    }
    return true;
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) ResultCachingSelectiveColumnReader(io.prestosql.orc.reader.ResultCachingSelectiveColumnReader) DataCachingSelectiveColumnReader(io.prestosql.orc.reader.DataCachingSelectiveColumnReader) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader)

Example 2 with CachingColumnReader

use of io.prestosql.orc.reader.CachingColumnReader in project hetu-core by openlookeng.

the class TestCachingColumnReader method testBlockCachedOnStartRowGroup.

/**
 * Checks that {@code startRowGroup} on a {@link CachingColumnReader} forwards the call
 * to the delegate, reads the block from the delegate exactly once, performs a single
 * cache lookup and leaves one entry in the cache.
 */
@Test
public void testBlockCachedOnStartRowGroup() throws Exception {
    // Collaborators: a mocked delegate reader and a spied cache.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    // Stream sources whose metadata carries only a data source id.
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(dataSourceId);
    InputStreamSources sources = mock(InputStreamSources.class);
    Block delegateBlock = mock(Block.class);
    when(sources.getStreamSourceMeta()).thenReturn(meta);
    when(delegate.readBlock()).thenReturn(delegateBlock);

    readerUnderTest.startRowGroup(sources);

    verify(delegate, atLeastOnce()).startRowGroup(eq(sources));
    verify(delegate, times(1)).readBlock();
    verify(blockCache, times(1)).get(any(OrcRowDataCacheKey.class), any(Callable.class));
    assertEquals(blockCache.size(), 1);
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) Callable(java.util.concurrent.Callable) Test(org.testng.annotations.Test)

Example 3 with CachingColumnReader

use of io.prestosql.orc.reader.CachingColumnReader in project hetu-core by openlookeng.

the class TestCachingColumnReader method testDelegateThrowsException.

/**
 * Checks the failure path of {@code startRowGroup}: when the delegate's
 * {@code readBlock} throws, an {@link IOException} reaches the caller, the delegate has
 * been asked for the block twice, and nothing is left in the cache.
 *
 * <p>The assertions run inside the catch block and the exception is rethrown so that
 * the {@code expectedExceptions} declaration still fails the test if nothing is thrown.
 */
@Test(expectedExceptions = IOException.class)
public void testDelegateThrowsException() throws IOException {
    // Collaborators: a mocked delegate reader and a spied cache.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    // Stream sources whose metadata carries only a data source id.
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(dataSourceId);
    InputStreamSources sources = mock(InputStreamSources.class);
    when(sources.getStreamSourceMeta()).thenReturn(meta);

    // Both the first and the second read attempt fail with a corruption error.
    when(delegate.readBlock())
            .thenThrow(new OrcCorruptionException(dataSourceId, "Value is null but stream is missing"))
            .thenThrow(new OrcCorruptionException(dataSourceId, "Value is null but stream is missing"));

    try {
        readerUnderTest.startRowGroup(sources);
    }
    catch (IOException expected) {
        verify(delegate, atLeastOnce()).startRowGroup(eq(sources));
        verify(delegate, times(2)).readBlock();
        assertEquals(blockCache.size(), 0);
        throw expected;
    }
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) IOException(java.io.IOException) Test(org.testng.annotations.Test)

Example 4 with CachingColumnReader

use of io.prestosql.orc.reader.CachingColumnReader in project hetu-core by openlookeng.

the class TestCachingColumnReader method testReadBlockRetrievesFromCache.

/**
 * Checks that successive {@code readBlock} calls on a {@link CachingColumnReader} are
 * answered from the block obtained at {@code startRowGroup}: the delegate is read only
 * once, the block is sliced with {@code getRegion(0, 10)} and then
 * {@code getRegion(10, 20)}, and the cache is consulted once and holds one entry.
 */
@Test
public void testReadBlockRetrievesFromCache() throws Exception {
    // Collaborators: a mocked delegate reader and a spied cache.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    // Stream sources whose metadata carries only a data source id.
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(dataSourceId);
    InputStreamSources sources = mock(InputStreamSources.class);
    when(sources.getStreamSourceMeta()).thenReturn(meta);

    Block delegateBlock = mock(Block.class);
    when(delegate.readBlock()).thenReturn(delegateBlock);

    // Two consecutive reads of 10 and 20 positions within the same row group.
    readerUnderTest.startRowGroup(sources);
    readerUnderTest.prepareNextRead(10);
    readerUnderTest.readBlock();
    readerUnderTest.prepareNextRead(20);
    readerUnderTest.readBlock();

    verify(delegate, atLeastOnce()).startRowGroup(eq(sources));
    verify(delegate, times(1)).readBlock();

    // The regions must be requested in read order.
    InOrder regionOrder = inOrder(delegateBlock);
    regionOrder.verify(delegateBlock, times(1)).getRegion(0, 10);
    regionOrder.verify(delegateBlock, times(1)).getRegion(10, 20);

    verify(blockCache, times(1)).get(any(OrcRowDataCacheKey.class), any(Callable.class));
    assertEquals(blockCache.size(), 1);
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) InOrder(org.mockito.InOrder) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) Callable(java.util.concurrent.Callable) Test(org.testng.annotations.Test)

Example 5 with CachingColumnReader

use of io.prestosql.orc.reader.CachingColumnReader in project hetu-core by openlookeng.

the class TestCachingColumnReader method testGetRetainedSizeInBytes.

/**
 * Checks that {@code getRetainedSizeInBytes} on a {@link CachingColumnReader} reports 1
 * when the wrapped reader is stubbed to report a retained size of 1.
 */
@Test
public void testGetRetainedSizeInBytes() {
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    when(delegate.getRetainedSizeInBytes()).thenReturn(1L);

    assertEquals(readerUnderTest.getRetainedSizeInBytes(), 1L);
}
Also used : Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) Test(org.testng.annotations.Test)

Aggregations

CachingColumnReader (io.prestosql.orc.reader.CachingColumnReader)8 ColumnReader (io.prestosql.orc.reader.ColumnReader)7 Block (io.prestosql.spi.block.Block)7 Test (org.testng.annotations.Test)7 InputStreamSources (io.prestosql.orc.stream.InputStreamSources)6 StreamSourceMeta (io.prestosql.orc.stream.StreamSourceMeta)5 Callable (java.util.concurrent.Callable)3 ColumnEncoding (io.prestosql.orc.metadata.ColumnEncoding)2 IOException (java.io.IOException)2 ZoneId (java.time.ZoneId)2 InOrder (org.mockito.InOrder)2 Cache (com.google.common.cache.Cache)1 CacheBuilder (com.google.common.cache.CacheBuilder)1 ImmutableList (com.google.common.collect.ImmutableList)1 ColumnMetadata (io.prestosql.orc.metadata.ColumnMetadata)1 OrcColumnId (io.prestosql.orc.metadata.OrcColumnId)1 OrcType (io.prestosql.orc.metadata.OrcType)1 AbstractColumnReader (io.prestosql.orc.reader.AbstractColumnReader)1 DataCachingSelectiveColumnReader (io.prestosql.orc.reader.DataCachingSelectiveColumnReader)1 ResultCachingSelectiveColumnReader (io.prestosql.orc.reader.ResultCachingSelectiveColumnReader)1