Search in sources :

Example 1 with StreamSourceMeta

use of io.prestosql.orc.stream.StreamSourceMeta in project hetu-core by openlookeng.

the class AbstractOrcRecordReader method advanceToNextRowGroup.

/**
 * Moves the reader onto the next row group, rolling over to following stripes
 * while the current one has no row groups left.
 *
 * <p>If a row group was already being read and row group statistics validation is
 * present, the collected statistics are validated and the collector is reset before
 * moving on. Every non-null column reader is then started on the streams of the newly
 * selected row group; caching readers additionally get a {@code StreamSourceMeta}
 * describing that row group attached to the stream sources.
 *
 * @return {@code true} if a row group was selected, {@code false} if none remain
 * @throws IOException propagated from advancing the stripe or starting a column reader
 */
private boolean advanceToNextRowGroup() throws IOException {
    nextRowInGroup = 0;

    // Close out the statistics gathered for the row group we are leaving.
    if (currentRowGroup >= 0 && rowGroupStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedGroupStatistics = rowGroupStatisticsValidation.get();
        long stripeStart = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), stripeStart, currentRowGroup, finishedGroupStatistics.build().get());
        finishedGroupStatistics.reset();
    }

    // Keep advancing stripes until one yields a row group or the stripes run out.
    while (!rowGroups.hasNext()) {
        if (currentStripe >= stripes.size()) {
            currentGroupRowCount = 0;
            return false;
        }
        advanceToNextStripe();
        currentRowGroup = -1;
    }

    currentRowGroup++;
    RowGroup nextGroup = rowGroups.next();
    currentGroupRowCount = nextGroup.getRowCount();
    if (nextGroup.getMinAverageRowBytes() > 0) {
        // Shrink the batch size so a batch stays within maxBlockBytes, but never below one row.
        maxBatchSize = toIntExact(min(maxBatchSize, max(1, maxBlockBytes / nextGroup.getMinAverageRowBytes())));
    }
    currentPosition = currentStripePosition + nextGroup.getRowOffset();
    filePosition = stripeFilePositions.get(currentStripe) + nextGroup.getRowOffset();

    // Hand the row group's data streams to each column reader.
    InputStreamSources groupStreams = nextGroup.getStreamSources();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }
        boolean cachingReader = reader instanceof CachingColumnReader
                || reader instanceof ResultCachingSelectiveColumnReader
                || reader instanceof DataCachingSelectiveColumnReader;
        if (cachingReader) {
            // Caching readers receive metadata describing this row group via the stream sources.
            StreamSourceMeta groupMeta = new StreamSourceMeta();
            groupMeta.setDataSourceId(orcDataSource.getId());
            groupMeta.setLastModifiedTime(orcDataSource.getLastModifiedTime());
            groupMeta.setStripeOffset(stripes.get(currentStripe).getOffset());
            groupMeta.setRowGroupOffset(nextGroup.getRowOffset());
            groupMeta.setRowCount(nextGroup.getRowCount());
            groupStreams.setStreamSourceMeta(groupMeta);
        }
        reader.startRowGroup(groupStreams);
    }
    return true;
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) ResultCachingSelectiveColumnReader(io.prestosql.orc.reader.ResultCachingSelectiveColumnReader) DataCachingSelectiveColumnReader(io.prestosql.orc.reader.DataCachingSelectiveColumnReader) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader)

Example 2 with StreamSourceMeta

use of io.prestosql.orc.stream.StreamSourceMeta in project hetu-core by openlookeng.

the class TestCachingColumnReader method testBlockCachedOnStartRowGroup.

/**
 * Starting a row group on a {@code CachingColumnReader} must start the row group on the
 * delegate, read the block from the delegate exactly once, look the block up in the cache
 * exactly once, and leave a single entry in the cache.
 */
@Test
public void testBlockCachedOnStartRowGroup() throws Exception {
    // Stream sources carrying metadata that only identifies the data source.
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(new OrcDataSourceId("2"));
    InputStreamSources streamSources = mock(InputStreamSources.class);
    when(streamSources.getStreamSourceMeta()).thenReturn(meta);

    // Reader under test, wrapping a mocked delegate and a spied cache.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader reader = new CachingColumnReader(delegate, column, blockCache);
    Block delegateBlock = mock(Block.class);
    when(delegate.readBlock()).thenReturn(delegateBlock);

    reader.startRowGroup(streamSources);

    verify(delegate, atLeastOnce()).startRowGroup(eq(streamSources));
    verify(delegate, times(1)).readBlock();
    verify(blockCache, times(1)).get(any(OrcRowDataCacheKey.class), any(Callable.class));
    assertEquals(blockCache.size(), 1);
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Callable(java.util.concurrent.Callable) Test(org.testng.annotations.Test)

Example 3 with StreamSourceMeta

use of io.prestosql.orc.stream.StreamSourceMeta in project hetu-core by openlookeng.

the class TestCachingColumnReader method testDelegateThrowsException.

/**
 * When the delegate's {@code readBlock()} keeps failing, {@code startRowGroup} must surface
 * an {@link IOException}, having called the delegate's {@code readBlock()} twice and
 * leaving the cache empty.
 */
@Test(expectedExceptions = IOException.class)
public void testDelegateThrowsException() throws IOException {
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(dataSourceId);
    InputStreamSources streamSources = mock(InputStreamSources.class);
    when(streamSources.getStreamSourceMeta()).thenReturn(meta);

    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader reader = new CachingColumnReader(delegate, column, blockCache);

    // The delegate fails on two consecutive reads, each time with a fresh exception.
    OrcCorruptionException firstFailure = new OrcCorruptionException(dataSourceId, "Value is null but stream is missing");
    OrcCorruptionException secondFailure = new OrcCorruptionException(dataSourceId, "Value is null but stream is missing");
    when(delegate.readBlock()).thenThrow(firstFailure).thenThrow(secondFailure);

    try {
        reader.startRowGroup(streamSources);
    }
    catch (IOException expected) {
        // Check the interactions, then rethrow so the expectedExceptions clause is satisfied.
        verify(delegate, atLeastOnce()).startRowGroup(eq(streamSources));
        verify(delegate, times(2)).readBlock();
        assertEquals(blockCache.size(), 0);
        throw expected;
    }
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) IOException(java.io.IOException) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Test(org.testng.annotations.Test)

Example 4 with StreamSourceMeta

use of io.prestosql.orc.stream.StreamSourceMeta in project hetu-core by openlookeng.

the class TestCachingColumnReader method testReadBlockRetrievesFromCache.

/**
 * Two consecutive batch reads after a single {@code startRowGroup} must be served as
 * regions of the one block read from the delegate: the delegate's {@code readBlock()} is
 * called once, the cache is consulted once, and the regions are requested in order.
 */
@Test
public void testReadBlockRetrievesFromCache() throws Exception {
    // Stream sources carrying metadata that only identifies the data source.
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(new OrcDataSourceId("2"));
    InputStreamSources streamSources = mock(InputStreamSources.class);
    when(streamSources.getStreamSourceMeta()).thenReturn(meta);

    // Reader under test, wrapping a mocked delegate and a spied cache.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader reader = new CachingColumnReader(delegate, column, blockCache);
    Block delegateBlock = mock(Block.class);
    when(delegate.readBlock()).thenReturn(delegateBlock);

    // One row group, then two batches of 10 and 20 rows.
    reader.startRowGroup(streamSources);
    reader.prepareNextRead(10);
    reader.readBlock();
    reader.prepareNextRead(20);
    reader.readBlock();

    verify(delegate, atLeastOnce()).startRowGroup(eq(streamSources));
    verify(delegate, times(1)).readBlock();

    // The second region starts where the first one ended.
    InOrder regionOrder = inOrder(delegateBlock);
    regionOrder.verify(delegateBlock, times(1)).getRegion(0, 10);
    regionOrder.verify(delegateBlock, times(1)).getRegion(10, 20);

    verify(blockCache, times(1)).get(any(OrcRowDataCacheKey.class), any(Callable.class));
    assertEquals(blockCache.size(), 1);
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) InOrder(org.mockito.InOrder) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Callable(java.util.concurrent.Callable) Test(org.testng.annotations.Test)

Example 5 with StreamSourceMeta

use of io.prestosql.orc.stream.StreamSourceMeta in project hetu-core by openlookeng.

the class CachingColumnReader method startRowGroup.

/**
 * Begins reading a new row group.
 *
 * <p>Resets the read offset and pending batch size, copies the row group's identifying
 * metadata from the stream sources into this reader, obtains the cached block for the
 * row group, and finally starts the row group on the delegate reader.
 *
 * @param dataStreamSources stream sources of the row group; its stream source metadata is read here
 * @throws IOException propagated from obtaining the cached block or from the delegate
 */
@Override
public void startRowGroup(InputStreamSources dataStreamSources) throws IOException {
    this.offset = 0;
    this.nextBatchSize = 0;

    // Record which row group this is before the cached block is looked up.
    StreamSourceMeta meta = dataStreamSources.getStreamSourceMeta();
    this.orcDataSourceId = meta.getDataSourceId();
    this.lastModifiedTime = meta.getLastModifiedTime();
    this.stripeOffset = meta.getStripeOffset();
    this.rowGroupOffset = meta.getRowGroupOffset();

    this.cachedBlock = getCachedBlock(meta.getRowCount(), dataStreamSources);

    // Also restart the delegate on these streams; this may not be strictly necessary.
    this.delegate.startRowGroup(dataStreamSources);
}
Also used : StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta)

Aggregations

StreamSourceMeta (io.prestosql.orc.stream.StreamSourceMeta): 8; CachingColumnReader (io.prestosql.orc.reader.CachingColumnReader): 5; InputStreamSources (io.prestosql.orc.stream.InputStreamSources): 5; ColumnReader (io.prestosql.orc.reader.ColumnReader): 4; Block (io.prestosql.spi.block.Block): 4; Test (org.testng.annotations.Test): 4; Callable (java.util.concurrent.Callable): 3; IOException (java.io.IOException): 2; InOrder (org.mockito.InOrder): 2; Cache (com.google.common.cache.Cache): 1; CacheBuilder (com.google.common.cache.CacheBuilder): 1; ImmutableList (com.google.common.collect.ImmutableList): 1; OrcDataSourceIdWithTimeStamp (io.prestosql.orc.OrcDataSourceIdWithTimeStamp): 1; OrcSelectiveRowDataCacheKey (io.prestosql.orc.OrcSelectiveRowDataCacheKey): 1; ColumnEncoding (io.prestosql.orc.metadata.ColumnEncoding): 1; ColumnMetadata (io.prestosql.orc.metadata.ColumnMetadata): 1; OrcColumnId (io.prestosql.orc.metadata.OrcColumnId): 1; OrcType (io.prestosql.orc.metadata.OrcType): 1; AbstractColumnReader (io.prestosql.orc.reader.AbstractColumnReader): 1; DataCachingSelectiveColumnReader (io.prestosql.orc.reader.DataCachingSelectiveColumnReader): 1