Search in sources:

Example 1 with InputStreamSources

Use of io.prestosql.orc.stream.InputStreamSources in project hetu-core by openlookeng.

In class AbstractOrcRecordReader, method advanceToNextRowGroup.

/**
 * Moves the reader to the next row group, advancing through stripes while the
 * current row group iterator is exhausted and stripes remain.
 *
 * <p>Before moving on, if a row group was already being read and row-group
 * statistics validation is present, the collected statistics are validated
 * against the write validation and then reset.
 *
 * @return {@code true} if a row group was started on the column readers,
 *         {@code false} if no further row groups are available
 * @throws IOException declared by this method; may originate from stripe
 *         advancement or from starting the row group on a column reader
 */
private boolean advanceToNextRowGroup() throws IOException {
    nextRowInGroup = 0;

    // Validate the statistics of the row group that was just finished, if any.
    if (currentRowGroup >= 0 && rowGroupStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedGroupStats = rowGroupStatisticsValidation.get();
        long stripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateRowGroupStatistics(
                orcDataSource.getId(),
                stripeOffset,
                currentRowGroup,
                finishedGroupStats.build().get());
        finishedGroupStats.reset();
    }

    // Keep pulling stripes until one yields row groups or the stripes run out.
    while (!rowGroups.hasNext() && currentStripe < stripes.size()) {
        advanceToNextStripe();
        currentRowGroup = -1;
    }

    if (!rowGroups.hasNext()) {
        currentGroupRowCount = 0;
        return false;
    }

    currentRowGroup++;
    RowGroup nextGroup = rowGroups.next();
    currentGroupRowCount = nextGroup.getRowCount();

    // Shrink the batch size so a batch stays within maxBlockBytes (at least one row).
    if (nextGroup.getMinAverageRowBytes() > 0) {
        maxBatchSize = toIntExact(min(maxBatchSize, max(1, maxBlockBytes / nextGroup.getMinAverageRowBytes())));
    }

    currentPosition = currentStripePosition + nextGroup.getRowOffset();
    filePosition = stripeFilePositions.get(currentStripe) + nextGroup.getRowOffset();

    // Hand the row group's data streams to every active column reader.
    InputStreamSources groupStreams = nextGroup.getStreamSources();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }

        boolean isCachingReader = reader instanceof CachingColumnReader
                || reader instanceof ResultCachingSelectiveColumnReader
                || reader instanceof DataCachingSelectiveColumnReader;
        if (isCachingReader) {
            // Caching readers get metadata describing this data source, stripe and row group.
            StreamSourceMeta meta = new StreamSourceMeta();
            meta.setDataSourceId(orcDataSource.getId());
            meta.setLastModifiedTime(orcDataSource.getLastModifiedTime());
            meta.setStripeOffset(stripes.get(currentStripe).getOffset());
            meta.setRowGroupOffset(nextGroup.getRowOffset());
            meta.setRowCount(nextGroup.getRowCount());
            groupStreams.setStreamSourceMeta(meta);
        }
        reader.startRowGroup(groupStreams);
    }
    return true;
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) ResultCachingSelectiveColumnReader(io.prestosql.orc.reader.ResultCachingSelectiveColumnReader) DataCachingSelectiveColumnReader(io.prestosql.orc.reader.DataCachingSelectiveColumnReader) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader)

Example 2 with InputStreamSources

Use of io.prestosql.orc.stream.InputStreamSources in project hetu-core by openlookeng.

In class AbstractOrcRecordReader, method advanceToNextStripe.

/**
 * Advances the reader to the next stripe.
 *
 * <p>The per-stripe memory context is replaced and the row group iterator is
 * cleared. If a stripe was already being read and stripe statistics validation
 * is present, the collected statistics are validated and reset. When another
 * stripe exists it is read, the column readers are started on it, and the row
 * group iterator is pointed at its row groups. If the stripe reader returns
 * {@code null}, the row group iterator stays empty.
 *
 * @throws IOException declared by this method; may originate from reading the
 *         stripe or from starting the stripe on a column reader
 */
private void advanceToNextStripe() throws IOException {
    // Swap in a fresh memory context for the upcoming stripe.
    currentStripeSystemMemoryContext.close();
    currentStripeSystemMemoryContext = systemMemoryUsage.newAggregatedMemoryContext();
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    // Validate the statistics of the stripe that was just finished, if any.
    if (currentStripe >= 0 && stripeStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedStripeStats = stripeStatisticsValidation.get();
        long stripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateStripeStatistics(orcDataSource.getId(), stripeOffset, finishedStripeStats.build().get());
        finishedStripeStats.reset();
    }

    currentStripe++;
    if (currentStripe >= stripes.size()) {
        return;
    }

    // Accumulate the row count of the previous stripe into the stripe start position.
    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }

    StripeInformation nextStripeInfo = stripes.get(currentStripe);
    validateWriteStripe(nextStripeInfo.getNumberOfRows());

    Stripe loadedStripe = stripeReader.readStripe(nextStripeInfo, currentStripeSystemMemoryContext);
    if (loadedStripe == null) {
        return;
    }

    // Give readers access to dictionary streams
    InputStreamSources dictionaryStreams = loadedStripe.getDictionaryStreamSources();
    ColumnMetadata<ColumnEncoding> encodings = loadedStripe.getColumnEncodings();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }
        reader.startStripe(loadedStripe.getFileTimeZone(), dictionaryStreams, encodings);
    }
    rowGroups = loadedStripe.getRowGroups().iterator();
}
Also used : ColumnEncoding(io.prestosql.orc.metadata.ColumnEncoding) InputStreamSources(io.prestosql.orc.stream.InputStreamSources) ZoneId(java.time.ZoneId) AbstractColumnReader(io.prestosql.orc.reader.AbstractColumnReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation)

Example 3 with InputStreamSources

Use of io.prestosql.orc.stream.InputStreamSources in project hetu-core by openlookeng.

In class TestCachingColumnReader, method testBlockCachedOnStartRowGroup.

/**
 * Starting a row group on the caching reader should start the row group on the
 * delegate, read the block from the delegate exactly once, and leave a single
 * entry in the cache.
 */
@Test
public void testBlockCachedOnStartRowGroup() throws Exception {
    // Stream source metadata that the mocked stream sources will report.
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(dataSourceId);

    // Delegate reader, spied cache and the reader under test.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    InputStreamSources sources = mock(InputStreamSources.class);
    Block delegateBlock = mock(Block.class);
    when(sources.getStreamSourceMeta()).thenReturn(meta);
    when(delegate.readBlock()).thenReturn(delegateBlock);

    readerUnderTest.startRowGroup(sources);

    verify(delegate, atLeastOnce()).startRowGroup(eq(sources));
    verify(delegate, times(1)).readBlock();
    verify(blockCache, times(1)).get(any(OrcRowDataCacheKey.class), any(Callable.class));
    assertEquals(blockCache.size(), 1);
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Callable(java.util.concurrent.Callable) Test(org.testng.annotations.Test)

Example 4 with InputStreamSources

Use of io.prestosql.orc.stream.InputStreamSources in project hetu-core by openlookeng.

In class TestCachingColumnReader, method testDelegateThrowsException.

/**
 * When the delegate's {@code readBlock()} keeps throwing, starting a row group
 * on the caching reader should surface an {@link IOException}; the test expects
 * the delegate to have been asked to read twice and the cache to stay empty.
 */
@Test(expectedExceptions = IOException.class)
public void testDelegateThrowsException() throws IOException {
    // Stream source metadata that the mocked stream sources will report.
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(dataSourceId);

    // Delegate reader, spied cache and the reader under test.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    InputStreamSources sources = mock(InputStreamSources.class);
    when(sources.getStreamSourceMeta()).thenReturn(meta);
    // The delegate fails on the first two read attempts.
    when(delegate.readBlock())
            .thenThrow(new OrcCorruptionException(dataSourceId, "Value is null but stream is missing"))
            .thenThrow(new OrcCorruptionException(dataSourceId, "Value is null but stream is missing"));

    try {
        readerUnderTest.startRowGroup(sources);
    }
    catch (IOException failure) {
        // Check the interactions, then rethrow so the expectedExceptions check is satisfied.
        verify(delegate, atLeastOnce()).startRowGroup(eq(sources));
        verify(delegate, times(2)).readBlock();
        assertEquals(blockCache.size(), 0);
        throw failure;
    }
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) IOException(java.io.IOException) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Test(org.testng.annotations.Test)

Example 5 with InputStreamSources

Use of io.prestosql.orc.stream.InputStreamSources in project hetu-core by openlookeng.

In class TestCachingColumnReader, method testReadBlockRetrievesFromCache.

/**
 * After a row group is started, successive reads on the caching reader should
 * be served as regions of the single block obtained from the delegate: the
 * delegate is read once, regions (0, 10) and (10, 20) are requested in order,
 * and the cache holds one entry.
 */
@Test
public void testReadBlockRetrievesFromCache() throws Exception {
    // Stream source metadata that the mocked stream sources will report.
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    StreamSourceMeta meta = new StreamSourceMeta();
    meta.setDataSourceId(dataSourceId);

    // Delegate reader, spied cache and the reader under test.
    ColumnReader delegate = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegate, column, blockCache);

    InputStreamSources sources = mock(InputStreamSources.class);
    when(sources.getStreamSourceMeta()).thenReturn(meta);
    Block delegateBlock = mock(Block.class);
    when(delegate.readBlock()).thenReturn(delegateBlock);

    // Two consecutive reads of 10 and 20 positions within the same row group.
    readerUnderTest.startRowGroup(sources);
    readerUnderTest.prepareNextRead(10);
    readerUnderTest.readBlock();
    readerUnderTest.prepareNextRead(20);
    readerUnderTest.readBlock();

    verify(delegate, atLeastOnce()).startRowGroup(eq(sources));
    verify(delegate, times(1)).readBlock();

    InOrder regionOrder = inOrder(delegateBlock);
    regionOrder.verify(delegateBlock, times(1)).getRegion(0, 10);
    regionOrder.verify(delegateBlock, times(1)).getRegion(10, 20);

    verify(blockCache, times(1)).get(any(OrcRowDataCacheKey.class), any(Callable.class));
    assertEquals(blockCache.size(), 1);
}
Also used : InputStreamSources(io.prestosql.orc.stream.InputStreamSources) InOrder(org.mockito.InOrder) Block(io.prestosql.spi.block.Block) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) StreamSourceMeta(io.prestosql.orc.stream.StreamSourceMeta) CachingColumnReader(io.prestosql.orc.reader.CachingColumnReader) Callable(java.util.concurrent.Callable) Test(org.testng.annotations.Test)

Aggregations

InputStreamSources (io.prestosql.orc.stream.InputStreamSources)10 CachingColumnReader (io.prestosql.orc.reader.CachingColumnReader)6 ColumnReader (io.prestosql.orc.reader.ColumnReader)5 StreamSourceMeta (io.prestosql.orc.stream.StreamSourceMeta)5 Block (io.prestosql.spi.block.Block)5 Test (org.testng.annotations.Test)5 ColumnEncoding (io.prestosql.orc.metadata.ColumnEncoding)4 ZoneId (java.time.ZoneId)4 ImmutableMap (com.google.common.collect.ImmutableMap)3 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)3 OrcColumnId (io.prestosql.orc.metadata.OrcColumnId)3 InputStreamSource (io.prestosql.orc.stream.InputStreamSource)3 ValueInputStreamSource (io.prestosql.orc.stream.ValueInputStreamSource)3 Callable (java.util.concurrent.Callable)3 ImmutableList (com.google.common.collect.ImmutableList)2 Checkpoints.getDictionaryStreamCheckpoint (io.prestosql.orc.checkpoint.Checkpoints.getDictionaryStreamCheckpoint)2 StreamCheckpoint (io.prestosql.orc.checkpoint.StreamCheckpoint)2 Stream (io.prestosql.orc.metadata.Stream)2 AbstractColumnReader (io.prestosql.orc.reader.AbstractColumnReader)2 OrcInputStream (io.prestosql.orc.stream.OrcInputStream)2