Use of io.prestosql.orc.stream.StreamSourceMeta in the hetu-core project by openLooKeng.
Class AbstractOrcRecordReader, method advanceToNextRowGroup.
/**
 * Moves the reader to the next row group, crossing into following stripes when the
 * current stripe has no row groups left.
 *
 * <p>Before moving on, the statistics collected for the row group just finished are
 * validated (when statistics validation is enabled) and the collector is reset.
 * Every non-null column reader is then started on the new row group's stream sources;
 * caching readers additionally get a {@link StreamSourceMeta} describing the row group.
 *
 * @return {@code true} if a row group was started, {@code false} if none remain
 * @throws IOException if advancing the stripe or starting a column reader fails
 */
private boolean advanceToNextRowGroup() throws IOException {
    nextRowInGroup = 0;

    // Validate the statistics of the row group that was just completed, if any.
    if (currentRowGroup >= 0 && rowGroupStatisticsValidation.isPresent()) {
        OrcWriteValidation.StatisticsValidation finishedGroupStats = rowGroupStatisticsValidation.get();
        long stripeOffset = stripes.get(currentStripe).getOffset();
        writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), stripeOffset, currentRowGroup, finishedGroupStats.build().get());
        finishedGroupStats.reset();
    }

    // Keep pulling stripes until one yields a row group or the stripes run out.
    while (!rowGroups.hasNext() && currentStripe < stripes.size()) {
        advanceToNextStripe();
        currentRowGroup = -1;
    }

    if (!rowGroups.hasNext()) {
        // Nothing left to read.
        currentGroupRowCount = 0;
        return false;
    }

    currentRowGroup++;
    RowGroup nextGroup = rowGroups.next();
    currentGroupRowCount = nextGroup.getRowCount();

    // Shrink the batch size so that one batch stays within maxBlockBytes.
    long minRowBytes = nextGroup.getMinAverageRowBytes();
    if (minRowBytes > 0) {
        maxBatchSize = toIntExact(min(maxBatchSize, max(1, maxBlockBytes / minRowBytes)));
    }

    long groupRowOffset = nextGroup.getRowOffset();
    currentPosition = currentStripePosition + groupRowOffset;
    filePosition = stripeFilePositions.get(currentStripe) + groupRowOffset;

    // Hand the row group's data streams to each column reader.
    InputStreamSources groupStreams = nextGroup.getStreamSources();
    for (AbstractColumnReader reader : columnReaders) {
        if (reader == null) {
            continue;
        }

        boolean isCachingReader = reader instanceof CachingColumnReader
                || reader instanceof ResultCachingSelectiveColumnReader
                || reader instanceof DataCachingSelectiveColumnReader;
        if (isCachingReader) {
            // Caching readers read this metadata back from the stream sources.
            StreamSourceMeta groupMeta = new StreamSourceMeta();
            groupMeta.setDataSourceId(orcDataSource.getId());
            groupMeta.setLastModifiedTime(orcDataSource.getLastModifiedTime());
            groupMeta.setStripeOffset(stripes.get(currentStripe).getOffset());
            groupMeta.setRowGroupOffset(nextGroup.getRowOffset());
            groupMeta.setRowCount(nextGroup.getRowCount());
            groupStreams.setStreamSourceMeta(groupMeta);
        }

        reader.startRowGroup(groupStreams);
    }
    return true;
}
Use of io.prestosql.orc.stream.StreamSourceMeta in the hetu-core project by openLooKeng.
Class TestCachingColumnReader, method testBlockCachedOnStartRowGroup.
/**
 * Starting a row group must read the block from the delegate exactly once and
 * leave a single entry in the cache.
 */
@Test
public void testBlockCachedOnStartRowGroup() throws Exception {
    // Arrange: a mocked delegate wrapped by a caching reader backed by a spied cache.
    ColumnReader delegateReader = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegateReader, column, blockCache);

    InputStreamSources streamSources = mock(InputStreamSources.class);
    StreamSourceMeta meta = new StreamSourceMeta();
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    meta.setDataSourceId(dataSourceId);

    Block delegateBlock = mock(Block.class);
    when(streamSources.getStreamSourceMeta()).thenReturn(meta);
    when(delegateReader.readBlock()).thenReturn(delegateBlock);

    // Act
    readerUnderTest.startRowGroup(streamSources);

    // Assert: the delegate was started and read once, and the cache was populated once.
    verify(delegateReader, atLeastOnce()).startRowGroup(eq(streamSources));
    verify(delegateReader, times(1)).readBlock();
    verify(blockCache, times(1)).get(any(OrcRowDataCacheKey.class), any(Callable.class));
    assertEquals(blockCache.size(), 1);
}
Use of io.prestosql.orc.stream.StreamSourceMeta in the hetu-core project by openLooKeng.
Class TestCachingColumnReader, method testDelegateThrowsException.
/**
 * When the delegate keeps failing to read the block, {@code startRowGroup} must surface
 * an {@link IOException}, the delegate read must have been attempted twice, and nothing
 * may be left in the cache.
 */
@Test(expectedExceptions = IOException.class)
public void testDelegateThrowsException() throws IOException {
    // Arrange: a delegate whose first two block reads both fail.
    ColumnReader delegateReader = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegateReader, column, blockCache);

    InputStreamSources streamSources = mock(InputStreamSources.class);
    StreamSourceMeta meta = new StreamSourceMeta();
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    meta.setDataSourceId(dataSourceId);
    when(streamSources.getStreamSourceMeta()).thenReturn(meta);
    when(delegateReader.readBlock()).thenThrow(
            new OrcCorruptionException(dataSourceId, "Value is null but stream is missing"),
            new OrcCorruptionException(dataSourceId, "Value is null but stream is missing"));

    try {
        readerUnderTest.startRowGroup(streamSources);
    }
    catch (IOException expected) {
        // Check the interactions, then rethrow so expectedExceptions is satisfied.
        verify(delegateReader, atLeastOnce()).startRowGroup(eq(streamSources));
        verify(delegateReader, times(2)).readBlock();
        assertEquals(blockCache.size(), 0);
        throw expected;
    }
}
Use of io.prestosql.orc.stream.StreamSourceMeta in the hetu-core project by openLooKeng.
Class TestCachingColumnReader, method testReadBlockRetrievesFromCache.
/**
 * Successive {@code readBlock} calls must be served as regions of the block obtained
 * during {@code startRowGroup}, without reading from the delegate again.
 */
@Test
public void testReadBlockRetrievesFromCache() throws Exception {
    // Arrange
    ColumnReader delegateReader = mock(ColumnReader.class);
    Cache<OrcRowDataCacheKey, Block> blockCache = spy(CacheBuilder.newBuilder().build());
    CachingColumnReader readerUnderTest = new CachingColumnReader(delegateReader, column, blockCache);

    InputStreamSources streamSources = mock(InputStreamSources.class);
    StreamSourceMeta meta = new StreamSourceMeta();
    OrcDataSourceId dataSourceId = new OrcDataSourceId("2");
    meta.setDataSourceId(dataSourceId);
    when(streamSources.getStreamSourceMeta()).thenReturn(meta);

    Block delegateBlock = mock(Block.class);
    when(delegateReader.readBlock()).thenReturn(delegateBlock);

    // Act: start the row group, then read two consecutive batches of 10 and 20 rows.
    readerUnderTest.startRowGroup(streamSources);
    readerUnderTest.prepareNextRead(10);
    readerUnderTest.readBlock();
    readerUnderTest.prepareNextRead(20);
    readerUnderTest.readBlock();

    // Assert: the delegate was read only once ...
    verify(delegateReader, atLeastOnce()).startRowGroup(eq(streamSources));
    verify(delegateReader, times(1)).readBlock();

    // ... and both batches were sliced from that block, in order.
    InOrder regionOrder = inOrder(delegateBlock);
    regionOrder.verify(delegateBlock, times(1)).getRegion(0, 10);
    regionOrder.verify(delegateBlock, times(1)).getRegion(10, 20);

    verify(blockCache, times(1)).get(any(OrcRowDataCacheKey.class), any(Callable.class));
    assertEquals(blockCache.size(), 1);
}
Use of io.prestosql.orc.stream.StreamSourceMeta in the hetu-core project by openLooKeng.
Class CachingColumnReader, method startRowGroup.
/**
 * Prepares this reader for a new row group.
 *
 * <p>Resets the read cursor, copies the row group's identifying metadata from the
 * supplied stream sources, obtains the block for the row group via
 * {@code getCachedBlock}, and finally restarts the delegate on the same sources.
 *
 * @param dataStreamSources stream sources of the row group; its
 *        {@link StreamSourceMeta} is dereferenced without a null check
 * @throws IOException if obtaining the block or starting the delegate fails
 */
@Override
public void startRowGroup(InputStreamSources dataStreamSources) throws IOException {
    // Rewind the position within the row group.
    offset = 0;
    nextBatchSize = 0;

    // Record which row group this is; these fields must be set before getCachedBlock runs.
    StreamSourceMeta meta = dataStreamSources.getStreamSourceMeta();
    this.orcDataSourceId = meta.getDataSourceId();
    this.lastModifiedTime = meta.getLastModifiedTime();
    this.stripeOffset = meta.getStripeOffset();
    this.rowGroupOffset = meta.getRowGroupOffset();

    this.cachedBlock = getCachedBlock(meta.getRowCount(), dataStreamSources);

    // reset the stream - may not be required at all
    delegate.startRowGroup(dataStreamSources);
}
Aggregations