use of com.facebook.presto.orc.metadata.StripeInformation in project presto by prestodb.
the class OrcRecordReader method advanceToNextStripe.
/**
 * Advances this reader to the next stripe.
 *
 * <p>The memory context of the stripe being left is closed and replaced with a new one obtained
 * from {@code systemMemoryUsage}, and {@code rowGroups} is reset to an empty iterator. If no
 * stripe remains, the method returns with that empty iterator in place. Otherwise the row count
 * of the stripe just left is added to {@code currentStripePosition}, the new stripe is read, and,
 * when the stripe reader returns a non-null stripe, every non-null stream reader is started on it
 * and {@code rowGroups} is pointed at the stripe's row groups.
 *
 * @throws IOException propagated from the underlying calls
 */
private void advanceToNextStripe() throws IOException {
    // Close the previous stripe's memory context and replace it with a new one.
    currentStripeSystemMemoryContext.close();
    currentStripeSystemMemoryContext = systemMemoryUsage.newAggregatedMemoryContext();

    // Start from an empty row-group iterator; it is only replaced if a stripe is loaded below.
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    currentStripe++;
    if (currentStripe >= stripes.size()) {
        // No stripe left to advance to.
        return;
    }

    // Account for the rows of the stripe we just left (there is none before the first stripe).
    if (currentStripe > 0) {
        currentStripePosition += stripes.get(currentStripe - 1).getNumberOfRows();
    }

    Stripe nextStripe = stripeReader.readStripe(stripes.get(currentStripe), currentStripeSystemMemoryContext);
    if (nextStripe == null) {
        // Nothing to start the readers on; rowGroups stays empty.
        return;
    }

    // Give readers access to dictionary streams
    StreamSources dictionaries = nextStripe.getDictionaryStreamSources();
    List<ColumnEncoding> encodings = nextStripe.getColumnEncodings();
    for (StreamReader reader : streamReaders) {
        if (reader == null) {
            continue;
        }
        reader.startStripe(dictionaries, encodings);
    }

    rowGroups = nextStripe.getRowGroups().iterator();
}
use of com.facebook.presto.orc.metadata.StripeInformation in project presto by prestodb.
the class TestCachingOrcDataSource method testTinyStripesReadCacheAt.
/**
 * Checks which disk ranges a {@code CachingOrcDataSource} backed by the tiny-stripes range finder
 * requests from the underlying data source for a given {@code readCacheAt} position, using a
 * 1 MB max merge distance and an 8 MB max read size.
 */
@Test
public void testTinyStripesReadCacheAt() throws IOException {
    DataSize maxMergeDistance = new DataSize(1, Unit.MEGABYTE);
    DataSize maxReadSize = new DataSize(8, Unit.MEGABYTE);

    // Scenario 1: read at the start of the first stripe, then at the start of the third stripe.
    ImmutableList<StripeInformation> firstLayout = ImmutableList.of(
            new StripeInformation(123, 3, 10, 10, 10),
            new StripeInformation(123, 33, 10, 10, 10),
            new StripeInformation(123, 63, 1048576 * 8 - 20, 10, 10));
    TestingOrcDataSource firstSource = new TestingOrcDataSource(FakeOrcDataSource.INSTANCE);
    CachingOrcDataSource firstCache = new CachingOrcDataSource(
            firstSource,
            createTinyStripesRangeFinder(firstLayout, maxMergeDistance, maxReadSize));

    firstCache.readCacheAt(3);
    assertEquals(firstSource.getLastReadRanges(), ImmutableList.of(new DiskRange(3, 60)));
    firstCache.readCacheAt(63);
    assertEquals(firstSource.getLastReadRanges(), ImmutableList.of(new DiskRange(63, 8 * 1048576)));

    // Scenario 2: same stripe layout on a fresh data source, but the first read is
    // at the end of a stripe
    ImmutableList<StripeInformation> secondLayout = ImmutableList.of(
            new StripeInformation(123, 3, 10, 10, 10),
            new StripeInformation(123, 33, 10, 10, 10),
            new StripeInformation(123, 63, 1048576 * 8 - 20, 10, 10));
    TestingOrcDataSource secondSource = new TestingOrcDataSource(FakeOrcDataSource.INSTANCE);
    CachingOrcDataSource secondCache = new CachingOrcDataSource(
            secondSource,
            createTinyStripesRangeFinder(secondLayout, maxMergeDistance, maxReadSize));

    secondCache.readCacheAt(62);
    assertEquals(secondSource.getLastReadRanges(), ImmutableList.of(new DiskRange(3, 60)));
    secondCache.readCacheAt(63);
    assertEquals(secondSource.getLastReadRanges(), ImmutableList.of(new DiskRange(63, 8 * 1048576)));

    // Scenario 3: a one-byte stripe followed by two multi-megabyte stripes.
    ImmutableList<StripeInformation> thirdLayout = ImmutableList.of(
            new StripeInformation(123, 3, 1, 0, 0),
            new StripeInformation(123, 4, 1048576, 1048576, 1048576 * 3),
            new StripeInformation(123, 4 + 1048576 * 5, 1048576, 1048576, 1048576));
    TestingOrcDataSource thirdSource = new TestingOrcDataSource(FakeOrcDataSource.INSTANCE);
    CachingOrcDataSource thirdCache = new CachingOrcDataSource(
            thirdSource,
            createTinyStripesRangeFinder(thirdLayout, maxMergeDistance, maxReadSize));

    thirdCache.readCacheAt(3);
    assertEquals(thirdSource.getLastReadRanges(), ImmutableList.of(new DiskRange(3, 1 + 1048576 * 5)));
    thirdCache.readCacheAt(4 + 1048576 * 5);
    assertEquals(thirdSource.getLastReadRanges(), ImmutableList.of(new DiskRange(4 + 1048576 * 5, 3 * 1048576)));
}
use of com.facebook.presto.orc.metadata.StripeInformation in project presto by prestodb.
the class TestCachingOrcDataSource method doIntegration.
/**
 * Reads column 0 as VARCHAR from the given data source through an {@code OrcRecordReader} and
 * verifies that the total number of positions read equals {@code POSITION_COUNT}.
 *
 * <p>Before reading, it also asserts that constructing the {@code OrcReader} performed exactly one
 * read, that the footer lists at least three stripes, and that {@code wrapWithCacheIfTinyStripes}
 * returns a {@code CachingOrcDataSource} for those stripes.
 *
 * @param orcDataSource the instrumented data source to read from
 * @param maxMergeDistance merge distance passed to the reader and the cache wrapper
 * @param maxReadSize read size passed to the reader and the cache wrapper
 * @throws IOException propagated from the reader calls
 */
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize) throws IOException {
    OrcReader reader = new OrcReader(orcDataSource, new OrcMetadataReader(), maxMergeDistance, maxReadSize);

    // 1 for reading file footer
    assertEquals(orcDataSource.getReadCount(), 1);

    // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode.
    List<StripeInformation> footerStripes = reader.getFooter().getStripes();
    assertGreaterThanOrEqual(footerStripes.size(), 3);

    // verify wrapped by CachingOrcReader
    assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, footerStripes, maxMergeDistance, maxReadSize), CachingOrcDataSource.class);

    OrcRecordReader recordReader = reader.createRecordReader(
            ImmutableMap.of(0, VARCHAR),
            (numberOfRows, statisticsByColumnIndex) -> true,
            HIVE_STORAGE_TIME_ZONE,
            new AggregatedMemoryContext());

    // Keep pulling batches until the reader reports a non-positive batch size.
    int totalPositions = 0;
    for (int batchSize = recordReader.nextBatch(); batchSize > 0; batchSize = recordReader.nextBatch()) {
        totalPositions += recordReader.readBlock(VARCHAR, 0).getPositionCount();
    }

    assertEquals(totalPositions, POSITION_COUNT);
}
use of com.facebook.presto.orc.metadata.StripeInformation in project presto by prestodb.
the class TestCachingOrcDataSource method testWrapWithCacheIfTinyStripes.
/**
 * Checks when {@code wrapWithCacheIfTinyStripes} returns a {@code CachingOrcDataSource}, using a
 * 1 MB max merge distance and an 8 MB max read size. Every stripe layout below is expected to be
 * wrapped except the last one, whose final stripe is one byte larger than in the layout before it.
 */
@Test
public void testWrapWithCacheIfTinyStripes() throws IOException {
    DataSize maxMergeDistance = new DataSize(1, Unit.MEGABYTE);
    DataSize maxReadSize = new DataSize(8, Unit.MEGABYTE);

    // No stripes at all.
    ImmutableList<StripeInformation> noStripes = ImmutableList.of();
    assertInstanceOf(
            wrapWithCacheIfTinyStripes(FakeOrcDataSource.INSTANCE, noStripes, maxMergeDistance, maxReadSize),
            CachingOrcDataSource.class);

    // A single small stripe.
    ImmutableList<StripeInformation> singleStripe = ImmutableList.of(
            new StripeInformation(123, 3, 10, 10, 10));
    assertInstanceOf(
            wrapWithCacheIfTinyStripes(FakeOrcDataSource.INSTANCE, singleStripe, maxMergeDistance, maxReadSize),
            CachingOrcDataSource.class);

    // Three small stripes.
    ImmutableList<StripeInformation> threeSmallStripes = ImmutableList.of(
            new StripeInformation(123, 3, 10, 10, 10),
            new StripeInformation(123, 33, 10, 10, 10),
            new StripeInformation(123, 63, 10, 10, 10));
    assertInstanceOf(
            wrapWithCacheIfTinyStripes(FakeOrcDataSource.INSTANCE, threeSmallStripes, maxMergeDistance, maxReadSize),
            CachingOrcDataSource.class);

    // Last stripe's three trailing arguments sum to exactly 8 * 1048576.
    ImmutableList<StripeInformation> lastStripeAtLimit = ImmutableList.of(
            new StripeInformation(123, 3, 10, 10, 10),
            new StripeInformation(123, 33, 10, 10, 10),
            new StripeInformation(123, 63, 1048576 * 8 - 20, 10, 10));
    assertInstanceOf(
            wrapWithCacheIfTinyStripes(FakeOrcDataSource.INSTANCE, lastStripeAtLimit, maxMergeDistance, maxReadSize),
            CachingOrcDataSource.class);

    // One byte more than the previous layout: no longer wrapped.
    ImmutableList<StripeInformation> lastStripeOverLimit = ImmutableList.of(
            new StripeInformation(123, 3, 10, 10, 10),
            new StripeInformation(123, 33, 10, 10, 10),
            new StripeInformation(123, 63, 1048576 * 8 - 20 + 1, 10, 10));
    assertNotInstanceOf(
            wrapWithCacheIfTinyStripes(FakeOrcDataSource.INSTANCE, lastStripeOverLimit, maxMergeDistance, maxReadSize),
            CachingOrcDataSource.class);
}
Aggregations