Search in sources:

Example 1 with StripeInformation

use of com.facebook.presto.orc.metadata.StripeInformation in project presto by prestodb.

From the class OrcRecordReader, the method advanceToNextStripe:

/**
 * Moves this reader on to the next stripe, if one remains.
 *
 * <p>The current stripe memory context is closed and replaced with a new one, and the row-group
 * iterator is reset to an empty iterator. When the stripe index runs past the end of
 * {@code stripes} the method returns with that empty iterator in place. Otherwise the running row
 * position is advanced by the row count of the stripe just left, the new stripe is read, every
 * non-null stream reader is started on it, and the stripe's row groups become the current
 * iterator.
 *
 * @throws IOException declared by this method; presumably raised while reading the stripe or
 *         starting a stream reader (confirm against the callee signatures)
 */
private void advanceToNextStripe() throws IOException {
    // Swap the previous stripe's memory context for a fresh one.
    currentStripeSystemMemoryContext.close();
    currentStripeSystemMemoryContext = systemMemoryUsage.newAggregatedMemoryContext();

    // Until a stripe has been loaded there are no row groups to iterate over.
    rowGroups = ImmutableList.<RowGroup>of().iterator();

    currentStripe++;
    if (currentStripe >= stripes.size()) {
        // No stripes left; leave the empty row-group iterator in place.
        return;
    }

    boolean hasPreviousStripe = currentStripe > 0;
    if (hasPreviousStripe) {
        // Account for the rows of the stripe that was just finished.
        StripeInformation previousStripe = stripes.get(currentStripe - 1);
        currentStripePosition += previousStripe.getNumberOfRows();
    }

    Stripe loadedStripe = stripeReader.readStripe(stripes.get(currentStripe), currentStripeSystemMemoryContext);
    if (loadedStripe == null) {
        // The stripe reader produced nothing for this stripe; row groups stay empty.
        return;
    }

    // Give readers access to dictionary streams
    StreamSources dictionarySources = loadedStripe.getDictionaryStreamSources();
    List<ColumnEncoding> encodings = loadedStripe.getColumnEncodings();
    for (StreamReader reader : streamReaders) {
        if (reader == null) {
            continue;
        }
        reader.startStripe(dictionarySources, encodings);
    }

    rowGroups = loadedStripe.getRowGroups().iterator();
}
Also used : ColumnEncoding(com.facebook.presto.orc.metadata.ColumnEncoding) StreamReader(com.facebook.presto.orc.reader.StreamReader) StreamSources(com.facebook.presto.orc.stream.StreamSources) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Example 2 with StripeInformation

use of com.facebook.presto.orc.metadata.StripeInformation in project presto by prestodb.

From the class TestCachingOrcDataSource, the method testTinyStripesReadCacheAt:

/**
 * Checks which disk ranges a {@code CachingOrcDataSource} built from a tiny-stripes range finder
 * reads when {@code readCacheAt} is called at various positions.
 *
 * <p>Three scenarios are covered, each on a fresh {@code TestingOrcDataSource}: reading at the
 * start of the first stripe, reading at the last byte before the third stripe, and a layout with
 * megabyte-sized stripes.
 */
@Test
public void testTinyStripesReadCacheAt() throws IOException {
    DataSize mergeDistance = new DataSize(1, Unit.MEGABYTE);
    DataSize readSize = new DataSize(8, Unit.MEGABYTE);

    // Two small stripes followed by a large one; shared by the first two scenarios.
    ImmutableList<StripeInformation> smallThenLargeStripes = ImmutableList.of(
            new StripeInformation(123, 3, 10, 10, 10),
            new StripeInformation(123, 33, 10, 10, 10),
            new StripeInformation(123, 63, 1048576 * 8 - 20, 10, 10));

    // Scenario 1: read at the first stripe's offset, then at the third stripe's offset.
    TestingOrcDataSource firstSource = new TestingOrcDataSource(FakeOrcDataSource.INSTANCE);
    CachingOrcDataSource firstCache = new CachingOrcDataSource(
            firstSource,
            createTinyStripesRangeFinder(smallThenLargeStripes, mergeDistance, readSize));
    firstCache.readCacheAt(3);
    assertEquals(firstSource.getLastReadRanges(), ImmutableList.of(new DiskRange(3, 60)));
    firstCache.readCacheAt(63);
    assertEquals(firstSource.getLastReadRanges(), ImmutableList.of(new DiskRange(63, 8 * 1048576)));

    // Scenario 2: same layout, but the first read lands at the end of a stripe.
    TestingOrcDataSource secondSource = new TestingOrcDataSource(FakeOrcDataSource.INSTANCE);
    CachingOrcDataSource secondCache = new CachingOrcDataSource(
            secondSource,
            createTinyStripesRangeFinder(smallThenLargeStripes, mergeDistance, readSize));
    secondCache.readCacheAt(62);
    assertEquals(secondSource.getLastReadRanges(), ImmutableList.of(new DiskRange(3, 60)));
    secondCache.readCacheAt(63);
    assertEquals(secondSource.getLastReadRanges(), ImmutableList.of(new DiskRange(63, 8 * 1048576)));

    // Scenario 3: a one-byte stripe followed by two multi-megabyte stripes.
    ImmutableList<StripeInformation> megabyteStripes = ImmutableList.of(
            new StripeInformation(123, 3, 1, 0, 0),
            new StripeInformation(123, 4, 1048576, 1048576, 1048576 * 3),
            new StripeInformation(123, 4 + 1048576 * 5, 1048576, 1048576, 1048576));
    TestingOrcDataSource thirdSource = new TestingOrcDataSource(FakeOrcDataSource.INSTANCE);
    CachingOrcDataSource thirdCache = new CachingOrcDataSource(
            thirdSource,
            createTinyStripesRangeFinder(megabyteStripes, mergeDistance, readSize));
    thirdCache.readCacheAt(3);
    assertEquals(thirdSource.getLastReadRanges(), ImmutableList.of(new DiskRange(3, 1 + 1048576 * 5)));
    thirdCache.readCacheAt(4 + 1048576 * 5);
    assertEquals(thirdSource.getLastReadRanges(), ImmutableList.of(new DiskRange(4 + 1048576 * 5, 3 * 1048576)));
}
Also used : DataSize(io.airlift.units.DataSize) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Test(org.testng.annotations.Test)

Example 3 with StripeInformation

use of com.facebook.presto.orc.metadata.StripeInformation in project presto by prestodb.

From the class TestCachingOrcDataSource, the method doIntegration:

/**
 * Runs an end-to-end read of column 0 as {@code VARCHAR} through an {@code OrcReader} built on
 * the given data source and verifies the total number of positions read.
 *
 * <p>Along the way it asserts that constructing the reader performed exactly one read, that the
 * footer lists at least three stripes, and that {@code wrapWithCacheIfTinyStripes} wraps the data
 * source in a {@code CachingOrcDataSource} for these stripes and size limits.
 *
 * @param orcDataSource data source to read from; its read count is inspected
 * @param maxMergeDistance merge distance passed to the reader and to the cache wrapper
 * @param maxReadSize read size passed to the reader and to the cache wrapper
 * @throws IOException declared by this method; presumably raised by the reader calls
 */
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize) throws IOException {
    OrcReader reader = new OrcReader(orcDataSource, new OrcMetadataReader(), maxMergeDistance, maxReadSize);

    // 1 for reading file footer
    assertEquals(orcDataSource.getReadCount(), 1);

    List<StripeInformation> footerStripes = reader.getFooter().getStripes();
    // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode.
    assertGreaterThanOrEqual(footerStripes.size(), 3);

    // Verify the data source gets wrapped by the caching implementation.
    assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, footerStripes, maxMergeDistance, maxReadSize), CachingOrcDataSource.class);

    OrcRecordReader recordReader = reader.createRecordReader(
            ImmutableMap.of(0, VARCHAR),
            (numberOfRows, statisticsByColumnIndex) -> true,
            HIVE_STORAGE_TIME_ZONE,
            new AggregatedMemoryContext());

    // Read batch after batch until the reader reports a non-positive batch size.
    int totalPositions = 0;
    for (int batchSize = recordReader.nextBatch(); batchSize > 0; batchSize = recordReader.nextBatch()) {
        Block column = recordReader.readBlock(VARCHAR, 0);
        totalPositions += column.getPositionCount();
    }

    assertEquals(totalPositions, POSITION_COUNT);
}
Also used : OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) Block(com.facebook.presto.spi.block.Block) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Example 4 with StripeInformation

use of com.facebook.presto.orc.metadata.StripeInformation in project presto by prestodb.

From the class TestCachingOrcDataSource, the method testWrapWithCacheIfTinyStripes:

/**
 * Checks when {@code wrapWithCacheIfTinyStripes} returns a {@code CachingOrcDataSource}.
 *
 * <p>With a 1 MB merge distance and an 8 MB read size, the wrapper is expected for an empty
 * stripe list, a single small stripe, three small stripes, and a third stripe whose index length
 * is {@code 1048576 * 8 - 20}. It is not expected once that index length grows by one byte.
 */
@Test
public void testWrapWithCacheIfTinyStripes() throws IOException {
    DataSize mergeDistance = new DataSize(1, Unit.MEGABYTE);
    DataSize readSize = new DataSize(8, Unit.MEGABYTE);

    // No stripes at all.
    OrcDataSource forNoStripes = wrapWithCacheIfTinyStripes(
            FakeOrcDataSource.INSTANCE,
            ImmutableList.of(),
            mergeDistance,
            readSize);
    assertInstanceOf(forNoStripes, CachingOrcDataSource.class);

    // A single small stripe.
    OrcDataSource forOneStripe = wrapWithCacheIfTinyStripes(
            FakeOrcDataSource.INSTANCE,
            ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10)),
            mergeDistance,
            readSize);
    assertInstanceOf(forOneStripe, CachingOrcDataSource.class);

    // Three small stripes.
    OrcDataSource forThreeSmallStripes = wrapWithCacheIfTinyStripes(
            FakeOrcDataSource.INSTANCE,
            ImmutableList.of(
                    new StripeInformation(123, 3, 10, 10, 10),
                    new StripeInformation(123, 33, 10, 10, 10),
                    new StripeInformation(123, 63, 10, 10, 10)),
            mergeDistance,
            readSize);
    assertInstanceOf(forThreeSmallStripes, CachingOrcDataSource.class);

    // Largest third stripe for which the caching wrapper is still expected.
    OrcDataSource forStripeAtLimit = wrapWithCacheIfTinyStripes(
            FakeOrcDataSource.INSTANCE,
            ImmutableList.of(
                    new StripeInformation(123, 3, 10, 10, 10),
                    new StripeInformation(123, 33, 10, 10, 10),
                    new StripeInformation(123, 63, 1048576 * 8 - 20, 10, 10)),
            mergeDistance,
            readSize);
    assertInstanceOf(forStripeAtLimit, CachingOrcDataSource.class);

    // One byte more in the third stripe: the caching wrapper is no longer expected.
    OrcDataSource forStripeOverLimit = wrapWithCacheIfTinyStripes(
            FakeOrcDataSource.INSTANCE,
            ImmutableList.of(
                    new StripeInformation(123, 3, 10, 10, 10),
                    new StripeInformation(123, 33, 10, 10, 10),
                    new StripeInformation(123, 63, 1048576 * 8 - 20 + 1, 10, 10)),
            mergeDistance,
            readSize);
    assertNotInstanceOf(forStripeOverLimit, CachingOrcDataSource.class);
}
Also used : DataSize(io.airlift.units.DataSize) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation) Test(org.testng.annotations.Test)

Aggregations

StripeInformation (com.facebook.presto.orc.metadata.StripeInformation): 4, DataSize (io.airlift.units.DataSize): 2, Test (org.testng.annotations.Test): 2, AggregatedMemoryContext (com.facebook.presto.orc.memory.AggregatedMemoryContext): 1, ColumnEncoding (com.facebook.presto.orc.metadata.ColumnEncoding): 1, OrcMetadataReader (com.facebook.presto.orc.metadata.OrcMetadataReader): 1, StreamReader (com.facebook.presto.orc.reader.StreamReader): 1, StreamSources (com.facebook.presto.orc.stream.StreamSources): 1, Block (com.facebook.presto.spi.block.Block): 1