Search in sources :

Example 26 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

In class TestOrcReaderPositions, method testReadUserMetadata:

/**
 * Verifies that user metadata written to a file is read back unchanged from the footer.
 *
 * <p>A temporary file is populated by {@code createFileWithOnlyUserMetadata} with three
 * fixed key/value pairs. The file is then opened with an {@code OrcReader}, the footer's
 * user metadata values are converted with {@code Slice::toStringAscii}, and the resulting
 * map is compared with the map that was written.
 *
 * @throws Exception if creating, reading, or closing the temporary file or data source fails
 */
@Test
public void testReadUserMetadata() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        Map<String, String> metadata = ImmutableMap.of("a", "ala", "b", "ma", "c", "kota");
        createFileWithOnlyUserMetadata(tempFile.getFile(), metadata);
        // This test never creates a record reader, so nothing else in the method closes the
        // data source. Close it explicitly, and before tempFile itself is closed.
        try (OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true)) {
            OrcReader orcReader = new OrcReader(orcDataSource, ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, OrcReaderTestingUtils.createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
            Footer footer = orcReader.getFooter();
            Map<String, String> readMetadata = Maps.transformValues(footer.getUserMetadata(), Slice::toStringAscii);
            assertEquals(readMetadata, metadata);
        }
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) Slice(io.airlift.slice.Slice) DataSize(io.airlift.units.DataSize) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Footer(com.facebook.presto.orc.metadata.Footer) Test(org.testng.annotations.Test)

Example 27 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

In class TestMapFlatBatchStreamReader, method runTest:

/**
 * Reads column 0 of a DWRF test resource as a map and compares it, row by row, with the
 * expected values, while checking the reader's reported positions after every batch.
 *
 * <p>Batches can be skipped without reading their block: every batch that starts before
 * 10,000 rows have been processed (when {@code skipFirstStripe} is set), or otherwise only
 * the very first non-skipped batch (when {@code skipFirstBatch} is set). For a skipped batch
 * the expected-value iterator is simply advanced by the batch size.
 *
 * @param testOrcFileName classpath resource name of the file to read
 * @param keyType type of the map keys
 * @param valueType type of the map values
 * @param expectedValues expected map for every row of the file, in file order
 * @param skipFirstBatch whether to skip reading the block of the first eligible batch
 * @param skipFirstStripe whether to skip reading blocks until 10,000 rows have been processed
 * @throws Exception if reading or closing the record reader fails
 */
private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, List<Map<K, V>> expectedValues, boolean skipFirstBatch, boolean skipFirstStripe) throws Exception {
    File orcFile = new File(getResource(testOrcFileName).getFile());
    OrcDataSource orcDataSource = new FileOrcDataSource(orcFile, new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true);
    OrcReader orcReader = new OrcReader(
            orcDataSource,
            OrcEncoding.DWRF,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            OrcReaderTestingUtils.createDefaultTestConfig(),
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            new RuntimeStats());

    List<TypeSignatureParameter> mapTypeParameters = ImmutableList.of(
            TypeSignatureParameter.of(keyType.getTypeSignature()),
            TypeSignatureParameter.of(valueType.getTypeSignature()));
    Type mapType = FUNCTION_AND_TYPE_MANAGER.getParameterizedType(StandardTypes.MAP, mapTypeParameters);

    try (OrcBatchRecordReader recordReader = orcReader.createBatchRecordReader(
            ImmutableMap.of(0, mapType),
            createOrcPredicate(0, mapType, expectedValues, OrcTester.Format.DWRF, true),
            HIVE_STORAGE_TIME_ZONE,
            new TestingHiveOrcAggregatedMemoryContext(),
            1024)) {
        Iterator<?> remainingExpected = expectedValues.iterator();
        boolean firstBatchPending = true;
        int processedRows = 0;

        int batchSize = toIntExact(recordReader.nextBatch());
        // A negative batch size ends the loop.
        while (batchSize >= 0) {
            boolean withinSkippedStripe = skipFirstStripe && processedRows < 10_000;
            boolean isSkippedFirstBatch = !withinSkippedStripe && skipFirstBatch && firstBatchPending;

            if (withinSkippedStripe || isSkippedFirstBatch) {
                // Skipped batch: consume the matching expected values without reading the block.
                assertEquals(advance(remainingExpected, batchSize), batchSize);
                if (isSkippedFirstBatch) {
                    firstBatchPending = false;
                }
            }
            else {
                Block block = recordReader.readBlock(0);
                int positionCount = block.getPositionCount();
                for (int position = 0; position < positionCount; position++) {
                    Object actual = mapType.getObjectValue(SESSION.getSqlFunctionProperties(), block, position);
                    String failureMessage = String.format("row mismatch at processed rows %d, position %d", processedRows, position);
                    assertEquals(actual, remainingExpected.next(), failureMessage);
                }
            }

            // Positions are checked before the current batch is added to the running total.
            assertEquals(recordReader.getReaderPosition(), processedRows);
            assertEquals(recordReader.getFilePosition(), processedRows);
            processedRows += batchSize;

            batchSize = toIntExact(recordReader.nextBatch());
        }

        assertFalse(remainingExpected.hasNext());
        assertEquals(recordReader.getReaderPosition(), processedRows);
        assertEquals(recordReader.getFilePosition(), processedRows);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) TinyintType(com.facebook.presto.common.type.TinyintType) OrcTester.mapType(com.facebook.presto.orc.OrcTester.mapType) BigintType(com.facebook.presto.common.type.BigintType) VarcharType(com.facebook.presto.common.type.VarcharType) RealType(com.facebook.presto.common.type.RealType) SmallintType(com.facebook.presto.common.type.SmallintType) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) BooleanType(com.facebook.presto.common.type.BooleanType) IntegerType(com.facebook.presto.common.type.IntegerType) Type(com.facebook.presto.common.type.Type) DoubleType(com.facebook.presto.common.type.DoubleType) DataSize(io.airlift.units.DataSize) Block(com.facebook.presto.common.block.Block) File(java.io.File)

Example 28 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

In class TestOrcLz4, method testReadLz4:

/**
 * Reads the {@code apache-lz4.orc} resource and checks that it is reported as LZ4-compressed,
 * that its footer reports 10,000 rows, and that the number of rows returned by the batch
 * reader matches the reader's file row count.
 *
 * <p>Columns 0, 1 and 2 are read as BIGINT, INTEGER and BIGINT; every value is fetched, but
 * the values themselves are not compared against anything.
 *
 * @throws Exception if loading the resource, reading it, or closing the record reader fails
 */
@Test
public void testReadLz4() throws Exception {
    // this file was written with Apache ORC
    // TODO: use Apache ORC library in OrcTester
    byte[] data = toByteArray(getResource("apache-lz4.orc"));
    OrcReader orcReader = new OrcReader(new InMemoryOrcDataSource(data), ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, new OrcReaderOptions(SIZE, SIZE, SIZE, false), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    assertEquals(orcReader.getCompressionKind(), LZ4);
    assertEquals(orcReader.getFooter().getNumberOfRows(), 10_000);
    Map<Integer, Type> includedColumns = ImmutableMap.<Integer, Type>builder().put(0, BIGINT).put(1, INTEGER).put(2, BIGINT).build();
    // try-with-resources closes the record reader even when a read or an assertion fails.
    try (OrcBatchRecordReader reader = orcReader.createBatchRecordReader(includedColumns, OrcPredicate.TRUE, DateTimeZone.UTC, new TestingHiveOrcAggregatedMemoryContext(), INITIAL_BATCH_SIZE)) {
        int rows = 0;
        while (true) {
            int batchSize = reader.nextBatch();
            if (batchSize <= 0) {
                break;
            }
            rows += batchSize;
            Block xBlock = reader.readBlock(0);
            Block yBlock = reader.readBlock(1);
            Block zBlock = reader.readBlock(2);
            // Fetch every value so that each position of each block is actually accessed.
            for (int position = 0; position < batchSize; position++) {
                BIGINT.getLong(xBlock, position);
                INTEGER.getLong(yBlock, position);
                BIGINT.getLong(zBlock, position);
            }
        }
        assertEquals(rows, reader.getFileRowCount());
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Type(com.facebook.presto.common.type.Type) Block(com.facebook.presto.common.block.Block) Test(org.testng.annotations.Test)

Example 29 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

In class TestDecryption, method testSkipFirstStripe:

/**
 * Opens the {@code encrypted_2splits.dwrf} resource with a plain-key encryption library and
 * the key {@code "key"} registered under index 0, creates a selective record reader with an
 * offset of 10, and checks the file contents through {@code assertFileContentsPresto}.
 *
 * <p>NOTE(review): judging by the test name, the offset is presumably chosen so that the
 * first stripe is skipped — confirm against {@code getSelectiveRecordReader}.
 *
 * @throws Exception if opening, reading, or closing the record reader fails
 */
@Test
public void testSkipFirstStripe() throws Exception {
    File encryptedFile = new File(getResource("encrypted_2splits.dwrf").getFile());
    OrcDataSource orcDataSource = new FileOrcDataSource(encryptedFile, new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true);

    OrcReaderOptions readerOptions = new OrcReaderOptions(new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), MAX_BLOCK_SIZE, false, false, false, false);
    DwrfEncryptionProvider encryptionProvider = new DwrfEncryptionProvider(new UnsupportedEncryptionLibrary(), new TestingPlainKeyEncryptionLibrary());
    DwrfKeyProvider keyProvider = DwrfKeyProvider.of(ImmutableMap.of(0, Slices.utf8Slice("key")));

    OrcReader orcReader = new OrcReader(
            orcDataSource,
            DWRF,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            readerOptions,
            false,
            encryptionProvider,
            keyProvider,
            new RuntimeStats());

    int readOffset = 10;
    try (OrcSelectiveRecordReader recordReader = getSelectiveRecordReader(orcDataSource, orcReader, readOffset)) {
        assertFileContentsPresto(
                ImmutableList.of(BIGINT),
                recordReader,
                ImmutableList.of(ImmutableList.of(1L)),
                ImmutableList.of(0));
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DataSize(io.airlift.units.DataSize) File(java.io.File) Test(org.testng.annotations.Test)

Example 30 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

In class TestCachingOrcDataSource, method doIntegration:

/**
 * Runs an end-to-end read of {@code orcDataSource} and checks that the data source is
 * wrapped in a {@code CachingOrcDataSource} for the given thresholds and that the expected
 * number of positions is read from column 0.
 *
 * @param orcDataSource data source to read; its read count is asserted to be 1 right after
 *        the reader is constructed
 * @param maxMergeDistance passed to the reader options and to {@code wrapWithCacheIfTinyStripes}
 * @param maxReadSize not referenced in this method body
 * @param tinyStripeThreshold passed to the reader options and to {@code wrapWithCacheIfTinyStripes}
 * @throws IOException if reading the file or closing the record reader fails
 */
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize, DataSize tinyStripeThreshold) throws IOException {
    OrcAggregatedMemoryContext systemMemoryContext = new TestingHiveOrcAggregatedMemoryContext();
    OrcReader orcReader = new OrcReader(orcDataSource, ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, new OrcReaderOptions(maxMergeDistance, tinyStripeThreshold, new DataSize(1, Unit.MEGABYTE), false), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    // 1 for reading file footer
    assertEquals(orcDataSource.getReadCount(), 1);
    List<StripeInformation> stripes = orcReader.getFooter().getStripes();
    // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode.
    assertGreaterThanOrEqual(stripes.size(), 3);
    // verify wrapped by CachingOrcReader
    assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, tinyStripeThreshold, systemMemoryContext), CachingOrcDataSource.class);
    // try-with-resources closes the record reader even when a read or an assertion fails.
    try (OrcBatchRecordReader orcRecordReader = orcReader.createBatchRecordReader(ImmutableMap.of(0, VARCHAR), (numberOfRows, statisticsByColumnIndex) -> true, HIVE_STORAGE_TIME_ZONE, new TestingHiveOrcAggregatedMemoryContext(), INITIAL_BATCH_SIZE)) {
        int positionCount = 0;
        while (true) {
            int batchSize = orcRecordReader.nextBatch();
            if (batchSize <= 0) {
                break;
            }
            Block block = orcRecordReader.readBlock(0);
            positionCount += block.getPositionCount();
        }
        assertEquals(positionCount, POSITION_COUNT);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DataSize(io.airlift.units.DataSize) Block(com.facebook.presto.common.block.Block) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Aggregations

StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource): 32, RuntimeStats (com.facebook.presto.common.RuntimeStats): 15, Test (org.testng.annotations.Test): 15, StorageStripeMetadataSource (com.facebook.presto.orc.StorageStripeMetadataSource): 13, DataSize (io.airlift.units.DataSize): 12, OrcBatchPageSourceFactory (com.facebook.presto.hive.orc.OrcBatchPageSourceFactory): 8, ParquetPageSourceFactory (com.facebook.presto.hive.parquet.ParquetPageSourceFactory): 7, RcFilePageSourceFactory (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory): 7, OrcFileTail (com.facebook.presto.orc.metadata.OrcFileTail): 7, List (java.util.List): 7, Type (com.facebook.presto.common.type.Type): 6, ImmutableList (com.google.common.collect.ImmutableList): 6, Optional (java.util.Optional): 6, CacheConfig (com.facebook.presto.cache.CacheConfig): 5, ArrayType (com.facebook.presto.common.type.ArrayType): 5, StripeMetadataSourceFactory (com.facebook.presto.orc.StripeMetadataSourceFactory): 5, BIGINT (com.facebook.presto.common.type.BigintType.BIGINT): 4, BOOLEAN (com.facebook.presto.common.type.BooleanType.BOOLEAN): 4, DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE): 4, INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER): 4