Search in sources :

Example 21 with RuntimeStats

use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

the class OrcTester method createCustomOrcSelectiveRecordReader.

/**
 * Opens {@code file} with an {@link OrcReader} and returns a selective record reader covering
 * the byte range from 0 to the data source's reported size.
 *
 * <p>Before creating the record reader, asserts that the first {@code types.size()} column names
 * reported by the reader equal {@code makeColumnNames(types.size())}.
 *
 * @param file ORC/DWRF file to read
 * @param orcEncoding encoding handed to the {@link OrcReader}
 * @param predicate predicate handed to the record reader
 * @param types column types; only their count is used here, for the column-name check
 * @param initialBatchSize initial batch size handed to the record reader
 * @param filters passed through to {@code createSelectiveRecordReader}
 * @param filterFunctions passed through to {@code createSelectiveRecordReader}
 * @param filterFunctionInputMapping passed through to {@code createSelectiveRecordReader}
 * @param requiredSubfields passed through to {@code createSelectiveRecordReader}
 * @param constantValues passed through to {@code createSelectiveRecordReader}
 * @param intermediateEncryptionKeys keys wrapped with {@code DwrfKeyProvider.of}
 * @param includedColumns passed through to {@code createSelectiveRecordReader}
 * @param outputColumns passed through to {@code createSelectiveRecordReader}
 * @param mapNullKeysEnabled forwarded to {@link OrcReaderOptions}
 * @param systemMemoryUsage memory context handed to the record reader
 * @param appendRowNumber forwarded to {@link OrcReaderOptions}
 * @return the selective record reader created by the {@link OrcReader}
 * @throws IOException declared for the underlying file/reader operations
 */
public static OrcSelectiveRecordReader createCustomOrcSelectiveRecordReader(File file, OrcEncoding orcEncoding, OrcPredicate predicate, List<Type> types, int initialBatchSize, Map<Integer, Map<Subfield, TupleDomainFilter>> filters, List<FilterFunction> filterFunctions, Map<Integer, Integer> filterFunctionInputMapping, Map<Integer, List<Subfield>> requiredSubfields, Map<Integer, Object> constantValues, Map<Integer, Slice> intermediateEncryptionKeys, Map<Integer, Type> includedColumns, List<Integer> outputColumns, boolean mapNullKeysEnabled, OrcAggregatedMemoryContext systemMemoryUsage, boolean appendRowNumber) throws IOException {
    OrcDataSource dataSource = new FileOrcDataSource(
            file,
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            true);

    OrcReaderOptions readerOptions = new OrcReaderOptions(
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            MAX_BLOCK_SIZE,
            false,
            mapNullKeysEnabled,
            false,
            appendRowNumber);
    DwrfEncryptionProvider encryptionProvider = new DwrfEncryptionProvider(
            new UnsupportedEncryptionLibrary(),
            new TestingEncryptionLibrary());

    OrcReader reader = new OrcReader(
            dataSource,
            orcEncoding,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            readerOptions,
            false,
            encryptionProvider,
            DwrfKeyProvider.of(intermediateEncryptionKeys),
            new RuntimeStats());

    // Sanity check: the leading columns must carry the generated test column names.
    int columnCount = types.size();
    assertEquals(reader.getColumnNames().subList(0, columnCount), makeColumnNames(columnCount));

    return reader.createSelectiveRecordReader(
            includedColumns,
            outputColumns,
            filters,
            filterFunctions,
            filterFunctionInputMapping,
            requiredSubfields,
            constantValues,
            ImmutableMap.of(),
            predicate,
            0,
            dataSource.getSize(),
            HIVE_STORAGE_TIME_ZONE,
            LEGACY_MAP_SUBSCRIPT,
            systemMemoryUsage,
            Optional.empty(),
            initialBatchSize);
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) DataSize(io.airlift.units.DataSize) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource)

Example 22 with RuntimeStats

use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

the class TestOrcReaderPositions method testReadUserMetadata.

/**
 * Writes a temporary file via {@code createFileWithOnlyUserMetadata}, reads its footer back
 * through an {@link OrcReader}, and asserts that the user metadata (decoded with
 * {@code Slice.toStringAscii}) equals the map that was written.
 */
@Test
public void testReadUserMetadata() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        Map<String, String> expectedMetadata = ImmutableMap.of(
                "a", "ala",
                "b", "ma",
                "c", "kota");
        createFileWithOnlyUserMetadata(tempFile.getFile(), expectedMetadata);

        OrcDataSource dataSource = new FileOrcDataSource(
                tempFile.getFile(),
                new DataSize(1, MEGABYTE),
                new DataSize(1, MEGABYTE),
                new DataSize(1, MEGABYTE),
                true);
        OrcReader reader = new OrcReader(
                dataSource,
                ORC,
                new StorageOrcFileTailSource(),
                new StorageStripeMetadataSource(),
                NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                OrcReaderTestingUtils.createDefaultTestConfig(),
                false,
                NO_ENCRYPTION,
                DwrfKeyProvider.EMPTY,
                new RuntimeStats());

        // The footer stores metadata values as slices; convert them to strings for comparison.
        Map<String, String> actualMetadata = Maps.transformValues(
                reader.getFooter().getUserMetadata(),
                value -> value.toStringAscii());
        assertEquals(actualMetadata, expectedMetadata);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) Slice(io.airlift.slice.Slice) DataSize(io.airlift.units.DataSize) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Footer(com.facebook.presto.orc.metadata.Footer) Test(org.testng.annotations.Test)

Example 23 with RuntimeStats

use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

the class TestMapFlatBatchStreamReader method runTest.

/**
 * Reads the map column of a DWRF test resource batch by batch and checks every row read
 * against {@code expectedValues}, as well as the reader/file positions before and after
 * each batch.
 *
 * <p>Batches are not read (the expected-value iterator is merely advanced) when either
 * {@code skipFirstStripe} is set and fewer than 10,000 rows have been processed
 * (presumably the size of the first stripe in the test files; confirm against the fixtures),
 * or {@code skipFirstBatch} is set and this is the first batch reaching that check.
 *
 * @param testOrcFileName classpath resource name of the file to read
 * @param keyType map key type
 * @param valueType map value type
 * @param expectedValues expected row values, in file order
 * @param skipFirstBatch whether to skip reading the first eligible batch
 * @param skipFirstStripe whether to skip reading batches while fewer than 10,000 rows were processed
 */
private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, List<Map<K, V>> expectedValues, boolean skipFirstBatch, boolean skipFirstStripe) throws Exception {
    File orcFile = new File(getResource(testOrcFileName).getFile());
    OrcDataSource dataSource = new FileOrcDataSource(
            orcFile,
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            true);
    OrcReader reader = new OrcReader(
            dataSource,
            OrcEncoding.DWRF,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            OrcReaderTestingUtils.createDefaultTestConfig(),
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            new RuntimeStats());

    Type mapColumnType = FUNCTION_AND_TYPE_MANAGER.getParameterizedType(
            StandardTypes.MAP,
            ImmutableList.of(
                    TypeSignatureParameter.of(keyType.getTypeSignature()),
                    TypeSignatureParameter.of(valueType.getTypeSignature())));

    try (OrcBatchRecordReader recordReader = reader.createBatchRecordReader(
            ImmutableMap.of(0, mapColumnType),
            createOrcPredicate(0, mapColumnType, expectedValues, OrcTester.Format.DWRF, true),
            HIVE_STORAGE_TIME_ZONE,
            new TestingHiveOrcAggregatedMemoryContext(),
            1024)) {
        Iterator<?> remainingExpected = expectedValues.iterator();
        boolean firstBatchPending = true;
        int rowsSeen = 0;

        while (true) {
            int batchSize = toIntExact(recordReader.nextBatch());
            if (batchSize < 0) {
                // A negative batch size ends the read loop.
                break;
            }

            if (skipFirstStripe && rowsSeen < 10_000) {
                // Skip without reading: just move past the expected rows of this batch.
                assertEquals(advance(remainingExpected, batchSize), batchSize);
            }
            else if (skipFirstBatch && firstBatchPending) {
                assertEquals(advance(remainingExpected, batchSize), batchSize);
                firstBatchPending = false;
            }
            else {
                Block mapBlock = recordReader.readBlock(0);
                int positionCount = mapBlock.getPositionCount();
                for (int row = 0; row < positionCount; row++) {
                    Object actual = mapColumnType.getObjectValue(SESSION.getSqlFunctionProperties(), mapBlock, row);
                    assertEquals(actual, remainingExpected.next(), String.format("row mismatch at processed rows %d, position %d", rowsSeen, row));
                }
            }

            // Positions are checked before the current batch is added to the running total.
            assertEquals(recordReader.getReaderPosition(), rowsSeen);
            assertEquals(recordReader.getFilePosition(), rowsSeen);
            rowsSeen += batchSize;
        }

        assertFalse(remainingExpected.hasNext());
        assertEquals(recordReader.getReaderPosition(), rowsSeen);
        assertEquals(recordReader.getFilePosition(), rowsSeen);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) TinyintType(com.facebook.presto.common.type.TinyintType) OrcTester.mapType(com.facebook.presto.orc.OrcTester.mapType) BigintType(com.facebook.presto.common.type.BigintType) VarcharType(com.facebook.presto.common.type.VarcharType) RealType(com.facebook.presto.common.type.RealType) SmallintType(com.facebook.presto.common.type.SmallintType) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) BooleanType(com.facebook.presto.common.type.BooleanType) IntegerType(com.facebook.presto.common.type.IntegerType) Type(com.facebook.presto.common.type.Type) DoubleType(com.facebook.presto.common.type.DoubleType) DataSize(io.airlift.units.DataSize) Block(com.facebook.presto.common.block.Block) File(java.io.File)

Example 24 with RuntimeStats

use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

the class TestOrcBloomFilters method testOrcHiveBloomFilterSerde.

/**
 * Round-trips a bloom filter through the ORC protobuf bloom-filter index and checks the result
 * two ways: via {@code OrcMetadataReader.readBloomFilterIndexes}, and by parsing the serialized
 * bytes directly with {@code OrcProto.BloomFilterIndex.parseFrom}.
 */
@Test
public void testOrcHiveBloomFilterSerde() throws Exception {
    BloomFilter written = new BloomFilter(1000L, 0.05);
    written.addString(TEST_STRING);
    assertTrue(written.testString(TEST_STRING));

    // Copy the bit set and hash-function count into the protobuf representation.
    OrcProto.BloomFilter protoWritten = OrcProto.BloomFilter.newBuilder()
            .addAllBitset(Longs.asList(written.getBitSet()))
            .setNumHashFunctions(written.getNumHashFunctions())
            .build();
    byte[] bytes = serializeBloomFilterToIndex(protoWritten, OrcProto.BloomFilterIndex.getDefaultInstance());

    // Path 1: decode through the metadata reader
    OrcMetadataReader metadataReader = new OrcMetadataReader(new RuntimeStats());
    List<HiveBloomFilter> decodedFilters = metadataReader.readBloomFilterIndexes(new ByteArrayInputStream(bytes));
    assertEquals(decodedFilters.size(), 1);

    HiveBloomFilter decoded = decodedFilters.get(0);
    assertTrue(decoded.testString(TEST_STRING));
    assertFalse(decoded.testString(TEST_STRING_NOT_WRITTEN));
    assertEquals(written.getBitSize(), decoded.getBitSize());
    assertEquals(written.getNumHashFunctions(), decoded.getNumHashFunctions());
    // The decoded bit set must be identical to the one written
    assertTrue(Arrays.equals(decoded.getBitSet(), written.getBitSet()));

    // Path 2: parse the protobuf directly, which exposes the raw bit set for inspection
    OrcProto.BloomFilterIndex parsedIndex = OrcProto.BloomFilterIndex.parseFrom(CodedInputStream.newInstance(bytes));
    List<OrcProto.BloomFilter> protoFilters = parsedIndex.getBloomFilterList();
    assertEquals(protoFilters.size(), 1);

    OrcProto.BloomFilter protoRead = protoFilters.get(0);
    // bit set contents
    assertTrue(Arrays.equals(Longs.toArray(protoRead.getBitsetList()), written.getBitSet()));
    // number of hash functions
    assertEquals(written.getNumHashFunctions(), protoRead.getNumHashFunctions());
    // number of longs in the bit set
    assertEquals(written.getBitSet().length, protoRead.getBitsetCount());
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) InputStream(java.io.InputStream) RuntimeStats(com.facebook.presto.common.RuntimeStats) OrcProto(com.facebook.presto.orc.proto.OrcProto) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) TupleDomainOrcPredicate.checkInBloomFilter(com.facebook.presto.orc.TupleDomainOrcPredicate.checkInBloomFilter) BloomFilter(com.facebook.presto.orc.metadata.statistics.BloomFilter) Test(org.testng.annotations.Test)

Example 25 with RuntimeStats

use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

the class TestOrcLz4 method testReadLz4.

/**
 * Reads the {@code apache-lz4.orc} resource and checks that the reader reports LZ4
 * compression and 10,000 footer rows, and that the number of rows returned by the batch
 * reader equals the reader's file row count.
 *
 * <p>The batch reader is opened in try-with-resources so it is closed even when a read or
 * an assertion fails.
 */
@Test
public void testReadLz4() throws Exception {
    // this file was written with Apache ORC
    // TODO: use Apache ORC library in OrcTester
    byte[] data = toByteArray(getResource("apache-lz4.orc"));
    OrcReader orcReader = new OrcReader(new InMemoryOrcDataSource(data), ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, new OrcReaderOptions(SIZE, SIZE, SIZE, false), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    assertEquals(orcReader.getCompressionKind(), LZ4);
    assertEquals(orcReader.getFooter().getNumberOfRows(), 10_000);
    Map<Integer, Type> includedColumns = ImmutableMap.<Integer, Type>builder().put(0, BIGINT).put(1, INTEGER).put(2, BIGINT).build();
    try (OrcBatchRecordReader reader = orcReader.createBatchRecordReader(includedColumns, OrcPredicate.TRUE, DateTimeZone.UTC, new TestingHiveOrcAggregatedMemoryContext(), INITIAL_BATCH_SIZE)) {
        int rows = 0;
        while (true) {
            int batchSize = reader.nextBatch();
            if (batchSize <= 0) {
                break;
            }
            rows += batchSize;
            Block xBlock = reader.readBlock(0);
            Block yBlock = reader.readBlock(1);
            Block zBlock = reader.readBlock(2);
            // Values are read but not compared; presumably this only verifies that every
            // position can be decoded without an exception.
            for (int position = 0; position < batchSize; position++) {
                BIGINT.getLong(xBlock, position);
                INTEGER.getLong(yBlock, position);
                BIGINT.getLong(zBlock, position);
            }
        }
        assertEquals(rows, reader.getFileRowCount());
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Type(com.facebook.presto.common.type.Type) Block(com.facebook.presto.common.block.Block) Test(org.testng.annotations.Test)

Aggregations

RuntimeStats (com.facebook.presto.common.RuntimeStats): 31, DataSize (io.airlift.units.DataSize): 16, StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource): 15, Type (com.facebook.presto.common.type.Type): 11, OrcReader (com.facebook.presto.orc.OrcReader): 8, ImmutableList (com.google.common.collect.ImmutableList): 8, Block (com.facebook.presto.common.block.Block): 7, PrestoException (com.facebook.presto.spi.PrestoException): 7, IOException (java.io.IOException): 7, OrcDataSource (com.facebook.presto.orc.OrcDataSource): 6, OrcReaderOptions (com.facebook.presto.orc.OrcReaderOptions): 6, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 6, OrcBatchRecordReader (com.facebook.presto.orc.OrcBatchRecordReader): 5, RaptorOrcAggregatedMemoryContext (com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext): 5, ImmutableMap (com.google.common.collect.ImmutableMap): 5, Test (org.testng.annotations.Test): 5, TypeManager (com.facebook.presto.common.type.TypeManager): 4, ArrayType (com.facebook.presto.common.type.ArrayType): 3, DecimalType (com.facebook.presto.common.type.DecimalType): 3, OrcAggregatedMemoryContext (com.facebook.presto.orc.OrcAggregatedMemoryContext): 3