Use of com.facebook.presto.common.RuntimeStats in the presto project by prestodb.
Class: OrcTester; method: createCustomOrcSelectiveRecordReader.
/**
 * Opens {@code file} through a {@link FileOrcDataSource} and creates a selective record reader
 * spanning offset 0 to the data source's full size.
 *
 * <p>Before the record reader is created, the first {@code types.size()} column names reported by
 * the ORC reader are asserted to equal {@code makeColumnNames(types.size())}.
 *
 * @param file ORC file to open
 * @param orcEncoding encoding handed to the {@link OrcReader}
 * @param predicate predicate handed to the record reader
 * @param types column types; only the size of this list is used here, for the column-name check
 * @param initialBatchSize initial batch size handed to the record reader
 * @param filters per-column subfield filters handed to the record reader
 * @param filterFunctions filter functions handed to the record reader
 * @param filterFunctionInputMapping filter-function input mapping handed to the record reader
 * @param requiredSubfields per-column required subfields handed to the record reader
 * @param constantValues per-column constant values handed to the record reader
 * @param intermediateEncryptionKeys keys wrapped with {@code DwrfKeyProvider.of(...)} for the reader
 * @param includedColumns columns (index to type) handed to the record reader
 * @param outputColumns output column indexes handed to the record reader
 * @param mapNullKeysEnabled flag forwarded into the {@link OrcReaderOptions}
 * @param systemMemoryUsage memory context handed to the record reader
 * @param appendRowNumber flag forwarded into the {@link OrcReaderOptions}
 * @return the selective record reader created by the ORC reader
 * @throws IOException declared by the underlying data source / reader calls
 */
public static OrcSelectiveRecordReader createCustomOrcSelectiveRecordReader(
        File file,
        OrcEncoding orcEncoding,
        OrcPredicate predicate,
        List<Type> types,
        int initialBatchSize,
        Map<Integer, Map<Subfield, TupleDomainFilter>> filters,
        List<FilterFunction> filterFunctions,
        Map<Integer, Integer> filterFunctionInputMapping,
        Map<Integer, List<Subfield>> requiredSubfields,
        Map<Integer, Object> constantValues,
        Map<Integer, Slice> intermediateEncryptionKeys,
        Map<Integer, Type> includedColumns,
        List<Integer> outputColumns,
        boolean mapNullKeysEnabled,
        OrcAggregatedMemoryContext systemMemoryUsage,
        boolean appendRowNumber)
        throws IOException
{
    OrcDataSource orcDataSource = new FileOrcDataSource(
            file,
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            true);

    // Collaborators are built in the same order in which the reader's constructor consumes them.
    StorageOrcFileTailSource fileTailSource = new StorageOrcFileTailSource();
    StorageStripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource();
    OrcReaderOptions readerOptions = new OrcReaderOptions(
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            MAX_BLOCK_SIZE,
            false,
            mapNullKeysEnabled,
            false,
            appendRowNumber);
    DwrfEncryptionProvider encryptionProvider = new DwrfEncryptionProvider(
            new UnsupportedEncryptionLibrary(),
            new TestingEncryptionLibrary());

    OrcReader orcReader = new OrcReader(
            orcDataSource,
            orcEncoding,
            fileTailSource,
            stripeMetadataSource,
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            readerOptions,
            false,
            encryptionProvider,
            DwrfKeyProvider.of(intermediateEncryptionKeys),
            new RuntimeStats());

    // Sanity check: the leading columns must carry the generated test column names.
    int columnCount = types.size();
    assertEquals(orcReader.getColumnNames().subList(0, columnCount), makeColumnNames(columnCount));

    return orcReader.createSelectiveRecordReader(
            includedColumns,
            outputColumns,
            filters,
            filterFunctions,
            filterFunctionInputMapping,
            requiredSubfields,
            constantValues,
            ImmutableMap.of(),
            predicate,
            0,
            orcDataSource.getSize(),
            HIVE_STORAGE_TIME_ZONE,
            LEGACY_MAP_SUBSCRIPT,
            systemMemoryUsage,
            Optional.empty(),
            initialBatchSize);
}
Use of com.facebook.presto.common.RuntimeStats in the presto project by prestodb.
Class: TestOrcReaderPositions; method: testReadUserMetadata.
/**
 * Writes a file containing only user metadata, opens it with an {@link OrcReader}, and asserts
 * that the footer's user metadata (values decoded with {@code Slice::toStringAscii}) equals what
 * was written.
 *
 * <p>No record reader is created in this test, so nothing else takes ownership of the data
 * source; it is therefore closed here explicitly via try-with-resources.
 */
@Test
public void testReadUserMetadata()
        throws Exception
{
    try (TempFile tempFile = new TempFile()) {
        Map<String, String> metadata = ImmutableMap.of("a", "ala", "b", "ma", "c", "kota");
        createFileWithOnlyUserMetadata(tempFile.getFile(), metadata);

        // Nested (not combined) try: the file must be fully written before the data source opens it,
        // and the data source is closed before the enclosing try releases the temp file.
        try (OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true)) {
            OrcReader orcReader = new OrcReader(
                    orcDataSource,
                    ORC,
                    new StorageOrcFileTailSource(),
                    new StorageStripeMetadataSource(),
                    NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                    OrcReaderTestingUtils.createDefaultTestConfig(),
                    false,
                    NO_ENCRYPTION,
                    DwrfKeyProvider.EMPTY,
                    new RuntimeStats());

            Footer footer = orcReader.getFooter();
            Map<String, String> readMetadata = Maps.transformValues(footer.getUserMetadata(), Slice::toStringAscii);
            assertEquals(readMetadata, metadata);
        }
    }
}
Use of com.facebook.presto.common.RuntimeStats in the presto project by prestodb.
Class: TestMapFlatBatchStreamReader; method: runTest.
/**
 * Reads the single map column (column 0) of the DWRF resource {@code testOrcFileName} batch by
 * batch and compares every row that is read against {@code expectedValues}.
 *
 * <p>A batch is skipped (its block is never read and the expected-value iterator is simply
 * advanced by the batch size) when either:
 * <ul>
 * <li>{@code skipFirstStripe} is set and fewer than 10_000 rows have been processed so far
 * (presumably the first stripe of the test files holds 10,000 rows - TODO confirm), or</li>
 * <li>{@code skipFirstBatch} is set and no batch has yet been skipped for that reason.</li>
 * </ul>
 * The reader and file positions are asserted to equal the processed row count before each batch
 * is accounted for and once more after the final batch.
 *
 * @param testOrcFileName classpath resource name of the ORC file
 * @param keyType map key type
 * @param valueType map value type
 * @param expectedValues expected map per row, in file order
 * @param skipFirstBatch whether to skip reading the first batch not already skipped as part of the first stripe
 * @param skipFirstStripe whether to skip reading batches while fewer than 10_000 rows have been processed
 */
private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, List<Map<K, V>> expectedValues, boolean skipFirstBatch, boolean skipFirstStripe)
        throws Exception
{
    File orcFile = new File(getResource(testOrcFileName).getFile());
    OrcDataSource orcDataSource = new FileOrcDataSource(
            orcFile,
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            new DataSize(1, MEGABYTE),
            true);
    OrcReader orcReader = new OrcReader(
            orcDataSource,
            OrcEncoding.DWRF,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            OrcReaderTestingUtils.createDefaultTestConfig(),
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            new RuntimeStats());

    TypeSignatureParameter keyParameter = TypeSignatureParameter.of(keyType.getTypeSignature());
    TypeSignatureParameter valueParameter = TypeSignatureParameter.of(valueType.getTypeSignature());
    Type mapType = FUNCTION_AND_TYPE_MANAGER.getParameterizedType(StandardTypes.MAP, ImmutableList.of(keyParameter, valueParameter));

    try (OrcBatchRecordReader recordReader = orcReader.createBatchRecordReader(
            ImmutableMap.of(0, mapType),
            createOrcPredicate(0, mapType, expectedValues, OrcTester.Format.DWRF, true),
            HIVE_STORAGE_TIME_ZONE,
            new TestingHiveOrcAggregatedMemoryContext(),
            1024)) {
        Iterator<?> remainingExpected = expectedValues.iterator();
        boolean firstBatchPending = true;
        int rowsSeen = 0;

        // A negative batch size ends the loop.
        int batchSize = toIntExact(recordReader.nextBatch());
        while (batchSize >= 0) {
            boolean withinSkippedStripe = skipFirstStripe && rowsSeen < 10_000;
            // The stripe check takes precedence; the first-batch flag is only consumed when it does not apply.
            boolean skippedAsFirstBatch = !withinSkippedStripe && skipFirstBatch && firstBatchPending;

            if (withinSkippedStripe || skippedAsFirstBatch) {
                // Skip without reading the block; just keep the expected values aligned.
                assertEquals(advance(remainingExpected, batchSize), batchSize);
                if (skippedAsFirstBatch) {
                    firstBatchPending = false;
                }
            }
            else {
                Block block = recordReader.readBlock(0);
                for (int position = 0; position < block.getPositionCount(); position++) {
                    Object actual = mapType.getObjectValue(SESSION.getSqlFunctionProperties(), block, position);
                    Object expected = remainingExpected.next();
                    String mismatchMessage = String.format("row mismatch at processed rows %d, position %d", rowsSeen, position);
                    assertEquals(actual, expected, mismatchMessage);
                }
            }

            // Positions are compared against the row count from before the current batch is added.
            assertEquals(recordReader.getReaderPosition(), rowsSeen);
            assertEquals(recordReader.getFilePosition(), rowsSeen);
            rowsSeen += batchSize;

            batchSize = toIntExact(recordReader.nextBatch());
        }

        assertFalse(remainingExpected.hasNext());
        assertEquals(recordReader.getReaderPosition(), rowsSeen);
        assertEquals(recordReader.getFilePosition(), rowsSeen);
    }
}
Use of com.facebook.presto.common.RuntimeStats in the presto project by prestodb.
Class: TestOrcBloomFilters; method: testOrcHiveBloomFilterSerde.
/**
 * Serializes a bloom filter containing {@code TEST_STRING} into an ORC bloom filter index and
 * checks the bytes through two read paths:
 * <ol>
 * <li>{@link OrcMetadataReader#readBloomFilterIndexes}, comparing membership results, bit size,
 * hash-function count and bit set with the written filter;</li>
 * <li>direct protobuf parsing of the same bytes, comparing bit set, hash-function count and
 * bit set length with the written filter.</li>
 * </ol>
 */
@Test
public void testOrcHiveBloomFilterSerde()
        throws Exception
{
    BloomFilter written = new BloomFilter(1000L, 0.05);
    written.addString(TEST_STRING);
    assertTrue(written.testString(TEST_STRING));

    OrcProto.BloomFilter protoFilter = OrcProto.BloomFilter.newBuilder()
            .addAllBitset(Longs.asList(written.getBitSet()))
            .setNumHashFunctions(written.getNumHashFunctions())
            .build();
    byte[] serialized = serializeBloomFilterToIndex(protoFilter, OrcProto.BloomFilterIndex.getDefaultInstance());

    // Path 1: read through the metadata reader
    InputStream serializedStream = new ByteArrayInputStream(serialized);
    OrcMetadataReader metadataReader = new OrcMetadataReader(new RuntimeStats());
    List<HiveBloomFilter> decodedFilters = metadataReader.readBloomFilterIndexes(serializedStream);
    assertEquals(decodedFilters.size(), 1);

    HiveBloomFilter decoded = decodedFilters.get(0);
    assertTrue(decoded.testString(TEST_STRING));
    assertFalse(decoded.testString(TEST_STRING_NOT_WRITTEN));
    assertEquals(written.getBitSize(), decoded.getBitSize());
    assertEquals(written.getNumHashFunctions(), decoded.getNumHashFunctions());
    // The decoded bit set must match the written one exactly
    assertTrue(Arrays.equals(decoded.getBitSet(), written.getBitSet()));

    // Path 2: parse the protobuf directly, which allows closer inspection of the bit sets
    // (this helped to fix a lot of bugs)
    OrcProto.BloomFilterIndex parsedIndex = OrcProto.BloomFilterIndex.parseFrom(CodedInputStream.newInstance(serialized));
    List<OrcProto.BloomFilter> parsedFilters = parsedIndex.getBloomFilterList();
    assertEquals(parsedFilters.size(), 1);

    OrcProto.BloomFilter parsed = parsedFilters.get(0);
    // Contents of the ORC bloom filter bit set
    assertTrue(Arrays.equals(Longs.toArray(parsed.getBitsetList()), written.getBitSet()));
    // Number of hash functions
    assertEquals(written.getNumHashFunctions(), parsed.getNumHashFunctions());
    // Bit set length
    assertEquals(written.getBitSet().length, parsed.getBitsetCount());
}
Use of com.facebook.presto.common.RuntimeStats in the presto project by prestodb.
Class: TestOrcLz4; method: testReadLz4.
/**
 * Reads the {@code apache-lz4.orc} resource from memory, asserts that the reader reports LZ4
 * compression and 10_000 rows in the footer, then reads columns 0-2 batch by batch and asserts
 * that the total number of rows read equals the reader's file row count.
 *
 * <p>The record reader is managed with try-with-resources so it is closed even when an assertion
 * or a read fails.
 */
@Test
public void testReadLz4()
        throws Exception
{
    // this file was written with Apache ORC
    // TODO: use Apache ORC library in OrcTester
    byte[] data = toByteArray(getResource("apache-lz4.orc"));

    OrcReader orcReader = new OrcReader(
            new InMemoryOrcDataSource(data),
            ORC,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            new OrcReaderOptions(SIZE, SIZE, SIZE, false),
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            new RuntimeStats());

    assertEquals(orcReader.getCompressionKind(), LZ4);
    assertEquals(orcReader.getFooter().getNumberOfRows(), 10_000);

    Map<Integer, Type> includedColumns = ImmutableMap.<Integer, Type>builder()
            .put(0, BIGINT)
            .put(1, INTEGER)
            .put(2, BIGINT)
            .build();

    try (OrcBatchRecordReader reader = orcReader.createBatchRecordReader(includedColumns, OrcPredicate.TRUE, DateTimeZone.UTC, new TestingHiveOrcAggregatedMemoryContext(), INITIAL_BATCH_SIZE)) {
        int rows = 0;
        while (true) {
            int batchSize = reader.nextBatch();
            if (batchSize <= 0) {
                break;
            }
            rows += batchSize;

            Block xBlock = reader.readBlock(0);
            Block yBlock = reader.readBlock(1);
            Block zBlock = reader.readBlock(2);

            // Values are read and discarded: the loop only checks that every position can be accessed.
            for (int position = 0; position < batchSize; position++) {
                BIGINT.getLong(xBlock, position);
                INTEGER.getLong(yBlock, position);
                BIGINT.getLong(zBlock, position);
            }
        }

        // Must stay inside the try block: the reader is queried before it is closed.
        assertEquals(rows, reader.getFileRowCount());
    }
}
Aggregations