use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
the class TestOrcReaderPositions method testReadUserMetadata.
/**
 * Verifies that user metadata written to an ORC file is returned unchanged by the reader's footer.
 *
 * <p>Writes a file that contains only user metadata, opens it through an {@link OrcReader},
 * converts each footer metadata value to a string with {@code Slice::toStringAscii}, and
 * compares the result with the map that was written.
 *
 * @throws Exception if creating, writing, or reading the temporary file fails
 */
@Test
public void testReadUserMetadata()
        throws Exception
{
    try (TempFile tempFile = new TempFile()) {
        Map<String, String> metadata = ImmutableMap.of("a", "ala", "b", "ma", "c", "kota");
        createFileWithOnlyUserMetadata(tempFile.getFile(), metadata);

        // No record reader is created in this test, so nothing else would close the data source.
        // Declaring it as a nested resource closes it before tempFile itself is closed.
        try (OrcDataSource orcDataSource = new FileOrcDataSource(
                tempFile.getFile(),
                new DataSize(1, MEGABYTE),
                new DataSize(1, MEGABYTE),
                new DataSize(1, MEGABYTE),
                true)) {
            OrcReader orcReader = new OrcReader(
                    orcDataSource,
                    ORC,
                    new StorageOrcFileTailSource(),
                    new StorageStripeMetadataSource(),
                    NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                    OrcReaderTestingUtils.createDefaultTestConfig(),
                    false,
                    NO_ENCRYPTION,
                    DwrfKeyProvider.EMPTY,
                    new RuntimeStats());

            Footer footer = orcReader.getFooter();
            Map<String, String> readMetadata = Maps.transformValues(footer.getUserMetadata(), Slice::toStringAscii);
            assertEquals(readMetadata, metadata);
        }
    }
}
use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
the class TestMapFlatBatchStreamReader method runTest.
/**
 * Reads a flat-map DWRF test resource with a batch record reader and checks every decoded row
 * against {@code expectedValues}, optionally skipping verification of some leading rows.
 *
 * <p>For skipped batches the expected-value iterator is still advanced by the batch size, so the
 * rows verified afterwards stay aligned with the rows the reader returns.
 *
 * @param testOrcFileName classpath resource name of the file to read
 * @param keyType type of the map keys
 * @param valueType type of the map values
 * @param expectedValues expected map for each row, in file order
 * @param skipFirstBatch when true, the first batch that is not otherwise skipped is not read
 * @param skipFirstStripe when true, batches are not read while fewer than 10,000 rows have been processed
 * @throws Exception if the file cannot be opened or read
 */
private <K, V> void runTest(String testOrcFileName, Type keyType, Type valueType, List<Map<K, V>> expectedValues, boolean skipFirstBatch, boolean skipFirstStripe)
        throws Exception
{
    File orcFile = new File(getResource(testOrcFileName).getFile());
    DataSize oneMegabyte = new DataSize(1, MEGABYTE);
    OrcDataSource orcDataSource = new FileOrcDataSource(orcFile, oneMegabyte, oneMegabyte, oneMegabyte, true);
    OrcReader orcReader = new OrcReader(
            orcDataSource,
            OrcEncoding.DWRF,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            OrcReaderTestingUtils.createDefaultTestConfig(),
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            new RuntimeStats());

    Type mapType = FUNCTION_AND_TYPE_MANAGER.getParameterizedType(
            StandardTypes.MAP,
            ImmutableList.of(
                    TypeSignatureParameter.of(keyType.getTypeSignature()),
                    TypeSignatureParameter.of(valueType.getTypeSignature())));

    try (OrcBatchRecordReader recordReader = orcReader.createBatchRecordReader(
            ImmutableMap.of(0, mapType),
            createOrcPredicate(0, mapType, expectedValues, OrcTester.Format.DWRF, true),
            HIVE_STORAGE_TIME_ZONE,
            new TestingHiveOrcAggregatedMemoryContext(),
            1024)) {
        Iterator<?> remainingExpected = expectedValues.iterator();
        boolean firstBatchPending = true;
        int rowsProcessed = 0;

        // The loop ends when nextBatch() returns a negative batch size.
        int batchSize = toIntExact(recordReader.nextBatch());
        while (batchSize >= 0) {
            // NOTE(review): 10_000 is presumably the row count of the first stripe in the test files -- confirm.
            boolean withinFirstStripe = skipFirstStripe && rowsProcessed < 10_000;

            if (withinFirstStripe || (skipFirstBatch && firstBatchPending)) {
                // Skip reading this batch but keep the expected values aligned with the reader.
                assertEquals(advance(remainingExpected, batchSize), batchSize);
                if (!withinFirstStripe) {
                    // Only a skip caused by skipFirstBatch consumes the first-batch flag.
                    firstBatchPending = false;
                }
            }
            else {
                Block block = recordReader.readBlock(0);
                for (int position = 0; position < block.getPositionCount(); position++) {
                    Object actual = mapType.getObjectValue(SESSION.getSqlFunctionProperties(), block, position);
                    Object expected = remainingExpected.next();
                    assertEquals(actual, expected, String.format("row mismatch at processed rows %d, position %d", rowsProcessed, position));
                }
            }

            // Positions are checked before this batch's rows are added to the running count.
            assertEquals(recordReader.getReaderPosition(), rowsProcessed);
            assertEquals(recordReader.getFilePosition(), rowsProcessed);
            rowsProcessed += batchSize;

            batchSize = toIntExact(recordReader.nextBatch());
        }

        assertFalse(remainingExpected.hasNext());
        assertEquals(recordReader.getReaderPosition(), rowsProcessed);
        assertEquals(recordReader.getFilePosition(), rowsProcessed);
    }
}
use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
the class TestOrcLz4 method testReadLz4.
/**
 * Verifies that an LZ4-compressed ORC file can be read end to end.
 *
 * <p>Checks the compression kind and the footer row count, then reads three columns batch by
 * batch and confirms that the number of rows returned equals the reader's file row count.
 *
 * @throws Exception if the resource cannot be loaded or read
 */
@Test
public void testReadLz4()
        throws Exception
{
    // this file was written with Apache ORC
    // TODO: use Apache ORC library in OrcTester
    byte[] data = toByteArray(getResource("apache-lz4.orc"));

    OrcReader orcReader = new OrcReader(
            new InMemoryOrcDataSource(data),
            ORC,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            new OrcReaderOptions(SIZE, SIZE, SIZE, false),
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            new RuntimeStats());

    assertEquals(orcReader.getCompressionKind(), LZ4);
    assertEquals(orcReader.getFooter().getNumberOfRows(), 10_000);

    Map<Integer, Type> includedColumns = ImmutableMap.<Integer, Type>builder()
            .put(0, BIGINT)
            .put(1, INTEGER)
            .put(2, BIGINT)
            .build();

    // The record reader is a resource so it is closed even when an assertion or a read fails.
    try (OrcBatchRecordReader reader = orcReader.createBatchRecordReader(
            includedColumns,
            OrcPredicate.TRUE,
            DateTimeZone.UTC,
            new TestingHiveOrcAggregatedMemoryContext(),
            INITIAL_BATCH_SIZE)) {
        int rows = 0;
        while (true) {
            int batchSize = reader.nextBatch();
            if (batchSize <= 0) {
                break;
            }
            rows += batchSize;

            Block xBlock = reader.readBlock(0);
            Block yBlock = reader.readBlock(1);
            Block zBlock = reader.readBlock(2);

            // Values are read and discarded; nothing is asserted about their contents.
            for (int position = 0; position < batchSize; position++) {
                BIGINT.getLong(xBlock, position);
                INTEGER.getLong(yBlock, position);
                BIGINT.getLong(zBlock, position);
            }
        }

        assertEquals(rows, reader.getFileRowCount());
    }
}
use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
the class TestDecryption method testSkipFirstStripe.
/**
 * Reads the encrypted DWRF resource {@code encrypted_2splits.dwrf} through a selective record
 * reader created with an offset of 10 and checks the single BIGINT column against the expected
 * contents.
 *
 * @throws Exception if the resource cannot be opened or read
 */
@Test
public void testSkipFirstStripe()
        throws Exception
{
    File encryptedFile = new File(getResource("encrypted_2splits.dwrf").getFile());
    DataSize oneMegabyte = new DataSize(1, MEGABYTE);
    OrcDataSource orcDataSource = new FileOrcDataSource(encryptedFile, oneMegabyte, oneMegabyte, oneMegabyte, true);

    OrcReaderOptions readerOptions = new OrcReaderOptions(oneMegabyte, oneMegabyte, MAX_BLOCK_SIZE, false, false, false, false);
    DwrfEncryptionProvider encryptionProvider = new DwrfEncryptionProvider(
            new UnsupportedEncryptionLibrary(),
            new TestingPlainKeyEncryptionLibrary());
    // A key is registered under index 0 for this test file.
    DwrfKeyProvider keyProvider = DwrfKeyProvider.of(ImmutableMap.of(0, Slices.utf8Slice("key")));

    OrcReader orcReader = new OrcReader(
            orcDataSource,
            DWRF,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            readerOptions,
            false,
            encryptionProvider,
            keyProvider,
            new RuntimeStats());

    // NOTE(review): an offset of 10 presumably starts the read past the first stripe, per the test name -- confirm.
    int readOffset = 10;
    try (OrcSelectiveRecordReader recordReader = getSelectiveRecordReader(orcDataSource, orcReader, readOffset)) {
        assertFileContentsPresto(
                ImmutableList.of(BIGINT),
                recordReader,
                ImmutableList.of(ImmutableList.of(1L)),
                ImmutableList.of(0));
    }
}
use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
the class TestCachingOrcDataSource method doIntegration.
/**
 * Opens {@code orcDataSource} with an {@link OrcReader}, checks the read count and stripe count,
 * confirms the tiny-stripe wrapper yields a {@link CachingOrcDataSource}, and then reads column 0
 * of the whole file to verify the total position count.
 *
 * @param orcDataSource data source under test; its read count is inspected after the reader is constructed
 * @param maxMergeDistance passed to the reader options and to the tiny-stripe wrapper
 * @param maxReadSize not referenced by this method
 * @param tinyStripeThreshold passed to the reader options and to the tiny-stripe wrapper
 * @throws IOException if reading from the data source fails
 */
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize, DataSize tinyStripeThreshold)
        throws IOException
{
    OrcAggregatedMemoryContext wrapperMemoryContext = new TestingHiveOrcAggregatedMemoryContext();
    OrcReaderOptions readerOptions = new OrcReaderOptions(maxMergeDistance, tinyStripeThreshold, new DataSize(1, Unit.MEGABYTE), false);
    OrcReader orcReader = new OrcReader(
            orcDataSource,
            ORC,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            readerOptions,
            false,
            NO_ENCRYPTION,
            DwrfKeyProvider.EMPTY,
            new RuntimeStats());

    // 1 for reading file footer
    assertEquals(orcDataSource.getReadCount(), 1);

    List<StripeInformation> stripes = orcReader.getFooter().getStripes();
    // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode.
    assertGreaterThanOrEqual(stripes.size(), 3);

    // verify wrapped by CachingOrcReader
    assertInstanceOf(
            wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, tinyStripeThreshold, wrapperMemoryContext),
            CachingOrcDataSource.class);

    // NOTE(review): this record reader is never closed here; confirm whether callers rely on
    // orcDataSource staying open before wrapping it in try-with-resources.
    OrcBatchRecordReader orcRecordReader = orcReader.createBatchRecordReader(
            ImmutableMap.of(0, VARCHAR),
            (numberOfRows, statisticsByColumnIndex) -> true,
            HIVE_STORAGE_TIME_ZONE,
            new TestingHiveOrcAggregatedMemoryContext(),
            INITIAL_BATCH_SIZE);

    // Keep reading until nextBatch() reports a non-positive batch size.
    int positionCount = 0;
    for (int batchSize = orcRecordReader.nextBatch(); batchSize > 0; batchSize = orcRecordReader.nextBatch()) {
        positionCount += orcRecordReader.readBlock(0).getPositionCount();
    }
    assertEquals(positionCount, POSITION_COUNT);
}
Aggregations