Use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.
Class OrcTester, method assertRoundTrip.
/**
 * Writes {@code readValues} to a temporary file for every configured format and
 * compression, then checks the written file with {@code assertFileContents}.
 *
 * <p>For the DWRF format a {@link DwrfMetadataReader} is used; every other format
 * uses an {@link OrcMetadataReader}. When the inspected type contains a DATE,
 * DECIMAL, or CHAR, the DWRF format is skipped and the remaining formats are
 * still exercised.
 *
 * @param objectInspector inspector describing the column being written
 * @param readValues values written to the file and expected back when reading
 * @param type type handed to {@code assertFileContents}
 * @throws Exception if writing or verifying the file fails
 */
public void assertRoundTrip(ObjectInspector objectInspector, Iterable<?> readValues, Type type) throws Exception {
    for (Format formatVersion : formats) {
        MetadataReader metadataReader;
        if (DWRF == formatVersion) {
            // DWRF doesn't support dates, decimals, or chars. Skip only this format:
            // returning here would also drop every format iterated after DWRF.
            if (hasType(objectInspector, PrimitiveCategory.DATE)
                    || hasType(objectInspector, PrimitiveCategory.DECIMAL)
                    || hasType(objectInspector, PrimitiveCategory.CHAR)) {
                continue;
            }
            metadataReader = new DwrfMetadataReader();
        } else {
            metadataReader = new OrcMetadataReader();
        }
        for (Compression compression : compressions) {
            try (TempFile tempFile = new TempFile()) {
                writeOrcColumn(tempFile.getFile(), formatVersion, compression, objectInspector, readValues.iterator());
                // Baseline check with both boolean switches off.
                assertFileContents(objectInspector, tempFile, readValues, false, false, metadataReader, type);
                // NOTE(review): the two boolean arguments presumably toggle the
                // skip-batch and skip-stripe read paths -- confirm against assertFileContents.
                if (skipBatchTestsEnabled) {
                    assertFileContents(objectInspector, tempFile, readValues, true, false, metadataReader, type);
                }
                if (skipStripeTestsEnabled) {
                    assertFileContents(objectInspector, tempFile, readValues, false, true, metadataReader, type);
                }
            }
        }
    }
}
Use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.
Class TestOrcReaderPositions, method testStripeSkipping.
/**
 * Verifies reader and file positions when a predicate selects only some stripes
 * of the multi-stripe test file.
 *
 * <p>The predicate accepts a statistics set covering 100 rows, and otherwise only
 * accepts column-0 integer statistics of [60, 117] or [180, 237]. The assertions
 * expect two batches of 20 rows, reported at file positions 20 and 60.
 *
 * @throws Exception if the file cannot be created or read
 */
@Test
public void testStripeSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());
        // test reading second and fourth stripes
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            // NOTE(review): 100 rows matches the whole-file row count asserted below,
            // so this presumably lets the file-level statistics check pass -- confirm.
            if (numberOfRows == 100) {
                return true;
            }
            IntegerStatistics stats = statisticsByColumnIndex.get(0).getIntegerStatistics();
            return ((stats.getMin() == 60) && (stats.getMax() == 117)) || ((stats.getMin() == 180) && (stats.getMax() == 237));
        };
        OrcRecordReader reader = createCustomOrcRecordReader(tempFile, new OrcMetadataReader(), predicate, BIGINT);
        try {
            assertEquals(reader.getFileRowCount(), 100);
            assertEquals(reader.getReaderRowCount(), 40);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);
            // second stripe
            assertEquals(reader.nextBatch(), 20);
            assertEquals(reader.getReaderPosition(), 0);
            assertEquals(reader.getFilePosition(), 20);
            assertCurrentBatch(reader, 1);
            // fourth stripe
            assertEquals(reader.nextBatch(), 20);
            assertEquals(reader.getReaderPosition(), 20);
            assertEquals(reader.getFilePosition(), 60);
            assertCurrentBatch(reader, 3);
            // no more selected stripes
            assertEquals(reader.nextBatch(), -1);
            assertEquals(reader.getReaderPosition(), 40);
            assertEquals(reader.getFilePosition(), 100);
        } finally {
            // Close the reader even when an assertion above fails.
            reader.close();
        }
    }
}
Use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.
Class TestOrcReaderPositions, method testEntireFile.
/**
 * Verifies reader and file positions when the whole multi-stripe test file is
 * read with {@code OrcPredicate.TRUE}.
 *
 * <p>The assertions expect 100 rows delivered as five batches of 20, with the
 * file position always equal to the reader position.
 *
 * @throws Exception if the file cannot be created or read
 */
@Test
public void testEntireFile() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());
        OrcRecordReader reader = createCustomOrcRecordReader(tempFile, new OrcMetadataReader(), OrcPredicate.TRUE, BIGINT);
        try {
            assertEquals(reader.getReaderRowCount(), 100);
            assertEquals(reader.getReaderPosition(), 0);
            assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
            assertEquals(reader.getFilePosition(), reader.getReaderPosition());
            for (int i = 0; i < 5; i++) {
                assertEquals(reader.nextBatch(), 20);
                // After nextBatch() the position is the start of the batch just read.
                assertEquals(reader.getReaderPosition(), i * 20L);
                assertEquals(reader.getFilePosition(), reader.getReaderPosition());
                assertCurrentBatch(reader, i);
            }
            // End of file: nextBatch() reports -1 and positions sit at the row count.
            assertEquals(reader.nextBatch(), -1);
            assertEquals(reader.getReaderPosition(), 100);
            assertEquals(reader.getFilePosition(), reader.getReaderPosition());
        } finally {
            // Close the reader even when an assertion above fails.
            reader.close();
        }
    }
}
Use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.
Class OrcStorageManager, method getPageSource.
/**
 * Opens the given shard and builds a page source over the requested columns.
 *
 * <p>Each requested column id is mapped to an output index: hidden columns get
 * the value of {@code toSpecialIndex}, columns absent from the file get
 * {@code NULL_COLUMN}, and columns present in the file get their file index and
 * are registered for reading with the matching entry of {@code columnTypes}.
 * A shard rewriter is attached only when {@code transactionId} is present.
 *
 * <p>If anything fails after the shard has been opened, the data source is
 * closed quietly before the failure propagates.
 *
 * @throws PrestoException with {@code RAPTOR_ERROR} when an {@link IOException}
 *         or {@link RuntimeException} occurs while building the page source
 */
@Override
public ConnectorPageSource getPageSource(UUID shardUuid, OptionalInt bucketNumber, List<Long> columnIds, List<Type> columnTypes, TupleDomain<RaptorColumnHandle> effectivePredicate, ReaderAttributes readerAttributes, OptionalLong transactionId) {
    OrcDataSource dataSource = openShard(shardUuid, readerAttributes);
    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader orcReader = new OrcReader(
                dataSource,
                new OrcMetadataReader(),
                readerAttributes.getMaxMergeDistance(),
                readerAttributes.getMaxReadSize());
        Map<Long, Integer> fileIndexByColumnId = columnIdIndex(orcReader.getColumnNames());

        ImmutableMap.Builder<Integer, Type> typesToRead = ImmutableMap.builder();
        ImmutableList.Builder<Integer> outputIndexes = ImmutableList.builder();
        for (int position = 0; position < columnIds.size(); position++) {
            long columnId = columnIds.get(position);
            if (isHiddenColumn(columnId)) {
                // Hidden columns are not read from the file.
                outputIndexes.add(toSpecialIndex(columnId));
            } else {
                Integer fileIndex = fileIndexByColumnId.get(columnId);
                if (fileIndex != null) {
                    outputIndexes.add(fileIndex);
                    typesToRead.put(fileIndex, columnTypes.get(position));
                } else {
                    // Requested column does not exist in this shard's file.
                    outputIndexes.add(NULL_COLUMN);
                }
            }
        }

        OrcPredicate predicate = getPredicate(effectivePredicate, fileIndexByColumnId);
        OrcRecordReader recordReader = orcReader.createRecordReader(typesToRead.build(), predicate, UTC, systemMemoryUsage);

        Optional<ShardRewriter> shardRewriter = transactionId.isPresent()
                ? Optional.of(createShardRewriter(transactionId.getAsLong(), bucketNumber, shardUuid))
                : Optional.empty();

        return new OrcPageSource(shardRewriter, recordReader, dataSource, columnIds, columnTypes, outputIndexes.build(), shardUuid, bucketNumber, systemMemoryUsage);
    } catch (IOException | RuntimeException e) {
        closeQuietly(dataSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, e);
    } catch (Throwable t) {
        // Anything else is rethrown unchanged after releasing the data source.
        closeQuietly(dataSource);
        throw t;
    }
}
Use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.
Class OrcTestingUtil, method createReader.
/**
 * Creates a record reader over {@code dataSource} that reads every column.
 *
 * <p>Asserts that the file has exactly as many columns as {@code columnIds} and
 * that each column name equals the string form of the column id at the same
 * position. The column at position {@code i} is read with {@code types.get(i)}.
 *
 * @param dataSource source of the ORC data
 * @param columnIds expected column ids, in file column order
 * @param types types to read, parallel to {@code columnIds}
 * @return the result of {@code createRecordReader} for the included columns
 * @throws IOException if the reader cannot be created
 */
public static OrcRecordReader createReader(OrcDataSource dataSource, List<Long> columnIds, List<Type> types) throws IOException {
    DataSize oneMegabyte = new DataSize(1, Unit.MEGABYTE);
    OrcReader orcReader = new OrcReader(dataSource, new OrcMetadataReader(), oneMegabyte, new DataSize(1, Unit.MEGABYTE));

    List<String> fileColumnNames = orcReader.getColumnNames();
    assertEquals(fileColumnNames.size(), columnIds.size());

    Map<Integer, Type> typesByOrdinal = new HashMap<>();
    for (int position = 0; position < columnIds.size(); position++) {
        long expectedId = columnIds.get(position);
        assertEquals(fileColumnNames.get(position), String.valueOf(expectedId));
        typesByOrdinal.put(position, types.get(position));
    }
    return createRecordReader(orcReader, typesByOrdinal);
}
Aggregations