Search in sources :

Example 1 with OrcMetadataReader

use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.

the class OrcTester method assertRoundTrip.

/**
 * Writes {@code readValues} to a temporary file once for every configured format and
 * compression, and asserts that the file contents read back as expected.
 *
 * <p>For each written file the plain read is always checked; the two additional
 * {@code assertFileContents} variants run only when {@code skipBatchTestsEnabled} or
 * {@code skipStripeTestsEnabled} is set.
 *
 * <p>DWRF is skipped for columns containing DATE, DECIMAL or CHAR values. Only that
 * format is skipped: any remaining formats are still exercised, regardless of where
 * DWRF falls in the iteration order of {@code formats}.
 *
 * @param objectInspector inspector describing the column being written
 * @param readValues values to write and then expect when reading back
 * @param type type handed to {@code assertFileContents} for the read side
 * @throws Exception if writing, reading or closing the temporary file fails
 */
public void assertRoundTrip(ObjectInspector objectInspector, Iterable<?> readValues, Type type) throws Exception {
    for (Format formatVersion : formats) {
        MetadataReader metadataReader;
        if (DWRF == formatVersion) {
            // DWRF doesn't support dates, decimals or chars. Skip just this format instead of
            // returning, so that formats iterated after DWRF are not silently left untested.
            if (hasType(objectInspector, PrimitiveCategory.DATE)
                    || hasType(objectInspector, PrimitiveCategory.DECIMAL)
                    || hasType(objectInspector, PrimitiveCategory.CHAR)) {
                continue;
            }
            metadataReader = new DwrfMetadataReader();
        } else {
            metadataReader = new OrcMetadataReader();
        }
        for (Compression compression : compressions) {
            // A fresh temp file per (format, compression) pair; removed when the try block exits.
            try (TempFile tempFile = new TempFile()) {
                writeOrcColumn(tempFile.getFile(), formatVersion, compression, objectInspector, readValues.iterator());
                assertFileContents(objectInspector, tempFile, readValues, false, false, metadataReader, type);
                if (skipBatchTestsEnabled) {
                    assertFileContents(objectInspector, tempFile, readValues, true, false, metadataReader, type);
                }
                if (skipStripeTestsEnabled) {
                    assertFileContents(objectInspector, tempFile, readValues, false, true, metadataReader, type);
                }
            }
        }
    }
}
Also used : OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) DwrfMetadataReader(com.facebook.presto.orc.metadata.DwrfMetadataReader) MetadataReader(com.facebook.presto.orc.metadata.MetadataReader)

Example 2 with OrcMetadataReader

use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.

the class TestOrcReaderPositions method testStripeSkipping.

/**
 * Verifies reader and file positions when a predicate causes some stripes to be skipped.
 *
 * <p>The predicate accepts a statistics set when the row count is 100 (the same value the
 * test asserts for the file row count), or when the integer statistics of column 0 have
 * (min, max) of (60, 117) or (180, 237). The test then expects exactly two batches of 20
 * rows, at file positions 20 and 60, followed by end of data.
 *
 * @throws Exception if creating, reading or closing the temporary file fails
 */
@Test
public void testStripeSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());
        // test reading second and fourth stripes
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            if (numberOfRows == 100) {
                return true;
            }
            IntegerStatistics stats = statisticsByColumnIndex.get(0).getIntegerStatistics();
            return ((stats.getMin() == 60) && (stats.getMax() == 117)) || ((stats.getMin() == 180) && (stats.getMax() == 237));
        };
        // try-with-resources so the reader is closed even when an assertion below fails
        try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, new OrcMetadataReader(), predicate, BIGINT)) {
            assertEquals(reader.getFileRowCount(), 100);
            assertEquals(reader.getReaderRowCount(), 40);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);
            // second stripe
            assertEquals(reader.nextBatch(), 20);
            assertEquals(reader.getReaderPosition(), 0);
            assertEquals(reader.getFilePosition(), 20);
            assertCurrentBatch(reader, 1);
            // fourth stripe
            assertEquals(reader.nextBatch(), 20);
            assertEquals(reader.getReaderPosition(), 20);
            assertEquals(reader.getFilePosition(), 60);
            assertCurrentBatch(reader, 3);
            // no more data: nextBatch reports -1 and positions sit at the end
            assertEquals(reader.nextBatch(), -1);
            assertEquals(reader.getReaderPosition(), 40);
            assertEquals(reader.getFilePosition(), 100);
        }
    }
}
Also used : OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) Block(com.facebook.presto.spi.block.Block) Slice(io.airlift.slice.Slice) OrcWriterOptions(org.apache.hadoop.hive.ql.io.orc.OrcWriterOptions) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Assert.assertEquals(org.testng.Assert.assertEquals) Writable(org.apache.hadoop.io.Writable) Test(org.testng.annotations.Test) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) ByteBuffer(java.nio.ByteBuffer) Writer(org.apache.hadoop.hive.ql.io.orc.Writer) BIGINT(com.facebook.presto.spi.type.BigintType.BIGINT) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) IntegerStatistics(com.facebook.presto.orc.metadata.IntegerStatistics) TempFile(com.facebook.presto.orc.OrcTester.TempFile) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) Path(org.apache.hadoop.fs.Path) ImmutableMap(com.google.common.collect.ImmutableMap) Footer(com.facebook.presto.orc.metadata.Footer) UTF_8(java.nio.charset.StandardCharsets.UTF_8) SNAPPY(org.apache.hadoop.hive.ql.io.orc.CompressionKind.SNAPPY) NullMemoryManager(org.apache.hadoop.hive.ql.io.orc.NullMemoryManager) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) IOException(java.io.IOException) Field(java.lang.reflect.Field) Maps(com.google.common.collect.Maps) File(java.io.File) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) DataSize(io.airlift.units.DataSize) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) Serializer(org.apache.hadoop.hive.serde2.Serializer) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) 
SerDeException(org.apache.hadoop.hive.serde2.SerDeException) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) TempFile(com.facebook.presto.orc.OrcTester.TempFile) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) IntegerStatistics(com.facebook.presto.orc.metadata.IntegerStatistics) Test(org.testng.annotations.Test)

Example 3 with OrcMetadataReader

use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.

the class TestOrcReaderPositions method testEntireFile.

/**
 * Verifies reader and file positions when the whole file is read with
 * {@code OrcPredicate.TRUE}.
 *
 * <p>The test expects 100 rows delivered as five batches of 20, with the file position
 * equal to the reader position at every step, and {@code -1} from {@code nextBatch()}
 * once the data is exhausted.
 *
 * @throws Exception if creating, reading or closing the temporary file fails
 */
@Test
public void testEntireFile() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());
        // try-with-resources so the reader is closed even when an assertion below fails
        try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, new OrcMetadataReader(), OrcPredicate.TRUE, BIGINT)) {
            assertEquals(reader.getReaderRowCount(), 100);
            assertEquals(reader.getReaderPosition(), 0);
            assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
            assertEquals(reader.getFilePosition(), reader.getReaderPosition());
            for (int i = 0; i < 5; i++) {
                assertEquals(reader.nextBatch(), 20);
                // after nextBatch() the position refers to the start of the batch just returned
                assertEquals(reader.getReaderPosition(), i * 20L);
                assertEquals(reader.getFilePosition(), reader.getReaderPosition());
                assertCurrentBatch(reader, i);
            }
            assertEquals(reader.nextBatch(), -1);
            assertEquals(reader.getReaderPosition(), 100);
            assertEquals(reader.getFilePosition(), reader.getReaderPosition());
        }
    }
}
Also used : TempFile(com.facebook.presto.orc.OrcTester.TempFile) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) OrcTester.createCustomOrcRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader) Test(org.testng.annotations.Test)

Example 4 with OrcMetadataReader

use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.

the class OrcStorageManager method getPageSource.

/**
 * Opens the shard identified by {@code shardUuid} and builds a page source over the
 * requested columns.
 *
 * <p>Each requested column id is mapped to one entry in the column index list: hidden
 * columns map to their special index, columns found in the file map to their file index
 * (and are included in the read), and columns absent from the file map to
 * {@code NULL_COLUMN}. A shard rewriter is attached only when {@code transactionId} is
 * present.
 *
 * <p>If anything fails after the data source has been opened, the data source is closed
 * quietly before the failure propagates.
 *
 * @throws PrestoException with {@code RAPTOR_ERROR} when an {@link IOException} or
 *         {@link RuntimeException} occurs while creating the page source
 */
@Override
public ConnectorPageSource getPageSource(UUID shardUuid, OptionalInt bucketNumber, List<Long> columnIds, List<Type> columnTypes, TupleDomain<RaptorColumnHandle> effectivePredicate, ReaderAttributes readerAttributes, OptionalLong transactionId) {
    OrcDataSource dataSource = openShard(shardUuid, readerAttributes);
    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader orcReader = new OrcReader(dataSource, new OrcMetadataReader(), readerAttributes.getMaxMergeDistance(), readerAttributes.getMaxReadSize());
        Map<Long, Integer> fileIndexByColumnId = columnIdIndex(orcReader.getColumnNames());

        ImmutableMap.Builder<Integer, Type> typesToRead = ImmutableMap.builder();
        ImmutableList.Builder<Integer> outputIndexes = ImmutableList.builder();
        for (int position = 0; position < columnIds.size(); position++) {
            long columnId = columnIds.get(position);
            if (isHiddenColumn(columnId)) {
                // hidden columns are not read from the file
                outputIndexes.add(toSpecialIndex(columnId));
            } else {
                Integer fileIndex = fileIndexByColumnId.get(columnId);
                if (fileIndex != null) {
                    outputIndexes.add(fileIndex);
                    typesToRead.put(fileIndex, columnTypes.get(position));
                } else {
                    // requested column is not in this file
                    outputIndexes.add(NULL_COLUMN);
                }
            }
        }

        OrcPredicate orcPredicate = getPredicate(effectivePredicate, fileIndexByColumnId);
        OrcRecordReader recordReader = orcReader.createRecordReader(typesToRead.build(), orcPredicate, UTC, systemMemoryUsage);

        Optional<ShardRewriter> shardRewriter;
        if (transactionId.isPresent()) {
            shardRewriter = Optional.of(createShardRewriter(transactionId.getAsLong(), bucketNumber, shardUuid));
        } else {
            shardRewriter = Optional.empty();
        }

        return new OrcPageSource(shardRewriter, recordReader, dataSource, columnIds, columnTypes, outputIndexes.build(), shardUuid, bucketNumber, systemMemoryUsage);
    } catch (IOException | RuntimeException failure) {
        closeQuietly(dataSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, failure);
    } catch (Throwable unexpected) {
        // anything else is rethrown unchanged, after releasing the data source
        closeQuietly(dataSource);
        throw unexpected;
    }
}
Also used : FileOrcDataSource(com.facebook.presto.orc.FileOrcDataSource) OrcDataSource(com.facebook.presto.orc.OrcDataSource) ImmutableList(com.google.common.collect.ImmutableList) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) DecimalType(com.facebook.presto.spi.type.DecimalType) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcReader(com.facebook.presto.orc.OrcReader) OptionalLong(java.util.OptionalLong) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate)

Example 5 with OrcMetadataReader

use of com.facebook.presto.orc.metadata.OrcMetadataReader in project presto by prestodb.

the class OrcTestingUtil method createReader.

/**
 * Creates a record reader over {@code dataSource} that reads every column, in order.
 *
 * <p>Asserts that the file has exactly as many columns as {@code columnIds}, and that
 * each column name equals the decimal string of the column id at the same position.
 *
 * @param dataSource source of the ORC data
 * @param columnIds expected column ids, in file column order
 * @param types type to read for each column, indexed like {@code columnIds}
 * @return a record reader including all columns
 * @throws IOException if the ORC reader cannot be created
 */
public static OrcRecordReader createReader(OrcDataSource dataSource, List<Long> columnIds, List<Type> types) throws IOException {
    DataSize maxMergeDistance = new DataSize(1, Unit.MEGABYTE);
    DataSize maxReadSize = new DataSize(1, Unit.MEGABYTE);
    OrcReader orcReader = new OrcReader(dataSource, new OrcMetadataReader(), maxMergeDistance, maxReadSize);

    List<String> columnNames = orcReader.getColumnNames();
    assertEquals(columnNames.size(), columnIds.size());

    Map<Integer, Type> includedColumns = new HashMap<>();
    for (int ordinal = 0; ordinal < columnIds.size(); ordinal++) {
        long columnId = columnIds.get(ordinal);
        assertEquals(columnNames.get(ordinal), String.valueOf(columnId));
        includedColumns.put(ordinal, types.get(ordinal));
    }
    return createRecordReader(orcReader, includedColumns);
}
Also used : Type(com.facebook.presto.spi.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) HashMap(java.util.HashMap) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) DataSize(io.airlift.units.DataSize)

Aggregations

OrcMetadataReader (com.facebook.presto.orc.metadata.OrcMetadataReader): 11, DataSize (io.airlift.units.DataSize): 5, Test (org.testng.annotations.Test): 5, OrcReader (com.facebook.presto.orc.OrcReader): 4, TempFile (com.facebook.presto.orc.OrcTester.TempFile): 4, IOException (java.io.IOException): 4, OrcTester.createCustomOrcRecordReader (com.facebook.presto.orc.OrcTester.createCustomOrcRecordReader): 3, Footer (com.facebook.presto.orc.metadata.Footer): 3, Block (com.facebook.presto.spi.block.Block): 3, ImmutableMap (com.google.common.collect.ImmutableMap): 3, Slice (io.airlift.slice.Slice): 3, FileOrcDataSource (com.facebook.presto.orc.FileOrcDataSource): 2, OrcDataSource (com.facebook.presto.orc.OrcDataSource): 2, ORC_12 (com.facebook.presto.orc.OrcTester.Format.ORC_12): 2, OrcTester.createOrcRecordWriter (com.facebook.presto.orc.OrcTester.createOrcRecordWriter): 2, OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector): 2, AggregatedMemoryContext (com.facebook.presto.orc.memory.AggregatedMemoryContext): 2, IntegerStatistics (com.facebook.presto.orc.metadata.IntegerStatistics): 2, PrestoException (com.facebook.presto.spi.PrestoException): 2, BIGINT (com.facebook.presto.spi.type.BigintType.BIGINT): 2