Search in sources :

Example 1 with OrcReader

use of com.facebook.presto.orc.OrcReader in project presto by prestodb.

the class OrcPageSourceFactory method createOrcPageSource.

public static OrcPageSource createOrcPageSource(MetadataReader metadataReader, HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useOrcColumnNames, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize, boolean orcBloomFiltersEnabled) {
    OrcDataSource orcDataSource;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        long size = fileSystem.getFileStatus(path).getLen();
        FSDataInputStream inputStream = fileSystem.open(path);
        orcDataSource = new HdfsOrcDataSource(path.toString(), size, maxMergeDistance, maxBufferSize, streamBufferSize, inputStream);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(orcDataSource, metadataReader, maxMergeDistance, maxBufferSize);
        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader, path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled);
        OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, start, length, hiveStorageTimeZone, systemMemoryUsage);
        return new OrcPageSource(recordReader, orcDataSource, physicalColumns, typeManager, systemMemoryUsage);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) ImmutableList(com.google.common.collect.ImmutableList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) FileSystem(org.apache.hadoop.fs.FileSystem) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcDataSource(com.facebook.presto.orc.OrcDataSource) IOException(java.io.IOException) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference)

Example 2 with OrcReader

use of com.facebook.presto.orc.OrcReader in project presto by prestodb.

the class OrcStorageManager method getPageSource.

@Override
public ConnectorPageSource getPageSource(UUID shardUuid, OptionalInt bucketNumber, List<Long> columnIds, List<Type> columnTypes, TupleDomain<RaptorColumnHandle> effectivePredicate, ReaderAttributes readerAttributes, OptionalLong transactionId) {
    OrcDataSource dataSource = openShard(shardUuid, readerAttributes);
    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(dataSource, new OrcMetadataReader(), readerAttributes.getMaxMergeDistance(), readerAttributes.getMaxReadSize());
        Map<Long, Integer> indexMap = columnIdIndex(reader.getColumnNames());
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<Integer> columnIndexes = ImmutableList.builder();
        for (int i = 0; i < columnIds.size(); i++) {
            long columnId = columnIds.get(i);
            if (isHiddenColumn(columnId)) {
                columnIndexes.add(toSpecialIndex(columnId));
                continue;
            }
            Integer index = indexMap.get(columnId);
            if (index == null) {
                columnIndexes.add(NULL_COLUMN);
            } else {
                columnIndexes.add(index);
                includedColumns.put(index, columnTypes.get(i));
            }
        }
        OrcPredicate predicate = getPredicate(effectivePredicate, indexMap);
        OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, UTC, systemMemoryUsage);
        Optional<ShardRewriter> shardRewriter = Optional.empty();
        if (transactionId.isPresent()) {
            shardRewriter = Optional.of(createShardRewriter(transactionId.getAsLong(), bucketNumber, shardUuid));
        }
        return new OrcPageSource(shardRewriter, recordReader, dataSource, columnIds, columnTypes, columnIndexes.build(), shardUuid, bucketNumber, systemMemoryUsage);
    } catch (IOException | RuntimeException e) {
        closeQuietly(dataSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, e);
    } catch (Throwable t) {
        closeQuietly(dataSource);
        throw t;
    }
}
Also used : FileOrcDataSource(com.facebook.presto.orc.FileOrcDataSource) OrcDataSource(com.facebook.presto.orc.OrcDataSource) ImmutableList(com.google.common.collect.ImmutableList) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) DecimalType(com.facebook.presto.spi.type.DecimalType) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcReader(com.facebook.presto.orc.OrcReader) OptionalLong(java.util.OptionalLong) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate)

Example 3 with OrcReader

use of com.facebook.presto.orc.OrcReader in project presto by prestodb.

the class OrcTestingUtil method createReader.

public static OrcRecordReader createReader(OrcDataSource dataSource, List<Long> columnIds, List<Type> types) throws IOException {
    OrcReader orcReader = new OrcReader(dataSource, new OrcMetadataReader(), new DataSize(1, Unit.MEGABYTE), new DataSize(1, Unit.MEGABYTE));
    List<String> columnNames = orcReader.getColumnNames();
    assertEquals(columnNames.size(), columnIds.size());
    Map<Integer, Type> includedColumns = new HashMap<>();
    int ordinal = 0;
    for (long columnId : columnIds) {
        assertEquals(columnNames.get(ordinal), String.valueOf(columnId));
        includedColumns.put(ordinal, types.get(ordinal));
        ordinal++;
    }
    return createRecordReader(orcReader, includedColumns);
}
Also used : Type(com.facebook.presto.spi.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) HashMap(java.util.HashMap) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) DataSize(io.airlift.units.DataSize)

Example 4 with OrcReader

use of com.facebook.presto.orc.OrcReader in project presto by prestodb.

the class OrcStorageManager method computeShardStats.

private List<ColumnStats> computeShardStats(File file) {
    try (OrcDataSource dataSource = fileOrcDataSource(defaultReaderAttributes, file)) {
        OrcReader reader = new OrcReader(dataSource, new OrcMetadataReader(), defaultReaderAttributes.getMaxMergeDistance(), defaultReaderAttributes.getMaxReadSize());
        ImmutableList.Builder<ColumnStats> list = ImmutableList.builder();
        for (ColumnInfo info : getColumnInfo(reader)) {
            computeColumnStats(reader, info.getColumnId(), info.getType()).ifPresent(list::add);
        }
        return list.build();
    } catch (IOException e) {
        throw new PrestoException(RAPTOR_ERROR, "Failed to read file: " + file, e);
    }
}
Also used : FileOrcDataSource(com.facebook.presto.orc.FileOrcDataSource) OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcReader(com.facebook.presto.orc.OrcReader) ImmutableList(com.google.common.collect.ImmutableList) ColumnStats(com.facebook.presto.raptor.metadata.ColumnStats) ShardStats.computeColumnStats(com.facebook.presto.raptor.storage.ShardStats.computeColumnStats) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) ColumnInfo(com.facebook.presto.raptor.metadata.ColumnInfo) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException)

Example 5 with OrcReader

use of com.facebook.presto.orc.OrcReader in project presto by prestodb.

the class OrcTestingUtil method createReaderNoRows.

public static OrcRecordReader createReaderNoRows(OrcDataSource dataSource) throws IOException {
    OrcReader orcReader = new OrcReader(dataSource, new OrcMetadataReader(), new DataSize(1, Unit.MEGABYTE), new DataSize(1, Unit.MEGABYTE));
    assertEquals(orcReader.getColumnNames().size(), 0);
    return createRecordReader(orcReader, ImmutableMap.of());
}
Also used : OrcReader(com.facebook.presto.orc.OrcReader) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) DataSize(io.airlift.units.DataSize)

Aggregations

OrcReader (com.facebook.presto.orc.OrcReader)5 OrcMetadataReader (com.facebook.presto.orc.metadata.OrcMetadataReader)4 OrcDataSource (com.facebook.presto.orc.OrcDataSource)3 PrestoException (com.facebook.presto.spi.PrestoException)3 Type (com.facebook.presto.spi.type.Type)3 ImmutableList (com.google.common.collect.ImmutableList)3 IOException (java.io.IOException)3 FileOrcDataSource (com.facebook.presto.orc.FileOrcDataSource)2 OrcPredicate (com.facebook.presto.orc.OrcPredicate)2 OrcRecordReader (com.facebook.presto.orc.OrcRecordReader)2 TupleDomainOrcPredicate (com.facebook.presto.orc.TupleDomainOrcPredicate)2 AggregatedMemoryContext (com.facebook.presto.orc.memory.AggregatedMemoryContext)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 DataSize (io.airlift.units.DataSize)2 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)1 ColumnReference (com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference)1 OrcType (com.facebook.presto.orc.metadata.OrcType)1 ColumnInfo (com.facebook.presto.raptor.metadata.ColumnInfo)1 ColumnStats (com.facebook.presto.raptor.metadata.ColumnStats)1 ShardStats.computeColumnStats (com.facebook.presto.raptor.storage.ShardStats.computeColumnStats)1