Search in sources :

Example 1 with OrcPredicate

use of com.facebook.presto.orc.OrcPredicate in project presto by prestodb.

the class OrcStorageManager method getPageSource.

/**
 * Opens the shard identified by {@code shardUuid} and wraps it in an {@link OrcPageSource}.
 *
 * <p>Each requested column id is resolved to one of three kinds of index: a special index for
 * hidden columns, {@code NULL_COLUMN} when the shard file has no such column, or the column's
 * position in the file (in which case the column is also registered for reading with the type
 * found at the same position of {@code columnTypes}).
 *
 * @param shardUuid shard to open
 * @param bucketNumber forwarded to the shard rewriter and the page source
 * @param columnIds requested column ids, positionally aligned with {@code columnTypes}
 * @param columnTypes types of the requested columns
 * @param effectivePredicate predicate converted to an {@link OrcPredicate} for the record reader
 * @param readerAttributes supplies the max merge distance and max read size for the reader
 * @param transactionId when present, a shard rewriter is created for that transaction
 * @return a page source over the shard
 * @throws PrestoException with {@code RAPTOR_ERROR} when an {@link IOException} or
 *         {@link RuntimeException} occurs after the shard has been opened
 */
@Override
public ConnectorPageSource getPageSource(UUID shardUuid, OptionalInt bucketNumber, List<Long> columnIds, List<Type> columnTypes, TupleDomain<RaptorColumnHandle> effectivePredicate, ReaderAttributes readerAttributes, OptionalLong transactionId) {
    OrcDataSource shardSource = openShard(shardUuid, readerAttributes);
    AggregatedMemoryContext memoryContext = new AggregatedMemoryContext();
    try {
        OrcReader orcReader = new OrcReader(shardSource, new OrcMetadataReader(), readerAttributes.getMaxMergeDistance(), readerAttributes.getMaxReadSize());
        Map<Long, Integer> ordinalByColumnId = columnIdIndex(orcReader.getColumnNames());

        ImmutableMap.Builder<Integer, Type> columnsToRead = ImmutableMap.builder();
        ImmutableList.Builder<Integer> outputIndexes = ImmutableList.builder();
        for (int position = 0; position < columnIds.size(); position++) {
            long id = columnIds.get(position);

            // Hidden columns are not stored in the file; they get a special index instead.
            if (isHiddenColumn(id)) {
                outputIndexes.add(toSpecialIndex(id));
                continue;
            }

            // A column id the file does not know about is surfaced through NULL_COLUMN.
            Integer ordinal = ordinalByColumnId.get(id);
            if (ordinal == null) {
                outputIndexes.add(NULL_COLUMN);
                continue;
            }

            outputIndexes.add(ordinal);
            columnsToRead.put(ordinal, columnTypes.get(position));
        }

        OrcPredicate orcPredicate = getPredicate(effectivePredicate, ordinalByColumnId);
        OrcRecordReader rows = orcReader.createRecordReader(columnsToRead.build(), orcPredicate, UTC, memoryContext);

        Optional<ShardRewriter> rewriter = transactionId.isPresent()
                ? Optional.of(createShardRewriter(transactionId.getAsLong(), bucketNumber, shardUuid))
                : Optional.empty();

        return new OrcPageSource(rewriter, rows, shardSource, columnIds, columnTypes, outputIndexes.build(), shardUuid, bucketNumber, memoryContext);
    } catch (IOException | RuntimeException failure) {
        // Release the already-opened shard before reporting the failure.
        closeQuietly(shardSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, failure);
    } catch (Throwable other) {
        // Anything else is rethrown untouched, but the shard is still released first.
        closeQuietly(shardSource);
        throw other;
    }
}
Also used : FileOrcDataSource(com.facebook.presto.orc.FileOrcDataSource) OrcDataSource(com.facebook.presto.orc.OrcDataSource) ImmutableList(com.google.common.collect.ImmutableList) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) DecimalType(com.facebook.presto.spi.type.DecimalType) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcReader(com.facebook.presto.orc.OrcReader) OptionalLong(java.util.OptionalLong) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate)

Example 2 with OrcPredicate

use of com.facebook.presto.orc.OrcPredicate in project presto by prestodb.

the class OrcPageSourceFactory method createOrcPageSource.

/**
 * Opens the ORC file at {@code path} and builds an {@link OrcPageSource} over the byte range
 * described by {@code start} and {@code length}.
 *
 * <p>Only columns whose column type is {@code REGULAR} are registered for reading and exposed
 * to the predicate. If anything fails after the underlying stream or data source has been
 * opened, that resource is closed before the failure is propagated.
 *
 * @param metadataReader metadata reader handed to the {@link OrcReader}
 * @param hdfsEnvironment used to obtain the {@link FileSystem} for {@code path}
 * @param sessionUser user on whose behalf the file system is obtained
 * @param configuration Hadoop configuration used to obtain the file system
 * @param path location of the ORC file
 * @param start start of the range passed to the record reader
 * @param length length of the range passed to the record reader
 * @param columns requested columns; resolved to physical columns before reading
 * @param useOrcColumnNames forwarded to the physical column resolution
 * @param effectivePredicate predicate wrapped in a {@link TupleDomainOrcPredicate}
 * @param hiveStorageTimeZone time zone passed to the record reader
 * @param typeManager resolves column type signatures to {@link Type}s
 * @param maxMergeDistance passed to the data source and the reader
 * @param maxBufferSize passed to the data source and the reader
 * @param streamBufferSize passed to the data source
 * @param orcBloomFiltersEnabled forwarded to the {@link TupleDomainOrcPredicate}
 * @return a page source reading the requested range of the file
 * @throws PrestoException with {@code HIVE_CANNOT_OPEN_SPLIT} when the file cannot be opened or
 *         the reader cannot be created, or {@code HIVE_MISSING_DATA} when the failure is a
 *         {@code BlockMissingException}; a {@link PrestoException} raised while creating the
 *         reader is rethrown unchanged
 */
public static OrcPageSource createOrcPageSource(MetadataReader metadataReader, HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useOrcColumnNames, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize, boolean orcBloomFiltersEnabled) {
    OrcDataSource orcDataSource;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        long size = fileSystem.getFileStatus(path).getLen();
        FSDataInputStream inputStream = fileSystem.open(path);
        try {
            orcDataSource = new HdfsOrcDataSource(path.toString(), size, maxMergeDistance, maxBufferSize, streamBufferSize, inputStream);
        } catch (Exception e) {
            // Nothing owns the stream yet, so it must be released here or it leaks.
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // Best effort: the construction failure below is the error worth reporting.
            }
            throw e;
        }
    } catch (Exception e) {
        // These two cases are reported without the split details in the message.
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(orcDataSource, metadataReader, maxMergeDistance, maxBufferSize);
        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader, path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            // Only REGULAR columns are read from the file and offered to the predicate.
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled);
        OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, start, length, hiveStorageTimeZone, systemMemoryUsage);
        return new OrcPageSource(recordReader, orcDataSource, physicalColumns, typeManager, systemMemoryUsage);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
            // Best effort: the original failure is reported below.
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        // Matched by simple class name rather than by type.
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) ImmutableList(com.google.common.collect.ImmutableList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) FileSystem(org.apache.hadoop.fs.FileSystem) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcDataSource(com.facebook.presto.orc.OrcDataSource) IOException(java.io.IOException) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference)

Aggregations

OrcDataSource (com.facebook.presto.orc.OrcDataSource)2 OrcPredicate (com.facebook.presto.orc.OrcPredicate)2 OrcReader (com.facebook.presto.orc.OrcReader)2 OrcRecordReader (com.facebook.presto.orc.OrcRecordReader)2 TupleDomainOrcPredicate (com.facebook.presto.orc.TupleDomainOrcPredicate)2 AggregatedMemoryContext (com.facebook.presto.orc.memory.AggregatedMemoryContext)2 PrestoException (com.facebook.presto.spi.PrestoException)2 Type (com.facebook.presto.spi.type.Type)2 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 IOException (java.io.IOException)2 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)1 FileOrcDataSource (com.facebook.presto.orc.FileOrcDataSource)1 ColumnReference (com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference)1 OrcMetadataReader (com.facebook.presto.orc.metadata.OrcMetadataReader)1 OrcType (com.facebook.presto.orc.metadata.OrcType)1 DecimalType (com.facebook.presto.spi.type.DecimalType)1 VarcharType.createUnboundedVarcharType (com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType)1 FileNotFoundException (java.io.FileNotFoundException)1 OptionalLong (java.util.OptionalLong)1