Search in sources :

Example 1 with AggregatedMemoryContext

use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.

From the class OrcPageSourceFactory, method createOrcPageSource:

/**
 * Creates an {@link OrcPageSource} for one split of an ORC file.
 * <p>
 * The file is opened through {@code hdfsEnvironment} and wrapped in an {@link HdfsOrcDataSource}.
 * An {@link OrcReader} is then created on top of that data source, and a record reader is built
 * for the physical columns whose column type is {@code REGULAR}, filtered by a
 * {@link TupleDomainOrcPredicate} derived from {@code effectivePredicate}.
 * <p>
 * Resources opened by this method are released on failure: the input stream is closed if the data
 * source cannot be constructed, and the data source is closed if the reader cannot be created.
 * A failure while closing is attached to the original exception as a suppressed exception rather
 * than being discarded.
 *
 * @param path file to open; also used when building error messages
 * @param start passed to the record reader and used when building error messages
 * @param length passed to the record reader and used when building error messages
 * @param columns requested columns, resolved to physical columns via {@code getPhysicalHiveColumnHandles}
 * @param useOrcColumnNames forwarded to {@code getPhysicalHiveColumnHandles}
 * @param effectivePredicate predicate handed to the {@link TupleDomainOrcPredicate}
 * @param orcBloomFiltersEnabled forwarded to the {@link TupleDomainOrcPredicate}
 * @return a page source backed by the opened data source and record reader
 * @throws PrestoException with {@code HIVE_CANNOT_OPEN_SPLIT} when the file cannot be opened or the
 *         reader cannot be created, or with {@code HIVE_MISSING_DATA} when the failure's simple class
 *         name is {@code BlockMissingException}; a {@code PrestoException} raised while creating the
 *         reader is rethrown as is
 */
public static OrcPageSource createOrcPageSource(MetadataReader metadataReader, HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useOrcColumnNames, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize, boolean orcBloomFiltersEnabled) {
    OrcDataSource orcDataSource;
    // Declared outside the try so the stream can be released if wrapping it in a data source fails.
    FSDataInputStream inputStream = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        long size = fileSystem.getFileStatus(path).getLen();
        inputStream = fileSystem.open(path);
        orcDataSource = new HdfsOrcDataSource(path.toString(), size, maxMergeDistance, maxBufferSize, streamBufferSize, inputStream);
    } catch (Exception e) {
        if (inputStream != null) {
            // The stream was opened but never handed to a data source; close it here to avoid a leak.
            try {
                inputStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(orcDataSource, metadataReader, maxMergeDistance, maxBufferSize);
        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader, path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        // Only REGULAR columns are read from the file and referenced by the predicate.
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled);
        OrcRecordReader recordReader = reader.createRecordReader(includedColumns.build(), predicate, start, length, hiveStorageTimeZone, systemMemoryUsage);
        return new OrcPageSource(recordReader, orcDataSource, physicalColumns, typeManager, systemMemoryUsage);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException closeFailure) {
            // Keep the close failure visible without masking the primary error.
            e.addSuppressed(closeFailure);
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        // Matched by simple name, as in the original code, so no compile-time reference to the class is needed.
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) ImmutableList(com.google.common.collect.ImmutableList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) FileSystem(org.apache.hadoop.fs.FileSystem) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcDataSource(com.facebook.presto.orc.OrcDataSource) IOException(java.io.IOException) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference)

Example 2 with AggregatedMemoryContext

use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.

From the class OrcStorageManager, method getPageSource:

/**
 * Opens the shard identified by {@code shardUuid} and returns a page source over it.
 * <p>
 * Each requested column id is translated to a reader column index: hidden columns map to their
 * special index, ids absent from the file map to {@code NULL_COLUMN}, and all others map to the
 * index found in the file and are added to the set of columns actually read.
 * When {@code transactionId} is present, a shard rewriter is created and passed to the page source.
 * <p>
 * If setup fails, the data source is closed quietly. {@link IOException} and
 * {@link RuntimeException} are wrapped in a {@link PrestoException} with {@code RAPTOR_ERROR};
 * any other throwable is rethrown unchanged.
 */
@Override
public ConnectorPageSource getPageSource(UUID shardUuid, OptionalInt bucketNumber, List<Long> columnIds, List<Type> columnTypes, TupleDomain<RaptorColumnHandle> effectivePredicate, ReaderAttributes readerAttributes, OptionalLong transactionId) {
    OrcDataSource dataSource = openShard(shardUuid, readerAttributes);
    AggregatedMemoryContext systemMemoryUsage = new AggregatedMemoryContext();
    try {
        OrcReader orcReader = new OrcReader(
                dataSource,
                new OrcMetadataReader(),
                readerAttributes.getMaxMergeDistance(),
                readerAttributes.getMaxReadSize());
        Map<Long, Integer> idToFileIndex = columnIdIndex(orcReader.getColumnNames());

        ImmutableMap.Builder<Integer, Type> columnsToRead = ImmutableMap.builder();
        ImmutableList.Builder<Integer> readerIndexes = ImmutableList.builder();
        for (int position = 0; position < columnIds.size(); position++) {
            long id = columnIds.get(position);
            if (isHiddenColumn(id)) {
                readerIndexes.add(toSpecialIndex(id));
                continue;
            }
            Integer fileIndex = idToFileIndex.get(id);
            if (fileIndex != null) {
                readerIndexes.add(fileIndex);
                columnsToRead.put(fileIndex, columnTypes.get(position));
            } else {
                // Column id is not present in the file.
                readerIndexes.add(NULL_COLUMN);
            }
        }

        OrcPredicate orcPredicate = getPredicate(effectivePredicate, idToFileIndex);
        OrcRecordReader recordReader = orcReader.createRecordReader(columnsToRead.build(), orcPredicate, UTC, systemMemoryUsage);

        Optional<ShardRewriter> rewriter;
        if (transactionId.isPresent()) {
            rewriter = Optional.of(createShardRewriter(transactionId.getAsLong(), bucketNumber, shardUuid));
        } else {
            rewriter = Optional.empty();
        }

        return new OrcPageSource(rewriter, recordReader, dataSource, columnIds, columnTypes, readerIndexes.build(), shardUuid, bucketNumber, systemMemoryUsage);
    } catch (IOException | RuntimeException failure) {
        closeQuietly(dataSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, failure);
    } catch (Throwable other) {
        closeQuietly(dataSource);
        throw other;
    }
}
Also used : FileOrcDataSource(com.facebook.presto.orc.FileOrcDataSource) OrcDataSource(com.facebook.presto.orc.OrcDataSource) ImmutableList(com.google.common.collect.ImmutableList) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) DecimalType(com.facebook.presto.spi.type.DecimalType) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcReader(com.facebook.presto.orc.OrcReader) OptionalLong(java.util.OptionalLong) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate)

Example 3 with AggregatedMemoryContext

use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.

From the class ShardStats, method doComputeColumnStats:

/**
 * Computes statistics for a single column by reading it through a fresh record reader.
 * <p>
 * The indexing routine is chosen by {@code type}: boolean, long (BIGINT, DATE, TIMESTAMP),
 * double, or string (any {@link VarcharType}).
 *
 * @return the computed statistics, or {@code null} when {@code type} matches none of the handled types
 * @throws IOException declared by this method; not caught here
 */
private static ColumnStats doComputeColumnStats(OrcReader orcReader, long columnId, Type type) throws IOException {
    final int ordinal = columnIndex(orcReader.getColumnNames(), columnId);
    final OrcRecordReader recordReader = orcReader.createRecordReader(
            ImmutableMap.of(ordinal, type),
            OrcPredicate.TRUE,
            UTC,
            new AggregatedMemoryContext());

    ColumnStats stats;
    if (type.equals(BooleanType.BOOLEAN)) {
        stats = indexBoolean(type, recordReader, ordinal, columnId);
    } else if (type.equals(BigintType.BIGINT) || type.equals(DateType.DATE) || type.equals(TimestampType.TIMESTAMP)) {
        stats = indexLong(type, recordReader, ordinal, columnId);
    } else if (type.equals(DoubleType.DOUBLE)) {
        stats = indexDouble(type, recordReader, ordinal, columnId);
    } else if (type instanceof VarcharType) {
        stats = indexString(type, recordReader, ordinal, columnId);
    } else {
        // No statistics are produced for any other type.
        stats = null;
    }
    return stats;
}
Also used : VarcharType(com.facebook.presto.spi.type.VarcharType) OrcRecordReader(com.facebook.presto.orc.OrcRecordReader) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext)

Example 4 with AggregatedMemoryContext

use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.

From the class OrcTester, method createCustomOrcRecordReader:

/**
 * Opens {@code tempFile} as an ORC file and returns a record reader over its first column.
 * <p>
 * All size settings are fixed at one megabyte. Before the reader is created, the file is
 * asserted to contain exactly one column named {@code "test"} and to use 10,000 rows per row group.
 *
 * @throws IOException declared by this method; not caught here
 */
static OrcRecordReader createCustomOrcRecordReader(TempFile tempFile, MetadataReader metadataReader, OrcPredicate predicate, Type type) throws IOException {
    DataSize oneMegabyte = new DataSize(1, Unit.MEGABYTE);

    OrcDataSource source = new FileOrcDataSource(tempFile.getFile(), oneMegabyte, oneMegabyte, oneMegabyte);
    OrcReader reader = new OrcReader(source, metadataReader, oneMegabyte, oneMegabyte);

    // Sanity-check the shape of the file under test.
    assertEquals(reader.getColumnNames(), ImmutableList.of("test"));
    assertEquals(reader.getFooter().getRowsInRowGroup(), 10_000);

    return reader.createRecordReader(
            ImmutableMap.of(0, type),
            predicate,
            HIVE_STORAGE_TIME_ZONE,
            new AggregatedMemoryContext());
}
Also used : DataSize(io.airlift.units.DataSize) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext)

Example 5 with AggregatedMemoryContext

use of com.facebook.presto.orc.memory.AggregatedMemoryContext in project presto by prestodb.

From the class TestCachingOrcDataSource, method doIntegration:

/**
 * Reads column 0 of the given data source as VARCHAR and checks the total position count.
 * <p>
 * Along the way it asserts that creating the reader performed exactly one read, that the file
 * has at least three stripes, and that {@code wrapWithCacheIfTinyStripes} yields a
 * {@link CachingOrcDataSource} for these stripes and size settings.
 *
 * @throws IOException declared by this method; not caught here
 */
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize) throws IOException {
    OrcReader reader = new OrcReader(orcDataSource, new OrcMetadataReader(), maxMergeDistance, maxReadSize);

    // Exactly one read so far: the file footer.
    assertEquals(orcDataSource.getReadCount(), 1);

    List<StripeInformation> stripes = reader.getFooter().getStripes();
    // Sanity check on the stripe count. It can be three or higher because of the ORC writer's low memory mode.
    assertGreaterThanOrEqual(stripes.size(), 3);

    // Verify the data source gets wrapped by the caching data source.
    assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, maxReadSize), CachingOrcDataSource.class);

    OrcRecordReader recordReader = reader.createRecordReader(
            ImmutableMap.of(0, VARCHAR),
            (numberOfRows, statisticsByColumnIndex) -> true,
            HIVE_STORAGE_TIME_ZONE,
            new AggregatedMemoryContext());

    // Drain the reader batch by batch until it reports no more rows.
    int totalPositions = 0;
    for (int rows = recordReader.nextBatch(); rows > 0; rows = recordReader.nextBatch()) {
        Block block = recordReader.readBlock(VARCHAR, 0);
        totalPositions += block.getPositionCount();
    }
    assertEquals(totalPositions, POSITION_COUNT);
}
Also used : OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) Block(com.facebook.presto.spi.block.Block) AggregatedMemoryContext(com.facebook.presto.orc.memory.AggregatedMemoryContext) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Aggregations

AggregatedMemoryContext (com.facebook.presto.orc.memory.AggregatedMemoryContext): 5; OrcRecordReader (com.facebook.presto.orc.OrcRecordReader): 3; OrcDataSource (com.facebook.presto.orc.OrcDataSource): 2; OrcPredicate (com.facebook.presto.orc.OrcPredicate): 2; OrcReader (com.facebook.presto.orc.OrcReader): 2; TupleDomainOrcPredicate (com.facebook.presto.orc.TupleDomainOrcPredicate): 2; OrcMetadataReader (com.facebook.presto.orc.metadata.OrcMetadataReader): 2; PrestoException (com.facebook.presto.spi.PrestoException): 2; Type (com.facebook.presto.spi.type.Type): 2; ImmutableList (com.google.common.collect.ImmutableList): 2; ImmutableMap (com.google.common.collect.ImmutableMap): 2; IOException (java.io.IOException): 2; HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle): 1; FileOrcDataSource (com.facebook.presto.orc.FileOrcDataSource): 1; ColumnReference (com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference): 1; OrcType (com.facebook.presto.orc.metadata.OrcType): 1; StripeInformation (com.facebook.presto.orc.metadata.StripeInformation): 1; Block (com.facebook.presto.spi.block.Block): 1; DecimalType (com.facebook.presto.spi.type.DecimalType): 1; VarcharType (com.facebook.presto.spi.type.VarcharType): 1