Search in sources :

Example 1 with StandardFunctionResolution

use of com.facebook.presto.spi.function.StandardFunctionResolution in project presto by prestodb.

the class TestFunctionResolution method testStandardFunctionResolution.

@Test
public void testStandardFunctionResolution() {
    StandardFunctionResolution standardFunctionResolution = functionResolution;
    // not
    assertTrue(standardFunctionResolution.isNotFunction(standardFunctionResolution.notFunction()));
    // negate
    assertTrue(standardFunctionResolution.isNegateFunction(standardFunctionResolution.negateFunction(DOUBLE)));
    assertFalse(standardFunctionResolution.isNotFunction(standardFunctionResolution.negateFunction(DOUBLE)));
    // arithmetic
    assertTrue(standardFunctionResolution.isArithmeticFunction(standardFunctionResolution.arithmeticFunction(ADD, DOUBLE, DOUBLE)));
    assertFalse(standardFunctionResolution.isComparisonFunction(standardFunctionResolution.arithmeticFunction(ADD, DOUBLE, DOUBLE)));
    // comparison
    assertTrue(standardFunctionResolution.isComparisonFunction(standardFunctionResolution.comparisonFunction(GREATER_THAN, DOUBLE, DOUBLE)));
    assertFalse(standardFunctionResolution.isArithmeticFunction(standardFunctionResolution.comparisonFunction(GREATER_THAN, DOUBLE, DOUBLE)));
    // between
    assertTrue(standardFunctionResolution.isBetweenFunction(standardFunctionResolution.betweenFunction(DOUBLE, DOUBLE, DOUBLE)));
    assertFalse(standardFunctionResolution.isNotFunction(standardFunctionResolution.betweenFunction(DOUBLE, DOUBLE, DOUBLE)));
    // subscript
    assertTrue(standardFunctionResolution.isSubscriptFunction(standardFunctionResolution.subscriptFunction(new ArrayType(DOUBLE), BIGINT)));
    assertFalse(standardFunctionResolution.isBetweenFunction(standardFunctionResolution.subscriptFunction(new ArrayType(DOUBLE), BIGINT)));
}
Also used : ArrayType(com.facebook.presto.common.type.ArrayType) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) Test(org.testng.annotations.Test)

Example 2 with StandardFunctionResolution

use of com.facebook.presto.spi.function.StandardFunctionResolution in project presto by prestodb.

the class HiveFilterPushdown method pushdownFilter.

@VisibleForTesting
public static ConnectorPushdownFilterResult pushdownFilter(ConnectorSession session, ConnectorMetadata metadata, SemiTransactionalHiveMetastore metastore, RowExpressionService rowExpressionService, StandardFunctionResolution functionResolution, HivePartitionManager partitionManager, FunctionMetadataManager functionMetadataManager, ConnectorTableHandle tableHandle, RowExpression filter, Optional<ConnectorTableLayoutHandle> currentLayoutHandle) {
    checkArgument(!FALSE_CONSTANT.equals(filter), "Cannot pushdown filter that is always false");
    if (TRUE_CONSTANT.equals(filter) && currentLayoutHandle.isPresent()) {
        return new ConnectorPushdownFilterResult(metadata.getTableLayout(session, currentLayoutHandle.get()), TRUE_CONSTANT);
    }
    // Split the filter into 3 groups of conjuncts:
    // - range filters that apply to entire columns,
    // - range filters that apply to subfields,
    // - the rest. Intersect these with possibly pre-existing filters.
    DomainTranslator.ExtractionResult<Subfield> decomposedFilter = rowExpressionService.getDomainTranslator().fromPredicate(session, filter, new SubfieldExtractor(functionResolution, rowExpressionService.getExpressionOptimizer(), session).toColumnExtractor());
    if (currentLayoutHandle.isPresent()) {
        HiveTableLayoutHandle currentHiveLayout = (HiveTableLayoutHandle) currentLayoutHandle.get();
        decomposedFilter = intersectExtractionResult(new DomainTranslator.ExtractionResult(currentHiveLayout.getDomainPredicate(), currentHiveLayout.getRemainingPredicate()), decomposedFilter);
    }
    if (decomposedFilter.getTupleDomain().isNone()) {
        return new ConnectorPushdownFilterResult(EMPTY_TABLE_LAYOUT, FALSE_CONSTANT);
    }
    RowExpression optimizedRemainingExpression = rowExpressionService.getExpressionOptimizer().optimize(decomposedFilter.getRemainingExpression(), OPTIMIZED, session);
    if (optimizedRemainingExpression instanceof ConstantExpression) {
        ConstantExpression constantExpression = (ConstantExpression) optimizedRemainingExpression;
        if (FALSE_CONSTANT.equals(constantExpression) || constantExpression.getValue() == null) {
            return new ConnectorPushdownFilterResult(EMPTY_TABLE_LAYOUT, FALSE_CONSTANT);
        }
    }
    Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
    TupleDomain<ColumnHandle> entireColumnDomain = decomposedFilter.getTupleDomain().transform(subfield -> isEntireColumn(subfield) ? subfield.getRootName() : null).transform(columnHandles::get);
    if (currentLayoutHandle.isPresent()) {
        entireColumnDomain = entireColumnDomain.intersect(((HiveTableLayoutHandle) (currentLayoutHandle.get())).getPartitionColumnPredicate());
    }
    Constraint<ColumnHandle> constraint = new Constraint<>(entireColumnDomain);
    // Extract deterministic conjuncts that apply to partition columns and specify these as Constraint#predicate
    if (!TRUE_CONSTANT.equals(decomposedFilter.getRemainingExpression())) {
        LogicalRowExpressions logicalRowExpressions = new LogicalRowExpressions(rowExpressionService.getDeterminismEvaluator(), functionResolution, functionMetadataManager);
        RowExpression deterministicPredicate = logicalRowExpressions.filterDeterministicConjuncts(decomposedFilter.getRemainingExpression());
        if (!TRUE_CONSTANT.equals(deterministicPredicate)) {
            ConstraintEvaluator evaluator = new ConstraintEvaluator(rowExpressionService, session, columnHandles, deterministicPredicate);
            constraint = new Constraint<>(entireColumnDomain, evaluator::isCandidate);
        }
    }
    HivePartitionResult hivePartitionResult = partitionManager.getPartitions(metastore, tableHandle, constraint, session);
    TupleDomain<Subfield> domainPredicate = withColumnDomains(ImmutableMap.<Subfield, Domain>builder().putAll(hivePartitionResult.getUnenforcedConstraint().transform(HiveFilterPushdown::toSubfield).getDomains().orElse(ImmutableMap.of())).putAll(decomposedFilter.getTupleDomain().transform(subfield -> !isEntireColumn(subfield) ? subfield : null).getDomains().orElse(ImmutableMap.of())).build());
    Set<String> predicateColumnNames = new HashSet<>();
    domainPredicate.getDomains().get().keySet().stream().map(Subfield::getRootName).forEach(predicateColumnNames::add);
    // Include only columns referenced in the optimized expression. Although the expression is sent to the worker node
    // unoptimized, the worker is expected to optimize the expression before executing.
    extractAll(optimizedRemainingExpression).stream().map(VariableReferenceExpression::getName).forEach(predicateColumnNames::add);
    Map<String, HiveColumnHandle> predicateColumns = predicateColumnNames.stream().map(columnHandles::get).map(HiveColumnHandle.class::cast).collect(toImmutableMap(HiveColumnHandle::getName, Functions.identity()));
    SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
    LogicalRowExpressions logicalRowExpressions = new LogicalRowExpressions(rowExpressionService.getDeterminismEvaluator(), functionResolution, functionMetadataManager);
    List<RowExpression> conjuncts = extractConjuncts(decomposedFilter.getRemainingExpression());
    RowExpression dynamicFilterExpression = extractDynamicConjuncts(conjuncts, logicalRowExpressions);
    RowExpression remainingExpression = extractStaticConjuncts(conjuncts, logicalRowExpressions);
    remainingExpression = removeNestedDynamicFilters(remainingExpression);
    Table table = metastore.getTable(new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), isUserDefinedTypeEncodingEnabled(session), metastore.getColumnConverterProvider()), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    return new ConnectorPushdownFilterResult(metadata.getTableLayout(session, new HiveTableLayoutHandle(tableName, table.getStorage().getLocation(), hivePartitionResult.getPartitionColumns(), // remove comments to optimize serialization costs
    pruneColumnComments(hivePartitionResult.getDataColumns()), hivePartitionResult.getTableParameters(), hivePartitionResult.getPartitions(), domainPredicate, remainingExpression, predicateColumns, hivePartitionResult.getEnforcedConstraint(), hivePartitionResult.getBucketHandle(), hivePartitionResult.getBucketFilter(), true, createTableLayoutString(session, rowExpressionService, tableName, hivePartitionResult.getBucketHandle(), hivePartitionResult.getBucketFilter(), remainingExpression, domainPredicate), currentLayoutHandle.map(layout -> ((HiveTableLayoutHandle) layout).getRequestedColumns()).orElse(Optional.empty()), false)), dynamicFilterExpression);
}
Also used : LogicalRowExpressions.extractConjuncts(com.facebook.presto.expressions.LogicalRowExpressions.extractConjuncts) HiveSessionProperties.isParquetPushdownFilterEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetPushdownFilterEnabled) HiveTableLayoutHandle(com.facebook.presto.hive.HiveTableLayoutHandle) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) PlanVisitor(com.facebook.presto.spi.plan.PlanVisitor) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) HiveTransactionManager(com.facebook.presto.hive.HiveTransactionManager) ValuesNode(com.facebook.presto.spi.plan.ValuesNode) TupleDomain.withColumnDomains(com.facebook.presto.common.predicate.TupleDomain.withColumnDomains) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Map(java.util.Map) FALSE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.FALSE_CONSTANT) INVALID_FUNCTION_ARGUMENT(com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT) HiveBucketHandle(com.facebook.presto.hive.HiveBucketHandle) BiMap(com.google.common.collect.BiMap) ImmutableSet(com.google.common.collect.ImmutableSet) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) Collections.emptyList(java.util.Collections.emptyList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) DomainTranslator(com.facebook.presto.spi.relation.DomainTranslator) Set(java.util.Set) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ConnectorSession(com.facebook.presto.spi.ConnectorSession) DynamicFilters.extractDynamicConjuncts(com.facebook.presto.expressions.DynamicFilters.extractDynamicConjuncts) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) RowExpressionService(com.facebook.presto.spi.relation.RowExpressionService) DefaultRowExpressionTraversalVisitor(com.facebook.presto.expressions.DefaultRowExpressionTraversalVisitor) HiveTableHandle(com.facebook.presto.hive.HiveTableHandle) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) HivePartitionResult(com.facebook.presto.hive.HivePartitionResult) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) DynamicFilters.removeNestedDynamicFilters(com.facebook.presto.expressions.DynamicFilters.removeNestedDynamicFilters) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) ConnectorPlanOptimizer(com.facebook.presto.spi.ConnectorPlanOptimizer) Table(com.facebook.presto.hive.metastore.Table) DIVISION_BY_ZERO(com.facebook.presto.spi.StandardErrorCode.DIVISION_BY_ZERO) Column(com.facebook.presto.hive.metastore.Column) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) ConstantExpression(com.facebook.presto.spi.relation.ConstantExpression) Function(java.util.function.Function) HashSet(java.util.HashSet) FilterNode(com.facebook.presto.spi.plan.FilterNode) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) LogicalRowExpressions(com.facebook.presto.expressions.LogicalRowExpressions) VariableAllocator(com.facebook.presto.spi.VariableAllocator) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) TableHandle(com.facebook.presto.spi.TableHandle) FunctionMetadataManager(com.facebook.presto.spi.function.FunctionMetadataManager) HiveTableProperties.getHiveStorageFormat(com.facebook.presto.hive.HiveTableProperties.getHiveStorageFormat) HivePartitionManager(com.facebook.presto.hive.HivePartitionManager) RowExpression(com.facebook.presto.spi.relation.RowExpression) Functions(com.google.common.base.Functions) PlanNodeIdAllocator(com.facebook.presto.spi.plan.PlanNodeIdAllocator) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) SubfieldExtractor(com.facebook.presto.hive.SubfieldExtractor) LogicalRowExpressions.and(com.facebook.presto.expressions.LogicalRowExpressions.and) INVALID_CAST_ARGUMENT(com.facebook.presto.spi.StandardErrorCode.INVALID_CAST_ARGUMENT) Constraint(com.facebook.presto.spi.Constraint) HiveBucketing(com.facebook.presto.hive.HiveBucketing) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) OPTIMIZED(com.facebook.presto.spi.relation.ExpressionOptimizer.Level.OPTIMIZED) HiveMetadata(com.facebook.presto.hive.HiveMetadata) PlanNode(com.facebook.presto.spi.plan.PlanNode) DynamicFilters.extractStaticConjuncts(com.facebook.presto.expressions.DynamicFilters.extractStaticConjuncts) NUMERIC_VALUE_OUT_OF_RANGE(com.facebook.presto.spi.StandardErrorCode.NUMERIC_VALUE_OUT_OF_RANGE) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnHandle(com.facebook.presto.spi.ColumnHandle) HiveStorageFormat(com.facebook.presto.hive.HiveStorageFormat) TableScanNode(com.facebook.presto.spi.plan.TableScanNode) Sets.intersection(com.google.common.collect.Sets.intersection) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) ImmutableBiMap.toImmutableBiMap(com.google.common.collect.ImmutableBiMap.toImmutableBiMap) VisibleForTesting(com.google.common.annotations.VisibleForTesting) RowExpressionNodeInliner.replaceExpression(com.facebook.presto.expressions.RowExpressionNodeInliner.replaceExpression) HivePartitionResult(com.facebook.presto.hive.HivePartitionResult) Constraint(com.facebook.presto.spi.Constraint) ConstantExpression(com.facebook.presto.spi.relation.ConstantExpression) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) SubfieldExtractor(com.facebook.presto.hive.SubfieldExtractor) HiveTableHandle(com.facebook.presto.hive.HiveTableHandle) DomainTranslator(com.facebook.presto.spi.relation.DomainTranslator) HiveTableLayoutHandle(com.facebook.presto.hive.HiveTableLayoutHandle) Subfield(com.facebook.presto.common.Subfield) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HashSet(java.util.HashSet) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Table(com.facebook.presto.hive.metastore.Table) LogicalRowExpressions(com.facebook.presto.expressions.LogicalRowExpressions) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) RowExpression(com.facebook.presto.spi.relation.RowExpression) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with StandardFunctionResolution

use of com.facebook.presto.spi.function.StandardFunctionResolution in project presto by prestodb.

the class ParquetPageSourceFactory method createParquetPageSource.

public static ConnectorPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, SchemaTableName tableName, boolean useParquetColumnNames, DataSize maxReadBlockSize, boolean batchReaderEnabled, boolean verificationEnabled, TypeManager typeManager, StandardFunctionResolution functionResolution, TupleDomain<HiveColumnHandle> effectivePredicate, FileFormatDataSourceStats stats, HiveFileContext hiveFileContext, ParquetMetadataSource parquetMetadataSource, boolean columnIndexFilterEnabled) {
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(user, path, configuration).openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, stats);
        ParquetMetadata parquetMetadata = parquetMetadataSource.getParquetMetadata(dataSource, fileSize, hiveFileContext.isCacheable()).getParquetMetadata();
        if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedParquetPageSource(columns, parquetMetadata, typeManager, functionResolution);
        }
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        Optional<MessageType> message = columns.stream().filter(column -> column.getColumnType() == REGULAR || isPushedDownSubfield(column)).map(column -> getColumnType(typeManager.getType(column.getTypeSignature()), fileSchema, useParquetColumnNames, column, tableName, path)).filter(Optional::isPresent).map(Optional::get).map(type -> new MessageType(fileSchema.getName(), type)).reduce(MessageType::union);
        MessageType requestedSchema = message.orElse(new MessageType(fileSchema.getName(), ImmutableList.of()));
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : footerBlocks.build()) {
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
                hiveFileContext.incrementCounter("parquet.blocksRead", 1);
                hiveFileContext.incrementCounter("parquet.rowsRead", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesRead", block.getTotalByteSize());
            } else {
                hiveFileContext.incrementCounter("parquet.blocksSkipped", 1);
                hiveFileContext.incrementCounter("parquet.rowsSkipped", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesSkipped", block.getTotalByteSize());
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks.build(), dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        for (HiveColumnHandle column : columns) {
            checkArgument(column.getColumnType() == REGULAR || column.getColumnType() == SYNTHESIZED, "column type must be regular or synthesized column");
            String name = column.getName();
            Type type = typeManager.getType(column.getTypeSignature());
            namesBuilder.add(name);
            typesBuilder.add(type);
            if (column.getColumnType() == SYNTHESIZED) {
                Subfield pushedDownSubfield = getPushedDownSubfield(column);
                List<String> nestedColumnPath = nestedColumnPath(pushedDownSubfield);
                Optional<ColumnIO> columnIO = findNestedColumnIO(lookupColumnByName(messageColumnIO, pushedDownSubfield.getRootName()), nestedColumnPath);
                if (columnIO.isPresent()) {
                    fieldsBuilder.add(constructField(type, columnIO.get()));
                } else {
                    fieldsBuilder.add(Optional.empty());
                }
            } else if (getParquetType(type, fileSchema, useParquetColumnNames, column, tableName, path).isPresent()) {
                String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName();
                fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));
            } else {
                fieldsBuilder.add(Optional.empty());
            }
        }
        return new ParquetPageSource(parquetReader, typesBuilder.build(), fieldsBuilder.build(), namesBuilder.build(), hiveFileContext.getStats());
    } catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        if (e instanceof AccessControlException) {
            throw new PrestoException(PERMISSION_DENIED, e.getMessage(), e);
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) DateTimeZone(org.joda.time.DateTimeZone) TINYINT(com.facebook.presto.common.type.StandardTypes.TINYINT) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) ROW(com.facebook.presto.common.type.StandardTypes.ROW) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) Set(java.util.Set) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ARRAY(com.facebook.presto.common.type.StandardTypes.ARRAY) ConnectorSession(com.facebook.presto.spi.ConnectorSession) CHAR(com.facebook.presto.common.type.StandardTypes.CHAR) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) ColumnIO(org.apache.parquet.io.ColumnIO) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ArrayList(java.util.ArrayList) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) DATE(com.facebook.presto.common.type.StandardTypes.DATE) IOException(java.io.IOException) ParquetTypeUtils.nestedColumnPath(com.facebook.presto.parquet.ParquetTypeUtils.nestedColumnPath) Domain(com.facebook.presto.common.predicate.Domain) INTEGER(com.facebook.presto.common.type.StandardTypes.INTEGER) HiveColumnHandle.getPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.getPushedDownSubfield) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) REAL(com.facebook.presto.common.type.StandardTypes.REAL) RowType(com.facebook.presto.common.type.RowType) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) ParquetTypeUtils.lookupColumnByName(com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) MAP(com.facebook.presto.common.type.StandardTypes.MAP) Locale(java.util.Locale) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) BIGINT(com.facebook.presto.common.type.StandardTypes.BIGINT) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) ImmutableSet(com.google.common.collect.ImmutableSet) GroupType(org.apache.parquet.schema.GroupType) ImmutableMap(com.google.common.collect.ImmutableMap) Collectors(java.util.stream.Collectors) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) VARCHAR(com.facebook.presto.common.type.StandardTypes.VARCHAR) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) Entry(java.util.Map.Entry) Optional(java.util.Optional) TIMESTAMP(com.facebook.presto.common.type.StandardTypes.TIMESTAMP) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) SMALLINT(com.facebook.presto.common.type.StandardTypes.SMALLINT) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) DECIMAL(com.facebook.presto.common.type.StandardTypes.DECIMAL) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicate(com.facebook.presto.parquet.predicate.Predicate) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) VARBINARY(com.facebook.presto.common.type.StandardTypes.VARBINARY) Storage(com.facebook.presto.hive.metastore.Storage) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) PERMISSION_DENIED(com.facebook.presto.spi.StandardErrorCode.PERMISSION_DENIED) Field(com.facebook.presto.parquet.Field) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) AccessControlException(org.apache.hadoop.security.AccessControlException) SYNTHESIZED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) HiveSessionProperties.columnIndexFilterEnabled(com.facebook.presto.hive.HiveSessionProperties.columnIndexFilterEnabled) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ImmutableList(com.google.common.collect.ImmutableList) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) Predicate(com.facebook.presto.parquet.predicate.Predicate) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) MessageType(org.apache.parquet.schema.MessageType) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) HiveColumnHandle.getPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.getPushedDownSubfield) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) Subfield(com.facebook.presto.common.Subfield) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) Optional(java.util.Optional) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) AccessControlException(org.apache.hadoop.security.AccessControlException) IOException(java.io.IOException) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) ColumnIO(org.apache.parquet.io.ColumnIO) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) AccessControlException(org.apache.hadoop.security.AccessControlException) RowType(com.facebook.presto.common.type.RowType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Type(com.facebook.presto.common.type.Type) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)

Example 4 with StandardFunctionResolution

use of com.facebook.presto.spi.function.StandardFunctionResolution in project presto by prestodb.

the class OrcBatchPageSourceFactory method createOrcPageSource.

public static ConnectorPageSource createOrcPageSource(OrcEncoding orcEncoding, HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, boolean useOrcColumnNames, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, StandardFunctionResolution functionResolution, DataSize maxBufferSize, DataSize streamBufferSize, boolean lazyReadSmallRanges, boolean orcBloomFiltersEnabled, FileFormatDataSourceStats stats, int domainCompactionThreshold, OrcFileTailSource orcFileTailSource, StripeMetadataSourceFactory stripeMetadataSourceFactory, HiveFileContext hiveFileContext, OrcReaderOptions orcReaderOptions, Optional<EncryptionInformation> encryptionInformation, DwrfEncryptionProvider dwrfEncryptionProvider) {
    checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");
    OrcDataSource orcDataSource;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(sessionUser, path, configuration).openFile(path, hiveFileContext);
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, orcReaderOptions.getMaxMergeDistance(), maxBufferSize, streamBufferSize, lazyReadSmallRanges, inputStream, stats);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    OrcAggregatedMemoryContext systemMemoryUsage = new HiveOrcAggregatedMemoryContext();
    try {
        DwrfKeyProvider dwrfKeyProvider = new ProjectionBasedDwrfKeyProvider(encryptionInformation, columns, useOrcColumnNames, path);
        OrcReader reader = new OrcReader(orcDataSource, orcEncoding, orcFileTailSource, stripeMetadataSourceFactory, new HiveOrcAggregatedMemoryContext(), orcReaderOptions, hiveFileContext.isCacheable(), dwrfEncryptionProvider, dwrfKeyProvider, hiveFileContext.getStats());
        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader.getTypes(), path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }
        if (!physicalColumns.isEmpty() && physicalColumns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedOrcPageSource(physicalColumns, reader.getFooter(), typeManager, functionResolution);
        }
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled, Optional.of(domainCompactionThreshold));
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(includedColumns.build(), predicate, start, length, hiveStorageTimeZone, systemMemoryUsage, INITIAL_BATCH_SIZE);
        return new OrcBatchPageSource(recordReader, reader.getOrcDataSource(), physicalColumns, typeManager, systemMemoryUsage, stats, hiveFileContext.getStats());
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) HiveSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.hive.HiveSessionProperties.getOrcTinyStripeThreshold) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) FixedPageSource(com.facebook.presto.spi.FixedPageSource) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HiveSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) HiveSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) HiveSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcZstdJniDecompressionEnabled) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveFileContext(com.facebook.presto.hive.HiveFileContext) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) HiveSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxMergeDistance) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HiveSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxReadBlockSize) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getOrcMaxBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxBufferSize) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) Type(com.facebook.presto.common.type.Type) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference) Storage(com.facebook.presto.hive.metastore.Storage) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) HiveSessionProperties.getOrcStreamBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcStreamBufferSize) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) IOException(java.io.IOException) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HiveUtil.getPhysicalHiveColumnHandles(com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles) OrcReader(com.facebook.presto.orc.OrcReader) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) ImmutableList(com.google.common.collect.ImmutableList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference)

Example 5 with StandardFunctionResolution

use of com.facebook.presto.spi.function.StandardFunctionResolution in project presto by prestodb.

the class StatisticsAggregationPlanner method createStatisticsAggregation.

public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMetadata statisticsMetadata, Map<String, VariableReferenceExpression> columnToVariableMap, boolean useOriginalExpression) {
    StatisticAggregationsDescriptor.Builder<VariableReferenceExpression> descriptor = StatisticAggregationsDescriptor.builder();
    List<String> groupingColumns = statisticsMetadata.getGroupingColumns();
    List<VariableReferenceExpression> groupingVariables = groupingColumns.stream().map(columnToVariableMap::get).collect(toImmutableList());
    for (int i = 0; i < groupingVariables.size(); i++) {
        descriptor.addGrouping(groupingColumns.get(i), groupingVariables.get(i));
    }
    ImmutableMap.Builder<VariableReferenceExpression, AggregationNode.Aggregation> aggregations = ImmutableMap.builder();
    StandardFunctionResolution functionResolution = new FunctionResolution(metadata.getFunctionAndTypeManager());
    for (TableStatisticType type : statisticsMetadata.getTableStatistics()) {
        if (type != ROW_COUNT) {
            throw new PrestoException(NOT_SUPPORTED, "Table-wide statistic type not supported: " + type);
        }
        AggregationNode.Aggregation aggregation = new AggregationNode.Aggregation(new CallExpression("count", functionResolution.countFunction(), BIGINT, ImmutableList.of()), Optional.empty(), Optional.empty(), false, Optional.empty());
        VariableReferenceExpression variable = variableAllocator.newVariable("rowCount", BIGINT);
        aggregations.put(variable, aggregation);
        descriptor.addTableStatistic(ROW_COUNT, variable);
    }
    for (ColumnStatisticMetadata columnStatisticMetadata : statisticsMetadata.getColumnStatistics()) {
        String columnName = columnStatisticMetadata.getColumnName();
        ColumnStatisticType statisticType = columnStatisticMetadata.getStatisticType();
        VariableReferenceExpression inputVariable = columnToVariableMap.get(columnName);
        verify(inputVariable != null, "inputVariable is null");
        ColumnStatisticsAggregation aggregation = createColumnAggregation(statisticType, inputVariable, useOriginalExpression);
        VariableReferenceExpression variable = variableAllocator.newVariable(statisticType + ":" + columnName, aggregation.getOutputType());
        aggregations.put(variable, aggregation.getAggregation());
        descriptor.addColumnStatistic(columnStatisticMetadata, variable);
    }
    StatisticAggregations aggregation = new StatisticAggregations(aggregations.build(), groupingVariables);
    return new TableStatisticAggregation(aggregation, descriptor.build());
}
Also used : ColumnStatisticMetadata(com.facebook.presto.spi.statistics.ColumnStatisticMetadata) PrestoException(com.facebook.presto.spi.PrestoException) AggregationNode(com.facebook.presto.spi.plan.AggregationNode) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) FunctionResolution(com.facebook.presto.sql.relational.FunctionResolution) ImmutableMap(com.google.common.collect.ImmutableMap) StatisticAggregations(com.facebook.presto.sql.planner.plan.StatisticAggregations) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) ColumnStatisticType(com.facebook.presto.spi.statistics.ColumnStatisticType) StatisticAggregationsDescriptor(com.facebook.presto.sql.planner.plan.StatisticAggregationsDescriptor) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) TableStatisticType(com.facebook.presto.spi.statistics.TableStatisticType) CallExpression(com.facebook.presto.spi.relation.CallExpression)

Aggregations

StandardFunctionResolution (com.facebook.presto.spi.function.StandardFunctionResolution)5 PrestoException (com.facebook.presto.spi.PrestoException)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)3 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)3 ConnectorSession (com.facebook.presto.spi.ConnectorSession)3 SchemaTableName (com.facebook.presto.spi.SchemaTableName)3 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3 ImmutableList (com.google.common.collect.ImmutableList)3 List (java.util.List)3 Map (java.util.Map)3 Objects.requireNonNull (java.util.Objects.requireNonNull)3 Optional (java.util.Optional)3 Subfield (com.facebook.presto.common.Subfield)2 Domain (com.facebook.presto.common.predicate.Domain)2 Type (com.facebook.presto.common.type.Type)2 TypeManager (com.facebook.presto.common.type.TypeManager)2 EncryptionInformation (com.facebook.presto.hive.EncryptionInformation)2 FileFormatDataSourceStats (com.facebook.presto.hive.FileFormatDataSourceStats)2 HdfsEnvironment (com.facebook.presto.hive.HdfsEnvironment)2