Search in sources:

Example 1 with SubsetDefinition

use of io.trino.sql.tree.SubsetDefinition in project trino by trinodb.

The method analyze of the class PatternRecognitionAnalyzer.

/**
 * Validates the clauses of a row pattern recognition specification and collects the results.
 * <p>
 * The checks run in this order: SUBSET declarations, DEFINE declarations, pattern quantifier
 * bounds, the AFTER MATCH SKIP target, and finally nesting of row pattern recognition inside
 * measure and variable definition expressions. The first violation found is reported through
 * {@code semanticException}.
 *
 * @param subsets union variable declarations (SUBSET clause)
 * @param variableDefinitions primary variable definitions (DEFINE clause)
 * @param measures measure definitions; only inspected by the nesting check
 * @param pattern the row pattern; its Identifiers provide the primary labels
 * @param skipTo optional AFTER MATCH SKIP specification
 * @return analysis holding all labels (primary and union), the primary labels that have no
 * definition, and the validated range of every range quantifier in the pattern
 */
public static PatternRecognitionAnalysis analyze(List<SubsetDefinition> subsets, List<VariableDefinition> variableDefinitions, List<MeasureDefinition> measures, RowPattern pattern, Optional<SkipTo> skipTo) {
    // label names come from the Identifiers of the PATTERN and SUBSET clauses; label() creates them respecting SQL identifier semantics
    Set<String> primaryLabels = extractExpressions(ImmutableList.of(pattern), Identifier.class).stream()
            .map(PatternRecognitionAnalyzer::label)
            .collect(toImmutableSet());
    List<String> unionLabels = subsets.stream()
            .map(subset -> label(subset.getName()))
            .collect(toImmutableList());

    // SUBSET: a union variable name must not clash with a primary variable or with another union variable,
    // and every element must be a primary variable
    Set<String> declaredUnionLabels = new HashSet<>();
    for (SubsetDefinition subset : subsets) {
        Identifier unionName = subset.getName();
        String unionLabel = label(unionName);
        if (primaryLabels.contains(unionLabel)) {
            throw semanticException(INVALID_LABEL, unionName, "union pattern variable name: %s is a duplicate of primary pattern variable name", unionName);
        }
        if (!declaredUnionLabels.add(unionLabel)) {
            throw semanticException(INVALID_LABEL, unionName, "union pattern variable name: %s is declared twice", unionName);
        }
        // TODO can there be repetitions in the list of subset elements? (currently repetitions are supported)
        for (Identifier element : subset.getIdentifiers()) {
            boolean knownPrimary = primaryLabels.contains(label(element));
            if (!knownPrimary) {
                throw semanticException(INVALID_LABEL, element, "subset element: %s is not a primary pattern variable", element);
            }
        }
    }

    // DEFINE: every defined variable must be a primary variable and may be defined only once
    Set<String> definedLabels = new HashSet<>();
    for (VariableDefinition definition : variableDefinitions) {
        Identifier variableName = definition.getName();
        String variableLabel = label(variableName);
        if (!primaryLabels.contains(variableLabel)) {
            throw semanticException(INVALID_LABEL, variableName, "defined variable: %s is not a primary pattern variable", variableName);
        }
        if (!definedLabels.add(variableLabel)) {
            throw semanticException(INVALID_LABEL, variableName, "pattern variable with name: %s is defined twice", variableName);
        }
        // only RUNNING semantics (the default) is supported in DEFINE, so reject the first function call marked FINAL
        Optional<FunctionCall> finalCall = extractExpressions(ImmutableList.of(definition.getExpression()), FunctionCall.class).stream()
                .filter(call -> call.getProcessingMode().filter(mode -> mode.getMode() == FINAL).isPresent())
                .findFirst();
        if (finalCall.isPresent()) {
            throw semanticException(INVALID_PROCESSING_MODE, finalCall.get().getProcessingMode().get(), "FINAL semantics is not supported in DEFINE clause");
        }
    }

    // primary labels without a definition are implicitly associated with the `true` condition
    Set<String> undefinedLabels = Sets.difference(primaryLabels, definedLabels);

    // pattern quantifiers: check each bound, then their mutual order, and record the resulting range
    List<RangeQuantifier> quantifiers = preOrder(pattern)
            .filter(node -> node instanceof RangeQuantifier)
            .map(node -> (RangeQuantifier) node)
            .collect(toImmutableList());
    ImmutableMap.Builder<NodeRef<RangeQuantifier>, Range> ranges = ImmutableMap.builder();
    for (RangeQuantifier quantifier : quantifiers) {
        Optional<Long> atLeast = quantifier.getAtLeast().map(LongLiteral::getValue);
        if (atLeast.isPresent()) {
            long lowerBound = atLeast.get();
            if (lowerBound < 0) {
                throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier lower bound must be greater than or equal to 0");
            }
            if (lowerBound > Integer.MAX_VALUE) {
                throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier lower bound must not exceed " + Integer.MAX_VALUE);
            }
        }

        Optional<Long> atMost = quantifier.getAtMost().map(LongLiteral::getValue);
        if (atMost.isPresent()) {
            long upperBound = atMost.get();
            if (upperBound < 1) {
                throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier upper bound must be greater than or equal to 1");
            }
            if (upperBound > Integer.MAX_VALUE) {
                throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier upper bound must not exceed " + Integer.MAX_VALUE);
            }
        }

        if (atLeast.isPresent() && atMost.isPresent() && atLeast.get() > atMost.get()) {
            throw semanticException(INVALID_RANGE, quantifier, "Pattern quantifier lower bound must not exceed upper bound");
        }

        // both bounds were checked against Integer.MAX_VALUE above, so toIntExact cannot overflow here
        ranges.put(NodeRef.of(quantifier), new Range(atLeast.map(Math::toIntExact), atMost.map(Math::toIntExact)));
    }

    // AFTER MATCH SKIP: the target, when given, must be a primary or union variable
    Set<String> allLabels = ImmutableSet.<String>builder()
            .addAll(primaryLabels)
            .addAll(unionLabels)
            .build();
    Optional<Identifier> skipTarget = skipTo.flatMap(SkipTo::getIdentifier);
    if (skipTarget.isPresent()) {
        Identifier identifier = skipTarget.get();
        if (!allLabels.contains(label(identifier))) {
            throw semanticException(INVALID_LABEL, identifier, "%s is not a primary or union pattern variable", identifier);
        }
    }

    // prohibited nesting: one row pattern recognition cannot be nested within another
    List<Expression> expressions = Streams.concat(
                    measures.stream().map(MeasureDefinition::getExpression),
                    variableDefinitions.stream().map(VariableDefinition::getExpression))
            .collect(toImmutableList());
    for (Expression expression : expressions) {
        preOrder(expression)
                .filter(child -> child instanceof PatternRecognitionRelation || child instanceof RowPattern)
                .findFirst()
                .ifPresent(nested -> {
                    throw semanticException(NESTED_ROW_PATTERN_RECOGNITION, nested, "nested row pattern recognition in row pattern recognition");
                });
    }

    return new PatternRecognitionAnalysis(allLabels, undefinedLabels, ranges.buildOrThrow());
}
Also used : AnchorPattern(io.trino.sql.tree.AnchorPattern) ExpressionTreeUtils.extractExpressions(io.trino.sql.analyzer.ExpressionTreeUtils.extractExpressions) MeasureDefinition(io.trino.sql.tree.MeasureDefinition) INVALID_ROW_PATTERN(io.trino.spi.StandardErrorCode.INVALID_ROW_PATTERN) SkipTo(io.trino.sql.tree.SkipTo) INVALID_LABEL(io.trino.spi.StandardErrorCode.INVALID_LABEL) Range(io.trino.sql.analyzer.Analysis.Range) SubsetDefinition(io.trino.sql.tree.SubsetDefinition) RangeQuantifier(io.trino.sql.tree.RangeQuantifier) HashSet(java.util.HashSet) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) ImmutableList(com.google.common.collect.ImmutableList) LongLiteral(io.trino.sql.tree.LongLiteral) ExcludedPattern(io.trino.sql.tree.ExcludedPattern) NodeRef(io.trino.sql.tree.NodeRef) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RowPattern(io.trino.sql.tree.RowPattern) FunctionCall(io.trino.sql.tree.FunctionCall) SemanticExceptions.semanticException(io.trino.sql.analyzer.SemanticExceptions.semanticException) Identifier(io.trino.sql.tree.Identifier) NUMERIC_VALUE_OUT_OF_RANGE(io.trino.spi.StandardErrorCode.NUMERIC_VALUE_OUT_OF_RANGE) ImmutableSet(com.google.common.collect.ImmutableSet) RowsPerMatch(io.trino.sql.tree.PatternRecognitionRelation.RowsPerMatch) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PatternRecognitionRelation(io.trino.sql.tree.PatternRecognitionRelation) Set(java.util.Set) NESTED_ROW_PATTERN_RECOGNITION(io.trino.spi.StandardErrorCode.NESTED_ROW_PATTERN_RECOGNITION) VariableDefinition(io.trino.sql.tree.VariableDefinition) Streams(com.google.common.collect.Streams) AstUtils.preOrder(io.trino.sql.util.AstUtils.preOrder) Sets(com.google.common.collect.Sets) 
INVALID_PATTERN_RECOGNITION_FUNCTION(io.trino.spi.StandardErrorCode.INVALID_PATTERN_RECOGNITION_FUNCTION) List(java.util.List) INVALID_RANGE(io.trino.spi.StandardErrorCode.INVALID_RANGE) INVALID_PROCESSING_MODE(io.trino.spi.StandardErrorCode.INVALID_PROCESSING_MODE) PatternSearchMode(io.trino.sql.tree.PatternSearchMode) Optional(java.util.Optional) Expression(io.trino.sql.tree.Expression) FINAL(io.trino.sql.tree.ProcessingMode.Mode.FINAL) VariableDefinition(io.trino.sql.tree.VariableDefinition) LongLiteral(io.trino.sql.tree.LongLiteral) Range(io.trino.sql.analyzer.Analysis.Range) RangeQuantifier(io.trino.sql.tree.RangeQuantifier) MeasureDefinition(io.trino.sql.tree.MeasureDefinition) ImmutableMap(com.google.common.collect.ImmutableMap) PatternRecognitionRelation(io.trino.sql.tree.PatternRecognitionRelation) NodeRef(io.trino.sql.tree.NodeRef) SubsetDefinition(io.trino.sql.tree.SubsetDefinition) Identifier(io.trino.sql.tree.Identifier) Expression(io.trino.sql.tree.Expression) RowPattern(io.trino.sql.tree.RowPattern) HashSet(java.util.HashSet)

Example 2 with SubsetDefinition

use of io.trino.sql.tree.SubsetDefinition in project trino by trinodb.

The method planPatternRecognitionComponents of the class RelationPlanner.

/**
 * Rewrites the components of a row pattern recognition clause into their planner representation.
 * <p>
 * Subsets become a map from union label to its element labels. Each measure gets a newly
 * allocated symbol and a rewritten expression. The pattern is rewritten to IR. Each variable
 * definition is rewritten, and every label reported by {@code analysis.getUndefinedLabels(pattern)}
 * is given the {@code ExpressionAndValuePointers.TRUE} definition.
 *
 * @param expressionRewrite rewrite applied to every measure and variable definition expression
 * before it is passed to {@code LogicalIndexExtractor.rewrite}
 * @param subsets union variable declarations (SUBSET clause)
 * @param measures measure definitions
 * @param skipTo optional AFTER MATCH SKIP specification; the position falls back to {@code PAST_LAST} when absent
 * @param searchMode optional pattern search mode; treated as initial when absent
 * @param pattern the row pattern
 * @param variableDefinitions variable definitions (DEFINE clause)
 * @return the rewritten components
 */
public PatternRecognitionComponents planPatternRecognitionComponents(Function<Expression, Expression> expressionRewrite, List<SubsetDefinition> subsets, List<MeasureDefinition> measures, Optional<SkipTo> skipTo, Optional<PatternSearchMode> searchMode, RowPattern pattern, List<VariableDefinition> variableDefinitions) {
    // rewrite subsets
    ImmutableMap.Builder<IrLabel, Set<IrLabel>> rewrittenSubsets = ImmutableMap.builder();
    for (SubsetDefinition subsetDefinition : subsets) {
        IrLabel label = irLabel(subsetDefinition.getName());
        Set<IrLabel> elements = subsetDefinition.getIdentifiers().stream().map(RelationPlanner::irLabel).collect(toImmutableSet());
        rewrittenSubsets.put(label, elements);
    }
    // The subsets map is complete at this point. Build it once and reuse it below, instead of
    // building a new map for every measure, every variable definition and the returned result.
    ImmutableMap<IrLabel, Set<IrLabel>> subsetLabels = rewrittenSubsets.buildOrThrow();
    // NOTE: There might be aggregate functions in measure definitions and variable definitions.
    // They are handled differently from top level aggregations in a query:
    // 1. Their arguments are not pre-projected and replaced with single symbols. This is because the arguments might
    // not be eligible for pre-projection, when they contain references to CLASSIFIER() or MATCH_NUMBER() functions
    // which are evaluated at runtime. If some aggregation arguments can be pre-projected, it will be done in the
    // Optimizer.
    // 2. Their arguments do not need to be coerced by hand. Since the pattern aggregation arguments are rewritten as
    // parts of enclosing expressions, and not as standalone expressions, all necessary coercions will be applied by the
    // TranslationMap.
    // rewrite measures
    ImmutableMap.Builder<Symbol, Measure> rewrittenMeasures = ImmutableMap.builder();
    ImmutableList.Builder<Symbol> measureOutputs = ImmutableList.builder();
    for (MeasureDefinition measureDefinition : measures) {
        Type type = analysis.getType(measureDefinition.getExpression());
        // the output symbol is named after the lower-cased measure name
        Symbol symbol = symbolAllocator.newSymbol(measureDefinition.getName().getValue().toLowerCase(ENGLISH), type);
        Expression expression = expressionRewrite.apply(measureDefinition.getExpression());
        ExpressionAndValuePointers measure = LogicalIndexExtractor.rewrite(expression, subsetLabels, symbolAllocator, plannerContext.getMetadata());
        rewrittenMeasures.put(symbol, new Measure(measure, type));
        measureOutputs.add(symbol);
    }
    // rewrite pattern to IR
    IrRowPattern rewrittenPattern = RowPatternToIrRewriter.rewrite(pattern, analysis);
    // rewrite variable definitions
    ImmutableMap.Builder<IrLabel, ExpressionAndValuePointers> rewrittenVariableDefinitions = ImmutableMap.builder();
    for (VariableDefinition variableDefinition : variableDefinitions) {
        IrLabel label = irLabel(variableDefinition.getName());
        Expression expression = expressionRewrite.apply(variableDefinition.getExpression());
        ExpressionAndValuePointers definition = LogicalIndexExtractor.rewrite(expression, subsetLabels, symbolAllocator, plannerContext.getMetadata());
        rewrittenVariableDefinitions.put(label, definition);
    }
    // add `true` definition for undefined labels
    for (String label : analysis.getUndefinedLabels(pattern)) {
        rewrittenVariableDefinitions.put(irLabel(label), ExpressionAndValuePointers.TRUE);
    }
    return new PatternRecognitionComponents(
            subsetLabels,
            rewrittenMeasures.buildOrThrow(),
            measureOutputs.build(),
            skipTo.flatMap(SkipTo::getIdentifier).map(RelationPlanner::irLabel),
            skipTo.map(SkipTo::getPosition).orElse(PAST_LAST),
            searchMode.map(mode -> mode.getMode() == INITIAL).orElse(TRUE),
            rewrittenPattern,
            rewrittenVariableDefinitions.buildOrThrow());
}
Also used : IrLabel(io.trino.sql.planner.rowpattern.ir.IrLabel) Set(java.util.Set) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) AggregationNode.singleGroupingSet(io.trino.sql.planner.plan.AggregationNode.singleGroupingSet) ImmutableSet(com.google.common.collect.ImmutableSet) VariableDefinition(io.trino.sql.tree.VariableDefinition) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) MeasureDefinition(io.trino.sql.tree.MeasureDefinition) SkipTo(io.trino.sql.tree.SkipTo) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) IrRowPattern(io.trino.sql.planner.rowpattern.ir.IrRowPattern) SubsetDefinition(io.trino.sql.tree.SubsetDefinition) RelationType(io.trino.sql.analyzer.RelationType) RowType(io.trino.spi.type.RowType) TypeSignatureTranslator.toSqlType(io.trino.sql.analyzer.TypeSignatureTranslator.toSqlType) Type(io.trino.spi.type.Type) ComparisonExpression(io.trino.sql.tree.ComparisonExpression) CoalesceExpression(io.trino.sql.tree.CoalesceExpression) Expression(io.trino.sql.tree.Expression) SubqueryExpression(io.trino.sql.tree.SubqueryExpression) ExpressionAndValuePointers(io.trino.sql.planner.rowpattern.LogicalIndexExtractor.ExpressionAndValuePointers) Measure(io.trino.sql.planner.plan.PatternRecognitionNode.Measure)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)2 Expression (io.trino.sql.tree.Expression)2 MeasureDefinition (io.trino.sql.tree.MeasureDefinition)2 SkipTo (io.trino.sql.tree.SkipTo)2 SubsetDefinition (io.trino.sql.tree.SubsetDefinition)2 VariableDefinition (io.trino.sql.tree.VariableDefinition)2 Set (java.util.Set)2 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 Sets (com.google.common.collect.Sets)1 Streams (com.google.common.collect.Streams)1 INVALID_LABEL (io.trino.spi.StandardErrorCode.INVALID_LABEL)1 INVALID_PATTERN_RECOGNITION_FUNCTION (io.trino.spi.StandardErrorCode.INVALID_PATTERN_RECOGNITION_FUNCTION)1 INVALID_PROCESSING_MODE (io.trino.spi.StandardErrorCode.INVALID_PROCESSING_MODE)1 INVALID_RANGE (io.trino.spi.StandardErrorCode.INVALID_RANGE)1 INVALID_ROW_PATTERN (io.trino.spi.StandardErrorCode.INVALID_ROW_PATTERN)1 NESTED_ROW_PATTERN_RECOGNITION (io.trino.spi.StandardErrorCode.NESTED_ROW_PATTERN_RECOGNITION)1