Search in sources :

Example 1 with RangeQuantifier

Use of io.trino.sql.tree.RangeQuantifier in project trino by trinodb.

From the class PatternRecognitionAnalyzer, method analyze.

/**
 * Validates the clauses of a row pattern recognition specification and collects the results.
 * <p>
 * Checks run in this order: SUBSET names and elements, DEFINE names and processing mode,
 * bounds of range quantifiers found in the pattern, the AFTER MATCH SKIP target label, and
 * absence of nested row pattern recognition inside measure and variable definition expressions.
 * The first violation encountered is raised via {@code semanticException}.
 *
 * @param subsets SUBSET definitions, i.e. union pattern variables
 * @param variableDefinitions DEFINE clause entries
 * @param measures MEASURES clause entries; here they are only scanned for nested pattern recognition
 * @param pattern the row pattern; the identifiers it contains are the primary pattern variables
 * @param skipTo optional AFTER MATCH SKIP specification
 * @return analysis carrying all labels (primary and union), the primary labels that have no
 *         definition, and the validated range of every range quantifier
 */
public static PatternRecognitionAnalysis analyze(List<SubsetDefinition> subsets, List<VariableDefinition> variableDefinitions, List<MeasureDefinition> measures, RowPattern pattern, Optional<SkipTo> skipTo) {
    // labels are produced by label(), so all name comparisons below follow SQL identifier semantics
    Set<String> primaryLabels = extractExpressions(ImmutableList.of(pattern), Identifier.class).stream()
            .map(PatternRecognitionAnalyzer::label)
            .collect(toImmutableSet());
    List<String> unionLabels = subsets.stream()
            .map(subset -> label(subset.getName()))
            .collect(toImmutableList());

    // SUBSET: a union variable must not clash with a primary variable or with another union variable,
    // and may only be composed of primary variables
    Set<String> declaredUnionLabels = new HashSet<>();
    for (SubsetDefinition subset : subsets) {
        String unionLabel = label(subset.getName());
        if (primaryLabels.contains(unionLabel)) {
            throw semanticException(INVALID_LABEL, subset.getName(), "union pattern variable name: %s is a duplicate of primary pattern variable name", subset.getName());
        }
        boolean firstDeclaration = declaredUnionLabels.add(unionLabel);
        if (!firstDeclaration) {
            throw semanticException(INVALID_LABEL, subset.getName(), "union pattern variable name: %s is declared twice", subset.getName());
        }
        // TODO can there be repetitions in the list of subset elements? (currently repetitions are supported)
        for (Identifier element : subset.getIdentifiers()) {
            if (primaryLabels.contains(label(element))) {
                continue;
            }
            throw semanticException(INVALID_LABEL, element, "subset element: %s is not a primary pattern variable", element);
        }
    }

    // DEFINE: every defined variable must be a primary variable and must be defined only once
    Set<String> definedLabels = new HashSet<>();
    for (VariableDefinition definition : variableDefinitions) {
        String definedLabel = label(definition.getName());
        if (!primaryLabels.contains(definedLabel)) {
            throw semanticException(INVALID_LABEL, definition.getName(), "defined variable: %s is not a primary pattern variable", definition.getName());
        }
        boolean firstDefinition = definedLabels.add(definedLabel);
        if (!firstDefinition) {
            throw semanticException(INVALID_LABEL, definition.getName(), "pattern variable with name: %s is defined twice", definition.getName());
        }
        // only RUNNING semantics (the default) is allowed in DEFINE; report the first call marked FINAL
        Optional<FunctionCall> finalCall = extractExpressions(ImmutableList.of(definition.getExpression()), FunctionCall.class).stream()
                .filter(call -> call.getProcessingMode().filter(mode -> mode.getMode() == FINAL).isPresent())
                .findFirst();
        if (finalCall.isPresent()) {
            throw semanticException(INVALID_PROCESSING_MODE, finalCall.get().getProcessingMode().get(), "FINAL semantics is not supported in DEFINE clause");
        }
    }

    // primary labels that have no DEFINE entry. they are implicitly associated with `true` condition
    Set<String> undefinedLabels = Sets.difference(primaryLabels, definedLabels);

    // range quantifiers: check each bound individually, then the bounds against each other
    ImmutableMap.Builder<NodeRef<RangeQuantifier>, Range> ranges = ImmutableMap.builder();
    preOrder(pattern)
            .filter(node -> node instanceof RangeQuantifier)
            .map(node -> (RangeQuantifier) node)
            .forEach(quantifier -> {
                Optional<Long> lowerBound = quantifier.getAtLeast().map(LongLiteral::getValue);
                if (lowerBound.isPresent()) {
                    long lower = lowerBound.get();
                    if (lower < 0) {
                        throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier lower bound must be greater than or equal to 0");
                    }
                    if (lower > Integer.MAX_VALUE) {
                        throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier lower bound must not exceed " + Integer.MAX_VALUE);
                    }
                }
                Optional<Long> upperBound = quantifier.getAtMost().map(LongLiteral::getValue);
                if (upperBound.isPresent()) {
                    long upper = upperBound.get();
                    if (upper < 1) {
                        throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier upper bound must be greater than or equal to 1");
                    }
                    if (upper > Integer.MAX_VALUE) {
                        throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier upper bound must not exceed " + Integer.MAX_VALUE);
                    }
                }
                if (lowerBound.isPresent() && upperBound.isPresent() && lowerBound.get() > upperBound.get()) {
                    throw semanticException(INVALID_RANGE, quantifier, "Pattern quantifier lower bound must not exceed upper bound");
                }
                // both bounds were verified to fit in an int above, so the narrowing cannot overflow
                ranges.put(NodeRef.of(quantifier), new Range(lowerBound.map(Math::toIntExact), upperBound.map(Math::toIntExact)));
            });

    // AFTER MATCH SKIP: a named target must be a known primary or union label
    Set<String> allLabels = ImmutableSet.<String>builder()
            .addAll(primaryLabels)
            .addAll(unionLabels)
            .build();
    skipTo.flatMap(SkipTo::getIdentifier)
            .filter(identifier -> !allLabels.contains(label(identifier)))
            .ifPresent(identifier -> {
                throw semanticException(INVALID_LABEL, identifier, "%s is not a primary or union pattern variable", identifier);
            });

    // prohibited nesting: one row pattern recognition cannot be nested within another.
    // measure expressions are inspected before variable definition expressions
    List<Expression> expressions = Streams.concat(
                    measures.stream().map(MeasureDefinition::getExpression),
                    variableDefinitions.stream().map(VariableDefinition::getExpression))
            .collect(toImmutableList());
    for (Expression expression : expressions) {
        preOrder(expression)
                .filter(child -> child instanceof PatternRecognitionRelation || child instanceof RowPattern)
                .findFirst()
                .ifPresent(nested -> {
                    throw semanticException(NESTED_ROW_PATTERN_RECOGNITION, nested, "nested row pattern recognition in row pattern recognition");
                });
    }

    return new PatternRecognitionAnalysis(allLabels, undefinedLabels, ranges.buildOrThrow());
}
Also used : AnchorPattern(io.trino.sql.tree.AnchorPattern) ExpressionTreeUtils.extractExpressions(io.trino.sql.analyzer.ExpressionTreeUtils.extractExpressions) MeasureDefinition(io.trino.sql.tree.MeasureDefinition) INVALID_ROW_PATTERN(io.trino.spi.StandardErrorCode.INVALID_ROW_PATTERN) SkipTo(io.trino.sql.tree.SkipTo) INVALID_LABEL(io.trino.spi.StandardErrorCode.INVALID_LABEL) Range(io.trino.sql.analyzer.Analysis.Range) SubsetDefinition(io.trino.sql.tree.SubsetDefinition) RangeQuantifier(io.trino.sql.tree.RangeQuantifier) HashSet(java.util.HashSet) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) ImmutableList(com.google.common.collect.ImmutableList) LongLiteral(io.trino.sql.tree.LongLiteral) ExcludedPattern(io.trino.sql.tree.ExcludedPattern) NodeRef(io.trino.sql.tree.NodeRef) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) RowPattern(io.trino.sql.tree.RowPattern) FunctionCall(io.trino.sql.tree.FunctionCall) SemanticExceptions.semanticException(io.trino.sql.analyzer.SemanticExceptions.semanticException) Identifier(io.trino.sql.tree.Identifier) NUMERIC_VALUE_OUT_OF_RANGE(io.trino.spi.StandardErrorCode.NUMERIC_VALUE_OUT_OF_RANGE) ImmutableSet(com.google.common.collect.ImmutableSet) RowsPerMatch(io.trino.sql.tree.PatternRecognitionRelation.RowsPerMatch) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PatternRecognitionRelation(io.trino.sql.tree.PatternRecognitionRelation) Set(java.util.Set) NESTED_ROW_PATTERN_RECOGNITION(io.trino.spi.StandardErrorCode.NESTED_ROW_PATTERN_RECOGNITION) VariableDefinition(io.trino.sql.tree.VariableDefinition) Streams(com.google.common.collect.Streams) AstUtils.preOrder(io.trino.sql.util.AstUtils.preOrder) Sets(com.google.common.collect.Sets) 
INVALID_PATTERN_RECOGNITION_FUNCTION(io.trino.spi.StandardErrorCode.INVALID_PATTERN_RECOGNITION_FUNCTION) List(java.util.List) INVALID_RANGE(io.trino.spi.StandardErrorCode.INVALID_RANGE) INVALID_PROCESSING_MODE(io.trino.spi.StandardErrorCode.INVALID_PROCESSING_MODE) PatternSearchMode(io.trino.sql.tree.PatternSearchMode) Optional(java.util.Optional) Expression(io.trino.sql.tree.Expression) FINAL(io.trino.sql.tree.ProcessingMode.Mode.FINAL) VariableDefinition(io.trino.sql.tree.VariableDefinition) LongLiteral(io.trino.sql.tree.LongLiteral) Range(io.trino.sql.analyzer.Analysis.Range) RangeQuantifier(io.trino.sql.tree.RangeQuantifier) MeasureDefinition(io.trino.sql.tree.MeasureDefinition) ImmutableMap(com.google.common.collect.ImmutableMap) PatternRecognitionRelation(io.trino.sql.tree.PatternRecognitionRelation) NodeRef(io.trino.sql.tree.NodeRef) SubsetDefinition(io.trino.sql.tree.SubsetDefinition) Identifier(io.trino.sql.tree.Identifier) Expression(io.trino.sql.tree.Expression) RowPattern(io.trino.sql.tree.RowPattern) HashSet(java.util.HashSet)

Example 2 with RangeQuantifier

Use of io.trino.sql.tree.RangeQuantifier in project trino by trinodb.

From the class TestSqlParser, method testRowPattern.

/**
 * Checks the tree produced when parsing row patterns: nesting of alternation and
 * concatenation, quantifier nodes, anchors, the empty pattern, range quantifiers,
 * and the parse errors reported for malformed quantifiers.
 */
@Test
public void testRowPattern() {
    // "(A B)* | CC+? DD?? E | (F | G)": the expected tree nests alternations and concatenations to the left
    QuantifiedPattern groupStar = new QuantifiedPattern(
            location(1, 1),
            new PatternConcatenation(location(1, 2), ImmutableList.of(
                    new PatternVariable(location(1, 2), new Identifier(location(1, 2), "A", false)),
                    new PatternVariable(location(1, 4), new Identifier(location(1, 4), "B", false)))),
            new ZeroOrMoreQuantifier(location(1, 6), true));
    // the quantifiers written with a trailing '?' ("+?", "??") carry `false` as their flag, plain ones carry `true`
    QuantifiedPattern ccOneOrMore = new QuantifiedPattern(
            location(1, 10),
            new PatternVariable(location(1, 10), new Identifier(location(1, 10), "CC", false)),
            new OneOrMoreQuantifier(location(1, 12), false));
    QuantifiedPattern ddZeroOrOne = new QuantifiedPattern(
            location(1, 15),
            new PatternVariable(location(1, 15), new Identifier(location(1, 15), "DD", false)),
            new ZeroOrOneQuantifier(location(1, 17), false));
    PatternConcatenation ccThenDd = new PatternConcatenation(location(1, 10), ImmutableList.of(ccOneOrMore, ddZeroOrOne));
    PatternConcatenation ccDdThenE = new PatternConcatenation(location(1, 10), ImmutableList.of(
            ccThenDd,
            new PatternVariable(location(1, 20), new Identifier(location(1, 20), "E", false))));
    PatternAlternation firstTwoBranches = new PatternAlternation(location(1, 1), ImmutableList.of(groupStar, ccDdThenE));
    PatternAlternation fOrG = new PatternAlternation(location(1, 25), ImmutableList.of(
            new PatternVariable(location(1, 25), new Identifier(location(1, 25), "F", false)),
            new PatternVariable(location(1, 29), new Identifier(location(1, 29), "G", false))));
    assertThat(rowPattern("(A B)* | CC+? DD?? E | (F | G)"))
            .isEqualTo(new PatternAlternation(location(1, 1), ImmutableList.of(firstTwoBranches, fOrG)));

    // variable A at line 1, column 1 appears in most of the remaining expectations
    PatternVariable leadingA = new PatternVariable(location(1, 1), new Identifier(location(1, 1), "A", false));

    // "A | B | C D E F"
    PatternAlternation aOrB = new PatternAlternation(location(1, 1), ImmutableList.of(
            leadingA,
            new PatternVariable(location(1, 5), new Identifier(location(1, 5), "B", false))));
    PatternConcatenation cThenD = new PatternConcatenation(location(1, 9), ImmutableList.of(
            new PatternVariable(location(1, 9), new Identifier(location(1, 9), "C", false)),
            new PatternVariable(location(1, 11), new Identifier(location(1, 11), "D", false))));
    PatternConcatenation cdThenE = new PatternConcatenation(location(1, 9), ImmutableList.of(
            cThenD,
            new PatternVariable(location(1, 13), new Identifier(location(1, 13), "E", false))));
    PatternConcatenation cdeThenF = new PatternConcatenation(location(1, 9), ImmutableList.of(
            cdThenE,
            new PatternVariable(location(1, 15), new Identifier(location(1, 15), "F", false))));
    assertThat(rowPattern("A | B | C D E F"))
            .isEqualTo(new PatternAlternation(location(1, 1), ImmutableList.of(aOrB, cdeThenF)));

    // malformed quantifiers are rejected by the parser
    assertThatThrownBy(() -> SQL_PARSER.createRowPattern("A!"))
            .isInstanceOf(ParsingException.class)
            .hasMessageMatching("line 1:2: mismatched input '!'.*");
    assertThatThrownBy(() -> SQL_PARSER.createRowPattern("A**"))
            .isInstanceOf(ParsingException.class)
            .hasMessageMatching("line 1:3: mismatched input '*'.*");

    assertThat(rowPattern("A??"))
            .isEqualTo(new QuantifiedPattern(location(1, 1), leadingA, new ZeroOrOneQuantifier(location(1, 2), false)));

    // partition anchors
    assertThat(rowPattern("^$"))
            .isEqualTo(new PatternConcatenation(location(1, 1), ImmutableList.of(
                    new AnchorPattern(location(1, 1), AnchorPattern.Type.PARTITION_START),
                    new AnchorPattern(location(1, 2), AnchorPattern.Type.PARTITION_END))));

    assertThat(rowPattern("()"))
            .isEqualTo(new EmptyPattern(location(1, 1)));

    // range quantifiers: exact count, open upper bound, open lower bound, both bounds
    LongLiteral threeAtColumn3 = new LongLiteral(location(1, 3), "3");
    assertThat(rowPattern("A{3}"))
            .isEqualTo(new QuantifiedPattern(
                    location(1, 1),
                    leadingA,
                    new RangeQuantifier(location(1, 2), true, Optional.of(threeAtColumn3), Optional.of(threeAtColumn3))));
    assertThat(rowPattern("A{3,}"))
            .isEqualTo(new QuantifiedPattern(
                    location(1, 1),
                    leadingA,
                    new RangeQuantifier(location(1, 2), true, Optional.of(threeAtColumn3), Optional.empty())));
    assertThat(rowPattern("A{,3}"))
            .isEqualTo(new QuantifiedPattern(
                    location(1, 1),
                    leadingA,
                    new RangeQuantifier(location(1, 2), true, Optional.empty(), Optional.of(new LongLiteral(location(1, 4), "3")))));
    assertThat(rowPattern("A{3,4}"))
            .isEqualTo(new QuantifiedPattern(
                    location(1, 1),
                    leadingA,
                    new RangeQuantifier(location(1, 2), true, Optional.of(threeAtColumn3), Optional.of(new LongLiteral(location(1, 5), "4")))));
}
Also used : PatternVariable(io.trino.sql.tree.PatternVariable) LongLiteral(io.trino.sql.tree.LongLiteral) PatternConcatenation(io.trino.sql.tree.PatternConcatenation) EmptyPattern(io.trino.sql.tree.EmptyPattern) ZeroOrMoreQuantifier(io.trino.sql.tree.ZeroOrMoreQuantifier) ZeroOrOneQuantifier(io.trino.sql.tree.ZeroOrOneQuantifier) RangeQuantifier(io.trino.sql.tree.RangeQuantifier) QuantifiedPattern(io.trino.sql.tree.QuantifiedPattern) QueryUtil.quotedIdentifier(io.trino.sql.QueryUtil.quotedIdentifier) Identifier(io.trino.sql.tree.Identifier) PatternAlternation(io.trino.sql.tree.PatternAlternation) OneOrMoreQuantifier(io.trino.sql.tree.OneOrMoreQuantifier) AnchorPattern(io.trino.sql.tree.AnchorPattern) Test(org.junit.jupiter.api.Test)

Aggregations

AnchorPattern (io.trino.sql.tree.AnchorPattern)2 Identifier (io.trino.sql.tree.Identifier)2 LongLiteral (io.trino.sql.tree.LongLiteral)2 RangeQuantifier (io.trino.sql.tree.RangeQuantifier)2 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)1 Sets (com.google.common.collect.Sets)1 Streams (com.google.common.collect.Streams)1 INVALID_LABEL (io.trino.spi.StandardErrorCode.INVALID_LABEL)1 INVALID_PATTERN_RECOGNITION_FUNCTION (io.trino.spi.StandardErrorCode.INVALID_PATTERN_RECOGNITION_FUNCTION)1 INVALID_PROCESSING_MODE (io.trino.spi.StandardErrorCode.INVALID_PROCESSING_MODE)1 INVALID_RANGE (io.trino.spi.StandardErrorCode.INVALID_RANGE)1 INVALID_ROW_PATTERN (io.trino.spi.StandardErrorCode.INVALID_ROW_PATTERN)1 NESTED_ROW_PATTERN_RECOGNITION (io.trino.spi.StandardErrorCode.NESTED_ROW_PATTERN_RECOGNITION)1 NOT_SUPPORTED (io.trino.spi.StandardErrorCode.NOT_SUPPORTED)1 NUMERIC_VALUE_OUT_OF_RANGE (io.trino.spi.StandardErrorCode.NUMERIC_VALUE_OUT_OF_RANGE)1 QueryUtil.quotedIdentifier (io.trino.sql.QueryUtil.quotedIdentifier)1