Use of io.trino.sql.tree.SubsetDefinition in the trino project by trinodb.
Class PatternRecognitionAnalyzer, method analyze.
/**
 * Validates the clauses of a row pattern recognition specification and gathers the facts derived from them.
 * <p>
 * Checks are performed in the order listed below; the first violation is raised through
 * {@code semanticException}:
 * <ol>
 * <li>SUBSET: a union variable name must differ from every primary pattern variable, must not be declared
 * twice, and each of its elements must be a primary pattern variable.</li>
 * <li>DEFINE: a defined variable must be a primary pattern variable, must not be defined twice, and its
 * expression must not contain a function call whose processing mode is FINAL.</li>
 * <li>Pattern quantifiers: the lower bound must be within 0..Integer.MAX_VALUE, the upper bound within
 * 1..Integer.MAX_VALUE, and the lower bound must not exceed the upper bound.</li>
 * <li>AFTER MATCH SKIP: the target identifier, when present, must be a primary or union pattern variable.</li>
 * <li>Nesting: no measure or variable definition expression may contain a
 * {@code PatternRecognitionRelation} or {@code RowPattern} node.</li>
 * </ol>
 *
 * @param subsets definitions from the SUBSET clause
 * @param variableDefinitions definitions from the DEFINE clause
 * @param measures definitions from the MEASURES clause (only inspected for prohibited nesting)
 * @param pattern the row pattern; its identifiers are the primary pattern variables
 * @param skipTo the optional AFTER MATCH SKIP specification
 * @return the analysis built from all labels (primary plus union), the primary labels that have no
 * DEFINE entry, and the validated quantifier ranges keyed by quantifier node
 */
public static PatternRecognitionAnalysis analyze(List<SubsetDefinition> subsets, List<VariableDefinition> variableDefinitions, List<MeasureDefinition> measures, RowPattern pattern, Optional<SkipTo> skipTo) {
    // Labels are produced by label() from the identifiers found in PATTERN (primary labels)
    // and from the names declared in SUBSET (union labels), respecting SQL identifier semantics.
    Set<String> primaryLabels = extractExpressions(ImmutableList.of(pattern), Identifier.class).stream()
            .map(PatternRecognitionAnalyzer::label)
            .collect(toImmutableSet());
    List<String> unionLabels = subsets.stream()
            .map(declaration -> label(declaration.getName()))
            .collect(toImmutableList());

    // ---- SUBSET ----
    Set<String> seenUnionLabels = new HashSet<>();
    for (SubsetDefinition subset : subsets) {
        String unionLabel = label(subset.getName());
        if (primaryLabels.contains(unionLabel)) {
            throw semanticException(INVALID_LABEL, subset.getName(), "union pattern variable name: %s is a duplicate of primary pattern variable name", subset.getName());
        }
        boolean firstDeclaration = seenUnionLabels.add(unionLabel);
        if (!firstDeclaration) {
            throw semanticException(INVALID_LABEL, subset.getName(), "union pattern variable name: %s is declared twice", subset.getName());
        }
        // TODO can there be repetitions in the list of subset elements? (currently repetitions are supported)
        subset.getIdentifiers().stream()
                .filter(element -> !primaryLabels.contains(label(element)))
                .findFirst()
                .ifPresent(element -> {
                    throw semanticException(INVALID_LABEL, element, "subset element: %s is not a primary pattern variable", element);
                });
    }

    // ---- DEFINE ----
    Set<String> definedLabels = new HashSet<>();
    for (VariableDefinition definition : variableDefinitions) {
        String definedLabel = label(definition.getName());
        if (!primaryLabels.contains(definedLabel)) {
            throw semanticException(INVALID_LABEL, definition.getName(), "defined variable: %s is not a primary pattern variable", definition.getName());
        }
        if (!definedLabels.add(definedLabel)) {
            throw semanticException(INVALID_LABEL, definition.getName(), "pattern variable with name: %s is defined twice", definition.getName());
        }
        // Only RUNNING semantics (the default) is accepted in DEFINE: reject the first FINAL function call found.
        for (FunctionCall functionCall : extractExpressions(ImmutableList.of(definition.getExpression()), FunctionCall.class)) {
            boolean finalSemantics = functionCall.getProcessingMode()
                    .map(mode -> mode.getMode() == FINAL)
                    .orElse(false);
            if (finalSemantics) {
                throw semanticException(INVALID_PROCESSING_MODE, functionCall.getProcessingMode().get(), "FINAL semantics is not supported in DEFINE clause");
            }
        }
    }

    // Primary labels lacking a definition; they are implicitly associated with the `true` condition.
    Set<String> undefinedLabels = Sets.difference(primaryLabels, definedLabels);

    // ---- pattern quantifiers ----
    ImmutableMap.Builder<NodeRef<RangeQuantifier>, Range> ranges = ImmutableMap.builder();
    preOrder(pattern)
            .filter(node -> node instanceof RangeQuantifier)
            .map(node -> (RangeQuantifier) node)
            .forEach(quantifier -> {
                Optional<Long> lowerBound = quantifier.getAtLeast().map(LongLiteral::getValue);
                if (lowerBound.isPresent()) {
                    long lower = lowerBound.get();
                    if (lower < 0) {
                        throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier lower bound must be greater than or equal to 0");
                    }
                    if (lower > Integer.MAX_VALUE) {
                        throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier lower bound must not exceed " + Integer.MAX_VALUE);
                    }
                }

                Optional<Long> upperBound = quantifier.getAtMost().map(LongLiteral::getValue);
                if (upperBound.isPresent()) {
                    long upper = upperBound.get();
                    if (upper < 1) {
                        throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier upper bound must be greater than or equal to 1");
                    }
                    if (upper > Integer.MAX_VALUE) {
                        throw semanticException(NUMERIC_VALUE_OUT_OF_RANGE, quantifier, "Pattern quantifier upper bound must not exceed " + Integer.MAX_VALUE);
                    }
                }

                if (lowerBound.isPresent() && upperBound.isPresent() && lowerBound.get() > upperBound.get()) {
                    throw semanticException(INVALID_RANGE, quantifier, "Pattern quantifier lower bound must not exceed upper bound");
                }

                // Both bounds were checked against Integer.MAX_VALUE above, so the narrowing cannot overflow.
                ranges.put(NodeRef.of(quantifier), new Range(lowerBound.map(Math::toIntExact), upperBound.map(Math::toIntExact)));
            });

    // ---- AFTER MATCH SKIP ----
    ImmutableSet.Builder<String> labels = ImmutableSet.builder();
    labels.addAll(primaryLabels);
    labels.addAll(unionLabels);
    Set<String> allLabels = labels.build();
    skipTo.flatMap(SkipTo::getIdentifier)
            .filter(identifier -> !allLabels.contains(label(identifier)))
            .ifPresent(identifier -> {
                throw semanticException(INVALID_LABEL, identifier, "%s is not a primary or union pattern variable", identifier);
            });

    // ---- prohibited nesting: one row pattern recognition cannot be nested within another ----
    List<Expression> expressions = Streams.concat(
                    measures.stream().map(MeasureDefinition::getExpression),
                    variableDefinitions.stream().map(VariableDefinition::getExpression))
            .collect(toImmutableList());
    for (Expression expression : expressions) {
        preOrder(expression)
                .filter(child -> child instanceof PatternRecognitionRelation || child instanceof RowPattern)
                .findFirst()
                .ifPresent(nested -> {
                    throw semanticException(NESTED_ROW_PATTERN_RECOGNITION, nested, "nested row pattern recognition in row pattern recognition");
                });
    }

    return new PatternRecognitionAnalysis(allLabels, undefinedLabels, ranges.buildOrThrow());
}
Use of io.trino.sql.tree.SubsetDefinition in the trino project by trinodb.
Class RelationPlanner, method planPatternRecognitionComponents.
/**
 * Rewrites the clauses of a row pattern recognition specification into their planner representation.
 * <p>
 * Subsets are turned into a map from union label to its element labels. Each measure gets a new output
 * symbol allocated from its lower-cased name and the type recorded by the analysis for its expression.
 * Measure and variable definition expressions are first passed through {@code expressionRewrite} and then
 * through {@code LogicalIndexExtractor.rewrite}. Labels that the analysis reports as undefined for the
 * pattern are given the {@code ExpressionAndValuePointers.TRUE} definition.
 *
 * @param expressionRewrite rewrite applied to every measure and variable definition expression
 * @param subsets definitions from the SUBSET clause
 * @param measures definitions from the MEASURES clause
 * @param skipTo the optional AFTER MATCH SKIP specification; the skip position falls back to {@code PAST_LAST} when absent
 * @param searchMode the optional pattern search mode; treated as INITIAL when absent
 * @param pattern the row pattern
 * @param variableDefinitions definitions from the DEFINE clause
 * @return the rewritten subsets, measures, measure output symbols, skip-to label and position,
 * initial-mode flag, IR pattern and variable definitions
 */
public PatternRecognitionComponents planPatternRecognitionComponents(Function<Expression, Expression> expressionRewrite, List<SubsetDefinition> subsets, List<MeasureDefinition> measures, Optional<SkipTo> skipTo, Optional<PatternSearchMode> searchMode, RowPattern pattern, List<VariableDefinition> variableDefinitions) {
    // rewrite subsets
    ImmutableMap.Builder<IrLabel, Set<IrLabel>> subsetsBuilder = ImmutableMap.builder();
    for (SubsetDefinition subsetDefinition : subsets) {
        IrLabel label = irLabel(subsetDefinition.getName());
        Set<IrLabel> elements = subsetDefinition.getIdentifiers().stream()
                .map(RelationPlanner::irLabel)
                .collect(toImmutableSet());
        subsetsBuilder.put(label, elements);
    }
    // The subset map does not change past this point: build it once and share the same instance
    // with every rewrite below instead of materializing a fresh map per measure / definition.
    ImmutableMap<IrLabel, Set<IrLabel>> rewrittenSubsets = subsetsBuilder.buildOrThrow();

    // NOTE: There might be aggregate functions in measure definitions and variable definitions.
    // They are handled differently than top level aggregations in a query:
    // 1. Their arguments are not pre-projected and replaced with single symbols. This is because the arguments might
    // not be eligible for pre-projection, when they contain references to CLASSIFIER() or MATCH_NUMBER() functions
    // which are evaluated at runtime. If some aggregation arguments can be pre-projected, it will be done in the
    // Optimizer.
    // 2. Their arguments do not need to be coerced by hand. Since the pattern aggregation arguments are rewritten as
    // parts of enclosing expressions, and not as standalone expressions, all necessary coercions will be applied by the
    // TranslationMap.

    // rewrite measures
    ImmutableMap.Builder<Symbol, Measure> rewrittenMeasures = ImmutableMap.builder();
    ImmutableList.Builder<Symbol> measureOutputs = ImmutableList.builder();
    for (MeasureDefinition measureDefinition : measures) {
        Type type = analysis.getType(measureDefinition.getExpression());
        Symbol symbol = symbolAllocator.newSymbol(measureDefinition.getName().getValue().toLowerCase(ENGLISH), type);
        Expression expression = expressionRewrite.apply(measureDefinition.getExpression());
        ExpressionAndValuePointers measure = LogicalIndexExtractor.rewrite(expression, rewrittenSubsets, symbolAllocator, plannerContext.getMetadata());
        rewrittenMeasures.put(symbol, new Measure(measure, type));
        measureOutputs.add(symbol);
    }

    // rewrite pattern to IR
    IrRowPattern rewrittenPattern = RowPatternToIrRewriter.rewrite(pattern, analysis);

    // rewrite variable definitions
    ImmutableMap.Builder<IrLabel, ExpressionAndValuePointers> rewrittenVariableDefinitions = ImmutableMap.builder();
    for (VariableDefinition variableDefinition : variableDefinitions) {
        IrLabel label = irLabel(variableDefinition.getName());
        Expression expression = expressionRewrite.apply(variableDefinition.getExpression());
        ExpressionAndValuePointers definition = LogicalIndexExtractor.rewrite(expression, rewrittenSubsets, symbolAllocator, plannerContext.getMetadata());
        rewrittenVariableDefinitions.put(label, definition);
    }
    // add `true` definition for undefined labels
    for (String label : analysis.getUndefinedLabels(pattern)) {
        rewrittenVariableDefinitions.put(irLabel(label), ExpressionAndValuePointers.TRUE);
    }

    return new PatternRecognitionComponents(
            rewrittenSubsets,
            rewrittenMeasures.buildOrThrow(),
            measureOutputs.build(),
            skipTo.flatMap(SkipTo::getIdentifier).map(RelationPlanner::irLabel),
            skipTo.map(SkipTo::getPosition).orElse(PAST_LAST),
            searchMode.map(mode -> mode.getMode() == INITIAL).orElse(TRUE),
            rewrittenPattern,
            rewrittenVariableDefinitions.buildOrThrow());
}
Aggregations