Usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
Class StreamExecGroupWindowAggregate, method translateToPlanInternal.
/**
 * Translates this group window aggregate node into a keyed one-input transformation.
 *
 * <p>The method translates the input edge, resolves the index of the rowtime field (or
 * {@code -1} when the window's time attribute is not a rowtime attribute), builds the aggregate
 * info list and the generated aggregate handler, generates a record equaliser over the
 * aggregate value types followed by the window property types, creates the window operator and
 * finally keys the resulting transformation by {@code grouping}.
 *
 * @param planner planner used to translate the input edge and to obtain rel builders
 * @param config node configuration; supplies the state retention time and local time zone
 * @return the transformation wrapping the window operator, using the input's parallelism
 * @throws TableException if the window is defined on a rowtime attribute whose field index
 *     cannot be resolved in the input row type
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Only tumbling and sliding windows can be count windows; anything else never is.
    final boolean isCountWindow =
            window instanceof TumblingGroupWindow
                    ? hasRowIntervalType(((TumblingGroupWindow) window).size())
                    : window instanceof SlidingGroupWindow
                            && hasRowIntervalType(((SlidingGroupWindow) window).size());

    if (isCountWindow && grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOGGER.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent "
                        + "excessive state size. You may specify a retention time of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inputRowType = (RowType) edge.getOutputType();

    // -1 marks "no rowtime field", i.e. the window is not defined on a rowtime attribute.
    final int inputTimeFieldIndex;
    if (!isRowtimeAttribute(window.timeAttribute())) {
        inputTimeFieldIndex = -1;
    } else {
        inputTimeFieldIndex =
                timeFieldIndex(
                        FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inputRowType),
                        planner.getRelBuilder(),
                        window.timeAttribute());
        if (inputTimeFieldIndex < 0) {
            throw new TableException(
                    "Group window must defined on a time attribute, "
                            + "but the time attribute can't be found.\n"
                            + "This should never happen. Please file an issue.");
        }
    }

    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(
                    window.timeAttribute().getOutputDataType().getLogicalType(),
                    config.getLocalTimeZone());

    // Every aggregate call shares the node-level retraction flag.
    final boolean[] aggCallNeedRetractions = new boolean[aggCalls.length];
    Arrays.fill(aggCallNeedRetractions, needRetraction);

    final AggregateInfoList aggInfoList =
            transformToStreamAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo

    final GeneratedClass<?> aggCodeGenerator =
            createAggsHandler(
                    aggInfoList,
                    config,
                    planner.getRelBuilder(),
                    inputRowType.getChildren(),
                    shiftTimeZone);

    final LogicalType[] aggResultTypes =
            extractLogicalTypes(aggInfoList.getActualValueTypes());

    final LogicalType[] windowPropertyTypes = new LogicalType[namedWindowProperties.length];
    for (int i = 0; i < namedWindowProperties.length; i++) {
        windowPropertyTypes[i] = namedWindowProperties[i].getProperty().getResultType();
    }

    // The equaliser compares rows laid out as: aggregate values, then window properties.
    final EqualiserCodeGenerator equaliserGenerator =
            new EqualiserCodeGenerator(ArrayUtils.addAll(aggResultTypes, windowPropertyTypes));
    final GeneratedRecordEqualiser equaliser =
            equaliserGenerator.generateRecordEqualiser("WindowValueEqualiser");

    final LogicalType[] aggValueTypes =
            extractLogicalTypes(aggInfoList.getActualValueTypes());
    final LogicalType[] accTypes = extractLogicalTypes(aggInfoList.getAccTypes());
    final int inputCountIndex = aggInfoList.getIndexOfCountStar();

    final WindowOperator<?, ?> operator =
            createWindowOperator(
                    config,
                    aggCodeGenerator,
                    equaliser,
                    accTypes,
                    windowPropertyTypes,
                    aggValueTypes,
                    inputRowType.getChildren().toArray(new LogicalType[0]),
                    inputTimeFieldIndex,
                    shiftTimeZone,
                    inputCountIndex);

    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(GROUP_WINDOW_AGGREGATE_TRANSFORMATION, config),
                    operator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());

    // State is keyed by the grouping fields of the input row.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
Class StreamExecMatch, method translateToPlanInternal.
/**
 * Translates this MATCH_RECOGNIZE node into a keyed CEP operator transformation.
 *
 * <p>The input edge is translated first, the order keys are checked, and the input is passed
 * through {@code translateOrder}. The match specification is then turned into a CEP pattern,
 * which is rejected if it can produce empty matches, ends in a greedy quantifier, or if
 * "all rows per match" was requested. The resulting transformation is keyed by the partition
 * fields and is forced to parallelism 1 when {@code inputsContainSingleton()} holds.
 *
 * @param planner planner used to translate the input edge and to obtain rel builders and the
 *     execution config
 * @param config node configuration used for code generation and transformation metadata
 * @return the transformation wrapping the CEP operator
 * @throws TableException if the pattern or the match mode is not supported
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inputRowType = (RowType) edge.getOutputType();

    checkOrderKeys(inputRowType);
    final EventComparator<RowData> eventComparator =
            createEventComparator(config, inputRowType);
    final Transformation<RowData> timestampedInputTransform =
            translateOrder(inputTransform, inputRowType);

    final Tuple2<Pattern<RowData, RowData>, List<String>> patternWithNames =
            translatePattern(
                    matchSpec, config.getTableConfig(), planner.getRelBuilder(), inputRowType);
    final Pattern<RowData, RowData> cepPattern = patternWithNames.f0;

    // TODO remove this once it is supported in CEP library
    if (NFACompiler.canProduceEmptyMatches(cepPattern)) {
        throw new TableException(
                "Patterns that can produce empty matches are not supported. There must be at least one non-optional state.");
    }

    // TODO remove this once it is supported in CEP library
    final boolean endsWithGreedyQuantifier =
            cepPattern.getQuantifier().hasProperty(Quantifier.QuantifierProperty.GREEDY);
    if (endsWithGreedyQuantifier) {
        throw new TableException(
                "Greedy quantifiers are not allowed as the last element of a Pattern yet. "
                        + "Finish your pattern with either a simple variable or reluctant quantifier.");
    }

    if (matchSpec.isAllRows()) {
        throw new TableException("All rows per match mode is not supported yet.");
    }

    final int[] partitionKeys = matchSpec.getPartition().getFieldIndices();

    // The first order key decides whether the operator is told it runs on processing time.
    final SortSpec.SortFieldSpec timeOrderField = matchSpec.getOrderKeys().getFieldSpec(0);
    final LogicalType timeOrderFieldType =
            inputRowType.getTypeAt(timeOrderField.getFieldIndex());
    final boolean isProctime = TypeCheckUtils.isProcTime(timeOrderFieldType);

    final InternalTypeInfo<RowData> inputTypeInfo =
            (InternalTypeInfo<RowData>) inputTransform.getOutputType();
    final TypeSerializer<RowData> inputSerializer =
            inputTypeInfo.createSerializer(planner.getExecEnv().getConfig());
    final NFACompiler.NFAFactory<RowData> nfaFactory =
            NFACompiler.compileFactory(cepPattern, false);

    final MatchCodeGenerator codeGenerator =
            new MatchCodeGenerator(
                    new CodeGeneratorContext(config.getTableConfig()),
                    planner.getRelBuilder(),
                    false, // nullableInput
                    JavaScalaConversionUtil.toScala(patternWithNames.f1),
                    JavaScalaConversionUtil.toScala(Optional.empty()),
                    CodeGenUtils.DEFAULT_COLLECTOR_TERM());
    codeGenerator.bindInput(
            inputRowType,
            CodeGenUtils.DEFAULT_INPUT1_TERM(),
            JavaScalaConversionUtil.toScala(Optional.empty()));
    final PatternProcessFunctionRunner patternProcessFunction =
            codeGenerator.generateOneRowPerMatchExpression(
                    (RowType) getOutputType(), partitionKeys, matchSpec.getMeasures());

    final CepOperator<RowData, RowData, RowData> cepOperator =
            new CepOperator<>(
                    inputSerializer,
                    isProctime,
                    nfaFactory,
                    eventComparator,
                    cepPattern.getAfterMatchSkipStrategy(),
                    patternProcessFunction,
                    null);

    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    timestampedInputTransform,
                    createTransformationMeta(MATCH_TRANSFORMATION, config),
                    cepOperator,
                    InternalTypeInfo.of(getOutputType()),
                    timestampedInputTransform.getParallelism());

    // State is keyed by the partition fields of the input row.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(partitionKeys, inputTypeInfo);
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());

    if (inputsContainSingleton()) {
        result.setParallelism(1);
        result.setMaxParallelism(1);
    }
    return result;
}
Usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
Class StreamExecOverAggregate, method translateToPlanInternal.
/**
 * Translates this OVER aggregate node into a keyed process-function transformation.
 *
 * <p>Only a single window group is supported, and it must be ordered ascending by exactly one
 * time attribute column. The aggregate input row type is the input row type extended by one
 * synthetic {@code TMP<i>} field per constant of the over specification. Depending on the
 * window bounds, either an unbounded or a bounded (preceding .. current row) process function
 * is created; every other bound combination is rejected.
 *
 * @param planner planner used to translate the input edge and to obtain rel builders
 * @param config node configuration; supplies the state retention time and table config
 * @return the transformation wrapping the keyed process operator, keyed by the partition
 *     fields and using the input's parallelism
 * @throws TableException if more than one window group is present, the ordering is not a
 *     single ascending time attribute, the lower boundary is a decimal value, or the window
 *     bounds are not supported
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (overSpec.getGroups().size() > 1) {
        throw new TableException("All aggregates must be computed on the same window.");
    }

    final OverSpec.GroupSpec group = overSpec.getGroups().get(0);
    final int[] orderKeys = group.getSort().getFieldIndices();
    final boolean[] isAscendingOrders = group.getSort().getAscendingOrders();
    if (orderKeys.length != 1 || isAscendingOrders.length != 1) {
        throw new TableException("The window can only be ordered by a single time column.");
    }
    if (!isAscendingOrders[0]) {
        throw new TableException("The window can only be ordered in ASCENDING mode.");
    }

    final int[] partitionKeys = overSpec.getPartition().getFieldIndices();
    if (partitionKeys.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent "
                        + "excessive state size. You may specify a retention time of 0 to not clean up the state.");
    }

    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();

    final int orderKey = orderKeys[0];
    final LogicalType orderKeyType = inputRowType.getFields().get(orderKey).getType();
    // check time field && identify window rowtime attribute
    // (-1 is used when the window is ordered by a proctime attribute)
    final int rowTimeIdx;
    if (isRowtimeAttribute(orderKeyType)) {
        rowTimeIdx = orderKey;
    } else if (isProctimeAttribute(orderKeyType)) {
        rowTimeIdx = -1;
    } else {
        throw new TableException(
                "OVER windows' ordering in stream mode must be defined on a time attribute.");
    }

    final List<RexLiteral> constants = overSpec.getConstants();
    final List<String> fieldNames = new ArrayList<>(inputRowType.getFieldNames());
    final List<LogicalType> fieldTypes = new ArrayList<>(inputRowType.getChildren());
    // Append exactly one synthetic field per constant. Name and type are added in the same
    // iteration so that both lists always have the same length and stay index-aligned.
    for (int i = 0; i < constants.size(); ++i) {
        fieldNames.add("TMP" + i);
        fieldTypes.add(FlinkTypeFactory.toLogicalType(constants.get(i).getType()));
    }
    final RowType aggInputRowType =
            RowType.of(
                    fieldTypes.toArray(new LogicalType[0]),
                    fieldNames.toArray(new String[0]));

    final CodeGeneratorContext ctx = new CodeGeneratorContext(config.getTableConfig());
    final KeyedProcessFunction<RowData, RowData, RowData> overProcessFunction;
    if (group.getLowerBound().isPreceding()
            && group.getLowerBound().isUnbounded()
            && group.getUpperBound().isCurrentRow()) {
        // unbounded OVER window
        overProcessFunction =
                createUnboundedOverProcessFunction(
                        ctx,
                        group.getAggCalls(),
                        constants,
                        aggInputRowType,
                        inputRowType,
                        rowTimeIdx,
                        group.isRows(),
                        config,
                        planner.getRelBuilder());
    } else if (group.getLowerBound().isPreceding()
            && !group.getLowerBound().isUnbounded()
            && group.getUpperBound().isCurrentRow()) {
        final Object boundValue =
                OverAggregateUtil.getBoundary(overSpec, group.getLowerBound());
        if (boundValue instanceof BigDecimal) {
            throw new TableException(
                    "the specific value is decimal which haven not supported yet.");
        }
        // bounded OVER window: the boundary is negated, and ROWS windows add 1 to it
        final long precedingOffset = -1 * (long) boundValue + (group.isRows() ? 1 : 0);
        overProcessFunction =
                createBoundedOverProcessFunction(
                        ctx,
                        group.getAggCalls(),
                        constants,
                        aggInputRowType,
                        inputRowType,
                        rowTimeIdx,
                        group.isRows(),
                        precedingOffset,
                        config,
                        planner.getRelBuilder());
    } else {
        throw new TableException("OVER RANGE FOLLOWING windows are not supported yet.");
    }

    final KeyedProcessOperator<RowData, RowData, RowData> operator =
            new KeyedProcessOperator<>(overProcessFunction);

    OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(OVER_AGGREGATE_TRANSFORMATION, config),
                    operator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());

    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(partitionKeys, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
Usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
Class StreamExecIncrementalGroupAggregate, method translateToPlanInternal.
/**
 * Translates this incremental group aggregate node into a keyed mini-batch transformation.
 *
 * <p>Two aggregate handlers are generated from the same partial aggregate calls: a "partial"
 * handler (generated with the copy flag enabled) and a "final" handler (copy flag disabled).
 * Both are handed to a {@code MiniBatchIncrementalGroupAggFunction} that runs inside a
 * {@code KeyedMapBundleOperator}. The transformation's state is keyed by the partial
 * aggregate grouping.
 *
 * @param planner planner used to translate the input edge and to obtain rel builders
 * @param config node configuration; supplies the state retention time and mini-batch trigger
 * @return the transformation wrapping the bundle operator, using the input's parallelism
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) edge.translateToPlan(planner);

    final AggregateInfoList partialLocalAggInfoList =
            AggregateUtil.createPartialAggInfoList(
                    partialLocalAggInputType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(partialOriginalAggCalls)),
                    partialAggCallNeedRetractions,
                    partialAggNeedRetraction,
                    false);

    final GeneratedAggsHandleFunction partialAggsHandler =
            generateAggsHandler(
                    "PartialGroupAggsHandler",
                    partialLocalAggInfoList,
                    partialAggGrouping.length,
                    partialLocalAggInfoList.getAccTypes(),
                    config,
                    planner.getRelBuilder(),
                    // the partial aggregate accumulators will be buffered, so need copy
                    true);

    final AggregateInfoList incrementalAggInfo =
            AggregateUtil.createIncrementalAggInfoList(
                    partialLocalAggInputType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(partialOriginalAggCalls)),
                    partialAggCallNeedRetractions,
                    partialAggNeedRetraction);

    // The final handler is built with the accumulator types of the partial aggregate info.
    final GeneratedAggsHandleFunction finalAggsHandler =
            generateAggsHandler(
                    "FinalGroupAggsHandler",
                    incrementalAggInfo,
                    0,
                    partialLocalAggInfoList.getAccTypes(),
                    config,
                    planner.getRelBuilder(),
                    // the final aggregate accumulators is not buffered
                    false);

    // The final key selector operates on the type produced by the partial key selector.
    final RowDataKeySelector partialKeySelector =
            KeySelectorUtil.getRowDataSelector(
                    partialAggGrouping, InternalTypeInfo.of(edge.getOutputType()));
    final RowDataKeySelector finalKeySelector =
            KeySelectorUtil.getRowDataSelector(
                    finalAggGrouping, partialKeySelector.getProducedType());

    final MiniBatchIncrementalGroupAggFunction incrementalAggFunction =
            new MiniBatchIncrementalGroupAggFunction(
                    partialAggsHandler,
                    finalAggsHandler,
                    finalKeySelector,
                    config.getStateRetentionTime());

    final OneInputStreamOperator<RowData, RowData> bundleOperator =
            new KeyedMapBundleOperator<>(
                    incrementalAggFunction, AggregateUtil.createMiniBatchTrigger(config));

    // partitioned aggregation
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(INCREMENTAL_GROUP_AGGREGATE_TRANSFORMATION, config),
                    bundleOperator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());

    // State is keyed by the partial aggregate grouping.
    result.setStateKeySelector(partialKeySelector);
    result.setStateKeyType(partialKeySelector.getProducedType());
    return result;
}
Usage of org.apache.flink.api.dag.Transformation in the Apache Flink project.
Class StreamExecLegacyTableSourceScan, method createConversionTransformationIfNeeded.
/**
 * Wraps the source transformation in an internal-row conversion when required and attaches a
 * watermark assigner when the table source declares a rowtime attribute.
 *
 * <p>If {@code needInternalConversion} reports that a conversion is necessary, the source
 * output is converted via {@code ScanUtil.convertToInternalRow}; otherwise the source
 * transformation is used as is. Afterwards, if a rowtime attribute descriptor is present and
 * its strategy is a periodic or punctuated watermark assigner, the corresponding wrapper is
 * applied to the stream; in every other case the stream is returned unchanged.
 *
 * @param streamExecEnv execution environment the resulting {@code DataStream} is bound to
 * @param config node configuration used for code generation and for naming transformations
 * @param sourceTransform transformation produced by the legacy table source
 * @param rowtimeExpression optional rowtime expression forwarded to the conversion; may be
 *     {@code null}
 * @return the transformation of the (possibly converted and watermarked) stream
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> createConversionTransformationIfNeeded(StreamExecutionEnvironment streamExecEnv, ExecNodeConfig config, Transformation<?> sourceTransform, @Nullable RexNode rowtimeExpression) {
    final RowType outputType = (RowType) getOutputType();
    final int[] fieldIndexes = computeIndexMapping(true);

    final Transformation<RowData> transformation;
    if (!needInternalConversion(fieldIndexes)) {
        transformation = (Transformation<RowData>) sourceTransform;
    } else {
        // Extra generated statements are only emitted when a time attribute field is present.
        final String extractElement;
        final String resetElement;
        if (ScanUtil.hasTimeAttributeField(fieldIndexes)) {
            final String elementTerm = OperatorCodeGenerator.ELEMENT();
            extractElement = String.format("ctx.%s = %s;", elementTerm, elementTerm);
            resetElement = String.format("ctx.%s = null;", elementTerm);
        } else {
            extractElement = "";
            resetElement = "";
        }

        final CodeGeneratorContext ctx =
                new CodeGeneratorContext(config.getTableConfig())
                        .setOperatorBaseClass(TableStreamOperator.class);

        // the produced type may not carry the correct precision user defined in DDL, because
        // it may be converted from legacy type. Fix precision using logical schema from DDL.
        // Code generation requires the correct precision of input fields.
        final DataType fixedProducedDataType =
                TableSourceUtil.fixPrecisionForProducedDataType(tableSource, outputType);

        transformation =
                ScanUtil.convertToInternalRow(
                        ctx,
                        (Transformation<Object>) sourceTransform,
                        fieldIndexes,
                        fixedProducedDataType,
                        outputType,
                        qualifiedName,
                        (detail, simplified) ->
                                createFormattedTransformationName(detail, simplified, config),
                        (desc) -> createFormattedTransformationDescription(desc, config),
                        JavaScalaConversionUtil.toScala(Optional.ofNullable(rowtimeExpression)),
                        extractElement,
                        resetElement);
    }

    final RelDataType relDataType = FlinkTypeFactory.INSTANCE().buildRelNodeRowType(outputType);
    final DataStream<RowData> ingestedTable = new DataStream<>(streamExecEnv, transformation);

    final Optional<RowtimeAttributeDescriptor> rowtimeDesc =
            JavaScalaConversionUtil.toJava(
                    TableSourceUtil.getRowtimeAttributeDescriptor(tableSource, relDataType));

    final DataStream<RowData> withWatermarks;
    if (!rowtimeDesc.isPresent()) {
        // No need to generate watermarks if no rowtime attribute is specified.
        withWatermarks = ingestedTable;
    } else {
        final RowtimeAttributeDescriptor descriptor = rowtimeDesc.get();
        final int rowtimeFieldIdx =
                relDataType.getFieldNames().indexOf(descriptor.getAttributeName());
        final WatermarkStrategy strategy = descriptor.getWatermarkStrategy();

        if (strategy instanceof PeriodicWatermarkAssigner) {
            PeriodicWatermarkAssignerWrapper watermarkGenerator =
                    new PeriodicWatermarkAssignerWrapper(
                            (PeriodicWatermarkAssigner) strategy, rowtimeFieldIdx);
            withWatermarks = ingestedTable.assignTimestampsAndWatermarks(watermarkGenerator);
        } else if (strategy instanceof PunctuatedWatermarkAssigner) {
            PunctuatedWatermarkAssignerWrapper watermarkGenerator =
                    new PunctuatedWatermarkAssignerWrapper(
                            (PunctuatedWatermarkAssigner) strategy,
                            rowtimeFieldIdx,
                            tableSource.getProducedDataType());
            withWatermarks = ingestedTable.assignTimestampsAndWatermarks(watermarkGenerator);
        } else {
            // Any other strategy: no assigner is attached here and the stream is used as is
            // (presumably the underlying DataStream already provides the watermarks).
            withWatermarks = ingestedTable;
        }
    }
    return withWatermarks.getTransformation();
}
Aggregations