use of org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig in project flink by apache.
the class StreamExecGroupWindowAggregate method translateToPlanInternal.
/**
 * Translates this group window aggregate node into a keyed one-input transformation.
 *
 * <p>The steps are: translate the single input edge, resolve the index of the time field (only
 * for rowtime windows), generate the aggregate handler and the record equaliser, build the window
 * operator, and finally key the resulting transformation by the grouping fields.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration; read here for the state retention time and the local time
 *     zone, and forwarded to the code generation helpers
 * @return the transformation wrapping the generated window operator
 * @throws TableException if the window is defined on a rowtime attribute whose field cannot be
 *     located in the input row type
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Only tumbling and sliding windows are checked for a row-interval size; every other
    // window kind is treated as "not a count window".
    final boolean countBased =
            window instanceof TumblingGroupWindow
                    ? hasRowIntervalType(((TumblingGroupWindow) window).size())
                    : window instanceof SlidingGroupWindow
                            && hasRowIntervalType(((SlidingGroupWindow) window).size());

    if (countBased && grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOGGER.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent "
                        + "excessive state size. You may specify a retention time of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream = (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType inputType = (RowType) edge.getOutputType();

    // -1 marks "no rowtime field"; it is only replaced when the window uses a rowtime attribute.
    int timeFieldIdx = -1;
    if (isRowtimeAttribute(window.timeAttribute())) {
        timeFieldIdx =
                timeFieldIndex(
                        FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inputType),
                        planner.getRelBuilder(),
                        window.timeAttribute());
        if (timeFieldIdx < 0) {
            throw new TableException(
                    "Group window must defined on a time attribute, "
                            + "but the time attribute can't be found.\n"
                            + "This should never happen. Please file an issue.");
        }
    }

    final LogicalType timeAttributeType =
            window.timeAttribute().getOutputDataType().getLogicalType();
    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(timeAttributeType, config.getLocalTimeZone());

    // Every aggregate call shares the node-level retraction flag.
    final boolean[] retractionFlags = new boolean[aggCalls.length];
    Arrays.fill(retractionFlags, needRetraction);

    final AggregateInfoList aggInfos =
            transformToStreamAggregateInfoList(
                    inputType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    retractionFlags,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo

    final GeneratedClass<?> aggsHandler =
            createAggsHandler(
                    aggInfos,
                    config,
                    planner.getRelBuilder(),
                    inputType.getChildren(),
                    shiftTimeZone);

    final LogicalType[] aggResultTypes = extractLogicalTypes(aggInfos.getActualValueTypes());

    final LogicalType[] propertyTypes = new LogicalType[namedWindowProperties.length];
    for (int i = 0; i < namedWindowProperties.length; i++) {
        propertyTypes[i] = namedWindowProperties[i].getProperty().getResultType();
    }

    // The equaliser compares the aggregate results followed by the window properties.
    final EqualiserCodeGenerator equaliserGenerator =
            new EqualiserCodeGenerator(ArrayUtils.addAll(aggResultTypes, propertyTypes));
    final GeneratedRecordEqualiser recordEqualiser =
            equaliserGenerator.generateRecordEqualiser("WindowValueEqualiser");

    final LogicalType[] aggValueTypes = extractLogicalTypes(aggInfos.getActualValueTypes());
    final LogicalType[] accumulatorTypes = extractLogicalTypes(aggInfos.getAccTypes());
    final int countStarIdx = aggInfos.getIndexOfCountStar();

    final WindowOperator<?, ?> windowOperator =
            createWindowOperator(
                    config,
                    aggsHandler,
                    recordEqualiser,
                    accumulatorTypes,
                    propertyTypes,
                    aggValueTypes,
                    inputType.getChildren().toArray(new LogicalType[0]),
                    timeFieldIdx,
                    shiftTimeZone,
                    countStarIdx);

    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(GROUP_WINDOW_AGGREGATE_TRANSFORMATION, config),
                    windowOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());

    // State is keyed by the grouping fields of the input row.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
use of org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig in project flink by apache.
the class StreamExecLegacyTableSourceScan method createConversionTransformationIfNeeded.
/**
 * Converts the raw source transformation to internal {@code RowData} when needed and, if the
 * legacy table source declares a rowtime attribute, attaches the matching watermark assigner.
 *
 * @param streamExecEnv environment used to wrap the transformation in a {@code DataStream}
 * @param config node configuration; supplies the table config for code generation and is
 *     forwarded to the name/description formatters
 * @param sourceTransform transformation produced by the legacy table source
 * @param rowtimeExpression optional rowtime expression handed to the conversion; may be null
 * @return the transformation of the resulting stream, with or without watermark assignment
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> createConversionTransformationIfNeeded(StreamExecutionEnvironment streamExecEnv, ExecNodeConfig config, Transformation<?> sourceTransform, @Nullable RexNode rowtimeExpression) {
    final RowType outputType = (RowType) getOutputType();
    final int[] fieldIndexes = computeIndexMapping(true);

    final Transformation<RowData> converted;
    if (!needInternalConversion(fieldIndexes)) {
        // The source output is used as-is.
        converted = (Transformation<RowData>) sourceTransform;
    } else {
        // The element is only stored on / cleared from the context when a time attribute
        // field is part of the mapping; otherwise both snippets stay empty.
        String extractElement = "";
        String resetElement = "";
        if (ScanUtil.hasTimeAttributeField(fieldIndexes)) {
            final String term = OperatorCodeGenerator.ELEMENT();
            extractElement = String.format("ctx.%s = %s;", term, term);
            resetElement = String.format("ctx.%s = null;", term);
        }

        final CodeGeneratorContext codeGenCtx =
                new CodeGeneratorContext(config.getTableConfig())
                        .setOperatorBaseClass(TableStreamOperator.class);

        // The produced type may not carry the precision the user defined in DDL, because it
        // may have been converted from a legacy type. Code generation requires the correct
        // precision of the input fields, so fix it using the logical schema from DDL.
        final DataType producedType =
                TableSourceUtil.fixPrecisionForProducedDataType(tableSource, outputType);

        converted =
                ScanUtil.convertToInternalRow(
                        codeGenCtx,
                        (Transformation<Object>) sourceTransform,
                        fieldIndexes,
                        producedType,
                        outputType,
                        qualifiedName,
                        (detailName, simplifyName) ->
                                createFormattedTransformationName(detailName, simplifyName, config),
                        (description) ->
                                createFormattedTransformationDescription(description, config),
                        JavaScalaConversionUtil.toScala(Optional.ofNullable(rowtimeExpression)),
                        extractElement,
                        resetElement);
    }

    final RelDataType relDataType = FlinkTypeFactory.INSTANCE().buildRelNodeRowType(outputType);
    final DataStream<RowData> ingestedTable = new DataStream<>(streamExecEnv, converted);
    final Optional<RowtimeAttributeDescriptor> rowtimeDesc =
            JavaScalaConversionUtil.toJava(
                    TableSourceUtil.getRowtimeAttributeDescriptor(tableSource, relDataType));

    // When no rowtime attribute is specified there is nothing to generate watermarks for and
    // the ingested stream is used directly.
    final DataStream<RowData> withWatermarks =
            rowtimeDesc
                    .map(
                            descriptor -> {
                                final int rowtimeIdx =
                                        relDataType
                                                .getFieldNames()
                                                .indexOf(descriptor.getAttributeName());
                                final WatermarkStrategy wmStrategy =
                                        descriptor.getWatermarkStrategy();
                                if (wmStrategy instanceof PeriodicWatermarkAssigner) {
                                    return ingestedTable.assignTimestampsAndWatermarks(
                                            new PeriodicWatermarkAssignerWrapper(
                                                    (PeriodicWatermarkAssigner) wmStrategy,
                                                    rowtimeIdx));
                                }
                                if (wmStrategy instanceof PunctuatedWatermarkAssigner) {
                                    return ingestedTable.assignTimestampsAndWatermarks(
                                            new PunctuatedWatermarkAssignerWrapper(
                                                    (PunctuatedWatermarkAssigner) wmStrategy,
                                                    rowtimeIdx,
                                                    tableSource.getProducedDataType()));
                                }
                                // Any other strategy: no assigner is attached here (presumably
                                // the underlying DataStream already provides the watermarks).
                                return ingestedTable;
                            })
                    .orElse(ingestedTable);

    return withWatermarks.getTransformation();
}
use of org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig in project flink by apache.
the class BatchExecPythonOverAggregate method getPythonOverWindowAggregateFunctionOperator.
/**
 * Reflectively instantiates the Arrow Python over-window aggregate operator.
 *
 * <p>The operator class is loaded by name, so the constructor is looked up via reflection and
 * fed with the row types, window boundaries and generated projections derived from this node.
 *
 * @param config node configuration supplying the table config for projection code generation
 * @param pythonConfig configuration passed through to the operator
 * @param inputRowType row type of the input
 * @param outputRowType row type of the output; the fields beyond the input field count are
 *     taken as the UDF output
 * @param isRangeWindows per-window flags passed through to the operator
 * @param udafInputOffsets input field indices consumed by the Python functions
 * @param pythonFunctionInfos the Python functions to evaluate
 * @return the instantiated operator
 * @throws TableException if the constructor cannot be found or invoked
 */
@SuppressWarnings("unchecked")
private OneInputStreamOperator<RowData, RowData> getPythonOverWindowAggregateFunctionOperator(ExecNodeConfig config, Configuration pythonConfig, RowType inputRowType, RowType outputRowType, boolean[] isRangeWindows, int[] udafInputOffsets, PythonFunctionInfo[] pythonFunctionInfos) {
    final Class<?> operatorClass =
            CommonPythonUtil.loadClass(ARROW_PYTHON_OVER_WINDOW_AGGREGATE_FUNCTION_OPERATOR_NAME);

    final RowType udfInputType = (RowType) Projection.of(udafInputOffsets).project(inputRowType);
    final RowType udfOutputType =
            (RowType)
                    Projection.range(inputRowType.getFieldCount(), outputRowType.getFieldCount())
                            .project(outputRowType);

    final PartitionSpec partitionSpec = overSpec.getPartition();
    final List<OverSpec.GroupSpec> groups = overSpec.getGroups();
    // The sort spec is taken from the last group of the over spec.
    final SortSpec sortSpec = groups.get(groups.size() - 1).getSort();

    try {
        final Constructor<?> constructor =
                operatorClass.getConstructor(
                        Configuration.class,
                        PythonFunctionInfo[].class,
                        RowType.class,
                        RowType.class,
                        RowType.class,
                        long[].class,
                        long[].class,
                        boolean[].class,
                        int[].class,
                        int.class,
                        boolean.class,
                        GeneratedProjection.class,
                        GeneratedProjection.class,
                        GeneratedProjection.class);

        // Arguments are prepared only after the constructor lookup succeeded, in the same
        // order in which they are handed to the constructor.
        final long[] lowerBounds = lowerBoundary.stream().mapToLong(v -> v).toArray();
        final long[] upperBounds = upperBoundary.stream().mapToLong(v -> v).toArray();
        final int[] windowIndexes = aggWindowIndex.stream().mapToInt(v -> v).toArray();
        // Only the first sort field and its direction are passed on.
        final int firstSortField = sortSpec.getFieldIndices()[0];
        final boolean firstSortAscending = sortSpec.getAscendingOrders()[0];

        final GeneratedProjection udafInputProjection =
                ProjectionCodeGenerator.generateProjection(
                        CodeGeneratorContext.apply(config.getTableConfig()),
                        "UdafInputProjection",
                        inputRowType,
                        udfInputType,
                        udafInputOffsets);

        final int[] partitionFields = partitionSpec.getFieldIndices();
        // Both projections below select the partition fields; they differ only in name.
        final GeneratedProjection groupKeyProjection =
                ProjectionCodeGenerator.generateProjection(
                        CodeGeneratorContext.apply(config.getTableConfig()),
                        "GroupKey",
                        inputRowType,
                        (RowType) Projection.of(partitionFields).project(inputRowType),
                        partitionFields);
        final GeneratedProjection groupSetProjection =
                ProjectionCodeGenerator.generateProjection(
                        CodeGeneratorContext.apply(config.getTableConfig()),
                        "GroupSet",
                        inputRowType,
                        (RowType) Projection.of(partitionFields).project(inputRowType),
                        partitionFields);

        return (OneInputStreamOperator<RowData, RowData>)
                constructor.newInstance(
                        pythonConfig,
                        pythonFunctionInfos,
                        inputRowType,
                        udfInputType,
                        udfOutputType,
                        lowerBounds,
                        upperBounds,
                        isRangeWindows,
                        windowIndexes,
                        firstSortField,
                        firstSortAscending,
                        udafInputProjection,
                        groupKeyProjection,
                        groupSetProjection);
    } catch (NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
        throw new TableException("Python BatchArrowPythonOverWindowAggregateFunctionOperator constructed failed.", e);
    }
}
use of org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig in project flink by apache.
the class BatchExecMultipleInput method translateToPlanInternal.
/**
 * Translates this multiple-input node into a {@code MultipleInputTransformation}.
 *
 * <p>All input edges and the root node of the wrapped sub-graph are translated first; a {@code
 * TableOperatorWrapperGenerator} then derives the input specs, wrappers, parallelism, resources
 * and managed memory weight that are applied to the resulting transformation.
 *
 * @param planner planner used to translate the input edges and the root node
 * @param config node configuration used for the transformation name and description
 * @return the configured multiple-input transformation
 */
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final List<Transformation<?>> upstreamTransforms = new ArrayList<>();
    getInputEdges().forEach(edge -> upstreamTransforms.add(edge.translateToPlan(planner)));

    final Transformation<?> rootTransform = rootNode.translateToPlan(planner);
    final int[] readOrders =
            getInputProperties().stream().mapToInt(InputProperty::getPriority).toArray();

    final TableOperatorWrapperGenerator generator =
            new TableOperatorWrapperGenerator(upstreamTransforms, rootTransform, readOrders);
    generator.generate();

    final List<Pair<Transformation<?>, InputSpec>> transformsWithSpecs =
            generator.getInputTransformAndInputSpecPairs();

    final String transformationName = createTransformationName(config);
    final List<InputSpec> inputSpecs =
            transformsWithSpecs.stream().map(Pair::getValue).collect(Collectors.toList());
    final BatchMultipleInputStreamOperatorFactory operatorFactory =
            new BatchMultipleInputStreamOperatorFactory(
                    inputSpecs, generator.getHeadWrappers(), generator.getTailWrapper());

    final MultipleInputTransformation<RowData> result =
            new MultipleInputTransformation<>(
                    transformationName,
                    operatorFactory,
                    InternalTypeInfo.of(getOutputType()),
                    generator.getParallelism());
    result.setDescription(createTransformationDescription(config));

    for (Pair<Transformation<?>, InputSpec> transformWithSpec : transformsWithSpecs) {
        result.addInput(transformWithSpec.getKey());
    }

    // A max parallelism is only applied when the generator reports a positive value.
    final int maxParallelism = generator.getMaxParallelism();
    if (maxParallelism > 0) {
        result.setMaxParallelism(maxParallelism);
    }

    // set resources
    result.setResources(generator.getMinResources(), generator.getPreferredResources());

    // Widen to long before shifting so the multiplication by 2^20 cannot overflow an int
    // (presumably the weight is expressed in MiB and converted to bytes here — confirm).
    final long memoryBytes = ((long) generator.getManagedMemoryWeight()) << 20;
    ExecNodeUtil.setManagedMemoryWeight(result, memoryBytes);

    // set chaining strategy for source chaining
    result.setChainingStrategy(ChainingStrategy.HEAD_WITH_SOURCES);
    return result;
}
use of org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig in project flink by apache.
the class BatchExecPythonGroupWindowAggregate method createPythonOneInputTransformation.
/**
 * Builds the one-input transformation that runs the Python group window aggregate operator.
 *
 * <p>Each named window property is encoded as an int code (window start = 0, window end = 1,
 * rowtime attribute = 2) before being handed to the operator factory method.
 *
 * @param inputTransform upstream transformation; also determines the parallelism
 * @param inputRowType row type of the input
 * @param outputRowType row type of the output
 * @param maxLimitSize passed through to the operator factory method
 * @param windowSize passed through to the operator factory method
 * @param slideSize passed through to the operator factory method
 * @param pythonConfig configuration passed through to the operator factory method
 * @param config node configuration used for the operator, name and description
 * @return the transformation wrapping the Python operator
 * @throws TableException if a named window property is of an unexpected kind
 */
private OneInputTransformation<RowData, RowData> createPythonOneInputTransformation(Transformation<RowData> inputTransform, RowType inputRowType, RowType outputRowType, int maxLimitSize, long windowSize, long slideSize, Configuration pythonConfig, ExecNodeConfig config) {
    final int[] propertyCodes = new int[namedWindowProperties.length];
    for (int i = 0; i < propertyCodes.length; i++) {
        final WindowProperty property = namedWindowProperties[i].getProperty();
        final int code;
        if (property instanceof WindowStart) {
            code = 0;
        } else if (property instanceof WindowEnd) {
            code = 1;
        } else if (property instanceof RowtimeAttribute) {
            code = 2;
        } else {
            throw new TableException("Unexpected property " + property);
        }
        propertyCodes[i] = code;
    }

    // f0 holds the UDAF input offsets, f1 the extracted Python function infos.
    final Tuple2<int[], PythonFunctionInfo[]> extracted =
            CommonPythonUtil.extractPythonAggregateFunctionInfosFromAggregateCall(aggCalls);
    final int[] udafInputOffsets = extracted.f0;
    final PythonFunctionInfo[] functionInfos = extracted.f1;

    final OneInputStreamOperator<RowData, RowData> operator =
            getPythonGroupWindowAggregateFunctionOperator(
                    config,
                    pythonConfig,
                    inputRowType,
                    outputRowType,
                    maxLimitSize,
                    windowSize,
                    slideSize,
                    propertyCodes,
                    udafInputOffsets,
                    functionInfos);

    return ExecNodeUtil.createOneInputTransformation(
            inputTransform,
            createTransformationName(config),
            createTransformationDescription(config),
            operator,
            InternalTypeInfo.of(outputRowType),
            inputTransform.getParallelism());
}
Aggregations