use of org.apache.flink.api.dag.Transformation in project flink by apache.
the class StreamExecDropUpdateBefore method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
final Transformation<RowData> inputTransform = (Transformation<RowData>) getInputEdges().get(0).translateToPlan(planner);
final StreamFilter<RowData> operator = new StreamFilter<>(new DropUpdateBeforeFunction());
return ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationMeta(DROP_UPDATE_BEFORE_TRANSFORMATION, config), operator, inputTransform.getOutputType(), inputTransform.getParallelism());
}
use of org.apache.flink.api.dag.Transformation in project flink by apache.
the class StreamExecChangelogNormalize method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
final ExecEdge inputEdge = getInputEdges().get(0);
final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
final InternalTypeInfo<RowData> rowTypeInfo = (InternalTypeInfo<RowData>) inputTransform.getOutputType();
final OneInputStreamOperator<RowData, RowData> operator;
final long stateIdleTime = config.getStateRetentionTime();
final boolean isMiniBatchEnabled = config.get(ExecutionConfigOptions.TABLE_EXEC_MINIBATCH_ENABLED);
GeneratedRecordEqualiser generatedEqualiser = new EqualiserCodeGenerator(rowTypeInfo.toRowType()).generateRecordEqualiser("DeduplicateRowEqualiser");
if (isMiniBatchEnabled) {
TypeSerializer<RowData> rowSerializer = rowTypeInfo.createSerializer(planner.getExecEnv().getConfig());
ProcTimeMiniBatchDeduplicateKeepLastRowFunction processFunction = new ProcTimeMiniBatchDeduplicateKeepLastRowFunction(rowTypeInfo, rowSerializer, stateIdleTime, generateUpdateBefore, // generateInsert
true, // inputInsertOnly
false, generatedEqualiser);
CountBundleTrigger<RowData> trigger = AggregateUtil.createMiniBatchTrigger(config);
operator = new KeyedMapBundleOperator<>(processFunction, trigger);
} else {
ProcTimeDeduplicateKeepLastRowFunction processFunction = new ProcTimeDeduplicateKeepLastRowFunction(rowTypeInfo, stateIdleTime, generateUpdateBefore, // generateInsert
true, // inputInsertOnly
false, generatedEqualiser);
operator = new KeyedProcessOperator<>(processFunction);
}
final OneInputTransformation<RowData, RowData> transform = ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationMeta(CHANGELOG_NORMALIZE_TRANSFORMATION, config), operator, rowTypeInfo, inputTransform.getParallelism());
final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(uniqueKeys, rowTypeInfo);
transform.setStateKeySelector(selector);
transform.setStateKeyType(selector.getProducedType());
return transform;
}
use of org.apache.flink.api.dag.Transformation in project flink by apache.
the class InternalConfigOptionsTest method testTranslateExecNodeGraphWithInternalTemporalConf.
@Test
public void testTranslateExecNodeGraphWithInternalTemporalConf() {
Table table = tEnv.sqlQuery("SELECT LOCALTIME, LOCALTIMESTAMP, CURRENT_TIME, CURRENT_TIMESTAMP");
RelNode relNode = planner.optimize(TableTestUtil.toRelNode(table));
ExecNodeGraph execNodeGraph = planner.translateToExecNodeGraph(toScala(Collections.singletonList(relNode)));
// PlannerBase#translateToExecNodeGraph will set internal temporal configurations and
// cleanup them after translate finished
List<Transformation<?>> transformation = planner.translateToPlan(execNodeGraph);
// check the translation success
Assert.assertEquals(1, transformation.size());
}
use of org.apache.flink.api.dag.Transformation in project flink by apache.
the class StreamExecPythonGroupAggregate method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
LOG.warn("No state retention interval configured for a query which accumulates state. " + "Please provide a query configuration with valid retention interval " + "to prevent excessive state size. You may specify a retention time " + "of 0 to not clean up the state.");
}
final ExecEdge inputEdge = getInputEdges().get(0);
final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
final RowType inputRowType = (RowType) inputEdge.getOutputType();
final AggregateInfoList aggInfoList = AggregateUtil.transformToStreamAggregateInfoList(inputRowType, JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)), aggCallNeedRetractions, needRetraction, // isStateBackendDataViews
true, // needDistinctInfo
true);
final int inputCountIndex = aggInfoList.getIndexOfCountStar();
final boolean countStarInserted = aggInfoList.countStarInserted();
Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs = CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
Configuration pythonConfig = CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
final OneInputStreamOperator<RowData, RowData> operator = getPythonAggregateFunctionOperator(pythonConfig, inputRowType, InternalTypeInfo.of(getOutputType()).toRowType(), pythonFunctionInfos, dataViewSpecs, config.getStateRetentionTime(), config.getMaxIdleStateRetentionTime(), inputCountIndex, countStarInserted);
// partitioned aggregation
OneInputTransformation<RowData, RowData> transform = ExecNodeUtil.createOneInputTransformation(inputTransform, createTransformationName(config), createTransformationDescription(config), operator, InternalTypeInfo.of(getOutputType()), inputTransform.getParallelism());
if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
}
// set KeyType and Selector for state
final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
transform.setStateKeySelector(selector);
transform.setStateKeyType(selector.getProducedType());
return transform;
}
use of org.apache.flink.api.dag.Transformation in project flink by apache.
the class StreamExecPythonGroupWindowAggregate method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
final boolean isCountWindow;
if (window instanceof TumblingGroupWindow) {
isCountWindow = hasRowIntervalType(((TumblingGroupWindow) window).size());
} else if (window instanceof SlidingGroupWindow) {
isCountWindow = hasRowIntervalType(((SlidingGroupWindow) window).size());
} else {
isCountWindow = false;
}
if (isCountWindow && grouping.length > 0 && config.getStateRetentionTime() < 0) {
LOGGER.warn("No state retention interval configured for a query which accumulates state." + " Please provide a query configuration with valid retention interval to" + " prevent excessive state size. You may specify a retention time of 0 to" + " not clean up the state.");
}
final ExecEdge inputEdge = getInputEdges().get(0);
final Transformation<RowData> inputTransform = (Transformation<RowData>) inputEdge.translateToPlan(planner);
final RowType inputRowType = (RowType) inputEdge.getOutputType();
final RowType outputRowType = InternalTypeInfo.of(getOutputType()).toRowType();
final int inputTimeFieldIndex;
if (isRowtimeAttribute(window.timeAttribute())) {
inputTimeFieldIndex = timeFieldIndex(FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inputRowType), planner.getRelBuilder(), window.timeAttribute());
if (inputTimeFieldIndex < 0) {
throw new TableException("Group window must defined on a time attribute, " + "but the time attribute can't be found.\n" + "This should never happen. Please file an issue.");
}
} else {
inputTimeFieldIndex = -1;
}
final ZoneId shiftTimeZone = TimeWindowUtil.getShiftTimeZone(window.timeAttribute().getOutputDataType().getLogicalType(), config.getLocalTimeZone());
Tuple2<WindowAssigner<?>, Trigger<?>> windowAssignerAndTrigger = generateWindowAssignerAndTrigger();
WindowAssigner<?> windowAssigner = windowAssignerAndTrigger.f0;
Trigger<?> trigger = windowAssignerAndTrigger.f1;
Configuration pythonConfig = CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
boolean isGeneralPythonUDAF = Arrays.stream(aggCalls).anyMatch(x -> PythonUtil.isPythonAggregate(x, PythonFunctionKind.GENERAL));
OneInputTransformation<RowData, RowData> transform;
WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(config, window);
if (isGeneralPythonUDAF) {
final boolean[] aggCallNeedRetractions = new boolean[aggCalls.length];
Arrays.fill(aggCallNeedRetractions, needRetraction);
final AggregateInfoList aggInfoList = transformToStreamAggregateInfoList(inputRowType, JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)), aggCallNeedRetractions, needRetraction, true, true);
transform = createGeneralPythonStreamWindowGroupOneInputTransformation(inputTransform, inputRowType, outputRowType, inputTimeFieldIndex, windowAssigner, aggInfoList, emitStrategy.getAllowLateness(), pythonConfig, shiftTimeZone);
} else {
transform = createPandasPythonStreamWindowGroupOneInputTransformation(inputTransform, inputRowType, outputRowType, inputTimeFieldIndex, windowAssigner, trigger, emitStrategy.getAllowLateness(), pythonConfig, config, shiftTimeZone);
}
if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
}
// set KeyType and Selector for state
final RowDataKeySelector selector = KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
transform.setStateKeySelector(selector);
transform.setStateKeyType(selector.getProducedType());
return transform;
}
Aggregations