Use of org.apache.flink.streaming.api.transformations.OneInputTransformation in the Apache Flink project.
From the class PythonOperatorChainingOptimizerTest, method testSingleTransformation.
/**
 * Builds a source -> keyedProcess -> process -> process pipeline, hands only its last
 * transformation to the optimizer and checks the result: two transformations come back, and the
 * first one is a {@link OneInputTransformation} whose operator is a
 * {@link PythonKeyedProcessOperator} carrying the functions "f3", "f2" and "f1".
 */
@Test
public void testSingleTransformation() {
    // Three Python operators whose output/input types line up: Row(INT, INT) -> STRING -> LONG -> INT.
    PythonKeyedProcessOperator<?> keyedOp =
            createKeyedProcessOperator(
                    "f1", new RowTypeInfo(Types.INT(), Types.INT()), Types.STRING());
    PythonProcessOperator<?, ?> firstProcessOp =
            createProcessOperator("f2", Types.STRING(), Types.LONG());
    PythonProcessOperator<?, ?> secondProcessOp =
            createProcessOperator("f3", Types.LONG(), Types.INT());

    // Wire the operators into a linear pipeline behind a mocked source.
    Transformation<?> source = mock(SourceTransformation.class);
    OneInputTransformation<?, ?> keyedStage =
            new OneInputTransformation(
                    source, "keyedProcess", keyedOp, keyedOp.getProducedType(), 2);
    Transformation<?> firstProcessStage =
            new OneInputTransformation(
                    keyedStage, "process", firstProcessOp, firstProcessOp.getProducedType(), 2);
    Transformation<?> secondProcessStage =
            new OneInputTransformation(
                    firstProcessStage,
                    "process",
                    secondProcessOp,
                    secondProcessOp.getProducedType(),
                    2);

    // Only the tail of the pipeline is passed in.
    List<Transformation<?>> pipelineTail = new ArrayList<>();
    pipelineTail.add(secondProcessStage);

    List<Transformation<?>> result = PythonOperatorChainingOptimizer.optimize(pipelineTail);
    assertEquals(2, result.size());

    // The first returned transformation spans from the source's output type to the last
    // operator's produced type.
    OneInputTransformation<?, ?> chained = (OneInputTransformation<?, ?>) result.get(0);
    assertEquals(source.getOutputType(), chained.getInputType());
    assertEquals(secondProcessOp.getProducedType(), chained.getOutputType());

    OneInputStreamOperator<?, ?> operator = chained.getOperator();
    assertTrue(operator instanceof PythonKeyedProcessOperator);
    validateChainedPythonFunctions(
            ((PythonKeyedProcessOperator<?>) operator).getPythonFunctionInfo(),
            "f3",
            "f2",
            "f1");
}
Use of org.apache.flink.streaming.api.transformations.OneInputTransformation in the Apache Flink project.
From the class StreamExecDeduplicate, method translateToPlanInternal.
/**
 * Translates this deduplicate node into a keyed one-input transformation.
 *
 * <p>The single input edge is translated first; its output type info doubles as the output type
 * of the resulting transformation. Depending on {@code isRowtime}, the operator is produced by
 * the row-time or the proc-time translator. State is keyed by {@code uniqueKeys}.
 *
 * @param planner planner used to translate the input edge and to obtain the execution config
 * @param config configuration handed to the operator translator and the transformation metadata
 * @return the deduplicate transformation, running at the input's parallelism
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);

    final RowType upstreamRowType = (RowType) edge.getOutputType();
    final InternalTypeInfo<RowData> typeInfo =
            (InternalTypeInfo<RowData>) upstream.getOutputType();
    final TypeSerializer<RowData> serializer =
            typeInfo.createSerializer(planner.getExecEnv().getConfig());

    // Both translators take the same arguments; only the time semantics differ.
    final OneInputStreamOperator<RowData, RowData> dedupOperator;
    if (isRowtime) {
        final RowtimeDeduplicateOperatorTranslator rowtimeTranslator =
                new RowtimeDeduplicateOperatorTranslator(
                        config,
                        typeInfo,
                        serializer,
                        upstreamRowType,
                        keepLastRow,
                        generateUpdateBefore);
        dedupOperator = rowtimeTranslator.createDeduplicateOperator();
    } else {
        final ProcTimeDeduplicateOperatorTranslator procTimeTranslator =
                new ProcTimeDeduplicateOperatorTranslator(
                        config,
                        typeInfo,
                        serializer,
                        upstreamRowType,
                        keepLastRow,
                        generateUpdateBefore);
        dedupOperator = procTimeTranslator.createDeduplicateOperator();
    }

    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(DEDUPLICATE_TRANSFORMATION, config),
                    dedupOperator,
                    typeInfo,
                    upstream.getParallelism());

    // Key the operator state by the unique keys.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(uniqueKeys, typeInfo);
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Use of org.apache.flink.streaming.api.transformations.OneInputTransformation in the Apache Flink project.
From the class StreamExecGlobalGroupAggregate, method translateToPlanInternal.
/**
 * Translates this global group aggregate node into a keyed one-input transformation.
 *
 * <p>Two aggregate info lists are derived from {@code localAggInputRowType}: a local one without
 * state-backend data views and a global one with them. Handlers are generated for both, and a
 * record equaliser is generated for the global aggregate value types. The resulting operator is
 * a mini-batch bundle operator; state is keyed by {@code grouping}.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config configuration supplying state retention time and mini-batch settings
 * @return the global group aggregate transformation, running at the input's parallelism
 * @throws TableException if mini-batch execution is not enabled in {@code config}
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent excessive "
                        + "state size. You may specify a retention time of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    // Note: indexOfCountStar below refers to the field of this node.
    final AggregateInfoList localInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    localAggInputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    JavaScalaConversionUtil.toScala(Optional.ofNullable(indexOfCountStar)),
                    false, // isStateBackendDataViews
                    true); // needDistinctInfo
    final AggregateInfoList globalInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    localAggInputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    JavaScalaConversionUtil.toScala(Optional.ofNullable(indexOfCountStar)),
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo

    // Both handlers are generated against the local accumulator types.
    final GeneratedAggsHandleFunction localHandler =
            generateAggsHandler(
                    "LocalGroupAggsHandler",
                    localInfos,
                    grouping.length,
                    localInfos.getAccTypes(),
                    config,
                    planner.getRelBuilder());
    final GeneratedAggsHandleFunction globalHandler =
            generateAggsHandler(
                    "GlobalGroupAggsHandler",
                    globalInfos,
                    0, // mergedAccOffset
                    localInfos.getAccTypes(),
                    config,
                    planner.getRelBuilder());

    final int globalCountStarIndex = globalInfos.getIndexOfCountStar();
    final LogicalType[] globalAccumulatorTypes =
            Arrays.stream(globalInfos.getAccTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    final LogicalType[] globalValueTypes =
            Arrays.stream(globalInfos.getActualValueTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    final GeneratedRecordEqualiser equaliser =
            new EqualiserCodeGenerator(globalValueTypes)
                    .generateRecordEqualiser("GroupAggValueEqualiser");

    // Only a mini-batch operator is built here; any other mode is rejected.
    if (!config.get(ExecutionConfigOptions.TABLE_EXEC_MINIBATCH_ENABLED)) {
        throw new TableException("Local-Global optimization is only worked in miniBatch mode");
    }
    final MiniBatchGlobalGroupAggFunction aggFunction =
            new MiniBatchGlobalGroupAggFunction(
                    localHandler,
                    globalHandler,
                    equaliser,
                    globalAccumulatorTypes,
                    globalCountStarIndex,
                    generateUpdateBefore,
                    config.getStateRetentionTime());
    final OneInputStreamOperator<RowData, RowData> bundleOperator =
            new KeyedMapBundleOperator<>(aggFunction, AggregateUtil.createMiniBatchTrigger(config));

    // partitioned aggregation
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(GLOBAL_GROUP_AGGREGATE_TRANSFORMATION, config),
                    bundleOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());

    // set KeyType and Selector for state
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Use of org.apache.flink.streaming.api.transformations.OneInputTransformation in the Apache Flink project.
From the class StreamExecGroupAggregate, method translateToPlanInternal.
/**
 * Translates this group aggregate node into a keyed one-input transformation.
 *
 * <p>An aggregation handler and a record equaliser are generated from the input row type and
 * the aggregate calls. When mini-batch is enabled in {@code config} the aggregation runs inside
 * a bundle operator, otherwise inside a keyed process operator. State is keyed by
 * {@code grouping}.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config configuration supplying state retention time and mini-batch settings
 * @return the group aggregate transformation, running at the input's parallelism
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent excessive "
                        + "state size. You may specify a retention time of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    final AggsHandlerCodeGenerator handlerGenerator =
            new AggsHandlerCodeGenerator(
                            new CodeGeneratorContext(config.getTableConfig()),
                            planner.getRelBuilder(),
                            JavaScalaConversionUtil.toScala(upstreamRowType.getChildren()),
                            // TODO: but other operators do not copy this input field.....
                            true)
                    .needAccumulate();
    if (needRetraction) {
        handlerGenerator.needRetract();
    }

    final AggregateInfoList aggInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    upstreamRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true,
                    true);
    final GeneratedAggsHandleFunction handler =
            handlerGenerator.generateAggsHandler("GroupAggsHandler", aggInfos);

    final LogicalType[] accumulatorTypes =
            Arrays.stream(aggInfos.getAccTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    final LogicalType[] valueTypes =
            Arrays.stream(aggInfos.getActualValueTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    final GeneratedRecordEqualiser equaliser =
            new EqualiserCodeGenerator(valueTypes)
                    .generateRecordEqualiser("GroupAggValueEqualiser");
    final int countStarIndex = aggInfos.getIndexOfCountStar();

    // Pick the per-record or the mini-batch flavour of the aggregation.
    final boolean miniBatch = config.get(ExecutionConfigOptions.TABLE_EXEC_MINIBATCH_ENABLED);
    final OneInputStreamOperator<RowData, RowData> aggOperator;
    if (!miniBatch) {
        GroupAggFunction perRecordFunction =
                new GroupAggFunction(
                        handler,
                        equaliser,
                        accumulatorTypes,
                        countStarIndex,
                        generateUpdateBefore,
                        config.getStateRetentionTime());
        aggOperator = new KeyedProcessOperator<>(perRecordFunction);
    } else {
        MiniBatchGroupAggFunction bundledFunction =
                new MiniBatchGroupAggFunction(
                        handler,
                        equaliser,
                        accumulatorTypes,
                        upstreamRowType,
                        countStarIndex,
                        generateUpdateBefore,
                        config.getStateRetentionTime());
        aggOperator =
                new KeyedMapBundleOperator<>(
                        bundledFunction, AggregateUtil.createMiniBatchTrigger(config));
    }

    // partitioned aggregation
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(GROUP_AGGREGATE_TRANSFORMATION, config),
                    aggOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());

    // set KeyType and Selector for state
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Use of org.apache.flink.streaming.api.transformations.OneInputTransformation in the Apache Flink project.
From the class StreamExecGroupWindowAggregate, method translateToPlanInternal.
/**
 * Translates this group window aggregate node into a keyed one-input transformation.
 *
 * <p>Steps, in order: warn about missing state retention for count windows with grouping keys,
 * translate the input edge, resolve the index of the time field (or -1 when the window's time
 * attribute is not a row-time attribute), generate the aggregation handler and the record
 * equaliser, build the window operator, and key the state by {@code grouping}.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config configuration supplying state retention time and the local time zone
 * @return the group window aggregate transformation, running at the input's parallelism
 * @throws TableException if the window uses a row-time attribute that cannot be located in the
 *     input row type
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // Only tumbling and sliding windows can be count windows; everything else defaults to false.
    boolean countWindow = false;
    if (window instanceof TumblingGroupWindow) {
        countWindow = hasRowIntervalType(((TumblingGroupWindow) window).size());
    } else if (window instanceof SlidingGroupWindow) {
        countWindow = hasRowIntervalType(((SlidingGroupWindow) window).size());
    }

    if (countWindow && grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOGGER.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent "
                        + "excessive state size. You may specify a retention time of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    // -1 marks "no time field" when the window is not defined on a row-time attribute.
    final int timeIndex;
    if (!isRowtimeAttribute(window.timeAttribute())) {
        timeIndex = -1;
    } else {
        timeIndex =
                timeFieldIndex(
                        FlinkTypeFactory.INSTANCE().buildRelNodeRowType(upstreamRowType),
                        planner.getRelBuilder(),
                        window.timeAttribute());
        if (timeIndex < 0) {
            throw new TableException(
                    "Group window must defined on a time attribute, "
                            + "but the time attribute can't be found.\n"
                            + "This should never happen. Please file an issue.");
        }
    }

    final ZoneId shiftZone =
            TimeWindowUtil.getShiftTimeZone(
                    window.timeAttribute().getOutputDataType().getLogicalType(),
                    config.getLocalTimeZone());

    // Every aggregate call shares the node-level retraction flag.
    final boolean[] retractionFlags = new boolean[aggCalls.length];
    Arrays.fill(retractionFlags, needRetraction);

    final AggregateInfoList aggInfos =
            transformToStreamAggregateInfoList(
                    upstreamRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    retractionFlags,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo

    final GeneratedClass<?> aggsHandler =
            createAggsHandler(
                    aggInfos,
                    config,
                    planner.getRelBuilder(),
                    upstreamRowType.getChildren(),
                    shiftZone);

    // The equaliser compares aggregate results followed by the window properties.
    final LogicalType[] resultTypes = extractLogicalTypes(aggInfos.getActualValueTypes());
    final LogicalType[] propertyTypes =
            Arrays.stream(namedWindowProperties)
                    .map(property -> property.getProperty().getResultType())
                    .toArray(LogicalType[]::new);
    final EqualiserCodeGenerator equaliserGenerator =
            new EqualiserCodeGenerator(ArrayUtils.addAll(resultTypes, propertyTypes));
    final GeneratedRecordEqualiser recordEqualiser =
            equaliserGenerator.generateRecordEqualiser("WindowValueEqualiser");

    final LogicalType[] valueTypes = extractLogicalTypes(aggInfos.getActualValueTypes());
    final LogicalType[] accumulatorTypes = extractLogicalTypes(aggInfos.getAccTypes());
    final int countStarIndex = aggInfos.getIndexOfCountStar();

    final WindowOperator<?, ?> windowOperator =
            createWindowOperator(
                    config,
                    aggsHandler,
                    recordEqualiser,
                    accumulatorTypes,
                    propertyTypes,
                    valueTypes,
                    upstreamRowType.getChildren().toArray(new LogicalType[0]),
                    timeIndex,
                    shiftZone,
                    countStarIndex);

    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(GROUP_WINDOW_AGGREGATE_TRANSFORMATION, config),
                    windowOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());

    // set KeyType and Selector for state
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Aggregations