Use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in project flink by apache.
The class CommonExecSink, method applyKeyBy.
/**
* Apply a primary key partition transformation to guarantee the strict ordering of changelog
* messages.
*/
private Transformation<RowData> applyKeyBy(
        ReadableConfig config,
        Transformation<RowData> inputTransform,
        int[] primaryKeys,
        int sinkParallelism,
        int inputParallelism,
        boolean inputInsertOnly,
        boolean needMaterialize) {
    final ExecutionConfigOptions.SinkKeyedShuffle sinkShuffleByPk =
            config.get(ExecutionConfigOptions.TABLE_EXEC_SINK_KEYED_SHUFFLE);
    boolean sinkKeyBy = false;
    switch (sinkShuffleByPk) {
        case NONE:
            break;
        case AUTO:
            sinkKeyBy = inputInsertOnly && sinkParallelism != inputParallelism;
            break;
        case FORCE:
            // single parallelism has no problem
            sinkKeyBy = sinkParallelism != 1 || inputParallelism != 1;
            break;
    }
    if (!sinkKeyBy && !needMaterialize) {
        return inputTransform;
    }
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(primaryKeys, getInputTypeInfo());
    final KeyGroupStreamPartitioner<RowData, RowData> partitioner =
            new KeyGroupStreamPartitioner<>(
                    selector, KeyGroupRangeAssignment.DEFAULT_LOWER_BOUND_MAX_PARALLELISM);
    Transformation<RowData> partitionedTransform =
            new PartitionTransformation<>(inputTransform, partitioner);
    createTransformationMeta(PARTITIONER_TRANSFORMATION, "Partitioner", "Partitioner", config)
            .fill(partitionedTransform);
    partitionedTransform.setParallelism(sinkParallelism);
    return partitionedTransform;
}
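Which branch of the switch fires is controlled by the table.exec.sink.keyed-shuffle option behind ExecutionConfigOptions.TABLE_EXEC_SINK_KEYED_SHUFFLE. Below is a minimal sketch of setting it from user code, assuming a Flink version that ships this option; the class name is illustrative.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class SinkShuffleConfigExample {
    public static void main(String[] args) {
        TableEnvironment tableEnv =
                TableEnvironment.create(
                        EnvironmentSettings.newInstance().inStreamingMode().build());
        // FORCE always inserts the primary-key shuffle in front of the sink;
        // AUTO (the default) does so only for insert-only input whose
        // parallelism differs from the sink's; NONE never does.
        tableEnv.getConfig()
                .getConfiguration()
                .setString("table.exec.sink.keyed-shuffle", "FORCE");
    }
}

Note that with FORCE, even an insert-only pipeline whose parallelism already matches the sink's gets the extra shuffle, unless both sides run at parallelism 1.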
Use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in project flink by apache.
The class CommonExecSink, method applyUpsertMaterialize.
private Transformation<RowData> applyUpsertMaterialize(
        Transformation<RowData> inputTransform,
        int[] primaryKeys,
        int sinkParallelism,
        ReadableConfig config,
        RowType physicalRowType) {
    GeneratedRecordEqualiser equaliser =
            new EqualiserCodeGenerator(physicalRowType)
                    .generateRecordEqualiser("SinkMaterializeEqualiser");
    SinkUpsertMaterializer operator =
            new SinkUpsertMaterializer(
                    StateConfigUtil.createTtlConfig(
                            config.get(ExecutionConfigOptions.IDLE_STATE_RETENTION).toMillis()),
                    InternalSerializers.create(physicalRowType),
                    equaliser);
    final String[] fieldNames = physicalRowType.getFieldNames().toArray(new String[0]);
    final List<String> pkFieldNames =
            Arrays.stream(primaryKeys)
                    .mapToObj(idx -> fieldNames[idx])
                    .collect(Collectors.toList());
    OneInputTransformation<RowData, RowData> materializeTransform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(
                            UPSERT_MATERIALIZE_TRANSFORMATION,
                            String.format(
                                    "SinkMaterializer(pk=[%s])", String.join(", ", pkFieldNames)),
                            "SinkMaterializer",
                            config),
                    operator,
                    inputTransform.getOutputType(),
                    sinkParallelism);
    RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(primaryKeys, InternalTypeInfo.of(physicalRowType));
    materializeTransform.setStateKeySelector(keySelector);
    materializeTransform.setStateKeyType(keySelector.getProducedType());
    return materializeTransform;
}
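Both methods above reduce to the same building block: construct a RowDataKeySelector from key field indices and use it to key records (or state) by those fields. Below is a minimal, self-contained sketch of that building block, assuming the two-argument KeySelectorUtil.getRowDataSelector signature used in these snippets; the class name and schema are illustrative.

import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.planner.plan.utils.KeySelectorUtil;
import org.apache.flink.table.runtime.keyselector.RowDataKeySelector;
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarCharType;

public class KeySelectorSketch {
    public static void main(String[] args) throws Exception {
        // Row schema: (id INT, name VARCHAR); the key is column 0.
        InternalTypeInfo<RowData> rowType =
                InternalTypeInfo.of(
                        RowType.of(new IntType(), new VarCharType(VarCharType.MAX_LENGTH)));
        // Code-generates a projection that extracts the key fields.
        RowDataKeySelector selector =
                KeySelectorUtil.getRowDataSelector(new int[] {0}, rowType);
        // The extracted key is itself a (projected) RowData.
        RowData key = selector.getKey(GenericRowData.of(1, StringData.fromString("flink")));
        System.out.println(key.getInt(0)); // prints 1
    }
}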
Use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in project flink by apache.
The class StreamExecChangelogNormalize, method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(
        PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final InternalTypeInfo<RowData> rowTypeInfo =
            (InternalTypeInfo<RowData>) inputTransform.getOutputType();
    final OneInputStreamOperator<RowData, RowData> operator;
    final long stateIdleTime = config.getStateRetentionTime();
    final boolean isMiniBatchEnabled =
            config.get(ExecutionConfigOptions.TABLE_EXEC_MINIBATCH_ENABLED);
    GeneratedRecordEqualiser generatedEqualiser =
            new EqualiserCodeGenerator(rowTypeInfo.toRowType())
                    .generateRecordEqualiser("DeduplicateRowEqualiser");
    if (isMiniBatchEnabled) {
        TypeSerializer<RowData> rowSerializer =
                rowTypeInfo.createSerializer(planner.getExecEnv().getConfig());
        ProcTimeMiniBatchDeduplicateKeepLastRowFunction processFunction =
                new ProcTimeMiniBatchDeduplicateKeepLastRowFunction(
                        rowTypeInfo,
                        rowSerializer,
                        stateIdleTime,
                        generateUpdateBefore,
                        true, // generateInsert
                        false, // inputInsertOnly
                        generatedEqualiser);
        CountBundleTrigger<RowData> trigger = AggregateUtil.createMiniBatchTrigger(config);
        operator = new KeyedMapBundleOperator<>(processFunction, trigger);
    } else {
        ProcTimeDeduplicateKeepLastRowFunction processFunction =
                new ProcTimeDeduplicateKeepLastRowFunction(
                        rowTypeInfo,
                        stateIdleTime,
                        generateUpdateBefore,
                        true, // generateInsert
                        false, // inputInsertOnly
                        generatedEqualiser);
        operator = new KeyedProcessOperator<>(processFunction);
    }
    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(CHANGELOG_NORMALIZE_TRANSFORMATION, config),
                    operator,
                    rowTypeInfo,
                    inputTransform.getParallelism());
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(uniqueKeys, rowTypeInfo);
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
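Whether the mini-batch branch is taken depends on table.exec.mini-batch.enabled; when that option is enabled, Flink also requires the allow-latency and size options to be set. A hedged configuration sketch (the class name is illustrative):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class MiniBatchConfigExample {
    public static void main(String[] args) {
        TableEnvironment tableEnv =
                TableEnvironment.create(
                        EnvironmentSettings.newInstance().inStreamingMode().build());
        // With mini-batch enabled, ChangelogNormalize uses the bundled
        // ProcTimeMiniBatchDeduplicateKeepLastRowFunction; otherwise the
        // per-record ProcTimeDeduplicateKeepLastRowFunction.
        tableEnv.getConfig()
                .getConfiguration()
                .setString("table.exec.mini-batch.enabled", "true");
        tableEnv.getConfig()
                .getConfiguration()
                .setString("table.exec.mini-batch.allow-latency", "2 s");
        tableEnv.getConfig()
                .getConfiguration()
                .setString("table.exec.mini-batch.size", "5000");
    }
}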
Use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in project flink by apache.
The class StreamExecPythonGroupAggregate, method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(
        PlannerBase planner, ExecNodeConfig config) {
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval "
                        + "to prevent excessive state size. You may specify a retention time "
                        + "of 0 to not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final AggregateInfoList aggInfoList =
            AggregateUtil.transformToStreamAggregateInfoList(
                    inputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo
    final int inputCountIndex = aggInfoList.getIndexOfCountStar();
    final boolean countStarInserted = aggInfoList.countStarInserted();
    Tuple2<PythonAggregateFunctionInfo[], DataViewSpec[][]> aggInfosAndDataViewSpecs =
            CommonPythonUtil.extractPythonAggregateFunctionInfos(aggInfoList, aggCalls);
    PythonAggregateFunctionInfo[] pythonFunctionInfos = aggInfosAndDataViewSpecs.f0;
    DataViewSpec[][] dataViewSpecs = aggInfosAndDataViewSpecs.f1;
    Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    final OneInputStreamOperator<RowData, RowData> operator =
            getPythonAggregateFunctionOperator(
                    pythonConfig,
                    inputRowType,
                    InternalTypeInfo.of(getOutputType()).toRowType(),
                    pythonFunctionInfos,
                    dataViewSpecs,
                    config.getStateRetentionTime(),
                    config.getMaxIdleStateRetentionTime(),
                    inputCountIndex,
                    countStarInserted);
    // partitioned aggregation
    OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationName(config),
                    createTransformationDescription(config),
                    operator,
                    InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
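The retention warning at the top of the method disappears once an idle state retention is configured. One way to do that is through the TableConfig API, sketched below (the class name is illustrative):

import java.time.Duration;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class IdleStateRetentionExample {
    public static void main(String[] args) {
        TableEnvironment tableEnv =
                TableEnvironment.create(
                        EnvironmentSettings.newInstance().inStreamingMode().build());
        // Expire per-key aggregation state that has been idle for a day;
        // a retention of 0 (the default) keeps state forever.
        tableEnv.getConfig().setIdleStateRetention(Duration.ofDays(1));
    }
}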
Use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in project flink by apache.
The class StreamExecPythonGroupWindowAggregate, method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(
        PlannerBase planner, ExecNodeConfig config) {
    final boolean isCountWindow;
    if (window instanceof TumblingGroupWindow) {
        isCountWindow = hasRowIntervalType(((TumblingGroupWindow) window).size());
    } else if (window instanceof SlidingGroupWindow) {
        isCountWindow = hasRowIntervalType(((SlidingGroupWindow) window).size());
    } else {
        isCountWindow = false;
    }
    if (isCountWindow && grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOGGER.warn(
                "No state retention interval configured for a query which accumulates state."
                        + " Please provide a query configuration with valid retention interval to"
                        + " prevent excessive state size. You may specify a retention time of 0 to"
                        + " not clean up the state.");
    }
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);
    final RowType inputRowType = (RowType) inputEdge.getOutputType();
    final RowType outputRowType = InternalTypeInfo.of(getOutputType()).toRowType();
    final int inputTimeFieldIndex;
    if (isRowtimeAttribute(window.timeAttribute())) {
        inputTimeFieldIndex =
                timeFieldIndex(
                        FlinkTypeFactory.INSTANCE().buildRelNodeRowType(inputRowType),
                        planner.getRelBuilder(),
                        window.timeAttribute());
        if (inputTimeFieldIndex < 0) {
            throw new TableException(
                    "Group window must be defined on a time attribute, "
                            + "but the time attribute can't be found.\n"
                            + "This should never happen. Please file an issue.");
        }
    } else {
        inputTimeFieldIndex = -1;
    }
    final ZoneId shiftTimeZone =
            TimeWindowUtil.getShiftTimeZone(
                    window.timeAttribute().getOutputDataType().getLogicalType(),
                    config.getLocalTimeZone());
    Tuple2<WindowAssigner<?>, Trigger<?>> windowAssignerAndTrigger =
            generateWindowAssignerAndTrigger();
    WindowAssigner<?> windowAssigner = windowAssignerAndTrigger.f0;
    Trigger<?> trigger = windowAssignerAndTrigger.f1;
    Configuration pythonConfig =
            CommonPythonUtil.getMergedConfig(planner.getExecEnv(), config.getTableConfig());
    boolean isGeneralPythonUDAF =
            Arrays.stream(aggCalls)
                    .anyMatch(x -> PythonUtil.isPythonAggregate(x, PythonFunctionKind.GENERAL));
    OneInputTransformation<RowData, RowData> transform;
    WindowEmitStrategy emitStrategy = WindowEmitStrategy.apply(config, window);
    if (isGeneralPythonUDAF) {
        final boolean[] aggCallNeedRetractions = new boolean[aggCalls.length];
        Arrays.fill(aggCallNeedRetractions, needRetraction);
        final AggregateInfoList aggInfoList =
                transformToStreamAggregateInfoList(
                        inputRowType,
                        JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                        aggCallNeedRetractions,
                        needRetraction,
                        true,
                        true);
        transform =
                createGeneralPythonStreamWindowGroupOneInputTransformation(
                        inputTransform,
                        inputRowType,
                        outputRowType,
                        inputTimeFieldIndex,
                        windowAssigner,
                        aggInfoList,
                        emitStrategy.getAllowLateness(),
                        pythonConfig,
                        shiftTimeZone);
    } else {
        transform =
                createPandasPythonStreamWindowGroupOneInputTransformation(
                        inputTransform,
                        inputRowType,
                        outputRowType,
                        inputTimeFieldIndex,
                        windowAssigner,
                        trigger,
                        emitStrategy.getAllowLateness(),
                        pythonConfig,
                        config,
                        shiftTimeZone);
    }
    if (CommonPythonUtil.isPythonWorkerUsingManagedMemory(pythonConfig)) {
        transform.declareManagedMemoryUseCaseAtSlotScope(ManagedMemoryUseCase.PYTHON);
    }
    // set KeyType and Selector for state
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(inputRowType));
    transform.setStateKeySelector(selector);
    transform.setStateKeyType(selector.getProducedType());
    return transform;
}
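The shiftTimeZone computed above is derived from the session time zone: for windows defined on TIMESTAMP_LTZ attributes, TimeWindowUtil.getShiftTimeZone shifts window boundaries by the zone configured under table.local-time-zone. A small sketch of setting that option (the class name is illustrative):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class LocalTimeZoneExample {
    public static void main(String[] args) {
        TableEnvironment tableEnv =
                TableEnvironment.create(
                        EnvironmentSettings.newInstance().inStreamingMode().build());
        // Windows on TIMESTAMP_LTZ attributes align to this zone;
        // windows on plain TIMESTAMP attributes are unaffected.
        tableEnv.getConfig()
                .getConfiguration()
                .setString("table.local-time-zone", "Europe/Berlin");
    }
}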