Example use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in the Apache Flink project.
Class AbstractStreamArrowPythonAggregateFunctionOperatorTest, method getTestHarness.
/**
 * Creates a keyed one-input test harness wrapping the operator under test.
 *
 * <p>The operator comes from {@code getTestOperator}, configured with a single
 * {@code DummyPythonFunction} whose input index array is {@code {0}}. The harness is keyed
 * on field 0 of the input row type, has half of the managed memory fraction assigned to the
 * {@code PYTHON} use case, and is set up with a serializer for the output row type.
 *
 * @param config configuration forwarded to {@code getTestOperator}
 * @return the harness, already set up with the output serializer
 * @throws Exception declared because operator creation or harness construction may fail
 */
public OneInputStreamOperatorTestHarness<RowData, RowData> getTestHarness(Configuration config) throws Exception {
    final RowType inType = getInputType();
    final RowType outType = getOutputType();

    final PythonFunctionInfo[] pythonFunctions = {
        new PythonFunctionInfo(
                PythonScalarFunctionOperatorTestBase.DummyPythonFunction.INSTANCE,
                new Integer[] { 0 })
    };
    // NOTE(review): the two trailing int arrays presumably describe the grouping fields and
    // the aggregate input offsets -- confirm against getTestOperator's signature.
    final AbstractArrowPythonAggregateFunctionOperator operatorUnderTest =
            getTestOperator(
                    config,
                    pythonFunctions,
                    inType,
                    outType,
                    new int[] { 0 },
                    new int[] { 2 });

    // Key the harness on the first input field.
    final int[] keyFields = { 0 };
    final RowDataKeySelector selector =
            KeySelectorUtil.getRowDataSelector(keyFields, InternalTypeInfo.of(getInputType()));

    final OneInputStreamOperatorTestHarness<RowData, RowData> harness =
            new KeyedOneInputStreamOperatorTestHarness<>(
                    operatorUnderTest, selector, selector.getProducedType());
    harness.getStreamConfig()
            .setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.PYTHON, 0.5);
    harness.setup(new RowDataSerializer(outType));
    return harness;
}
Example use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in the Apache Flink project.
Class StreamExecDeduplicate, method translateToPlanInternal.
/**
 * Translates this deduplicate node into a one-input transformation keyed by
 * {@code uniqueKeys}.
 *
 * <p>The input edge is translated first; its type information is reused both as the
 * operator's row type and as the output type of the resulting transformation. Depending on
 * {@code isRowtime}, the operator is built by the row-time or the processing-time
 * translator. The resulting transformation keeps the input's parallelism.
 *
 * @param planner planner used to translate the input edge and to obtain the execution config
 * @param config node configuration passed to the operator translator and transformation meta
 * @return the deduplicate transformation with its state key selector and key type set
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();
    final InternalTypeInfo<RowData> typeInfo =
            (InternalTypeInfo<RowData>) upstream.getOutputType();
    final TypeSerializer<RowData> serializer =
            typeInfo.createSerializer(planner.getExecEnv().getConfig());

    // Row-time and processing-time deduplication are built by different translators.
    final OneInputStreamOperator<RowData, RowData> dedupOperator =
            isRowtime
                    ? new RowtimeDeduplicateOperatorTranslator(
                                    config,
                                    typeInfo,
                                    serializer,
                                    upstreamRowType,
                                    keepLastRow,
                                    generateUpdateBefore)
                            .createDeduplicateOperator()
                    : new ProcTimeDeduplicateOperatorTranslator(
                                    config,
                                    typeInfo,
                                    serializer,
                                    upstreamRowType,
                                    keepLastRow,
                                    generateUpdateBefore)
                            .createDeduplicateOperator();

    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(DEDUPLICATE_TRANSFORMATION, config),
                    dedupOperator,
                    typeInfo,
                    upstream.getParallelism());

    // State is keyed by the unique-key fields of the input row.
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(uniqueKeys, typeInfo);
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Example use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in the Apache Flink project.
Class StreamExecExchange, method translateToPlanInternal.
/**
 * Translates this exchange node into a {@code PartitionTransformation} over its input.
 *
 * <p>The partitioner and parallelism are chosen from the required distribution of the first
 * input property:
 *
 * <ul>
 *   <li>{@code SINGLETON}: a {@code GlobalPartitioner} with parallelism 1;
 *   <li>{@code HASH}: a {@code KeyGroupStreamPartitioner} over the distribution keys with
 *       {@code ExecutionConfig.PARALLELISM_DEFAULT};
 *   <li>anything else: a {@code TableException}.
 * </ul>
 *
 * @param planner planner used to translate the input edge
 * @param config node configuration used to create the transformation meta
 * @return the partition transformation with meta, parallelism and output type set
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final Transformation<RowData> upstream =
            (Transformation<RowData>) getInputEdges().get(0).translateToPlan(planner);

    final StreamPartitioner<RowData> partitioner;
    final int parallelism;

    final InputProperty inputProperty = getInputProperties().get(0);
    final InputProperty.DistributionType distributionType =
            inputProperty.getRequiredDistribution().getType();

    switch (distributionType) {
        case SINGLETON: {
            partitioner = new GlobalPartitioner<>();
            parallelism = 1;
            break;
        }
        case HASH: {
            // TODO Eliminate duplicate keys
            final int[] hashKeys =
                    ((HashDistribution) inputProperty.getRequiredDistribution()).getKeys();
            final InternalTypeInfo<RowData> upstreamTypeInfo =
                    (InternalTypeInfo<RowData>) upstream.getOutputType();
            final RowDataKeySelector hashKeySelector =
                    KeySelectorUtil.getRowDataSelector(hashKeys, upstreamTypeInfo);
            partitioner =
                    new KeyGroupStreamPartitioner<>(
                            hashKeySelector, DEFAULT_LOWER_BOUND_MAX_PARALLELISM);
            parallelism = ExecutionConfig.PARALLELISM_DEFAULT;
            break;
        }
        default:
            throw new TableException(
                    String.format("%s is not supported now!", distributionType));
    }

    final Transformation<RowData> exchange = new PartitionTransformation<>(upstream, partitioner);
    createTransformationMeta(EXCHANGE_TRANSFORMATION, config).fill(exchange);
    exchange.setParallelism(parallelism);
    exchange.setOutputType(InternalTypeInfo.of(getOutputType()));
    return exchange;
}
Example use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in the Apache Flink project.
Class StreamExecGlobalGroupAggregate, method translateToPlanInternal.
/**
 * Translates this global group-aggregate node into a keyed one-input transformation.
 *
 * <p>Two aggregate info lists are derived from {@code localAggInputRowType}: a "local" one
 * and a "global" one, which differ only in the {@code isStateBackendDataViews} argument.
 * From these, a local and a global aggregate handler plus a record equaliser for the global
 * aggregate value types are generated. The operator is only built when mini-batch is
 * enabled in {@code config}; otherwise a {@code TableException} is thrown. The resulting
 * transformation keeps the input's parallelism and is keyed by {@code grouping}.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration (state retention time, mini-batch settings)
 * @return the aggregate transformation with its state key selector and key type set
 * @throws TableException if mini-batch is not enabled
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // A negative retention time together with a non-empty grouping only triggers a warning.
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent excessive "
                        + "state size. You may specify a retention time of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    // NOTE: `indexOfCountStar` below is the member declared outside this method, not the
    // count-star index of the global aggregate info list computed further down.
    final AggregateInfoList localInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    localAggInputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    JavaScalaConversionUtil.toScala(Optional.ofNullable(indexOfCountStar)),
                    false, // isStateBackendDataViews
                    true); // needDistinctInfo
    final AggregateInfoList globalInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    localAggInputRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    JavaScalaConversionUtil.toScala(Optional.ofNullable(indexOfCountStar)),
                    true, // isStateBackendDataViews
                    true); // needDistinctInfo

    // Both handlers are generated against the accumulator types of the local info list.
    final GeneratedAggsHandleFunction localHandler =
            generateAggsHandler(
                    "LocalGroupAggsHandler",
                    localInfos,
                    grouping.length,
                    localInfos.getAccTypes(),
                    config,
                    planner.getRelBuilder());
    final GeneratedAggsHandleFunction globalHandler =
            generateAggsHandler(
                    "GlobalGroupAggsHandler",
                    globalInfos,
                    0, // mergedAccOffset
                    localInfos.getAccTypes(),
                    config,
                    planner.getRelBuilder());

    final int globalCountStarIndex = globalInfos.getIndexOfCountStar();
    final LogicalType[] globalAccLogicalTypes =
            Arrays.stream(globalInfos.getAccTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    final LogicalType[] globalValueLogicalTypes =
            Arrays.stream(globalInfos.getActualValueTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    final GeneratedRecordEqualiser equaliser =
            new EqualiserCodeGenerator(globalValueLogicalTypes)
                    .generateRecordEqualiser("GroupAggValueEqualiser");

    // Only the mini-batch variant of the global aggregation is supported.
    if (!config.get(ExecutionConfigOptions.TABLE_EXEC_MINIBATCH_ENABLED)) {
        throw new TableException("Local-Global optimization is only worked in miniBatch mode");
    }
    final MiniBatchGlobalGroupAggFunction bundleFunction =
            new MiniBatchGlobalGroupAggFunction(
                    localHandler,
                    globalHandler,
                    equaliser,
                    globalAccLogicalTypes,
                    globalCountStarIndex,
                    generateUpdateBefore,
                    config.getStateRetentionTime());
    final OneInputStreamOperator<RowData, RowData> aggOperator =
            new KeyedMapBundleOperator<>(
                    bundleFunction, AggregateUtil.createMiniBatchTrigger(config));

    // partitioned aggregation
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(GLOBAL_GROUP_AGGREGATE_TRANSFORMATION, config),
                    aggOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());

    // set KeyType and Selector for state
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Example use of org.apache.flink.table.runtime.keyselector.RowDataKeySelector in the Apache Flink project.
Class StreamExecGroupAggregate, method translateToPlanInternal.
/**
 * Translates this group-aggregate node into a keyed one-input transformation.
 *
 * <p>An aggregate handler is generated for the input row type (with retraction support when
 * {@code needRetraction} is set), together with a record equaliser over the aggregate value
 * types. When mini-batch is enabled in {@code config}, the aggregation runs as a
 * {@code MiniBatchGroupAggFunction} inside a {@code KeyedMapBundleOperator}; otherwise as a
 * {@code GroupAggFunction} inside a {@code KeyedProcessOperator}. The resulting
 * transformation keeps the input's parallelism and is keyed by {@code grouping}.
 *
 * @param planner planner used to translate the input edge and to obtain the rel builder
 * @param config node configuration (table config, state retention time, mini-batch settings)
 * @return the aggregate transformation with its state key selector and key type set
 */
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    // A negative retention time together with a non-empty grouping only triggers a warning.
    if (grouping.length > 0 && config.getStateRetentionTime() < 0) {
        LOG.warn(
                "No state retention interval configured for a query which accumulates state. "
                        + "Please provide a query configuration with valid retention interval to prevent excessive "
                        + "state size. You may specify a retention time of 0 to not clean up the state.");
    }

    final ExecEdge edge = getInputEdges().get(0);
    final Transformation<RowData> upstream =
            (Transformation<RowData>) edge.translateToPlan(planner);
    final RowType upstreamRowType = (RowType) edge.getOutputType();

    final AggsHandlerCodeGenerator handlerGenerator =
            new AggsHandlerCodeGenerator(
                            new CodeGeneratorContext(config.getTableConfig()),
                            planner.getRelBuilder(),
                            JavaScalaConversionUtil.toScala(upstreamRowType.getChildren()),
                            // TODO: but other operators do not copy this input field.....
                            true)
                    .needAccumulate();
    if (needRetraction) {
        handlerGenerator.needRetract();
    }

    final AggregateInfoList aggInfos =
            AggregateUtil.transformToStreamAggregateInfoList(
                    upstreamRowType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(aggCalls)),
                    aggCallNeedRetractions,
                    needRetraction,
                    true,
                    true);
    final GeneratedAggsHandleFunction generatedHandler =
            handlerGenerator.generateAggsHandler("GroupAggsHandler", aggInfos);

    final LogicalType[] accLogicalTypes =
            Arrays.stream(aggInfos.getAccTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    final LogicalType[] valueLogicalTypes =
            Arrays.stream(aggInfos.getActualValueTypes())
                    .map(LogicalTypeDataTypeConverter::fromDataTypeToLogicalType)
                    .toArray(LogicalType[]::new);
    final GeneratedRecordEqualiser equaliser =
            new EqualiserCodeGenerator(valueLogicalTypes)
                    .generateRecordEqualiser("GroupAggValueEqualiser");
    final int countStarIndex = aggInfos.getIndexOfCountStar();

    final boolean miniBatch = config.get(ExecutionConfigOptions.TABLE_EXEC_MINIBATCH_ENABLED);
    final OneInputStreamOperator<RowData, RowData> aggOperator;
    if (miniBatch) {
        // Bundled variant: records are buffered and aggregated per mini-batch trigger.
        final MiniBatchGroupAggFunction bundleFunction =
                new MiniBatchGroupAggFunction(
                        generatedHandler,
                        equaliser,
                        accLogicalTypes,
                        upstreamRowType,
                        countStarIndex,
                        generateUpdateBefore,
                        config.getStateRetentionTime());
        aggOperator =
                new KeyedMapBundleOperator<>(
                        bundleFunction, AggregateUtil.createMiniBatchTrigger(config));
    } else {
        final GroupAggFunction processFunction =
                new GroupAggFunction(
                        generatedHandler,
                        equaliser,
                        accLogicalTypes,
                        countStarIndex,
                        generateUpdateBefore,
                        config.getStateRetentionTime());
        aggOperator = new KeyedProcessOperator<>(processFunction);
    }

    // partitioned aggregation
    final OneInputTransformation<RowData, RowData> result =
            ExecNodeUtil.createOneInputTransformation(
                    upstream,
                    createTransformationMeta(GROUP_AGGREGATE_TRANSFORMATION, config),
                    aggOperator,
                    InternalTypeInfo.of(getOutputType()),
                    upstream.getParallelism());

    // set KeyType and Selector for state
    final RowDataKeySelector keySelector =
            KeySelectorUtil.getRowDataSelector(grouping, InternalTypeInfo.of(upstreamRowType));
    result.setStateKeySelector(keySelector);
    result.setStateKeyType(keySelector.getProducedType());
    return result;
}
Aggregations