Use of org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator in project flink by apache.
The class StreamExecIncrementalGroupAggregate, method translateToPlanInternal.
@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> translateToPlanInternal(PlannerBase planner, ExecNodeConfig config) {
    final ExecEdge inputEdge = getInputEdges().get(0);
    final Transformation<RowData> inputTransform =
            (Transformation<RowData>) inputEdge.translateToPlan(planner);

    final AggregateInfoList partialLocalAggInfoList =
            AggregateUtil.createPartialAggInfoList(
                    partialLocalAggInputType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(partialOriginalAggCalls)),
                    partialAggCallNeedRetractions, partialAggNeedRetraction, false);
    final GeneratedAggsHandleFunction partialAggsHandler =
            generateAggsHandler(
                    "PartialGroupAggsHandler", partialLocalAggInfoList,
                    partialAggGrouping.length, partialLocalAggInfoList.getAccTypes(),
                    config, planner.getRelBuilder(),
                    // the partial aggregate accumulators will be buffered, so they need to be copied
                    true);
    final AggregateInfoList incrementalAggInfo =
            AggregateUtil.createIncrementalAggInfoList(
                    partialLocalAggInputType,
                    JavaScalaConversionUtil.toScala(Arrays.asList(partialOriginalAggCalls)),
                    partialAggCallNeedRetractions, partialAggNeedRetraction);
    final GeneratedAggsHandleFunction finalAggsHandler =
            generateAggsHandler(
                    "FinalGroupAggsHandler", incrementalAggInfo,
                    0, partialLocalAggInfoList.getAccTypes(),
                    config, planner.getRelBuilder(),
                    // the final aggregate accumulators are not buffered
                    false);

    final RowDataKeySelector partialKeySelector =
            KeySelectorUtil.getRowDataSelector(
                    partialAggGrouping, InternalTypeInfo.of(inputEdge.getOutputType()));
    final RowDataKeySelector finalKeySelector =
            KeySelectorUtil.getRowDataSelector(
                    finalAggGrouping, partialKeySelector.getProducedType());

    final MiniBatchIncrementalGroupAggFunction aggFunction =
            new MiniBatchIncrementalGroupAggFunction(
                    partialAggsHandler, finalAggsHandler,
                    finalKeySelector, config.getStateRetentionTime());
    final OneInputStreamOperator<RowData, RowData> operator =
            new KeyedMapBundleOperator<>(aggFunction, AggregateUtil.createMiniBatchTrigger(config));

    // partitioned aggregation
    final OneInputTransformation<RowData, RowData> transform =
            ExecNodeUtil.createOneInputTransformation(
                    inputTransform,
                    createTransformationMeta(INCREMENTAL_GROUP_AGGREGATE_TRANSFORMATION, config),
                    operator, InternalTypeInfo.of(getOutputType()),
                    inputTransform.getParallelism());

    // set the key type and selector for state
    transform.setStateKeySelector(partialKeySelector);
    transform.setStateKeyType(partialKeySelector.getProducedType());
    return transform;
}
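Here KeyedMapBundleOperator simply wraps the MiniBatchIncrementalGroupAggFunction: the operator buffers incoming records per key and hands the buffered map to its MapBundleFunction when the mini-batch trigger fires (or when a checkpoint barrier arrives). As a rough sketch of that contract, the CountingBundleFunction below is hypothetical and not part of Flink; only the MapBundleFunction base class and its addInput/finishBundle methods come from flink-table-runtime.

import java.util.Map;
import javax.annotation.Nullable;
import org.apache.flink.table.runtime.operators.bundle.MapBundleFunction;
import org.apache.flink.util.Collector;

// Hypothetical bundle function: counts records per key within one mini-batch
// and emits a "key=count" string for each key when the bundle is flushed.
public class CountingBundleFunction extends MapBundleFunction<String, Long, String, String> {

    @Override
    public Long addInput(@Nullable Long value, String input) {
        // Called for every input record; 'value' is what has been buffered so
        // far for this record's key, or null if the key is new in this bundle.
        return value == null ? 1L : value + 1;
    }

    @Override
    public void finishBundle(Map<String, Long> buffer, Collector<String> out) {
        // Called when the trigger fires; flushes the buffered per-key values
        // downstream, after which the operator clears the buffer.
        for (Map.Entry<String, Long> entry : buffer.entrySet()) {
            out.collect(entry.getKey() + "=" + entry.getValue());
        }
    }
}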
Use of org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator in project flink by apache.
The class ProcTimeMiniBatchDeduplicateKeepLastRowFunctionTest, method createTestHarness.
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(ProcTimeMiniBatchDeduplicateKeepLastRowFunction func) throws Exception {
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(3);
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> op =
            new KeyedMapBundleOperator<>(func, trigger);
    return new KeyedOneInputStreamOperatorTestHarness<>(
            op, rowKeySelector, rowKeySelector.getProducedType());
}
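The CountBundleTrigger used here fires once a fixed number of elements has been buffered. A hypothetical trigger with the same behavior, sketched against the BundleTrigger and BundleTriggerCallback interfaces from org.apache.flink.table.runtime.operators.bundle.trigger (SimpleCountTrigger itself is illustrative, not a Flink class), might look like this:

import org.apache.flink.table.runtime.operators.bundle.trigger.BundleTrigger;
import org.apache.flink.table.runtime.operators.bundle.trigger.BundleTriggerCallback;

// Illustrative count-based trigger mirroring what CountBundleTrigger does:
// ask the operator to flush the bundle after maxCount elements.
public class SimpleCountTrigger<T> implements BundleTrigger<T> {

    private final long maxCount;
    private transient BundleTriggerCallback callback;
    private transient long count;

    public SimpleCountTrigger(long maxCount) {
        this.maxCount = maxCount;
    }

    @Override
    public void registerCallback(BundleTriggerCallback callback) {
        this.callback = callback;
    }

    @Override
    public void onElement(T element) throws Exception {
        if (++count >= maxCount) {
            // finishBundle() makes the operator invoke its MapBundleFunction
            callback.finishBundle();
            reset();
        }
    }

    @Override
    public void reset() {
        count = 0;
    }

    @Override
    public String explain() {
        return "SimpleCountTrigger with maxCount = " + maxCount;
    }
}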
Use of org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator in project flink by apache.
The class ProcTimeMiniBatchDeduplicateKeepFirstRowFunctionTest, method createTestHarness.
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(ProcTimeMiniBatchDeduplicateKeepFirstRowFunction func) throws Exception {
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(3);
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> op =
            new KeyedMapBundleOperator<>(func, trigger);
    return new KeyedOneInputStreamOperatorTestHarness<>(
            op, rowKeySelector, rowKeySelector.getProducedType());
}
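A minimal driver for either harness might look like the following sketch; insertRecord and the concrete field values are illustrative, mirroring the style of the row-time tests below.

// Hypothetical usage: feed three records so the CountBundleTrigger(3) fires,
// then inspect what the deduplicate function emitted for that bundle.
OneInputStreamOperatorTestHarness<RowData, RowData> harness = createTestHarness(func);
harness.open();
harness.processElement(insertRecord("key1", 10, 1L));
harness.processElement(insertRecord("key1", 11, 2L));
harness.processElement(insertRecord("key2", 12, 3L)); // third element flushes the bundle
// keep-first-row semantics: only the first record per key should be emitted
List<Object> emitted = new ArrayList<>(harness.getOutput());
harness.close();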
Use of org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator in project flink by apache.
The class RowTimeDeduplicateFunctionTest, method testRowTimeDeduplicateKeepFirstRow.
private void testRowTimeDeduplicateKeepFirstRow(boolean generateUpdateBefore, boolean generateInsert, List<Object> expectedOutput) throws Exception {
final boolean keepLastRow = false;
OneInputStreamOperatorTestHarness<RowData, RowData> testHarness;
KeyedMapBundleOperator<RowData, RowData, RowData, RowData> keyedMapBundleOperator = null;
KeyedProcessOperator<RowData, RowData, RowData> keyedProcessOperator = null;
if (miniBatchEnable) {
    RowTimeMiniBatchDeduplicateFunction func =
            new RowTimeMiniBatchDeduplicateFunction(
                    inputRowType, serializer, minTtlTime.toMilliseconds(),
                    rowTimeIndex, generateUpdateBefore, generateInsert, keepLastRow);
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(miniBatchSize);
    keyedMapBundleOperator = new KeyedMapBundleOperator<>(func, trigger);
    testHarness = createTestHarness(keyedMapBundleOperator);
} else {
    RowTimeDeduplicateFunction func =
            new RowTimeDeduplicateFunction(
                    inputRowType, minTtlTime.toMilliseconds(), rowTimeIndex,
                    generateUpdateBefore, generateInsert, keepLastRow);
    keyedProcessOperator = new KeyedProcessOperator<>(func);
    testHarness = createTestHarness(keyedProcessOperator);
}
List<Object> actualOutput = new ArrayList<>();
testHarness.open();
testHarness.processElement(insertRecord("key1", 13, 99L));
testHarness.processElement(insertRecord("key1", 13, 99L));
testHarness.processElement(insertRecord("key1", 12, 100L));
testHarness.processElement(insertRecord("key2", 11, 101L));
// test 1: keep first row with row time
testHarness.processWatermark(new Watermark(102));
actualOutput.addAll(testHarness.getOutput());
// do a snapshot, close and restore again
OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
testHarness.close();
if (miniBatchEnable) {
testHarness = createTestHarness(keyedMapBundleOperator);
} else {
testHarness = createTestHarness(keyedProcessOperator);
}
testHarness.setup();
testHarness.initializeState(snapshot);
testHarness.open();
testHarness.processElement(insertRecord("key1", 12, 300L));
testHarness.processElement(insertRecord("key2", 11, 301L));
testHarness.processElement(insertRecord("key3", 5, 299L));
// test 2: load snapshot state
testHarness.processWatermark(new Watermark(302));
// test 3: expire the state
testHarness.setStateTtlProcessingTime(minTtlTime.toMilliseconds() + 1);
testHarness.processElement(insertRecord("key1", 12, 400L));
testHarness.processElement(insertRecord("key2", 11, 401L));
testHarness.processWatermark(402);
// ("key1", 13, 99L) and ("key2", 11, 101L) had retired, thus output ("key1", 12,
// 200L),("key2", 11, 201L)
actualOutput.addAll(testHarness.getOutput());
assertor.assertOutputEqualsSorted("output wrong.", expectedOutput, actualOutput);
testHarness.close();
}
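The createTestHarness(...) overloads invoked above are not included in this excerpt; given the harness construction shown in the earlier snippets, they plausibly look like the sketch below (assuming rowKeySelector is the test's RowDataKeySelector):

// Plausible helper: wrap any one-input operator over RowData in a keyed harness.
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(
        OneInputStreamOperator<RowData, RowData> operator) throws Exception {
    return new KeyedOneInputStreamOperatorTestHarness<>(
            operator, rowKeySelector, rowKeySelector.getProducedType());
}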
Use of org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator in project flink by apache.
The class RowTimeDeduplicateFunctionTest, method testRowTimeDeduplicateKeepLastRow.
private void testRowTimeDeduplicateKeepLastRow(boolean generateUpdateBefore, boolean generateInsert, List<Object> expectedOutput) throws Exception {
final boolean keepLastRow = true;
OneInputStreamOperatorTestHarness<RowData, RowData> testHarness;
KeyedMapBundleOperator<RowData, RowData, RowData, RowData> keyedMapBundleOperator = null;
KeyedProcessOperator<RowData, RowData, RowData> keyedProcessOperator = null;
if (miniBatchEnable) {
    RowTimeMiniBatchDeduplicateFunction func =
            new RowTimeMiniBatchDeduplicateFunction(
                    inputRowType, serializer, minTtlTime.toMilliseconds(),
                    rowTimeIndex, generateUpdateBefore, generateInsert, keepLastRow);
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(miniBatchSize);
    keyedMapBundleOperator = new KeyedMapBundleOperator<>(func, trigger);
    testHarness = createTestHarness(keyedMapBundleOperator);
} else {
    RowTimeDeduplicateFunction func =
            new RowTimeDeduplicateFunction(
                    inputRowType, minTtlTime.toMilliseconds(), rowTimeIndex,
                    generateUpdateBefore, generateInsert, keepLastRow);
    keyedProcessOperator = new KeyedProcessOperator<>(func);
    testHarness = createTestHarness(keyedProcessOperator);
}
List<Object> actualOutput = new ArrayList<>();
testHarness.open();
testHarness.processElement(insertRecord("key1", 13, 99L));
testHarness.processElement(insertRecord("key1", 12, 100L));
testHarness.processElement(insertRecord("key2", 11, 101L));
// test 1: keep last row with row time
testHarness.processWatermark(new Watermark(102));
actualOutput.addAll(testHarness.getOutput());
// do a snapshot, close and restore again
OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
testHarness.close();
if (miniBatchEnable) {
testHarness = createTestHarness(keyedMapBundleOperator);
} else {
testHarness = createTestHarness(keyedProcessOperator);
}
testHarness.setup();
testHarness.initializeState(snapshot);
testHarness.open();
testHarness.processElement(insertRecord("key1", 12, 300L));
testHarness.processElement(insertRecord("key2", 11, 301L));
testHarness.processElement(insertRecord("key3", 5, 299L));
// test 2: load snapshot state
testHarness.processWatermark(new Watermark(302));
// test 3: expire the state
testHarness.setStateTtlProcessingTime(minTtlTime.toMilliseconds() + 1);
testHarness.processElement(insertRecord("key1", 12, 400L));
testHarness.processElement(insertRecord("key2", 11, 401L));
testHarness.processWatermark(402);
// all state has expired, so ("key1", 12, 400L) and ("key2", 11, 401L) are emitted
// as INSERT messages
actualOutput.addAll(testHarness.getOutput());
assertor.assertOutputEqualsSorted("output wrong.", expectedOutput, actualOutput);
testHarness.close();
}