Use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in the flink-mirror project by flink-ci.
Example: class RowTimeDeduplicateFunctionTest, method testRowTimeDeduplicateKeepFirstRow.
/**
 * Exercises row-time deduplication that keeps the FIRST row per key, in three phases:
 * (1) normal processing up to a watermark, (2) snapshot/restore and continued processing,
 * (3) state TTL expiry, after which new rows are treated as first rows again.
 *
 * @param generateUpdateBefore whether the function under test emits UPDATE_BEFORE messages
 * @param generateInsert whether the function under test emits INSERT messages
 * @param expectedOutput the records expected across all three phases, compared sorted
 * @throws Exception if harness setup, snapshotting, or processing fails
 */
private void testRowTimeDeduplicateKeepFirstRow(
        boolean generateUpdateBefore, boolean generateInsert, List<Object> expectedOutput)
        throws Exception {
    final boolean keepFirstRow = true; // documented intent; passed as keepLastRow = false below
    final boolean keepLastRow = !keepFirstRow;
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness;
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> keyedMapBundleOperator = null;
    // NOTE(review): declaration kept raw as in the original; the concrete key/in/out
    // parameters are presumably <RowData, RowData, RowData> — confirm before typing it.
    KeyedProcessOperator keyedProcessOperator = null;
    if (miniBatchEnable) {
        RowTimeMiniBatchDeduplicateFunction func =
                new RowTimeMiniBatchDeduplicateFunction(
                        inputRowType,
                        serializer,
                        minTtlTime.toMilliseconds(),
                        rowTimeIndex,
                        generateUpdateBefore,
                        generateInsert,
                        keepLastRow);
        CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(miniBatchSize);
        keyedMapBundleOperator = new KeyedMapBundleOperator<>(func, trigger);
        testHarness = createTestHarness(keyedMapBundleOperator);
    } else {
        RowTimeDeduplicateFunction func =
                new RowTimeDeduplicateFunction(
                        inputRowType,
                        minTtlTime.toMilliseconds(),
                        rowTimeIndex,
                        generateUpdateBefore,
                        generateInsert,
                        keepLastRow);
        keyedProcessOperator = new KeyedProcessOperator<>(func);
        testHarness = createTestHarness(keyedProcessOperator);
    }
    List<Object> actualOutput = new ArrayList<>();
    testHarness.open();
    // Duplicate ("key1", 13, 99L) verifies exact-duplicate suppression as well.
    testHarness.processElement(insertRecord("key1", 13, 99L));
    testHarness.processElement(insertRecord("key1", 13, 99L));
    testHarness.processElement(insertRecord("key1", 12, 100L));
    testHarness.processElement(insertRecord("key2", 11, 101L));
    // test 1: keep first row with row time
    testHarness.processWatermark(new Watermark(102));
    actualOutput.addAll(testHarness.getOutput());
    // do a snapshot, close and restore again
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
    testHarness.close();
    if (miniBatchEnable) {
        testHarness = createTestHarness(keyedMapBundleOperator);
    } else {
        testHarness = createTestHarness(keyedProcessOperator);
    }
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    // Later timestamps for existing keys must be ignored (first row already kept).
    testHarness.processElement(insertRecord("key1", 12, 300L));
    testHarness.processElement(insertRecord("key2", 11, 301L));
    testHarness.processElement(insertRecord("key3", 5, 299L));
    // test 2: load snapshot state
    testHarness.processWatermark(new Watermark(302));
    // test 3: expire the state
    testHarness.setStateTtlProcessingTime(minTtlTime.toMilliseconds() + 1);
    testHarness.processElement(insertRecord("key1", 12, 400L));
    testHarness.processElement(insertRecord("key2", 11, 401L));
    testHarness.processWatermark(new Watermark(402));
    // The first-row state for ("key1", 13, 99L) and ("key2", 11, 101L) has expired,
    // so ("key1", 12, 400L) and ("key2", 11, 401L) are emitted as fresh first rows.
    actualOutput.addAll(testHarness.getOutput());
    assertor.assertOutputEqualsSorted("output wrong.", expectedOutput, actualOutput);
    testHarness.close();
}
Use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in the flink-mirror project by flink-ci.
Example: class RowTimeDeduplicateFunctionTest, method testRowTimeDeduplicateKeepLastRow.
/**
 * Exercises row-time deduplication that keeps the LAST row per key, in three phases:
 * (1) normal processing up to a watermark, (2) snapshot/restore and continued processing,
 * (3) state TTL expiry, after which new rows are emitted as INSERT again.
 *
 * @param generateUpdateBefore whether the function under test emits UPDATE_BEFORE messages
 * @param generateInsert whether the function under test emits INSERT messages
 * @param expectedOutput the records expected across all three phases, compared sorted
 * @throws Exception if harness setup, snapshotting, or processing fails
 */
private void testRowTimeDeduplicateKeepLastRow(
        boolean generateUpdateBefore, boolean generateInsert, List<Object> expectedOutput)
        throws Exception {
    final boolean keepLastRow = true;
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness;
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> keyedMapBundleOperator = null;
    // NOTE(review): declaration kept raw as in the original; the concrete key/in/out
    // parameters are presumably <RowData, RowData, RowData> — confirm before typing it.
    KeyedProcessOperator keyedProcessOperator = null;
    if (miniBatchEnable) {
        RowTimeMiniBatchDeduplicateFunction func =
                new RowTimeMiniBatchDeduplicateFunction(
                        inputRowType,
                        serializer,
                        minTtlTime.toMilliseconds(),
                        rowTimeIndex,
                        generateUpdateBefore,
                        generateInsert,
                        keepLastRow);
        CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(miniBatchSize);
        keyedMapBundleOperator = new KeyedMapBundleOperator<>(func, trigger);
        testHarness = createTestHarness(keyedMapBundleOperator);
    } else {
        // Pass the keepLastRow local (was a hard-coded `true`) so both branches stay
        // consistent and track the flag declared at the top of the method.
        RowTimeDeduplicateFunction func =
                new RowTimeDeduplicateFunction(
                        inputRowType,
                        minTtlTime.toMilliseconds(),
                        rowTimeIndex,
                        generateUpdateBefore,
                        generateInsert,
                        keepLastRow);
        keyedProcessOperator = new KeyedProcessOperator<>(func);
        testHarness = createTestHarness(keyedProcessOperator);
    }
    List<Object> actualOutput = new ArrayList<>();
    testHarness.open();
    testHarness.processElement(insertRecord("key1", 13, 99L));
    testHarness.processElement(insertRecord("key1", 12, 100L));
    testHarness.processElement(insertRecord("key2", 11, 101L));
    // test 1: keep last row with row time
    testHarness.processWatermark(new Watermark(102));
    actualOutput.addAll(testHarness.getOutput());
    // do a snapshot, close and restore again
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
    testHarness.close();
    if (miniBatchEnable) {
        testHarness = createTestHarness(keyedMapBundleOperator);
    } else {
        testHarness = createTestHarness(keyedProcessOperator);
    }
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    testHarness.processElement(insertRecord("key1", 12, 300L));
    testHarness.processElement(insertRecord("key2", 11, 301L));
    testHarness.processElement(insertRecord("key3", 5, 299L));
    // test 2: load snapshot state
    testHarness.processWatermark(new Watermark(302));
    // test 3: expire the state
    testHarness.setStateTtlProcessingTime(minTtlTime.toMilliseconds() + 1);
    testHarness.processElement(insertRecord("key1", 12, 400L));
    testHarness.processElement(insertRecord("key2", 11, 401L));
    testHarness.processWatermark(new Watermark(402));
    // All state has expired, so ("key1", 12, 400L) and ("key2", 11, 401L) will be
    // emitted as INSERT messages rather than retractions/updates.
    actualOutput.addAll(testHarness.getOutput());
    assertor.assertOutputEqualsSorted("output wrong.", expectedOutput, actualOutput);
    testHarness.close();
}
Use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in the flink project by splunk.
Example: class RowTimeMiniBatchLatestChangeDeduplicateFunctionTest, method createTestHarness.
/**
 * Builds a keyed test harness wrapping a {@code RowTimeMiniBatchLatestChangeDeduplicateFunction}
 * inside a count-triggered mini-batch bundle operator.
 *
 * @param generateUpdateBefore whether the function emits UPDATE_BEFORE messages
 * @param generateInsert whether the function emits INSERT messages
 * @param keepLastRow true to deduplicate keeping the last row per key, false for the first
 * @return a harness ready for {@code open()} and element processing
 * @throws Exception if harness construction fails
 */
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(
        boolean generateUpdateBefore, boolean generateInsert, boolean keepLastRow)
        throws Exception {
    RowTimeMiniBatchLatestChangeDeduplicateFunction func =
            new RowTimeMiniBatchLatestChangeDeduplicateFunction(
                    inputRowType,
                    serializer,
                    minTtlTime.toMilliseconds(),
                    rowTimeIndex,
                    generateUpdateBefore,
                    generateInsert,
                    keepLastRow);
    // Parameterized trigger/operator types replace the original raw-type construction.
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(miniBatchSize);
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> keyedMapBundleOperator =
            new KeyedMapBundleOperator<>(func, trigger);
    return createTestHarness(keyedMapBundleOperator);
}
Use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in the flink project by splunk.
Example: class ProcTimeMiniBatchDeduplicateKeepLastRowFunctionTest, method createTestHarness.
/**
 * Builds a keyed test harness around the given proc-time keep-last-row deduplicate
 * function, bundling input rows in mini-batches of three.
 *
 * @param func the deduplicate function under test
 * @return a harness keyed by {@code rowKeySelector}, ready for {@code open()}
 * @throws Exception if harness construction fails
 */
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(
        ProcTimeMiniBatchDeduplicateKeepLastRowFunction func) throws Exception {
    // Mini-batch of 3 input elements per bundle flush.
    // The trigger counts RowData input elements; the original's
    // CountBundleTrigger<Tuple2<String, String>> element type was a mismatch
    // masked by the raw KeyedMapBundleOperator type.
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(3);
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> op =
            new KeyedMapBundleOperator<>(func, trigger);
    return new KeyedOneInputStreamOperatorTestHarness<>(
            op, rowKeySelector, rowKeySelector.getProducedType());
}
Use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in the flink project by apache.
Example: class ProcTimeMiniBatchDeduplicateKeepLastRowFunctionTest, method createTestHarness.
/**
 * Builds a keyed test harness around the given proc-time keep-last-row deduplicate
 * function, bundling input rows in mini-batches of three.
 *
 * @param func the deduplicate function under test
 * @return a harness keyed by {@code rowKeySelector}, ready for {@code open()}
 * @throws Exception if harness construction fails
 */
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(
        ProcTimeMiniBatchDeduplicateKeepLastRowFunction func) throws Exception {
    // Mini-batch of 3 input elements per bundle flush.
    // The trigger counts RowData input elements; the original's
    // CountBundleTrigger<Tuple2<String, String>> element type was a mismatch
    // masked by the raw KeyedMapBundleOperator type.
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(3);
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> op =
            new KeyedMapBundleOperator<>(func, trigger);
    return new KeyedOneInputStreamOperatorTestHarness<>(
            op, rowKeySelector, rowKeySelector.getProducedType());
}
Aggregations