Search in sources:

Example 6 with CountBundleTrigger

use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in project flink by apache.

the class ProcTimeMiniBatchDeduplicateKeepFirstRowFunctionTest method createTestHarness.

/**
 * Builds a keyed test harness around a {@link KeyedMapBundleOperator} that runs {@code func}.
 *
 * <p>The operator is given a count-based bundle trigger configured with a count of 3, and the
 * harness is keyed by {@code rowKeySelector} using its produced type as the key type.
 *
 * @param func the deduplication function under test
 * @return a keyed harness wrapping the bundle operator
 * @throws Exception if the harness cannot be constructed
 */
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(ProcTimeMiniBatchDeduplicateKeepFirstRowFunction func) throws Exception {
    // The harness feeds RowData into the operator, so the trigger is typed on RowData
    // (the previous Tuple2<String, String> argument only compiled because the operator
    // constructor below is invoked as a raw type).
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(3);
    // NOTE(review): the operator stays raw because the function's generic signature is not
    // visible here; parameterize it once that signature has been confirmed.
    KeyedMapBundleOperator op = new KeyedMapBundleOperator(func, trigger);
    return new KeyedOneInputStreamOperatorTestHarness<>(op, rowKeySelector, rowKeySelector.getProducedType());
}
Also used : CountBundleTrigger(org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger) Tuple2(org.apache.flink.api.java.tuple.Tuple2) KeyedMapBundleOperator(org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)

Example 7 with CountBundleTrigger

use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in project flink by apache.

the class RowTimeDeduplicateFunctionTest method testRowTimeDeduplicateKeepFirstRow.

/**
 * Drives a row-time "keep first row" deduplication operator through three phases and compares
 * the collected output against {@code expectedOutput}.
 *
 * <p>Depending on {@code miniBatchEnable}, the operator under test is either a
 * {@link KeyedMapBundleOperator} running a {@code RowTimeMiniBatchDeduplicateFunction} or a
 * {@link KeyedProcessOperator} running a {@code RowTimeDeduplicateFunction}. The phases are:
 *
 * <ol>
 *   <li>process records and a watermark, then collect the output;
 *   <li>snapshot, close, and restore into a fresh harness, then process more records;
 *   <li>advance the state-TTL processing time past {@code minTtlTime} and process records
 *       again.
 * </ol>
 *
 * @param generateUpdateBefore forwarded to the deduplicate function
 * @param generateInsert forwarded to the deduplicate function
 * @param expectedOutput the records the operator is expected to emit over all phases
 * @throws Exception if the harness or the operator fails
 */
private void testRowTimeDeduplicateKeepFirstRow(boolean generateUpdateBefore, boolean generateInsert, List<Object> expectedOutput) throws Exception {
    final boolean keepLastRow = false;
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness;
    // Both operator references are kept so the same operator can be re-wrapped in a new
    // harness after the snapshot/restore step below.
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> keyedMapBundleOperator = null;
    KeyedProcessOperator keyedProcessOperator = null;
    if (miniBatchEnable) {
        RowTimeMiniBatchDeduplicateFunction func = new RowTimeMiniBatchDeduplicateFunction(inputRowType, serializer, minTtlTime.toMilliseconds(), rowTimeIndex, generateUpdateBefore, generateInsert, keepLastRow);
        CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(miniBatchSize);
        // NOTE(review): constructor left raw on purpose; a diamond would infer the type
        // arguments from func, which may not match the declared variable type.
        keyedMapBundleOperator = new KeyedMapBundleOperator(func, trigger);
        testHarness = createTestHarness(keyedMapBundleOperator);
    } else {
        RowTimeDeduplicateFunction func = new RowTimeDeduplicateFunction(inputRowType, minTtlTime.toMilliseconds(), rowTimeIndex, generateUpdateBefore, generateInsert, keepLastRow);
        keyedProcessOperator = new KeyedProcessOperator<>(func);
        testHarness = createTestHarness(keyedProcessOperator);
    }
    List<Object> actualOutput = new ArrayList<>();
    testHarness.open();
    testHarness.processElement(insertRecord("key1", 13, 99L));
    testHarness.processElement(insertRecord("key1", 13, 99L));
    testHarness.processElement(insertRecord("key1", 12, 100L));
    testHarness.processElement(insertRecord("key2", 11, 101L));
    // test 1: keep first row with row time
    testHarness.processWatermark(new Watermark(102));
    actualOutput.addAll(testHarness.getOutput());
    // do a snapshot, close and restore again
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
    testHarness.close();
    if (miniBatchEnable) {
        testHarness = createTestHarness(keyedMapBundleOperator);
    } else {
        testHarness = createTestHarness(keyedProcessOperator);
    }
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    testHarness.processElement(insertRecord("key1", 12, 300L));
    testHarness.processElement(insertRecord("key2", 11, 301L));
    testHarness.processElement(insertRecord("key3", 5, 299L));
    // test 2: load snapshot state
    testHarness.processWatermark(new Watermark(302));
    // test 3: expire the state
    testHarness.setStateTtlProcessingTime(minTtlTime.toMilliseconds() + 1);
    testHarness.processElement(insertRecord("key1", 12, 400L));
    testHarness.processElement(insertRecord("key2", 11, 401L));
    testHarness.processWatermark(new Watermark(402));
    // The earlier first rows ("key1", 13, 99L) and ("key2", 11, 101L) are expected to have
    // expired, so ("key1", 12, 400L) and ("key2", 11, 401L) are emitted as new first rows.
    actualOutput.addAll(testHarness.getOutput());
    assertor.assertOutputEqualsSorted("output wrong.", expectedOutput, actualOutput);
    testHarness.close();
}
Also used : ArrayList(java.util.ArrayList) KeyedMapBundleOperator(org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) RowData(org.apache.flink.table.data.RowData) CountBundleTrigger(org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger) KeyedProcessOperator(org.apache.flink.streaming.api.operators.KeyedProcessOperator) Watermark(org.apache.flink.streaming.api.watermark.Watermark)

Example 8 with CountBundleTrigger

use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in project flink by apache.

the class RowTimeDeduplicateFunctionTest method testRowTimeDeduplicateKeepLastRow.

/**
 * Drives a row-time "keep last row" deduplication operator through three phases and compares
 * the collected output against {@code expectedOutput}.
 *
 * <p>Depending on {@code miniBatchEnable}, the operator under test is either a
 * {@link KeyedMapBundleOperator} running a {@code RowTimeMiniBatchDeduplicateFunction} or a
 * {@link KeyedProcessOperator} running a {@code RowTimeDeduplicateFunction}. The phases are:
 *
 * <ol>
 *   <li>process records and a watermark, then collect the output;
 *   <li>snapshot, close, and restore into a fresh harness, then process more records;
 *   <li>advance the state-TTL processing time past {@code minTtlTime} and process records
 *       again.
 * </ol>
 *
 * @param generateUpdateBefore forwarded to the deduplicate function
 * @param generateInsert forwarded to the deduplicate function
 * @param expectedOutput the records the operator is expected to emit over all phases
 * @throws Exception if the harness or the operator fails
 */
private void testRowTimeDeduplicateKeepLastRow(boolean generateUpdateBefore, boolean generateInsert, List<Object> expectedOutput) throws Exception {
    final boolean keepLastRow = true;
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness;
    // Both operator references are kept so the same operator can be re-wrapped in a new
    // harness after the snapshot/restore step below.
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> keyedMapBundleOperator = null;
    KeyedProcessOperator keyedProcessOperator = null;
    if (miniBatchEnable) {
        RowTimeMiniBatchDeduplicateFunction func = new RowTimeMiniBatchDeduplicateFunction(inputRowType, serializer, minTtlTime.toMilliseconds(), rowTimeIndex, generateUpdateBefore, generateInsert, keepLastRow);
        CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(miniBatchSize);
        // NOTE(review): constructor left raw on purpose; a diamond would infer the type
        // arguments from func, which may not match the declared variable type.
        keyedMapBundleOperator = new KeyedMapBundleOperator(func, trigger);
        testHarness = createTestHarness(keyedMapBundleOperator);
    } else {
        // Pass the shared flag rather than a literal so both branches stay in sync.
        RowTimeDeduplicateFunction func = new RowTimeDeduplicateFunction(inputRowType, minTtlTime.toMilliseconds(), rowTimeIndex, generateUpdateBefore, generateInsert, keepLastRow);
        keyedProcessOperator = new KeyedProcessOperator<>(func);
        testHarness = createTestHarness(keyedProcessOperator);
    }
    List<Object> actualOutput = new ArrayList<>();
    testHarness.open();
    testHarness.processElement(insertRecord("key1", 13, 99L));
    testHarness.processElement(insertRecord("key1", 12, 100L));
    testHarness.processElement(insertRecord("key2", 11, 101L));
    // test 1: keep last row with row time
    testHarness.processWatermark(new Watermark(102));
    actualOutput.addAll(testHarness.getOutput());
    // do a snapshot, close and restore again
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
    testHarness.close();
    if (miniBatchEnable) {
        testHarness = createTestHarness(keyedMapBundleOperator);
    } else {
        testHarness = createTestHarness(keyedProcessOperator);
    }
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    testHarness.processElement(insertRecord("key1", 12, 300L));
    testHarness.processElement(insertRecord("key2", 11, 301L));
    testHarness.processElement(insertRecord("key3", 5, 299L));
    // test 2: load snapshot state
    testHarness.processWatermark(new Watermark(302));
    // test 3: expire the state
    testHarness.setStateTtlProcessingTime(minTtlTime.toMilliseconds() + 1);
    testHarness.processElement(insertRecord("key1", 12, 400L));
    testHarness.processElement(insertRecord("key2", 11, 401L));
    testHarness.processWatermark(new Watermark(402));
    // All state is expected to have expired, so the records ("key1", 12, 400L) and
    // ("key2", 11, 401L) will be emitted as INSERT messages.
    actualOutput.addAll(testHarness.getOutput());
    assertor.assertOutputEqualsSorted("output wrong.", expectedOutput, actualOutput);
    testHarness.close();
}
Also used : ArrayList(java.util.ArrayList) KeyedMapBundleOperator(org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) RowData(org.apache.flink.table.data.RowData) CountBundleTrigger(org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger) KeyedProcessOperator(org.apache.flink.streaming.api.operators.KeyedProcessOperator) Watermark(org.apache.flink.streaming.api.watermark.Watermark)

Example 9 with CountBundleTrigger

use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in project flink by apache.

the class RowTimeMiniBatchLatestChangeDeduplicateFunctionTest method createTestHarness.

/**
 * Builds a test harness around a {@link KeyedMapBundleOperator} that runs a
 * {@code RowTimeMiniBatchLatestChangeDeduplicateFunction}.
 *
 * <p>The function is configured from the test's {@code inputRowType}, {@code serializer},
 * {@code minTtlTime} and {@code rowTimeIndex}; the bundle trigger is a count-based trigger
 * configured with {@code miniBatchSize}.
 *
 * @param generateUpdateBefore forwarded to the deduplicate function
 * @param generateInsert forwarded to the deduplicate function
 * @param keepLastRow forwarded to the deduplicate function
 * @return the harness produced by the operator-based {@code createTestHarness} overload
 * @throws Exception if the harness cannot be constructed
 */
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(boolean generateUpdateBefore, boolean generateInsert, boolean keepLastRow) throws Exception {
    RowTimeMiniBatchLatestChangeDeduplicateFunction func = new RowTimeMiniBatchLatestChangeDeduplicateFunction(inputRowType, serializer, minTtlTime.toMilliseconds(), rowTimeIndex, generateUpdateBefore, generateInsert, keepLastRow);
    // Typed on RowData instead of being declared as a raw CountBundleTrigger.
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(miniBatchSize);
    // NOTE(review): constructor left raw because the function's generic signature is not
    // visible here; switch to a diamond once it is confirmed to match the declared type.
    KeyedMapBundleOperator<RowData, RowData, RowData, RowData> keyedMapBundleOperator = new KeyedMapBundleOperator(func, trigger);
    return createTestHarness(keyedMapBundleOperator);
}
Also used : CountBundleTrigger(org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger) RowData(org.apache.flink.table.data.RowData) KeyedMapBundleOperator(org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator)

Example 10 with CountBundleTrigger

use of org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger in project flink by splunk.

the class ProcTimeMiniBatchDeduplicateKeepFirstRowFunctionTest method createTestHarness.

/**
 * Builds a keyed test harness around a {@link KeyedMapBundleOperator} that runs {@code func}.
 *
 * <p>The operator is given a count-based bundle trigger configured with a count of 3, and the
 * harness is keyed by {@code rowKeySelector} using its produced type as the key type.
 *
 * @param func the deduplication function under test
 * @return a keyed harness wrapping the bundle operator
 * @throws Exception if the harness cannot be constructed
 */
private OneInputStreamOperatorTestHarness<RowData, RowData> createTestHarness(ProcTimeMiniBatchDeduplicateKeepFirstRowFunction func) throws Exception {
    // The harness feeds RowData into the operator, so the trigger is typed on RowData
    // (the previous Tuple2<String, String> argument only compiled because the operator
    // constructor below is invoked as a raw type).
    CountBundleTrigger<RowData> trigger = new CountBundleTrigger<>(3);
    // NOTE(review): the operator stays raw because the function's generic signature is not
    // visible here; parameterize it once that signature has been confirmed.
    KeyedMapBundleOperator op = new KeyedMapBundleOperator(func, trigger);
    return new KeyedOneInputStreamOperatorTestHarness<>(op, rowKeySelector, rowKeySelector.getProducedType());
}
Also used : CountBundleTrigger(org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger) Tuple2(org.apache.flink.api.java.tuple.Tuple2) KeyedMapBundleOperator(org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)

Aggregations

KeyedMapBundleOperator (org.apache.flink.table.runtime.operators.bundle.KeyedMapBundleOperator)15 CountBundleTrigger (org.apache.flink.table.runtime.operators.bundle.trigger.CountBundleTrigger)15 RowData (org.apache.flink.table.data.RowData)9 ArrayList (java.util.ArrayList)6 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)6 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)6 KeyedProcessOperator (org.apache.flink.streaming.api.operators.KeyedProcessOperator)6 Watermark (org.apache.flink.streaming.api.watermark.Watermark)6 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)6