
Example 81 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

The class RowTimeIntervalJoinTest, method testRowTimeInnerJoinWithNegativeBounds.

/** a.rowtime >= b.rowtime - 10 and a.rowtime <= b.rowtime - 7. */
@Test
public void testRowTimeInnerJoinWithNegativeBounds() throws Exception {
    RowTimeIntervalJoin joinProcessFunc = new RowTimeIntervalJoin(FlinkJoinType.INNER, -10, -7, 0, rowType, rowType, joinFunction, 0, 0);
    KeyedTwoInputStreamOperatorTestHarness<RowData, RowData, RowData, RowData> testHarness = createTestHarness(joinProcessFunc);
    testHarness.open();
    testHarness.processWatermark1(new Watermark(1));
    testHarness.processWatermark2(new Watermark(1));
    // This row will not be cached.
    testHarness.processElement2(insertRecord(2L, "k1"));
    assertEquals(0, testHarness.numKeyedStateEntries());
    testHarness.processWatermark1(new Watermark(2));
    testHarness.processWatermark2(new Watermark(2));
    testHarness.processElement1(insertRecord(3L, "k1"));
    testHarness.processElement2(insertRecord(3L, "k1"));
    // Test for -10 boundary (13 - 10 = 3).
    // This row from the right stream will be cached.
    // The clean time for the left stream is 13 - 7 + 1 + ((10 - 7) / 2) = 8
    testHarness.processElement2(insertRecord(13L, "k1"));
    // Test for -7 boundary (13 - 7 = 6).
    testHarness.processElement1(insertRecord(6L, "k1"));
    assertEquals(4, testHarness.numKeyedStateEntries());
    // Trigger the left timer with timestamp 8.
    // The row with timestamp = 13 will be removed here (13 < 10 + 7).
    testHarness.processWatermark1(new Watermark(10));
    testHarness.processWatermark2(new Watermark(10));
    assertEquals(2, testHarness.numKeyedStateEntries());
    // Clear the states.
    testHarness.processWatermark1(new Watermark(18));
    testHarness.processWatermark2(new Watermark(18));
    assertEquals(0, testHarness.numKeyedStateEntries());
    List<Object> expectedOutput = new ArrayList<>();
    expectedOutput.add(new Watermark(-9));
    expectedOutput.add(new Watermark(-8));
    expectedOutput.add(insertRecord(3L, "k1", 13L, "k1"));
    expectedOutput.add(insertRecord(6L, "k1", 13L, "k1"));
    expectedOutput.add(new Watermark(0));
    expectedOutput.add(new Watermark(8));
    assertor.assertOutputEquals("output wrong.", expectedOutput, testHarness.getOutput());
    testHarness.close();
}
Also used: RowData (org.apache.flink.table.data.RowData), ArrayList (java.util.ArrayList), Watermark (org.apache.flink.streaming.api.watermark.Watermark), Test (org.junit.Test)
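
The cleanup arithmetic in the comments above can be checked with a small helper. This is a minimal sketch, not Flink code: it assumes the interval join registers its cleanup timer at rowTime + relativeSize + allowedLateness + minCleanUpInterval + 1, with minCleanUpInterval = (leftRelativeSize + rightRelativeSize) / 2, the pattern spelled out in the comments of this example and the next one.

// Hypothetical helper mirroring the cleanup-time arithmetic used in the test comments.
static long cleanUpTime(long rowTime, long relativeSize, long allowedLateness,
        long leftRelativeSize, long rightRelativeSize) {
    // Assumption: minCleanUpInterval is half of the sum of the two relative bounds.
    long minCleanUpInterval = (leftRelativeSize + rightRelativeSize) / 2;
    return rowTime + relativeSize + allowedLateness + minCleanUpInterval + 1;
}

For the right row with timestamp 13 (leftRelativeSize = 10, rightRelativeSize = -7, allowedLateness = 0), this gives cleanUpTime(13, -7, 0, 10, -7) = 8, matching the left timer that fires once both watermarks reach 10.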

Example 82 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

The class RowTimeIntervalJoinTest, method testRowTimeLeftOuterJoin.

@Test
public void testRowTimeLeftOuterJoin() throws Exception {
    RowTimeIntervalJoin joinProcessFunc = new RowTimeIntervalJoin(FlinkJoinType.LEFT, -5, 9, 0, rowType, rowType, joinFunction, 0, 0);
    KeyedTwoInputStreamOperatorTestHarness<RowData, RowData, RowData, RowData> testHarness = createTestHarness(joinProcessFunc);
    testHarness.open();
    testHarness.processElement1(insertRecord(1L, "k1"));
    testHarness.processElement2(insertRecord(1L, "k2"));
    assertEquals(2, testHarness.numEventTimeTimers());
    assertEquals(4, testHarness.numKeyedStateEntries());
    // The left row with timestamp = 1 will be padded and removed (14=1+5+1+((5+9)/2)).
    testHarness.processWatermark1(new Watermark(14));
    testHarness.processWatermark2(new Watermark(14));
    assertEquals(1, testHarness.numEventTimeTimers());
    assertEquals(2, testHarness.numKeyedStateEntries());
    // The right row with timestamp = 1 will be removed (18=1+9+1+((5+9)/2)).
    testHarness.processWatermark1(new Watermark(18));
    testHarness.processWatermark2(new Watermark(18));
    assertEquals(0, testHarness.numEventTimeTimers());
    assertEquals(0, testHarness.numKeyedStateEntries());
    testHarness.processElement1(insertRecord(2L, "k1"));
    testHarness.processElement2(insertRecord(2L, "k2"));
    // The late rows with timestamp = 2 will not be cached, but a null padding result
    // for the left row will be emitted.
    assertEquals(0, testHarness.numKeyedStateEntries());
    assertEquals(0, testHarness.numEventTimeTimers());
    // Make sure the common (inner) join can be performed.
    testHarness.processElement1(insertRecord(19L, "k1"));
    testHarness.processElement1(insertRecord(20L, "k1"));
    testHarness.processElement2(insertRecord(26L, "k1"));
    testHarness.processElement2(insertRecord(25L, "k1"));
    testHarness.processElement1(insertRecord(21L, "k1"));
    testHarness.processElement2(insertRecord(39L, "k2"));
    testHarness.processElement2(insertRecord(40L, "k2"));
    testHarness.processElement1(insertRecord(50L, "k2"));
    testHarness.processElement1(insertRecord(49L, "k2"));
    testHarness.processElement2(insertRecord(41L, "k2"));
    testHarness.processWatermark1(new Watermark(100));
    testHarness.processWatermark2(new Watermark(100));
    List<Object> expectedOutput = new ArrayList<>();
    // The timestamp 14 is set with the triggered timer.
    expectedOutput.add(insertRecord(1L, "k1", null, null));
    expectedOutput.add(new Watermark(5));
    expectedOutput.add(new Watermark(9));
    expectedOutput.add(insertRecord(2L, "k1", null, null));
    expectedOutput.add(insertRecord(20L, "k1", 25L, "k1"));
    expectedOutput.add(insertRecord(21L, "k1", 25L, "k1"));
    expectedOutput.add(insertRecord(21L, "k1", 26L, "k1"));
    expectedOutput.add(insertRecord(49L, "k2", 40L, "k2"));
    expectedOutput.add(insertRecord(49L, "k2", 41L, "k2"));
    expectedOutput.add(insertRecord(50L, "k2", 41L, "k2"));
    // The timestamp 32 is set with the triggered timer.
    expectedOutput.add(insertRecord(19L, "k1", null, null));
    expectedOutput.add(new Watermark(91));
    assertor.assertOutputEquals("output wrong.", expectedOutput, testHarness.getOutput());
    testHarness.close();
}
Also used: RowData (org.apache.flink.table.data.RowData), ArrayList (java.util.ArrayList), Watermark (org.apache.flink.streaming.api.watermark.Watermark), Test (org.junit.Test)
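
The expected watermarks above (14 becomes 5, 18 becomes 9, 100 becomes 91) show the join holding back its output watermark by 9 ms; in Example 81 the hold-back is 10 ms (1 becomes -9, 18 becomes 8). A minimal sketch of that relation, assuming the delay equals max(leftRelativeSize, rightRelativeSize) + allowedLateness; the helper itself is hypothetical, not Flink API:

// Hypothetical helper expressing the watermark delay observed in the expected output
// of Examples 81 and 82.
static long heldBackWatermark(long inputWatermark, long leftRelativeSize,
        long rightRelativeSize, long allowedLateness) {
    return inputWatermark - Math.max(leftRelativeSize, rightRelativeSize) - allowedLateness;
}

For this test, heldBackWatermark(100, 5, 9, 0) = 91, matching the final Watermark(91) asserted above.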

Example 83 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

The class WindowJoinOperatorTest, method testOuterJoin.

@Test
public void testOuterJoin() throws Exception {
    KeyedTwoInputStreamOperatorTestHarness<RowData, RowData, RowData, RowData> testHarness = createTestHarness(FlinkJoinType.FULL);
    testHarness.open();
    testHarness.processWatermark1(new Watermark(1));
    testHarness.processWatermark2(new Watermark(1));
    // Test that late data is dropped
    testHarness.processElement1(insertRecord(toUtcTimestampMills(1L, shiftTimeZone), "k1"));
    assertEquals(0, testHarness.numEventTimeTimers());
    testHarness.processElement1(insertRecord(toUtcTimestampMills(3L, shiftTimeZone), "k1"));
    testHarness.processElement1(insertRecord(toUtcTimestampMills(3L, shiftTimeZone), "k1"));
    testHarness.processElement2(insertRecord(toUtcTimestampMills(3L, shiftTimeZone), "k1"));
    testHarness.processElement1(insertRecord(toUtcTimestampMills(6L, shiftTimeZone), "k1"));
    testHarness.processElement2(insertRecord(toUtcTimestampMills(9L, shiftTimeZone), "k1"));
    assertEquals(3, testHarness.numEventTimeTimers());
    assertEquals(4, testHarness.numKeyedStateEntries());
    testHarness.processWatermark1(new Watermark(10));
    testHarness.processWatermark2(new Watermark(10));
    List<Object> expectedOutput = new ArrayList<>();
    expectedOutput.add(new Watermark(1));
    expectedOutput.add(insertRecord(toUtcTimestampMills(3L, shiftTimeZone), "k1", toUtcTimestampMills(3L, shiftTimeZone), "k1"));
    expectedOutput.add(insertRecord(toUtcTimestampMills(3L, shiftTimeZone), "k1", toUtcTimestampMills(3L, shiftTimeZone), "k1"));
    expectedOutput.add(insertRecord(toUtcTimestampMills(6L, shiftTimeZone), "k1", null, null));
    expectedOutput.add(insertRecord(null, null, toUtcTimestampMills(9L, shiftTimeZone), "k1"));
    expectedOutput.add(new Watermark(10));
    ASSERTER.assertOutputEqualsSorted("output wrong.", expectedOutput, testHarness.getOutput());
    assertEquals(0, testHarness.numEventTimeTimers());
    assertEquals(0, testHarness.numKeyedStateEntries());
    testHarness.processElement1(insertRecord(toUtcTimestampMills(12L, shiftTimeZone), "k1"));
    testHarness.processElement1(insertRecord(toUtcTimestampMills(15L, shiftTimeZone), "k1"));
    testHarness.processElement2(insertRecord(toUtcTimestampMills(15L, shiftTimeZone), "k1"));
    testHarness.processElement2(insertRecord(toUtcTimestampMills(15L, shiftTimeZone), "k1"));
    assertEquals(3, testHarness.numKeyedStateEntries());
    testHarness.processWatermark1(new Watermark(13));
    testHarness.processWatermark2(new Watermark(13));
    expectedOutput.add(insertRecord(toUtcTimestampMills(12L, shiftTimeZone), "k1", null, null));
    expectedOutput.add(new Watermark(13));
    assertEquals(2, testHarness.numKeyedStateEntries());
    ASSERTER.assertOutputEquals("output wrong.", expectedOutput, testHarness.getOutput());
    testHarness.processWatermark1(new Watermark(18));
    testHarness.processWatermark2(new Watermark(18));
    expectedOutput.add(insertRecord(toUtcTimestampMills(15L, shiftTimeZone), "k1", toUtcTimestampMills(15L, shiftTimeZone), "k1"));
    expectedOutput.add(insertRecord(toUtcTimestampMills(15L, shiftTimeZone), "k1", toUtcTimestampMills(15L, shiftTimeZone), "k1"));
    expectedOutput.add(new Watermark(18));
    ASSERTER.assertOutputEquals("output wrong.", expectedOutput, testHarness.getOutput());
    testHarness.close();
}
Also used: RowData (org.apache.flink.table.data.RowData), ArrayList (java.util.ArrayList), Watermark (org.apache.flink.streaming.api.watermark.Watermark), Test (org.junit.Test)
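
Both watermark inputs are always advanced together in these tests because a two-input operator's event-time clock only moves with the smaller of its two input watermarks; this is standard Flink behavior rather than anything specific to WindowJoinOperator. A trivial sketch:

// The operator-level event time is the minimum of the per-input watermarks, so the
// windows ending at or before 10 only fire after processWatermark1(10) and processWatermark2(10).
static long combinedWatermark(long watermark1, long watermark2) {
    return Math.min(watermark1, watermark2);
}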

Example 84 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

The class InputTest, method setup.

@Before
public void setup() {
    element = new StreamRecord<>(GenericRowData.of(StringData.fromString("123")), 456);
    watermark = new Watermark(1223456789);
    latencyMarker = new LatencyMarker(122345678, new OperatorID(123, 456), 1);
}
Also used: LatencyMarker (org.apache.flink.streaming.runtime.streamrecord.LatencyMarker), OperatorID (org.apache.flink.runtime.jobgraph.OperatorID), Watermark (org.apache.flink.streaming.api.watermark.Watermark), Before (org.junit.Before)

Example 85 with Watermark

Use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

The class WindowOperatorTest, method testEventTimeSessionWindows.

@Test
@SuppressWarnings("unchecked")
public void testEventTimeSessionWindows() throws Exception {
    closeCalled.set(0);
    WindowOperator operator =
            WindowOperatorBuilder.builder()
                    .withInputFields(inputFieldTypes)
                    .withShiftTimezone(shiftTimeZone)
                    .session(Duration.ofSeconds(3))
                    .withEventTime(2)
                    .aggregateAndBuild(
                            getTimeWindowAggFunction(),
                            equaliser,
                            accTypes,
                            aggResultTypes,
                            windowTypes);
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    testHarness.open();
    // add elements out-of-order
    testHarness.processElement(insertRecord("key2", 1, 0L));
    testHarness.processElement(insertRecord("key2", 2, 1000L));
    testHarness.processElement(insertRecord("key2", 3, 2500L));
    testHarness.processElement(insertRecord("key1", 1, 10L));
    testHarness.processElement(insertRecord("key1", 2, 1000L));
    // do a snapshot, close and restore again
    OperatorSubtaskState snapshotV2 = testHarness.snapshot(0L, 0);
    testHarness.close();
    expectedOutput.clear();
    testHarness = createTestHarness(operator);
    testHarness.setup();
    testHarness.initializeState(snapshotV2);
    testHarness.open();
    assertEquals(0L, operator.getWatermarkLatency().getValue());
    testHarness.processElement(insertRecord("key1", 3, 2500L));
    testHarness.processElement(insertRecord("key2", 4, 5501L));
    testHarness.processElement(insertRecord("key2", 5, 6000L));
    testHarness.processElement(insertRecord("key2", 5, 6000L));
    testHarness.processElement(insertRecord("key2", 6, 6050L));
    testHarness.processWatermark(new Watermark(12000));
    expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key1", 6L, 3L, localMills(10L), localMills(5500L), localMills(5499L))));
    expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key2", 6L, 3L, localMills(0L), localMills(5500L), localMills(5499L))));
    expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key2", 20L, 4L, localMills(5501L), localMills(9050L), localMills(9049L))));
    expectedOutput.add(new Watermark(12000));
    // add a late element
    testHarness.processElement(insertRecord("key1", 3, 4000L));
    testHarness.processElement(insertRecord("key2", 10, 15000L));
    testHarness.processElement(insertRecord("key2", 20, 15000L));
    testHarness.processWatermark(new Watermark(17999));
    expectedOutput.addAll(doubleRecord(isTableAggregate, insertRecord("key2", 30L, 2L, localMills(15000L), localMills(18000L), localMills(17999L))));
    expectedOutput.add(new Watermark(17999));
    assertor.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.setProcessingTime(18000);
    assertEquals(1L, operator.getWatermarkLatency().getValue());
    testHarness.close();
    // the operator was already closed once before the restore, so close() has now been called twice
    assertEquals("Close was not called.", 2, closeCalled.get());
    assertEquals(1, operator.getNumLateRecordsDropped().getCount());
}
Also used: JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData), GenericRowData (org.apache.flink.table.data.GenericRowData), RowData (org.apache.flink.table.data.RowData), ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue), Watermark (org.apache.flink.streaming.api.watermark.Watermark), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), Test (org.junit.Test)
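
The session windows asserted above can be reconstructed by hand: with a 3-second gap, a session's end is the timestamp of its last element plus 3000 ms. The helper below is a hypothetical illustration of that rule, not part of WindowOperator:

// For key2, the elements at 0 / 1000 / 2500 form the session [0, 5500) (max timestamp 5499),
// and the elements at 5501 / 6000 / 6000 / 6050 form [5501, 9050) (max timestamp 9049),
// matching the expected records above.
static long sessionWindowEnd(long lastElementTimestamp, long sessionGapMillis) {
    return lastElementTimestamp + sessionGapMillis;
}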

Aggregations

Watermark (org.apache.flink.streaming.api.watermark.Watermark): 318
Test (org.junit.Test): 258
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 124
RowData (org.apache.flink.table.data.RowData): 83
ArrayList (java.util.ArrayList): 62
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 51
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 51
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 45
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord): 39
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness): 39
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 36
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 30
List (java.util.List): 26
Map (java.util.Map): 26
Configuration (org.apache.flink.configuration.Configuration): 25
GenericRowData (org.apache.flink.table.data.GenericRowData): 25
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 24
ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor): 20
Event (org.apache.flink.cep.Event): 20
SubEvent (org.apache.flink.cep.SubEvent): 20