Example 6 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

The class SlicingWindowAggOperatorTest, method testProcessingTimeTumblingWindows.

@Test
public void testProcessingTimeTumblingWindows() throws Exception {
    final SliceAssigner assigner = SliceAssigners.tumbling(-1, shiftTimeZone, Duration.ofHours(5));
    // the assigned windows should be as follows, e.g. when the given time zone is GMT+08:00:
    // local windows (timestamps in GMT+08:00)  <=>  epoch windows (timestamps in UTC)
    // [1970-01-01 00:00, 1970-01-01 05:00) <=> [1969-12-31 16:00, 1969-12-31 21:00)
    // [1970-01-01 05:00, 1970-01-01 10:00) <=> [1969-12-31 21:00, 1970-01-01 02:00)
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .build();
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.open();
    // process elements
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T00:00:00.003"));
    // timestamp is ignored in processing time
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(7000L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(7000L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(7000L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(7000L)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T05:00:00"));
    expectedOutput.add(insertRecord("key2", 3L, 3L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T05:00:00")));
    expectedOutput.add(insertRecord("key1", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T05:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(7000L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(7000L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(7000L)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T10:00:01"));
    expectedOutput.add(insertRecord("key1", 3L, 3L, epochMills(UTC_ZONE_ID, "1970-01-01T05:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T10:00:00")));
    assertEquals(Long.valueOf(0L), operator.getWatermarkLatency().getValue());
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.close();
}
Also used: SliceAssigner (org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner), GenericRowData (org.apache.flink.table.data.GenericRowData), RowData (org.apache.flink.table.data.RowData), JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData), ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue), Test (org.junit.Test)
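As a side note, the window boundaries asserted above follow from simple alignment arithmetic. Below is a minimal standalone sketch (plain Java, no Flink APIs; the five-hour size and the 3 ms processing time mirror the test) of the start/end computation a zero-offset tumbling assigner performs:

import java.time.Duration;

public class TumblingWindowSketch {
    public static void main(String[] args) {
        long size = Duration.ofHours(5).toMillis();
        // e.g. the processing time 1970-01-01T00:00:00.003 used in the test
        long timestamp = 3L;
        // align the timestamp down to the window grid (zero offset assumed)
        long windowStart = timestamp - Math.floorMod(timestamp, size);
        long windowEnd = windowStart + size;
        // prints "0 .. 18000000", i.e. the window [00:00, 05:00)
        System.out.println(windowStart + " .. " + windowEnd);
    }
}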

Example 7 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

The class SlicingWindowAggOperatorTest, method testEventTimeCumulativeWindows.

@Test
public void testEventTimeCumulativeWindows() throws Exception {
    final SliceAssigner assigner = SliceAssigners.cumulative(2, shiftTimeZone, Duration.ofSeconds(3), Duration.ofSeconds(1));
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .build();
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.open();
    // process elements
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    // add elements out-of-order
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(2999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3000L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(20L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(0L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1998L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1000L)));
    testHarness.processWatermark(new Watermark(999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(0L), localMills(1000L)));
    expectedOutput.add(new Watermark(999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processWatermark(new Watermark(1999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(0L), localMills(2000L)));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(0L), localMills(2000L)));
    expectedOutput.add(new Watermark(1999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // do a snapshot, close and restore again
    testHarness.prepareSnapshotPreBarrier(0L);
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
    testHarness.close();
    assertTrue("Close was not called.", aggsFunction.closeCalled.get() > 0);
    expectedOutput.clear();
    testHarness = createTestHarness(operator);
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    // the late event would not trigger window [0, 2000L) again, even if the job restores
    // from a savepoint
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1000L)));
    testHarness.processWatermark(new Watermark(1999));
    expectedOutput.add(new Watermark(1999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processWatermark(new Watermark(2999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(0L), localMills(3000L)));
    expectedOutput.add(insertRecord("key2", 5L, 5L, localMills(0L), localMills(3000L)));
    expectedOutput.add(new Watermark(2999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processWatermark(new Watermark(3999));
    expectedOutput.add(insertRecord("key2", 1L, 1L, localMills(3000L), localMills(4000L)));
    expectedOutput.add(new Watermark(3999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // late for window [3K, 4K), but should still be accumulated into [3K, 5K) and [3K, 6K)
    testHarness.processElement(insertRecord("key1", 2, fromEpochMillis(3500L)));
    testHarness.processWatermark(new Watermark(4999));
    expectedOutput.add(insertRecord("key2", 1L, 1L, localMills(3000L), localMills(5000L)));
    expectedOutput.add(insertRecord("key1", 2L, 1L, localMills(3000L), localMills(5000L)));
    expectedOutput.add(new Watermark(4999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // late for all assigned windows, should be dropped
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(2999L)));
    testHarness.processWatermark(new Watermark(5999));
    expectedOutput.add(insertRecord("key2", 1L, 1L, localMills(3000L), localMills(6000L)));
    expectedOutput.add(insertRecord("key1", 2L, 1L, localMills(3000L), localMills(6000L)));
    expectedOutput.add(new Watermark(5999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // those don't have any effect...
    testHarness.processWatermark(new Watermark(6999));
    testHarness.processWatermark(new Watermark(7999));
    expectedOutput.add(new Watermark(6999));
    expectedOutput.add(new Watermark(7999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    assertEquals(1, operator.getNumLateRecordsDropped().getCount());
    testHarness.close();
}
Also used: SliceAssigner (org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner), GenericRowData (org.apache.flink.table.data.GenericRowData), RowData (org.apache.flink.table.data.RowData), JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData), ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue), Watermark (org.apache.flink.streaming.api.watermark.Watermark), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), Test (org.junit.Test)
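The cumulative firings above can be reasoned about with a small sketch. The helper below is hypothetical, not the operator's slicing code: assuming cumulative windows share the start of their enclosing max-size window and grow by one step at a time, an element belongs to every window that ends after it.

import java.util.ArrayList;
import java.util.List;

public class CumulativeWindowSketch {
    // Hypothetical helper: enumerate the cumulative windows [maxStart, end)
    // that cover an element at timestamp t, for a max size and a step.
    static List<long[]> cumulativeWindows(long t, long maxSize, long step) {
        long maxStart = t - Math.floorMod(t, maxSize); // enclosing max-window start
        List<long[]> windows = new ArrayList<>();
        for (long end = maxStart + step; end <= maxStart + maxSize; end += step) {
            if (end > t) { // the element is covered by every window ending after it
                windows.add(new long[] {maxStart, end});
            }
        }
        return windows;
    }

    public static void main(String[] args) {
        // matches the test: an element at 1000L lands in [0, 2000) and [0, 3000)
        for (long[] w : cumulativeWindows(1000L, 3000L, 1000L)) {
            System.out.println("[" + w[0] + ", " + w[1] + ")");
        }
    }
}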

Example 8 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

The class SlicingWindowAggOperatorTest, method testEventTimeTumblingWindows.

@Test
public void testEventTimeTumblingWindows() throws Exception {
    final SliceAssigner assigner = SliceAssigners.tumbling(2, shiftTimeZone, Duration.ofSeconds(3));
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .build();
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.open();
    // process elements
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    // add elements out-of-order
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(3000L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(20L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(0L)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1998L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1999L)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(1000L)));
    testHarness.processWatermark(new Watermark(999));
    expectedOutput.add(new Watermark(999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processWatermark(new Watermark(1999));
    expectedOutput.add(new Watermark(1999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // do a snapshot, close and restore again
    testHarness.prepareSnapshotPreBarrier(0L);
    OperatorSubtaskState snapshot = testHarness.snapshot(0L, 0);
    testHarness.close();
    assertTrue("Close was not called.", aggsFunction.closeCalled.get() > 0);
    expectedOutput.clear();
    testHarness = createTestHarness(operator);
    testHarness.setup();
    testHarness.initializeState(snapshot);
    testHarness.open();
    testHarness.processWatermark(new Watermark(2999));
    expectedOutput.add(insertRecord("key1", 3L, 3L, localMills(0L), localMills(3000L)));
    expectedOutput.add(insertRecord("key2", 3L, 3L, localMills(0L), localMills(3000L)));
    expectedOutput.add(new Watermark(2999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processWatermark(new Watermark(3999));
    expectedOutput.add(new Watermark(3999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // late element, should be dropped
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(2500L)));
    testHarness.processWatermark(new Watermark(4999));
    expectedOutput.add(new Watermark(4999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // late element, should be dropped
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(2999L)));
    testHarness.processWatermark(new Watermark(5999));
    expectedOutput.add(insertRecord("key2", 2L, 2L, localMills(3000L), localMills(6000L)));
    expectedOutput.add(new Watermark(5999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    // those don't have any effect...
    testHarness.processWatermark(new Watermark(6999));
    testHarness.processWatermark(new Watermark(7999));
    expectedOutput.add(new Watermark(6999));
    expectedOutput.add(new Watermark(7999));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    assertEquals(2, operator.getNumLateRecordsDropped().getCount());
    testHarness.close();
}
Also used: SliceAssigner (org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner), GenericRowData (org.apache.flink.table.data.GenericRowData), RowData (org.apache.flink.table.data.RowData), JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData), ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue), Watermark (org.apache.flink.streaming.api.watermark.Watermark), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), Test (org.junit.Test)
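The two dropped records above follow the usual event-time lateness rule. Here is a minimal sketch of that rule as a simplified model, not the operator's actual code; it assumes a tumbling window counts as fired once the watermark reaches windowEnd - 1:

public class LatenessSketch {
    // Simplified rule: an element is late when its window has already fired,
    // i.e. the watermark has reached the window's end minus one.
    static boolean isLate(long elementTimestamp, long windowSize, long currentWatermark) {
        long windowEnd =
                elementTimestamp - Math.floorMod(elementTimestamp, windowSize) + windowSize;
        return currentWatermark >= windowEnd - 1;
    }

    public static void main(String[] args) {
        System.out.println(isLate(2500L, 3000L, 3999L)); // true: dropped in the test
        System.out.println(isLate(2999L, 3000L, 4999L)); // true: dropped in the test
        System.out.println(isLate(3500L, 3000L, 3999L)); // false: [3000, 6000) still open
    }
}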

Example 9 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

The class SlicingWindowAggOperatorTest, method testInvalidWindows.

@Test
public void testInvalidWindows() {
    final SliceAssigner assigner = SliceAssigners.hopping(2, shiftTimeZone, Duration.ofSeconds(3), Duration.ofSeconds(1));
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    try {
        // hopping window without specifying the COUNT(*) index
        SlicingWindowAggOperatorBuilder.builder()
                .inputSerializer(INPUT_ROW_SER)
                .shiftTimeZone(shiftTimeZone)
                .keySerializer(KEY_SER)
                .assigner(assigner)
                .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                .build();
        fail("should fail");
    } catch (Exception e) {
        assertThat(e, containsMessage("Hopping window requires a COUNT(*) in the aggregate functions."));
    }
}
Also used: SliceAssigner (org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner), Test (org.junit.Test)
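For contrast, a hedged sketch of what the passing configuration would presumably look like, assuming the builder exposes a countStarIndex(int) setter as the error message implies; the index 1 below is a hypothetical position of COUNT(*) among the aggregates:

    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    // hypothetical: tells the operator which aggregate is COUNT(*),
                    // which hopping windows need in order to merge slice state
                    .countStarIndex(1)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .build();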

Example 10 with SliceAssigner

Use of org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner in project flink by apache.

The class SlicingWindowAggOperatorTest, method testProcessingTimeCumulativeWindows.

@Test
public void testProcessingTimeCumulativeWindows() throws Exception {
    final SliceAssigner assigner = SliceAssigners.cumulative(-1, shiftTimeZone, Duration.ofDays(1), Duration.ofHours(8));
    final SumAndCountAggsFunction aggsFunction = new SumAndCountAggsFunction(assigner);
    SlicingWindowOperator<RowData, ?> operator =
            SlicingWindowAggOperatorBuilder.builder()
                    .inputSerializer(INPUT_ROW_SER)
                    .shiftTimeZone(shiftTimeZone)
                    .keySerializer(KEY_SER)
                    .assigner(assigner)
                    .aggregate(wrapGenerated(aggsFunction), ACC_SER)
                    .build();
    OneInputStreamOperatorTestHarness<RowData, RowData> testHarness = createTestHarness(operator);
    testHarness.setup(OUT_SERIALIZER);
    testHarness.open();
    // process elements
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    // timestamp is ignored in processing time
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T00:00:00.003"));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T08:00:00"));
    expectedOutput.add(insertRecord("key2", 1L, 1L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T08:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-01T16:00:00"));
    expectedOutput.add(insertRecord("key2", 3L, 3L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-01T16:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-02T00:00:00"));
    expectedOutput.add(insertRecord("key2", 3L, 3L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-02T00:00:00")));
    expectedOutput.add(insertRecord("key1", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-01T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-02T00:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key2", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.processElement(insertRecord("key1", 1, fromEpochMillis(Long.MAX_VALUE)));
    testHarness.setProcessingTime(epochMills(shiftTimeZone, "1970-01-03T08:00:00"));
    expectedOutput.add(insertRecord("key1", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-02T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-02T08:00:00")));
    expectedOutput.add(insertRecord("key2", 1L, 1L, epochMills(UTC_ZONE_ID, "1970-01-02T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-02T08:00:00")));
    expectedOutput.add(insertRecord("key1", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-02T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-02T16:00:00")));
    expectedOutput.add(insertRecord("key2", 1L, 1L, epochMills(UTC_ZONE_ID, "1970-01-02T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-02T16:00:00")));
    expectedOutput.add(insertRecord("key1", 2L, 2L, epochMills(UTC_ZONE_ID, "1970-01-02T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-03T00:00:00")));
    expectedOutput.add(insertRecord("key2", 1L, 1L, epochMills(UTC_ZONE_ID, "1970-01-02T00:00:00"), epochMills(UTC_ZONE_ID, "1970-01-03T00:00:00")));
    ASSERTER.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput());
    testHarness.close();
    assertTrue("Close was not called.", aggsFunction.closeCalled.get() > 0);
}
Also used: SliceAssigner (org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner), GenericRowData (org.apache.flink.table.data.GenericRowData), RowData (org.apache.flink.table.data.RowData), JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData), ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue), Test (org.junit.Test)
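The epochMills(...) calls above convert a wall-clock string in the shifted time zone to epoch milliseconds. A plausible standalone equivalent, written here as an assumption about the helper's behaviour rather than the test's actual implementation:

import java.time.LocalDateTime;
import java.time.ZoneId;

public class EpochMillsSketch {
    // Assumed behaviour: interpret the ISO local date-time in the given zone
    // and return the corresponding epoch milliseconds.
    static long epochMills(ZoneId zone, String localDateTime) {
        return LocalDateTime.parse(localDateTime).atZone(zone).toInstant().toEpochMilli();
    }

    public static void main(String[] args) {
        // 08:00 local time in UTC is 8 hours after the epoch: prints 28800000
        System.out.println(epochMills(ZoneId.of("UTC"), "1970-01-01T08:00:00"));
    }
}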

Aggregations

SliceAssigner (org.apache.flink.table.runtime.operators.window.slicing.SliceAssigner): 11
RowData (org.apache.flink.table.data.RowData): 9
Test (org.junit.Test): 7
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 6
GenericRowData (org.apache.flink.table.data.GenericRowData): 6
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 6
ZoneId (java.time.ZoneId): 3
Transformation (org.apache.flink.api.dag.Transformation): 3
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 3
Watermark (org.apache.flink.streaming.api.watermark.Watermark): 3
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 3
AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList): 3
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector): 3
RowDataSerializer (org.apache.flink.table.runtime.typeutils.RowDataSerializer): 3
RowType (org.apache.flink.table.types.logical.RowType): 3
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 2
LogicalType (org.apache.flink.table.types.logical.LogicalType): 2
CumulativeWindowSpec (org.apache.flink.table.planner.plan.logical.CumulativeWindowSpec): 1
HoppingWindowSpec (org.apache.flink.table.planner.plan.logical.HoppingWindowSpec): 1
SliceAttachedWindowingStrategy (org.apache.flink.table.planner.plan.logical.SliceAttachedWindowingStrategy): 1