Search in sources :

Example 51 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class KinesisDataFetcher method emitRecordAndUpdateState.

// ------------------------------------------------------------------------
// Thread-safe operations for record emitting and shard state updating
// that assure atomicity with respect to the checkpoint lock
// ------------------------------------------------------------------------
/**
 * Prepare a record and hand it over to the {@link RecordEmitter}, which may collect it
 * asynchronously. This method is called by {@link ShardConsumer}s.
 *
 * <p>When the shard has a periodic watermark assigner, the supplied {@code recordTimestamp} is
 * replaced by the timestamp that assigner extracts from the record, and the assigner's current
 * watermark is attached to the queued record. The shard's last record timestamp and last-updated
 * time are refreshed on every call.
 *
 * @param record the record to collect
 * @param recordTimestamp timestamp to attach to the collected record
 * @param shardStateIndex index of the shard to update in subscribedShardsState; this index
 *     should be the returned value from {@link
 *     KinesisDataFetcher#registerNewSubscribedShardState(KinesisStreamShardState)}, called when
 *     the shard state was registered.
 * @param lastSequenceNumber the last sequence number value to update
 * @throws RuntimeException wrapping the {@link InterruptedException} if the calling thread is
 *     interrupted while enqueueing the record; the thread's interrupt status is restored before
 *     the exception is thrown
 */
protected void emitRecordAndUpdateState(T record, long recordTimestamp, int shardStateIndex, SequenceNumber lastSequenceNumber) {
    ShardWatermarkState sws = shardWatermarks.get(shardStateIndex);
    Preconditions.checkNotNull(sws, "shard watermark state initialized in registerNewSubscribedShardState");
    Watermark watermark = null;
    if (sws.periodicWatermarkAssigner != null) {
        recordTimestamp = sws.periodicWatermarkAssigner.extractTimestamp(record, sws.lastRecordTimestamp);
        // track watermark per record since extractTimestamp has side effect
        watermark = sws.periodicWatermarkAssigner.getCurrentWatermark();
    }
    sws.lastRecordTimestamp = recordTimestamp;
    sws.lastUpdated = getCurrentTimeMillis();
    RecordWrapper<T> recordWrapper = new RecordWrapper<>(record, recordTimestamp);
    recordWrapper.shardStateIndex = shardStateIndex;
    recordWrapper.lastSequenceNumber = lastSequenceNumber;
    recordWrapper.watermark = watermark;
    try {
        sws.emitQueue.put(recordWrapper);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still observe the
        // interruption after the checked exception has been wrapped.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
}
Also used : Watermark(org.apache.flink.streaming.api.watermark.Watermark)

Example 52 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class BatchExecutionInternalTimeServiceTest method testCurrentWatermark.

/**
 * Checks the watermark value reported by the timer service at each step: {@code Long.MIN_VALUE}
 * when queried from the test body, and {@code Long.MAX_VALUE} when queried from inside the
 * event-time trigger callback. Also checks that both registered timers (123 and 124) have been
 * recorded, in that order, once {@code Watermark.MAX_WATERMARK} has been advanced.
 */
@Test
public void testCurrentWatermark() throws Exception {
    BatchExecutionKeyedStateBackend<Integer> backend =
            new BatchExecutionKeyedStateBackend<>(KEY_SERIALIZER, new KeyGroupRange(0, 1));
    InternalTimeServiceManager<Integer> manager =
            BatchExecutionInternalTimeServiceManager.create(
                    backend,
                    this.getClass().getClassLoader(),
                    new DummyKeyContext(),
                    new TestProcessingTimeService(),
                    Collections.emptyList());

    // Timestamps of the timers seen by the trigger, in firing order.
    List<Long> firedTimestamps = new ArrayList<>();
    TriggerWithTimerServiceAccess<Integer, VoidNamespace> trigger =
            TriggerWithTimerServiceAccess.eventTimeTrigger(
                    (firedTimer, service) -> {
                        // inside the callback the service must report the maximum watermark
                        assertThat(service.currentWatermark(), equalTo(Long.MAX_VALUE));
                        firedTimestamps.add(firedTimer.getTimestamp());
                    });
    InternalTimerService<VoidNamespace> timers =
            manager.getInternalTimerService(
                    "test", KEY_SERIALIZER, new VoidNamespaceSerializer(), trigger);
    trigger.setTimerService(timers);

    assertThat(timers.currentWatermark(), equalTo(Long.MIN_VALUE));

    backend.setCurrentKey(1);
    timers.registerEventTimeTimer(VoidNamespace.INSTANCE, 123);
    assertThat(timers.currentWatermark(), equalTo(Long.MIN_VALUE));

    // a watermark other than Long.MAX_VALUE is expected to leave the reported value untouched
    manager.advanceWatermark(new Watermark(1000));
    assertThat(timers.currentWatermark(), equalTo(Long.MIN_VALUE));

    // switching to another key fires all timers
    backend.setCurrentKey(2);
    assertThat(timers.currentWatermark(), equalTo(Long.MIN_VALUE));
    timers.registerEventTimeTimer(VoidNamespace.INSTANCE, 124);

    // Long.MAX_VALUE is expected to fire whatever is still pending for the remaining key
    manager.advanceWatermark(Watermark.MAX_WATERMARK);
    assertThat(firedTimestamps, equalTo(Arrays.asList(123L, 124L)));
}
Also used : KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) VoidNamespace(org.apache.flink.runtime.state.VoidNamespace) VoidNamespaceSerializer(org.apache.flink.runtime.state.VoidNamespaceSerializer) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 53 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class SourceOperatorEventTimeTest method testPerSplitOutputEventWatermarks.

/**
 * Runs a fixed sequence of per-split output actions through {@code testSequenceOfWatermarks}
 * using an on-event watermark generator, and expects the resulting watermarks to be 100, 150
 * and 200 (or none at all, as permitted by {@code assertWatermarksOrEmpty}).
 */
@Test
public void testPerSplitOutputEventWatermarks() throws Exception {
    final WatermarkStrategy<Integer> strategy =
            WatermarkStrategy.forGenerator(ctx -> new OnEventTestWatermarkGenerator<>());

    final List<Watermark> observed =
            testSequenceOfWatermarks(
                    emitProgressiveWatermarks,
                    strategy,
                    // first action: create the outputs for both splits
                    out -> {
                        out.createOutputForSplit("one");
                        out.createOutputForSplit("two");
                    },
                    out -> out.createOutputForSplit("one").collect(0, 100L),
                    out -> out.createOutputForSplit("two").collect(0, 200L),
                    out -> out.createOutputForSplit("one").collect(0, 150L),
                    // split "one" is released before the final record on split "two"
                    out -> out.releaseOutputForSplit("one"),
                    out -> out.createOutputForSplit("two").collect(0, 200L));

    assertWatermarksOrEmpty(
            observed, new Watermark(100L), new Watermark(150L), new Watermark(200L));
}
Also used : Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Example 54 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class MultiInputSortingDataInputsTest method twoInputOrderTest.

/**
 * Drives a two-input processor in which one input is wrapped as a sorted input and the other as
 * a pass-through ("preferred") input, then asserts the exact order of the collected elements.
 *
 * <p>The expected output lists the preferred input's elements first, in their original order,
 * followed by the sorted input's records grouped by value (the key selector is the identity) and
 * ordered within each group by the second constructor argument (presumably the timestamp —
 * TODO confirm), and finally the sorted input's {@code Watermark.MAX_WATERMARK}.
 *
 * @param preferredIndex input index assigned to the pass-through input and its processor slot
 * @param sortedIndex input index assigned to the sorted input and its processor slot
 * @throws Exception if wrapping, processing or closing the inputs fails
 */
@SuppressWarnings("unchecked")
public void twoInputOrderTest(int preferredIndex, int sortedIndex) throws Exception {
    CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
    // Deliberately unordered records for the input that is handed to wrapInputs as "sorted".
    List<StreamElement> sortedInputElements = Arrays.asList(new StreamRecord<>(1, 3), new StreamRecord<>(1, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 3), new StreamRecord<>(1, 2), new StreamRecord<>(2, 2), Watermark.MAX_WATERMARK);
    CollectionDataInput<Integer> sortedInput = new CollectionDataInput<>(sortedInputElements, sortedIndex);
    // Elements for the pass-through input; the assertion below expects them unchanged and first.
    List<StreamElement> preferredInputElements = Arrays.asList(new StreamRecord<>(99, 3), new StreamRecord<>(99, 1), new Watermark(99L));
    CollectionDataInput<Integer> preferredInput = new CollectionDataInput<>(preferredInputElements, preferredIndex);
    // Identity key selector: each record's value is its own key.
    KeySelector<Integer, Integer> keySelector = value -> value;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { sortedInput }, new KeySelector[] { keySelector }, new TypeSerializer[] { new IntSerializer() }, new IntSerializer(), new StreamTaskInput[] { preferredInput }, environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        StreamTaskInput<?>[] preferredDataInputs = selectableSortingInputs.getPassThroughInputs();
        // Both wrapped inputs are closed when this try block exits.
        try (StreamTaskInput<Object> preferredTaskInput = (StreamTaskInput<Object>) preferredDataInputs[0];
            StreamTaskInput<Object> sortedTaskInput = (StreamTaskInput<Object>) sortingDataInputs[0]) {
            MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
            // Each processor is placed in the slot given by its input index; both write to the
            // same collecting output.
            @SuppressWarnings("rawtypes") StreamOneInputProcessor[] inputProcessors = new StreamOneInputProcessor[2];
            inputProcessors[preferredIndex] = new StreamOneInputProcessor<>(preferredTaskInput, collectingDataOutput, new DummyOperatorChain());
            inputProcessors[sortedIndex] = new StreamOneInputProcessor<>(sortedTaskInput, collectingDataOutput, new DummyOperatorChain());
            StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, inputProcessors);
            // Keep processing until the combined processor reports the end of input.
            DataInputStatus inputStatus;
            do {
                inputStatus = processor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
        }
    }
    assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(99, 3), new StreamRecord<>(99, 1), // the (last) watermark of the preferred input, i.e. Watermark(99)
    new Watermark(99L), new StreamRecord<>(1, 1), new StreamRecord<>(1, 2), new StreamRecord<>(1, 3), new StreamRecord<>(2, 1), new StreamRecord<>(2, 2), new StreamRecord<>(2, 3), // max watermark from the sorted input
    Watermark.MAX_WATERMARK)));
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Arrays(java.util.Arrays) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Configuration(org.apache.flink.configuration.Configuration) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) List(java.util.List) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) Configuration(org.apache.flink.configuration.Configuration) StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) 
StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Watermark(org.apache.flink.streaming.api.watermark.Watermark) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler)

Example 55 with Watermark

use of org.apache.flink.streaming.api.watermark.Watermark in project flink by apache.

the class IntervalJoinOperatorTest method processElementsAndWatermarks.

/**
 * Feeds four records (values 1 through 4) into each side of the harness, following every record
 * with a watermark of the same value. The side that goes first is the left-hand side when
 * {@code lhsFasterThanRhs} is set and the right-hand side otherwise; the other side is fed
 * completely afterwards.
 *
 * @param testHarness the harness receiving the elements and watermarks
 * @throws Exception if the harness fails while processing an element or a watermark
 */
private void processElementsAndWatermarks(TestHarness testHarness) throws Exception {
    // Two passes, one per side; the flag says which side the current pass feeds.
    boolean feedLhs = lhsFasterThanRhs;
    for (int pass = 0; pass < 2; pass++) {
        for (int value = 1; value <= 4; value++) {
            if (feedLhs) {
                testHarness.processElement1(createStreamRecord(value, "lhs"));
                testHarness.processWatermark1(new Watermark(value));
            } else {
                testHarness.processElement2(createStreamRecord(value, "rhs"));
                testHarness.processWatermark2(new Watermark(value));
            }
        }
        // switch to the other side for the second pass
        feedLhs = !feedLhs;
    }
}
Also used : Watermark(org.apache.flink.streaming.api.watermark.Watermark) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint)

Aggregations

Watermark (org.apache.flink.streaming.api.watermark.Watermark)318 Test (org.junit.Test)258 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)124 RowData (org.apache.flink.table.data.RowData)83 ArrayList (java.util.ArrayList)62 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)51 OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)51 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)45 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)39 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)39 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)36 TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)30 List (java.util.List)26 Map (java.util.Map)26 Configuration (org.apache.flink.configuration.Configuration)25 GenericRowData (org.apache.flink.table.data.GenericRowData)25 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)24 ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor)20 Event (org.apache.flink.cep.Event)20 SubEvent (org.apache.flink.cep.SubEvent)20