Search in sources :

Example 91 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class KafkaMigrationTestBase method writeSnapshot.

/**
 * Writes the binary operator snapshot for the configured savepoint version.
 *
 * <p>The test carries {@code @Ignore}, so it only runs when triggered manually to
 * (re)generate snapshot data. The clusters are shut down in the {@code finally} block
 * whether or not writing the snapshot succeeds.
 *
 * @throws Exception if starting the clusters, building the state or writing it fails
 */
@Ignore
@Test
public void writeSnapshot() throws Exception {
    try {
        // A target version is required, because the output path is derived from it.
        checkState(flinkGenerateSavepointVersion.isPresent());
        startClusters();
        final OperatorSubtaskState testState = initializeTestState();
        final String snapshotPath = getOperatorSnapshotPath(flinkGenerateSavepointVersion.get());
        OperatorSnapshotUtil.writeStateHandle(testState, snapshotPath);
    } finally {
        shutdownClusters();
    }
}
Also used : OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 92 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class FlinkKinesisConsumerMigrationTest method writeSnapshot.

// ------------------------------------------------------------------------
/**
 * Runs a dummy Kinesis consumer on top of the given shard state, snapshots the source
 * operator and writes that snapshot to {@code path}.
 *
 * <p>The source runs on a separate thread. The operator is closed and the thread is joined
 * even when snapshotting or writing fails, and a failure captured on the source thread is
 * rethrown instead of being silently dropped.
 *
 * @param path destination passed to {@code OperatorSnapshotUtil.writeStateHandle}
 * @param state shard metadata to sequence number mapping handed to the test fetcher; its keys
 *     are also used to build the list of initially discovered shards
 * @throws Exception if setting up the harness, snapshotting or writing the snapshot fails
 * @throws AssertionError if the thread running the source recorded a failure
 */
@SuppressWarnings("unchecked")
private void writeSnapshot(String path, HashMap<StreamShardMetadata, SequenceNumber> state) throws Exception {
    // Build one shard handle per shard metadata entry of the state to snapshot.
    final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(state.size());
    for (StreamShardMetadata shardMetadata : state.keySet()) {
        Shard shard = new Shard();
        shard.setShardId(shardMetadata.getShardId());
        SequenceNumberRange sequenceNumberRange = new SequenceNumberRange();
        sequenceNumberRange.withStartingSequenceNumber("1");
        shard.setSequenceNumberRange(sequenceNumberRange);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), shard));
    }
    final TestFetcher<String> fetcher = new TestFetcher<>(Collections.singletonList(TEST_STREAM_NAME), new TestSourceContext<>(), new TestRuntimeContext(true, 1, 0), TestUtils.getStandardProperties(), new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()), state, initialDiscoveryShards);
    final DummyFlinkKinesisConsumer<String> consumer = new DummyFlinkKinesisConsumer<>(fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumer);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    testHarness.setup();
    testHarness.open();
    final AtomicReference<Throwable> error = new AtomicReference<>();
    // run the source asynchronously
    Thread runner = new Thread() {

        @Override
        public void run() {
            try {
                consumer.run(new TestSourceContext<>());
            } catch (Throwable t) {
                // Printed right away because the reference is only inspected after a
                // successful snapshot; an earlier failure on the calling thread skips that check.
                t.printStackTrace();
                error.set(t);
            }
        }
    };
    runner.start();
    try {
        fetcher.waitUntilRun();
        final OperatorSubtaskState snapshot;
        // Snapshots are taken while holding the harness' checkpoint lock.
        synchronized (testHarness.getCheckpointLock()) {
            snapshot = testHarness.snapshot(0L, 0L);
        }
        OperatorSnapshotUtil.writeStateHandle(snapshot, path);
    } finally {
        // Always stop the operator and wait for the source thread, even on failure.
        try {
            consumerOperator.close();
        } finally {
            runner.join();
        }
    }
    // Surface failures of the asynchronous source instead of discarding them.
    final Throwable sourceFailure = error.get();
    if (sourceFailure != null) {
        throw new AssertionError("Source thread failed while writing the snapshot to " + path, sourceFailure);
    }
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) AtomicReference(java.util.concurrent.atomic.AtomicReference) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Shard(com.amazonaws.services.kinesis.model.Shard)

Example 93 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class CEPMigrationTest method writeSinglePatternAfterMigrationSnapshot.

/**
 * Manually run this to write binary snapshot data.
 *
 * <p>Opens a keyed CEP operator harness built from {@code SinglePatternNFAFactory}, advances
 * the watermark to 5 without processing any element, and writes the resulting snapshot below
 * {@code src/test/resources}. The file name embeds {@code flinkGenerateSavepointVersion}.
 *
 * @throws Exception if the harness cannot be set up or the snapshot cannot be written
 */
@Ignore
@Test
public void writeSinglePatternAfterMigrationSnapshot() throws Exception {
    // Events are keyed by their id.
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness = new KeyedOneInputStreamOperatorTestHarness<>(CepOperatorTestUtilities.getKeyedCepOperator(false, new SinglePatternNFAFactory()), keySelector, BasicTypeInfo.INT_TYPE_INFO);
    try {
        harness.setup();
        harness.open();
        harness.processWatermark(new Watermark(5));
        // do snapshot and save to file
        OperatorSubtaskState snapshot = harness.snapshot(0L, 0L);
        OperatorSnapshotUtil.writeStateHandle(snapshot, "src/test/resources/cep-migration-single-pattern-afterwards-flink" + flinkGenerateSavepointVersion + "-snapshot");
    } finally {
        harness.close();
    }
}
Also used : Event(org.apache.flink.cep.Event) SubEvent(org.apache.flink.cep.SubEvent) KeySelector(org.apache.flink.api.java.functions.KeySelector) Map(java.util.Map) Watermark(org.apache.flink.streaming.api.watermark.Watermark) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 94 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class CEPRescalingTest method testCEPFunctionScalingUp.

/**
 * Checks that CEP operator state snapshotted at parallelism 1 can be repartitioned and
 * restored on two subtasks (parallelism 2), and that each restored subtask completes the
 * pattern whose key group it received.
 *
 * <p>Two keys are used: id 7 (key group 1, subtask 0 of 2) and id 10 (key group 9,
 * subtask 1 of 2). These placements are asserted before the harnesses are created.
 */
@Test
public void testCEPFunctionScalingUp() throws Exception {
    int maxParallelism = 10;
    // Events are keyed by their id.
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
    // valid pattern events belong to different keygroups
    // that will be shipped to different tasks when changing parallelism.
    // id 7: key group 1, which maps to task index 0 at parallelism 2 (asserted below)
    Event startEvent1 = new Event(7, "start", 1.0);
    SubEvent middleEvent1 = new SubEvent(7, "foo", 1.0, 10.0);
    Event endEvent1 = new Event(7, "end", 1.0);
    int keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent1), maxParallelism);
    assertEquals(1, keygroup);
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // id 10: key group 9, which maps to task index 1 at parallelism 2 (asserted below)
    Event startEvent2 = new Event(10, "start", 1.0);
    SubEvent middleEvent2 = new SubEvent(10, "foo", 1.0, 10.0);
    Event endEvent2 = new Event(10, "end", 1.0);
    keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent2), maxParallelism);
    assertEquals(9, keygroup);
    assertEquals(1, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // now we start the test, we go from parallelism 1 to 2.
    // All harnesses start as null so the finally block can close whichever ones were created.
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness1 = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness2 = null;
    try {
        // single subtask (parallelism 1, index 0) that receives events for both keys
        harness = getTestHarness(maxParallelism, 1, 0);
        harness.open();
        // valid element
        harness.processElement(new StreamRecord<>(startEvent1, 1));
        // event for key 7 that is neither the middle nor the end event of the expected match
        harness.processElement(new StreamRecord<>(new Event(7, "foobar", 1.0), 2));
        // valid element
        harness.processElement(new StreamRecord<>(startEvent2, 3));
        // valid element
        harness.processElement(new StreamRecord<Event>(middleEvent2, 4));
        // take a snapshot with some elements in internal sorting queue
        OperatorSubtaskState snapshot = harness.snapshot(0, 0);
        // closed here once the snapshot is taken; the finally block calls closeSilently on it again
        harness.close();
        // initialize two sub-tasks with the previously snapshotted state to simulate scaling up
        // we know that the valid element will go to index 0,
        // so we initialize the two tasks and we put the rest of
        // the valid elements for the pattern on task 0.
        OperatorSubtaskState initState1 = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 1, 2, 0);
        OperatorSubtaskState initState2 = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 1, 2, 1);
        harness1 = getTestHarness(maxParallelism, 2, 0);
        harness1.setup();
        harness1.initializeState(initState1);
        harness1.open();
        // if element timestamps are not correctly checkpointed/restored this will lead to
        // a pruning time underflow exception in NFA
        harness1.processWatermark(new Watermark(2));
        // valid element
        harness1.processElement(new StreamRecord<Event>(middleEvent1, 3));
        // valid element
        harness1.processElement(new StreamRecord<>(endEvent1, 5));
        harness1.processWatermark(new Watermark(Long.MAX_VALUE));
        // watermarks and the result
        // three outputs are expected; only the first two (watermark 2 and the matched pattern)
        // are inspected, the third is presumably the Long.MAX_VALUE watermark
        assertEquals(3, harness1.getOutput().size());
        verifyWatermark(harness1.getOutput().poll(), 2);
        verifyPattern(harness1.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
        harness2 = getTestHarness(maxParallelism, 2, 1);
        harness2.setup();
        harness2.initializeState(initState2);
        harness2.open();
        // now we move to the second parallel task
        harness2.processWatermark(new Watermark(2));
        harness2.processElement(new StreamRecord<>(endEvent2, 5));
        // extra start event for another key (42); it is not part of the verified pattern
        harness2.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4));
        harness2.processWatermark(new Watermark(Long.MAX_VALUE));
        // again three outputs are expected, of which the first two are inspected
        assertEquals(3, harness2.getOutput().size());
        verifyWatermark(harness2.getOutput().poll(), 2);
        verifyPattern(harness2.getOutput().poll(), startEvent2, middleEvent2, endEvent2);
    } finally {
        closeSilently(harness);
        closeSilently(harness1);
        closeSilently(harness2);
    }
}
Also used : SubEvent(org.apache.flink.cep.SubEvent) Event(org.apache.flink.cep.Event) KeySelector(org.apache.flink.api.java.functions.KeySelector) Map(java.util.Map) Watermark(org.apache.flink.streaming.api.watermark.Watermark) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Test(org.junit.Test)

Example 95 with OperatorSubtaskState

use of org.apache.flink.runtime.checkpoint.OperatorSubtaskState in project flink by apache.

the class CEPRescalingTest method testCEPFunctionScalingDown.

/**
 * Checks that CEP operator state snapshotted on three subtasks can be repackaged,
 * repartitioned and restored on two subtasks (parallelism 3 to 2), and that the restored
 * subtasks emit the expected pattern matches.
 *
 * <p>Four keys are used; their key groups and subtask indices at parallelism 3 and 2 are
 * asserted up front:
 *
 * <ul>
 *   <li>id 7: key group 1, index 0 at both parallelisms
 *   <li>id 45: key group 6, index 1 at both parallelisms
 *   <li>id 90: key group 2, index 0 at both parallelisms
 *   <li>id 10: key group 9, index 2 at parallelism 3 and index 1 at parallelism 2
 * </ul>
 *
 * <p>All harnesses are created inside the {@code try} block so that a failure while creating
 * or opening one of them still closes the harnesses that were already opened.
 */
@Test
public void testCEPFunctionScalingDown() throws Exception {
    int maxParallelism = 10;
    // Events are keyed by their id.
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
    // create some valid pattern events on predetermined key groups and task indices
    // this will go to task index 0
    Event startEvent1 = new Event(7, "start", 1.0);
    SubEvent middleEvent1 = new SubEvent(7, "foo", 1.0, 10.0);
    Event endEvent1 = new Event(7, "end", 1.0);
    // verification of the key choice
    int keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent1), maxParallelism);
    assertEquals(1, keygroup);
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 3, keygroup));
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // this will go to task index 1
    Event startEvent2 = new Event(45, "start", 1.0);
    SubEvent middleEvent2 = new SubEvent(45, "foo", 1.0, 10.0);
    Event endEvent2 = new Event(45, "end", 1.0);
    keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent2), maxParallelism);
    assertEquals(6, keygroup);
    assertEquals(1, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 3, keygroup));
    assertEquals(1, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // this will go to task index 0
    Event startEvent3 = new Event(90, "start", 1.0);
    SubEvent middleEvent3 = new SubEvent(90, "foo", 1.0, 10.0);
    Event endEvent3 = new Event(90, "end", 1.0);
    keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent3), maxParallelism);
    assertEquals(2, keygroup);
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 3, keygroup));
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // this will go to task index 2 at parallelism 3, and to task index 1 after scaling down to 2
    Event startEvent4 = new Event(10, "start", 1.0);
    SubEvent middleEvent4 = new SubEvent(10, "foo", 1.0, 10.0);
    Event endEvent4 = new Event(10, "end", 1.0);
    keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent4), maxParallelism);
    assertEquals(9, keygroup);
    assertEquals(2, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 3, keygroup));
    assertEquals(1, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // starting the test, we will go from parallelism of 3 to parallelism of 2
    // Every harness starts as null and is created inside the try block, so the finally
    // block closes whichever ones exist even if a later harness fails to open.
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness1 = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness2 = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness3 = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness4 = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness5 = null;
    try {
        harness1 = getTestHarness(maxParallelism, 3, 0);
        harness1.open();
        harness2 = getTestHarness(maxParallelism, 3, 1);
        harness2.open();
        harness3 = getTestHarness(maxParallelism, 3, 2);
        harness3.open();
        harness1.processWatermark(Long.MIN_VALUE);
        harness2.processWatermark(Long.MIN_VALUE);
        harness3.processWatermark(Long.MIN_VALUE);
        // valid element
        harness1.processElement(new StreamRecord<>(startEvent1, 1));
        harness1.processElement(new StreamRecord<>(new Event(7, "foobar", 1.0), 2));
        // valid element
        harness1.processElement(new StreamRecord<Event>(middleEvent1, 3));
        // valid element
        harness1.processElement(new StreamRecord<>(endEvent1, 5));
        // till here we have a valid sequence, so after creating the
        // new instance and sending it a watermark, we expect it to fire,
        // even with no new elements.
        harness1.processElement(new StreamRecord<>(startEvent3, 10));
        harness1.processElement(new StreamRecord<>(startEvent1, 10));
        harness2.processElement(new StreamRecord<>(startEvent2, 7));
        harness2.processElement(new StreamRecord<Event>(middleEvent2, 8));
        harness3.processElement(new StreamRecord<>(startEvent4, 15));
        harness3.processElement(new StreamRecord<Event>(middleEvent4, 16));
        harness3.processElement(new StreamRecord<>(endEvent4, 17));
        // so far we only have the initial watermark
        assertEquals(1, harness1.getOutput().size());
        verifyWatermark(harness1.getOutput().poll(), Long.MIN_VALUE);
        assertEquals(1, harness2.getOutput().size());
        verifyWatermark(harness2.getOutput().poll(), Long.MIN_VALUE);
        assertEquals(1, harness3.getOutput().size());
        verifyWatermark(harness3.getOutput().poll(), Long.MIN_VALUE);
        // we take a snapshot and make it look as a single operator
        // this will be the initial state of all downstream tasks.
        OperatorSubtaskState snapshot = AbstractStreamOperatorTestHarness.repackageState(harness2.snapshot(0, 0), harness1.snapshot(0, 0), harness3.snapshot(0, 0));
        OperatorSubtaskState initState1 = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 3, 2, 0);
        OperatorSubtaskState initState2 = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 3, 2, 1);
        // restored subtask 0 of 2
        harness4 = getTestHarness(maxParallelism, 2, 0);
        harness4.setup();
        harness4.initializeState(initState1);
        harness4.open();
        // restored subtask 1 of 2
        harness5 = getTestHarness(maxParallelism, 2, 1);
        harness5.setup();
        harness5.initializeState(initState2);
        harness5.open();
        // completes the pattern for key 45, whose start and middle events were processed before the snapshot
        harness5.processElement(new StreamRecord<>(endEvent2, 11));
        harness5.processWatermark(new Watermark(12));
        verifyPattern(harness5.getOutput().poll(), startEvent2, middleEvent2, endEvent2);
        verifyWatermark(harness5.getOutput().poll(), 12);
        // if element timestamps are not correctly checkpointed/restored this will lead to
        // a pruning time underflow exception in NFA
        harness4.processWatermark(new Watermark(12));
        // the full sequence for key 7 was processed before the snapshot, so the watermark alone yields the match
        assertEquals(2, harness4.getOutput().size());
        verifyPattern(harness4.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
        verifyWatermark(harness4.getOutput().poll(), 12);
        // valid element
        harness4.processElement(new StreamRecord<Event>(middleEvent3, 15));
        // valid element
        harness4.processElement(new StreamRecord<>(endEvent3, 16));
        // valid element
        harness4.processElement(new StreamRecord<Event>(middleEvent1, 15));
        // valid element
        harness4.processElement(new StreamRecord<>(endEvent1, 16));
        harness4.processWatermark(new Watermark(Long.MAX_VALUE));
        harness5.processWatermark(new Watermark(Long.MAX_VALUE));
        // verify result
        assertEquals(3, harness4.getOutput().size());
        // check the order of the events in the output
        // the two matches (keys 7 and 90) are accepted in either order; peek at the first to decide
        Queue<Object> output = harness4.getOutput();
        StreamRecord<?> resultRecord = (StreamRecord<?>) output.peek();
        assertTrue(resultRecord.getValue() instanceof Map);
        @SuppressWarnings("unchecked") Map<String, List<Event>> patternMap = (Map<String, List<Event>>) resultRecord.getValue();
        if (patternMap.get("start").get(0).getId() == 7) {
            verifyPattern(harness4.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
            verifyPattern(harness4.getOutput().poll(), startEvent3, middleEvent3, endEvent3);
        } else {
            verifyPattern(harness4.getOutput().poll(), startEvent3, middleEvent3, endEvent3);
            verifyPattern(harness4.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
        }
        // after scaling down this should end up here
        // two outputs are expected; only the first (the match for key 10) is inspected
        assertEquals(2, harness5.getOutput().size());
        verifyPattern(harness5.getOutput().poll(), startEvent4, middleEvent4, endEvent4);
    } finally {
        closeSilently(harness1);
        closeSilently(harness2);
        closeSilently(harness3);
        closeSilently(harness4);
        closeSilently(harness5);
    }
}
Also used : SubEvent(org.apache.flink.cep.SubEvent) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) KeySelector(org.apache.flink.api.java.functions.KeySelector) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Event(org.apache.flink.cep.Event) List(java.util.List) Map(java.util.Map) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)

Aggregations

OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState)178 Test (org.junit.Test)142 Watermark (org.apache.flink.streaming.api.watermark.Watermark)52 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)37 RowData (org.apache.flink.table.data.RowData)31 ArrayList (java.util.ArrayList)28 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)25 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)23 Map (java.util.Map)22 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)21 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)19 HashMap (java.util.HashMap)18 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)18 TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)16 Event (org.apache.flink.cep.Event)16 SubEvent (org.apache.flink.cep.SubEvent)16 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)15 GenericRowData (org.apache.flink.table.data.GenericRowData)15 Ignore (org.junit.Ignore)15 TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot)14