Search in sources :

Example 11 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KinesisDataFetcherTest method testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint.

/**
 * Seeds a fetcher with restored state for every shard of two streams, runs it until the initial
 * shard discovery has completed against a fake Kinesis that reports the same shard counts, and
 * then checks that the last seen shard id tracked for each stream is the id of that stream's
 * highest-order shard.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpoint() throws Exception {
    List<String> subscribedStreams = new LinkedList<>();
    subscribedStreams.add("fakeStream1");
    subscribedStreams.add("fakeStream2");

    // shard counts served by the fake Kinesis after the restore: unchanged for both streams
    Map<String, Integer> shardCountAfterRestore = new HashMap<>();
    shardCountAfterRestore.put("fakeStream1", 3);
    shardCountAfterRestore.put("fakeStream2", 2);

    // restored state: one random sequence number string per shard known before the restore
    Map<StreamShardHandle, String> restoredSequenceNumbers = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredSequenceNumbers.put(new StreamShardHandle("fakeStream1", shard), UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredSequenceNumbers.put(new StreamShardHandle("fakeStream2", shard), UUID.randomUUID().toString());
    }

    // this map is handed to the fetcher and inspected by the assertions at the end
    HashMap<String, String> lastSeenShardIds = KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(subscribedStreams);

    final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
            subscribedStreams,
            new TestSourceContext<>(),
            TestUtils.getStandardProperties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            10,
            2,
            new AtomicReference<>(),
            new LinkedList<>(),
            lastSeenShardIds,
            FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountAfterRestore));

    // replay the restored state into the fetcher before it is started
    for (Map.Entry<StreamShardHandle, String> restored : restoredSequenceNumbers.entrySet()) {
        StreamShardHandle shardHandle = restored.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(shardHandle.getStreamName(), shardHandle.getShard().getShardId());
        KinesisStreamShardState shardState = new KinesisStreamShardState(
                KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
                shardHandle,
                new SequenceNumber(restored.getValue()));
        fetcher.registerNewSubscribedShardState(shardState);
    }

    // run the fetcher on its own thread; sync() below rethrows anything it failed with
    CheckedThread fetcherRunner = new CheckedThread() {

        @Override
        public void go() throws Exception {
            fetcher.runFetcher();
        }
    };
    fetcherRunner.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    fetcherRunner.sync();

    // the streams tracked in the state must be exactly the subscribed streams
    Set<String> trackedStreams = lastSeenShardIds.keySet();
    assertEquals(subscribedStreams.size(), trackedStreams.size());
    assertTrue(trackedStreams.containsAll(subscribedStreams));

    // every stream's last seen shard must be its highest-order shard
    for (String streamName : trackedStreams) {
        int lastShardOrder = shardCountAfterRestore.get(streamName) - 1;
        assertEquals(KinesisShardIdGenerator.generateFromShardOrder(lastShardOrder), lastSeenShardIds.get(streamName));
    }
}
Also used : HashMap(java.util.HashMap) CheckedThread(org.apache.flink.core.testutils.CheckedThread) LinkedList(java.util.LinkedList) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 12 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KinesisDataFetcherTest method testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist.

/**
 * Same scenario as the "no new shards since restored checkpoint" test, but two of the four
 * subscribed streams have no shards at all. After initial discovery the two populated streams
 * must point at their highest-order shard, while the two empty streams must still map to
 * {@code null}.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNoNewShardsSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
    List<String> subscribedStreams = new LinkedList<>();
    subscribedStreams.add("fakeStream1");
    subscribedStreams.add("fakeStream2");
    // fakeStream3 and fakeStream4 will not have any shards
    subscribedStreams.add("fakeStream3");
    subscribedStreams.add("fakeStream4");

    // shard counts served by the fake Kinesis after the restore
    Map<String, Integer> shardCountAfterRestore = new HashMap<>();
    // fakeStream1 has fixed 3 shards, fakeStream2 has fixed 2 shards
    shardCountAfterRestore.put("fakeStream1", 3);
    shardCountAfterRestore.put("fakeStream2", 2);
    // no shards can be found for fakeStream3 and fakeStream4
    shardCountAfterRestore.put("fakeStream3", 0);
    shardCountAfterRestore.put("fakeStream4", 0);

    // restored state: one random sequence number string per shard known before the restore
    Map<StreamShardHandle, String> restoredSequenceNumbers = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredSequenceNumbers.put(new StreamShardHandle("fakeStream1", shard), UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredSequenceNumbers.put(new StreamShardHandle("fakeStream2", shard), UUID.randomUUID().toString());
    }

    // this map is handed to the fetcher and inspected by the assertions at the end
    HashMap<String, String> lastSeenShardIds = KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(subscribedStreams);

    // a non-resharded streams behaviour is used to represent that Kinesis is not
    // resharded AFTER the restore
    final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
            subscribedStreams,
            new TestSourceContext<>(),
            TestUtils.getStandardProperties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            10,
            2,
            new AtomicReference<>(),
            new LinkedList<>(),
            lastSeenShardIds,
            FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountAfterRestore));

    // replay the restored state into the fetcher before it is started
    for (Map.Entry<StreamShardHandle, String> restored : restoredSequenceNumbers.entrySet()) {
        StreamShardHandle shardHandle = restored.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(shardHandle.getStreamName(), shardHandle.getShard().getShardId());
        KinesisStreamShardState shardState = new KinesisStreamShardState(
                KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
                shardHandle,
                new SequenceNumber(restored.getValue()));
        fetcher.registerNewSubscribedShardState(shardState);
    }

    // run the fetcher on its own thread; sync() below rethrows anything it failed with
    CheckedThread fetcherRunner = new CheckedThread() {

        @Override
        public void go() throws Exception {
            fetcher.runFetcher();
        }
    };
    fetcherRunner.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    fetcherRunner.sync();

    // the streams tracked in the state must be exactly the subscribed streams
    Set<String> trackedStreams = lastSeenShardIds.keySet();
    assertEquals(subscribedStreams.size(), trackedStreams.size());
    assertTrue(trackedStreams.containsAll(subscribedStreams));

    // populated streams point at their last shard; shard-less streams have no last seen shard
    String expectedLastShardOfStream1 = KinesisShardIdGenerator.generateFromShardOrder(2);
    assertEquals(expectedLastShardOfStream1, lastSeenShardIds.get("fakeStream1"));
    String expectedLastShardOfStream2 = KinesisShardIdGenerator.generateFromShardOrder(1);
    assertEquals(expectedLastShardOfStream2, lastSeenShardIds.get("fakeStream2"));
    assertNull(lastSeenShardIds.get("fakeStream3"));
    assertNull(lastSeenShardIds.get("fakeStream4"));
}
Also used : HashMap(java.util.HashMap) CheckedThread(org.apache.flink.core.testutils.CheckedThread) LinkedList(java.util.LinkedList) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 13 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KinesisDataFetcherTest method testRecordPublisherFactoryIsTornDownWhenDeregisterStreamConsumerThrowsException.

/**
 * Builds a fetcher whose {@code deregisterStreamConsumer()} always throws, shuts it down, and
 * verifies that {@code close()} was still called on the mocked V2 proxy. The 10 second timeout
 * makes the test fail instead of hanging if {@code awaitTermination()} never returns.
 */
@Test(timeout = 10000)
public void testRecordPublisherFactoryIsTornDownWhenDeregisterStreamConsumerThrowsException() throws InterruptedException {
    KinesisProxyInterface kinesisProxyMock = mock(KinesisProxyInterface.class);
    KinesisProxyV2Interface kinesisProxyV2Mock = mock(KinesisProxyV2Interface.class);

    TestableKinesisDataFetcher<String> failingFetcher = new TestableKinesisDataFetcher<String>(
            singletonList("fakeStream1"),
            new TestSourceContext<>(),
            TestUtils.efoProperties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            10,
            2,
            new AtomicReference<>(),
            new LinkedList<>(),
            new HashMap<>(),
            kinesisProxyMock,
            kinesisProxyV2Mock) {

        // simulate a failure while deregistering the stream consumer
        @Override
        protected void deregisterStreamConsumer() {
            throw new RuntimeException();
        }
    };

    failingFetcher.shutdownFetcher();

    // the V2 proxy must have been closed despite the exception thrown above
    verify(kinesisProxyV2Mock).close();
    failingFetcher.awaitTermination();
}
Also used : SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisProxyV2Interface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyV2Interface) KinesisProxyInterface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyInterface) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Test(org.junit.Test)

Example 14 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KinesisDataFetcherTest method testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpointAndSomeStreamsDoNotExist.

/**
 * Restores state for the shards two streams had before the restore, then runs initial discovery
 * against a fake Kinesis that reports additional shards for those streams and no shards for two
 * further subscribed streams. The last seen shard id of each populated stream must advance to
 * its new highest-order shard, and the two empty streams must still map to {@code null}.
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpointAndSomeStreamsDoNotExist() throws Exception {
    List<String> subscribedStreams = new LinkedList<>();
    subscribedStreams.add("fakeStream1");
    subscribedStreams.add("fakeStream2");
    // fakeStream3 and fakeStream4 will not have any shards
    subscribedStreams.add("fakeStream3");
    subscribedStreams.add("fakeStream4");

    // shard counts served by the fake Kinesis after the restore
    Map<String, Integer> shardCountAfterRestore = new HashMap<>();
    // fakeStream1 had 3 shards before the restore and has 1 new shard after it
    shardCountAfterRestore.put("fakeStream1", 3 + 1);
    // fakeStream2 had 2 shards before the restore and has 3 new shards after it
    shardCountAfterRestore.put("fakeStream2", 2 + 3);
    // no shards can be found for fakeStream3 and fakeStream4
    shardCountAfterRestore.put("fakeStream3", 0);
    shardCountAfterRestore.put("fakeStream4", 0);

    // restored state: one random sequence number string per shard known before the restore
    Map<StreamShardHandle, String> restoredSequenceNumbers = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredSequenceNumbers.put(new StreamShardHandle("fakeStream1", shard), UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        restoredSequenceNumbers.put(new StreamShardHandle("fakeStream2", shard), UUID.randomUUID().toString());
    }

    // this map is handed to the fetcher and inspected by the assertions at the end
    HashMap<String, String> lastSeenShardIds = KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(subscribedStreams);

    // a non-resharded streams behaviour is used to represent that Kinesis is not
    // resharded AFTER the restore
    final TestableKinesisDataFetcher<String> fetcher = new TestableKinesisDataFetcher<>(
            subscribedStreams,
            new TestSourceContext<>(),
            TestUtils.getStandardProperties(),
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
            10,
            2,
            new AtomicReference<>(),
            new LinkedList<>(),
            lastSeenShardIds,
            FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(shardCountAfterRestore));

    // replay the restored state into the fetcher before it is started
    for (Map.Entry<StreamShardHandle, String> restored : restoredSequenceNumbers.entrySet()) {
        StreamShardHandle shardHandle = restored.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(shardHandle.getStreamName(), shardHandle.getShard().getShardId());
        KinesisStreamShardState shardState = new KinesisStreamShardState(
                KinesisDataFetcher.convertToStreamShardMetadata(shardHandle),
                shardHandle,
                new SequenceNumber(restored.getValue()));
        fetcher.registerNewSubscribedShardState(shardState);
    }

    // run the fetcher on its own thread; sync() below rethrows anything it failed with
    CheckedThread fetcherRunner = new CheckedThread() {

        @Override
        public void go() throws Exception {
            fetcher.runFetcher();
        }
    };
    fetcherRunner.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    fetcherRunner.sync();

    // the streams tracked in the state must be exactly the subscribed streams
    Set<String> trackedStreams = lastSeenShardIds.keySet();
    assertEquals(subscribedStreams.size(), trackedStreams.size());
    assertTrue(trackedStreams.containsAll(subscribedStreams));

    // fakeStream1 now has 4 shards (last order 3), fakeStream2 now has 5 shards (last order 4)
    String expectedLastShardOfStream1 = KinesisShardIdGenerator.generateFromShardOrder(3);
    assertEquals(expectedLastShardOfStream1, lastSeenShardIds.get("fakeStream1"));
    String expectedLastShardOfStream2 = KinesisShardIdGenerator.generateFromShardOrder(4);
    assertEquals(expectedLastShardOfStream2, lastSeenShardIds.get("fakeStream2"));
    // shard-less streams have no last seen shard
    assertNull(lastSeenShardIds.get("fakeStream3"));
    assertNull(lastSeenShardIds.get("fakeStream4"));
}
Also used : HashMap(java.util.HashMap) CheckedThread(org.apache.flink.core.testutils.CheckedThread) LinkedList(java.util.LinkedList) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 15 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class FlinkKinesisConsumerTest method testUseRestoredStateForSnapshotIfFetcherNotInitialized.

// ----------------------------------------------------------------------
// Tests related to state initialization
// ----------------------------------------------------------------------
/**
 * Restores a consumer (subtask 0 of 2) from a union list state holding four shards of
 * "fakeStream", opens it without running it, and takes a snapshot. The snapshot must clear the
 * list state and write back only half of the restored entries, including those for shard
 * orders 0 and 2.
 */
@Test
public void testUseRestoredStateForSnapshotIfFetcherNotInitialized() throws Exception {
    // restored union state: shards 0..3 of "fakeStream", each at sequence number "1";
    // every entry is also placed into the list state that the mocked state store hands out
    List<Tuple2<StreamShardMetadata, SequenceNumber>> globalUnionState = new ArrayList<>(4);
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> listState = new TestingListState<>();
    for (int shardOrder = 0; shardOrder < 4; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        StreamShardMetadata shardMetadata = KinesisDataFetcher.convertToStreamShardMetadata(new StreamShardHandle("fakeStream", shard));
        globalUnionState.add(Tuple2.of(shardMetadata, new SequenceNumber("1")));
    }
    for (Tuple2<StreamShardMetadata, SequenceNumber> restoredEntry : globalUnionState) {
        listState.add(restoredEntry);
    }

    // runtime context describing this consumer as subtask 0 out of 2 parallel subtasks
    RuntimeContext runtimeContext = mock(RuntimeContext.class);
    when(runtimeContext.getIndexOfThisSubtask()).thenReturn(0);
    when(runtimeContext.getNumberOfParallelSubtasks()).thenReturn(2);

    // state store / initialization context that report a restore and return the list state
    OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(listState);
    StateInitializationContext restoreContext = mock(StateInitializationContext.class);
    when(restoreContext.getOperatorStateStore()).thenReturn(stateStore);
    when(restoreContext.isRestored()).thenReturn(true);

    Properties consumerConfig = TestUtils.getStandardProperties();
    FlinkKinesisConsumer<String> consumer = new FlinkKinesisConsumer<>("fakeStream", new SimpleStringSchema(), consumerConfig);
    consumer.setRuntimeContext(runtimeContext);
    consumer.initializeState(restoreContext);
    // only opened, not run
    consumer.open(new Configuration());
    // arbitrary checkpoint id and timestamp
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(123, 123));

    assertTrue(listState.isClearCalled());
    // the checkpointed list state should contain only the shards that it should subscribe to
    int expectedEntryCount = globalUnionState.size() / 2;
    assertEquals(expectedEntryCount, listState.getList().size());
    assertTrue(listState.getList().contains(globalUnionState.get(0)));
    assertTrue(listState.getList().contains(globalUnionState.get(2)));
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) StateSnapshotContextSynchronousImpl(org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl) ArrayList(java.util.ArrayList) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Properties(java.util.Properties) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Shard(com.amazonaws.services.kinesis.model.Shard) KinesisStreamShard(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)63 Test (org.junit.Test)35 Properties (java.util.Properties)30 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)20 CheckedThread (org.apache.flink.core.testutils.CheckedThread)13 StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)13 Shard (com.amazonaws.services.kinesis.model.Shard)11 ArrayList (java.util.ArrayList)11 KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState)11 TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher)11 LinkedList (java.util.LinkedList)9 SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber)9 HashMap (java.util.HashMap)8 StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)7 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)7 Map (java.util.Map)6 AtomicReference (java.util.concurrent.atomic.AtomicReference)6 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)6 Matchers.anyString (org.mockito.Matchers.anyString)6 SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange)5