Search in sources :

Example 51 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KinesisDataFetcherTest method testShardToSubtaskMappingWithCustomHashFunction.

// ----------------------------------------------------------------------
// Tests shard distribution with custom hash function
// ----------------------------------------------------------------------
/**
 * Checks that a custom {@link KinesisShardAssigner} injected into the fetcher determines which
 * subtask discovers the shards of the subscribed stream.
 *
 * <p>For each tested constant {@code hash} (0 and 1) an assigner returning that constant for every
 * shard is installed on one fetcher per subtask index. The fetcher whose subtask index equals
 * {@code hash} is expected to discover all {@code shardCount} shards; every other fetcher is
 * expected to discover none.
 *
 * @throws Exception if creating the fetcher or discovering shards fails
 */
@Test
public void testShardToSubtaskMappingWithCustomHashFunction() throws Exception {
    int totalCountOfSubtasks = 10;
    int shardCount = 3;
    for (int i = 0; i < 2; i++) {
        final int hash = i;
        // Assigner that ignores its inputs and always answers with the current constant.
        final KinesisShardAssigner allShardsSingleSubtaskFn = (shard, subtasks) -> hash;
        Map<String, Integer> streamToShardCount = new HashMap<>();
        List<String> fakeStreams = new LinkedList<>();
        fakeStreams.add("fakeStream");
        streamToShardCount.put("fakeStream", shardCount);
        for (int j = 0; j < totalCountOfSubtasks; j++) {
            int subtaskIndex = j;
            // The fetcher is first created with whatever assigner it uses by default; it is
            // parameterized with <String> (instead of being a raw type) so that the shard list
            // returned below is properly typed, matching the other tests of this class.
            final TestableKinesisDataFetcher<String> fetcher =
                    new TestableKinesisDataFetcher<>(
                            fakeStreams,
                            new TestSourceContext<>(),
                            new Properties(),
                            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                            totalCountOfSubtasks,
                            subtaskIndex,
                            new AtomicReference<>(),
                            new LinkedList<>(),
                            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams),
                            FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
            // Override the assigner via reflection with the constant one under test.
            Whitebox.setInternalState(fetcher, "shardAssigner", allShardsSingleSubtaskFn);
            List<StreamShardHandle> shards = fetcher.discoverNewShardsToSubscribe();
            fetcher.shutdownFetcher();
            String msg = String.format("for hash=%d, subtask=%d", hash, subtaskIndex);
            if (j == i) {
                assertEquals(msg, shardCount, shards.size());
            } else {
                assertEquals(msg, 0, shards.size());
            }
        }
    }
}
Also used : KinesisShardAssigner(org.apache.flink.streaming.connectors.kinesis.KinesisShardAssigner) Shard(com.amazonaws.services.kinesis.model.Shard) Arrays(java.util.Arrays) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) Random(java.util.Random) TestSourceContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext) Collections.singletonList(java.util.Collections.singletonList) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) MutableLong(org.apache.commons.lang3.mutable.MutableLong) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) KinesisProxyInterface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyInterface) KinesisShardIdGenerator(org.apache.flink.streaming.connectors.kinesis.testutils.KinesisShardIdGenerator) Assert.fail(org.junit.Assert.fail) KinesisShardAssigner(org.apache.flink.streaming.connectors.kinesis.KinesisShardAssigner) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) Set(java.util.Set) BlockingQueue(java.util.concurrent.BlockingQueue) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) MutableBoolean(org.apache.commons.lang3.mutable.MutableBoolean) Mockito.mock(org.mockito.Mockito.mock) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) Whitebox(org.powermock.reflect.Whitebox) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) HashKeyRange(com.amazonaws.services.kinesis.model.HashKeyRange) 
Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashMap(java.util.HashMap) RecordPublisher(org.apache.flink.streaming.connectors.kinesis.internals.publisher.RecordPublisher) AtomicReference(java.util.concurrent.atomic.AtomicReference) BoundedOutOfOrdernessTimestampExtractor(org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor) ArrayList(java.util.ArrayList) CheckedThread(org.apache.flink.core.testutils.CheckedThread) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer) LinkedList(java.util.LinkedList) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) KinesisProxyV2Interface(org.apache.flink.streaming.connectors.kinesis.proxy.KinesisProxyV2Interface) Time(org.apache.flink.streaming.api.windowing.time.Time) Properties(java.util.Properties) FakeKinesisBehavioursFactory(org.apache.flink.streaming.connectors.kinesis.testutils.FakeKinesisBehavioursFactory) SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) AlwaysThrowsDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.testutils.AlwaysThrowsDeserializationSchema) Mockito.when(org.mockito.Mockito.when) TestUtils(org.apache.flink.streaming.connectors.kinesis.testutils.TestUtils) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) Assert.assertNull(org.junit.Assert.assertNull) TestableKinesisDataFetcherForShardConsumerException(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcherForShardConsumerException) Assert(org.junit.Assert) Collections(java.util.Collections) 
SHARD_DISCOVERY_INTERVAL_MILLIS(org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants.SHARD_DISCOVERY_INTERVAL_MILLIS) Assert.assertEquals(org.junit.Assert.assertEquals) HashMap(java.util.HashMap) Properties(java.util.Properties) LinkedList(java.util.LinkedList) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Test(org.junit.Test)

Example 52 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KinesisDataFetcherTest method testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpoint.

/**
 * Registers restored shard state for two streams on a fetcher, runs the fetcher against a fake
 * Kinesis that reports more shards than were restored, and then asserts that the
 * stream-to-last-seen-shard map handed to the fetcher tracks exactly the subscribed streams and
 * points at the highest-ordered shard of each stream.
 *
 * @throws Exception if the fetcher or its runner thread fails
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNewShardsFoundSinceRestoredCheckpoint() throws Exception {
    final String firstStream = "fakeStream1";
    final String secondStream = "fakeStream2";

    List<String> fakeStreams = new LinkedList<>();
    fakeStreams.add(firstStream);
    fakeStreams.add(secondStream);

    Map<StreamShardHandle, String> restoredStateUnderTest = new HashMap<>();
    // fakeStream1 has 3 shards before restore
    for (int shardOrder = 0; shardOrder < 3; shardOrder++) {
        StreamShardHandle handle =
                new StreamShardHandle(
                        firstStream,
                        new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder)));
        restoredStateUnderTest.put(handle, UUID.randomUUID().toString());
    }
    // fakeStream2 has 2 shards before restore
    for (int shardOrder = 0; shardOrder < 2; shardOrder++) {
        StreamShardHandle handle =
                new StreamShardHandle(
                        secondStream,
                        new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder)));
        restoredStateUnderTest.put(handle, UUID.randomUUID().toString());
    }

    Map<String, Integer> streamToShardCount = new HashMap<>();
    // fakeStream1 had 3 shards before & 1 new shard after restore
    streamToShardCount.put(firstStream, 3 + 1);
    // fakeStream2 had 2 shards before & 3 new shards after restore
    streamToShardCount.put(secondStream, 2 + 3);

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    // A non-resharded fake Kinesis behaviour is used to represent that Kinesis is not
    // resharded AFTER the restore.
    final TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    fakeStreams,
                    new TestSourceContext<>(),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    subscribedStreamsToLastSeenShardIdsUnderTest,
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));

    // Feed every restored shard into the fetcher before it starts running.
    for (Map.Entry<StreamShardHandle, String> restoredEntry : restoredStateUnderTest.entrySet()) {
        StreamShardHandle restoredShard = restoredEntry.getKey();
        fetcher.advanceLastDiscoveredShardOfStream(
                restoredShard.getStreamName(), restoredShard.getShard().getShardId());
        fetcher.registerNewSubscribedShardState(
                new KinesisStreamShardState(
                        KinesisDataFetcher.convertToStreamShardMetadata(restoredShard),
                        restoredShard,
                        new SequenceNumber(restoredEntry.getValue())));
    }

    // Run the fetcher on its own thread; sync() below rethrows anything it failed with.
    CheckedThread runFetcherThread =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    fetcher.runFetcher();
                }
            };
    runFetcherThread.start();
    fetcher.waitUntilInitialDiscovery();
    fetcher.shutdownFetcher();
    runFetcherThread.sync();

    // The streams tracked in the state must be identical to the subscribed streams.
    Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertEquals(fakeStreams.size(), streamsInState.size());
    assertTrue(streamsInState.containsAll(fakeStreams));

    // The last seen shard of every stream must be the one with the highest shard order.
    for (Map.Entry<String, String> lastSeen : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
        int highestShardOrder = streamToShardCount.get(lastSeen.getKey()) - 1;
        String expectedShardId = KinesisShardIdGenerator.generateFromShardOrder(highestShardOrder);
        assertEquals(expectedShardId, lastSeen.getValue());
    }
}
Also used : HashMap(java.util.HashMap) CheckedThread(org.apache.flink.core.testutils.CheckedThread) LinkedList(java.util.LinkedList) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 53 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KinesisDataFetcherTest method testStreamToLastSeenShardStateIsCorrectlySetWhenNotRestoringFromFailure.

/**
 * Runs a consumer with a fresh (non-restored) fetcher over four fake streams, each with a random
 * shard count between 1 and 5, and asserts that the stream-to-last-seen-shard map handed to the
 * fetcher tracks exactly the subscribed streams and points at the highest-ordered shard of each.
 *
 * @throws Exception if the consumer thread fails
 */
@Test
public void testStreamToLastSeenShardStateIsCorrectlySetWhenNotRestoringFromFailure() throws Exception {
    List<String> fakeStreams = new LinkedList<>();
    for (int streamNumber = 1; streamNumber <= 4; streamNumber++) {
        fakeStreams.add("fakeStream" + streamNumber);
    }

    HashMap<String, String> subscribedStreamsToLastSeenShardIdsUnderTest =
            KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(fakeStreams);

    // Give every stream between 1 and 5 shards, chosen at random.
    Map<String, Integer> streamToShardCount = new HashMap<>();
    Random shardCountSource = new Random();
    for (String streamName : fakeStreams) {
        int shardsForStream = shardCountSource.nextInt(5) + 1;
        streamToShardCount.put(streamName, shardsForStream);
    }

    final TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    fakeStreams,
                    new TestSourceContext<>(),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    subscribedStreamsToLastSeenShardIdsUnderTest,
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(streamToShardCount));
    final DummyFlinkKinesisConsumer<String> consumer =
            new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);

    // Run the consumer on its own thread; sync() below rethrows anything it failed with.
    CheckedThread consumerThread =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    consumer.run(new TestSourceContext<>());
                }
            };
    consumerThread.start();
    fetcher.waitUntilRun();
    consumer.cancel();
    consumerThread.sync();

    // The streams tracked in the state must be identical to the subscribed streams.
    Set<String> streamsInState = subscribedStreamsToLastSeenShardIdsUnderTest.keySet();
    assertEquals(fakeStreams.size(), streamsInState.size());
    assertTrue(streamsInState.containsAll(fakeStreams));

    // The last seen shard of every stream must be the one with the highest shard order.
    for (Map.Entry<String, String> lastSeen : subscribedStreamsToLastSeenShardIdsUnderTest.entrySet()) {
        int highestShardOrder = streamToShardCount.get(lastSeen.getKey()) - 1;
        String expectedShardId = KinesisShardIdGenerator.generateFromShardOrder(highestShardOrder);
        assertEquals(expectedShardId, lastSeen.getValue());
    }
}
Also used : HashMap(java.util.HashMap) CheckedThread(org.apache.flink.core.testutils.CheckedThread) LinkedList(java.util.LinkedList) Random(java.util.Random) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 54 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KinesisDataFetcherTest method testSkipCorruptedRecord.

/**
 * Asserts that emitting a {@code null} record through the fetcher produces no output on the
 * source context while the last processed sequence number of the affected shard is still
 * updated, and that non-null records are both emitted and reflected in the shard state.
 *
 * @throws Exception if the consumer thread fails
 */
@Test
public void testSkipCorruptedRecord() throws Exception {
    final String stream = "fakeStream";
    final int numShards = 3;

    final LinkedList<KinesisStreamShardState> testShardStates = new LinkedList<>();
    final TestSourceContext<String> sourceContext = new TestSourceContext<>();
    final TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    singletonList(stream),
                    sourceContext,
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    1,
                    0,
                    new AtomicReference<>(),
                    testShardStates,
                    new HashMap<>(),
                    FakeKinesisBehavioursFactory.nonReshardedStreamsBehaviour(
                            Collections.singletonMap(stream, numShards)));

    // FlinkKinesisConsumer is responsible for setting up the fetcher before it can be run;
    // drive the consumer until it reaches the point where the fetcher starts to run.
    final DummyFlinkKinesisConsumer<String> consumer =
            new DummyFlinkKinesisConsumer<>(TestUtils.getStandardProperties(), fetcher, 1, 0);
    CheckedThread consumerThread =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    consumer.run(new TestSourceContext<>());
                }
            };
    consumerThread.start();
    fetcher.waitUntilRun();
    consumer.cancel();
    consumerThread.sync();

    assertEquals(numShards, testShardStates.size());

    // A regular record per shard: state is advanced and the record is emitted.
    for (int shardIndex = 0; shardIndex < numShards; shardIndex++) {
        final String payload = "record-" + shardIndex;
        fetcher.emitRecordAndUpdateState(payload, 10L, shardIndex, new SequenceNumber("seq-num-1"));

        SequenceNumber lastProcessed = testShardStates.get(shardIndex).getLastProcessedSequenceNum();
        assertEquals(new SequenceNumber("seq-num-1"), lastProcessed);
        assertEquals(new StreamRecord<>(payload, 10L), sourceContext.removeLatestOutput());
    }

    // Emitting a null (i.e., a corrupt record) should not produce any output, but the shard
    // state must still be updated.
    fetcher.emitRecordAndUpdateState(null, 10L, 1, new SequenceNumber("seq-num-2"));
    SequenceNumber afterCorruptRecord = testShardStates.get(1).getLastProcessedSequenceNum();
    assertEquals(new SequenceNumber("seq-num-2"), afterCorruptRecord);

    // No output should have been collected.
    assertNull(sourceContext.removeLatestOutput());
}
Also used : CheckedThread(org.apache.flink.core.testutils.CheckedThread) LinkedList(java.util.LinkedList) TestSourceContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestSourceContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) TestableKinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher) Test(org.junit.Test)

Example 55 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class KafkaSinkExternalContext method createSink.

/**
 * Builds a {@code KafkaSink} of strings that writes to {@code topicName}, creating the topic
 * first (4 partitions, replication factor 1) when it does not exist yet.
 *
 * @param sinkSettings settings whose checkpointing mode is translated into the sink's delivery
 *     guarantee
 * @return the configured sink
 */
@Override
public Sink<String> createSink(TestingSinkSettings sinkSettings) {
    final boolean topicMissing = !topicExists(topicName);
    if (topicMissing) {
        createTopic(topicName, 4, (short) 1);
    }

    // Producer-level configuration: only the transaction timeout is set here.
    final Properties producerConfig = new Properties();
    producerConfig.put(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, DEFAULT_TRANSACTION_TIMEOUT_IN_MS);

    KafkaSinkBuilder<String> sinkBuilder = KafkaSink.builder();
    sinkBuilder
            .setBootstrapServers(bootstrapServers)
            .setDeliverGuarantee(toDeliveryGuarantee(sinkSettings.getCheckpointingMode()))
            .setTransactionalIdPrefix("testingFramework")
            .setKafkaProducerConfig(producerConfig)
            .setRecordSerializer(
                    KafkaRecordSerializationSchema.builder()
                            .setTopic(topicName)
                            .setValueSerializationSchema(new SimpleStringSchema())
                            .build());
    return sinkBuilder.build();
}
Also used : SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Properties(java.util.Properties)

Aggregations

SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)63 Test (org.junit.Test)35 Properties (java.util.Properties)30 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)20 CheckedThread (org.apache.flink.core.testutils.CheckedThread)13 StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)13 Shard (com.amazonaws.services.kinesis.model.Shard)11 ArrayList (java.util.ArrayList)11 KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState)11 TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher)11 LinkedList (java.util.LinkedList)9 SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber)9 HashMap (java.util.HashMap)8 StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)7 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)7 Map (java.util.Map)6 AtomicReference (java.util.concurrent.atomic.AtomicReference)6 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)6 Matchers.anyString (org.mockito.Matchers.anyString)6 SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange)5