
Example 41 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class FlinkKinesisConsumerMigrationTest, the method testRestoreWithEmptyState.

@Test
public void testRestoreWithEmptyState() throws Exception {
    final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
    for (StreamShardMetadata shardMetadata : TEST_STATE.keySet()) {
        Shard shard = new Shard();
        shard.setShardId(shardMetadata.getShardId());
        SequenceNumberRange sequenceNumberRange = new SequenceNumberRange();
        sequenceNumberRange.withStartingSequenceNumber("1");
        shard.setSequenceNumberRange(sequenceNumberRange);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), shard));
    }
    final TestFetcher<String> fetcher =
            new TestFetcher<>(
                    Collections.singletonList(TEST_STREAM_NAME),
                    new TestSourceContext<>(),
                    new TestRuntimeContext(true, 1, 0),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    null,
                    initialDiscoveryShards);
    final DummyFlinkKinesisConsumer<String> consumerFunction =
            new DummyFlinkKinesisConsumer<>(
                    fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setup();
    testHarness.initializeState(OperatorSnapshotUtil.getResourceFilename("kinesis-consumer-migration-test-flink" + testMigrateVersion + "-empty-snapshot"));
    testHarness.open();
    consumerFunction.run(new TestSourceContext<>());
    // assert that no state was restored
    assertTrue(consumerFunction.getRestoredState().isEmpty());
    // Although the restored state is empty, the fetcher should still have registered the
    // initially discovered shard. Furthermore, that shard should be treated as newly
    // created while the job wasn't running, and therefore be consumed from the earliest
    // sequence number.
    KinesisStreamShardState restoredShardState = fetcher.getSubscribedShardsState().get(0);
    assertEquals(TEST_STREAM_NAME, restoredShardState.getStreamShardHandle().getStreamName());
    assertEquals(TEST_SHARD_ID, restoredShardState.getStreamShardHandle().getShard().getShardId());
    assertFalse(restoredShardState.getStreamShardHandle().isClosed());
    assertEquals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(), restoredShardState.getLastProcessedSequenceNum());
    consumerOperator.close();
    consumerOperator.cancel();
}
Also used: SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) Test(org.junit.Test)
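
For context, the schema wrapping exercised by this test is the same pattern an application uses to consume Kinesis records as plain strings. A minimal sketch, assuming a placeholder stream name and region (not taken from the test); FlinkKinesisConsumer also accepts a plain DeserializationSchema and wraps it in a KinesisDeserializationSchemaWrapper internally, which is what the test constructs explicitly:

import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer;
import org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants;

public class KinesisStringSourceSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // Illustrative values; any region and initial position will do for the sketch.
        props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
        props.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, "TRIM_HORIZON");

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // "my-stream" is a hypothetical stream name.
        env.addSource(new FlinkKinesisConsumer<>("my-stream", new SimpleStringSchema(), props))
                .print();
        env.execute("Kinesis string source sketch");
    }
}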

Example 42 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class FlinkKinesisConsumerMigrationTest, the method testRestore.

@Test
public void testRestore() throws Exception {
    final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
    for (StreamShardMetadata shardMetadata : TEST_STATE.keySet()) {
        Shard shard = new Shard();
        shard.setShardId(shardMetadata.getShardId());
        SequenceNumberRange sequenceNumberRange = new SequenceNumberRange();
        sequenceNumberRange.withStartingSequenceNumber("1");
        shard.setSequenceNumberRange(sequenceNumberRange);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), shard));
    }
    final TestFetcher<String> fetcher =
            new TestFetcher<>(
                    Collections.singletonList(TEST_STREAM_NAME),
                    new TestSourceContext<>(),
                    new TestRuntimeContext(true, 1, 0),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    null,
                    initialDiscoveryShards);
    final DummyFlinkKinesisConsumer<String> consumerFunction =
            new DummyFlinkKinesisConsumer<>(
                    fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setup();
    testHarness.initializeState(OperatorSnapshotUtil.getResourceFilename("kinesis-consumer-migration-test-flink" + testMigrateVersion + "-snapshot"));
    testHarness.open();
    consumerFunction.run(new TestSourceContext<>());
    // assert that state is correctly restored
    assertNotEquals(null, consumerFunction.getRestoredState());
    assertEquals(1, consumerFunction.getRestoredState().size());
    assertEquals(TEST_STATE, removeEquivalenceWrappers(consumerFunction.getRestoredState()));
    assertEquals(1, fetcher.getSubscribedShardsState().size());
    assertEquals(TEST_SEQUENCE_NUMBER, fetcher.getSubscribedShardsState().get(0).getLastProcessedSequenceNum());
    KinesisStreamShardState restoredShardState = fetcher.getSubscribedShardsState().get(0);
    assertEquals(TEST_STREAM_NAME, restoredShardState.getStreamShardHandle().getStreamName());
    assertEquals(TEST_SHARD_ID, restoredShardState.getStreamShardHandle().getShard().getShardId());
    assertFalse(restoredShardState.getStreamShardHandle().isClosed());
    assertEquals(TEST_SEQUENCE_NUMBER, restoredShardState.getLastProcessedSequenceNum());
    consumerOperator.close();
    consumerOperator.cancel();
}
Also used: SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) Test(org.junit.Test)
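
Compared with Example 41, the difference is the restored start position: an empty snapshot yields the SENTINEL_EARLIEST_SEQUENCE_NUM placeholder, while a real snapshot yields the concrete last-processed sequence number. A small sketch of that distinction, assuming the SentinelSequenceNumber helpers behave as used in these tests (the "49585389" value is purely illustrative):

import org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber;
import org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber;

public class SentinelSketch {
    public static void main(String[] args) {
        // A sentinel marks "start from the earliest record", as asserted in Example 41.
        SequenceNumber earliest = SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get();
        // A restored shard instead carries a concrete last-processed sequence number,
        // as asserted in this test via TEST_SEQUENCE_NUMBER.
        SequenceNumber restored = new SequenceNumber("49585389");
        System.out.println(SentinelSequenceNumber.isSentinelSequenceNumber(earliest)); // true
        System.out.println(SentinelSequenceNumber.isSentinelSequenceNumber(restored)); // false
    }
}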

Example 43 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class EmulatedFullTopologyTest, the method testFullTopology.

// ======================================================================================================
// IMPORTANT: This test relies on behavior of the emulated PubSub that is GUARANTEED to
// be different in the real Google-hosted PubSub, so running it against the real service
// has a very high probability of failing.
// The assumptions:
// 1) Message ordering is maintained. We insert a STOP_MARKER _after_ the set of test
//    measurements and assume this STOP event arrives after the actual test data, so we
//    can stop processing when we see it. In the real PubSub this is NOT true.
// 2) Exactly once: we assume that every message we put in comes out exactly once.
//    In the real PubSub there are many situations (mostly failure/retry) where this is
//    not true.
@Test
public void testFullTopology() throws Exception {
    // ===============================================================================
    // Step 0: The test data
    List<String> input = new ArrayList<>(Arrays.asList("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten"));
    List<String> messagesToSend = new ArrayList<>(input);
    // Now add some stream termination messages.
    // NOTE: Messages are pulled from PubSub in batches by the source, so we need enough
    // STOP_MARKERs to ensure that ALL parallel tasks get at least one; otherwise at least
    // one task will not terminate and the test will never end.
    // We pull 3 at a time and run 4 parallel tasks, so we need at least 12 STOP_MARKERs.
    IntStream.rangeClosed(1, 20).forEach(i -> messagesToSend.add(STOP_MARKER));
    // IMPORTANT NOTE: This way of testing exploits an effect of the PubSub emulator that
    // is absolutely guaranteed NOT to hold in the real PubSub: the ordering of messages
    // in the topic is maintained. So here we can assume that if we add a stop message
    // LAST, we can terminate the test stream when we see it.
    // ===============================================================================
    // Step 1: We put test data into the topic
    // Publish the test messages into the input topic
    Publisher publisher = pubsubHelper.createPublisher(PROJECT_NAME, INPUT_TOPIC_NAME);
    for (String s : messagesToSend) {
        publisher.publish(PubsubMessage.newBuilder().setData(ByteString.copyFromUtf8(s)).build()).get();
    }
    publisher.shutdown();
    // ===============================================================================
    // Step 2: Now we run our topology
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    env.setParallelism(4);
    env.setRestartStrategy(RestartStrategies.noRestart());
    // Silly topology
    env.addSource(
                    // The deserialization schema below doubles as a self-termination
                    // feature: it detects the STOP_MARKER and ends the stream.
                    PubSubSource.newBuilder()
                            .withDeserializationSchema(new SimpleStringSchemaWithStopMarkerDetection())
                            .withProjectName(PROJECT_NAME)
                            .withSubscriptionName(INPUT_SUBSCRIPTION_NAME)
                            .withCredentials(EmulatorCredentials.getInstance())
                            .withPubSubSubscriberFactory(
                                    new PubSubSubscriberFactoryForEmulator(
                                            getPubSubHostPort(),
                                            PROJECT_NAME,
                                            INPUT_SUBSCRIPTION_NAME,
                                            1,
                                            Duration.ofSeconds(1),
                                            3))
                            .build())
            .map((MapFunction<String, String>) StringUtils::reverse)
            .addSink(
                    PubSubSink.newBuilder()
                            .withSerializationSchema(new SimpleStringSchema())
                            .withProjectName(PROJECT_NAME)
                            .withTopicName(OUTPUT_TOPIC_NAME)
                            .withCredentials(EmulatorCredentials.getInstance())
                            .withHostAndPortForEmulator(getPubSubHostPort())
                            .build());
    env.execute("Running unit test");
    // ===============================================================================
    // Now we should have all the resulting data in the output topic.
    // Step 3: Get the result from the output topic and verify if everything is there
    List<ReceivedMessage> receivedMessages = pubsubHelper.pullMessages(PROJECT_NAME, OUTPUT_SUBSCRIPTION_NAME, 100);
    assertEquals("Wrong number of elements", input.size(), receivedMessages.size());
    // Check output strings
    List<String> output = new ArrayList<>();
    // Extract the actual Strings from the ReceivedMessages
    receivedMessages.forEach(msg -> output.add(msg.getMessage().getData().toStringUtf8()));
    for (String test : input) {
        String reversedTest = org.apache.commons.lang3.StringUtils.reverse(test);
        LOG.info("Checking if \"{}\" --> \"{}\" exists", test, reversedTest);
        assertTrue("Missing " + test, output.contains(reversedTest));
    }
// ===============================================================================
}
Also used: StringUtils(org.apache.commons.lang3.StringUtils) PubSubSubscriberFactoryForEmulator(org.apache.flink.streaming.connectors.gcp.pubsub.emulator.PubSubSubscriberFactoryForEmulator) ArrayList(java.util.ArrayList) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ByteString(com.google.protobuf.ByteString) Publisher(com.google.cloud.pubsub.v1.Publisher) MapFunction(org.apache.flink.api.common.functions.MapFunction) ReceivedMessage(com.google.pubsub.v1.ReceivedMessage) Test(org.junit.Test)
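
The SimpleStringSchemaWithStopMarkerDetection referenced above is defined elsewhere in the Flink test sources and is not shown on this page. A plausible minimal sketch is an isEndOfStream override on SimpleStringSchema; the STOP_MARKER value below is a hypothetical stand-in for whatever constant the test actually publishes:

import org.apache.flink.api.common.serialization.SimpleStringSchema;

public class SimpleStringSchemaWithStopMarkerDetection extends SimpleStringSchema {
    // Hypothetical marker value; the real test defines its own STOP_MARKER constant.
    private static final String STOP_MARKER = "<<<STOP>>>";

    @Override
    public boolean isEndOfStream(String nextElement) {
        // Returning true tells the PubSubSource to stop consuming, which is the
        // "self-termination feature" the topology above relies on.
        return STOP_MARKER.equals(nextElement);
    }
}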

Example 44 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class EmulatedPubSubSinkTest, the method testFlinkSink.

@Test
public void testFlinkSink() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    List<String> input = Arrays.asList("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten");
    // Create test stream
    DataStream<String> theData =
            env.fromCollection(input)
                    .name("Test input")
                    .map((MapFunction<String, String>) StringUtils::reverse);
    // Sink into pubsub
    theData.addSink(
                    PubSubSink.newBuilder()
                            .withSerializationSchema(new SimpleStringSchema())
                            .withProjectName(PROJECT_NAME)
                            .withTopicName(TOPIC_NAME)
                            .withHostAndPortForEmulator(getPubSubHostPort())
                            .withCredentials(EmulatorCredentials.getInstance())
                            .build())
            .name("PubSub sink");
    // Run
    env.execute();
    // Now get the result from PubSub and verify if everything is there
    List<ReceivedMessage> receivedMessages = pubsubHelper.pullMessages(PROJECT_NAME, SUBSCRIPTION_NAME, 100);
    assertEquals("Wrong number of elements", input.size(), receivedMessages.size());
    // Check output strings
    List<String> output = new ArrayList<>();
    receivedMessages.forEach(msg -> output.add(msg.getMessage().getData().toStringUtf8()));
    for (String test : input) {
        assertTrue("Missing " + test, output.contains(StringUtils.reverse(test)));
    }
}
Also used: ArrayList(java.util.ArrayList) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ReceivedMessage(com.google.pubsub.v1.ReceivedMessage) Test(org.junit.Test)
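
Note that the PubSub examples use SimpleStringSchema on the serialization side while the Kinesis examples use it for deserialization; the class implements both interfaces. A quick round-trip sketch of those two roles:

import static org.junit.Assert.assertEquals;

import java.nio.charset.StandardCharsets;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.junit.Test;

public class SimpleStringSchemaRoundTripTest {

    @Test
    public void testRoundTrip() throws Exception {
        SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);
        // SerializationSchema side: String -> byte[]
        byte[] bytes = schema.serialize("One");
        // DeserializationSchema side: byte[] -> String
        assertEquals("One", schema.deserialize(bytes));
    }
}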

Example 45 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class FlinkKinesisConsumerTest, the method testPeriodicWatermark.

@Test
public void testPeriodicWatermark() throws Exception {
    String streamName = "fakeStreamName";
    Time maxOutOfOrderness = Time.milliseconds(5);
    long autoWatermarkInterval = 1_000;
    HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
    subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
    KinesisDeserializationSchema<String> deserializationSchema = new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    Properties props = new Properties();
    props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
    props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
    BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
    BlockingQueue<String> shard2 = new LinkedBlockingQueue<>();
    Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
    streamToQueueMap.put(streamName, Arrays.asList(shard1, shard2));
    // override createFetcher to mock Kinesis
    FlinkKinesisConsumer<String> sourceFunc =
            new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {

                @Override
                protected KinesisDataFetcher<String> createFetcher(
                        List<String> streams,
                        SourceContext<String> sourceContext,
                        RuntimeContext runtimeContext,
                        Properties configProps,
                        KinesisDeserializationSchema<String> deserializationSchema) {
                    KinesisDataFetcher<String> fetcher =
                            new KinesisDataFetcher<String>(
                                    streams,
                                    sourceContext,
                                    sourceContext.getCheckpointLock(),
                                    runtimeContext,
                                    configProps,
                                    deserializationSchema,
                                    getShardAssigner(),
                                    getPeriodicWatermarkAssigner(),
                                    null,
                                    new AtomicReference<>(),
                                    new ArrayList<>(),
                                    subscribedStreamsToLastDiscoveredShardIds,
                                    (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap),
                                    null) {};
                    return fetcher;
                }
            };
    sourceFunc.setShardAssigner(
            (streamShardHandle, i) -> {
                // shard ids look like "shardId-000000000000"; assign by the numeric suffix
                return Integer.parseInt(
                        streamShardHandle.getShard().getShardId().substring("shardId-".length()));
            });
    sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
    // there is currently no test harness specifically for sources,
    // so we overlay the source thread here
    AbstractStreamOperatorTestHarness<Object> testHarness = new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
    testHarness.initializeEmptyState();
    testHarness.open();
    ConcurrentLinkedQueue<Watermark> watermarks = new ConcurrentLinkedQueue<>();
    @SuppressWarnings("unchecked") SourceFunction.SourceContext<String> sourceContext = new CollectingSourceContext(testHarness.getCheckpointLock(), testHarness.getOutput()) {

        @Override
        public void emitWatermark(Watermark mark) {
            watermarks.add(mark);
        }

        @Override
        public void markAsTemporarilyIdle() {
        }
    };
    new Thread(() -> {
        try {
            sourceFunc.run(sourceContext);
        } catch (InterruptedException e) {
            // expected on cancel
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }).start();
    shard1.put("1");
    shard1.put("2");
    shard2.put("10");
    int recordCount = 3;
    int watermarkCount = 0;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit, first watermark is -3
    // - Shard-1 @2
    // - Shard-2 @10
    // - Watermark = min(2, 10) - maxOutOfOrderness = 2 - 5 = -3
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    // advance watermark
    shard1.put("10");
    recordCount++;
    awaitRecordCount(testHarness.getOutput(), recordCount);
    // Trigger watermark emit, second watermark is 5
    // - Shard-1 @10
    // - Shard-2 @10
    // - Watermark = min(10, 10) - maxOutOfOrderness = 10 - 5 = 5
    testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
    watermarkCount++;
    sourceFunc.cancel();
    testHarness.close();
    assertEquals("record count", recordCount, testHarness.getOutput().size());
    assertThat(watermarks, org.hamcrest.Matchers.contains(new Watermark(-3), new Watermark(5)));
    assertEquals("watermark count", watermarkCount, watermarks.size());
}
Also used: HashMap(java.util.HashMap) Time(org.apache.flink.streaming.api.windowing.time.Time) Properties(java.util.Properties) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) CollectingSourceContext(org.apache.flink.streaming.util.CollectingSourceContext) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) List(java.util.List) ArrayList(java.util.ArrayList) KinesisDeserializationSchema(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchema) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) KinesisDeserializationSchemaWrapper(org.apache.flink.streaming.connectors.kinesis.serialization.KinesisDeserializationSchemaWrapper) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)
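
The expected watermarks in this test follow the rule spelled out in its comments: the emitted watermark is the minimum of the per-shard maximum timestamps minus maxOutOfOrderness. A plain-Java illustration of that arithmetic (not connector API):

public class WatermarkMathSketch {

    static long watermark(long maxOutOfOrderness, long... perShardMaxTimestamps) {
        long min = Long.MAX_VALUE;
        for (long t : perShardMaxTimestamps) {
            min = Math.min(min, t);
        }
        return min - maxOutOfOrderness;
    }

    public static void main(String[] args) {
        // First emit: shard-1 at 2, shard-2 at 10 -> min(2, 10) - 5 = -3
        System.out.println(watermark(5, 2, 10));
        // Second emit: both shards at 10 -> min(10, 10) - 5 = 5
        System.out.println(watermark(5, 10, 10));
    }
}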

Aggregations

SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema): 63 usages
Test (org.junit.Test): 35 usages
Properties (java.util.Properties): 30 usages
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 20 usages
CheckedThread (org.apache.flink.core.testutils.CheckedThread): 13 usages
StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle): 13 usages
Shard (com.amazonaws.services.kinesis.model.Shard): 11 usages
ArrayList (java.util.ArrayList): 11 usages
KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState): 11 usages
TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher): 11 usages
LinkedList (java.util.LinkedList): 9 usages
SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber): 9 usages
HashMap (java.util.HashMap): 8 usages
StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata): 7 usages
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness): 7 usages
Map (java.util.Map): 6 usages
AtomicReference (java.util.concurrent.atomic.AtomicReference): 6 usages
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 6 usages
Matchers.anyString (org.mockito.Matchers.anyString): 6 usages
SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange): 5 usages