Search in sources :

Example 6 with SimpleStringSchema

use of org.apache.flink.streaming.util.serialization.SimpleStringSchema in project flink by apache.

the class Kafka09FetcherTest method testCommitDoesNotBlock.

/**
 * Checks that committing offsets completes in time while the fetcher's consumer is parked inside a
 * blocking {@code poll()} call.
 */
@Test
public void testCommitDoesNotBlock() throws Exception {
    // ----- test data: a single partition with one offset to commit -----
    final KafkaTopicPartition partitionToCommit = new KafkaTopicPartition("test", 42);
    final Map<KafkaTopicPartition, Long> offsetsToCommit = new HashMap<>();
    offsetsToCommit.put(partitionToCommit, 11L);

    // triggered from inside the mocked poll(), so the test knows the consumer has entered the call
    final OneShotLatch pollEntered = new OneShotLatch();
    // the mocked poll() waits on this latch; the mocked wakeup() triggers it
    final MultiShotLatch pollGate = new MultiShotLatch();

    // ----- mock consumer whose poll() blocks until wakeup() is invoked -----
    KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
    when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {

        @Override
        public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) throws InterruptedException {
            pollEntered.trigger();
            pollGate.await();
            return ConsumerRecords.empty();
        }
    });
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            pollGate.trigger();
            return null;
        }
    }).when(mockConsumer).wakeup();

    // every KafkaConsumer instantiation is answered with the mock
    whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

    // ----- the fetcher under test -----
    @SuppressWarnings("unchecked")
    SourceContext<String> sourceContext = mock(SourceContext.class);
    Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets = Collections.singletonMap(
        new KafkaTopicPartition("test", 42),
        KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
    KeyedDeserializationSchema<String> schema = new KeyedDeserializationSchemaWrapper<>(new SimpleStringSchema());
    final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(
        sourceContext,
        partitionsWithInitialOffsets,
        null, /* periodic watermark extractor */
        null, /* punctuated watermark extractor */
        new TestProcessingTimeService(),
        10, /* watermark interval */
        this.getClass().getClassLoader(),
        "task_name",
        new UnregisteredMetricsGroup(),
        schema,
        new Properties(),
        0L,
        false);

    // ----- run the fetch loop in its own thread, remembering any failure -----
    final AtomicReference<Throwable> fetchLoopFailure = new AtomicReference<>();
    final Thread fetcherRunner = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                fetcher.runFetchLoop();
            } catch (Throwable t) {
                fetchLoopFailure.set(t);
            }
        }
    }, "fetcher runner");
    fetcherRunner.start();

    // do not continue before the consumer is inside the blocking poll()
    pollEntered.await();

    // ----- commit the offsets from a separate thread -----
    final AtomicReference<Throwable> commitFailure = new AtomicReference<>();
    final Thread committer = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                fetcher.commitInternalOffsetsToKafka(offsetsToCommit);
            } catch (Throwable t) {
                commitFailure.set(t);
            }
        }
    }, "committer runner");
    committer.start();

    // ----- the commit has to be through within 30 seconds -----
    committer.join(30000);
    assertFalse("The committer did not finish in time", committer.isAlive());

    // ----- shut the fetcher down and wait for its thread to end -----
    fetcher.cancel();
    fetcherRunner.join();

    // a Handover.ClosedException from the fetch loop is tolerated; anything else fails the test
    final Throwable fetcherProblem = fetchLoopFailure.get();
    if (!(fetcherProblem == null || fetcherProblem instanceof Handover.ClosedException)) {
        throw new Exception("Exception in the fetcher", fetcherProblem);
    }

    // the committer must not have failed at all
    final Throwable committerProblem = commitFailure.get();
    if (committerProblem != null) {
        throw new Exception("Exception in the committer", committerProblem);
    }
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) HashMap(java.util.HashMap) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) Properties(java.util.Properties) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) KeyedDeserializationSchemaWrapper(org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper) Handover(org.apache.flink.streaming.connectors.kafka.internal.Handover) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) AtomicReference(java.util.concurrent.atomic.AtomicReference) KafkaConsumerThread(org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread) Kafka09Fetcher(org.apache.flink.streaming.connectors.kafka.internal.Kafka09Fetcher) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Mockito.anyLong(org.mockito.Mockito.anyLong) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 7 with SimpleStringSchema

use of org.apache.flink.streaming.util.serialization.SimpleStringSchema in project flink by apache.

the class Kafka09FetcherTest method testCancellationWhenEmitBlocks.

/**
 * Checks that cancelling the fetcher (plus interrupting its thread) leaves no thread blocked on the
 * source context while records are being emitted.
 */
@Test
public void testCancellationWhenEmitBlocks() throws Exception {
    // ----- test data: three records (offsets 15, 16, 17) for one topic partition -----
    final String topic = "test-topic";
    final int partition = 3;
    final byte[] payload = new byte[] { 1, 2, 3, 4 };

    final ConsumerRecord<byte[], byte[]> firstRecord =
        new ConsumerRecord<byte[], byte[]>(topic, partition, 15, payload, payload);
    final ConsumerRecord<byte[], byte[]> secondRecord =
        new ConsumerRecord<byte[], byte[]>(topic, partition, 16, payload, payload);
    final ConsumerRecord<byte[], byte[]> thirdRecord =
        new ConsumerRecord<byte[], byte[]>(topic, partition, 17, payload, payload);
    final List<ConsumerRecord<byte[], byte[]>> records = Arrays.asList(firstRecord, secondRecord, thirdRecord);

    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> recordsByPartition = new HashMap<>();
    recordsByPartition.put(new TopicPartition(topic, partition), records);
    final ConsumerRecords<byte[], byte[]> polledBatch = new ConsumerRecords<>(recordsByPartition);

    // ----- mock consumer: every poll() hands back the same batch -----
    final KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
    when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {

        @Override
        public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) {
            return polledBatch;
        }
    });
    whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);

    // ----- the fetcher under test, emitting into a blocking source context -----
    BlockingSourceContext<String> sourceContext = new BlockingSourceContext<>();
    Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets = Collections.singletonMap(
        new KafkaTopicPartition(topic, partition),
        KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
    KeyedDeserializationSchema<String> schema = new KeyedDeserializationSchemaWrapper<>(new SimpleStringSchema());
    final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(
        sourceContext,
        partitionsWithInitialOffsets,
        null, /* periodic watermark extractor */
        null, /* punctuated watermark extractor */
        new TestProcessingTimeService(),
        10, /* watermark interval */
        this.getClass().getClassLoader(),
        "task_name",
        new UnregisteredMetricsGroup(),
        schema,
        new Properties(),
        0L,
        false);

    // ----- run the fetch loop in its own thread; a failure is only recorded here -----
    final AtomicReference<Throwable> fetchLoopFailure = new AtomicReference<>();
    final Thread fetcherRunner = new Thread(new Runnable() {

        @Override
        public void run() {
            try {
                fetcher.runFetchLoop();
            } catch (Throwable t) {
                fetchLoopFailure.set(t);
            }
        }
    }, "fetcher runner");
    fetcherRunner.start();

    // hold on until a thread is blocked emitting into the source context
    sourceContext.waitTillHasBlocker();

    // cancel the fetcher and also interrupt its thread, as is usually done on the task thread;
    // once the thread has ended, nothing may still be blocked on the source context
    fetcher.cancel();
    fetcherRunner.interrupt();
    fetcherRunner.join();

    assertFalse("fetcher threads did not properly finish", sourceContext.isStillBlocking());
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) HashMap(java.util.HashMap) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) Properties(java.util.Properties) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) KeyedDeserializationSchemaWrapper(org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper) List(java.util.List) AtomicReference(java.util.concurrent.atomic.AtomicReference) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) KafkaConsumerThread(org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread) Kafka09Fetcher(org.apache.flink.streaming.connectors.kafka.internal.Kafka09Fetcher) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Mockito.anyLong(org.mockito.Mockito.anyLong) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 8 with SimpleStringSchema

use of org.apache.flink.streaming.util.serialization.SimpleStringSchema in project flink by apache.

the class KafkaConsumerTestBase method runStartFromKafkaCommitOffsets.

/**
 * Checks that a consumer started with {@code StartupMode.GROUP_OFFSETS} picks up whatever offsets were
 * previously committed to Kafka and continues reading from that position.
 *
 * <p>The test writes a sequence with 300 records per partition to a test topic and inspects partitions
 * 0, 1 and 2. It then runs, at most 3 times, a throttled job with fast checkpointing whose map function
 * throws a {@code SuccessException} at its 150th record, until a committed offset is found for at least
 * one partition. Finally the topic is read again: for each partition the read is expected to start at
 * the committed offset (or at 0 if none was committed) and to cover all remaining records.
 *
 * <p>The offset handler is closed even when the test fails part-way; the test topic is only deleted
 * after a successful run.
 *
 * @throws Exception if a job fails unexpectedly, or a {@link RuntimeException} if no offset was
 *     committed within the allowed number of attempts
 */
public void runStartFromKafkaCommitOffsets() throws Exception {
    final int parallelism = 3;
    final int recordsInEachPartition = 300;
    // upper bound on how often the "read and commit" job is re-run
    final int maxCommitAttempts = 3;
    final String topicName = writeSequence("testStartFromKafkaCommitOffsetsTopic", recordsInEachPartition, parallelism, 1);
    final KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    try {
        Long o1;
        Long o2;
        Long o3;
        int attempt = 0;
        // make sure that o1, o2, o3 are not all null before proceeding
        do {
            attempt++;
            LOG.info("Attempt " + attempt + " to read records and commit some offsets to Kafka");
            final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
            env.getConfig().disableSysoutLogging();
            env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
            env.setParallelism(parallelism);
            // fast checkpoints to make sure we commit some offsets
            env.enableCheckpointing(20);
            env.addSource(kafkaServer.getConsumer(topicName, new SimpleStringSchema(), standardProps)).map(new ThrottledMapper<String>(50)).map(new MapFunction<String, Object>() {

                int count = 0;

                @Override
                public Object map(String value) throws Exception {
                    count++;
                    // stop the job once this function has seen 150 records
                    if (count == 150) {
                        throw new SuccessException();
                    }
                    return null;
                }
            }).addSink(new DiscardingSink<>());
            tryExecute(env, "Read some records to commit offsets to Kafka");
            o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
            o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
            o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
        } while (o1 == null && o2 == null && o3 == null && attempt < maxCommitAttempts);
        if (o1 == null && o2 == null && o3 == null) {
            throw new RuntimeException("No offsets have been committed after " + maxCommitAttempts + " attempts");
        }
        LOG.info("Got final committed offsets from Kafka o1={}, o2={}, o3={}", o1, o2, o3);
        final StreamExecutionEnvironment env2 = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
        env2.getConfig().disableSysoutLogging();
        env2.getConfig().setRestartStrategy(RestartStrategies.noRestart());
        env2.setParallelism(parallelism);
        // whatever offsets were committed for each partition, the consumer should pick
        // them up and start from the correct position so that the remaining records are all read;
        // each entry is (number of records still to read, offset to start from)
        HashMap<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset = new HashMap<>();
        partitionsToValuesCountAndStartOffset.put(0, new Tuple2<>((o1 != null) ? (int) (recordsInEachPartition - o1) : recordsInEachPartition, (o1 != null) ? o1.intValue() : 0));
        partitionsToValuesCountAndStartOffset.put(1, new Tuple2<>((o2 != null) ? (int) (recordsInEachPartition - o2) : recordsInEachPartition, (o2 != null) ? o2.intValue() : 0));
        partitionsToValuesCountAndStartOffset.put(2, new Tuple2<>((o3 != null) ? (int) (recordsInEachPartition - o3) : recordsInEachPartition, (o3 != null) ? o3.intValue() : 0));
        readSequence(env2, StartupMode.GROUP_OFFSETS, null, standardProps, topicName, partitionsToValuesCountAndStartOffset);
    } finally {
        // release the handler on every exit path, not only after a fully successful run
        kafkaOffsetHandler.close();
    }
    deleteTestTopic(topicName);
}
Also used : HashMap(java.util.HashMap) MapFunction(org.apache.flink.api.common.functions.MapFunction) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) SuccessException(org.apache.flink.test.util.SuccessException) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 9 with SimpleStringSchema

use of org.apache.flink.streaming.util.serialization.SimpleStringSchema in project flink by apache.

the class FlinkKinesisConsumerTest method testSnapshotStateShouldBeNullIfSourceNotOpened.

// ----------------------------------------------------------------------
// Tests related to state initialization
// ----------------------------------------------------------------------
/**
 * A consumer that was only constructed (no {@code open()} call is made here) must return
 * {@code null} from {@code snapshotState}.
 */
@Test
public void testSnapshotStateShouldBeNullIfSourceNotOpened() throws Exception {
    // placeholder AWS settings for constructing the consumer
    final Properties consumerConfig = new Properties();
    consumerConfig.setProperty(AWSConfigConstants.AWS_REGION, "us-east-1");
    consumerConfig.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, "accessKeyId");
    consumerConfig.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, "secretKey");

    final FlinkKinesisConsumer<String> unopenedConsumer =
        new FlinkKinesisConsumer<>("fakeStream", new SimpleStringSchema(), consumerConfig);

    // the checkpoint id and timestamp are arbitrary values
    final Object snapshot = unopenedConsumer.snapshotState(123, 123);
    assertTrue(snapshot == null);
}
Also used : TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) Properties(java.util.Properties) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 10 with SimpleStringSchema

use of org.apache.flink.streaming.util.serialization.SimpleStringSchema in project flink by apache.

the class Kafka09FetcherTest method ensureOffsetsGetCommitted.

/**
 * Checks that offsets handed to {@code commitInternalOffsetsToKafka} reach the consumer's
 * {@code commitAsync} call, and that the committed value is the given offset plus one.
 *
 * <p>Two rounds of offsets are committed; each round is captured from the mocked consumer through a
 * queue and verified per topic.
 */
@Test
public void ensureOffsetsGetCommitted() throws Exception {
    // test data
    final KafkaTopicPartition testPartition1 = new KafkaTopicPartition("test", 42);
    final KafkaTopicPartition testPartition2 = new KafkaTopicPartition("another", 99);
    final Map<KafkaTopicPartition, Long> testCommitData1 = new HashMap<>();
    testCommitData1.put(testPartition1, 11L);
    testCommitData1.put(testPartition2, 18L);
    final Map<KafkaTopicPartition, Long> testCommitData2 = new HashMap<>();
    testCommitData2.put(testPartition1, 19L);
    testCommitData2.put(testPartition2, 28L);
    // receives every offset map that the mocked commitAsync() is called with
    final BlockingQueue<Map<TopicPartition, OffsetAndMetadata>> commitStore = new LinkedBlockingQueue<>();
    // ----- the mock consumer with poll(), wakeup(), and commit(A)sync calls ----
    final MultiShotLatch blockerLatch = new MultiShotLatch();
    KafkaConsumer<?, ?> mockConsumer = mock(KafkaConsumer.class);
    // poll() blocks until wakeup() triggers the latch, then returns no records
    when(mockConsumer.poll(anyLong())).thenAnswer(new Answer<ConsumerRecords<?, ?>>() {

        @Override
        public ConsumerRecords<?, ?> answer(InvocationOnMock invocation) throws InterruptedException {
            blockerLatch.await();
            return ConsumerRecords.empty();
        }
    });
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            blockerLatch.trigger();
            return null;
        }
    }).when(mockConsumer).wakeup();
    // commitAsync() records the offsets and immediately reports success to the callback
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) {
            @SuppressWarnings("unchecked") Map<TopicPartition, OffsetAndMetadata> offsets = (Map<TopicPartition, OffsetAndMetadata>) invocation.getArguments()[0];
            OffsetCommitCallback callback = (OffsetCommitCallback) invocation.getArguments()[1];
            commitStore.add(offsets);
            callback.onComplete(offsets, null);
            return null;
        }
    }).when(mockConsumer).commitAsync(Mockito.<Map<TopicPartition, OffsetAndMetadata>>any(), any(OffsetCommitCallback.class));
    // make sure the fetcher creates the mock consumer
    whenNew(KafkaConsumer.class).withAnyArguments().thenReturn(mockConsumer);
    // ----- create the test fetcher -----
    @SuppressWarnings("unchecked") SourceContext<String> sourceContext = mock(SourceContext.class);
    // NOTE(review): only ("test", 42) is handed to the fetcher, so the "another" branches below may
    // never be reached - confirm whether offsets for other partitions are passed on to commitAsync()
    Map<KafkaTopicPartition, Long> partitionsWithInitialOffsets = Collections.singletonMap(new KafkaTopicPartition("test", 42), KafkaTopicPartitionStateSentinel.GROUP_OFFSET);
    KeyedDeserializationSchema<String> schema = new KeyedDeserializationSchemaWrapper<>(new SimpleStringSchema());
    final Kafka09Fetcher<String> fetcher = new Kafka09Fetcher<>(sourceContext, partitionsWithInitialOffsets, null, /* periodic watermark extractor */
    null, /* punctuated watermark extractor */
    new TestProcessingTimeService(), 10, /* watermark interval */
    this.getClass().getClassLoader(), "task_name", new UnregisteredMetricsGroup(), schema, new Properties(), 0L, false);
    // ----- run the fetcher -----
    final AtomicReference<Throwable> error = new AtomicReference<>();
    final Thread fetcherRunner = new Thread("fetcher runner") {

        @Override
        public void run() {
            try {
                fetcher.runFetchLoop();
            } catch (Throwable t) {
                error.set(t);
            }
        }
    };
    fetcherRunner.start();
    // ----- trigger the first offset commit -----
    fetcher.commitInternalOffsetsToKafka(testCommitData1);
    Map<TopicPartition, OffsetAndMetadata> result1 = commitStore.take();
    // expected committed offset = offset from testCommitData1 + 1
    for (Entry<TopicPartition, OffsetAndMetadata> entry : result1.entrySet()) {
        TopicPartition partition = entry.getKey();
        if (partition.topic().equals("test")) {
            assertEquals(42, partition.partition());
            assertEquals(12L, entry.getValue().offset());
        } else if (partition.topic().equals("another")) {
            assertEquals(99, partition.partition());
            assertEquals(19L, entry.getValue().offset());
        }
    }
    // ----- trigger the second offset commit -----
    fetcher.commitInternalOffsetsToKafka(testCommitData2);
    Map<TopicPartition, OffsetAndMetadata> result2 = commitStore.take();
    // expected committed offset = offset from testCommitData2 + 1
    for (Entry<TopicPartition, OffsetAndMetadata> entry : result2.entrySet()) {
        TopicPartition partition = entry.getKey();
        if (partition.topic().equals("test")) {
            assertEquals(42, partition.partition());
            assertEquals(20L, entry.getValue().offset());
        } else if (partition.topic().equals("another")) {
            assertEquals(99, partition.partition());
            assertEquals(29L, entry.getValue().offset());
        }
    }
    // ----- test done, wait till the fetcher is done for a clean shutdown -----
    fetcher.cancel();
    fetcherRunner.join();
    // check that there were no errors in the fetcher;
    // a Handover.ClosedException is tolerated, anything else fails the test
    final Throwable caughtError = error.get();
    if (caughtError != null && !(caughtError instanceof Handover.ClosedException)) {
        throw new Exception("Exception in the fetcher", caughtError);
    }
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) HashMap(java.util.HashMap) MultiShotLatch(org.apache.flink.core.testutils.MultiShotLatch) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Properties(java.util.Properties) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) KeyedDeserializationSchemaWrapper(org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Handover(org.apache.flink.streaming.connectors.kafka.internal.Handover) AtomicReference(java.util.concurrent.atomic.AtomicReference) KafkaConsumerThread(org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread) Kafka09Fetcher(org.apache.flink.streaming.connectors.kafka.internal.Kafka09Fetcher) InvocationOnMock(org.mockito.invocation.InvocationOnMock) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) Mockito.anyLong(org.mockito.Mockito.anyLong) SimpleStringSchema(org.apache.flink.streaming.util.serialization.SimpleStringSchema) TestProcessingTimeService(org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService) HashMap(java.util.HashMap) Map(java.util.Map) OffsetCommitCallback(org.apache.kafka.clients.consumer.OffsetCommitCallback) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

SimpleStringSchema (org.apache.flink.streaming.util.serialization.SimpleStringSchema)15 Test (org.junit.Test)12 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)12 Properties (java.util.Properties)11 HashMap (java.util.HashMap)7 InvocationOnMock (org.mockito.invocation.InvocationOnMock)7 AtomicReference (java.util.concurrent.atomic.AtomicReference)6 UnregisteredMetricsGroup (org.apache.flink.metrics.groups.UnregisteredMetricsGroup)6 KafkaConsumerThread (org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread)6 KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition)6 TestProcessingTimeService (org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService)6 KeyedDeserializationSchemaWrapper (org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper)6 ConsumerRecords (org.apache.kafka.clients.consumer.ConsumerRecords)6 Mockito.anyLong (org.mockito.Mockito.anyLong)6 Configuration (org.apache.flink.configuration.Configuration)4 MultiShotLatch (org.apache.flink.core.testutils.MultiShotLatch)4 Handover (org.apache.flink.streaming.connectors.kafka.internal.Handover)4 TopicPartition (org.apache.kafka.common.TopicPartition)4 UnknownHostException (java.net.UnknownHostException)3 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)3