Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
From the class FlinkKafkaProducerBaseTest, the method testPartitionerInvokedWithDeterminatePartitionList:
/**
 * Tests that the partitions list is determinate and correctly provided to the custom partitioner.
 */
@SuppressWarnings("unchecked")
@Test
public void testPartitionerInvokedWithDeterminatePartitionList() throws Exception {
    FlinkKafkaPartitioner<String> mockPartitioner = mock(FlinkKafkaPartitioner.class);
    RuntimeContext mockRuntimeContext = mock(StreamingRuntimeContext.class);
    when(mockRuntimeContext.getIndexOfThisSubtask()).thenReturn(0);
    when(mockRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(1);

    // out-of-order list of 4 partitions
    List<PartitionInfo> mockPartitionsList = new ArrayList<>(4);
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 3, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 1, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 0, null, null, null));
    mockPartitionsList.add(new PartitionInfo(DummyFlinkKafkaProducer.DUMMY_TOPIC, 2, null, null, null));

    final DummyFlinkKafkaProducer<String> producer = new DummyFlinkKafkaProducer<>(
            FakeStandardProducerConfig.get(),
            new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
            mockPartitioner);
    producer.setRuntimeContext(mockRuntimeContext);

    final KafkaProducer mockProducer = producer.getMockKafkaProducer();
    when(mockProducer.partitionsFor(anyString())).thenReturn(mockPartitionsList);
    when(mockProducer.metrics()).thenReturn(null);

    producer.open(new Configuration());
    verify(mockPartitioner, times(1)).open(0, 1);

    producer.invoke("foobar", SinkContextUtil.forTimestamp(0));
    verify(mockPartitioner, times(1)).partition(
            "foobar", null, "foobar".getBytes(), DummyFlinkKafkaProducer.DUMMY_TOPIC, new int[] {0, 1, 2, 3});
}
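The contract this test pins down (a sorted, determinate partition array handed to the partitioner) is what a custom partitioner builds on. Below is a minimal, hedged sketch of such a partitioner; SubtaskBoundPartitioner is a made-up name and not part of the Flink sources, but the open/partition signatures are those of FlinkKafkaPartitioner.

import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;

// A hypothetical custom partitioner: each subtask always writes to the same Kafka partition.
public class SubtaskBoundPartitioner<T> extends FlinkKafkaPartitioner<T> {

    private int parallelInstanceId;

    @Override
    public void open(int parallelInstanceId, int parallelInstances) {
        this.parallelInstanceId = parallelInstanceId;
    }

    @Override
    public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        // 'partitions' arrives sorted ({0, 1, 2, 3} in the verification above),
        // so this mapping stays stable across invocations.
        return partitions[parallelInstanceId % partitions.length];
    }
}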
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
From the class KafkaConsumerTestBase, the method runAutoOffsetRetrievalAndCommitToKafka:
/**
 * This test ensures that when the consumers retrieve a start offset from Kafka (earliest,
 * latest), this offset is committed to Kafka, even if some partitions are not read.
 *
 * <p>Test:
 * - Create 3 partitions and write 50 messages into each.
 * - Start three consumers with auto.offset.reset='latest' and wait until they have committed to Kafka.
 * - Check that the offsets in Kafka are set to 50 for all three partitions.
 *
 * <p>See FLINK-3440 as well.
 */
public void runAutoOffsetRetrievalAndCommitToKafka() throws Exception {
    // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition
    // should be 50)
    final int parallelism = 3;
    final int recordsInEachPartition = 50;

    final String topicName = writeSequence(
            "testAutoOffsetRetrievalAndCommitToKafkaTopic", recordsInEachPartition, parallelism, 1);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);
    env.enableCheckpointing(200);

    Properties readProps = new Properties();
    readProps.putAll(standardProps);
    // set to reset to latest, so that partitions are initially not read
    readProps.setProperty("auto.offset.reset", "latest");

    DataStream<String> stream = getStream(env, topicName, new SimpleStringSchema(), readProps);
    stream.addSink(new DiscardingSink<String>());

    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner = new Thread("runner") {
        @Override
        public void run() {
            try {
                env.execute();
            } catch (Throwable t) {
                if (!(t instanceof JobCancellationException)) {
                    errorRef.set(t);
                }
            }
        }
    };
    runner.start();

    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();

    // the final committed offset in Kafka should be 50
    final Long l50 = 50L;
    final long deadline = 30_000_000_000L + System.nanoTime();
    do {
        Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
        Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
        Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);

        if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) {
            break;
        }

        Thread.sleep(100);
    } while (System.nanoTime() < deadline);

    // cancel the job & wait for the job to finish
    client.cancel(Iterables.getOnlyElement(getRunningJobs(client))).get();
    runner.join();

    final Throwable t = errorRef.get();
    if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
    }

    // final check to see if offsets are correctly in Kafka
    Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
    Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
    Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
    Assert.assertEquals(Long.valueOf(50L), o1);
    Assert.assertEquals(Long.valueOf(50L), o2);
    Assert.assertEquals(Long.valueOf(50L), o3);

    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
}
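On the application side, the behaviour exercised here corresponds roughly to the following job. This is a hedged sketch assuming the universal FlinkKafkaConsumer; the broker address, group id, and topic name are illustrative only.

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// offsets are committed back to Kafka on checkpoint completion
env.enableCheckpointing(200);

Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");
props.setProperty("group.id", "offset-commit-demo");
// start from the latest offset when the group has no committed offsets yet
props.setProperty("auto.offset.reset", "latest");

env.addSource(new FlinkKafkaConsumer<>("demo-topic", new SimpleStringSchema(), props))
        .addSink(new DiscardingSink<>());
env.execute("offset commit demo");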
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
From the class KafkaShortRetentionTestBase, the method runFailOnAutoOffsetResetNone:
/**
 * Ensures that the consumer fails properly if "auto.offset.reset" is set to "none".
 */
public void runFailOnAutoOffsetResetNone() throws Exception {
    final String topic = "auto-offset-reset-none-test";
    final int parallelism = 1;

    kafkaServer.createTestTopic(topic, parallelism, 1);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    // fail immediately
    env.setRestartStrategy(RestartStrategies.noRestart());

    // ----------- add consumer ----------
    Properties customProps = new Properties();
    customProps.putAll(standardProps);
    customProps.putAll(secureProps);
    // test that "none" leads to an exception
    customProps.setProperty("auto.offset.reset", "none");

    FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, new SimpleStringSchema(), customProps);
    DataStreamSource<String> consuming = env.addSource(source);
    consuming.addSink(new DiscardingSink<String>());

    try {
        env.execute("Test auto offset reset none");
    } catch (Throwable e) {
        // check if correct exception has been thrown
        if (!e.getCause().getCause().getMessage()
                .contains("Undefined offset with no reset policy for partition")) {
            throw e;
        }
    }

    kafkaServer.deleteTestTopic(topic);
}
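For reference, the configuration under test boils down to the properties sketched below (broker address and group id are made up). With no committed offsets for the group, "none" tells the Kafka client to throw instead of falling back to earliest or latest, which is the failure the test expects under the no-restart strategy above.

Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");
props.setProperty("group.id", "group-without-committed-offsets");
// valid values are "earliest", "latest" and "none"; "none" means: fail if no committed offset exists
props.setProperty("auto.offset.reset", "none");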
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
From the class KinesisEventsGeneratorProducerThread, the method create:
public static Thread create(
        final int totalEventCount,
        final int parallelism,
        final String awsAccessKey,
        final String awsSecretKey,
        final String awsRegion,
        final String kinesisStreamName,
        final AtomicReference<Throwable> errorHandler,
        final int flinkPort,
        final Configuration flinkConfig) {
    Runnable kinesisEventsGeneratorProducer = new Runnable() {
        @Override
        public void run() {
            try {
                StreamExecutionEnvironment see =
                        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort, flinkConfig);
                see.setParallelism(parallelism);

                // start data generator
                DataStream<String> simpleStringStream =
                        see.addSource(new KinesisEventsGeneratorProducerThread.EventsGenerator(totalEventCount))
                                .setParallelism(1);

                Properties producerProps = new Properties();
                producerProps.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, awsAccessKey);
                producerProps.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, awsSecretKey);
                producerProps.setProperty(AWSConfigConstants.AWS_REGION, awsRegion);

                FlinkKinesisProducer<String> kinesis =
                        new FlinkKinesisProducer<>(new SimpleStringSchema(), producerProps);
                kinesis.setFailOnError(true);
                kinesis.setDefaultStream(kinesisStreamName);
                kinesis.setDefaultPartition("0");
                simpleStringStream.addSink(kinesis);

                LOG.info("Starting producing topology");
                see.execute("Producing topology");
                LOG.info("Producing topo finished");
            } catch (Exception e) {
                LOG.warn("Error while running producing topology", e);
                errorHandler.set(e);
            }
        }
    };

    return new Thread(kinesisEventsGeneratorProducer);
}
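SimpleStringSchema implements both SerializationSchema<String> and DeserializationSchema<String>, which is why the same class serves the Kinesis producer above and the consumers in the earlier examples. A small round-trip sketch follows; the explicit UTF-8 charset is the default and is only spelled out for illustration.

SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);
byte[] payload = schema.serialize("event-42");    // bytes handed to the sink
String restored = schema.deserialize(payload);    // "event-42"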
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
From the class KinesisDataFetcherTest, the method testRecordPublisherFactoryIsTornDown:
@Test(timeout = 1000L)
public void testRecordPublisherFactoryIsTornDown() throws InterruptedException {
    KinesisProxyV2Interface kinesisV2 = mock(KinesisProxyV2Interface.class);

    TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<String>(
                    singletonList("fakeStream1"),
                    new TestSourceContext<>(),
                    TestUtils.efoProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    10,
                    2,
                    new AtomicReference<>(),
                    new LinkedList<>(),
                    new HashMap<>(),
                    mock(KinesisProxyInterface.class),
                    kinesisV2) {};

    fetcher.shutdownFetcher();
    fetcher.awaitTermination();
}
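Outside of tests, the KinesisDeserializationSchemaWrapper seen above is usually not written by hand; a hedged sketch of the consuming side, where FlinkKinesisConsumer accepts the plain DeserializationSchema directly (stream name and region are illustrative):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

Properties consumerProps = new Properties();
consumerProps.setProperty(AWSConfigConstants.AWS_REGION, "us-east-1");

DataStream<String> events = env.addSource(
        new FlinkKinesisConsumer<>("my-stream", new SimpleStringSchema(), consumerProps));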