Example 31 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project aws-doc-sdk-examples by awsdocs.

From the class StreamingJob, method createSinkFromStaticConfig:

private static FlinkKinesisProducer<String> createSinkFromStaticConfig() {
    Properties outputProperties = new Properties();
    outputProperties.setProperty(ConsumerConfigConstants.AWS_REGION, region);
    // Disable KPL aggregation so each element is shipped as its own Kinesis record.
    outputProperties.setProperty("AggregationEnabled", "false");
    FlinkKinesisProducer<String> sink = new FlinkKinesisProducer<>(new SimpleStringSchema(), outputProperties);
    sink.setDefaultStream(outputStreamName);
    // Fixed default partition key: all records hash to the same shard unless overridden per record.
    sink.setDefaultPartition("0");
    return sink;
}
Also used: FlinkKinesisProducer (org.apache.flink.streaming.connectors.kinesis.FlinkKinesisProducer), SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema), Properties (java.util.Properties)
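
For context, here is a minimal sketch of how this sink is typically wired into a job. The region and outputStreamName fields, the stand-in source, and the main method below are assumptions modeled on the AWS sample, not part of the snippet above.

// Inside the same StreamingJob class; the field values are placeholders.
private static final String region = "us-west-2";
private static final String outputStreamName = "ExampleOutputStream";

public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // Any DataStream<String> source works here; fromElements is only a stand-in.
    DataStream<String> input = env.fromElements("record-1", "record-2");
    input.addSink(createSinkFromStaticConfig());
    env.execute("Kinesis sink example");
}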

Example 32 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class KafkaConsumerTestBase, method runFailOnNoBrokerTest:

// ------------------------------------------------------------------------
//  Suite of Tests
//
//  None of the tests here are activated with an @Test annotation; they must
//  be invoked from the extending classes. That way, each class can select
//  which tests to run.
// ------------------------------------------------------------------------
/**
 * Test that ensures the KafkaConsumer fails properly if the topic does not exist
 * and a wrong broker address was specified.
 *
 * @throws Exception on unexpected test failure
 */
public void runFailOnNoBrokerTest() throws Exception {
    try {
        Properties properties = new Properties();
        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        see.setRestartStrategy(RestartStrategies.noRestart());
        see.setParallelism(1);
        // use wrong ports for the consumers
        properties.setProperty("bootstrap.servers", "localhost:80");
        properties.setProperty("group.id", "test");
        // let the test fail fast
        properties.setProperty("request.timeout.ms", "3000");
        properties.setProperty("socket.timeout.ms", "3000");
        properties.setProperty("session.timeout.ms", "2000");
        properties.setProperty("fetch.max.wait.ms", "2000");
        properties.setProperty("heartbeat.interval.ms", "1000");
        properties.putAll(secureProps);
        DataStream<String> stream = getStream(see, "doesntexist", new SimpleStringSchema(), properties);
        stream.print();
        see.execute("No broker test");
    } catch (JobExecutionException jee) {
        final Optional<TimeoutException> optionalTimeoutException = ExceptionUtils.findThrowable(jee, TimeoutException.class);
        assertTrue(optionalTimeoutException.isPresent());
        final TimeoutException timeoutException = optionalTimeoutException.get();
        if (useNewSource) {
            assertThat(timeoutException.getCause().getMessage(), containsString("Timed out waiting for a node assignment."));
        } else {
            assertEquals("Timeout expired while fetching topic metadata", timeoutException.getMessage());
        }
    }
}
Also used: JobExecutionException (org.apache.flink.runtime.client.JobExecutionException), Optional (java.util.Optional), SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), Properties (java.util.Properties), TimeoutException (org.apache.kafka.common.errors.TimeoutException)
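
Concrete connector test suites activate these cases by wrapping the inherited run*() methods in @Test methods. A minimal sketch of that pattern; the class name below is hypothetical, and the abstract members of the base class are omitted for brevity.

import org.junit.Test;

public class MyKafkaConsumerITCase extends KafkaConsumerTestBase {

    // Activates the inherited, non-annotated test body for this suite only.
    @Test(timeout = 60000L)
    public void testFailOnNoBroker() throws Exception {
        runFailOnNoBrokerTest();
    }
}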

Example 33 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class KafkaConsumerTestBase, method runCommitOffsetsToKafka:

/**
 * Ensures that the committed offsets to Kafka are the offsets of "the next record to process".
 */
public void runCommitOffsetsToKafka() throws Exception {
    // 3 partitions with 50 records each (0-49, so the expected commit offset of each partition
    // should be 50)
    final int parallelism = 3;
    final int recordsInEachPartition = 50;
    final String topicName = writeSequence("testCommitOffsetsToKafkaTopic", recordsInEachPartition, parallelism, 1);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    env.setParallelism(parallelism);
    env.enableCheckpointing(200);
    DataStream<String> stream = getStream(env, topicName, new SimpleStringSchema(), standardProps);
    stream.addSink(new DiscardingSink<String>());
    final AtomicReference<Throwable> errorRef = new AtomicReference<>();
    final Thread runner = new Thread("runner") {

        @Override
        public void run() {
            try {
                env.execute();
            } catch (Throwable t) {
                if (!(t instanceof JobCancellationException)) {
                    errorRef.set(t);
                }
            }
        }
    };
    runner.start();
    // the final committed offset in Kafka should be 50
    final Long l50 = 50L;
    final long deadline = 30_000_000_000L + System.nanoTime();
    KafkaTestEnvironment.KafkaOffsetHandler kafkaOffsetHandler = kafkaServer.createOffsetHandler();
    do {
        Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
        Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
        Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
        if (l50.equals(o1) && l50.equals(o2) && l50.equals(o3)) {
            break;
        }
        Thread.sleep(100);
    } while (System.nanoTime() < deadline);
    // cancel the job & wait for the job to finish
    client.cancel(Iterables.getOnlyElement(getRunningJobs(client))).get();
    runner.join();
    final Throwable t = errorRef.get();
    if (t != null) {
        throw new RuntimeException("Job failed with an exception", t);
    }
    // final check to see if offsets are correctly in Kafka
    Long o1 = kafkaOffsetHandler.getCommittedOffset(topicName, 0);
    Long o2 = kafkaOffsetHandler.getCommittedOffset(topicName, 1);
    Long o3 = kafkaOffsetHandler.getCommittedOffset(topicName, 2);
    Assert.assertEquals(Long.valueOf(50L), o1);
    Assert.assertEquals(Long.valueOf(50L), o2);
    Assert.assertEquals(Long.valueOf(50L), o3);
    kafkaOffsetHandler.close();
    deleteTestTopic(topicName);
}
Also used: AtomicReference (java.util.concurrent.atomic.AtomicReference), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), JobCancellationException (org.apache.flink.runtime.client.JobCancellationException), SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
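
The semantic being asserted: Kafka's committed offset is the position of the next record to read, not the offset of the last record processed. A minimal arithmetic sketch of why the test expects 50:

// Each of the 3 partitions holds records with offsets 0..49.
long lastProcessedOffset = 49L;
// The commit points at the *next* record to process, hence 50, not 49.
long expectedCommittedOffset = lastProcessedOffset + 1;
assert expectedCommittedOffset == 50L;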

Example 34 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class KafkaConsumerTestBase, method runCancelingOnFullInputTest:

/**
 * Tests that the source can be properly canceled when reading full partitions.
 */
public void runCancelingOnFullInputTest() throws Exception {
    final String topic = "cancelingOnFullTopic";
    final int parallelism = 3;
    createTestTopic(topic, parallelism, 1);
    // launch a producer thread
    DataGenerators.InfiniteStringsGenerator generator = new DataGenerators.InfiniteStringsGenerator(kafkaServer, topic);
    generator.start();
    // launch a consumer asynchronously
    final AtomicReference<Throwable> jobError = new AtomicReference<>();
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.enableCheckpointing(100);
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    getStream(env, topic, new SimpleStringSchema(), props).addSink(new DiscardingSink<String>());
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
    final JobID jobId = jobGraph.getJobID();
    final Runnable jobRunner = () -> {
        try {
            submitJobAndWaitForResult(client, jobGraph, getClass().getClassLoader());
        } catch (Throwable t) {
            jobError.set(t);
        }
    };
    Thread runnerThread = new Thread(jobRunner, "program runner thread");
    runnerThread.start();
    // wait a bit before canceling
    Thread.sleep(2000);
    Throwable failureCause = jobError.get();
    if (failureCause != null) {
        failureCause.printStackTrace();
        Assert.fail("Test failed prematurely with: " + failureCause.getMessage());
    }
    // cancel
    client.cancel(jobId).get();
    // wait for the program to be done and validate that we failed with the right exception
    runnerThread.join();
    assertEquals(JobStatus.CANCELED, client.getJobStatus(jobId).get());
    if (generator.isAlive()) {
        generator.shutdown();
        generator.join();
    } else {
        Throwable t = generator.getError();
        if (t != null) {
            t.printStackTrace();
            fail("Generator failed: " + t.getMessage());
        } else {
            fail("Generator failed with no exception");
        }
    }
    deleteTestTopic(topic);
}
Also used: DataGenerators (org.apache.flink.streaming.connectors.kafka.testutils.DataGenerators), AtomicReference (java.util.concurrent.atomic.AtomicReference), CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString), Properties (java.util.Properties), TypeHint (org.apache.flink.api.common.typeinfo.TypeHint), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), JobID (org.apache.flink.api.common.JobID)
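
DataGenerators.InfiniteStringsGenerator is a Flink test utility; a rough sketch of what such a generator thread does, written against the plain Kafka producer API. The class name, payload format, and the requirement that producerProps carries serializers and bootstrap.servers are all assumptions, not Flink's actual implementation.

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class InfiniteStringsGeneratorSketch extends Thread {

    private volatile boolean running = true;
    private final String topic;
    private final Properties producerProps; // assumed to include serializers and bootstrap.servers

    public InfiniteStringsGeneratorSketch(String topic, Properties producerProps) {
        this.topic = topic;
        this.producerProps = producerProps;
    }

    @Override
    public void run() {
        // Keep producing until shutdown() flips the flag, so the consumer
        // under test always has input available while it is being canceled.
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(producerProps)) {
            long i = 0;
            while (running) {
                producer.send(new ProducerRecord<>(topic, "element-" + i++));
            }
        }
    }

    public void shutdown() {
        running = false;
    }
}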

Example 35 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

From the class KafkaChangelogTableITCase, method writeRecordsToKafka:

private void writeRecordsToKafka(String topic, List<String> lines) throws Exception {
    DataStreamSource<String> stream = env.fromCollection(lines);
    SerializationSchema<String> serSchema = new SimpleStringSchema();
    FlinkKafkaPartitioner<String> partitioner = new FlinkFixedPartitioner<>();
    // the producer must not produce duplicates
    Properties producerProperties = getStandardProps();
    producerProperties.setProperty("retries", "0");
    stream.sinkTo(
            KafkaSink.<String>builder()
                    .setBootstrapServers(
                            producerProperties.getProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG))
                    .setRecordSerializer(
                            KafkaRecordSerializationSchema.builder()
                                    .setTopic(topic)
                                    .setValueSerializationSchema(serSchema)
                                    .setPartitioner(partitioner)
                                    .build())
                    .setDeliverGuarantee(DeliveryGuarantee.EXACTLY_ONCE)
                    .build());
    env.execute("Write sequence");
}
Also used: FlinkFixedPartitioner (org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner), SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema), Properties (java.util.Properties)
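
A hedged usage sketch of the method above; the topic name and JSON payloads are placeholders chosen only to show the call shape, not data from the actual test.

// Requires java.util.Arrays and java.util.List.
List<String> lines = Arrays.asList(
        "{\"op\":\"c\",\"id\":1}",
        "{\"op\":\"u\",\"id\":1}",
        "{\"op\":\"d\",\"id\":1}");
writeRecordsToKafka("changelog_topic", lines);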

Aggregations

SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema): 63 uses
Test (org.junit.Test): 35 uses
Properties (java.util.Properties): 30 uses
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 20 uses
CheckedThread (org.apache.flink.core.testutils.CheckedThread): 13 uses
StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle): 13 uses
Shard (com.amazonaws.services.kinesis.model.Shard): 11 uses
ArrayList (java.util.ArrayList): 11 uses
KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState): 11 uses
TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher): 11 uses
LinkedList (java.util.LinkedList): 9 uses
SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber): 9 uses
HashMap (java.util.HashMap): 8 uses
StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata): 7 uses
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness): 7 uses
Map (java.util.Map): 6 uses
AtomicReference (java.util.concurrent.atomic.AtomicReference): 6 uses
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 6 uses
Matchers.anyString (org.mockito.Matchers.anyString): 6 uses
SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange): 5 uses