Example 1 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

Class PubSubConsumingTest, method testProducingMultipleResults.

@Test
public void testProducingMultipleResults() throws Exception {
    TestPubSubSubscriber testPubSubSubscriber =
            new TestPubSubSubscriber(
                    receivedMessage("1", pubSubMessage("A")),
                    receivedMessage("2", pubSubMessage("B,C,D")),
                    receivedMessage("3", pubSubMessage("E")));
    PubSubSource<String> pubSubSource = PubSubSource.newBuilder().withDeserializationSchema(new SimpleStringSchema() {

        @Override
        public void deserialize(byte[] message, Collector<String> out) {
            String[] records = super.deserialize(message).split(",");
            for (String record : records) {
                out.collect(record);
            }
        }

        @Override
        public boolean isEndOfStream(String nextElement) {
            return nextElement.equals("C");
        }
    }).withProjectName("fakeProject")
            .withSubscriptionName("fakeSubscription")
            .withPubSubSubscriberFactory(credentials -> testPubSubSubscriber)
            .withCredentials(mock(Credentials.class))
            .build();
    Object lock = new Object();
    ConcurrentLinkedQueue<String> results = new ConcurrentLinkedQueue<>();
    Thread thread = createSourceThread(pubSubSource, lock, results);
    try {
        thread.start();
        awaitRecordCount(results, 2);
        // we emit only the records prior to the end of the stream
        assertThat(new ArrayList<>(results), equalTo(Arrays.asList("A", "B")));
        pubSubSource.snapshotState(0, 0);
        pubSubSource.notifyCheckpointComplete(0);
        // the message containing the end-of-stream record is also acknowledged
        assertThat(testPubSubSubscriber.getAcknowledgedIds(), equalTo(Arrays.asList("1", "2")));
    } finally {
        pubSubSource.cancel();
        thread.join();
    }
}
Also used: Collector(org.apache.flink.util.Collector) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) ByteString(com.google.protobuf.ByteString) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)
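
For contrast with the overridden schema above, a minimal sketch of the default behavior: the stock SimpleStringSchema turns each Pub/Sub message payload into exactly one String record, with no end-of-stream marker. The builder calls mirror the test above; the project and subscription names are placeholders, and the import path assumes the flink-connector-gcp-pubsub artifact.

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.gcp.pubsub.PubSubSource;

public class PubSubStringJob {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        PubSubSource<String> source = PubSubSource.newBuilder()
                // each message payload becomes exactly one String record
                .withDeserializationSchema(new SimpleStringSchema())
                // placeholder names, not values from the test above
                .withProjectName("my-gcp-project")
                .withSubscriptionName("my-subscription")
                .build();
        env.addSource(source).print();
        env.execute("pubsub-string-job");
    }
}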

Example 2 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

Class FlinkKafkaProducerBaseTest, method testKeyValueDeserializersSetIfMissing.

/**
 * Tests that the constructor defaults the key and value serializer entries in the
 * config to the byte array serializer if they are not set.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
    Properties props = new Properties();
    props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
    // should set missing key value deserializers
    new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
    assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
    assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
    assertTrue(props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
    assertTrue(props.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).equals(ByteArraySerializer.class.getName()));
}
Also used: SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Properties(java.util.Properties) Test(org.junit.Test)
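
The same wiring outside a test harness, as a hedged sketch using the public FlinkKafkaProducer constructor rather than the test's DummyFlinkKafkaProducer; the topic name and broker address are placeholders.

import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class ProducerWiring {
    public static FlinkKafkaProducer<String> stringProducer() {
        Properties props = new Properties();
        // placeholder address; key/value serializer entries are intentionally omitted
        props.setProperty("bootstrap.servers", "localhost:9092");
        // the constructor defaults the missing serializer entries to the
        // byte array serializer, which is what the test above asserts
        return new FlinkKafkaProducer<>("my-topic", new SimpleStringSchema(), props);
    }
}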

Example 3 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

Class FlinkKafkaProducerBaseTest, method testAtLeastOnceProducer.

/**
 * Test ensuring that the producer is not dropping buffered records; we set a timeout because
 * the test will not finish if the logic is broken.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testAtLeastOnceProducer() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer =
            new DummyFlinkKafkaProducer<>(
                    FakeStandardProducerConfig.get(),
                    new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
                    null);
    producer.setFlushOnCheckpoint(true);
    final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();
    final OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));
    testHarness.open();
    testHarness.processElement(new StreamRecord<>("msg-1"));
    testHarness.processElement(new StreamRecord<>("msg-2"));
    testHarness.processElement(new StreamRecord<>("msg-3"));
    verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class));
    Assert.assertEquals(3, producer.getPendingSize());
    // start a thread to perform checkpointing
    CheckedThread snapshotThread = new CheckedThread() {

        @Override
        public void go() throws Exception {
            // this should block until all records are flushed;
            // if the snapshot implementation returns before pending records are
            // flushed, the assertions below will fail
            testHarness.snapshot(123L, 123L);
        }
    };
    snapshotThread.start();
    // before proceeding, make sure that flushing has started and that the
    // snapshot is still blocked; this would block forever if the snapshot
    // didn't perform a flush
    producer.waitUntilFlushStarted();
    Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
    // now, complete the callbacks
    producer.getPendingCallbacks().get(0).onCompletion(null, null);
    Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
    Assert.assertEquals(2, producer.getPendingSize());
    producer.getPendingCallbacks().get(1).onCompletion(null, null);
    Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
    Assert.assertEquals(1, producer.getPendingSize());
    producer.getPendingCallbacks().get(2).onCompletion(null, null);
    Assert.assertEquals(0, producer.getPendingSize());
    // this would fail with an exception if flushing wasn't completed before the snapshot method
    // returned
    snapshotThread.sync();
    testHarness.close();
}
Also used: Mockito.anyString(org.mockito.Mockito.anyString) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) CheckedThread(org.apache.flink.core.testutils.CheckedThread) Callback(org.apache.kafka.clients.producer.Callback) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Test(org.junit.Test)
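
The behavior this test pins down can be summarized in a short sketch. This is illustrative only: the class and field names below are mine, not Flink internals. The essential invariant is that snapshotState() must not return while any record is still pending.

import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

// Illustrative sketch of the at-least-once pattern; not Flink's actual classes.
public abstract class AtLeastOnceSinkSketch<T> extends RichSinkFunction<T>
        implements CheckpointedFunction {

    private final Object flushLock = new Object();
    private int pendingRecords; // +1 per async send, -1 per acknowledged callback

    protected void recordSent() {
        synchronized (flushLock) {
            pendingRecords++;
        }
    }

    protected void recordAcknowledged() {
        synchronized (flushLock) {
            if (--pendingRecords == 0) {
                flushLock.notifyAll();
            }
        }
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        // block the checkpoint until every in-flight record is acknowledged;
        // this is what keeps the test's snapshot thread alive until the
        // callbacks complete
        synchronized (flushLock) {
            while (pendingRecords > 0) {
                flushLock.wait();
            }
        }
    }

    @Override
    public void initializeState(FunctionInitializationContext context) {
        // no state to restore in this sketch
    }
}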

Example 4 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

Class FlinkKafkaProducerBaseTest, method testAsyncErrorRethrownOnCheckpointAfterFlush.

/**
 * Test ensuring that if an async exception is caught for one of the flushed requests on
 * checkpoint, it should be rethrown; we set a timeout because the test will not finish if the
 * logic is broken.
 *
 * <p>Note that this test does not verify that the snapshot method blocks correctly when
 * there are pending records; that case is covered by testAtLeastOnceProducer.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testAsyncErrorRethrownOnCheckpointAfterFlush() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer =
            new DummyFlinkKafkaProducer<>(
                    FakeStandardProducerConfig.get(),
                    new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
                    null);
    producer.setFlushOnCheckpoint(true);
    final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();
    final OneInputStreamOperatorTestHarness<String, Object> testHarness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));
    testHarness.open();
    testHarness.processElement(new StreamRecord<>("msg-1"));
    testHarness.processElement(new StreamRecord<>("msg-2"));
    testHarness.processElement(new StreamRecord<>("msg-3"));
    verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class));
    // only let the first callback succeed for now
    producer.getPendingCallbacks().get(0).onCompletion(null, null);
    CheckedThread snapshotThread = new CheckedThread() {

        @Override
        public void go() throws Exception {
            // this should block at first, since there are still two pending records
            // that need to be flushed
            testHarness.snapshot(123L, 123L);
        }
    };
    snapshotThread.start();
    // let the 2nd message fail with an async exception
    producer.getPendingCallbacks().get(1).onCompletion(null, new Exception("artificial async failure for 2nd message"));
    producer.getPendingCallbacks().get(2).onCompletion(null, null);
    try {
        snapshotThread.sync();
    } catch (Exception e) {
        // the snapshot should have failed with the async exception
        Assert.assertTrue(e.getCause().getMessage().contains("artificial async failure for 2nd message"));
        // test succeeded
        return;
    }
    Assert.fail();
}
Also used: Mockito.anyString(org.mockito.Mockito.anyString) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) CheckedThread(org.apache.flink.core.testutils.CheckedThread) Callback(org.apache.kafka.clients.producer.Callback) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Test(org.junit.Test)
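
A companion sketch for the error path, again with illustrative names: the send callback remembers the first async failure, and the checkpoint path rethrows it, which is why snapshotThread.sync() above surfaces the artificial exception.

import org.apache.kafka.clients.producer.Callback;

// Illustrative holder for the first asynchronous send failure; field and
// method names are mine, not Flink's.
public class AsyncErrorHolder {

    private volatile Exception asyncException;

    public Callback trackingCallback() {
        return (metadata, exception) -> {
            // remember only the first failure; later callbacks don't overwrite it
            if (exception != null && asyncException == null) {
                asyncException = exception;
            }
        };
    }

    // called from the checkpoint path before and after flushing
    public void checkAsyncErrors() throws Exception {
        Exception e = asyncException;
        if (e != null) {
            asyncException = null;
            throw new Exception("Failed to send data to Kafka: " + e.getMessage(), e);
        }
    }
}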

Example 5 with SimpleStringSchema

Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

Class KafkaConsumerTestBase, method runCancelingOnEmptyInputTest.

/**
 * Tests that the source can be properly canceled when reading empty partitions.
 */
public void runCancelingOnEmptyInputTest() throws Exception {
    final String topic = "cancelingOnEmptyInputTopic";
    final int parallelism = 3;
    createTestTopic(topic, parallelism, 1);
    final AtomicReference<Throwable> error = new AtomicReference<>();
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.enableCheckpointing(100);
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    getStream(env, topic, new SimpleStringSchema(), props).addSink(new DiscardingSink<String>());
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
    final JobID jobId = jobGraph.getJobID();
    final Runnable jobRunner = () -> {
        try {
            submitJobAndWaitForResult(client, jobGraph, getClass().getClassLoader());
        } catch (Throwable t) {
            LOG.error("Job Runner failed with exception", t);
            error.set(t);
        }
    };
    Thread runnerThread = new Thread(jobRunner, "program runner thread");
    runnerThread.start();
    // wait a bit before canceling
    Thread.sleep(2000);
    Throwable failureCause = error.get();
    if (failureCause != null) {
        failureCause.printStackTrace();
        Assert.fail("Test failed prematurely with: " + failureCause.getMessage());
    }
    // cancel
    client.cancel(jobId).get();
    // wait for the program to be done and validate that we failed with the right exception
    runnerThread.join();
    assertEquals(JobStatus.CANCELED, client.getJobStatus(jobId).get());
    deleteTestTopic(topic);
}
Also used: AtomicReference(java.util.concurrent.atomic.AtomicReference) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Properties(java.util.Properties) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID)
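
getStream(...) is a helper on the test base whose body is not shown here; a hedged guess at its essence is a plain FlinkKafkaConsumer wired with SimpleStringSchema. The broker address and group id below are placeholders.

import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class ConsumerWiring {
    public static DataStream<String> stringStream(StreamExecutionEnvironment env, String topic) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder
        props.setProperty("group.id", "test-group");              // placeholder
        // SimpleStringSchema decodes each record's value as a UTF-8 String
        return env.addSource(new FlinkKafkaConsumer<>(topic, new SimpleStringSchema(), props));
    }
}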

Aggregations

SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema): 63
Test (org.junit.Test): 35
Properties (java.util.Properties): 30
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 20
CheckedThread (org.apache.flink.core.testutils.CheckedThread): 13
StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle): 13
Shard (com.amazonaws.services.kinesis.model.Shard): 11
ArrayList (java.util.ArrayList): 11
KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState): 11
TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher): 11
LinkedList (java.util.LinkedList): 9
SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber): 9
HashMap (java.util.HashMap): 8
StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata): 7
OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness): 7
Map (java.util.Map): 6
AtomicReference (java.util.concurrent.atomic.AtomicReference): 6
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 6
Matchers.anyString (org.mockito.Matchers.anyString): 6
SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange): 5