Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class PubSubConsumingTest, method testProducingMultipleResults.
@Test
public void testProducingMultipleResults() throws Exception {
    TestPubSubSubscriber testPubSubSubscriber =
            new TestPubSubSubscriber(
                    receivedMessage("1", pubSubMessage("A")),
                    receivedMessage("2", pubSubMessage("B,C,D")),
                    receivedMessage("3", pubSubMessage("E")));
    PubSubSource<String> pubSubSource =
            PubSubSource.newBuilder()
                    .withDeserializationSchema(
                            new SimpleStringSchema() {
                                @Override
                                public void deserialize(byte[] message, Collector<String> out) {
                                    // split a single Pub/Sub message into multiple records
                                    String[] records = super.deserialize(message).split(",");
                                    for (String record : records) {
                                        out.collect(record);
                                    }
                                }

                                @Override
                                public boolean isEndOfStream(String nextElement) {
                                    return nextElement.equals("C");
                                }
                            })
                    .withProjectName("fakeProject")
                    .withSubscriptionName("fakeSubscription")
                    .withPubSubSubscriberFactory(credentials -> testPubSubSubscriber)
                    .withCredentials(mock(Credentials.class))
                    .build();
    Object lock = new Object();
    ConcurrentLinkedQueue<String> results = new ConcurrentLinkedQueue<>();
    Thread thread = createSourceThread(pubSubSource, lock, results);
    try {
        thread.start();
        awaitRecordCount(results, 2);
        // we emit only the records prior to the end of the stream
        assertThat(new ArrayList<>(results), equalTo(Arrays.asList("A", "B")));
        pubSubSource.snapshotState(0, 0);
        pubSubSource.notifyCheckpointComplete(0);
        // we also acknowledge the end-of-stream record
        assertThat(testPubSubSubscriber.getAcknowledgedIds(), equalTo(Arrays.asList("1", "2")));
    } finally {
        pubSubSource.cancel();
        thread.join();
    }
}
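The helpers receivedMessage and pubSubMessage are defined elsewhere in PubSubConsumingTest and are not shown above. A minimal sketch of what they plausibly look like, assuming the Google Cloud Pub/Sub protobuf builders (com.google.pubsub.v1.ReceivedMessage and PubsubMessage); this is a hypothetical reconstruction, not the actual test code:

import com.google.protobuf.ByteString;
import com.google.pubsub.v1.PubsubMessage;
import com.google.pubsub.v1.ReceivedMessage;

// Hypothetical reconstruction of the test helpers used above.
private static ReceivedMessage receivedMessage(String ackId, PubsubMessage message) {
    // wrap the payload with the ack id that the subscriber later acknowledges
    return ReceivedMessage.newBuilder().setAckId(ackId).setMessage(message).build();
}

private static PubsubMessage pubSubMessage(String payload) {
    // encode the string payload as the message body
    return PubsubMessage.newBuilder().setData(ByteString.copyFromUtf8(payload)).build();
}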
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class FlinkKafkaProducerBaseTest, method testKeyValueDeserializersSetIfMissing.
/**
 * Tests that the constructor defaults the key and value serializers in the config to
 * byte array serializers if they are not set.
 */
@Test
public void testKeyValueDeserializersSetIfMissing() throws Exception {
    Properties props = new Properties();
    props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:12345");
    // should set the missing key and value serializers
    new DummyFlinkKafkaProducer<>(props, new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), null);
    assertTrue(props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
    assertTrue(props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
    assertEquals(ByteArraySerializer.class.getName(), props.getProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG));
    assertEquals(ByteArraySerializer.class.getName(), props.getProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG));
}
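The behavior being verified is, roughly, that the producer fills in ByteArraySerializer for any serializer key the caller left unset. A minimal sketch of that defaulting logic, as an assumption about what the constructor does rather than the actual Flink source:

import java.util.Properties;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.ByteArraySerializer;

// Hypothetical sketch of the defaulting the test asserts: any serializer
// entry the caller did not set falls back to ByteArraySerializer.
static void setByteArraySerializersIfMissing(Properties props) {
    if (!props.containsKey(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG)) {
        props.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
    }
    if (!props.containsKey(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG)) {
        props.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
    }
}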
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class FlinkKafkaProducerBaseTest, method testAtLeastOnceProducer.
/**
* Test ensuring that the producer is not dropping buffered records; we set a timeout because
* the test will not finish if the logic is broken.
*/
@SuppressWarnings("unchecked")
@Test(timeout = 10000)
public void testAtLeastOnceProducer() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer =
            new DummyFlinkKafkaProducer<>(
                    FakeStandardProducerConfig.get(),
                    new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
                    null);
    producer.setFlushOnCheckpoint(true);
    final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();
    final OneInputStreamOperatorTestHarness<String, Object> testHarness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));
    testHarness.open();
    testHarness.processElement(new StreamRecord<>("msg-1"));
    testHarness.processElement(new StreamRecord<>("msg-2"));
    testHarness.processElement(new StreamRecord<>("msg-3"));
    verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class));
    Assert.assertEquals(3, producer.getPendingSize());
    // start a thread to perform checkpointing
    CheckedThread snapshotThread = new CheckedThread() {
        @Override
        public void go() throws Exception {
            // this should block until all records are flushed;
            // if the snapshot implementation returns before pending records are
            // flushed, the test will fail
            testHarness.snapshot(123L, 123L);
        }
    };
    snapshotThread.start();
    // before proceeding, make sure that flushing has started and that the snapshot
    // is still blocked; this would block forever if the snapshot didn't perform a flush
    producer.waitUntilFlushStarted();
    Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
    // now, complete the callbacks one by one; the snapshot must stay blocked
    // until the last pending record is acknowledged
    producer.getPendingCallbacks().get(0).onCompletion(null, null);
    Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
    Assert.assertEquals(2, producer.getPendingSize());
    producer.getPendingCallbacks().get(1).onCompletion(null, null);
    Assert.assertTrue("Snapshot returned before all records were flushed", snapshotThread.isAlive());
    Assert.assertEquals(1, producer.getPendingSize());
    producer.getPendingCallbacks().get(2).onCompletion(null, null);
    Assert.assertEquals(0, producer.getPendingSize());
    // this would fail with an exception if flushing wasn't completed before the
    // snapshot method returned
    snapshotThread.sync();
    testHarness.close();
}
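The at-least-once guarantee under test hinges on snapshotState blocking until the pending-record count reaches zero. A minimal sketch of that pattern, assuming a pendingRecords counter decremented by the send callback; the field and method names here are illustrative, not the actual FlinkKafkaProducerBase internals:

import org.apache.flink.runtime.state.FunctionSnapshotContext;

// Illustrative sketch of the blocking flush the test exercises.
abstract class FlushOnCheckpointSketch {
    final Object pendingRecordsLock = new Object();
    long pendingRecords; // incremented on send(), decremented in each send callback

    void snapshotState(FunctionSnapshotContext context) throws Exception {
        synchronized (pendingRecordsLock) {
            // block the checkpoint until every in-flight record is acknowledged;
            // callbacks call pendingRecordsLock.notifyAll() after decrementing
            while (pendingRecords > 0) {
                pendingRecordsLock.wait(100);
            }
        }
        checkErroneous(); // rethrow any exception reported by a send callback
    }

    abstract void checkErroneous() throws Exception;
}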
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class FlinkKafkaProducerBaseTest, method testAsyncErrorRethrownOnCheckpointAfterFlush.
/**
 * Test ensuring that if an async exception is caught for one of the flushed requests on
 * checkpoint, it should be rethrown; we set a timeout because the test will not finish if the
 * logic is broken.
 *
 * <p>Note that this test does not verify that the snapshot method blocks correctly when
 * there are pending records; that behavior is covered by testAtLeastOnceProducer.
 */
@SuppressWarnings("unchecked")
@Test(timeout = 5000)
public void testAsyncErrorRethrownOnCheckpointAfterFlush() throws Throwable {
    final DummyFlinkKafkaProducer<String> producer =
            new DummyFlinkKafkaProducer<>(
                    FakeStandardProducerConfig.get(),
                    new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
                    null);
    producer.setFlushOnCheckpoint(true);
    final KafkaProducer<?, ?> mockProducer = producer.getMockKafkaProducer();
    final OneInputStreamOperatorTestHarness<String, Object> testHarness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));
    testHarness.open();
    testHarness.processElement(new StreamRecord<>("msg-1"));
    testHarness.processElement(new StreamRecord<>("msg-2"));
    testHarness.processElement(new StreamRecord<>("msg-3"));
    verify(mockProducer, times(3)).send(any(ProducerRecord.class), any(Callback.class));
    // only let the first callback succeed for now
    producer.getPendingCallbacks().get(0).onCompletion(null, null);
    CheckedThread snapshotThread = new CheckedThread() {
        @Override
        public void go() throws Exception {
            // this should block at first, since there are still two pending records
            // that need to be flushed
            testHarness.snapshot(123L, 123L);
        }
    };
    snapshotThread.start();
    // let the 2nd message fail with an async exception
    producer.getPendingCallbacks().get(1).onCompletion(null, new Exception("artificial async failure for 2nd message"));
    producer.getPendingCallbacks().get(2).onCompletion(null, null);
    try {
        snapshotThread.sync();
    } catch (Exception e) {
        // the snapshot should have failed with the async exception
        Assert.assertTrue(e.getCause().getMessage().contains("artificial async failure for 2nd message"));
        // test succeeded
        return;
    }
    Assert.fail("Snapshot should have failed with the async exception");
}
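What this test exercises is the common pattern of capturing an exception in the Kafka send callback and rethrowing it on the next checkpoint. A hedged sketch of that pattern, with names that are assumptions rather than the actual FlinkKafkaProducerBase fields:

import org.apache.kafka.clients.producer.Callback;

// Illustrative sketch of async-error propagation.
class AsyncErrorSketch {
    private volatile Exception asyncException;

    // installed as the Kafka send callback for every record
    final Callback callback = (metadata, exception) -> {
        if (exception != null && asyncException == null) {
            asyncException = exception; // remember the first async failure
        }
    };

    // called on the checkpoint path so the job fails instead of silently losing data
    void checkErroneous() throws Exception {
        if (asyncException != null) {
            throw new Exception("Failed to send data to Kafka", asyncException);
        }
    }
}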
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class KafkaConsumerTestBase, method runCancelingOnEmptyInputTest.
/**
* Tests that the source can be properly canceled when reading empty partitions.
*/
public void runCancelingOnEmptyInputTest() throws Exception {
    final String topic = "cancelingOnEmptyInputTopic";
    final int parallelism = 3;
    createTestTopic(topic, parallelism, 1);
    final AtomicReference<Throwable> error = new AtomicReference<>();
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.enableCheckpointing(100);
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    getStream(env, topic, new SimpleStringSchema(), props).addSink(new DiscardingSink<String>());
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
    final JobID jobId = jobGraph.getJobID();
    final Runnable jobRunner = () -> {
        try {
            submitJobAndWaitForResult(client, jobGraph, getClass().getClassLoader());
        } catch (Throwable t) {
            LOG.error("Job Runner failed with exception", t);
            error.set(t);
        }
    };
    Thread runnerThread = new Thread(jobRunner, "program runner thread");
    runnerThread.start();
    // wait a bit before canceling
    Thread.sleep(2000);
    Throwable failureCause = error.get();
    if (failureCause != null) {
        failureCause.printStackTrace();
        Assert.fail("Test failed prematurely with: " + failureCause.getMessage());
    }
    // cancel
    client.cancel(jobId).get();
    // wait for the program to finish and validate that the job was properly canceled
    runnerThread.join();
    assertEquals(JobStatus.CANCELED, client.getJobStatus(jobId).get());
    deleteTestTopic(topic);
}
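The getStream helper belongs to KafkaConsumerTestBase and is not shown here. A minimal sketch of what it plausibly does, assuming the FlinkKafkaConsumer connector (the concrete consumer class is version-dependent); this is a hypothetical reconstruction, not the actual helper:

import java.util.Properties;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

// Hypothetical reconstruction of the getStream helper used above.
static <T> DataStream<T> getStream(
        StreamExecutionEnvironment env,
        String topic,
        DeserializationSchema<T> schema,
        Properties props) {
    // build a Kafka source for the topic and attach it to the environment
    return env.addSource(new FlinkKafkaConsumer<>(topic, schema, props));
}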