use of org.apache.flink.streaming.connectors.gcp.pubsub.emulator.PubSubSubscriberFactoryForEmulator in project flink by apache.
the class EmulatedFullTopologyTest method testFullTopology.
// ======================================================================================================
// IMPORTANT: This test makes use of things that happen in the emulated PubSub that
// are GUARANTEED to be different in the real Google hosted PubSub.
// So running these tests against the real thing will have a very high probability of
// failing.
// The assumptions:
// 1) The ordering of the messages is maintained.
// We insert STOP_MARKERs _after_ the set of test measurements and we assume these STOP
// events will arrive after the actual test data, so that we can stop the processing.
// In the real PubSub this is NOT true.
// 2) Exactly once: We assume that every message we put in comes out exactly once.
// In the real PubSub there are a lot of situations (mostly failure/retry) where this is not
// true.
@Test
public void testFullTopology() throws Exception {
    // ---------------------------------------------------------------------------
    // Step 0: Prepare the test data.
    List<String> words =
            new ArrayList<>(
                    Arrays.asList(
                            "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight",
                            "Nine", "Ten"));

    // Everything that goes onto the input topic: the words first, the terminators last.
    List<String> payloads = new ArrayList<>(words);

    // The source pulls from PubSub in batches, so every parallel task must be handed at
    // least one STOP_MARKER or that task never terminates and the test never ends.
    // With 3 messages per pull and 4 parallel tasks at least 12 markers are needed; 20 are
    // sent.
    // This only works because the emulator keeps the messages of a topic in order, which
    // the real PubSub is guaranteed NOT to do: a marker published last is also seen last.
    IntStream.range(0, 20).forEach(ignored -> payloads.add(STOP_MARKER));

    // ---------------------------------------------------------------------------
    // Step 1: Publish the payloads into the input topic, one at a time.
    Publisher publisher = pubsubHelper.createPublisher(PROJECT_NAME, INPUT_TOPIC_NAME);
    for (String payload : payloads) {
        PubsubMessage message =
                PubsubMessage.newBuilder().setData(ByteString.copyFromUtf8(payload)).build();
        // Wait on the returned future before publishing the next message.
        publisher.publish(message).get();
    }
    publisher.shutdown();

    // ---------------------------------------------------------------------------
    // Step 2: Build and run the topology: PubSub source -> reverse each string -> PubSub sink.
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    env.setParallelism(4);
    env.setRestartStrategy(RestartStrategies.noRestart());

    // NOTE(review): per the batching remark above, the trailing 3 is presumably the number
    // of messages per pull; confirm against the factory's constructor.
    PubSubSubscriberFactoryForEmulator subscriberFactory =
            new PubSubSubscriberFactoryForEmulator(
                    getPubSubHostPort(),
                    PROJECT_NAME,
                    INPUT_SUBSCRIPTION_NAME,
                    1,
                    Duration.ofSeconds(1),
                    3);

    env.addSource(
                    PubSubSource.newBuilder()
                            // This schema is what lets the stream end itself once a
                            // STOP_MARKER is seen.
                            .withDeserializationSchema(
                                    new SimpleStringSchemaWithStopMarkerDetection())
                            .withProjectName(PROJECT_NAME)
                            .withSubscriptionName(INPUT_SUBSCRIPTION_NAME)
                            .withCredentials(EmulatorCredentials.getInstance())
                            .withPubSubSubscriberFactory(subscriberFactory)
                            .build())
            .map((MapFunction<String, String>) StringUtils::reverse)
            .addSink(
                    PubSubSink.newBuilder()
                            .withSerializationSchema(new SimpleStringSchema())
                            .withProjectName(PROJECT_NAME)
                            .withTopicName(OUTPUT_TOPIC_NAME)
                            .withCredentials(EmulatorCredentials.getInstance())
                            .withHostAndPortForEmulator(getPubSubHostPort())
                            .build());
    env.execute("Running unit test");

    // ---------------------------------------------------------------------------
    // Step 3: Pull the results from the output topic and verify that each input word
    // shows up reversed, with no extra or missing elements.
    List<ReceivedMessage> received =
            pubsubHelper.pullMessages(PROJECT_NAME, OUTPUT_SUBSCRIPTION_NAME, 100);
    assertEquals("Wrong number of elements", words.size(), received.size());

    // Unwrap the payload strings from the received messages.
    List<String> output = new ArrayList<>();
    for (ReceivedMessage receivedMessage : received) {
        output.add(receivedMessage.getMessage().getData().toStringUtf8());
    }

    for (String word : words) {
        String reversedWord = org.apache.commons.lang3.StringUtils.reverse(word);
        LOG.info("Checking if \"{}\" --> \"{}\" exists", word, reversedWord);
        assertTrue("Missing " + word, output.contains(reversedWord));
    }
}
use of org.apache.flink.streaming.connectors.gcp.pubsub.emulator.PubSubSubscriberFactoryForEmulator in project flink by apache.
the class EmulatedPubSubSourceTest method testFlinkSource.
// IMPORTANT: This test makes use of things that happen in the emulated PubSub that
// are GUARANTEED to be different in the real Google hosted PubSub.
// So running these tests against the real thing will have a very high probability of
// failing.
// The assumptions:
// 1) The ordering of the messages is maintained.
// We insert STOP_MARKERs _after_ the set of test measurements and we assume these STOP
// events will arrive after the actual test data, so that we can stop the processing.
// In the real PubSub this is NOT true.
// 2) Exactly once: We assume that every message we put in comes out exactly once.
// In the real PubSub there are a lot of situations (mostly failure/retry) where this is not
// true.
@Test
public void testFlinkSource() throws Exception {
    // Purpose: publish ten test strings followed by STOP_MARKERs to the emulated topic, read
    // them back through a PubSubSource at parallelism 4, and check that every input string
    // comes out and that the element count matches.

    // Create some messages and put them into pubsub
    List<String> input =
            Arrays.asList(
                    "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine",
                    "Ten");
    List<String> messagesToSend = new ArrayList<>(input);

    // Now add some stream termination messages.
    // NOTE: Messages are pulled from PubSub in batches by the source, so we need enough
    // STOP_MARKERs to ensure ALL parallel tasks get at least one STOP_MARKER. If not, at
    // least one task will not terminate and the test will not end.
    // We pull 3 at a time, have 4 parallel: we need at least 12 STOP_MARKERS.
    IntStream.rangeClosed(1, 20).forEach(i -> messagesToSend.add(STOP_MARKER));

    // IMPORTANT NOTE: This way of testing uses an effect of the PubSub emulator that is
    // absolutely guaranteed NOT to work in the real PubSub: the ordering of the messages is
    // maintained in the topic. So here we can assume that if we add a stop message LAST we
    // can terminate the test stream when we see it.

    // Publish the messages into PubSub.
    Publisher publisher = pubsubHelper.createPublisher(PROJECT_NAME, TOPIC_NAME);
    try {
        for (String s : messagesToSend) {
            try {
                // Wait for each publish to complete before sending the next message.
                publisher
                        .publish(
                                PubsubMessage.newBuilder()
                                        .setData(ByteString.copyFromUtf8(s))
                                        .build())
                        .get();
            } catch (ExecutionException e) {
                // Fail right here: a message that never reached the topic would otherwise
                // only show up later as a wrong element count, or as a test that never
                // terminates when the lost message was a STOP_MARKER.
                throw new AssertionError("Failed to publish test message \"" + s + "\"", e);
            }
            // An InterruptedException is deliberately not caught; it propagates to the test
            // runner through the method's throws clause.
        }
    } finally {
        // Always release the publisher, even when publishing failed.
        publisher.shutdown();
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(100);
    env.setParallelism(4);
    env.setRestartStrategy(RestartStrategies.noRestart());

    // NOTE(review): per the batching remark above, the trailing 3 passed to the subscriber
    // factory is presumably the number of messages per pull; confirm against its constructor.
    DataStream<String> fromPubSub =
            env.addSource(
                            PubSubSource.newBuilder()
                                    .withDeserializationSchema(
                                            new SimpleStringSchemaWithStopMarkerDetection())
                                    .withProjectName(PROJECT_NAME)
                                    .withSubscriptionName(SUBSCRIPTION_NAME)
                                    .withCredentials(EmulatorCredentials.getInstance())
                                    .withPubSubSubscriberFactory(
                                            new PubSubSubscriberFactoryForEmulator(
                                                    getPubSubHostPort(),
                                                    PROJECT_NAME,
                                                    SUBSCRIPTION_NAME,
                                                    10,
                                                    Duration.ofSeconds(1),
                                                    3))
                                    .build())
                    .name("PubSub source");

    // Drain the stream into a list, then compare it with the input.
    List<String> output = new ArrayList<>();
    DataStreamUtils.collect(fromPubSub).forEachRemaining(output::add);

    assertEquals("Wrong number of elements", input.size(), output.size());
    for (String test : input) {
        assertTrue("Missing " + test, output.contains(test));
    }
}
Aggregations