use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class FlinkKinesisConsumerMigrationTest method testRestoreWithEmptyState.
@Test
public void testRestoreWithEmptyState() throws Exception {
final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
for (StreamShardMetadata shardMetadata : TEST_STATE.keySet()) {
Shard shard = new Shard();
shard.setShardId(shardMetadata.getShardId());
SequenceNumberRange sequenceNumberRange = new SequenceNumberRange();
sequenceNumberRange.withStartingSequenceNumber("1");
shard.setSequenceNumberRange(sequenceNumberRange);
initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), shard));
}
final TestFetcher<String> fetcher =
        new TestFetcher<>(
                Collections.singletonList(TEST_STREAM_NAME),
                new TestSourceContext<>(),
                new TestRuntimeContext(true, 1, 0),
                TestUtils.getStandardProperties(),
                new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                null,
                initialDiscoveryShards);
final DummyFlinkKinesisConsumer<String> consumerFunction = new DummyFlinkKinesisConsumer<>(fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
testHarness.setup();
testHarness.initializeState(OperatorSnapshotUtil.getResourceFilename("kinesis-consumer-migration-test-flink" + testMigrateVersion + "-empty-snapshot"));
testHarness.open();
consumerFunction.run(new TestSourceContext<>());
// assert that no state was restored
assertTrue(consumerFunction.getRestoredState().isEmpty());
// Although the restored state is empty, the fetcher should still have registered the
// initially discovered shard. Furthermore, that shard should be treated as a shard that was
// created while the job was not running, and therefore be consumed from the earliest
// sequence number.
KinesisStreamShardState restoredShardState = fetcher.getSubscribedShardsState().get(0);
assertEquals(TEST_STREAM_NAME, restoredShardState.getStreamShardHandle().getStreamName());
assertEquals(TEST_SHARD_ID, restoredShardState.getStreamShardHandle().getShard().getShardId());
assertFalse(restoredShardState.getStreamShardHandle().isClosed());
assertEquals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(), restoredShardState.getLastProcessedSequenceNum());
consumerOperator.close();
consumerOperator.cancel();
}
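Both migration tests wrap a SimpleStringSchema in a KinesisDeserializationSchemaWrapper so that Kinesis records surface as plain strings. As a reminder of what that schema actually does, here is a minimal round-trip sketch; the explicit charset argument and the sample payload are illustrative and not taken from the test.

import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.SimpleStringSchema;

public class SimpleStringSchemaRoundTrip {

    public static void main(String[] args) {
        // SimpleStringSchema (de)serializes strings as raw bytes; UTF-8 is the default charset.
        SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);

        byte[] bytes = schema.serialize("hello kinesis"); // String -> byte[]
        String restored = schema.deserialize(bytes);      // byte[] -> String

        // isEndOfStream always returns false, so the schema never terminates a source by itself.
        System.out.println(restored + " / endOfStream=" + schema.isEndOfStream(restored));
    }
}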
use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class FlinkKinesisConsumerMigrationTest method testRestore.
@Test
public void testRestore() throws Exception {
final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
for (StreamShardMetadata shardMetadata : TEST_STATE.keySet()) {
Shard shard = new Shard();
shard.setShardId(shardMetadata.getShardId());
SequenceNumberRange sequenceNumberRange = new SequenceNumberRange();
sequenceNumberRange.withStartingSequenceNumber("1");
shard.setSequenceNumberRange(sequenceNumberRange);
initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), shard));
}
final TestFetcher<String> fetcher =
        new TestFetcher<>(
                Collections.singletonList(TEST_STREAM_NAME),
                new TestSourceContext<>(),
                new TestRuntimeContext(true, 1, 0),
                TestUtils.getStandardProperties(),
                new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                null,
                initialDiscoveryShards);
final DummyFlinkKinesisConsumer<String> consumerFunction = new DummyFlinkKinesisConsumer<>(fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
testHarness.setup();
testHarness.initializeState(OperatorSnapshotUtil.getResourceFilename("kinesis-consumer-migration-test-flink" + testMigrateVersion + "-snapshot"));
testHarness.open();
consumerFunction.run(new TestSourceContext<>());
// assert that state is correctly restored
assertNotEquals(null, consumerFunction.getRestoredState());
assertEquals(1, consumerFunction.getRestoredState().size());
assertEquals(TEST_STATE, removeEquivalenceWrappers(consumerFunction.getRestoredState()));
assertEquals(1, fetcher.getSubscribedShardsState().size());
assertEquals(TEST_SEQUENCE_NUMBER, fetcher.getSubscribedShardsState().get(0).getLastProcessedSequenceNum());
KinesisStreamShardState restoredShardState = fetcher.getSubscribedShardsState().get(0);
assertEquals(TEST_STREAM_NAME, restoredShardState.getStreamShardHandle().getStreamName());
assertEquals(TEST_SHARD_ID, restoredShardState.getStreamShardHandle().getShard().getShardId());
assertFalse(restoredShardState.getStreamShardHandle().isClosed());
assertEquals(TEST_SEQUENCE_NUMBER, restoredShardState.getLastProcessedSequenceNum());
consumerOperator.close();
consumerOperator.cancel();
}
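Outside the test harness, the same schema is normally handed straight to the FlinkKinesisConsumer constructor, which wraps it in a KinesisDeserializationSchemaWrapper internally. A minimal sketch of that wiring follows; the stream name, region, and initial position are placeholder values, not part of the test above.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kinesis.FlinkKinesisConsumer;
import org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants;

public class KinesisStringSourceJob {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties consumerConfig = new Properties();
        consumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
        consumerConfig.setProperty(ConsumerConfigConstants.STREAM_INITIAL_POSITION, "TRIM_HORIZON");

        // The consumer wraps the SimpleStringSchema the same way the tests above do.
        DataStream<String> records =
                env.addSource(
                        new FlinkKinesisConsumer<>(
                                "example-stream", new SimpleStringSchema(), consumerConfig));

        records.print();
        env.execute("Kinesis string source");
    }
}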
use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class EmulatedFullTopologyTest method testFullTopology.
// ======================================================================================================
// IMPORTANT: This test relies on behavior of the emulated PubSub that is GUARANTEED to be
// different in the real Google-hosted PubSub, so running it against the real service has a
// very high probability of failing.
// The assumptions:
// 1) The ordering of the messages is maintained. We insert a STOP_MARKER _after_ the set of
//    test measurements and assume this STOP event arrives after the actual test data, so we
//    can stop the processing. In the real PubSub this is NOT true.
// 2) Exactly once: we assume that every message we put in comes out exactly once. In the real
//    PubSub there are many situations (mostly failure/retry) where this is not true.
@Test
public void testFullTopology() throws Exception {
// ===============================================================================
// Step 0: The test data
List<String> input = new ArrayList<>(Arrays.asList("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten"));
List<String> messagesToSend = new ArrayList<>(input);
// Now add some stream termination messages.
// NOTE: The source pulls messages from PubSub in batches, so we need enough STOP_MARKERs to
// ensure that ALL parallel tasks receive at least one; otherwise at least one task will never
// terminate and the test will not end.
// We pull 3 at a time and run 4 parallel tasks, so we need at least 12 STOP_MARKERs.
IntStream.rangeClosed(1, 20).forEach(i -> messagesToSend.add(STOP_MARKER));
// IMPORTANT NOTE: This way of testing exploits a property of the PubSub emulator that is
// absolutely guaranteed NOT to hold in the real PubSub: message ordering is maintained within
// the topic. So here we can assume that if we add the stop message LAST, we can terminate the
// test stream as soon as we see it.
// ===============================================================================
// Step 1: We put test data into the topic
// Publish the test messages into the input topic
Publisher publisher = pubsubHelper.createPublisher(PROJECT_NAME, INPUT_TOPIC_NAME);
for (String s : messagesToSend) {
publisher.publish(PubsubMessage.newBuilder().setData(ByteString.copyFromUtf8(s)).build()).get();
}
publisher.shutdown();
// ===============================================================================
// Step 2: Now we run our topology
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.enableCheckpointing(100);
env.setParallelism(4);
env.setRestartStrategy(RestartStrategies.noRestart());
// Silly topology
// Read from PubSub using a deserialization schema with a self-termination feature,
// reverse every string, and write the result back to the output topic.
env.addSource(
        PubSubSource.newBuilder()
                .withDeserializationSchema(new SimpleStringSchemaWithStopMarkerDetection())
                .withProjectName(PROJECT_NAME)
                .withSubscriptionName(INPUT_SUBSCRIPTION_NAME)
                .withCredentials(EmulatorCredentials.getInstance())
                .withPubSubSubscriberFactory(
                        new PubSubSubscriberFactoryForEmulator(
                                getPubSubHostPort(),
                                PROJECT_NAME,
                                INPUT_SUBSCRIPTION_NAME,
                                1,
                                Duration.ofSeconds(1),
                                3))
                .build())
        .map((MapFunction<String, String>) StringUtils::reverse)
        .addSink(
                PubSubSink.newBuilder()
                        .withSerializationSchema(new SimpleStringSchema())
                        .withProjectName(PROJECT_NAME)
                        .withTopicName(OUTPUT_TOPIC_NAME)
                        .withCredentials(EmulatorCredentials.getInstance())
                        .withHostAndPortForEmulator(getPubSubHostPort())
                        .build());
env.execute("Running unit test");
// ===============================================================================
// Now we should have all the resulting data in the output topic.
// Step 3: Get the result from the output topic and verify if everything is there
List<ReceivedMessage> receivedMessages = pubsubHelper.pullMessages(PROJECT_NAME, OUTPUT_SUBSCRIPTION_NAME, 100);
assertEquals("Wrong number of elements", input.size(), receivedMessages.size());
// Check output strings
List<String> output = new ArrayList<>();
// Extract the actual Strings from the ReceivedMessages
receivedMessages.forEach(msg -> output.add(msg.getMessage().getData().toStringUtf8()));
for (String test : input) {
String reversedTest = org.apache.commons.lang3.StringUtils.reverse(test);
LOG.info("Checking if \"{}\" --> \"{}\" exists", test, reversedTest);
assertTrue("Missing " + test, output.contains(reversedTest));
}
// ===============================================================================
}
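The SimpleStringSchemaWithStopMarkerDetection used above is not shown in this excerpt. A plausible minimal version, assuming STOP_MARKER is a plain string constant, would simply extend SimpleStringSchema and override isEndOfStream so that each parallel source task shuts down once it reads the marker:

import org.apache.flink.api.common.serialization.SimpleStringSchema;

/**
 * Hypothetical sketch of the stop-marker schema: identical to SimpleStringSchema except that
 * isEndOfStream terminates the source when the marker value is read.
 */
public class SimpleStringSchemaWithStopMarkerDetection extends SimpleStringSchema {

    private static final long serialVersionUID = 1L;

    // Assumed marker value; the real test defines STOP_MARKER elsewhere.
    private static final String STOP_MARKER = "===STOP===";

    @Override
    public boolean isEndOfStream(String nextElement) {
        return STOP_MARKER.equals(nextElement);
    }
}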
use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class EmulatedPubSubSinkTest method testFlinkSink.
@Test
public void testFlinkSink() throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(4);
List<String> input = Arrays.asList("One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten");
// Create test stream
DataStream<String> theData = env.fromCollection(input).name("Test input").map((MapFunction<String, String>) StringUtils::reverse);
// Sink into pubsub
theData.addSink(
                PubSubSink.newBuilder()
                        .withSerializationSchema(new SimpleStringSchema())
                        .withProjectName(PROJECT_NAME)
                        .withTopicName(TOPIC_NAME)
                        .withHostAndPortForEmulator(getPubSubHostPort())
                        .withCredentials(EmulatorCredentials.getInstance())
                        .build())
        .name("PubSub sink");
// Run
env.execute();
// Now get the result from PubSub and verify if everything is there
List<ReceivedMessage> receivedMessages = pubsubHelper.pullMessages(PROJECT_NAME, SUBSCRIPTION_NAME, 100);
assertEquals("Wrong number of elements", input.size(), receivedMessages.size());
// Check output strings
List<String> output = new ArrayList<>();
receivedMessages.forEach(msg -> output.add(msg.getMessage().getData().toStringUtf8()));
for (String test : input) {
assertTrue("Missing " + test, output.contains(StringUtils.reverse(test)));
}
}
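Against a real PubSub project, the emulator-specific host and credentials calls would be dropped. Below is a minimal sketch of that variant; the project and topic names are placeholders, and it assumes the builder falls back to application-default credentials when none are set explicitly.

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.gcp.pubsub.PubSubSink;

public class PubSubStringSinkJob {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<String> messages = env.fromElements("One", "Two", "Three");

        // Placeholder project/topic; no emulator host, so the sink targets the real service.
        messages.addSink(
                PubSubSink.newBuilder()
                        .withSerializationSchema(new SimpleStringSchema())
                        .withProjectName("my-gcp-project")
                        .withTopicName("my-output-topic")
                        .build());

        env.execute("PubSub string sink");
    }
}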
use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
the class FlinkKinesisConsumerTest method testPeriodicWatermark.
@Test
public void testPeriodicWatermark() throws Exception {
String streamName = "fakeStreamName";
Time maxOutOfOrderness = Time.milliseconds(5);
long autoWatermarkInterval = 1_000;
HashMap<String, String> subscribedStreamsToLastDiscoveredShardIds = new HashMap<>();
subscribedStreamsToLastDiscoveredShardIds.put(streamName, null);
KinesisDeserializationSchema<String> deserializationSchema = new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
Properties props = new Properties();
props.setProperty(ConsumerConfigConstants.AWS_REGION, "us-east-1");
props.setProperty(ConsumerConfigConstants.SHARD_GETRECORDS_INTERVAL_MILLIS, Long.toString(10L));
BlockingQueue<String> shard1 = new LinkedBlockingQueue<>();
BlockingQueue<String> shard2 = new LinkedBlockingQueue<>();
Map<String, List<BlockingQueue<String>>> streamToQueueMap = new HashMap<>();
streamToQueueMap.put(streamName, Arrays.asList(shard1, shard2));
// override createFetcher to mock Kinesis
FlinkKinesisConsumer<String> sourceFunc = new FlinkKinesisConsumer<String>(streamName, deserializationSchema, props) {
@Override
protected KinesisDataFetcher<String> createFetcher(List<String> streams, SourceContext<String> sourceContext, RuntimeContext runtimeContext, Properties configProps, KinesisDeserializationSchema<String> deserializationSchema) {
KinesisDataFetcher<String> fetcher =
        new KinesisDataFetcher<String>(
                streams,
                sourceContext,
                sourceContext.getCheckpointLock(),
                runtimeContext,
                configProps,
                deserializationSchema,
                getShardAssigner(),
                getPeriodicWatermarkAssigner(),
                null,
                new AtomicReference<>(),
                new ArrayList<>(),
                subscribedStreamsToLastDiscoveredShardIds,
                (props) -> FakeKinesisBehavioursFactory.blockingQueueGetRecords(streamToQueueMap),
                null) {};
return fetcher;
}
};
sourceFunc.setShardAssigner((streamShardHandle, i) -> {
// shard ids have the form "shardId-000000000000"; assign by the numeric suffix
return Integer.parseInt(streamShardHandle.getShard().getShardId().substring("shardId-".length()));
});
sourceFunc.setPeriodicWatermarkAssigner(new TestTimestampExtractor(maxOutOfOrderness));
// there is currently no test harness specifically for sources,
// so we overlay the source thread here
AbstractStreamOperatorTestHarness<Object> testHarness = new AbstractStreamOperatorTestHarness<Object>(new StreamSource(sourceFunc), 1, 1, 0);
testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
testHarness.getExecutionConfig().setAutoWatermarkInterval(autoWatermarkInterval);
testHarness.initializeEmptyState();
testHarness.open();
ConcurrentLinkedQueue<Watermark> watermarks = new ConcurrentLinkedQueue<>();
@SuppressWarnings("unchecked") SourceFunction.SourceContext<String> sourceContext = new CollectingSourceContext(testHarness.getCheckpointLock(), testHarness.getOutput()) {
@Override
public void emitWatermark(Watermark mark) {
watermarks.add(mark);
}
@Override
public void markAsTemporarilyIdle() {
}
};
new Thread(() -> {
try {
sourceFunc.run(sourceContext);
} catch (InterruptedException e) {
// expected on cancel
} catch (Exception e) {
throw new RuntimeException(e);
}
}).start();
shard1.put("1");
shard1.put("2");
shard2.put("10");
int recordCount = 3;
int watermarkCount = 0;
awaitRecordCount(testHarness.getOutput(), recordCount);
// Trigger watermark emit, first watermark is -3
// - Shard-1 @2
// - Shard-2 @10
// - Watermark = min(2, 10) - maxOutOfOrderness = 2 - 5 = -3
testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
watermarkCount++;
// advance watermark
shard1.put("10");
recordCount++;
awaitRecordCount(testHarness.getOutput(), recordCount);
// Trigger watermark emit, second watermark is 5
// - Shard-1 @10
// - Shard-2 @10
// - Watermark = min(10, 10) - maxOutOfOrderness = 10 - 5 = 5
testHarness.setProcessingTime(testHarness.getProcessingTime() + autoWatermarkInterval);
watermarkCount++;
sourceFunc.cancel();
testHarness.close();
assertEquals("record count", recordCount, testHarness.getOutput().size());
assertThat(watermarks, org.hamcrest.Matchers.contains(new Watermark(-3), new Watermark(5)));
assertEquals("watermark count", watermarkCount, watermarks.size());
}
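The TestTimestampExtractor referenced above is not included in this excerpt. Based on the comments (records "1", "2", and "10" yielding watermarks -3 and 5), a plausible minimal version parses the record payload itself as the event timestamp and relies on the bounded-out-of-orderness rule watermark = maxTimestamp - maxOutOfOrderness:

import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

/**
 * Hypothetical sketch of the periodic watermark assigner used in testPeriodicWatermark: the
 * string payload is parsed as the event timestamp, and the superclass emits
 * watermark = (largest timestamp seen so far) - maxOutOfOrderness, e.g. 2 - 5 = -3.
 * The fetcher then takes the minimum of these per-shard watermarks.
 */
public class TestTimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor<String> {

    private static final long serialVersionUID = 1L;

    public TestTimestampExtractor(Time maxOutOfOrderness) {
        super(maxOutOfOrderness);
    }

    @Override
    public long extractTimestamp(String element) {
        return Long.parseLong(element);
    }
}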