Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class ExactlyOnceValidatingConsumerThread, method create.
public static Thread create(
        final int totalEventCount,
        final int failAtRecordCount,
        final int parallelism,
        final int checkpointInterval,
        final long restartDelay,
        final String awsAccessKey,
        final String awsSecretKey,
        final String awsRegion,
        final String kinesisStreamName,
        final AtomicReference<Throwable> errorHandler,
        final int flinkPort,
        final Configuration flinkConfig) {
    Runnable exactlyOnceValidationConsumer = new Runnable() {
        @Override
        public void run() {
            try {
                StreamExecutionEnvironment see =
                        StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort, flinkConfig);
                see.setParallelism(parallelism);
                see.enableCheckpointing(checkpointInterval);
                // we restart two times
                see.setRestartStrategy(RestartStrategies.fixedDelayRestart(2, restartDelay));
                // consuming topology
                Properties consumerProps = new Properties();
                consumerProps.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, awsAccessKey);
                consumerProps.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, awsSecretKey);
                consumerProps.setProperty(ConsumerConfigConstants.AWS_REGION, awsRegion);
                // start reading from beginning
                consumerProps.setProperty(
                        ConsumerConfigConstants.STREAM_INITIAL_POSITION,
                        ConsumerConfigConstants.InitialPosition.TRIM_HORIZON.name());
                DataStream<String> consuming =
                        see.addSource(new FlinkKinesisConsumer<>(kinesisStreamName, new SimpleStringSchema(), consumerProps));
                consuming
                        .flatMap(new ArtificialFailOnceFlatMapper(failAtRecordCount))
                        .flatMap(new ExactlyOnceValidatingMapper(totalEventCount))
                        .setParallelism(1);
                LOG.info("Starting consuming topology");
                tryExecute(see, "Consuming topo");
                LOG.info("Consuming topo finished");
            } catch (Exception e) {
                LOG.warn("Error while running consuming topology", e);
                errorHandler.set(e);
            }
        }
    };
    return new Thread(exactlyOnceValidationConsumer);
}
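For orientation, SimpleStringSchema itself is just a charset-based byte[]/String converter. A minimal stand-alone round-trip sketch (the charset argument is our choice; the no-arg constructor defaults to UTF-8):

import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.SimpleStringSchema;

public class SimpleStringSchemaRoundTrip {
    public static void main(String[] args) {
        // Passing the charset explicitly; new SimpleStringSchema() defaults to UTF-8.
        SimpleStringSchema schema = new SimpleStringSchema(StandardCharsets.UTF_8);
        byte[] bytes = schema.serialize("event-42");
        String restored = schema.deserialize(bytes);
        System.out.println(restored); // prints "event-42"
    }
}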
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class DynamoDBStreamsDataFetcherTest, method testCreateRecordPublisherRespectsShardIteratorTypeLatest.
@Test
public void testCreateRecordPublisherRespectsShardIteratorTypeLatest() throws Exception {
    RuntimeContext runtimeContext = TestUtils.getMockedRuntimeContext(1, 0);
    KinesisProxyInterface kinesis = mock(KinesisProxyInterface.class);
    DynamoDBStreamsDataFetcher<String> fetcher =
            new DynamoDBStreamsDataFetcher<>(
                    singletonList("fakeStream"),
                    new TestSourceContext<>(),
                    runtimeContext,
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    DEFAULT_SHARD_ASSIGNER,
                    config -> kinesis);
    StreamShardHandle dummyStreamShardHandle =
            TestUtils.createDummyStreamShardHandle("dummy-stream", "0");
    fetcher.createRecordPublisher(
            SENTINEL_LATEST_SEQUENCE_NUM.get(),
            new Properties(),
            createFakeShardConsumerMetricGroup(runtimeContext.getMetricGroup()),
            dummyStreamShardHandle);
    verify(kinesis).getShardIterator(dummyStreamShardHandle, LATEST.toString(), null);
}
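The closing verify call is the standard Mockito interaction check. A self-contained sketch of the same pattern, using a hypothetical Proxy interface as a stand-in for KinesisProxyInterface rather than the real Flink class:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

public class VerifyPatternSketch {
    // Hypothetical collaborator, standing in for KinesisProxyInterface.
    interface Proxy {
        String getShardIterator(String shard, String iteratorType, Object marker);
    }

    public static void main(String[] args) {
        Proxy proxy = mock(Proxy.class);
        // The code under test would make this call; we simulate it directly here.
        proxy.getShardIterator("dummy-stream-0", "LATEST", null);
        // Assert the collaborator was asked for a LATEST iterator with no starting marker.
        verify(proxy).getShardIterator("dummy-stream-0", "LATEST", null);
    }
}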
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class ShardConsumerTestUtils, method assertNumberOfMessagesReceivedFromKinesis.
public static ShardConsumerMetricsReporter assertNumberOfMessagesReceivedFromKinesis(
        final int expectedNumberOfMessages,
        final RecordPublisherFactory recordPublisherFactory,
        final SequenceNumber startingSequenceNumber,
        final Properties consumerProperties,
        final SequenceNumber expectedLastProcessedSequenceNum,
        final AbstractMetricGroup metricGroup) throws InterruptedException {
    ShardConsumerMetricsReporter shardMetricsReporter = new ShardConsumerMetricsReporter(metricGroup);
    StreamShardHandle fakeToBeConsumedShard = getMockStreamShard("fakeStream", 0);
    LinkedList<KinesisStreamShardState> subscribedShardsStateUnderTest = new LinkedList<>();
    subscribedShardsStateUnderTest.add(
            new KinesisStreamShardState(
                    KinesisDataFetcher.convertToStreamShardMetadata(fakeToBeConsumedShard),
                    fakeToBeConsumedShard,
                    startingSequenceNumber));
    TestSourceContext<String> sourceContext = new TestSourceContext<>();
    KinesisDeserializationSchemaWrapper<String> deserializationSchema =
            new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema());
    TestableKinesisDataFetcher<String> fetcher =
            new TestableKinesisDataFetcher<>(
                    Collections.singletonList("fakeStream"),
                    sourceContext,
                    consumerProperties,
                    deserializationSchema,
                    10,
                    2,
                    new AtomicReference<>(),
                    subscribedShardsStateUnderTest,
                    KinesisDataFetcher.createInitialSubscribedStreamsToLastDiscoveredShardsState(
                            Collections.singletonList("fakeStream")),
                    Mockito.mock(KinesisProxyInterface.class),
                    Mockito.mock(KinesisProxyV2Interface.class));
    final StreamShardHandle shardHandle = subscribedShardsStateUnderTest.get(0).getStreamShardHandle();
    final SequenceNumber lastProcessedSequenceNum = subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum();
    final StartingPosition startingPosition =
            AWSUtil.getStartingPosition(lastProcessedSequenceNum, consumerProperties);
    final RecordPublisher recordPublisher =
            recordPublisherFactory.create(startingPosition, fetcher.getConsumerConfiguration(), metricGroup, shardHandle);
    int shardIndex = fetcher.registerNewSubscribedShardState(subscribedShardsStateUnderTest.get(0));
    new ShardConsumer<>(fetcher, recordPublisher, shardIndex, shardHandle, lastProcessedSequenceNum, shardMetricsReporter, deserializationSchema).run();
    assertEquals(expectedNumberOfMessages, sourceContext.getCollectedOutputs().size());
    assertEquals(expectedLastProcessedSequenceNum, subscribedShardsStateUnderTest.get(0).getLastProcessedSequenceNum());
    return shardMetricsReporter;
}
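The TestSourceContext above collects everything the consumer emits so the final assertions can inspect it. A minimal sketch of that idea, written against only the public SourceFunction.SourceContext interface (this is our own illustration, not the actual Flink test class; getCollectedOutputs mirrors the method name used above):

import java.util.ArrayList;
import java.util.List;

import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.watermark.Watermark;

// A stripped-down collecting SourceContext, in the spirit of Flink's TestSourceContext.
public class CollectingSourceContext<T> implements SourceFunction.SourceContext<T> {
    private final Object lock = new Object();
    private final List<T> collected = new ArrayList<>();

    @Override public void collect(T element) { collected.add(element); }
    @Override public void collectWithTimestamp(T element, long timestamp) { collected.add(element); }
    @Override public void emitWatermark(Watermark mark) { /* ignored in this sketch */ }
    @Override public void markAsTemporarilyIdle() { /* ignored in this sketch */ }
    @Override public Object getCheckpointLock() { return lock; }
    @Override public void close() { }

    public List<T> getCollectedOutputs() { return collected; }
}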
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class ManualConsumerProducerTest, method main.
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(4);
    DataStream<String> simpleStringStream = see.addSource(new ProduceIntoKinesis.EventsGenerator());
    Properties kinesisProducerConfig = new Properties();
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    kinesisProducerConfig.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));
    FlinkKinesisProducer<String> kinesis =
            new FlinkKinesisProducer<>(
                    new KinesisSerializationSchema<String>() {
                        @Override
                        public ByteBuffer serialize(String element) {
                            return ByteBuffer.wrap(element.getBytes(ConfigConstants.DEFAULT_CHARSET));
                        }

                        // every 10th element goes into a different stream
                        @Override
                        public String getTargetStream(String element) {
                            if (element.split("-")[0].endsWith("0")) {
                                return "flink-test-2";
                            }
                            // send to default stream
                            return null;
                        }
                    },
                    kinesisProducerConfig);
    kinesis.setFailOnError(true);
    kinesis.setDefaultStream("test-flink");
    kinesis.setDefaultPartition("0");
    kinesis.setCustomPartitioner(new KinesisPartitioner<String>() {
        @Override
        public String getPartitionId(String element) {
            int l = element.length();
            return element.substring(l - 1, l);
        }
    });
    simpleStringStream.addSink(kinesis);
    // consuming topology
    Properties consumerProps = new Properties();
    consumerProps.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accessKey"));
    consumerProps.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretKey"));
    consumerProps.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    DataStream<String> consuming =
            see.addSource(new FlinkKinesisConsumer<>("test-flink", new SimpleStringSchema(), consumerProps));
    // validate consumed records for correctness
    consuming.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public void flatMap(String value, Collector<String> out) throws Exception {
            String[] parts = value.split("-");
            try {
                long l = Long.parseLong(parts[0]);
                if (l < 0) {
                    throw new RuntimeException("Negative");
                }
            } catch (NumberFormatException nfe) {
                throw new RuntimeException("First part of '" + value + "' is not a valid numeric type");
            }
            if (parts[1].length() != 12) {
                throw new RuntimeException("Second part of '" + value + "' doesn't have 12 characters");
            }
        }
    });
    consuming.print();
    see.execute();
}
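Aside: the inline KinesisSerializationSchema is only needed here because records are routed to different target streams. When everything goes to a single stream, FlinkKinesisProducer also accepts a plain SerializationSchema, so SimpleStringSchema can be passed directly. A hedged sketch reusing the stream name from the example (credentials are assumed to come from the configured AWS provider chain):

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kinesis.FlinkKinesisProducer;
import org.apache.flink.streaming.connectors.kinesis.config.AWSConfigConstants;

public class SimpleProducerSketch {
    public static FlinkKinesisProducer<String> create(String region) {
        Properties props = new Properties();
        props.setProperty(AWSConfigConstants.AWS_REGION, region);
        // A plain SerializationSchema suffices when no per-record stream routing is needed.
        FlinkKinesisProducer<String> producer =
                new FlinkKinesisProducer<>(new SimpleStringSchema(), props);
        producer.setDefaultStream("test-flink"); // stream name from the example above
        producer.setDefaultPartition("0");
        return producer;
    }
}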
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.
The class FlinkKinesisConsumerMigrationTest, method writeSnapshot.
// ------------------------------------------------------------------------
@SuppressWarnings("unchecked")
private void writeSnapshot(String path, HashMap<StreamShardMetadata, SequenceNumber> state) throws Exception {
    final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(state.size());
    for (StreamShardMetadata shardMetadata : state.keySet()) {
        Shard shard = new Shard();
        shard.setShardId(shardMetadata.getShardId());
        SequenceNumberRange sequenceNumberRange = new SequenceNumberRange();
        sequenceNumberRange.withStartingSequenceNumber("1");
        shard.setSequenceNumberRange(sequenceNumberRange);
        initialDiscoveryShards.add(new StreamShardHandle(shardMetadata.getStreamName(), shard));
    }
    final TestFetcher<String> fetcher =
            new TestFetcher<>(
                    Collections.singletonList(TEST_STREAM_NAME),
                    new TestSourceContext<>(),
                    new TestRuntimeContext(true, 1, 0),
                    TestUtils.getStandardProperties(),
                    new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
                    state,
                    initialDiscoveryShards);
    final DummyFlinkKinesisConsumer<String> consumer =
            new DummyFlinkKinesisConsumer<>(fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumer);
    final AbstractStreamOperatorTestHarness<String> testHarness =
            new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);
    testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    testHarness.setup();
    testHarness.open();
    final AtomicReference<Throwable> error = new AtomicReference<>();
    // run the source asynchronously
    Thread runner = new Thread() {
        @Override
        public void run() {
            try {
                consumer.run(new TestSourceContext<>());
            } catch (Throwable t) {
                t.printStackTrace();
                error.set(t);
            }
        }
    };
    runner.start();
    fetcher.waitUntilRun();
    final OperatorSubtaskState snapshot;
    synchronized (testHarness.getCheckpointLock()) {
        snapshot = testHarness.snapshot(0L, 0L);
    }
    OperatorSnapshotUtil.writeStateHandle(snapshot, path);
    consumerOperator.close();
    runner.join();
}
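For completeness, the file written by writeStateHandle is read back on the restore side of the migration test. A hedged sketch of that step, assuming the readStateHandle and initializeState calls behave as in Flink's test utilities (the path and harness are supplied by the caller):

import org.apache.flink.runtime.checkpoint.OperatorSubtaskState;
import org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness;
import org.apache.flink.streaming.util.OperatorSnapshotUtil;

public class RestoreSketch {
    // Restores a snapshot file written by writeSnapshot(...) into a fresh harness.
    static void restore(AbstractStreamOperatorTestHarness<String> harness, String path) throws Exception {
        OperatorSubtaskState restored = OperatorSnapshotUtil.readStateHandle(path);
        harness.setup();
        harness.initializeState(restored);
        harness.open();
    }
}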