Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class FlinkKinesisConsumerTest, method testListStateChangedAfterSnapshotState.
/**
 * Restores a consumer from a one-entry union list state, lets a mocked fetcher report a
 * three-entry snapshot, and checks that {@code snapshotState()} clears the list state and
 * replaces its contents with exactly what the fetcher reported.
 */
@Test
public void testListStateChangedAfterSnapshotState() throws Exception {
    // ----------------------------------------------------------------------
    // fixtures: consumer config, restored state, and the state the fetcher reports
    // ----------------------------------------------------------------------
    final String shardStream = "fakeStream1";
    final Properties consumerProps = TestUtils.getStandardProperties();

    final ArrayList<Tuple2<StreamShardMetadata, SequenceNumber>> restoredState = new ArrayList<>(1);
    restoredState.add(
            Tuple2.of(
                    KinesisDataFetcher.convertToStreamShardMetadata(
                            new StreamShardHandle(
                                    shardStream,
                                    new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(0)))),
                    new SequenceNumber("1")));

    // sequence number reported by the fetcher for shard order 0, 1 and 2 respectively
    final String[] reportedSequenceNumbers = {"12", "11", "31"};
    final ArrayList<Tuple2<StreamShardMetadata, SequenceNumber>> expectedEntries = new ArrayList<>(3);
    final HashMap<StreamShardMetadata, SequenceNumber> fetcherSnapshot = new HashMap<>();
    for (int shardOrder = 0; shardOrder < reportedSequenceNumbers.length; shardOrder++) {
        StreamShardHandle handle =
                new StreamShardHandle(
                        shardStream,
                        new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder)));
        Tuple2<StreamShardMetadata, SequenceNumber> entry =
                Tuple2.of(
                        KinesisDataFetcher.convertToStreamShardMetadata(handle),
                        new SequenceNumber(reportedSequenceNumbers[shardOrder]));
        expectedEntries.add(entry);
        // the mocked fetcher hands back the very same metadata / sequence number objects
        fetcherSnapshot.put(entry.f0, entry.f1);
    }

    // ----------------------------------------------------------------------
    // operator state backend that serves the restored state to initializeState()
    // ----------------------------------------------------------------------
    final TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> unionState = new TestingListState<>();
    for (int i = 0; i < restoredState.size(); i++) {
        unionState.add(restoredState.get(i));
    }

    final OperatorStateStore stateStore = mock(OperatorStateStore.class);
    when(stateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(unionState);

    final StateInitializationContext initContext = mock(StateInitializationContext.class);
    when(initContext.getOperatorStateStore()).thenReturn(stateStore);
    when(initContext.isRestored()).thenReturn(true);

    // ----------------------------------------------------------------------
    // a "running" fetcher whose snapshot is the prepared map
    // ----------------------------------------------------------------------
    final KinesisDataFetcher runningFetcher = mock(KinesisDataFetcher.class);
    when(runningFetcher.snapshotState()).thenReturn(fetcherSnapshot);

    // ----------------------------------------------------------------------
    // build the consumer, restore it, and take a snapshot
    // ----------------------------------------------------------------------
    final FlinkKinesisConsumer<?> consumerSpy =
            spy(new FlinkKinesisConsumer<>("fakeStream", new SimpleStringSchema(), consumerProps));

    final RuntimeContext runtimeContext = mock(RuntimeContext.class);
    when(runtimeContext.getIndexOfThisSubtask()).thenReturn(1);

    consumerSpy.setRuntimeContext(runtimeContext);
    consumerSpy.initializeState(initContext);
    consumerSpy.open(new Configuration());

    // injecting a fetcher makes the consumer look like it is running
    Whitebox.setInternalState(consumerSpy, "fetcher", runningFetcher);

    consumerSpy.snapshotState(mock(FunctionSnapshotContext.class));

    // ----------------------------------------------------------------------
    // the list state must have been cleared and refilled from the fetcher
    // ----------------------------------------------------------------------
    assertEquals(true, unionState.clearCalled);
    assertEquals(3, unionState.getList().size());

    // none of the restored entries may survive the snapshot
    for (Tuple2<StreamShardMetadata, SequenceNumber> stale : restoredState) {
        for (Tuple2<StreamShardMetadata, SequenceNumber> actual : unionState.getList()) {
            assertNotEquals(stale, actual);
        }
    }

    // every entry reported by the fetcher must be present
    for (Tuple2<StreamShardMetadata, SequenceNumber> expected : expectedEntries) {
        boolean found = false;
        for (Tuple2<StreamShardMetadata, SequenceNumber> actual : unionState.getList()) {
            if (expected.equals(actual)) {
                found = true;
                break;
            }
        }
        assertEquals(true, found);
    }
}
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class FlinkKinesisProducerTest, method testAtLeastOnceProducer.
/**
 * Checks that the producer never drops buffered records: a snapshot must stay blocked until
 * every pending record has been acknowledged. A timeout is set because the test cannot finish
 * when that logic is broken.
 */
@SuppressWarnings({ "unchecked", "ResultOfMethodCallIgnored" })
@Test(timeout = 10000)
public void testAtLeastOnceProducer() throws Throwable {
    final String earlyReturnMessage = "Snapshot returned before all records were flushed";
    final String[] payloads = {"msg-1", "msg-2", "msg-3"};

    final DummyFlinkKinesisProducer<String> producer =
            new DummyFlinkKinesisProducer<>(new SimpleStringSchema());
    final OneInputStreamOperatorTestHarness<String, Object> harness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    harness.open();
    for (String payload : payloads) {
        harness.processElement(new StreamRecord<>(payload));
    }

    // run the checkpoint on its own thread so the test thread can observe it blocking
    final CheckedThread checkpointThread =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    // expected to block until all pending records are flushed
                    harness.snapshot(123L, 123L);
                }
            };
    checkpointThread.start();

    // wait for the flush to begin; this would block forever if the snapshot
    // did not trigger a flush at all
    producer.waitUntilFlushStarted();
    Assert.assertTrue(earlyReturnMessage, checkpointThread.isAlive());

    // acknowledge the records one at a time; the snapshot has to remain blocked
    // until the very last one has been completed
    final UserRecordResult ack = mock(UserRecordResult.class);
    when(ack.isSuccessful()).thenReturn(true);

    final int lastIndex = payloads.length - 1;
    for (int i = 0; i <= lastIndex; i++) {
        producer.getPendingRecordFutures().get(i).set(ack);
        if (i < lastIndex) {
            Assert.assertTrue(earlyReturnMessage, checkpointThread.isAlive());
        }
    }

    // rethrows anything that went wrong on the checkpoint thread, e.g. if the
    // snapshot method returned before flushing was complete
    checkpointThread.sync();

    harness.close();
}
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class FlinkKinesisProducerTest, method testAsyncErrorRethrownAfterFlush.
/**
 * Checks that an asynchronous failure of one of the records flushed on checkpoint is rethrown
 * from the snapshot. A timeout is set because the test cannot finish when that logic is broken.
 *
 * <p>Whether the snapshot correctly blocks while records are pending is not verified here; see
 * testAtLeastOnceProducer for that.
 */
@SuppressWarnings("ResultOfMethodCallIgnored")
@Test(timeout = 10000)
public void testAsyncErrorRethrownAfterFlush() throws Throwable {
    final String failureMessage = "artificial async failure for 2nd message";

    final DummyFlinkKinesisProducer<String> producer =
            new DummyFlinkKinesisProducer<>(new SimpleStringSchema());
    final OneInputStreamOperatorTestHarness<String, Object> harness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(producer));

    harness.open();
    for (String payload : new String[] {"msg-1", "msg-2", "msg-3"}) {
        harness.processElement(new StreamRecord<>(payload));
    }

    // complete only the first record successfully before the checkpoint starts
    final UserRecordResult success = mock(UserRecordResult.class);
    when(success.isSuccessful()).thenReturn(true);
    producer.getPendingRecordFutures().get(0).set(success);

    final CheckedThread checkpointThread =
            new CheckedThread() {
                @Override
                public void go() throws Exception {
                    // two records are still pending, so this is expected to block at first
                    harness.snapshot(123L, 123L);
                }
            };
    checkpointThread.start();

    // fail the second record asynchronously, then complete the third
    producer.getPendingRecordFutures().get(1).setException(new Exception(failureMessage));
    producer.getPendingRecordFutures().get(2).set(mock(UserRecordResult.class));

    try {
        checkpointThread.sync();
        // reaching this line means nothing was rethrown; the AssertionError raised by
        // fail() is not an Exception, so it is not intercepted by the catch below
        Assert.fail();
    } catch (Exception e) {
        // after the flush, the async failure must show up in the rethrown exception
        Assert.assertTrue(ExceptionUtils.findThrowableWithMessage(e, failureMessage).isPresent());
    }
}
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class FlinkKinesisProducerTest, method testProducerIsSerializable.
/** Checks that a producer built from the standard test properties is serializable. */
@Test
public void testProducerIsSerializable() {
    final FlinkKinesisProducer<String> sink =
            new FlinkKinesisProducer<>(new SimpleStringSchema(), TestUtils.getStandardProperties());
    final boolean serializable = InstantiationUtil.isSerializable(sink);
    assertTrue(serializable);
}
Use of org.apache.flink.api.common.serialization.SimpleStringSchema in the Apache Flink project.
From the class ProduceIntoKinesis, method main.
/**
 * Runs a single-parallelism job that writes the strings emitted by {@code EventsGenerator}
 * into Kinesis.
 *
 * @param args command-line arguments; {@code region}, {@code accessKey} and {@code secretKey}
 *     are required
 * @throws Exception if argument parsing or job execution fails
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    final DataStream<String> events = env.addSource(new EventsGenerator());

    // AWS region and credentials are taken from the required command-line arguments
    final Properties producerProps = new Properties();
    producerProps.setProperty(AWSConfigConstants.AWS_REGION, params.getRequired("region"));
    producerProps.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, params.getRequired("accessKey"));
    producerProps.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, params.getRequired("secretKey"));

    final FlinkKinesisProducer<String> kinesisSink =
            new FlinkKinesisProducer<>(new SimpleStringSchema(), producerProps);
    kinesisSink.setFailOnError(true);
    kinesisSink.setDefaultStream("flink-test");
    kinesisSink.setDefaultPartition("0");

    events.addSink(kinesisSink);

    env.execute();
}
Aggregations