Search in sources :

Example 16 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class FlinkKinesisConsumerTest method testListStateChangedAfterSnapshotState.

/**
 * Checks that snapshotState() clears the restored list state and repopulates it with the
 * entries reported by the fetcher, so none of the initially restored entries remain.
 */
@Test
public void testListStateChangedAfterSnapshotState() throws Exception {
    Properties config = TestUtils.getStandardProperties();

    // ---- initial (restored) state: one shard of "fakeStream1" at sequence number "1" ----
    ArrayList<Tuple2<StreamShardMetadata, SequenceNumber>> initialState = new ArrayList<>(1);
    StreamShardHandle restoredShard = new StreamShardHandle("fakeStream1", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(0)));
    initialState.add(Tuple2.of(KinesisDataFetcher.convertToStreamShardMetadata(restoredShard), new SequenceNumber("1")));

    // ---- expected snapshot: shards 0..2 of "fakeStream1" with these sequence numbers ----
    String[] snapshotSequenceNumbers = { "12", "11", "31" };
    ArrayList<Tuple2<StreamShardMetadata, SequenceNumber>> expectedStateSnapshot = new ArrayList<>(3);
    for (int shardOrder = 0; shardOrder < snapshotSequenceNumbers.length; shardOrder++) {
        StreamShardHandle shard = new StreamShardHandle("fakeStream1", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder)));
        expectedStateSnapshot.add(Tuple2.of(KinesisDataFetcher.convertToStreamShardMetadata(shard), new SequenceNumber(snapshotSequenceNumbers[shardOrder])));
    }

    // ---- operator state backend handing out a list state pre-filled with the initial state ----
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> listState = new TestingListState<>();
    for (Tuple2<StreamShardMetadata, SequenceNumber> restoredEntry : initialState) {
        listState.add(restoredEntry);
    }
    OperatorStateStore operatorStateStore = mock(OperatorStateStore.class);
    when(operatorStateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(listState);
    StateInitializationContext initializationContext = mock(StateInitializationContext.class);
    when(initializationContext.getOperatorStateStore()).thenReturn(operatorStateStore);
    when(initializationContext.isRestored()).thenReturn(true);

    // ---- mocked fetcher whose snapshotState() yields the expected snapshot as a map ----
    HashMap<StreamShardMetadata, SequenceNumber> stateSnapshot = new HashMap<>();
    for (Tuple2<StreamShardMetadata, SequenceNumber> entry : expectedStateSnapshot) {
        stateSnapshot.put(entry.f0, entry.f1);
    }
    KinesisDataFetcher mockedFetcher = mock(KinesisDataFetcher.class);
    when(mockedFetcher.snapshotState()).thenReturn(stateSnapshot);

    // ---- consumer under test (spied), initialized and opened as subtask 1 ----
    FlinkKinesisConsumer<String> consumer = new FlinkKinesisConsumer<>("fakeStream", new SimpleStringSchema(), config);
    FlinkKinesisConsumer<?> mockedConsumer = spy(consumer);
    RuntimeContext context = mock(RuntimeContext.class);
    when(context.getIndexOfThisSubtask()).thenReturn(1);
    mockedConsumer.setRuntimeContext(context);
    mockedConsumer.initializeState(initializationContext);
    mockedConsumer.open(new Configuration());
    // inject the mocked fetcher so the consumer looks like it is running
    Whitebox.setInternalState(mockedConsumer, "fetcher", mockedFetcher);

    mockedConsumer.snapshotState(mock(FunctionSnapshotContext.class));

    // ---- the list state must have been cleared and now hold exactly the snapshot entries ----
    assertEquals(true, listState.clearCalled);
    assertEquals(3, listState.getList().size());

    // none of the initially restored entries may survive the snapshot
    for (Tuple2<StreamShardMetadata, SequenceNumber> restoredEntry : initialState) {
        for (Tuple2<StreamShardMetadata, SequenceNumber> currentEntry : listState.getList()) {
            assertNotEquals(restoredEntry, currentEntry);
        }
    }

    // every expected entry must be present in the list state
    for (Tuple2<StreamShardMetadata, SequenceNumber> expectedEntry : expectedStateSnapshot) {
        boolean found = false;
        for (Tuple2<StreamShardMetadata, SequenceNumber> currentEntry : listState.getList()) {
            if (expectedEntry.equals(currentEntry)) {
                found = true;
                break;
            }
        }
        assertEquals(true, found);
    }
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Properties(java.util.Properties) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Shard(com.amazonaws.services.kinesis.model.Shard) KinesisStreamShard(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 17 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class FlinkKinesisProducerTest method testAtLeastOnceProducer.

/**
 * Verifies that the producer does not drop buffered records: a snapshot must stay blocked
 * until every pending record has been acknowledged. A timeout is set because the test would
 * never finish if that logic were broken.
 */
@SuppressWarnings({ "unchecked", "ResultOfMethodCallIgnored" })
@Test(timeout = 10000)
public void testAtLeastOnceProducer() throws Throwable {
    final DummyFlinkKinesisProducer<String> kinesisProducer = new DummyFlinkKinesisProducer<>(new SimpleStringSchema());
    OneInputStreamOperatorTestHarness<String, Object> harness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(kinesisProducer));
    harness.open();

    // buffer three records in the producer
    for (String message : new String[] { "msg-1", "msg-2", "msg-3" }) {
        harness.processElement(new StreamRecord<>(message));
    }

    // run the checkpoint on a separate thread
    CheckedThread checkpointThread = new CheckedThread() {

        @Override
        public void go() throws Exception {
            // expected to block until all pending records have been flushed
            harness.snapshot(123L, 123L);
        }
    };
    checkpointThread.start();

    // wait until flushing has started; this call would block forever if the snapshot
    // did not trigger a flush
    kinesisProducer.waitUntilFlushStarted();
    final String stillBlockedMessage = "Snapshot returned before all records were flushed";
    Assert.assertTrue(stillBlockedMessage, checkpointThread.isAlive());

    // acknowledge the records one by one; the snapshot must remain blocked until the last one
    UserRecordResult successfulResult = mock(UserRecordResult.class);
    when(successfulResult.isSuccessful()).thenReturn(true);
    final int lastIndex = 2;
    for (int i = 0; i < lastIndex; i++) {
        kinesisProducer.getPendingRecordFutures().get(i).set(successfulResult);
        Assert.assertTrue(stillBlockedMessage, checkpointThread.isAlive());
    }
    kinesisProducer.getPendingRecordFutures().get(lastIndex).set(successfulResult);

    // rethrows any failure raised inside the checkpoint thread
    checkpointThread.sync();
    harness.close();
}
Also used : SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Matchers.anyString(org.mockito.Matchers.anyString) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) CheckedThread(org.apache.flink.core.testutils.CheckedThread) UserRecordResult(com.amazonaws.services.kinesis.producer.UserRecordResult) Test(org.junit.Test)

Example 18 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class FlinkKinesisProducerTest method testAsyncErrorRethrownAfterFlush.

/**
 * Verifies that an asynchronous failure of one of the records flushed on checkpoint is
 * rethrown by the snapshot. A timeout is set because the test would never finish if the
 * logic were broken.
 *
 * <p>This test does not check that the snapshot blocks correctly while records are pending;
 * that is covered by testAtLeastOnceProducer.
 */
@SuppressWarnings("ResultOfMethodCallIgnored")
@Test(timeout = 10000)
public void testAsyncErrorRethrownAfterFlush() throws Throwable {
    final DummyFlinkKinesisProducer<String> kinesisProducer = new DummyFlinkKinesisProducer<>(new SimpleStringSchema());
    OneInputStreamOperatorTestHarness<String, Object> harness = new OneInputStreamOperatorTestHarness<>(new StreamSink<>(kinesisProducer));
    harness.open();
    for (String message : new String[] { "msg-1", "msg-2", "msg-3" }) {
        harness.processElement(new StreamRecord<>(message));
    }

    // complete only the first record successfully before the checkpoint starts
    UserRecordResult successfulResult = mock(UserRecordResult.class);
    when(successfulResult.isSuccessful()).thenReturn(true);
    kinesisProducer.getPendingRecordFutures().get(0).set(successfulResult);

    CheckedThread checkpointThread = new CheckedThread() {

        @Override
        public void go() throws Exception {
            // two records are still pending at this point, so this is expected to block first
            harness.snapshot(123L, 123L);
        }
    };
    checkpointThread.start();

    // fail the 2nd record asynchronously, then complete the 3rd one
    kinesisProducer.getPendingRecordFutures().get(1).setException(new Exception("artificial async failure for 2nd message"));
    kinesisProducer.getPendingRecordFutures().get(2).set(mock(UserRecordResult.class));

    // capture whatever the checkpoint thread rethrows
    Exception rethrown = null;
    try {
        checkpointThread.sync();
    } catch (Exception e) {
        rethrown = e;
    }

    // the snapshot must not have completed normally
    if (rethrown == null) {
        Assert.fail();
    }
    // after the flush, the async exception should be found in the rethrown exception
    Assert.assertTrue(ExceptionUtils.findThrowableWithMessage(rethrown, "artificial async failure for 2nd message").isPresent());
}
Also used : SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Matchers.anyString(org.mockito.Matchers.anyString) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) UserRecordResult(com.amazonaws.services.kinesis.producer.UserRecordResult) CheckedThread(org.apache.flink.core.testutils.CheckedThread) ExpectedException(org.junit.rules.ExpectedException) Test(org.junit.Test)

Example 19 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class FlinkKinesisProducerTest method testProducerIsSerializable.

/** Verifies that a FlinkKinesisProducer built from the standard test properties is serializable. */
@Test
public void testProducerIsSerializable() {
    SimpleStringSchema schema = new SimpleStringSchema();
    FlinkKinesisProducer<String> kinesisProducer = new FlinkKinesisProducer<>(schema, TestUtils.getStandardProperties());
    boolean serializable = InstantiationUtil.isSerializable(kinesisProducer);
    assertTrue(serializable);
}
Also used : SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)

Example 20 with SimpleStringSchema

use of org.apache.flink.api.common.serialization.SimpleStringSchema in project flink by apache.

the class ProduceIntoKinesis method main.

/**
 * Builds and runs a job that writes the strings emitted by an EventsGenerator source into
 * Kinesis.
 *
 * @param args command-line arguments; "region", "accessKey" and "secretKey" are required
 * @throws Exception if a required argument is missing or the job execution fails
 */
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);

    // single-parallelism environment with the generated event stream as source
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    DataStream<String> events = env.addSource(new EventsGenerator());

    // AWS settings for the producer, taken from the required command-line arguments
    Properties producerConfig = new Properties();
    producerConfig.setProperty(AWSConfigConstants.AWS_REGION, params.getRequired("region"));
    producerConfig.setProperty(AWSConfigConstants.AWS_ACCESS_KEY_ID, params.getRequired("accessKey"));
    producerConfig.setProperty(AWSConfigConstants.AWS_SECRET_ACCESS_KEY, params.getRequired("secretKey"));

    // Kinesis sink with default stream "flink-test" and default partition "0"
    FlinkKinesisProducer<String> kinesisSink = new FlinkKinesisProducer<>(new SimpleStringSchema(), producerConfig);
    kinesisSink.setFailOnError(true);
    kinesisSink.setDefaultStream("flink-test");
    kinesisSink.setDefaultPartition("0");

    events.addSink(kinesisSink);
    env.execute();
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) FlinkKinesisProducer(org.apache.flink.streaming.connectors.kinesis.FlinkKinesisProducer) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Properties(java.util.Properties)

Aggregations

SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)63 Test (org.junit.Test)35 Properties (java.util.Properties)30 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)20 CheckedThread (org.apache.flink.core.testutils.CheckedThread)13 StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)13 Shard (com.amazonaws.services.kinesis.model.Shard)11 ArrayList (java.util.ArrayList)11 KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState)11 TestableKinesisDataFetcher (org.apache.flink.streaming.connectors.kinesis.testutils.TestableKinesisDataFetcher)11 LinkedList (java.util.LinkedList)9 SequenceNumber (org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber)9 HashMap (java.util.HashMap)8 StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)7 OneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness)7 Map (java.util.Map)6 AtomicReference (java.util.concurrent.atomic.AtomicReference)6 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)6 Matchers.anyString (org.mockito.Matchers.anyString)6 SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange)5