Search in sources :

Example 1 with StreamShardMetadata

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata in project flink by apache.

the class FlinkKinesisConsumerTest method testUseRestoredStateForSnapshotIfFetcherNotInitialized.

// ----------------------------------------------------------------------
// Tests related to state initialization
// ----------------------------------------------------------------------
/**
 * Restores a four-shard union state into a consumer that is opened but never run, then
 * snapshots it and checks what ends up in the list state.
 *
 * <p>The runtime context is mocked as subtask 0 of 2. The test expects the list state to be
 * cleared and to hold half of the restored entries afterwards, namely the entries for
 * shard order 0 and shard order 2.
 */
@Test
public void testUseRestoredStateForSnapshotIfFetcherNotInitialized() throws Exception {
    Properties config = TestUtils.getStandardProperties();

    // restored union state: shards 0..3 of "fakeStream", each at sequence number "1"
    List<Tuple2<StreamShardMetadata, SequenceNumber>> globalUnionState = new ArrayList<>(4);
    for (int shardOrder = 0; shardOrder < 4; shardOrder++) {
        Shard shard = new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder));
        StreamShardHandle shardHandle = new StreamShardHandle("fakeStream", shard);
        globalUnionState.add(Tuple2.of(KinesisDataFetcher.convertToStreamShardMetadata(shardHandle), new SequenceNumber("1")));
    }

    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> listState = new TestingListState<>();
    for (Tuple2<StreamShardMetadata, SequenceNumber> restoredEntry : globalUnionState) {
        listState.add(restoredEntry);
    }

    FlinkKinesisConsumer<String> consumer = new FlinkKinesisConsumer<>("fakeStream", new SimpleStringSchema(), config);

    // this consumer instance acts as subtask 0 out of 2 parallel subtasks
    RuntimeContext context = mock(RuntimeContext.class);
    when(context.getIndexOfThisSubtask()).thenReturn(0);
    when(context.getNumberOfParallelSubtasks()).thenReturn(2);
    consumer.setRuntimeContext(context);

    // state store that hands out the prepared list state and reports a restore
    OperatorStateStore operatorStateStore = mock(OperatorStateStore.class);
    when(operatorStateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(listState);
    StateInitializationContext initializationContext = mock(StateInitializationContext.class);
    when(initializationContext.getOperatorStateStore()).thenReturn(operatorStateStore);
    when(initializationContext.isRestored()).thenReturn(true);

    consumer.initializeState(initializationContext);
    // only opened, not run
    consumer.open(new Configuration());
    // arbitrary checkpoint id and timestamp
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(123, 123));

    assertTrue(listState.isClearCalled());
    // the checkpointed list state should contain only the shards that it should subscribe to
    assertEquals(globalUnionState.size() / 2, listState.getList().size());
    assertTrue(listState.getList().contains(globalUnionState.get(0)));
    assertTrue(listState.getList().contains(globalUnionState.get(2)));
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) StateSnapshotContextSynchronousImpl(org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl) ArrayList(java.util.ArrayList) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Properties(java.util.Properties) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Shard(com.amazonaws.services.kinesis.model.Shard) KinesisStreamShard(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 2 with StreamShardMetadata

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata in project flink by apache.

the class FlinkKinesisConsumerTest method testListStateChangedAfterSnapshotState.

/**
 * Checks that snapshotting a consumer with a fetcher in place replaces the restored list
 * state with the state reported by that fetcher.
 *
 * <p>The list state starts with a single entry (shard 0 at sequence number "1"). A mocked
 * {@link KinesisDataFetcher} reports three entries (shards 0..2 at "12", "11" and "31").
 * After {@code snapshotState} the list state must have been cleared, must hold exactly
 * those three entries, and must no longer hold the initial entry.
 */
@Test
public void testListStateChangedAfterSnapshotState() throws Exception {
    // ----------------------------------------------------------------------
    // config, initial state and the snapshot the fetcher will report
    // ----------------------------------------------------------------------
    Properties config = TestUtils.getStandardProperties();

    ArrayList<Tuple2<StreamShardMetadata, SequenceNumber>> initialState = new ArrayList<>(1);
    StreamShardHandle initialShardHandle = new StreamShardHandle("fakeStream1", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(0)));
    initialState.add(Tuple2.of(KinesisDataFetcher.convertToStreamShardMetadata(initialShardHandle), new SequenceNumber("1")));

    // index in this array == shard order of the expected entry
    String[] snapshotSequenceNumbers = {"12", "11", "31"};
    ArrayList<Tuple2<StreamShardMetadata, SequenceNumber>> expectedStateSnapshot = new ArrayList<>(3);
    for (int shardOrder = 0; shardOrder < snapshotSequenceNumbers.length; shardOrder++) {
        StreamShardHandle shardHandle = new StreamShardHandle("fakeStream1", new Shard().withShardId(KinesisShardIdGenerator.generateFromShardOrder(shardOrder)));
        expectedStateSnapshot.add(Tuple2.of(KinesisDataFetcher.convertToStreamShardMetadata(shardHandle), new SequenceNumber(snapshotSequenceNumbers[shardOrder])));
    }

    // ----------------------------------------------------------------------
    // operator state store returning the pre-filled list state on restore
    // ----------------------------------------------------------------------
    TestingListState<Tuple2<StreamShardMetadata, SequenceNumber>> listState = new TestingListState<>();
    for (Tuple2<StreamShardMetadata, SequenceNumber> initialEntry : initialState) {
        listState.add(initialEntry);
    }
    OperatorStateStore operatorStateStore = mock(OperatorStateStore.class);
    when(operatorStateStore.getUnionListState(Matchers.any(ListStateDescriptor.class))).thenReturn(listState);
    StateInitializationContext initializationContext = mock(StateInitializationContext.class);
    when(initializationContext.getOperatorStateStore()).thenReturn(operatorStateStore);
    when(initializationContext.isRestored()).thenReturn(true);

    // ----------------------------------------------------------------------
    // fetcher mock whose snapshotState() yields the expected entries
    // ----------------------------------------------------------------------
    HashMap<StreamShardMetadata, SequenceNumber> stateSnapshot = new HashMap<>();
    for (Tuple2<StreamShardMetadata, SequenceNumber> expectedEntry : expectedStateSnapshot) {
        stateSnapshot.put(expectedEntry.f0, expectedEntry.f1);
    }
    KinesisDataFetcher mockedFetcher = mock(KinesisDataFetcher.class);
    when(mockedFetcher.snapshotState()).thenReturn(stateSnapshot);

    // ----------------------------------------------------------------------
    // drive the consumer through initializeState / open / snapshotState
    // ----------------------------------------------------------------------
    FlinkKinesisConsumer<String> consumer = new FlinkKinesisConsumer<>("fakeStream", new SimpleStringSchema(), config);
    FlinkKinesisConsumer<?> mockedConsumer = spy(consumer);
    RuntimeContext context = mock(RuntimeContext.class);
    when(context.getIndexOfThisSubtask()).thenReturn(1);
    mockedConsumer.setRuntimeContext(context);
    mockedConsumer.initializeState(initializationContext);
    mockedConsumer.open(new Configuration());
    // mock consumer as running.
    Whitebox.setInternalState(mockedConsumer, "fetcher", mockedFetcher);
    mockedConsumer.snapshotState(mock(FunctionSnapshotContext.class));

    assertEquals(true, listState.clearCalled);
    assertEquals(3, listState.getList().size());

    // nothing from the initial state may be left in the list state
    for (Tuple2<StreamShardMetadata, SequenceNumber> initialEntry : initialState) {
        for (Tuple2<StreamShardMetadata, SequenceNumber> currentEntry : listState.getList()) {
            assertNotEquals(initialEntry, currentEntry);
        }
    }

    // every entry reported by the fetcher must be present
    for (Tuple2<StreamShardMetadata, SequenceNumber> expectedEntry : expectedStateSnapshot) {
        assertEquals(true, listState.getList().contains(expectedEntry));
    }
}
Also used : OperatorStateStore(org.apache.flink.api.common.state.OperatorStateStore) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Properties(java.util.Properties) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) SequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SequenceNumber) SentinelSequenceNumber(org.apache.flink.streaming.connectors.kinesis.model.SentinelSequenceNumber) KinesisDataFetcher(org.apache.flink.streaming.connectors.kinesis.internals.KinesisDataFetcher) TestableFlinkKinesisConsumer(org.apache.flink.streaming.connectors.kinesis.testutils.TestableFlinkKinesisConsumer) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) Shard(com.amazonaws.services.kinesis.model.Shard) KinesisStreamShard(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 3 with StreamShardMetadata

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata in project flink by apache.

the class FlinkKinesisConsumerMigrationTest method testRestoreWithReshardedStream.

/**
 * Restores the consumer from a migration snapshot while shard discovery reports that each
 * restored shard has since been closed and split into two new shards.
 *
 * <p>Expectations: the restored state equals {@code TEST_STATE}; the fetcher ends up with
 * three subscribed shards; the closed shard keeps {@code TEST_SEQUENCE_NUMBER}; both new
 * shards start from {@code SENTINEL_EARLIEST_SEQUENCE_NUM}.
 */
@Test
public void testRestoreWithReshardedStream() throws Exception {
    final List<StreamShardHandle> initialDiscoveryShards = new ArrayList<>(TEST_STATE.size());
    for (StreamShardMetadata restoredShard : TEST_STATE.keySet()) {
        // setup the closed shard
        Shard closedShard = new Shard();
        closedShard.setShardId(restoredShard.getShardId());
        // an ending sequence number is what represents a closed shard
        closedShard.setSequenceNumberRange(new SequenceNumberRange().withStartingSequenceNumber("1").withEndingSequenceNumber("1087654321"));
        initialDiscoveryShards.add(new StreamShardHandle(restoredShard.getStreamName(), closedShard));

        // setup the new shards: shard orders 1 and 2, both children of TEST_SHARD_ID
        final String[] newShardStartingSequenceNumbers = {"1087654322", "2087654322"};
        for (int i = 0; i < newShardStartingSequenceNumbers.length; i++) {
            Shard newSplitShard = new Shard();
            newSplitShard.setShardId(KinesisShardIdGenerator.generateFromShardOrder(i + 1));
            newSplitShard.setSequenceNumberRange(new SequenceNumberRange().withStartingSequenceNumber(newShardStartingSequenceNumbers[i]));
            newSplitShard.setParentShardId(TEST_SHARD_ID);
            initialDiscoveryShards.add(new StreamShardHandle(restoredShard.getStreamName(), newSplitShard));
        }
    }

    final TestFetcher<String> fetcher = new TestFetcher<>(
        Collections.singletonList(TEST_STREAM_NAME),
        new TestSourceContext<>(),
        new TestRuntimeContext(true, 1, 0),
        TestUtils.getStandardProperties(),
        new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()),
        null,
        initialDiscoveryShards);
    final DummyFlinkKinesisConsumer<String> consumerFunction = new DummyFlinkKinesisConsumer<>(fetcher, new KinesisDeserializationSchemaWrapper<>(new SimpleStringSchema()));
    StreamSource<String, DummyFlinkKinesisConsumer<String>> consumerOperator = new StreamSource<>(consumerFunction);
    final AbstractStreamOperatorTestHarness<String> testHarness = new AbstractStreamOperatorTestHarness<>(consumerOperator, 1, 1, 0);

    testHarness.setup();
    testHarness.initializeState(OperatorSnapshotUtil.getResourceFilename("kinesis-consumer-migration-test-flink" + testMigrateVersion + "-snapshot"));
    testHarness.open();
    consumerFunction.run(new TestSourceContext<>());

    // assert that state is correctly restored
    assertNotEquals(null, consumerFunction.getRestoredState());
    assertEquals(1, consumerFunction.getRestoredState().size());
    assertEquals(TEST_STATE, removeEquivalenceWrappers(consumerFunction.getRestoredState()));

    // assert that the fetcher is registered with all shards, including new shards
    assertEquals(3, fetcher.getSubscribedShardsState().size());

    // position 0: the restored, now closed, shard keeps its checkpointed sequence number
    KinesisStreamShardState restoredClosedShardState = fetcher.getSubscribedShardsState().get(0);
    assertEquals(TEST_STREAM_NAME, restoredClosedShardState.getStreamShardHandle().getStreamName());
    assertEquals(TEST_SHARD_ID, restoredClosedShardState.getStreamShardHandle().getShard().getShardId());
    assertTrue(restoredClosedShardState.getStreamShardHandle().isClosed());
    assertEquals(TEST_SEQUENCE_NUMBER, restoredClosedShardState.getLastProcessedSequenceNum());

    // positions 1 and 2: the new split shards, whose shard order matches their position
    for (int shardOrder = 1; shardOrder <= 2; shardOrder++) {
        KinesisStreamShardState restoredNewSplitShard = fetcher.getSubscribedShardsState().get(shardOrder);
        assertEquals(TEST_STREAM_NAME, restoredNewSplitShard.getStreamShardHandle().getStreamName());
        assertEquals(KinesisShardIdGenerator.generateFromShardOrder(shardOrder), restoredNewSplitShard.getStreamShardHandle().getShard().getShardId());
        assertFalse(restoredNewSplitShard.getStreamShardHandle().isClosed());
        // new shards should be consumed from the beginning
        assertEquals(SentinelSequenceNumber.SENTINEL_EARLIEST_SEQUENCE_NUM.get(), restoredNewSplitShard.getLastProcessedSequenceNum());
    }

    consumerOperator.close();
    consumerOperator.cancel();
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) ArrayList(java.util.ArrayList) TestRuntimeContext(org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) AbstractStreamOperatorTestHarness(org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness) StreamShardHandle(org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle) SimpleStringSchema(org.apache.flink.api.common.serialization.SimpleStringSchema) KinesisStreamShardState(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState) Shard(com.amazonaws.services.kinesis.model.Shard) Test(org.junit.Test)

Example 4 with StreamShardMetadata

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata in project flink by apache.

the class FlinkKinesisConsumerTest method testLegacyKinesisStreamShardToStreamShardMetadataConversion.

/**
 * Checks that {@code KinesisStreamShard.convertToStreamShardMetadata} carries over every
 * field of a legacy {@link KinesisStreamShard}: stream name, shard id, both parent shard
 * ids, the hash key bounds and the sequence number bounds.
 */
@Test
public void testLegacyKinesisStreamShardToStreamShardMetadataConversion() {
    final String streamName = "fakeStream1";
    final String shardId = "shard-000001";
    final String parentShardId = "shard-000002";
    final String adjacentParentShardId = "shard-000003";
    final String startingHashKey = "key-000001";
    final String endingHashKey = "key-000010";
    final String startingSequenceNumber = "seq-0000021";
    final String endingSequenceNumber = "seq-00000031";

    // legacy representation: an AWS Shard wrapped together with its stream name
    HashKeyRange hashKeyRange = new HashKeyRange()
        .withStartingHashKey(startingHashKey)
        .withEndingHashKey(endingHashKey);
    SequenceNumberRange sequenceNumberRange = new SequenceNumberRange()
        .withStartingSequenceNumber(startingSequenceNumber)
        .withEndingSequenceNumber(endingSequenceNumber);
    Shard shard = new Shard()
        .withShardId(shardId)
        .withParentShardId(parentShardId)
        .withAdjacentParentShardId(adjacentParentShardId)
        .withHashKeyRange(hashKeyRange)
        .withSequenceNumberRange(sequenceNumberRange);
    KinesisStreamShard legacyShard = new KinesisStreamShard(streamName, shard);

    // metadata the conversion is expected to produce from the same values
    StreamShardMetadata expectedMetadata = new StreamShardMetadata();
    expectedMetadata.setStreamName(streamName);
    expectedMetadata.setShardId(shardId);
    expectedMetadata.setParentShardId(parentShardId);
    expectedMetadata.setAdjacentParentShardId(adjacentParentShardId);
    expectedMetadata.setStartingHashKey(startingHashKey);
    expectedMetadata.setEndingHashKey(endingHashKey);
    expectedMetadata.setStartingSequenceNumber(startingSequenceNumber);
    expectedMetadata.setEndingSequenceNumber(endingSequenceNumber);

    assertEquals(expectedMetadata, KinesisStreamShard.convertToStreamShardMetadata(legacyShard));
}
Also used : SequenceNumberRange(com.amazonaws.services.kinesis.model.SequenceNumberRange) KinesisStreamShard(org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard) StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata) Shard(com.amazonaws.services.kinesis.model.Shard) HashKeyRange(com.amazonaws.services.kinesis.model.HashKeyRange) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 5 with StreamShardMetadata

use of org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata in project flink by apache.

the class KinesisDataFetcher method convertToStreamShardMetadata.

/**
 * Builds a new {@link StreamShardMetadata} from the values held by a {@link StreamShardHandle}.
 *
 * <p>The stream name, shard id, parent shard id and adjacent parent shard id are always
 * copied. The starting/ending hash keys and the starting/ending sequence numbers are copied
 * only when the shard has the corresponding range object; when a range is {@code null} the
 * matching setters are not called at all.
 *
 * @param streamShardHandle the {@link StreamShardHandle} to be converted
 * @return a newly created {@link StreamShardMetadata} populated from the handle
 */
public static StreamShardMetadata convertToStreamShardMetadata(StreamShardHandle streamShardHandle) {
    final StreamShardMetadata metadata = new StreamShardMetadata();

    // identity: which stream and which shard
    metadata.setStreamName(streamShardHandle.getStreamName());
    metadata.setShardId(streamShardHandle.getShard().getShardId());

    // lineage: the shard(s) this one descends from
    metadata.setParentShardId(streamShardHandle.getShard().getParentShardId());
    metadata.setAdjacentParentShardId(streamShardHandle.getShard().getAdjacentParentShardId());

    // sequence number bounds, skipped when the shard has no sequence number range
    if (streamShardHandle.getShard().getSequenceNumberRange() != null) {
        metadata.setStartingSequenceNumber(streamShardHandle.getShard().getSequenceNumberRange().getStartingSequenceNumber());
        metadata.setEndingSequenceNumber(streamShardHandle.getShard().getSequenceNumberRange().getEndingSequenceNumber());
    }

    // hash key bounds, skipped when the shard has no hash key range
    if (streamShardHandle.getShard().getHashKeyRange() != null) {
        metadata.setStartingHashKey(streamShardHandle.getShard().getHashKeyRange().getStartingHashKey());
        metadata.setEndingHashKey(streamShardHandle.getShard().getHashKeyRange().getEndingHashKey());
    }

    return metadata;
}
Also used : StreamShardMetadata(org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)

Aggregations

StreamShardMetadata (org.apache.flink.streaming.connectors.kinesis.model.StreamShardMetadata)10 Shard (com.amazonaws.services.kinesis.model.Shard)8 Test (org.junit.Test)8 StreamShardHandle (org.apache.flink.streaming.connectors.kinesis.model.StreamShardHandle)7 SequenceNumberRange (com.amazonaws.services.kinesis.model.SequenceNumberRange)6 ArrayList (java.util.ArrayList)6 SimpleStringSchema (org.apache.flink.api.common.serialization.SimpleStringSchema)6 StreamSource (org.apache.flink.streaming.api.operators.StreamSource)4 TestRuntimeContext (org.apache.flink.streaming.connectors.kinesis.testutils.TestRuntimeContext)4 AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness)4 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)4 KinesisStreamShard (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard)3 KinesisStreamShardState (org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShardState)3 HashKeyRange (com.amazonaws.services.kinesis.model.HashKeyRange)2 Properties (java.util.Properties)2 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)2 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)2 OperatorStateStore (org.apache.flink.api.common.state.OperatorStateStore)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 Configuration (org.apache.flink.configuration.Configuration)2