Example 16 with HashMapStateBackend

Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

The class StatefulJobWBroadcastStateMigrationITCase, method testSavepoint:

@Test
public void testSavepoint() throws Exception {
    final int parallelism = 4;
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setRestartStrategy(RestartStrategies.noRestart());
    switch (snapshotSpec.getStateBackendType()) {
        case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
            env.setStateBackend(new EmbeddedRocksDBStateBackend());
            break;
        case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
            env.setStateBackend(new MemoryStateBackend());
            break;
        case StateBackendLoader.HASHMAP_STATE_BACKEND_NAME:
            env.setStateBackend(new HashMapStateBackend());
            break;
        default:
            throw new UnsupportedOperationException();
    }
    env.enableChangelogStateBackend(false);
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setMaxParallelism(parallelism);
    SourceFunction<Tuple2<Long, Long>> nonParallelSource;
    SourceFunction<Tuple2<Long, Long>> nonParallelSourceB;
    SourceFunction<Tuple2<Long, Long>> parallelSource;
    SourceFunction<Tuple2<Long, Long>> parallelSourceB;
    KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> firstBroadcastFunction;
    KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> secondBroadcastFunction;
    final Map<Long, Long> expectedFirstState = new HashMap<>();
    expectedFirstState.put(0L, 0L);
    expectedFirstState.put(1L, 1L);
    expectedFirstState.put(2L, 2L);
    expectedFirstState.put(3L, 3L);
    final Map<String, Long> expectedSecondState = new HashMap<>();
    expectedSecondState.put("0", 0L);
    expectedSecondState.put("1", 1L);
    expectedSecondState.put("2", 2L);
    expectedSecondState.put("3", 3L);
    final Map<Long, String> expectedThirdState = new HashMap<>();
    expectedThirdState.put(0L, "0");
    expectedThirdState.put(1L, "1");
    expectedThirdState.put(2L, "2");
    expectedThirdState.put(3L, "3");
    if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
        nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
        nonParallelSourceB = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
        parallelSource = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
        parallelSourceB = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
        firstBroadcastFunction = new CheckpointingKeyedBroadcastFunction();
        secondBroadcastFunction = new CheckpointingKeyedSingleBroadcastFunction();
    } else if (executionMode == ExecutionMode.VERIFY_SNAPSHOT) {
        nonParallelSource = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
        nonParallelSourceB = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
        parallelSource = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
        parallelSourceB = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
        firstBroadcastFunction = new CheckingKeyedBroadcastFunction(expectedFirstState, expectedSecondState);
        secondBroadcastFunction = new CheckingKeyedSingleBroadcastFunction(expectedThirdState);
    } else {
        throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
    }
    KeyedStream<Tuple2<Long, Long>, Long> npStream = env.addSource(nonParallelSource).uid("CheckpointingSource1").keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {

        private static final long serialVersionUID = -4514793867774977152L;

        @Override
        public Long getKey(Tuple2<Long, Long> value) throws Exception {
            return value.f0;
        }
    });
    KeyedStream<Tuple2<Long, Long>, Long> pStream = env.addSource(parallelSource).uid("CheckpointingSource2").keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {

        private static final long serialVersionUID = 4940496713319948104L;

        @Override
        public Long getKey(Tuple2<Long, Long> value) throws Exception {
            return value.f0;
        }
    });
    final MapStateDescriptor<Long, Long> firstBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-1", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
    final MapStateDescriptor<String, Long> secondBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-2", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
    final MapStateDescriptor<Long, String> thirdBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-3", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    BroadcastStream<Tuple2<Long, Long>> npBroadcastStream = env.addSource(nonParallelSourceB).uid("BrCheckpointingSource1").broadcast(firstBroadcastStateDesc, secondBroadcastStateDesc);
    BroadcastStream<Tuple2<Long, Long>> pBroadcastStream = env.addSource(parallelSourceB).uid("BrCheckpointingSource2").broadcast(thirdBroadcastStateDesc);
    npStream.connect(npBroadcastStream)
            .process(firstBroadcastFunction)
            .uid("BrProcess1")
            .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
    pStream.connect(pBroadcastStream)
            .process(secondBroadcastFunction)
            .uid("BrProcess2")
            .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
    if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
        executeAndSnapshot(
                env,
                "src/test/resources/" + getSnapshotPath(snapshotSpec),
                snapshotSpec.getSnapshotType(),
                new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, 2 * NUM_SOURCE_ELEMENTS));
    } else {
        restoreAndExecute(
                env,
                getResourceFilename(getSnapshotPath(snapshotSpec)),
                // we have 2 sources
                new Tuple2<>(MigrationTestUtils.CheckingNonParallelSourceWithListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 2),
                // we have 2 sources
                new Tuple2<>(MigrationTestUtils.CheckingParallelSourceWithUnionListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 2 * parallelism),
                new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
    }
}
Also used: MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor), HashMap (java.util.HashMap), MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend), MigrationTestUtils (org.apache.flink.test.checkpointing.utils.MigrationTestUtils), EmbeddedRocksDBStateBackend (org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend), Test (org.junit.Test)
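
The switch above selects the backend programmatically using the name constants from StateBackendLoader. For comparison, here is a minimal sketch of choosing the hashmap backend declaratively instead of via env.setStateBackend(...); it assumes a Flink release where StateBackendOptions.STATE_BACKEND is available and accepts the "hashmap" shorthand, and the class name HashMapBackendConfigSketch is hypothetical:

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.StateBackendOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class HashMapBackendConfigSketch {
    public static void main(String[] args) {
        Configuration config = new Configuration();
        // "hashmap" is the shorthand that StateBackendLoader resolves to a HashMapStateBackend
        config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment(config);
        // ... define sources, transformations, and sinks as in the test above
    }
}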

Example 17 with HashMapStateBackend

Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

The class StateBootstrapTransformationTest, method testBroadcastStateTransformationParallelism:

@Test
public void testBroadcastStateTransformationParallelism() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(10);
    DataStream<Integer> input = env.fromElements(0);
    StateBootstrapTransformation<Integer> transformation = OperatorTransformation.bootstrapWith(input).transform(new ExampleBroadcastStateBootstrapFunction());
    int maxParallelism = transformation.getMaxParallelism(4);
    DataStream<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(OperatorIDGenerator.fromUid("uid"), new HashMapStateBackend(), new Path(), maxParallelism);
    Assert.assertEquals("Broadcast transformations should always be run at parallelism 1", 1, result.getParallelism());
}
Also used: Path (org.apache.flink.core.fs.Path), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend), TaggedOperatorSubtaskState (org.apache.flink.state.api.output.TaggedOperatorSubtaskState), Test (org.junit.Test)
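
ExampleBroadcastStateBootstrapFunction is internal to the test, but its shape follows the state processor API's BroadcastStateBootstrapFunction. A hedged stand-in that would satisfy the same transform call (class name, descriptor name, and the write logic are all illustrative assumptions):

import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.state.api.functions.BroadcastStateBootstrapFunction;

public class BroadcastBootstrapSketch extends BroadcastStateBootstrapFunction<Integer> {

    private static final MapStateDescriptor<Integer, Integer> DESCRIPTOR =
            new MapStateDescriptor<>(
                    "bootstrap-broadcast", BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO);

    @Override
    public void processElement(Integer value, Context ctx) throws Exception {
        // every subtask would write identical broadcast state, so the
        // transformation is pinned to parallelism 1 regardless of the environment
        ctx.getBroadcastState(DESCRIPTOR).put(value, value);
    }
}

The assertion reflects exactly that property: regardless of the environment's parallelism (10) or the max parallelism handed to writeOperatorSubtaskStates, broadcast bootstrap output runs at parallelism 1.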

Example 18 with HashMapStateBackend

Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

The class SavepointReaderITTestBase, method verifyBroadcastState:

private void verifyBroadcastState(String path, StreamExecutionEnvironment env) throws Exception {
    SavepointReader savepoint = SavepointReader.read(env, path, new HashMapStateBackend());
    List<Tuple2<Integer, String>> broadcastResult =
            JobResultRetriever.collect(readBroadcastState(savepoint));
    List<Integer> broadcastStateKeys =
            broadcastResult.stream()
                    .map(entry -> entry.f0)
                    .sorted(Comparator.naturalOrder())
                    .collect(Collectors.toList());
    List<String> broadcastStateValues =
            broadcastResult.stream()
                    .map(entry -> entry.f1)
                    .sorted(Comparator.naturalOrder())
                    .collect(Collectors.toList());
    Assert.assertEquals(
            "Unexpected element in broadcast state keys",
            SavepointSource.getElements(),
            broadcastStateKeys);
    Assert.assertEquals(
            "Unexpected element in broadcast state values",
            SavepointSource.getElements().stream()
                    .map(Object::toString)
                    .sorted()
                    .collect(Collectors.toList()),
            broadcastStateValues);
}
Also used: Tuple2 (org.apache.flink.api.java.tuple.Tuple2), HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend)
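
readBroadcastState(savepoint) is a helper of the test base class whose body is not shown here. Assuming it delegates to SavepointReader's public readBroadcastState API, it would look roughly like this; the uid and state name are placeholders for illustration, not the test's real values:

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.state.api.SavepointReader;
import org.apache.flink.streaming.api.datastream.DataStream;

class BroadcastReadSketch {
    static DataStream<Tuple2<Integer, String>> readBroadcastState(SavepointReader savepoint)
            throws Exception {
        // uid and state name are assumptions for illustration only
        return savepoint.readBroadcastState(
                "broadcast-uid",
                "broadcast-state",
                BasicTypeInfo.INT_TYPE_INFO,
                BasicTypeInfo.STRING_TYPE_INFO);
    }
}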

Example 19 with HashMapStateBackend

Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

The class SavepointReaderITTestBase, method verifyUnionState:

private void verifyUnionState(String path, StreamExecutionEnvironment env) throws Exception {
    SavepointReader savepoint = SavepointReader.read(env, path, new HashMapStateBackend());
    List<Integer> unionResult = JobResultRetriever.collect(readUnionState(savepoint));
    unionResult.sort(Comparator.naturalOrder());
    Assert.assertEquals("Unexpected elements read from union state", SavepointSource.getElements(), unionResult);
}
Also used: HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend)
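
readUnionState(savepoint) is the union-state counterpart of the helper above. A sketch under the same assumptions (placeholder uid and state name, class name hypothetical):

import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.state.api.SavepointReader;
import org.apache.flink.streaming.api.datastream.DataStream;

class UnionReadSketch {
    static DataStream<Integer> readUnionState(SavepointReader savepoint) throws Exception {
        // placeholders, as in the broadcast sketch above
        return savepoint.readUnionState("union-uid", "union-state", BasicTypeInfo.INT_TYPE_INFO);
    }
}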

Example 20 with HashMapStateBackend

Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

The class FileSinkCompactionSwitchITCase, method createJobGraph:

private JobGraph createJobGraph(String path, String cpPath, boolean compactionEnabled, boolean isFinite, SharedReference<ConcurrentHashMap<Integer, Integer>> sendCountMap) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.STREAMING);
    // disable changelog state in case it's randomly enabled, since it will fail the savepoint
    config.set(StateChangelogOptions.ENABLE_STATE_CHANGE_LOG, false);
    env.configure(config, getClass().getClassLoader());
    env.enableCheckpointing(100, CheckpointingMode.EXACTLY_ONCE);
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.getCheckpointConfig().setCheckpointStorage(new FileSystemCheckpointStorage(cpPath));
    env.setStateBackend(new HashMapStateBackend());
    env.addSource(new CountingTestSource(latchId, NUM_RECORDS, isFinite, sendCountMap))
            .setParallelism(NUM_SOURCES)
            .sinkTo(createFileSink(path, compactionEnabled))
            .uid("sink")
            .setParallelism(NUM_SINKS);
    StreamGraph streamGraph = env.getStreamGraph();
    return streamGraph.getJobGraph();
}
Also used: MiniClusterConfiguration (org.apache.flink.runtime.minicluster.MiniClusterConfiguration), MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration), Configuration (org.apache.flink.configuration.Configuration), FileSystemCheckpointStorage (org.apache.flink.runtime.state.storage.FileSystemCheckpointStorage), StreamGraph (org.apache.flink.streaming.api.graph.StreamGraph), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend)
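
Note the two ways these tests neutralize changelog randomization: Example 16 calls env.enableChangelogStateBackend(false) programmatically, while this example sets StateChangelogOptions.ENABLE_STATE_CHANGE_LOG to false in the Configuration. A minimal sketch of the programmatic form, using only calls that appear in the examples above (class name hypothetical):

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ChangelogOffSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // programmatic equivalent of setting StateChangelogOptions.ENABLE_STATE_CHANGE_LOG to false
        env.enableChangelogStateBackend(false);
    }
}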

Aggregations

HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend): 22 usages
Test (org.junit.Test): 12 usages
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 11 usages
MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend): 10 usages
EmbeddedRocksDBStateBackend (org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend): 7 usages
Configuration (org.apache.flink.configuration.Configuration): 6 usages
Path (org.apache.flink.core.fs.Path): 6 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 4 usages
TaggedOperatorSubtaskState (org.apache.flink.state.api.output.TaggedOperatorSubtaskState): 4 usages
DelegatingStateBackend (org.apache.flink.runtime.state.delegate.DelegatingStateBackend): 3 usages
FsStateBackend (org.apache.flink.runtime.state.filesystem.FsStateBackend): 3 usages
MigrationTestUtils (org.apache.flink.test.checkpointing.utils.MigrationTestUtils): 3 usages
KeySelector (org.apache.flink.api.java.functions.KeySelector): 2 usages
ParameterTool (org.apache.flink.api.java.utils.ParameterTool): 2 usages
AbstractKeyedStateBackend (org.apache.flink.runtime.state.AbstractKeyedStateBackend): 2 usages
AbstractStateBackend (org.apache.flink.runtime.state.AbstractStateBackend): 2 usages
ConfigurableStateBackend (org.apache.flink.runtime.state.ConfigurableStateBackend): 2 usages
OperatorStateBackend (org.apache.flink.runtime.state.OperatorStateBackend): 2 usages
StateBackend (org.apache.flink.runtime.state.StateBackend): 2 usages
HashMapStateBackendFactory (org.apache.flink.runtime.state.hashmap.HashMapStateBackendFactory): 2 usages