Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.
The class StatefulJobWBroadcastStateMigrationITCase, method testSavepoint.
@Test
public void testSavepoint() throws Exception {
final int parallelism = 4;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRestartStrategy(RestartStrategies.noRestart());
switch (snapshotSpec.getStateBackendType()) {
    case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
        env.setStateBackend(new EmbeddedRocksDBStateBackend());
        break;
    case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
        env.setStateBackend(new MemoryStateBackend());
        break;
    case StateBackendLoader.HASHMAP_STATE_BACKEND_NAME:
        env.setStateBackend(new HashMapStateBackend());
        break;
    default:
        throw new UnsupportedOperationException();
}
env.enableChangelogStateBackend(false);
env.enableCheckpointing(500);
env.setParallelism(parallelism);
env.setMaxParallelism(parallelism);
SourceFunction<Tuple2<Long, Long>> nonParallelSource;
SourceFunction<Tuple2<Long, Long>> nonParallelSourceB;
SourceFunction<Tuple2<Long, Long>> parallelSource;
SourceFunction<Tuple2<Long, Long>> parallelSourceB;
KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> firstBroadcastFunction;
KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> secondBroadcastFunction;
final Map<Long, Long> expectedFirstState = new HashMap<>();
expectedFirstState.put(0L, 0L);
expectedFirstState.put(1L, 1L);
expectedFirstState.put(2L, 2L);
expectedFirstState.put(3L, 3L);
final Map<String, Long> expectedSecondState = new HashMap<>();
expectedSecondState.put("0", 0L);
expectedSecondState.put("1", 1L);
expectedSecondState.put("2", 2L);
expectedSecondState.put("3", 3L);
final Map<Long, String> expectedThirdState = new HashMap<>();
expectedThirdState.put(0L, "0");
expectedThirdState.put(1L, "1");
expectedThirdState.put(2L, "2");
expectedThirdState.put(3L, "3");
if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
    nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
    nonParallelSourceB = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
    parallelSource = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
    parallelSourceB = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
    firstBroadcastFunction = new CheckpointingKeyedBroadcastFunction();
    secondBroadcastFunction = new CheckpointingKeyedSingleBroadcastFunction();
} else if (executionMode == ExecutionMode.VERIFY_SNAPSHOT) {
    nonParallelSource = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
    nonParallelSourceB = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
    parallelSource = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
    parallelSourceB = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
    firstBroadcastFunction = new CheckingKeyedBroadcastFunction(expectedFirstState, expectedSecondState);
    secondBroadcastFunction = new CheckingKeyedSingleBroadcastFunction(expectedThirdState);
} else {
    throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
}
KeyedStream<Tuple2<Long, Long>, Long> npStream =
        env.addSource(nonParallelSource).uid("CheckpointingSource1").keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {
            private static final long serialVersionUID = -4514793867774977152L;

            @Override
            public Long getKey(Tuple2<Long, Long> value) throws Exception {
                return value.f0;
            }
        });
KeyedStream<Tuple2<Long, Long>, Long> pStream =
        env.addSource(parallelSource).uid("CheckpointingSource2").keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {
            private static final long serialVersionUID = 4940496713319948104L;

            @Override
            public Long getKey(Tuple2<Long, Long> value) throws Exception {
                return value.f0;
            }
        });
final MapStateDescriptor<Long, Long> firstBroadcastStateDesc =
        new MapStateDescriptor<>("broadcast-state-1", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
final MapStateDescriptor<String, Long> secondBroadcastStateDesc =
        new MapStateDescriptor<>("broadcast-state-2", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
final MapStateDescriptor<Long, String> thirdBroadcastStateDesc =
        new MapStateDescriptor<>("broadcast-state-3", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
BroadcastStream<Tuple2<Long, Long>> npBroadcastStream =
        env.addSource(nonParallelSourceB).uid("BrCheckpointingSource1").broadcast(firstBroadcastStateDesc, secondBroadcastStateDesc);
BroadcastStream<Tuple2<Long, Long>> pBroadcastStream =
        env.addSource(parallelSourceB).uid("BrCheckpointingSource2").broadcast(thirdBroadcastStateDesc);
npStream.connect(npBroadcastStream).process(firstBroadcastFunction).uid("BrProcess1")
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
pStream.connect(pBroadcastStream).process(secondBroadcastFunction).uid("BrProcess2")
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
    executeAndSnapshot(
            env,
            "src/test/resources/" + getSnapshotPath(snapshotSpec),
            snapshotSpec.getSnapshotType(),
            new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, 2 * NUM_SOURCE_ELEMENTS));
} else {
    restoreAndExecute(
            env,
            getResourceFilename(getSnapshotPath(snapshotSpec)),
            new Tuple2<>(MigrationTestUtils.CheckingNonParallelSourceWithListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 2), // we have 2 sources
            new Tuple2<>(MigrationTestUtils.CheckingParallelSourceWithUnionListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 2 * parallelism), // we have 2 sources
            new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
}
}
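The HashMapStateBackend selected above keeps working state as objects on the JVM heap and relies on the configured checkpoint storage for fault tolerance. A minimal stand-alone sketch of the same backend and checkpointing setup, outside the migration-test harness and with a placeholder checkpoint directory and a trivial pipeline, could look like this:

import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class HashMapBackendSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Keep keyed and operator state as objects on the JVM heap.
        env.setStateBackend(new HashMapStateBackend());

        // Snapshot that state every 500 ms; the checkpoint directory is a placeholder.
        env.enableCheckpointing(500);
        env.getCheckpointConfig().setCheckpointStorage("file:///tmp/flink-checkpoints");

        // A trivial keyed pipeline so the sketch runs end to end.
        env.fromElements(1L, 2L, 3L).keyBy(value -> value % 2).print();

        env.execute("hashmap-backend-sketch");
    }
}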
Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.
The class StateBootstrapTransformationTest, method testBroadcastStateTransformationParallelism.
@Test
public void testBroadcastStateTransformationParallelism() {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(10);
DataStream<Integer> input = env.fromElements(0);
StateBootstrapTransformation<Integer> transformation =
        OperatorTransformation.bootstrapWith(input).transform(new ExampleBroadcastStateBootstrapFunction());
int maxParallelism = transformation.getMaxParallelism(4);
DataStream<TaggedOperatorSubtaskState> result =
        transformation.writeOperatorSubtaskStates(
                OperatorIDGenerator.fromUid("uid"), new HashMapStateBackend(), new Path(), maxParallelism);
Assert.assertEquals(
        "Broadcast transformations should always be run at parallelism 1", 1, result.getParallelism());
}
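In a real job, a StateBootstrapTransformation like the one above is usually handed to a SavepointWriter to produce a new savepoint. The sketch below shows that wiring, assuming the DataStream-based state processor API; the FillBroadcastState function, the operator uid, the max parallelism of 128, and the savepoint path are illustrative placeholders, not parts of the test.

import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.runtime.state.hashmap.HashMapStateBackend;
import org.apache.flink.state.api.OperatorTransformation;
import org.apache.flink.state.api.SavepointWriter;
import org.apache.flink.state.api.StateBootstrapTransformation;
import org.apache.flink.state.api.functions.BroadcastStateBootstrapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class BroadcastBootstrapSketch {

    // Hypothetical bootstrap function: writes every input element into broadcast map state.
    static class FillBroadcastState extends BroadcastStateBootstrapFunction<Integer> {

        static final MapStateDescriptor<Integer, Integer> DESCRIPTOR =
                new MapStateDescriptor<>("bootstrapped", BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO);

        @Override
        public void processElement(Integer value, Context ctx) throws Exception {
            ctx.getBroadcastState(DESCRIPTOR).put(value, value);
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStream<Integer> input = env.fromElements(0, 1, 2, 3);

        StateBootstrapTransformation<Integer> transformation =
                OperatorTransformation.bootstrapWith(input).transform(new FillBroadcastState());

        // Write the bootstrapped broadcast state into a new savepoint backed by HashMapStateBackend.
        SavepointWriter.newSavepoint(new HashMapStateBackend(), 128)
                .withOperator("uid", transformation)
                .write("file:///tmp/bootstrapped-savepoint");

        env.execute("bootstrap-broadcast-state");
    }
}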
Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.
The class SavepointReaderITTestBase, method verifyBroadcastState.
private void verifyBroadcastState(String path, StreamExecutionEnvironment env) throws Exception {
SavepointReader savepoint = SavepointReader.read(env, path, new HashMapStateBackend());
List<Tuple2<Integer, String>> broadcastResult = JobResultRetriever.collect(readBroadcastState(savepoint));
List<Integer> broadcastStateKeys =
        broadcastResult.stream().map(entry -> entry.f0).sorted(Comparator.naturalOrder()).collect(Collectors.toList());
List<String> broadcastStateValues =
        broadcastResult.stream().map(entry -> entry.f1).sorted(Comparator.naturalOrder()).collect(Collectors.toList());
Assert.assertEquals("Unexpected element in broadcast state keys", SavepointSource.getElements(), broadcastStateKeys);
Assert.assertEquals(
        "Unexpected element in broadcast state values",
        SavepointSource.getElements().stream().map(Object::toString).sorted().collect(Collectors.toList()),
        broadcastStateValues);
}
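The readBroadcastState helper used above is defined elsewhere in the test base class. A sketch of how such a helper could be written against SavepointReader is shown below; the operator uid, state name, and key/value types are assumptions for illustration, not taken from the actual test:

private DataStream<Tuple2<Integer, String>> readBroadcastState(SavepointReader savepoint) throws IOException {
    return savepoint.readBroadcastState(
            "broadcast-uid",                 // uid of the operator that registered the broadcast state (assumed)
            "broadcast-state",               // name of its MapStateDescriptor (assumed)
            BasicTypeInfo.INT_TYPE_INFO,     // key type of the broadcast map state
            BasicTypeInfo.STRING_TYPE_INFO); // value type of the broadcast map state
}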
Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.
The class SavepointReaderITTestBase, method verifyUnionState.
private void verifyUnionState(String path, StreamExecutionEnvironment env) throws Exception {
SavepointReader savepoint = SavepointReader.read(env, path, new HashMapStateBackend());
List<Integer> unionResult = JobResultRetriever.collect(readUnionState(savepoint));
unionResult.sort(Comparator.naturalOrder());
Assert.assertEquals("Unexpected elements read from union state", SavepointSource.getElements(), unionResult);
}
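Similarly, the readUnionState helper could be sketched as follows; again, the uid, state name, and element type are assumed for illustration:

private DataStream<Integer> readUnionState(SavepointReader savepoint) throws IOException {
    return savepoint.readUnionState(
            "union-uid",                  // uid of the operator that registered the union list state (assumed)
            "union-state",                // name of its ListStateDescriptor (assumed)
            BasicTypeInfo.INT_TYPE_INFO); // element type of the union state
}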
Use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.
The class FileSinkCompactionSwitchITCase, method createJobGraph.
private JobGraph createJobGraph(String path, String cpPath, boolean compactionEnabled, boolean isFinite, SharedReference<ConcurrentHashMap<Integer, Integer>> sendCountMap) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Configuration config = new Configuration();
config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.STREAMING);
// disable changelog state in case it's randomly enabled, since it will fail the savepoint
config.set(StateChangelogOptions.ENABLE_STATE_CHANGE_LOG, false);
env.configure(config, getClass().getClassLoader());
env.enableCheckpointing(100, CheckpointingMode.EXACTLY_ONCE);
env.setRestartStrategy(RestartStrategies.noRestart());
env.getCheckpointConfig().setCheckpointStorage(new FileSystemCheckpointStorage(cpPath));
env.setStateBackend(new HashMapStateBackend());
env.addSource(new CountingTestSource(latchId, NUM_RECORDS, isFinite, sendCountMap))
        .setParallelism(NUM_SOURCES)
        .sinkTo(createFileSink(path, compactionEnabled))
        .uid("sink")
        .setParallelism(NUM_SINKS);
StreamGraph streamGraph = env.getStreamGraph();
return streamGraph.getJobGraph();
}
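The same backend and checkpoint storage can also be selected purely through configuration instead of the programmatic setStateBackend and setCheckpointStorage calls. A rough sketch using the standard options StateBackendOptions.STATE_BACKEND and CheckpointingOptions.CHECKPOINTS_DIRECTORY, with placeholder directories:

Configuration config = new Configuration();
config.set(StateBackendOptions.STATE_BACKEND, "hashmap");
config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, "file:///tmp/flink-checkpoints");
// Keep changelog state disabled, mirroring the explicit setting in createJobGraph above.
config.set(StateChangelogOptions.ENABLE_STATE_CHANGE_LOG, false);
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(config);
env.enableCheckpointing(100, CheckpointingMode.EXACTLY_ONCE);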