Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class StateMachineExample, method main.
/**
* Main entry point for the program.
*
* @param args The command line arguments.
*/
public static void main(String[] args) throws Exception {
// ---- print some usage help ----
System.out.println("Usage with built-in data generator: StateMachineExample [--error-rate <probability-of-invalid-transition>] [--sleep <sleep-per-record-in-ms>]");
System.out.println("Usage with Kafka: StateMachineExample --kafka-topic <topic> [--brokers <brokers>]");
System.out.println("Options for both the above setups: ");
System.out.println("\t[--backend <hashmap|rocks>]");
System.out.println("\t[--checkpoint-dir <filepath>]");
System.out.println("\t[--incremental-checkpoints <true|false>]");
System.out.println("\t[--output <filepath> OR null for stdout]");
System.out.println();
// ---- determine whether to use the built-in source, or read from Kafka ----
final DataStream<Event> events;
final ParameterTool params = ParameterTool.fromArgs(args);
// create the environment to create streams and configure execution
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.enableCheckpointing(2000L);
final String stateBackend = params.get("backend", "memory");
if ("hashmap".equals(stateBackend)) {
final String checkpointDir = params.get("checkpoint-dir");
env.setStateBackend(new HashMapStateBackend());
env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
} else if ("rocks".equals(stateBackend)) {
final String checkpointDir = params.get("checkpoint-dir");
boolean incrementalCheckpoints = params.getBoolean("incremental-checkpoints", false);
env.setStateBackend(new EmbeddedRocksDBStateBackend(incrementalCheckpoints));
env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
}
if (params.has("kafka-topic")) {
// set up the Kafka reader
String kafkaTopic = params.get("kafka-topic");
String brokers = params.get("brokers", "localhost:9092");
System.out.printf("Reading from kafka topic %s @ %s\n", kafkaTopic, brokers);
System.out.println();
KafkaSource<Event> source =
        KafkaSource.<Event>builder()
                .setBootstrapServers(brokers)
                .setGroupId("stateMachineExample")
                .setTopics(kafkaTopic)
                .setDeserializer(KafkaRecordDeserializationSchema.valueOnly(new EventDeSerializationSchema()))
                .setStartingOffsets(OffsetsInitializer.latest())
                .build();
events = env.fromSource(source, WatermarkStrategy.noWatermarks(), "StateMachineExampleSource");
} else {
double errorRate = params.getDouble("error-rate", 0.0);
int sleep = params.getInt("sleep", 1);
System.out.printf("Using standalone source with error rate %f and sleep delay %s millis\n", errorRate, sleep);
System.out.println();
events = env.addSource(new EventsGeneratorSource(errorRate, sleep));
}
// ---- main program ----
final String outputFile = params.get("output");
// make parameters available in the web interface
env.getConfig().setGlobalJobParameters(params);
DataStream<Alert> alerts = events.keyBy(Event::sourceAddress).flatMap(new StateMachineMapper());
// output the alerts to std-out
if (outputFile == null) {
alerts.print();
} else {
alerts.sinkTo(
        FileSink.<Alert>forRowFormat(new Path(outputFile), new SimpleStringEncoder<>())
                .withRollingPolicy(
                        DefaultRollingPolicy.builder()
                                .withMaxPartSize(MemorySize.ofMebiBytes(1))
                                .withRolloverInterval(Duration.ofSeconds(10))
                                .build())
                .build())
        .setParallelism(1)
        .name("output");
}
// trigger program execution
env.execute("State machine job");
}
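Distilled from the example above, a minimal sketch of just the RocksDB backend wiring; the checkpoint path is a placeholder and the 2000 ms interval simply mirrors the job above:

import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class RocksDBBackendSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // enable periodic checkpoints, same interval as the example above
        env.enableCheckpointing(2000L);
        // true enables incremental checkpoints, as the --incremental-checkpoints flag does above
        env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
        // placeholder path; any URI usable as checkpoint storage (file://, s3://, ...) works here
        env.getCheckpointConfig().setCheckpointStorage("file:///tmp/state-machine-checkpoints");
        // ... build the rest of the pipeline and call env.execute(...)
    }
}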
Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class StickyAllocationAndLocalRecoveryTestJob, method main.
public static void main(String[] args) throws Exception {
final ParameterTool pt = ParameterTool.fromArgs(args);
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(pt.getInt("parallelism", 1));
env.setMaxParallelism(pt.getInt("maxParallelism", pt.getInt("parallelism", 1)));
env.enableCheckpointing(pt.getInt("checkpointInterval", 1000));
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, pt.getInt("restartDelay", 0)));
if (pt.getBoolean("externalizedCheckpoints", false)) {
env.getCheckpointConfig().setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
}
String checkpointDir = pt.getRequired("checkpointDir");
env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
boolean killJvmOnFail = pt.getBoolean("killJvmOnFail", false);
String stateBackend = pt.get("stateBackend", "hashmap");
if ("hashmap".equals(stateBackend)) {
env.setStateBackend(new HashMapStateBackend());
} else if ("rocks".equals(stateBackend)) {
boolean incrementalCheckpoints = pt.getBoolean("incrementalCheckpoints", false);
env.setStateBackend(new EmbeddedRocksDBStateBackend(incrementalCheckpoints));
} else {
throw new IllegalArgumentException("Unknown backend: " + stateBackend);
}
// make parameters available in the web interface
env.getConfig().setGlobalJobParameters(pt);
// delay to throttle down the production of the source
long delay = pt.getLong("delay", 0L);
// the maximum number of attempts, before the job finishes with success
int maxAttempts = pt.getInt("maxAttempts", 3);
// size of one artificial value
int valueSize = pt.getInt("valueSize", 10);
env.addSource(new RandomLongSource(maxAttempts, delay))
        .keyBy((KeySelector<Long, Long>) aLong -> aLong)
        .flatMap(new StateCreatingFlatMap(valueSize, killJvmOnFail))
        .addSink(new PrintSinkFunction<>());
env.execute("Sticky Allocation And Local Recovery Test");
}
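The test above exercises sticky allocation and local recovery; the local-recovery switch itself lives in the cluster configuration rather than in the job code. A hedged sketch, assuming a Flink version where StreamExecutionEnvironment.getExecutionEnvironment(Configuration) and CheckpointingOptions.LOCAL_RECOVERY are available (1.12+):

import org.apache.flink.configuration.CheckpointingOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LocalRecoverySketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // keep a secondary copy of keyed state on the TaskManager so a restarted task
        // can restore locally instead of re-reading the checkpoint from remote storage
        conf.set(CheckpointingOptions.LOCAL_RECOVERY, true);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
        env.enableCheckpointing(1000L);
        // ... add sources and operators, then call env.execute(...)
    }
}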
Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class TypeSerializerSnapshotMigrationITCase, method testSnapshot.
@Test
public void testSnapshot() throws Exception {
final int parallelism = 1;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRestartStrategy(RestartStrategies.noRestart());
switch(snapshotSpec.getStateBackendType()) {
case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
env.setStateBackend(new EmbeddedRocksDBStateBackend());
break;
case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
env.setStateBackend(new MemoryStateBackend());
break;
case StateBackendLoader.HASHMAP_STATE_BACKEND_NAME:
env.setStateBackend(new HashMapStateBackend());
break;
default:
throw new UnsupportedOperationException();
}
env.enableChangelogStateBackend(false);
env.enableCheckpointing(500);
env.setParallelism(parallelism);
env.setMaxParallelism(parallelism);
SourceFunction<Tuple2<Long, Long>> nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
env.addSource(nonParallelSource)
        .keyBy(0)
        .map(new TestMapFunction())
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
final String snapshotPath = getSnapshotPath(snapshotSpec);
if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
executeAndSnapshot(
        env,
        "src/test/resources/" + snapshotPath,
        snapshotSpec.getSnapshotType(),
        Tuple2.of(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
} else if (executionMode == ExecutionMode.VERIFY_SNAPSHOT) {
restoreAndExecute(
        env,
        getResourceFilename(snapshotPath),
        Tuple2.of(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
} else {
throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
}
}
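The switch above mirrors the backend names that StateBackendLoader resolves from configuration. As a hedged alternative sketch (assuming Flink 1.14+, where StateBackendOptions.STATE_BACKEND is available), the same choice can be made through configuration instead of env.setStateBackend(...):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.StateBackendOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ConfiguredBackendSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "rocksdb" resolves to EmbeddedRocksDBStateBackend, "hashmap" to HashMapStateBackend;
        // StateBackendLoader performs the lookup when the job starts
        conf.set(StateBackendOptions.STATE_BACKEND, "rocksdb");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.fromSequence(1, 100).print(); // stand-in pipeline
        env.execute("configured backend sketch");
    }
}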
Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class SavepointITCase, method testTriggerSavepointAndResumeWithNoClaim.
@Test
@Ignore("Disabling this test because it regularly fails on AZP. See FLINK-25427.")
public void testTriggerSavepointAndResumeWithNoClaim() throws Exception {
final int numTaskManagers = 2;
final int numSlotsPerTaskManager = 2;
final int parallelism = numTaskManagers * numSlotsPerTaskManager;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
env.getCheckpointConfig().setCheckpointStorage(folder.newFolder().toURI());
env.setParallelism(parallelism);
final SharedReference<CountDownLatch> counter = sharedObjects.add(new CountDownLatch(10_000));
env.fromSequence(1, Long.MAX_VALUE)
        .keyBy(i -> i % parallelism)
        .process(new KeyedProcessFunction<Long, Long, Long>() {
            private ListState<Long> last;

            @Override
            public void open(Configuration parameters) {
                // we use list state here to create sst files of a significant size
                // if sst files do not reach certain thresholds they are not stored
                // in files, but as a byte stream in checkpoints metadata
                last = getRuntimeContext().getListState(
                        new ListStateDescriptor<>("last", BasicTypeInfo.LONG_TYPE_INFO));
            }

            @Override
            public void processElement(Long value, KeyedProcessFunction<Long, Long, Long>.Context ctx, Collector<Long> out) throws Exception {
                last.add(value);
                out.collect(value);
            }
        })
        .addSink(new SinkFunction<Long>() {
            @Override
            public void invoke(Long value) {
                counter.consumeSync(CountDownLatch::countDown);
            }
        })
        .setParallelism(1);
final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
MiniClusterWithClientResource cluster =
        new MiniClusterWithClientResource(
                new MiniClusterResourceConfiguration.Builder()
                        .setNumberTaskManagers(numTaskManagers)
                        .setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
                        .build());
cluster.before();
try {
final JobID jobID1 = new JobID();
jobGraph.setJobID(jobID1);
cluster.getClusterClient().submitJob(jobGraph).get();
CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID1, false);
// wait for some records to be processed before taking the checkpoint
counter.get().await();
final String firstCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID1).get();
cluster.getClusterClient().cancel(jobID1).get();
jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(firstCheckpoint, false, RestoreMode.NO_CLAIM));
final JobID jobID2 = new JobID();
jobGraph.setJobID(jobID2);
cluster.getClusterClient().submitJob(jobGraph).get();
CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID2, false);
String secondCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID2).get();
cluster.getClusterClient().cancel(jobID2).get();
// delete the checkpoint we restored from
FileUtils.deleteDirectory(Paths.get(new URI(firstCheckpoint)).getParent().toFile());
// we should be able to restore from the second checkpoint even though it has been built
// on top of the first checkpoint
jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(secondCheckpoint, false, RestoreMode.NO_CLAIM));
final JobID jobID3 = new JobID();
jobGraph.setJobID(jobID3);
cluster.getClusterClient().submitJob(jobGraph).get();
CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID3, false);
} finally {
cluster.after();
}
}
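For reference, a hedged sketch of the restore step the test revolves around; the snapshot path is a placeholder, the pipeline is a stand-in, and the import locations assume the Flink 1.15-era packages used by the test above:

import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.RestoreMode;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class NoClaimRestoreSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromSequence(1, 100).print(); // stand-in pipeline
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        // NO_CLAIM: restore from the snapshot without taking ownership of it; the job's
        // first completed checkpoint is then self-contained, so the original snapshot
        // can be deleted afterwards, which is the property the test above verifies
        jobGraph.setSavepointRestoreSettings(
                SavepointRestoreSettings.forPath(
                        "file:///tmp/checkpoints/chk-1", // placeholder snapshot path
                        false,                           // allowNonRestoredState
                        RestoreMode.NO_CLAIM));
        // the JobGraph would then be submitted through a ClusterClient, as in the test above
    }
}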
Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class StatefulJobWBroadcastStateMigrationITCase, method testSavepoint.
@Test
public void testSavepoint() throws Exception {
final int parallelism = 4;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRestartStrategy(RestartStrategies.noRestart());
switch(snapshotSpec.getStateBackendType()) {
case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
env.setStateBackend(new EmbeddedRocksDBStateBackend());
break;
case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
env.setStateBackend(new MemoryStateBackend());
break;
case StateBackendLoader.HASHMAP_STATE_BACKEND_NAME:
env.setStateBackend(new HashMapStateBackend());
break;
default:
throw new UnsupportedOperationException();
}
env.enableChangelogStateBackend(false);
env.enableCheckpointing(500);
env.setParallelism(parallelism);
env.setMaxParallelism(parallelism);
SourceFunction<Tuple2<Long, Long>> nonParallelSource;
SourceFunction<Tuple2<Long, Long>> nonParallelSourceB;
SourceFunction<Tuple2<Long, Long>> parallelSource;
SourceFunction<Tuple2<Long, Long>> parallelSourceB;
KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> firstBroadcastFunction;
KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> secondBroadcastFunction;
final Map<Long, Long> expectedFirstState = new HashMap<>();
expectedFirstState.put(0L, 0L);
expectedFirstState.put(1L, 1L);
expectedFirstState.put(2L, 2L);
expectedFirstState.put(3L, 3L);
final Map<String, Long> expectedSecondState = new HashMap<>();
expectedSecondState.put("0", 0L);
expectedSecondState.put("1", 1L);
expectedSecondState.put("2", 2L);
expectedSecondState.put("3", 3L);
final Map<Long, String> expectedThirdState = new HashMap<>();
expectedThirdState.put(0L, "0");
expectedThirdState.put(1L, "1");
expectedThirdState.put(2L, "2");
expectedThirdState.put(3L, "3");
if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
nonParallelSourceB = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
parallelSource = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
parallelSourceB = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
firstBroadcastFunction = new CheckpointingKeyedBroadcastFunction();
secondBroadcastFunction = new CheckpointingKeyedSingleBroadcastFunction();
} else if (executionMode == ExecutionMode.VERIFY_SNAPSHOT) {
nonParallelSource = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
nonParallelSourceB = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
parallelSource = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
parallelSourceB = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
firstBroadcastFunction = new CheckingKeyedBroadcastFunction(expectedFirstState, expectedSecondState);
secondBroadcastFunction = new CheckingKeyedSingleBroadcastFunction(expectedThirdState);
} else {
throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
}
KeyedStream<Tuple2<Long, Long>, Long> npStream =
        env.addSource(nonParallelSource)
                .uid("CheckpointingSource1")
                .keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {
                    private static final long serialVersionUID = -4514793867774977152L;

                    @Override
                    public Long getKey(Tuple2<Long, Long> value) throws Exception {
                        return value.f0;
                    }
                });
KeyedStream<Tuple2<Long, Long>, Long> pStream =
        env.addSource(parallelSource)
                .uid("CheckpointingSource2")
                .keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {
                    private static final long serialVersionUID = 4940496713319948104L;

                    @Override
                    public Long getKey(Tuple2<Long, Long> value) throws Exception {
                        return value.f0;
                    }
                });
final MapStateDescriptor<Long, Long> firstBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-1", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
final MapStateDescriptor<String, Long> secondBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-2", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
final MapStateDescriptor<Long, String> thirdBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-3", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
BroadcastStream<Tuple2<Long, Long>> npBroadcastStream =
        env.addSource(nonParallelSourceB)
                .uid("BrCheckpointingSource1")
                .broadcast(firstBroadcastStateDesc, secondBroadcastStateDesc);
BroadcastStream<Tuple2<Long, Long>> pBroadcastStream =
        env.addSource(parallelSourceB)
                .uid("BrCheckpointingSource2")
                .broadcast(thirdBroadcastStateDesc);
npStream.connect(npBroadcastStream)
        .process(firstBroadcastFunction)
        .uid("BrProcess1")
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
pStream.connect(pBroadcastStream)
        .process(secondBroadcastFunction)
        .uid("BrProcess2")
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
executeAndSnapshot(
        env,
        "src/test/resources/" + getSnapshotPath(snapshotSpec),
        snapshotSpec.getSnapshotType(),
        new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, 2 * NUM_SOURCE_ELEMENTS));
} else {
restoreAndExecute(
        env,
        getResourceFilename(getSnapshotPath(snapshotSpec)),
        // we have 2 sources
        new Tuple2<>(MigrationTestUtils.CheckingNonParallelSourceWithListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 2),
        // we have 2 sources
        new Tuple2<>(MigrationTestUtils.CheckingParallelSourceWithUnionListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 2 * parallelism),
        new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
}
}
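The test wires up the broadcast streams but keeps the process functions in helper classes. A hypothetical, minimal KeyedBroadcastProcessFunction (not the CheckpointingKeyedBroadcastFunction / CheckingKeyedBroadcastFunction used above) showing the usual access pattern for broadcast state:

import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.util.Collector;

public class BroadcastFunctionSketch
        extends KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    // same descriptor shape as "broadcast-state-1" above
    private static final MapStateDescriptor<Long, Long> DESC =
            new MapStateDescriptor<>("broadcast-state-1", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);

    @Override
    public void processElement(Tuple2<Long, Long> value, ReadOnlyContext ctx, Collector<Tuple2<Long, Long>> out) throws Exception {
        // keyed side: broadcast state is read-only here
        Long broadcastValue = ctx.getBroadcastState(DESC).get(value.f0);
        out.collect(new Tuple2<>(value.f0, broadcastValue == null ? value.f1 : broadcastValue));
    }

    @Override
    public void processBroadcastElement(Tuple2<Long, Long> value, Context ctx, Collector<Tuple2<Long, Long>> out) throws Exception {
        // broadcast side: the only place where the broadcast state may be modified
        ctx.getBroadcastState(DESC).put(value.f0, value.f1);
    }
}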