Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class StateMachineExample, method main.
/**
* Main entry point for the program.
*
* @param args The command line arguments.
*/
public static void main(String[] args) throws Exception {
// ---- print some usage help ----
System.out.println("Usage with built-in data generator: StateMachineExample [--error-rate <probability-of-invalid-transition>] [--sleep <sleep-per-record-in-ms>]");
System.out.println("Usage with Kafka: StateMachineExample --kafka-topic <topic> [--brokers <brokers>]");
System.out.println("Options for both the above setups: ");
System.out.println("\t[--backend <hashmap|rocks>]");
System.out.println("\t[--checkpoint-dir <filepath>]");
System.out.println("\t[--incremental-checkpoints <true|false>]");
System.out.println("\t[--output <filepath> OR null for stdout]");
System.out.println();
// ---- determine whether to use the built-in source, or read from Kafka ----
final DataStream<Event> events;
final ParameterTool params = ParameterTool.fromArgs(args);
// create the environment to create streams and configure execution
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.enableCheckpointing(2000L);
final String stateBackend = params.get("backend", "memory");
if ("hashmap".equals(stateBackend)) {
final String checkpointDir = params.get("checkpoint-dir");
env.setStateBackend(new HashMapStateBackend());
env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
} else if ("rocks".equals(stateBackend)) {
final String checkpointDir = params.get("checkpoint-dir");
boolean incrementalCheckpoints = params.getBoolean("incremental-checkpoints", false);
env.setStateBackend(new EmbeddedRocksDBStateBackend(incrementalCheckpoints));
env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
}
if (params.has("kafka-topic")) {
// set up the Kafka reader
String kafkaTopic = params.get("kafka-topic");
String brokers = params.get("brokers", "localhost:9092");
System.out.printf("Reading from kafka topic %s @ %s\n", kafkaTopic, brokers);
System.out.println();
KafkaSource<Event> source =
        KafkaSource.<Event>builder()
                .setBootstrapServers(brokers)
                .setGroupId("stateMachineExample")
                .setTopics(kafkaTopic)
                .setDeserializer(KafkaRecordDeserializationSchema.valueOnly(new EventDeSerializationSchema()))
                .setStartingOffsets(OffsetsInitializer.latest())
                .build();
events = env.fromSource(source, WatermarkStrategy.noWatermarks(), "StateMachineExampleSource");
} else {
double errorRate = params.getDouble("error-rate", 0.0);
int sleep = params.getInt("sleep", 1);
System.out.printf("Using standalone source with error rate %f and sleep delay %s millis\n", errorRate, sleep);
System.out.println();
events = env.addSource(new EventsGeneratorSource(errorRate, sleep));
}
// ---- main program ----
final String outputFile = params.get("output");
// make parameters available in the web interface
env.getConfig().setGlobalJobParameters(params);
DataStream<Alert> alerts = events.keyBy(Event::sourceAddress).flatMap(new StateMachineMapper());
// output the alerts to std-out
if (outputFile == null) {
alerts.print();
} else {
alerts.sinkTo(
        FileSink.<Alert>forRowFormat(new Path(outputFile), new SimpleStringEncoder<>())
                .withRollingPolicy(
                        DefaultRollingPolicy.builder()
                                .withMaxPartSize(MemorySize.ofMebiBytes(1))
                                .withRolloverInterval(Duration.ofSeconds(10))
                                .build())
                .build())
        .setParallelism(1)
        .name("output");
}
// trigger program execution
env.execute("State machine job");
}
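Distilled from the example above, a minimal sketch of just the RocksDB backend wiring; the checkpoint path is a placeholder and the 2000 ms interval simply mirrors the job above:

import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class RocksDBBackendSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // enable periodic checkpoints, same interval as the example above
        env.enableCheckpointing(2000L);
        // true enables incremental checkpoints, as the --incremental-checkpoints flag does above
        env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
        // placeholder path; any URI usable as checkpoint storage (file://, s3://, ...) works here
        env.getCheckpointConfig().setCheckpointStorage("file:///tmp/state-machine-checkpoints");
        // ... build the rest of the pipeline and call env.execute(...)
    }
}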
Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class StickyAllocationAndLocalRecoveryTestJob, method main.
public static void main(String[] args) throws Exception {
final ParameterTool pt = ParameterTool.fromArgs(args);
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(pt.getInt("parallelism", 1));
env.setMaxParallelism(pt.getInt("maxParallelism", pt.getInt("parallelism", 1)));
env.enableCheckpointing(pt.getInt("checkpointInterval", 1000));
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, pt.getInt("restartDelay", 0)));
if (pt.getBoolean("externalizedCheckpoints", false)) {
env.getCheckpointConfig().setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
}
String checkpointDir = pt.getRequired("checkpointDir");
env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
boolean killJvmOnFail = pt.getBoolean("killJvmOnFail", false);
String stateBackend = pt.get("stateBackend", "hashmap");
if ("hashmap".equals(stateBackend)) {
env.setStateBackend(new HashMapStateBackend());
} else if ("rocks".equals(stateBackend)) {
boolean incrementalCheckpoints = pt.getBoolean("incrementalCheckpoints", false);
env.setStateBackend(new EmbeddedRocksDBStateBackend(incrementalCheckpoints));
} else {
throw new IllegalArgumentException("Unknown backend: " + stateBackend);
}
// make parameters available in the web interface
env.getConfig().setGlobalJobParameters(pt);
// delay to throttle down the production of the source
long delay = pt.getLong("delay", 0L);
// the maximum number of attempts, before the job finishes with success
int maxAttempts = pt.getInt("maxAttempts", 3);
// size of one artificial value
int valueSize = pt.getInt("valueSize", 10);
env.addSource(new RandomLongSource(maxAttempts, delay))
        .keyBy((KeySelector<Long, Long>) aLong -> aLong)
        .flatMap(new StateCreatingFlatMap(valueSize, killJvmOnFail))
        .addSink(new PrintSinkFunction<>());
env.execute("Sticky Allocation And Local Recovery Test");
}
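The test above exercises sticky allocation and local recovery; the local-recovery switch itself lives in the cluster configuration rather than in the job code. A hedged sketch, assuming a Flink version where StreamExecutionEnvironment.getExecutionEnvironment(Configuration) and CheckpointingOptions.LOCAL_RECOVERY are available (1.12+):

import org.apache.flink.configuration.CheckpointingOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LocalRecoverySketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // keep a secondary copy of keyed state on the TaskManager so a restarted task
        // can restore locally instead of re-reading the checkpoint from remote storage
        conf.set(CheckpointingOptions.LOCAL_RECOVERY, true);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
        env.enableCheckpointing(1000L);
        // ... add sources and operators, then call env.execute(...)
    }
}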
Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class TypeSerializerSnapshotMigrationITCase, method testSnapshot.
@Test
public void testSnapshot() throws Exception {
final int parallelism = 1;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRestartStrategy(RestartStrategies.noRestart());
switch(snapshotSpec.getStateBackendType()) {
case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
env.setStateBackend(new EmbeddedRocksDBStateBackend());
break;
case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
env.setStateBackend(new MemoryStateBackend());
break;
case StateBackendLoader.HASHMAP_STATE_BACKEND_NAME:
env.setStateBackend(new HashMapStateBackend());
break;
default:
throw new UnsupportedOperationException();
}
env.enableChangelogStateBackend(false);
env.enableCheckpointing(500);
env.setParallelism(parallelism);
env.setMaxParallelism(parallelism);
SourceFunction<Tuple2<Long, Long>> nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
env.addSource(nonParallelSource)
        .keyBy(0)
        .map(new TestMapFunction())
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
final String snapshotPath = getSnapshotPath(snapshotSpec);
if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
executeAndSnapshot(
        env,
        "src/test/resources/" + snapshotPath,
        snapshotSpec.getSnapshotType(),
        Tuple2.of(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
} else if (executionMode == ExecutionMode.VERIFY_SNAPSHOT) {
restoreAndExecute(
        env,
        getResourceFilename(snapshotPath),
        Tuple2.of(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS));
} else {
throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
}
}
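The switch above mirrors the backend names that StateBackendLoader resolves from configuration. As a hedged alternative sketch (assuming Flink 1.14+, where StateBackendOptions.STATE_BACKEND is available), the same choice can be made through configuration instead of env.setStateBackend(...):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.StateBackendOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ConfiguredBackendSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // "rocksdb" resolves to EmbeddedRocksDBStateBackend, "hashmap" to HashMapStateBackend;
        // StateBackendLoader performs the lookup when the job starts
        conf.set(StateBackendOptions.STATE_BACKEND, "rocksdb");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.fromSequence(1, 100).print(); // stand-in pipeline
        env.execute("configured backend sketch");
    }
}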
Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class SavepointITCase, method testTriggerSavepointAndResumeWithNoClaim.
@Test
@Ignore("Disabling this test because it regularly fails on AZP. See FLINK-25427.")
public void testTriggerSavepointAndResumeWithNoClaim() throws Exception {
final int numTaskManagers = 2;
final int numSlotsPerTaskManager = 2;
final int parallelism = numTaskManagers * numSlotsPerTaskManager;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStateBackend(new EmbeddedRocksDBStateBackend(true));
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
env.getCheckpointConfig().setCheckpointStorage(folder.newFolder().toURI());
env.setParallelism(parallelism);
final SharedReference<CountDownLatch> counter = sharedObjects.add(new CountDownLatch(10_000));
env.fromSequence(1, Long.MAX_VALUE)
        .keyBy(i -> i % parallelism)
        .process(new KeyedProcessFunction<Long, Long, Long>() {
            private ListState<Long> last;

            @Override
            public void open(Configuration parameters) {
                // we use list state here to create sst files of a significant size
                // if sst files do not reach certain thresholds they are not stored
                // in files, but as a byte stream in checkpoints metadata
                last = getRuntimeContext().getListState(
                        new ListStateDescriptor<>("last", BasicTypeInfo.LONG_TYPE_INFO));
            }

            @Override
            public void processElement(Long value, KeyedProcessFunction<Long, Long, Long>.Context ctx, Collector<Long> out) throws Exception {
                last.add(value);
                out.collect(value);
            }
        })
        .addSink(new SinkFunction<Long>() {
            @Override
            public void invoke(Long value) {
                counter.consumeSync(CountDownLatch::countDown);
            }
        })
        .setParallelism(1);
final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
MiniClusterWithClientResource cluster =
        new MiniClusterWithClientResource(
                new MiniClusterResourceConfiguration.Builder()
                        .setNumberTaskManagers(numTaskManagers)
                        .setNumberSlotsPerTaskManager(numSlotsPerTaskManager)
                        .build());
cluster.before();
try {
final JobID jobID1 = new JobID();
jobGraph.setJobID(jobID1);
cluster.getClusterClient().submitJob(jobGraph).get();
CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID1, false);
// wait for some records to be processed before taking the checkpoint
counter.get().await();
final String firstCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID1).get();
cluster.getClusterClient().cancel(jobID1).get();
jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(firstCheckpoint, false, RestoreMode.NO_CLAIM));
final JobID jobID2 = new JobID();
jobGraph.setJobID(jobID2);
cluster.getClusterClient().submitJob(jobGraph).get();
CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID2, false);
String secondCheckpoint = cluster.getMiniCluster().triggerCheckpoint(jobID2).get();
cluster.getClusterClient().cancel(jobID2).get();
// delete the checkpoint we restored from
FileUtils.deleteDirectory(Paths.get(new URI(firstCheckpoint)).getParent().toFile());
// we should be able to restore from the second checkpoint even though it has been built
// on top of the first checkpoint
jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(secondCheckpoint, false, RestoreMode.NO_CLAIM));
final JobID jobID3 = new JobID();
jobGraph.setJobID(jobID3);
cluster.getClusterClient().submitJob(jobGraph).get();
CommonTestUtils.waitForAllTaskRunning(cluster.getMiniCluster(), jobID3, false);
} finally {
cluster.after();
}
}
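For reference, a hedged sketch of the restore step the test revolves around; the snapshot path is a placeholder, the pipeline is a stand-in, and the import locations assume the Flink 1.15-era packages used by the test above:

import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.RestoreMode;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class NoClaimRestoreSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromSequence(1, 100).print(); // stand-in pipeline
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        // NO_CLAIM: restore from the snapshot without taking ownership of it; the job's
        // first completed checkpoint is then self-contained, so the original snapshot
        // can be deleted afterwards, which is the property the test above verifies
        jobGraph.setSavepointRestoreSettings(
                SavepointRestoreSettings.forPath(
                        "file:///tmp/checkpoints/chk-1", // placeholder snapshot path
                        false,                           // allowNonRestoredState
                        RestoreMode.NO_CLAIM));
        // the JobGraph would then be submitted through a ClusterClient, as in the test above
    }
}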
Use of org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend in project flink by apache.
The class StatefulJobWBroadcastStateMigrationITCase, method testSavepoint.
@Test
public void testSavepoint() throws Exception {
final int parallelism = 4;
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRestartStrategy(RestartStrategies.noRestart());
switch(snapshotSpec.getStateBackendType()) {
case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
env.setStateBackend(new EmbeddedRocksDBStateBackend());
break;
case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
env.setStateBackend(new MemoryStateBackend());
break;
case StateBackendLoader.HASHMAP_STATE_BACKEND_NAME:
env.setStateBackend(new HashMapStateBackend());
break;
default:
throw new UnsupportedOperationException();
}
env.enableChangelogStateBackend(false);
env.enableCheckpointing(500);
env.setParallelism(parallelism);
env.setMaxParallelism(parallelism);
SourceFunction<Tuple2<Long, Long>> nonParallelSource;
SourceFunction<Tuple2<Long, Long>> nonParallelSourceB;
SourceFunction<Tuple2<Long, Long>> parallelSource;
SourceFunction<Tuple2<Long, Long>> parallelSourceB;
KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> firstBroadcastFunction;
KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> secondBroadcastFunction;
final Map<Long, Long> expectedFirstState = new HashMap<>();
expectedFirstState.put(0L, 0L);
expectedFirstState.put(1L, 1L);
expectedFirstState.put(2L, 2L);
expectedFirstState.put(3L, 3L);
final Map<String, Long> expectedSecondState = new HashMap<>();
expectedSecondState.put("0", 0L);
expectedSecondState.put("1", 1L);
expectedSecondState.put("2", 2L);
expectedSecondState.put("3", 3L);
final Map<Long, String> expectedThirdState = new HashMap<>();
expectedThirdState.put(0L, "0");
expectedThirdState.put(1L, "1");
expectedThirdState.put(2L, "2");
expectedThirdState.put(3L, "3");
if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
nonParallelSourceB = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
parallelSource = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
parallelSourceB = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
firstBroadcastFunction = new CheckpointingKeyedBroadcastFunction();
secondBroadcastFunction = new CheckpointingKeyedSingleBroadcastFunction();
} else if (executionMode == ExecutionMode.VERIFY_SNAPSHOT) {
nonParallelSource = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
nonParallelSourceB = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
parallelSource = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
parallelSourceB = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
firstBroadcastFunction = new CheckingKeyedBroadcastFunction(expectedFirstState, expectedSecondState);
secondBroadcastFunction = new CheckingKeyedSingleBroadcastFunction(expectedThirdState);
} else {
throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
}
KeyedStream<Tuple2<Long, Long>, Long> npStream =
        env.addSource(nonParallelSource)
                .uid("CheckpointingSource1")
                .keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {
                    private static final long serialVersionUID = -4514793867774977152L;

                    @Override
                    public Long getKey(Tuple2<Long, Long> value) throws Exception {
                        return value.f0;
                    }
                });
KeyedStream<Tuple2<Long, Long>, Long> pStream =
        env.addSource(parallelSource)
                .uid("CheckpointingSource2")
                .keyBy(new KeySelector<Tuple2<Long, Long>, Long>() {
                    private static final long serialVersionUID = 4940496713319948104L;

                    @Override
                    public Long getKey(Tuple2<Long, Long> value) throws Exception {
                        return value.f0;
                    }
                });
final MapStateDescriptor<Long, Long> firstBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-1", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
final MapStateDescriptor<String, Long> secondBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-2", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
final MapStateDescriptor<Long, String> thirdBroadcastStateDesc = new MapStateDescriptor<>("broadcast-state-3", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
BroadcastStream<Tuple2<Long, Long>> npBroadcastStream =
        env.addSource(nonParallelSourceB)
                .uid("BrCheckpointingSource1")
                .broadcast(firstBroadcastStateDesc, secondBroadcastStateDesc);
BroadcastStream<Tuple2<Long, Long>> pBroadcastStream =
        env.addSource(parallelSourceB)
                .uid("BrCheckpointingSource2")
                .broadcast(thirdBroadcastStateDesc);
npStream.connect(npBroadcastStream)
        .process(firstBroadcastFunction)
        .uid("BrProcess1")
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
pStream.connect(pBroadcastStream)
        .process(secondBroadcastFunction)
        .uid("BrProcess2")
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());
if (executionMode == ExecutionMode.CREATE_SNAPSHOT) {
executeAndSnapshot(
        env,
        "src/test/resources/" + getSnapshotPath(snapshotSpec),
        snapshotSpec.getSnapshotType(),
        new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, 2 * NUM_SOURCE_ELEMENTS));
} else {
restoreAndExecute(
        env,
        getResourceFilename(getSnapshotPath(snapshotSpec)),
        // we have 2 sources
        new Tuple2<>(MigrationTestUtils.CheckingNonParallelSourceWithListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 2),
        // we have 2 sources
        new Tuple2<>(MigrationTestUtils.CheckingParallelSourceWithUnionListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 2 * parallelism),
        new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
}
}
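The test wires up the broadcast streams but keeps the process functions in helper classes. A hypothetical, minimal KeyedBroadcastProcessFunction (not the CheckpointingKeyedBroadcastFunction / CheckingKeyedBroadcastFunction used above) showing the usual access pattern for broadcast state:

import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.util.Collector;

public class BroadcastFunctionSketch
        extends KeyedBroadcastProcessFunction<Long, Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {

    // same descriptor shape as "broadcast-state-1" above
    private static final MapStateDescriptor<Long, Long> DESC =
            new MapStateDescriptor<>("broadcast-state-1", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);

    @Override
    public void processElement(Tuple2<Long, Long> value, ReadOnlyContext ctx, Collector<Tuple2<Long, Long>> out) throws Exception {
        // keyed side: broadcast state is read-only here
        Long broadcastValue = ctx.getBroadcastState(DESC).get(value.f0);
        out.collect(new Tuple2<>(value.f0, broadcastValue == null ? value.f1 : broadcastValue));
    }

    @Override
    public void processBroadcastElement(Tuple2<Long, Long> value, Context ctx, Collector<Tuple2<Long, Long>> out) throws Exception {
        // broadcast side: the only place where the broadcast state may be modified
        ctx.getBroadcastState(DESC).put(value.f0, value.f1);
    }
}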