Search in sources :

Example 1 with HashMapStateBackend

use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

the class StateMachineExample method main.

/**
 * Entry point of the state machine example job.
 *
 * <p>Prints the usage help, then builds the job from the command line arguments: the event
 * stream comes from Kafka when {@code --kafka-topic} is present and from the built-in
 * {@link EventsGeneratorSource} otherwise. Events are keyed by their source address and passed
 * through a {@link StateMachineMapper}; the resulting alerts are printed, or written to a file
 * when {@code --output} is given.
 *
 * @param args The command line arguments.
 * @throws Exception Propagated from job construction or execution.
 */
public static void main(String[] args) throws Exception {
    // ---- usage help (always printed, regardless of the arguments) ----
    System.out.println("Usage with built-in data generator: StateMachineExample [--error-rate <probability-of-invalid-transition>] [--sleep <sleep-per-record-in-ms>]");
    System.out.println("Usage with Kafka: StateMachineExample --kafka-topic <topic> [--brokers <brokers>]");
    System.out.println("Options for both the above setups: ");
    System.out.println("\t[--backend <hashmap|rocks>]");
    System.out.println("\t[--checkpoint-dir <filepath>]");
    System.out.println("\t[--incremental-checkpoints <true|false>]");
    System.out.println("\t[--output <filepath> OR null for stdout]");
    System.out.println();

    final ParameterTool params = ParameterTool.fromArgs(args);

    // ---- execution environment and checkpointing ----
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(2000L);

    // ---- state backend: only "hashmap" and "rocks" are configured explicitly; any other
    // value (including the "memory" default) leaves the environment's backend untouched ----
    final String backendChoice = params.get("backend", "memory");
    final boolean useHashMap = "hashmap".equals(backendChoice);
    final boolean useRocks = !useHashMap && "rocks".equals(backendChoice);
    if (useHashMap || useRocks) {
        final String checkpointDir = params.get("checkpoint-dir");
        if (useHashMap) {
            env.setStateBackend(new HashMapStateBackend());
        } else {
            final boolean incremental = params.getBoolean("incremental-checkpoints", false);
            env.setStateBackend(new EmbeddedRocksDBStateBackend(incremental));
        }
        env.getCheckpointConfig().setCheckpointStorage(checkpointDir);
    }

    // ---- event source: Kafka if a topic is given, the built-in generator otherwise ----
    final DataStream<Event> events;
    if (params.has("kafka-topic")) {
        final String topic = params.get("kafka-topic");
        final String bootstrapServers = params.get("brokers", "localhost:9092");
        System.out.printf("Reading from kafka topic %s @ %s\n", topic, bootstrapServers);
        System.out.println();

        final KafkaSource<Event> kafkaSource =
                KafkaSource.<Event>builder()
                        .setBootstrapServers(bootstrapServers)
                        .setGroupId("stateMachineExample")
                        .setTopics(topic)
                        .setDeserializer(KafkaRecordDeserializationSchema.valueOnly(new EventDeSerializationSchema()))
                        .setStartingOffsets(OffsetsInitializer.latest())
                        .build();
        events = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "StateMachineExampleSource");
    } else {
        final double errorRate = params.getDouble("error-rate", 0.0);
        final int sleepPerRecord = params.getInt("sleep", 1);
        System.out.printf("Using standalone source with error rate %f and sleep delay %s millis\n", errorRate, sleepPerRecord);
        System.out.println();
        events = env.addSource(new EventsGeneratorSource(errorRate, sleepPerRecord));
    }

    // ---- main program ----
    final String outputPath = params.get("output");

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    final DataStream<Alert> alerts =
            events.keyBy(Event::sourceAddress)
                    .flatMap(new StateMachineMapper());

    // alerts go to a rolling file sink when an output path is given, to std-out otherwise
    if (outputPath != null) {
        alerts.sinkTo(
                        FileSink.<Alert>forRowFormat(new Path(outputPath), new SimpleStringEncoder<>())
                                .withRollingPolicy(
                                        DefaultRollingPolicy.builder()
                                                .withMaxPartSize(MemorySize.ofMebiBytes(1))
                                                .withRolloverInterval(Duration.ofSeconds(10))
                                                .build())
                                .build())
                .setParallelism(1)
                .name("output");
    } else {
        alerts.print();
    }

    // trigger program execution
    env.execute("State machine job");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) Path(org.apache.flink.core.fs.Path) EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) EventsGeneratorSource(org.apache.flink.streaming.examples.statemachine.generator.EventsGeneratorSource) Event(org.apache.flink.streaming.examples.statemachine.event.Event) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) Alert(org.apache.flink.streaming.examples.statemachine.event.Alert) EventDeSerializationSchema(org.apache.flink.streaming.examples.statemachine.kafka.EventDeSerializationSchema) SimpleStringEncoder(org.apache.flink.api.common.serialization.SimpleStringEncoder)

Example 2 with HashMapStateBackend

use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

the class StateBackendLoader method loadStateBackendFromConfig.

// ------------------------------------------------------------------------
// Loading the state backend from a configuration
// ------------------------------------------------------------------------
/**
 * Loads the unwrapped state backend from the configuration, from the parameter 'state.backend',
 * as defined in {@link StateBackendOptions#STATE_BACKEND}.
 *
 * <p>The state backends can be specified either via their shortcut name, or via the class name
 * of a {@link StateBackendFactory}. If a StateBackendFactory class name is specified, the
 * factory is instantiated (via its zero-argument constructor) and its {@link
 * StateBackendFactory#createFromConfig(ReadableConfig, ClassLoader)} method is called.
 *
 * <p>Recognized shortcut names are '{@value StateBackendLoader#HASHMAP_STATE_BACKEND_NAME}',
 * '{@value StateBackendLoader#ROCKSDB_STATE_BACKEND_NAME}', '{@value
 * StateBackendLoader#MEMORY_STATE_BACKEND_NAME}' (Deprecated), and '{@value
 * StateBackendLoader#FS_STATE_BACKEND_NAME}' (Deprecated). Shortcut names are matched
 * case-insensitively, independent of the JVM's default locale.
 *
 * @param config The configuration to load the state backend from
 * @param classLoader The class loader that should be used to load the state backend
 * @param logger Optionally, a logger to log actions to (may be null)
 * @return The instantiated state backend, or {@code null} if the configuration does not define
 *     a state backend.
 * @throws DynamicCodeLoadingException Thrown if a state backend factory is configured and the
 *     factory class was not found or the factory could not be instantiated
 * @throws IllegalConfigurationException May be thrown by the StateBackendFactory when creating
 *     / configuring the state backend in the factory
 * @throws IOException May be thrown by the StateBackendFactory when instantiating the state
 *     backend
 */
public static StateBackend loadStateBackendFromConfig(ReadableConfig config, ClassLoader classLoader, @Nullable Logger logger) throws IllegalConfigurationException, DynamicCodeLoadingException, IOException {
    checkNotNull(config, "config");
    checkNotNull(classLoader, "classLoader");
    final String backendName = config.get(StateBackendOptions.STATE_BACKEND);
    if (backendName == null) {
        // nothing configured; the caller decides on a default
        return null;
    }
    // Normalize with Locale.ROOT: the no-arg toLowerCase() uses the JVM default locale, under
    // which some letters lower-case differently (e.g. 'I' becomes a dotless 'i' in Turkish),
    // so an upper-case shortcut name could fail to match the constants below.
    final String shortcutName = backendName.toLowerCase(java.util.Locale.ROOT);
    // by default the factory class is the backend name
    String factoryClassName = backendName;
    switch(shortcutName) {
        case MEMORY_STATE_BACKEND_NAME:
            MemoryStateBackend backend = new MemoryStateBackendFactory().createFromConfig(config, classLoader);
            if (logger != null) {
                logger.warn("MemoryStateBackend has been deprecated. Please use 'hashmap' state " + "backend instead with JobManagerCheckpointStorage for equivalent " + "functionality");
                logger.info("State backend is set to job manager {}", backend);
            }
            return backend;
        case FS_STATE_BACKEND_NAME:
            if (logger != null) {
                logger.warn("{} state backend has been deprecated. Please use 'hashmap' state " + "backend instead.", shortcutName);
            }
        // intentional fall-through: use the HashMapStateBackend instead, which
        // utilizes the same HeapKeyedStateBackend runtime implementation.
        case HASHMAP_STATE_BACKEND_NAME:
            HashMapStateBackend hashMapStateBackend = new HashMapStateBackendFactory().createFromConfig(config, classLoader);
            if (logger != null) {
                logger.info("State backend is set to heap memory {}", hashMapStateBackend);
            }
            return hashMapStateBackend;
        case ROCKSDB_STATE_BACKEND_NAME:
            factoryClassName = ROCKSDB_STATE_BACKEND_FACTORY;
        // intentional fall-through: the shortcut only selects the factory class name; the
        // factory itself is loaded reflectively by the default case.
        default:
            if (logger != null) {
                logger.info("Loading state backend via factory {}", factoryClassName);
            }
            StateBackendFactory<?> factory;
            try {
                // load without initializing, and verify the type before instantiating
                @SuppressWarnings("rawtypes") Class<? extends StateBackendFactory> clazz = Class.forName(factoryClassName, false, classLoader).asSubclass(StateBackendFactory.class);
                factory = clazz.newInstance();
            } catch (ClassNotFoundException e) {
                throw new DynamicCodeLoadingException("Cannot find configured state backend factory class: " + backendName, e);
            } catch (ClassCastException | InstantiationException | IllegalAccessException e) {
                throw new DynamicCodeLoadingException("The class configured under '" + StateBackendOptions.STATE_BACKEND.key() + "' is not a valid state backend factory (" + backendName + ')', e);
            }
            return factory.createFromConfig(config, classLoader);
    }
}
Also used : HashMapStateBackendFactory(org.apache.flink.runtime.state.hashmap.HashMapStateBackendFactory) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) DynamicCodeLoadingException(org.apache.flink.util.DynamicCodeLoadingException) MemoryStateBackendFactory(org.apache.flink.runtime.state.memory.MemoryStateBackendFactory) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend)

Example 3 with HashMapStateBackend

use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

the class StateBackendLoader method loadFromApplicationOrConfigOrDefaultInternal.

/**
 * Resolves the state backend to use, trying three sources in order of precedence:
 *
 * <ol>
 *   <li>the application-defined state backend, if one is given. When it is a {@link
 *       ConfigurableStateBackend}, the result of calling {@link
 *       ConfigurableStateBackend#configure(ReadableConfig, ClassLoader)} on it is used;
 *       otherwise it is used as is.
 *   <li>the state backend defined in the configuration, loaded via {@link
 *       #loadStateBackendFromConfig(ReadableConfig, ClassLoader, Logger)} (which reads {@link
 *       StateBackendOptions#STATE_BACKEND}).
 *   <li>the default state backend (the {@link HashMapStateBackend}), created through the
 *       {@link HashMapStateBackendFactory}.
 * </ol>
 *
 * @param fromApplication The state backend defined by the application, or null if none
 * @param config The configuration to load the state backend from
 * @param classLoader The class loader that should be used to load the state backend
 * @param logger Optionally, a logger to log actions to (may be null)
 * @return The instantiated state backend.
 * @throws DynamicCodeLoadingException Thrown if a state backend factory is configured and the
 *     factory class was not found or the factory could not be instantiated
 * @throws IllegalConfigurationException May be thrown by the StateBackendFactory when creating
 *     / configuring the state backend in the factory
 * @throws IOException May be thrown by the StateBackendFactory when instantiating the state
 *     backend
 */
private static StateBackend loadFromApplicationOrConfigOrDefaultInternal(@Nullable StateBackend fromApplication, Configuration config, ClassLoader classLoader, @Nullable Logger logger) throws IllegalConfigurationException, DynamicCodeLoadingException, IOException {
    checkNotNull(config, "config");
    checkNotNull(classLoader, "classLoader");

    if (fromApplication == null) {
        // (2) no application-defined backend: see whether the config names one
        final StateBackend configured = loadStateBackendFromConfig(config, classLoader, logger);
        if (configured != null) {
            return configured;
        }

        // (3) nothing configured either: fall back to the default
        final StateBackend defaultBackend = new HashMapStateBackendFactory().createFromConfig(config, classLoader);
        if (logger != null) {
            logger.info("No state backend has been configured, using default (HashMap) {}", defaultBackend);
        }
        return defaultBackend;
    }

    // (1) the application-defined state backend takes precedence
    final StateBackend resolved;
    if (!(fromApplication instanceof ConfigurableStateBackend)) {
        // not configurable: keep exactly what the application provided
        resolved = fromApplication;
    } else {
        // configurable: let it pick up additional settings from the config
        if (logger != null) {
            logger.info("Using job/cluster config to configure application-defined state backend: {}", fromApplication);
        }
        resolved = ((ConfigurableStateBackend) fromApplication).configure(config, classLoader);
    }
    if (logger != null) {
        logger.info("Using application-defined state backend: {}", resolved);
    }
    return resolved;
}
Also used : HashMapStateBackendFactory(org.apache.flink.runtime.state.hashmap.HashMapStateBackendFactory) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) DelegatingStateBackend(org.apache.flink.runtime.state.delegate.DelegatingStateBackend)

Example 4 with HashMapStateBackend

use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

the class StickyAllocationAndLocalRecoveryTestJob method main.

/**
 * Builds and runs the "Sticky Allocation And Local Recovery Test" job from command line
 * arguments.
 *
 * <p>{@code checkpointDir} is the only required argument; every other setting has a default.
 * {@code stateBackend} must be either {@code hashmap} or {@code rocks}.
 *
 * @param args The command line arguments.
 * @throws IllegalArgumentException If {@code stateBackend} names an unknown backend.
 * @throws Exception Propagated from job construction or execution.
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    // ---- parallelism, checkpointing and restart behaviour ----
    final int parallelism = params.getInt("parallelism", 1);
    environment.setParallelism(parallelism);

    // max parallelism defaults to the configured parallelism
    final int maxParallelism = params.getInt("maxParallelism", params.getInt("parallelism", 1));
    environment.setMaxParallelism(maxParallelism);

    final int checkpointInterval = params.getInt("checkpointInterval", 1000);
    environment.enableCheckpointing(checkpointInterval);

    final int restartDelay = params.getInt("restartDelay", 0);
    environment.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, restartDelay));

    final boolean retainCheckpoints = params.getBoolean("externalizedCheckpoints", false);
    if (retainCheckpoints) {
        environment.getCheckpointConfig().setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    }

    final String checkpointDirectory = params.getRequired("checkpointDir");
    environment.getCheckpointConfig().setCheckpointStorage(checkpointDirectory);

    final boolean killJvmOnFail = params.getBoolean("killJvmOnFail", false);

    // ---- state backend selection; anything but the two known names is rejected ----
    final String backendName = params.get("stateBackend", "hashmap");
    final boolean hashMapRequested = "hashmap".equals(backendName);
    final boolean rocksRequested = "rocks".equals(backendName);
    if (!hashMapRequested && !rocksRequested) {
        throw new IllegalArgumentException("Unknown backend: " + backendName);
    }
    if (hashMapRequested) {
        environment.setStateBackend(new HashMapStateBackend());
    } else {
        final boolean incremental = params.getBoolean("incrementalCheckpoints", false);
        environment.setStateBackend(new EmbeddedRocksDBStateBackend(incremental));
    }

    // make parameters available in the web interface
    environment.getConfig().setGlobalJobParameters(params);

    // delay to throttle down the production of the source
    final long sourceDelay = params.getLong("delay", 0L);
    // the maximum number of attempts, before the job finishes with success
    final int attemptLimit = params.getInt("maxAttempts", 3);
    // size of one artificial value
    final int artificialValueSize = params.getInt("valueSize", 10);

    // ---- pipeline: random longs, keyed by their own value, into the state-creating flat map ----
    environment
            .addSource(new RandomLongSource(attemptLimit, sourceDelay))
            .keyBy((KeySelector<Long, Long>) aLong -> aLong)
            .flatMap(new StateCreatingFlatMap(artificialValueSize, killJvmOnFail))
            .addSink(new PrintSinkFunction<>());

    environment.execute("Sticky Allocation And Local Recovery Test");
}
Also used : ParameterTool(org.apache.flink.api.java.utils.ParameterTool) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) PrintSinkFunction(org.apache.flink.streaming.api.functions.sink.PrintSinkFunction) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ListState(org.apache.flink.api.common.state.ListState) ParameterTool(org.apache.flink.api.java.utils.ParameterTool) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) Collector(org.apache.flink.util.Collector) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) KeySelector(org.apache.flink.api.java.functions.KeySelector) Iterator(java.util.Iterator) CheckpointedFunction(org.apache.flink.streaming.api.checkpoint.CheckpointedFunction) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) FunctionInitializationContext(org.apache.flink.runtime.state.FunctionInitializationContext) Set(java.util.Set) IOException(java.io.IOException) Preconditions(org.apache.flink.util.Preconditions) Serializable(java.io.Serializable) List(java.util.List) ValueState(org.apache.flink.api.common.state.ValueState) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 
EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend)

Example 5 with HashMapStateBackend

use of org.apache.flink.runtime.state.hashmap.HashMapStateBackend in project flink by apache.

the class StateBootstrapTransformationTest method testMaxParallelismRespected.

/**
 * Verifies that writing operator subtask states honours the max parallelism: with an
 * environment parallelism of 10 and a max parallelism obtained from {@code
 * getMaxParallelism(4)}, the resulting stream is expected to have a parallelism of 4.
 */
@Test
public void testMaxParallelismRespected() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // deliberately higher than the max parallelism asserted below
    env.setParallelism(10);
    DataStream<Integer> input = env.fromElements(0);
    StateBootstrapTransformation<Integer> transformation = OperatorTransformation.bootstrapWith(input).transform(new ExampleStateBootstrapFunction());
    int maxParallelism = transformation.getMaxParallelism(4);
    DataStream<TaggedOperatorSubtaskState> result = transformation.writeOperatorSubtaskStates(OperatorIDGenerator.fromUid("uid"), new HashMapStateBackend(), new Path(), maxParallelism);
    Assert.assertEquals("The parallelism of a data set should be constrained by the savepoint max parallelism", 4, result.getParallelism());
}
Also used : Path(org.apache.flink.core.fs.Path) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) TaggedOperatorSubtaskState(org.apache.flink.state.api.output.TaggedOperatorSubtaskState) Test(org.junit.Test)

Aggregations

HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend)22 Test (org.junit.Test)12 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)11 MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend)10 EmbeddedRocksDBStateBackend (org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend)7 Configuration (org.apache.flink.configuration.Configuration)6 Path (org.apache.flink.core.fs.Path)6 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)4 TaggedOperatorSubtaskState (org.apache.flink.state.api.output.TaggedOperatorSubtaskState)4 DelegatingStateBackend (org.apache.flink.runtime.state.delegate.DelegatingStateBackend)3 FsStateBackend (org.apache.flink.runtime.state.filesystem.FsStateBackend)3 MigrationTestUtils (org.apache.flink.test.checkpointing.utils.MigrationTestUtils)3 KeySelector (org.apache.flink.api.java.functions.KeySelector)2 ParameterTool (org.apache.flink.api.java.utils.ParameterTool)2 AbstractKeyedStateBackend (org.apache.flink.runtime.state.AbstractKeyedStateBackend)2 AbstractStateBackend (org.apache.flink.runtime.state.AbstractStateBackend)2 ConfigurableStateBackend (org.apache.flink.runtime.state.ConfigurableStateBackend)2 OperatorStateBackend (org.apache.flink.runtime.state.OperatorStateBackend)2 StateBackend (org.apache.flink.runtime.state.StateBackend)2 HashMapStateBackendFactory (org.apache.flink.runtime.state.hashmap.HashMapStateBackendFactory)2