
Example 6 with SavepointRestoreSettings

Use of org.apache.flink.runtime.jobgraph.SavepointRestoreSettings in project flink by apache.

From class DefaultExecutionGraphFactoryTest, method createJobGraphWithSavepoint.

@Nonnull
private JobGraph createJobGraphWithSavepoint(boolean allowNonRestoredState, long savepointId) throws IOException {
    // create savepoint data
    final OperatorID operatorID = new OperatorID();
    final File savepointFile = TestUtils.createSavepointWithOperatorState(TEMPORARY_FOLDER.newFile(), savepointId, operatorID);
    // configure the savepoint restore settings with the given allowNonRestoredState flag
    final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath(savepointFile.getAbsolutePath(), allowNonRestoredState);
    // create a new operator
    final JobVertex jobVertex = new JobVertex("New operator");
    jobVertex.setInvokableClass(NoOpInvokable.class);
    jobVertex.setParallelism(1);
    // the savepoint references an OperatorID that does not match any operator of the newly created JobGraph
    return TestUtils.createJobGraphFromJobVerticesWithCheckpointing(savepointRestoreSettings, jobVertex);
}
Also used: JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), OperatorID (org.apache.flink.runtime.jobgraph.OperatorID), File (java.io.File), SavepointRestoreSettings (org.apache.flink.runtime.jobgraph.SavepointRestoreSettings), Nonnull (javax.annotation.Nonnull)
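
For reference, a minimal hedged sketch of the SavepointRestoreSettings factory methods the helper above relies on; the savepoint path is hypothetical, and the sketch assumes it lives in the same test class so it can reuse the helper:

private void sketchSavepointRestoreSettings() throws IOException {
    // The helper above attaches settings built with forPath(path, allowNonRestoredState);
    // they can be read back through the JobGraph accessor.
    JobGraph jobGraph = createJobGraphWithSavepoint(true, 42L);
    assertTrue(jobGraph.getSavepointRestoreSettings().allowNonRestoredState());

    // forPath(path) without the flag defaults allowNonRestoredState to false: restoring
    // fails if the savepoint contains state for operators missing from the job.
    jobGraph.setSavepointRestoreSettings(
            SavepointRestoreSettings.forPath("/tmp/savepoints/savepoint-0a1b2c")); // hypothetical path

    // SavepointRestoreSettings.none() disables the restore entirely; it is the default
    // for a freshly created JobGraph.
    jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.none());
}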

Example 7 with SavepointRestoreSettings

Use of org.apache.flink.runtime.jobgraph.SavepointRestoreSettings in project beam by apache.

From class FlinkExecutionEnvironments, method createStreamExecutionEnvironment.

@VisibleForTesting
static StreamExecutionEnvironment createStreamExecutionEnvironment(FlinkPipelineOptions options, List<String> filesToStage, @Nullable String confDir) {
    LOG.info("Creating a Streaming Environment.");
    // Although Flink uses REST, it expects the address not to contain an HTTP scheme
    String masterUrl = stripHttpSchema(options.getFlinkMaster());
    Configuration flinkConfiguration = getFlinkConfiguration(confDir);
    StreamExecutionEnvironment flinkStreamEnv;
    // depending on the master, create the right environment.
    if ("[local]".equals(masterUrl)) {
        setManagedMemoryByFraction(flinkConfiguration);
        disableClassLoaderLeakCheck(flinkConfiguration);
        flinkStreamEnv = StreamExecutionEnvironment.createLocalEnvironment(getDefaultLocalParallelism(), flinkConfiguration);
    } else if ("[auto]".equals(masterUrl)) {
        flinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        if (flinkStreamEnv instanceof LocalStreamEnvironment) {
            disableClassLoaderLeakCheck(flinkConfiguration);
            flinkStreamEnv = StreamExecutionEnvironment.createLocalEnvironment(getDefaultLocalParallelism(), flinkConfiguration);
        }
    } else {
        int defaultPort = flinkConfiguration.getInteger(RestOptions.PORT);
        HostAndPort hostAndPort = HostAndPort.fromString(masterUrl).withDefaultPort(defaultPort);
        flinkConfiguration.setInteger(RestOptions.PORT, hostAndPort.getPort());
        final SavepointRestoreSettings savepointRestoreSettings;
        if (options.getSavepointPath() != null) {
            savepointRestoreSettings = SavepointRestoreSettings.forPath(options.getSavepointPath(), options.getAllowNonRestoredState());
        } else {
            savepointRestoreSettings = SavepointRestoreSettings.none();
        }
        flinkStreamEnv = new RemoteStreamEnvironment(
                hostAndPort.getHost(),
                hostAndPort.getPort(),
                flinkConfiguration,
                filesToStage.toArray(new String[filesToStage.size()]),
                null,
                savepointRestoreSettings);
        LOG.info("Using Flink Master URL {}:{}.", hostAndPort.getHost(), hostAndPort.getPort());
    }
    // Set the parallelism, required by UnboundedSourceWrapper to generate consistent splits.
    final int parallelism = determineParallelism(options.getParallelism(), flinkStreamEnv.getParallelism(), flinkConfiguration);
    flinkStreamEnv.setParallelism(parallelism);
    if (options.getMaxParallelism() > 0) {
        flinkStreamEnv.setMaxParallelism(options.getMaxParallelism());
    }
    // set parallelism in the options (required by some execution code)
    options.setParallelism(parallelism);
    if (options.getObjectReuse()) {
        flinkStreamEnv.getConfig().enableObjectReuse();
    } else {
        flinkStreamEnv.getConfig().disableObjectReuse();
    }
    // default to event time
    flinkStreamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // for the following 2 parameters, a value of -1 means that Flink will use
    // the default values as specified in the configuration.
    int numRetries = options.getNumberOfExecutionRetries();
    if (numRetries != -1) {
        flinkStreamEnv.setNumberOfExecutionRetries(numRetries);
    }
    long retryDelay = options.getExecutionRetryDelay();
    if (retryDelay != -1) {
        flinkStreamEnv.getConfig().setExecutionRetryDelay(retryDelay);
    }
    configureCheckpointing(options, flinkStreamEnv);
    applyLatencyTrackingInterval(flinkStreamEnv.getConfig(), options);
    if (options.getAutoWatermarkInterval() != null) {
        flinkStreamEnv.getConfig().setAutoWatermarkInterval(options.getAutoWatermarkInterval());
    }
    configureStateBackend(options, flinkStreamEnv);
    return flinkStreamEnv;
}
Also used: HostAndPort (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.net.HostAndPort), LocalStreamEnvironment (org.apache.flink.streaming.api.environment.LocalStreamEnvironment), Configuration (org.apache.flink.configuration.Configuration), GlobalConfiguration (org.apache.flink.configuration.GlobalConfiguration), RemoteStreamEnvironment (org.apache.flink.streaming.api.environment.RemoteStreamEnvironment), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), SavepointRestoreSettings (org.apache.flink.runtime.jobgraph.SavepointRestoreSettings), VisibleForTesting (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)
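
For context, a hedged sketch of how a Beam pipeline author typically reaches the remote-master branch above: the savepoint-related pipeline options are supplied as arguments (all values below are hypothetical; PipelineOptionsFactory is org.apache.beam.sdk.options.PipelineOptionsFactory) and are later consulted when the RemoteStreamEnvironment is built:

static FlinkPipelineOptions savepointAwareOptions() {
    // Hypothetical master address and savepoint path; --allowNonRestoredState maps to
    // options.getAllowNonRestoredState() in the branch above.
    return PipelineOptionsFactory.fromArgs(
                    "--runner=FlinkRunner",
                    "--flinkMaster=flink-jobmanager:8081",
                    "--savepointPath=hdfs:///savepoints/savepoint-1",
                    "--allowNonRestoredState")
            .as(FlinkPipelineOptions.class);
}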

Example 8 with SavepointRestoreSettings

Use of org.apache.flink.runtime.jobgraph.SavepointRestoreSettings in project flink by apache.

From class JobManagerTest, method testSavepointRestoreSettings.

/**
 * Tests that configured {@link SavepointRestoreSettings} are respected.
 */
@Test
public void testSavepointRestoreSettings() throws Exception {
    FiniteDuration timeout = new FiniteDuration(30, TimeUnit.SECONDS);
    ActorSystem actorSystem = null;
    ActorGateway jobManager = null;
    ActorGateway archiver = null;
    ActorGateway taskManager = null;
    try {
        actorSystem = AkkaUtils.createLocalActorSystem(new Configuration());
        Tuple2<ActorRef, ActorRef> master = JobManager.startJobManagerActors(
                new Configuration(),
                actorSystem,
                TestingUtils.defaultExecutor(),
                TestingUtils.defaultExecutor(),
                Option.apply("jm"),
                Option.apply("arch"),
                TestingJobManager.class,
                TestingMemoryArchivist.class);
        jobManager = new AkkaActorGateway(master._1(), null);
        archiver = new AkkaActorGateway(master._2(), null);
        Configuration tmConfig = new Configuration();
        tmConfig.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 4);
        ActorRef taskManagerRef = TaskManager.startTaskManagerComponentsAndActor(
                tmConfig,
                ResourceID.generate(),
                actorSystem,
                "localhost",
                Option.apply("tm"),
                Option.<LeaderRetrievalService>apply(new StandaloneLeaderRetrievalService(jobManager.path())),
                true,
                TestingTaskManager.class);
        taskManager = new AkkaActorGateway(taskManagerRef, null);
        // Wait until connected
        Object msg = new TestingTaskManagerMessages.NotifyWhenRegisteredAtJobManager(jobManager.actor());
        Await.ready(taskManager.ask(msg, timeout), timeout);
        // Create job graph
        JobVertex sourceVertex = new JobVertex("Source");
        sourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        sourceVertex.setParallelism(1);
        JobGraph jobGraph = new JobGraph("TestingJob", sourceVertex);
        JobSnapshottingSettings snapshottingSettings = new JobSnapshottingSettings(
                Collections.singletonList(sourceVertex.getID()),
                Collections.singletonList(sourceVertex.getID()),
                Collections.singletonList(sourceVertex.getID()),
                // deactivated checkpointing
                Long.MAX_VALUE, 360000, 0, Integer.MAX_VALUE,
                ExternalizedCheckpointSettings.none(), null, true);
        jobGraph.setSnapshotSettings(snapshottingSettings);
        // Submit job graph
        msg = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.DETACHED);
        Await.result(jobManager.ask(msg, timeout), timeout);
        // Wait for all tasks to be running
        msg = new TestingJobManagerMessages.WaitForAllVerticesToBeRunning(jobGraph.getJobID());
        Await.result(jobManager.ask(msg, timeout), timeout);
        // Trigger savepoint
        File targetDirectory = tmpFolder.newFolder();
        msg = new TriggerSavepoint(jobGraph.getJobID(), Option.apply(targetDirectory.getAbsolutePath()));
        Future<Object> future = jobManager.ask(msg, timeout);
        Object result = Await.result(future, timeout);
        String savepointPath = ((TriggerSavepointSuccess) result).savepointPath();
        // Cancel because of restarts
        msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(jobGraph.getJobID());
        Future<?> removedFuture = jobManager.ask(msg, timeout);
        Future<?> cancelFuture = jobManager.ask(new CancelJob(jobGraph.getJobID()), timeout);
        Object response = Await.result(cancelFuture, timeout);
        assertTrue("Unexpected response: " + response, response instanceof CancellationSuccess);
        Await.ready(removedFuture, timeout);
        // Adjust the job (we need a new operator ID)
        JobVertex newSourceVertex = new JobVertex("NewSource");
        newSourceVertex.setInvokableClass(BlockingStatefulInvokable.class);
        newSourceVertex.setParallelism(1);
        JobGraph newJobGraph = new JobGraph("NewTestingJob", newSourceVertex);
        JobSnapshottingSettings newSnapshottingSettings = new JobSnapshottingSettings(
                Collections.singletonList(newSourceVertex.getID()),
                Collections.singletonList(newSourceVertex.getID()),
                Collections.singletonList(newSourceVertex.getID()),
                // deactivated checkpointing
                Long.MAX_VALUE, 360000, 0, Integer.MAX_VALUE,
                ExternalizedCheckpointSettings.none(), null, true);
        newJobGraph.setSnapshotSettings(newSnapshottingSettings);
        SavepointRestoreSettings restoreSettings = SavepointRestoreSettings.forPath(savepointPath, false);
        newJobGraph.setSavepointRestoreSettings(restoreSettings);
        msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
        response = Await.result(jobManager.ask(msg, timeout), timeout);
        assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobResultFailure);
        JobManagerMessages.JobResultFailure failure = (JobManagerMessages.JobResultFailure) response;
        Throwable cause = failure.cause().deserializeError(ClassLoader.getSystemClassLoader());
        assertTrue(cause instanceof IllegalStateException);
        assertTrue(cause.getMessage().contains("allowNonRestoredState"));
        // Wait until removed
        msg = new TestingJobManagerMessages.NotifyWhenJobRemoved(newJobGraph.getJobID());
        Await.ready(jobManager.ask(msg, timeout), timeout);
        // Resubmit, but allow non-restored state now
        restoreSettings = SavepointRestoreSettings.forPath(savepointPath, true);
        newJobGraph.setSavepointRestoreSettings(restoreSettings);
        msg = new JobManagerMessages.SubmitJob(newJobGraph, ListeningBehaviour.DETACHED);
        response = Await.result(jobManager.ask(msg, timeout), timeout);
        assertTrue("Unexpected response: " + response, response instanceof JobManagerMessages.JobSubmitSuccess);
    } finally {
        if (actorSystem != null) {
            actorSystem.shutdown();
        }
        if (archiver != null) {
            archiver.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (jobManager != null) {
            jobManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
        if (taskManager != null) {
            taskManager.actor().tell(PoisonPill.getInstance(), ActorRef.noSender());
        }
    }
}
Also used: ActorSystem (akka.actor.ActorSystem), AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway), JobSubmitSuccess (org.apache.flink.runtime.messages.JobManagerMessages.JobSubmitSuccess), Configuration (org.apache.flink.configuration.Configuration), ActorRef (akka.actor.ActorRef), TestingJobManagerMessages (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages), ActorGateway (org.apache.flink.runtime.instance.ActorGateway), CancelJob (org.apache.flink.runtime.messages.JobManagerMessages.CancelJob), WaitForAllVerticesToBeRunning (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.WaitForAllVerticesToBeRunning), JobSnapshottingSettings (org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings), JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages), FiniteDuration (scala.concurrent.duration.FiniteDuration), SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob), TriggerSavepointSuccess (org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepointSuccess), JobGraph (org.apache.flink.runtime.jobgraph.JobGraph), JobVertex (org.apache.flink.runtime.jobgraph.JobVertex), StandaloneLeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService), CancellationSuccess (org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess), TriggerSavepoint (org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint), File (java.io.File), SavepointRestoreSettings (org.apache.flink.runtime.jobgraph.SavepointRestoreSettings), Test (org.junit.Test)
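
The Akka messaging above is legacy test plumbing; the SavepointRestoreSettings-specific part is the resubmission pattern, condensed in the hedged sketch below (NoOpInvokable stands in for the test's stateful invokable, and savepointPath is whatever TriggerSavepoint returned earlier):

static JobGraph sketchResubmission(String savepointPath) {
    // A job whose single operator gets a fresh operator ID, so nothing in the old
    // savepoint matches it.
    JobVertex newSource = new JobVertex("NewSource");
    newSource.setInvokableClass(NoOpInvokable.class);
    newSource.setParallelism(1);
    JobGraph newJobGraph = new JobGraph("NewTestingJob", newSource);

    // First submission in the test: allowNonRestoredState = false, so the JobManager
    // rejects the job because the savepoint still holds state for the removed operator.
    newJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, false));

    // Second submission: allowNonRestoredState = true, so the unmatched state is
    // dropped and the job is accepted.
    newJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, true));
    return newJobGraph;
}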

Example 9 with SavepointRestoreSettings

Use of org.apache.flink.runtime.jobgraph.SavepointRestoreSettings in project flink by apache.

From class StandaloneApplicationClusterConfigurationParserFactory, method createResult.

@Override
public StandaloneApplicationClusterConfiguration createResult(@Nonnull CommandLine commandLine) throws FlinkParseException {
    final String configDir = commandLine.getOptionValue(CONFIG_DIR_OPTION.getOpt());
    final Properties dynamicProperties = commandLine.getOptionProperties(DYNAMIC_PROPERTY_OPTION.getOpt());
    final int restPort = getRestPort(commandLine);
    final String hostname = commandLine.getOptionValue(HOST_OPTION.getOpt());
    final SavepointRestoreSettings savepointRestoreSettings = CliFrontendParser.createSavepointRestoreSettings(commandLine);
    final JobID jobId = getJobId(commandLine);
    final String jobClassName = commandLine.getOptionValue(JOB_CLASS_NAME_OPTION.getOpt());
    return new StandaloneApplicationClusterConfiguration(configDir, dynamicProperties, commandLine.getArgs(), hostname, restPort, savepointRestoreSettings, jobId, jobClassName);
}
Also used: Properties (java.util.Properties), JobID (org.apache.flink.api.common.JobID), SavepointRestoreSettings (org.apache.flink.runtime.jobgraph.SavepointRestoreSettings)
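
The settings parsed here eventually travel through the cluster Configuration; below is a minimal sketch of that round trip, assuming the toConfiguration/fromConfiguration helpers on SavepointRestoreSettings (fromConfiguration also appears in the next example):

static SavepointRestoreSettings roundTripThroughConfiguration(SavepointRestoreSettings settings) {
    Configuration configuration = new Configuration();
    // Writes the execution.savepoint.* options into the configuration.
    SavepointRestoreSettings.toConfiguration(settings, configuration);
    // Reads them back; an equal SavepointRestoreSettings instance is expected.
    return SavepointRestoreSettings.fromConfiguration(configuration);
}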

Example 10 with SavepointRestoreSettings

Use of org.apache.flink.runtime.jobgraph.SavepointRestoreSettings in project flink by apache.

From class StandaloneApplicationClusterConfigurationParserFactoryTest, method testEntrypointClusterConfigurationToConfigurationParsing.

@Test
public void testEntrypointClusterConfigurationToConfigurationParsing() throws FlinkParseException {
    final JobID jobID = JobID.generate();
    final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath("/test/savepoint/path", true);
    final String key = DeploymentOptions.TARGET.key();
    final String value = "testDynamicExecutorConfig";
    final int restPort = 1234;
    final String arg1 = "arg1";
    final String arg2 = "arg2";
    final String[] args = {
            "--configDir", confDirPath,
            "--job-id", jobID.toHexString(),
            "--fromSavepoint", savepointRestoreSettings.getRestorePath(),
            "--allowNonRestoredState",
            "--webui-port", String.valueOf(restPort),
            "--job-classname", JOB_CLASS_NAME,
            String.format("-D%s=%s", key, value),
            arg1, arg2 };
    final StandaloneApplicationClusterConfiguration clusterConfiguration = commandLineParser.parse(args);
    assertThat(clusterConfiguration.getJobClassName(), is(equalTo(JOB_CLASS_NAME)));
    assertThat(clusterConfiguration.getArgs(), arrayContaining(arg1, arg2));
    final Configuration configuration = StandaloneApplicationClusterEntryPoint.loadConfigurationFromClusterConfig(clusterConfiguration);
    final String strJobId = configuration.get(PipelineOptionsInternal.PIPELINE_FIXED_JOB_ID);
    assertThat(JobID.fromHexString(strJobId), is(equalTo(jobID)));
    assertThat(SavepointRestoreSettings.fromConfiguration(configuration), is(equalTo(savepointRestoreSettings)));
    assertThat(configuration.get(RestOptions.PORT), is(equalTo(restPort)));
    assertThat(configuration.get(DeploymentOptions.TARGET), is(equalTo(value)));
}
Also used: Configuration (org.apache.flink.configuration.Configuration), GlobalConfiguration (org.apache.flink.configuration.GlobalConfiguration), Matchers.containsString (org.hamcrest.Matchers.containsString), JobID (org.apache.flink.api.common.JobID), SavepointRestoreSettings (org.apache.flink.runtime.jobgraph.SavepointRestoreSettings), Test (org.junit.Test)
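
As a follow-up, a hedged sketch of how such a parsed configuration typically feeds back into a job at the entry point: the savepoint settings are read from the Configuration and attached to the JobGraph before submission (both calls appear in the examples above, though this exact helper is hypothetical):

static void applySavepointRestoreSettings(Configuration configuration, JobGraph jobGraph) {
    // Falls back to SavepointRestoreSettings.none() when no savepoint path is configured.
    SavepointRestoreSettings settings = SavepointRestoreSettings.fromConfiguration(configuration);
    jobGraph.setSavepointRestoreSettings(settings);
}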

Aggregations

SavepointRestoreSettings (org.apache.flink.runtime.jobgraph.SavepointRestoreSettings): 22 usages
Test (org.junit.Test): 12 usages
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 9 usages
Configuration (org.apache.flink.configuration.Configuration): 7 usages
JobID (org.apache.flink.api.common.JobID): 5 usages
File (java.io.File): 4 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 3 usages
CheckpointConfig (org.apache.flink.streaming.api.environment.CheckpointConfig): 3 usages
Matchers.containsString (org.hamcrest.Matchers.containsString): 3 usages
GlobalConfiguration (org.apache.flink.configuration.GlobalConfiguration): 2 usages
CheckpointRecoveryFactory (org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory): 2 usages
CompletedCheckpoint (org.apache.flink.runtime.checkpoint.CompletedCheckpoint): 2 usages
PerJobCheckpointRecoveryFactory (org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory): 2 usages
StandaloneCheckpointRecoveryFactory (org.apache.flink.runtime.checkpoint.StandaloneCheckpointRecoveryFactory): 2 usages
StandaloneCompletedCheckpointStore (org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore): 2 usages
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 2 usages
JobMasterBuilder (org.apache.flink.runtime.jobmaster.utils.JobMasterBuilder): 2 usages
RemoteStreamEnvironment (org.apache.flink.streaming.api.environment.RemoteStreamEnvironment): 2 usages
ActorRef (akka.actor.ActorRef): 1 usage
ActorSystem (akka.actor.ActorSystem): 1 usage