Search in sources :

Example 1 with CheckpointStorage

use of org.apache.flink.runtime.state.CheckpointStorage in project flink by apache.

the class DispatcherTest method createTestingSavepoint.

/**
 * Writes a savepoint with checkpoint id 1 and empty metadata collections into a freshly created
 * temporary folder, and returns the external pointer of the finalized location as a URI.
 */
@Nonnull
private URI createTestingSavepoint() throws IOException, URISyntaxException {
    final long savepointId = 1L;
    // Resolve the storage for this job using the current thread's context class loader.
    final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    final CheckpointStorage loadedStorage = Checkpoints.loadCheckpointStorage(configuration, contextClassLoader, log);
    final CheckpointStorageCoordinatorView coordinatorView = loadedStorage.createCheckpointStorage(jobGraph.getJobID());
    // The savepoint is written into a new folder managed by the temporary-folder fixture.
    final File targetFolder = temporaryFolder.newFolder();
    final String targetPath = targetFolder.getAbsolutePath();
    final CheckpointStorageLocation savepointLocation = coordinatorView.initializeLocationForSavepoint(savepointId, targetPath);
    final CheckpointMetadataOutputStream metadataOut = savepointLocation.createMetadataOutputStream();
    final CheckpointMetadata emptyMetadata = new CheckpointMetadata(savepointId, Collections.emptyList(), Collections.emptyList());
    Checkpoints.storeCheckpointMetadata(emptyMetadata, metadataOut);
    // Closing the stream finalizes the savepoint and yields its completed location.
    final CompletedCheckpointStorageLocation finalizedLocation = metadataOut.closeAndFinalizeCheckpoint();
    final String externalPointer = finalizedLocation.getExternalPointer();
    return new URI(externalPointer);
}
Also used : CheckpointMetadataOutputStream(org.apache.flink.runtime.state.CheckpointMetadataOutputStream) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) CheckpointStorageCoordinatorView(org.apache.flink.runtime.state.CheckpointStorageCoordinatorView) CheckpointStorageLocation(org.apache.flink.runtime.state.CheckpointStorageLocation) CompletedCheckpointStorageLocation(org.apache.flink.runtime.state.CompletedCheckpointStorageLocation) File(java.io.File) URI(java.net.URI) CheckpointMetadata(org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata) Nonnull(javax.annotation.Nonnull)

Example 2 with CheckpointStorage

use of org.apache.flink.runtime.state.CheckpointStorage in project flink by apache.

the class DefaultExecutionGraphBuilder method buildGraph.

/**
 * Builds a {@link DefaultExecutionGraph} for the given job graph.
 *
 * <p>The steps, in order: create the execution graph, attach a JSON plan (an empty plan if
 * generation fails), run {@code initializeOnMaster} on every job vertex, attach the
 * topologically sorted vertices, and finally enable checkpointing when the graph is not dynamic
 * and {@code isCheckpointingEnabled(jobGraph)} holds.
 *
 * @param jobGraph the job graph to build from; must not be null
 * @param jobManagerConfig configuration read for the attempt-history size, the partition group
 *     release strategy, and the state backend / checkpoint storage loading
 * @param classLoader class loader used for master initialization and for deserializing the
 *     application-defined state backend, checkpoint storage and master hooks
 * @param log logger used for progress and warning messages
 * @param checkpointStatsTrackerFactory queried only when checkpointing is enabled
 * @param isDynamicGraph when true, checkpointing setup is skipped with a warning
 * @return the execution graph with the job graph attached
 * @throws JobException if constructing the execution graph fails with an {@link IOException}
 * @throws JobExecutionException if a vertex has no invokable class, if master initialization of
 *     a vertex fails, or if the state backend, checkpoint storage or master hooks cannot be
 *     deserialized or instantiated
 */
public static DefaultExecutionGraph buildGraph(JobGraph jobGraph, Configuration jobManagerConfig, ScheduledExecutorService futureExecutor, Executor ioExecutor, ClassLoader classLoader, CompletedCheckpointStore completedCheckpointStore, CheckpointsCleaner checkpointsCleaner, CheckpointIDCounter checkpointIdCounter, Time rpcTimeout, BlobWriter blobWriter, Logger log, ShuffleMaster<?> shuffleMaster, JobMasterPartitionTracker partitionTracker, TaskDeploymentDescriptorFactory.PartitionLocationConstraint partitionLocationConstraint, ExecutionDeploymentListener executionDeploymentListener, ExecutionStateUpdateListener executionStateUpdateListener, long initializationTimestamp, VertexAttemptNumberStore vertexAttemptNumberStore, VertexParallelismStore vertexParallelismStore, Supplier<CheckpointStatsTracker> checkpointStatsTrackerFactory, boolean isDynamicGraph) throws JobExecutionException, JobException {
    checkNotNull(jobGraph, "job graph cannot be null");
    final String jobName = jobGraph.getName();
    final JobID jobId = jobGraph.getJobID();
    final JobInformation jobInformation = new JobInformation(jobId, jobName, jobGraph.getSerializedExecutionConfig(), jobGraph.getJobConfiguration(), jobGraph.getUserJarBlobKeys(), jobGraph.getClasspaths());
    final int maxPriorAttemptsHistoryLength = jobManagerConfig.getInteger(JobManagerOptions.MAX_ATTEMPTS_HISTORY_SIZE);
    final PartitionGroupReleaseStrategy.Factory partitionGroupReleaseStrategyFactory = PartitionGroupReleaseStrategyFactoryLoader.loadPartitionGroupReleaseStrategyFactory(jobManagerConfig);
    // create a new execution graph, if none exists so far
    final DefaultExecutionGraph executionGraph;
    try {
        executionGraph = new DefaultExecutionGraph(jobInformation, futureExecutor, ioExecutor, rpcTimeout, maxPriorAttemptsHistoryLength, classLoader, blobWriter, partitionGroupReleaseStrategyFactory, shuffleMaster, partitionTracker, partitionLocationConstraint, executionDeploymentListener, executionStateUpdateListener, initializationTimestamp, vertexAttemptNumberStore, vertexParallelismStore, isDynamicGraph);
    } catch (IOException e) {
        throw new JobException("Could not create the ExecutionGraph.", e);
    }
    // JSON plan generation is best-effort: any failure is logged and replaced by an empty plan
    try {
        executionGraph.setJsonPlan(JsonPlanGenerator.generatePlan(jobGraph));
    } catch (Throwable t) {
        log.warn("Cannot create JSON plan for job", t);
        // give the graph an empty plan
        executionGraph.setJsonPlan("{}");
    }
    // initialize the vertices that have a master initialization hook
    // file output formats create directories here, input formats create splits
    final long initMasterStart = System.nanoTime();
    log.info("Running initialization on master for job {} ({}).", jobName, jobId);
    for (JobVertex vertex : jobGraph.getVertices()) {
        // every vertex must name an invokable class before it can be initialized
        String executableClass = vertex.getInvokableClassName();
        if (executableClass == null || executableClass.isEmpty()) {
            throw new JobSubmissionException(jobId, "The vertex " + vertex.getID() + " (" + vertex.getName() + ") has no invokable class.");
        }
        try {
            vertex.initializeOnMaster(classLoader);
        } catch (Throwable t) {
            throw new JobExecutionException(jobId, "Cannot initialize task '" + vertex.getName() + "': " + t.getMessage(), t);
        }
    }
    // elapsed time is measured in nanoseconds and reported in milliseconds
    log.info("Successfully ran initialization on master in {} ms.", (System.nanoTime() - initMasterStart) / 1_000_000);
    // topologically sort the job vertices and attach the graph to the existing one
    List<JobVertex> sortedTopology = jobGraph.getVerticesSortedTopologicallyFromSources();
    if (log.isDebugEnabled()) {
        log.debug("Adding {} vertices from job graph {} ({}).", sortedTopology.size(), jobName, jobId);
    }
    executionGraph.attachJobGraph(sortedTopology);
    if (log.isDebugEnabled()) {
        log.debug("Successfully created execution graph from job graph {} ({}).", jobName, jobId);
    }
    // configure the state checkpointing
    if (isDynamicGraph) {
        // dynamic graph does not support checkpointing so we skip it
        log.warn("Skip setting up checkpointing for a job with dynamic graph.");
    } else if (isCheckpointingEnabled(jobGraph)) {
        JobCheckpointingSettings snapshotSettings = jobGraph.getCheckpointingSettings();
        // load the state backend from the application settings
        final StateBackend applicationConfiguredBackend;
        final SerializedValue<StateBackend> serializedAppConfigured = snapshotSettings.getDefaultStateBackend();
        if (serializedAppConfigured == null) {
            applicationConfiguredBackend = null;
        } else {
            try {
                applicationConfiguredBackend = serializedAppConfigured.deserializeValue(classLoader);
            } catch (IOException | ClassNotFoundException e) {
                throw new JobExecutionException(jobId, "Could not deserialize application-defined state backend.", e);
            }
        }
        // resolve the effective backend from the application-defined one (may be null) and the config
        final StateBackend rootBackend;
        try {
            rootBackend = StateBackendLoader.fromApplicationOrConfigOrDefault(applicationConfiguredBackend, snapshotSettings.isChangelogStateBackendEnabled(), jobManagerConfig, classLoader, log);
        } catch (IllegalConfigurationException | IOException | DynamicCodeLoadingException e) {
            throw new JobExecutionException(jobId, "Could not instantiate configured state backend", e);
        }
        // load the checkpoint storage from the application settings
        final CheckpointStorage applicationConfiguredStorage;
        final SerializedValue<CheckpointStorage> serializedAppConfiguredStorage = snapshotSettings.getDefaultCheckpointStorage();
        if (serializedAppConfiguredStorage == null) {
            applicationConfiguredStorage = null;
        } else {
            try {
                applicationConfiguredStorage = serializedAppConfiguredStorage.deserializeValue(classLoader);
            } catch (IOException | ClassNotFoundException e) {
                throw new JobExecutionException(jobId, "Could not deserialize application-defined checkpoint storage.", e);
            }
        }
        // the storage loader also receives the resolved backend; the second argument is passed as null here
        final CheckpointStorage rootStorage;
        try {
            rootStorage = CheckpointStorageLoader.load(applicationConfiguredStorage, null, rootBackend, jobManagerConfig, classLoader, log);
        } catch (IllegalConfigurationException | DynamicCodeLoadingException e) {
            throw new JobExecutionException(jobId, "Could not instantiate configured checkpoint storage", e);
        }
        // instantiate the user-defined checkpoint hooks
        final SerializedValue<MasterTriggerRestoreHook.Factory[]> serializedHooks = snapshotSettings.getMasterHooks();
        final List<MasterTriggerRestoreHook<?>> hooks;
        if (serializedHooks == null) {
            hooks = Collections.emptyList();
        } else {
            final MasterTriggerRestoreHook.Factory[] hookFactories;
            try {
                hookFactories = serializedHooks.deserializeValue(classLoader);
            } catch (IOException | ClassNotFoundException e) {
                throw new JobExecutionException(jobId, "Could not instantiate user-defined checkpoint hooks", e);
            }
            // create the hooks with the given class loader as context class loader,
            // and always restore the previous context class loader afterwards
            final Thread thread = Thread.currentThread();
            final ClassLoader originalClassLoader = thread.getContextClassLoader();
            thread.setContextClassLoader(classLoader);
            try {
                hooks = new ArrayList<>(hookFactories.length);
                for (MasterTriggerRestoreHook.Factory factory : hookFactories) {
                    hooks.add(MasterHooks.wrapHook(factory.create(), classLoader));
                }
            } finally {
                thread.setContextClassLoader(originalClassLoader);
            }
        }
        final CheckpointCoordinatorConfiguration chkConfig = snapshotSettings.getCheckpointCoordinatorConfiguration();
        executionGraph.enableCheckpointing(chkConfig, hooks, checkpointIdCounter, completedCheckpointStore, rootBackend, rootStorage, checkpointStatsTrackerFactory.get(), checkpointsCleaner);
    }
    return executionGraph;
}
Also used : ArrayList(java.util.ArrayList) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) TaskDeploymentDescriptorFactory(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorFactory) JobSubmissionException(org.apache.flink.runtime.client.JobSubmissionException) StateBackend(org.apache.flink.runtime.state.StateBackend) MasterTriggerRestoreHook(org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook) JobException(org.apache.flink.runtime.JobException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) List(java.util.List) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) IOException(java.io.IOException) SerializedValue(org.apache.flink.util.SerializedValue) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobID(org.apache.flink.api.common.JobID) PartitionGroupReleaseStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.partitionrelease.PartitionGroupReleaseStrategy)

Example 3 with CheckpointStorage

use of org.apache.flink.runtime.state.CheckpointStorage in project flink by apache.

the class CheckpointSettingsSerializableTest method testDeserializationOfUserCodeWithUserClassLoader.

/**
 * Builds checkpointing settings whose state backend, checkpoint storage and master hook factory
 * all wrap an object obtained from a separately created class loader, copies the job graph via
 * serialization, and checks that an execution graph built with that class loader registers the
 * single master hook and that the state backend deserializes to a {@code CustomStateBackend}.
 */
@Test
public void testDeserializationOfUserCodeWithUserClassLoader() throws Exception {
    final ClassLoaderUtils.ObjectAndClassLoader<Serializable> userCode = ClassLoaderUtils.createSerializableObjectFromNewClassLoader();
    final ClassLoader userClassLoader = userCode.getClassLoader();
    final Serializable userObject = userCode.getObject();

    // Serialize each user-defined component separately, then assemble the settings.
    final MasterTriggerRestoreHook.Factory[] hookFactories = { new TestFactory(userObject) };
    final SerializedValue<MasterTriggerRestoreHook.Factory[]> serializedHooks = new SerializedValue<>(hookFactories);
    final CheckpointCoordinatorConfiguration coordinatorConfig = new CheckpointCoordinatorConfiguration(1000L, 10000L, 0L, 1, CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION, true, false, 0, 0);
    final SerializedValue<StateBackend> serializedBackend = new SerializedValue<StateBackend>(new CustomStateBackend(userObject));
    final SerializedValue<CheckpointStorage> serializedStorage = new SerializedValue<CheckpointStorage>(new CustomCheckpointStorage(userObject));
    final JobCheckpointingSettings settings = new JobCheckpointingSettings(coordinatorConfig, serializedBackend, TernaryBoolean.UNDEFINED, serializedStorage, serializedHooks);

    final JobGraph originalGraph = JobGraphBuilder.newStreamingJobGraphBuilder().setJobCheckpointingSettings(settings).build();
    // Round-trip the job graph through serialization to check that the behavior is
    // correct under distributed execution.
    final JobGraph copiedGraph = CommonTestUtils.createCopySerializable(originalGraph);
    final ExecutionGraph executionGraph = TestingDefaultExecutionGraphBuilder.newBuilder().setJobGraph(copiedGraph).setUserClassLoader(userClassLoader).build();

    final int registeredHooks = executionGraph.getCheckpointCoordinator().getNumberOfRegisteredMasterHooks();
    assertEquals(1, registeredHooks);
    // The backend check reads the settings of the original (non-copied) job graph.
    final StateBackend restoredBackend = originalGraph.getCheckpointingSettings().getDefaultStateBackend().deserializeValue(userClassLoader);
    assertTrue(restoredBackend instanceof CustomStateBackend);
}
Also used : Serializable(java.io.Serializable) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) SerializedValue(org.apache.flink.util.SerializedValue) StateBackend(org.apache.flink.runtime.state.StateBackend) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) AbstractKeyedStateBackend(org.apache.flink.runtime.state.AbstractKeyedStateBackend) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ClassLoaderUtils(org.apache.flink.testutils.ClassLoaderUtils) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) Test(org.junit.Test)

Example 4 with CheckpointStorage

use of org.apache.flink.runtime.state.CheckpointStorage in project flink by apache.

the class Checkpoints method loadCheckpointStorage.

/**
 * Loads the checkpoint storage for the given configuration, never returning null.
 *
 * <p>The state backend is loaded first via {@code loadStateBackend} and handed to
 * {@code CheckpointStorageLoader.load}. Any {@link Throwable} raised while loading the storage
 * is logged (if a logger is given) and otherwise ignored; in that case, or when the loader
 * yields null, a new {@link JobManagerCheckpointStorage} is returned instead.
 *
 * @param configuration configuration from which the backend and the storage are loaded
 * @param classLoader class loader passed on to the backend and storage loaders
 * @param logger optional logger for progress and failure messages; may be null
 * @return the loaded checkpoint storage, or a {@link JobManagerCheckpointStorage} as fallback
 */
@Nonnull
public static CheckpointStorage loadCheckpointStorage(Configuration configuration, ClassLoader classLoader, @Nullable Logger logger) {
    StateBackend backend = loadStateBackend(configuration, classLoader, logger);
    if (logger != null) {
        logger.info("Attempting to load configured checkpoint storage for savepoint disposal");
    }
    CheckpointStorage checkpointStorage = null;
    try {
        checkpointStorage = CheckpointStorageLoader.load(null, null, backend, configuration, classLoader, null);
    } catch (Throwable t) {
        // catches exceptions and errors (like linking errors); loading is best-effort
        if (logger != null) {
            // the failing step is the checkpoint storage load, not the state backend load
            logger.info("Could not load configured checkpoint storage.");
            logger.debug("Detailed exception:", t);
        }
    }
    if (checkpointStorage == null) {
        // Fall back to the JobManager checkpoint storage when nothing could be loaded.
        // Per the original note, it is FileSystem-based for metadata.
        return new JobManagerCheckpointStorage();
    }
    return checkpointStorage;
}
Also used : CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) JobManagerCheckpointStorage(org.apache.flink.runtime.state.storage.JobManagerCheckpointStorage) StateBackend(org.apache.flink.runtime.state.StateBackend) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) Nonnull(javax.annotation.Nonnull)

Example 5 with CheckpointStorage

use of org.apache.flink.runtime.state.CheckpointStorage in project flink by apache.

the class ChangelogStateBackendLoadingTest method testLoadingDefault.

/**
 * Loads the state backend and the checkpoint storage without any application-defined values and
 * checks that the resulting backend is a {@code HashMapStateBackend}.
 */
@Test
public void testLoadingDefault() throws Exception {
    final StateBackend loadedBackend = StateBackendLoader.fromApplicationOrConfigOrDefault(null, TernaryBoolean.UNDEFINED, config(), cl, null);
    // The storage is loaded as well, but only the backend type is asserted below.
    final CheckpointStorage loadedStorage = CheckpointStorageLoader.load(null, null, loadedBackend, config(), cl, null);
    final boolean isHashMapBackend = loadedBackend instanceof HashMapStateBackend;
    assertTrue(isHashMapBackend);
}
Also used : CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) JobManagerCheckpointStorage(org.apache.flink.runtime.state.storage.JobManagerCheckpointStorage) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) EmbeddedRocksDBStateBackend(org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend) StateBackend(org.apache.flink.runtime.state.StateBackend) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) DelegatingStateBackend(org.apache.flink.runtime.state.delegate.DelegatingStateBackend) MemoryStateBackend(org.apache.flink.runtime.state.memory.MemoryStateBackend) AbstractStateBackend(org.apache.flink.runtime.state.AbstractStateBackend) AbstractKeyedStateBackend(org.apache.flink.runtime.state.AbstractKeyedStateBackend) ConfigurableStateBackend(org.apache.flink.runtime.state.ConfigurableStateBackend) Test(org.junit.Test)

Aggregations

CheckpointStorage (org.apache.flink.runtime.state.CheckpointStorage)13 StateBackend (org.apache.flink.runtime.state.StateBackend)10 JobManagerCheckpointStorage (org.apache.flink.runtime.state.storage.JobManagerCheckpointStorage)7 AbstractKeyedStateBackend (org.apache.flink.runtime.state.AbstractKeyedStateBackend)6 OperatorStateBackend (org.apache.flink.runtime.state.OperatorStateBackend)6 HashMapStateBackend (org.apache.flink.runtime.state.hashmap.HashMapStateBackend)6 EmbeddedRocksDBStateBackend (org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend)5 AbstractStateBackend (org.apache.flink.runtime.state.AbstractStateBackend)5 ConfigurableStateBackend (org.apache.flink.runtime.state.ConfigurableStateBackend)5 DelegatingStateBackend (org.apache.flink.runtime.state.delegate.DelegatingStateBackend)5 MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend)5 Test (org.junit.Test)5 JobCheckpointingSettings (org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings)4 IOException (java.io.IOException)3 CheckpointCoordinatorConfiguration (org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration)3 ArrayList (java.util.ArrayList)2 Nonnull (javax.annotation.Nonnull)2 MasterTriggerRestoreHook (org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook)2 SerializedValue (org.apache.flink.util.SerializedValue)2 File (java.io.File)1