Use of org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata in the Apache Flink project.
Class SavepointWriter, method fromExistingSavepoint.
/**
 * Creates a {@link SavepointWriter} that starts from the contents of an existing savepoint.
 * Useful when the state of an existing application should be modified or extended. The
 * savepoint will be written using the state backend defined via the cluster's configuration.
 *
 * @param path The path to an existing savepoint on disk.
 * @return A {@link SavepointWriter}.
 * @throws IOException declared by this method; presumably raised when the savepoint metadata
 *     cannot be loaded from {@code path}.
 * @throws RuntimeException if the savepoint does not contain any operator state.
 * @see #fromExistingSavepoint(String, StateBackend)
 * @see #withConfiguration(ConfigOption, Object)
 */
public static SavepointWriter fromExistingSavepoint(String path) throws IOException {
    final CheckpointMetadata loaded = SavepointLoader.loadSavepointMetadata(path);

    // The new savepoint adopts the largest max parallelism found among the loaded operators.
    final int maxParallelism =
            loaded.getOperatorStates().stream()
                    .mapToInt(OperatorState::getMaxParallelism)
                    .max()
                    .orElseThrow(
                            () ->
                                    new RuntimeException(
                                            "Savepoint must contain at least one operator state."));

    // The second constructor argument is deliberately null here
    // (presumably the state backend, cf. the two-argument overload; confirm).
    return new SavepointWriter(
            new SavepointMetadataV2(
                    maxParallelism, loaded.getMasterStates(), loaded.getOperatorStates()),
            null);
}
Use of org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata in the Apache Flink project.
Class Checkpoints, method loadAndValidateCheckpoint.
/**
 * Loads the checkpoint/savepoint metadata stored at {@code location}, validates it against the
 * operators of the given job vertices and wraps the accepted state in a
 * {@link CompletedCheckpoint}.
 *
 * <p>An operator state is accepted when its operator ID (generated or user-defined) belongs to
 * one of {@code tasks} and its max parallelism either equals the vertex's max parallelism or
 * the vertex reports that it can be rescaled to it.
 *
 * @param jobId ID of the job the checkpoint is restored for; must not be null.
 * @param tasks job vertices of the new program, keyed by vertex ID; must not be null.
 * @param location storage location providing the metadata handle and external pointer; must
 *     not be null.
 * @param classLoader class loader handed to the metadata deserialization; must not be null.
 * @param allowNonRestoredState if {@code true}, state of operators that are not part of
 *     {@code tasks} is skipped; if {@code false}, such state is only tolerated when it is empty
 *     (no coordinator state and no subtask with state).
 * @param checkpointProperties properties passed through to the resulting checkpoint.
 * @param restoreMode when {@link RestoreMode#CLAIM}, the location is wrapped in a
 *     {@code ClaimModeCompletedStorageLocation}.
 * @return the completed checkpoint containing only the operator states that could be mapped.
 * @throws IOException if reading the metadata fails.
 * @throws IllegalStateException if an operator's max parallelism is incompatible with the
 *     matching job vertex.
 */
public static CompletedCheckpoint loadAndValidateCheckpoint(
        JobID jobId,
        Map<JobVertexID, ExecutionJobVertex> tasks,
        CompletedCheckpointStorageLocation location,
        ClassLoader classLoader,
        boolean allowNonRestoredState,
        CheckpointProperties checkpointProperties,
        RestoreMode restoreMode)
        throws IOException {
    checkNotNull(jobId, "jobId");
    checkNotNull(tasks, "tasks");
    checkNotNull(location, "location");
    checkNotNull(classLoader, "classLoader");

    final StreamStateHandle metadataHandle = location.getMetadataHandle();
    final String checkpointPointer = location.getExternalPointer();

    // (1) load the savepoint
    final CheckpointMetadata checkpointMetadata;
    // Both streams are managed resources so the wrapper is closed as well as the raw stream.
    try (InputStream in = metadataHandle.openInputStream();
            DataInputStream dis = new DataInputStream(in)) {
        checkpointMetadata = loadCheckpointMetadata(dis, classLoader, checkpointPointer);
    }

    // generate mapping from operator to task; both the generated and the (optional)
    // user-defined operator ID resolve to the same job vertex
    Map<OperatorID, ExecutionJobVertex> operatorToJobVertexMapping = new HashMap<>();
    for (ExecutionJobVertex task : tasks.values()) {
        for (OperatorIDPair operatorIDPair : task.getOperatorIDs()) {
            operatorToJobVertexMapping.put(operatorIDPair.getGeneratedOperatorID(), task);
            operatorIDPair
                    .getUserDefinedOperatorID()
                    .ifPresent(id -> operatorToJobVertexMapping.put(id, task));
        }
    }

    // (2) validate it (parallelism, etc)
    HashMap<OperatorID, OperatorState> operatorStates =
            new HashMap<>(checkpointMetadata.getOperatorStates().size());
    for (OperatorState operatorState : checkpointMetadata.getOperatorStates()) {
        ExecutionJobVertex executionJobVertex =
                operatorToJobVertexMapping.get(operatorState.getOperatorID());

        if (executionJobVertex != null) {
            if (executionJobVertex.getMaxParallelism() == operatorState.getMaxParallelism()
                    || executionJobVertex.canRescaleMaxParallelism(
                            operatorState.getMaxParallelism())) {
                operatorStates.put(operatorState.getOperatorID(), operatorState);
            } else {
                // Identify the snapshot by its external pointer, like the other failure
                // paths in this method; the metadata object itself is not a reliable
                // identifier in a message.
                String msg =
                        String.format(
                                "Failed to rollback to checkpoint/savepoint %s. "
                                        + "Max parallelism mismatch between checkpoint/savepoint state and new program. "
                                        + "Cannot map operator %s with max parallelism %d to new program with "
                                        + "max parallelism %d. This indicates that the program has been changed "
                                        + "in a non-compatible way after the checkpoint/savepoint.",
                                checkpointPointer,
                                operatorState.getOperatorID(),
                                operatorState.getMaxParallelism(),
                                executionJobVertex.getMaxParallelism());
                throw new IllegalStateException(msg);
            }
        } else if (allowNonRestoredState) {
            LOG.info("Skipping savepoint state for operator {}.", operatorState.getOperatorID());
        } else {
            // Unmapped state is only acceptable when it carries no data at all.
            if (operatorState.getCoordinatorState() != null) {
                throwNonRestoredStateException(checkpointPointer, operatorState.getOperatorID());
            }
            for (OperatorSubtaskState operatorSubtaskState : operatorState.getStates()) {
                if (operatorSubtaskState.hasState()) {
                    throwNonRestoredStateException(
                            checkpointPointer, operatorState.getOperatorID());
                }
            }
            LOG.info(
                    "Skipping empty savepoint state for operator {}.",
                    operatorState.getOperatorID());
        }
    }

    return new CompletedCheckpoint(
            jobId,
            checkpointMetadata.getCheckpointId(),
            0L,
            0L,
            operatorStates,
            checkpointMetadata.getMasterStates(),
            checkpointProperties,
            restoreMode == RestoreMode.CLAIM
                    ? new ClaimModeCompletedStorageLocation(location)
                    : location);
}
Use of org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata in the Apache Flink project.
Class Checkpoints, method disposeSavepoint.
// ------------------------------------------------------------------------
// Savepoint Disposal Hooks
// ------------------------------------------------------------------------
/**
 * Disposes the savepoint addressed by {@code pointer}: its metadata handle, the state
 * referenced by the metadata, and finally the storage location itself.
 *
 * <p>All three disposal steps are always attempted. Exceptions raised along the way are
 * collected (the first one is kept, later ones are attached as suppressed) and rethrown once
 * all steps have run.
 *
 * @param pointer external pointer of the savepoint to dispose; must not be null.
 * @param checkpointStorage storage used to resolve {@code pointer}; must not be null.
 * @param classLoader class loader handed to the metadata deserialization; must not be null.
 * @throws IOException if resolving or reading the metadata fails, or if any disposal step
 *     failed.
 * @throws FlinkException declared by this method's signature.
 */
public static void disposeSavepoint(
        String pointer, CheckpointStorage checkpointStorage, ClassLoader classLoader)
        throws IOException, FlinkException {
    // The null-check messages name the actual parameters so a failure points at the right
    // argument.
    checkNotNull(pointer, "pointer");
    checkNotNull(checkpointStorage, "checkpointStorage");
    checkNotNull(classLoader, "classLoader");

    final CompletedCheckpointStorageLocation checkpointLocation =
            checkpointStorage.resolveCheckpoint(pointer);
    final StreamStateHandle metadataHandle = checkpointLocation.getMetadataHandle();

    // load the savepoint object (the metadata) to have all the state handles that we need
    // to dispose of all state
    final CheckpointMetadata metadata;
    try (InputStream in = metadataHandle.openInputStream();
            DataInputStream dis = new DataInputStream(in)) {
        metadata = loadCheckpointMetadata(dis, classLoader, pointer);
    }

    Exception exception = null;

    // first dispose the savepoint metadata, so that the savepoint is not
    // addressable any more even if the following disposal fails
    try {
        metadataHandle.discardState();
    } catch (Exception e) {
        exception = e;
    }

    // now dispose the savepoint data
    try {
        metadata.dispose();
    } catch (Exception e) {
        exception = ExceptionUtils.firstOrSuppressed(e, exception);
    }

    // now dispose the location (directory, table, whatever)
    try {
        checkpointLocation.disposeStorageLocation();
    } catch (Exception e) {
        exception = ExceptionUtils.firstOrSuppressed(e, exception);
    }

    // forward exceptions caught in the process
    if (exception != null) {
        ExceptionUtils.rethrowIOException(exception);
    }
}
Use of org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata in the Apache Flink project.
Class PendingCheckpoint, method finalizeCheckpoint.
/**
 * Turns this fully acknowledged pending checkpoint into a {@link CompletedCheckpoint}: writes
 * the checkpoint metadata to the target location, completes the completion promise, reports to
 * the optional stats callback and marks this pending checkpoint as disposed without dropping
 * its state.
 *
 * <p>The whole operation runs while holding {@code lock}. Any failure completes the promise
 * exceptionally before being rethrown.
 *
 * @param checkpointsCleaner forwarded to {@code dispose}.
 * @param postCleanup forwarded to {@code dispose}.
 * @param executor forwarded to {@code dispose}.
 * @param statsCallback optional stats tracker for this checkpoint; may be null.
 * @return the completed checkpoint.
 * @throws IOException if finalization fails (rethrown via {@code ExceptionUtils}).
 * @throws IllegalStateException via {@code checkState} if the checkpoint is already disposed
 *     or not yet fully acknowledged (presumably; depends on the {@code checkState} helper).
 */
public CompletedCheckpoint finalizeCheckpoint(
        CheckpointsCleaner checkpointsCleaner,
        Runnable postCleanup,
        Executor executor,
        @Nullable PendingCheckpointStats statsCallback)
        throws IOException {
    synchronized (lock) {
        checkState(!isDisposed(), "checkpoint is discarded");
        checkState(
                isFullyAcknowledged(),
                "Pending checkpoint has not been fully acknowledged yet");

        // Everything below is guarded so that a failure also fails the completion promise.
        try {
            checkpointPlan.fulfillFinishedTaskStatus(operatorStates);

            // Persist the metadata and obtain the finalized storage location.
            final CheckpointMetadata metadata =
                    new CheckpointMetadata(checkpointId, operatorStates.values(), masterStates);
            final CompletedCheckpointStorageLocation finalizedLocation;
            try (CheckpointMetadataOutputStream metadataOut =
                    targetLocation.createMetadataOutputStream()) {
                Checkpoints.storeCheckpointMetadata(metadata, metadataOut);
                finalizedLocation = metadataOut.closeAndFinalizeCheckpoint();
            }

            final CompletedCheckpoint result =
                    new CompletedCheckpoint(
                            jobId,
                            checkpointId,
                            checkpointTimestamp,
                            System.currentTimeMillis(),
                            operatorStates,
                            masterStates,
                            props,
                            finalizedLocation);
            onCompletionPromise.complete(result);

            if (statsCallback != null) {
                // Integer division already yields 0 for a size of 0.
                final long stateSizeKb = statsCallback.getStateSize() / 1024;
                LOG.trace(
                        "Checkpoint {} size: {}Kb, duration: {}ms",
                        checkpointId,
                        stateSizeKb,
                        statsCallback.getEndToEndDuration());

                // Finalize the stats and hand the completed checkpoint its discard callback.
                result.setDiscardCallback(
                        statsCallback.reportCompletedCheckpoint(
                                finalizedLocation.getExternalPointer()));
            }

            // Mark this pending checkpoint as disposed, but do NOT drop the state.
            dispose(false, checkpointsCleaner, postCleanup, executor);

            return result;
        } catch (Throwable failure) {
            onCompletionPromise.completeExceptionally(failure);
            ExceptionUtils.rethrowIOException(failure);
            // Only here to satisfy the compiler.
            return null;
        }
    }
}
Use of org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata in the Apache Flink project.
Class TestUtils, method createSavepointWithOperatorState.
/**
 * Writes checkpoint metadata for the given operators into {@code savepointFile}.
 *
 * <p>The metadata is built from the operator states produced by
 * {@code createOperatorState(operatorIds)} and an empty list of master states.
 *
 * @param savepointFile file the metadata is written to.
 * @param savepointId checkpoint ID stored in the metadata.
 * @param operatorIds operators for which state is created.
 * @return {@code savepointFile}, for call chaining.
 * @throws IOException if the file cannot be opened or written.
 */
public static File createSavepointWithOperatorState(
        File savepointFile, long savepointId, OperatorID... operatorIds) throws IOException {
    // Build the metadata before opening the file, so nothing is created if this step fails.
    final CheckpointMetadata metadata =
            new CheckpointMetadata(
                    savepointId, createOperatorState(operatorIds), Collections.emptyList());

    try (FileOutputStream target = new FileOutputStream(savepointFile)) {
        Checkpoints.storeCheckpointMetadata(metadata, target);
    }

    return savepointFile;
}
Aggregations