Use of org.apache.flink.runtime.checkpoint.CompletedCheckpoint in project flink by apache.
The class SavepointLoader, method loadAndValidateSavepoint.
/**
* Loads a savepoint back as a {@link CompletedCheckpoint}.
*
* <p>This method verifies that tasks and parallelism still match the savepoint parameters.
*
* @param jobId The JobID of the job to load the savepoint for.
* @param tasks Tasks that will possibly be reset.
* @param savepointPath The path of the savepoint to roll back to.
* @param classLoader The class loader to resolve serialized classes in legacy savepoint versions.
* @param allowNonRestoredState Whether to allow skipping checkpoint state that cannot be mapped
* to any job vertex in tasks.
*
* @return The savepoint loaded as a {@link CompletedCheckpoint}.
* @throws IllegalStateException If the savepoint state does not match the program
* @throws IOException If loading the savepoint from the savepoint store fails
*/
public static CompletedCheckpoint loadAndValidateSavepoint(
        JobID jobId,
        Map<JobVertexID, ExecutionJobVertex> tasks,
        String savepointPath,
        ClassLoader classLoader,
        boolean allowNonRestoredState) throws IOException {

    // (1) load the savepoint
    final Tuple2<Savepoint, StreamStateHandle> savepointAndHandle =
            SavepointStore.loadSavepointWithHandle(savepointPath, classLoader);
    final Savepoint savepoint = savepointAndHandle.f0;
    final StreamStateHandle metadataHandle = savepointAndHandle.f1;

    final Map<JobVertexID, TaskState> taskStates = new HashMap<>(savepoint.getTaskStates().size());
    boolean expandedToLegacyIds = false;

    // (2) validate it (parallelism, etc)
    for (TaskState taskState : savepoint.getTaskStates()) {
        ExecutionJobVertex executionJobVertex = tasks.get(taskState.getJobVertexID());
        // the vertex may carry a legacy JobVertexID, for example as generated
        // by older Flink versions; expand the search once for backwards compatibility.
        if (executionJobVertex == null && !expandedToLegacyIds) {
            tasks = ExecutionJobVertex.includeLegacyJobVertexIDs(tasks);
            executionJobVertex = tasks.get(taskState.getJobVertexID());
            expandedToLegacyIds = true;
            LOG.info("Could not find ExecutionJobVertex. Including legacy JobVertexIDs in search.");
        }

        if (executionJobVertex != null) {
            if (executionJobVertex.getMaxParallelism() == taskState.getMaxParallelism()
                    || !executionJobVertex.isMaxParallelismConfigured()) {
                taskStates.put(taskState.getJobVertexID(), taskState);
            } else {
String msg = String.format("Failed to rollback to savepoint %s. " + "Max parallelism mismatch between savepoint state and new program. " + "Cannot map operator %s with max parallelism %d to new program with " + "max parallelism %d. This indicates that the program has been changed " + "in a non-compatible way after the savepoint.", savepoint, taskState.getJobVertexID(), taskState.getMaxParallelism(), executionJobVertex.getMaxParallelism());
                throw new IllegalStateException(msg);
            }
        } else if (allowNonRestoredState) {
            LOG.info("Skipping savepoint state for operator {}.", taskState.getJobVertexID());
        } else {
String msg = String.format("Failed to rollback to savepoint %s. " + "Cannot map savepoint state for operator %s to the new program, " + "because the operator is not available in the new program. If " + "you want to allow to skip this, you can set the --allowNonRestoredState " + "option on the CLI.", savepointPath, taskState.getJobVertexID());
            throw new IllegalStateException(msg);
        }
    }

    // (3) convert to checkpoint so the system can fall back to it
    CheckpointProperties props = CheckpointProperties.forStandardSavepoint();
    return new CompletedCheckpoint(
            jobId, savepoint.getCheckpointId(), 0L, 0L, taskStates, props, metadataHandle, savepointPath);
}
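For orientation, here is a minimal caller-side sketch of how this method might be invoked. It is an illustration under stated assumptions, not code from the Flink repository: the savepoint path is made up, and the empty task map (in a real job this would typically come from something like ExecutionGraph#getAllVertices()) is only there to show the call shape.

// Hypothetical caller (illustrative only): with an empty task map and
// allowNonRestoredState = true, every TaskState in the savepoint is skipped
// rather than rejected.
JobID jobId = new JobID();
Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();

CompletedCheckpoint restored = SavepointLoader.loadAndValidateSavepoint(
        jobId, tasks, "file:///tmp/savepoints/savepoint-1234", classLoader, true);

On the command line, the same escape hatch surfaces as the -n/--allowNonRestoredState option of flink run, which is exactly what the second error message above points the user to.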
Use of org.apache.flink.runtime.checkpoint.CompletedCheckpoint in project flink by apache.
The class SavepointLoaderTest, method testLoadAndValidateSavepoint.
/**
* Tests loading and validation of savepoints with correct setup,
* parallelism mismatch, and a missing task.
*/
@Test
public void testLoadAndValidateSavepoint() throws Exception {
    File tmp = tmpFolder.newFolder();

    int parallelism = 128128;
    long checkpointId = Integer.MAX_VALUE + 123123L;
    JobVertexID vertexId = new JobVertexID();

    TaskState state = mock(TaskState.class);
    when(state.getParallelism()).thenReturn(parallelism);
    when(state.getJobVertexID()).thenReturn(vertexId);
    when(state.getMaxParallelism()).thenReturn(parallelism);
    when(state.getChainLength()).thenReturn(1);

    Map<JobVertexID, TaskState> taskStates = new HashMap<>();
    taskStates.put(vertexId, state);

    JobID jobId = new JobID();

    // Store savepoint
    SavepointV1 savepoint = new SavepointV1(checkpointId, taskStates.values());
    String path = SavepointStore.storeSavepoint(tmp.getAbsolutePath(), savepoint);

    ExecutionJobVertex vertex = mock(ExecutionJobVertex.class);
    when(vertex.getParallelism()).thenReturn(parallelism);
    when(vertex.getMaxParallelism()).thenReturn(parallelism);

    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    tasks.put(vertexId, vertex);

    ClassLoader ucl = Thread.currentThread().getContextClassLoader();

    // 1) Load and validate: everything correct
    CompletedCheckpoint loaded = SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, false);
    assertEquals(jobId, loaded.getJobId());
    assertEquals(checkpointId, loaded.getCheckpointID());

    // 2) Load and validate: max parallelism mismatch
    when(vertex.getMaxParallelism()).thenReturn(222);
    when(vertex.isMaxParallelismConfigured()).thenReturn(true);

    try {
        SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, false);
        fail("Did not throw expected Exception");
    } catch (IllegalStateException expected) {
        assertTrue(expected.getMessage().contains("Max parallelism mismatch"));
    }

    // 3) Load and validate: missing vertex
    assertNotNull(tasks.remove(vertexId));

    try {
        SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, false);
        fail("Did not throw expected Exception");
    } catch (IllegalStateException expected) {
        assertTrue(expected.getMessage().contains("allowNonRestoredState"));
    }

    // 4) Load and validate: ignore missing vertex
    SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, true);
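
    // 5) Hedged extension (not part of the original test): when the new
    // program has not explicitly configured a max parallelism, a mismatch
    // against the savepoint is tolerated and the state is still restored,
    // per the !isMaxParallelismConfigured() branch in the loader above.
    when(vertex.getMaxParallelism()).thenReturn(222);
    when(vertex.isMaxParallelismConfigured()).thenReturn(false);
    tasks.put(vertexId, vertex);

    CompletedCheckpoint tolerated = SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, false);
    assertEquals(checkpointId, tolerated.getCheckpointID());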
}