use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class CheckpointCoordinatorTest method testRestoreLatestCheckpointedStateWithChangingParallelism.
/**
* Tests the checkpoint restoration with changing parallelism of job vertex with partitioned
* state.
*
* @throws Exception
*/
private void testRestoreLatestCheckpointedStateWithChangingParallelism(boolean scaleOut) throws Exception {
final JobID jid = new JobID();
final long timestamp = System.currentTimeMillis();
final JobVertexID jobVertexID1 = new JobVertexID();
final JobVertexID jobVertexID2 = new JobVertexID();
int parallelism1 = 3;
int parallelism2 = scaleOut ? 2 : 13;
int maxParallelism1 = 42;
int maxParallelism2 = 13;
int newParallelism2 = scaleOut ? 13 : 2;
final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
// set up the coordinator and validate the initial state
CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
// trigger the checkpoint
coord.triggerCheckpoint(timestamp, false);
assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
//vertex 1
for (int index = 0; index < jobVertex1.getParallelism(); index++) {
ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
ChainedStateHandle<OperatorStateHandle> opStateBackend = generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), true);
SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, opStateBackend, null, keyedStateBackend, keyedStateRaw);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
}
//vertex 2
final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesBackend = new ArrayList<>(jobVertex2.getParallelism());
final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesRaw = new ArrayList<>(jobVertex2.getParallelism());
for (int index = 0; index < jobVertex2.getParallelism(); index++) {
KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), true);
ChainedStateHandle<OperatorStateHandle> opStateBackend = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
ChainedStateHandle<OperatorStateHandle> opStateRaw = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, true);
expectedOpStatesBackend.add(opStateBackend);
expectedOpStatesRaw.add(opStateRaw);
SubtaskState checkpointStateHandles = new SubtaskState(new ChainedStateHandle<>(Collections.<StreamStateHandle>singletonList(null)), opStateBackend, opStateRaw, keyedStateBackend, keyedStateRaw);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
}
List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
assertEquals(1, completedCheckpoints.size());
Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
// rescale vertex 2
final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);
tasks.put(jobVertexID1, newJobVertex1);
tasks.put(jobVertexID2, newJobVertex2);
coord.restoreLatestCheckpointedState(tasks, true, false);
// verify the restored state
verifyStateRestore(jobVertexID1, newJobVertex1, keyGroupPartitions1);
List<List<Collection<OperatorStateHandle>>> actualOpStatesBackend = new ArrayList<>(newJobVertex2.getParallelism());
List<List<Collection<OperatorStateHandle>>> actualOpStatesRaw = new ArrayList<>(newJobVertex2.getParallelism());
for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), false);
KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), true);
TaskStateHandles taskStateHandles = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskStateHandles();
ChainedStateHandle<StreamStateHandle> operatorState = taskStateHandles.getLegacyOperatorState();
List<Collection<OperatorStateHandle>> opStateBackend = taskStateHandles.getManagedOperatorState();
List<Collection<OperatorStateHandle>> opStateRaw = taskStateHandles.getRawOperatorState();
Collection<KeyGroupsStateHandle> keyGroupStateBackend = taskStateHandles.getManagedKeyedState();
Collection<KeyGroupsStateHandle> keyGroupStateRaw = taskStateHandles.getRawKeyedState();
actualOpStatesBackend.add(opStateBackend);
actualOpStatesRaw.add(opStateRaw);
assertNull(operatorState);
compareKeyedState(Collections.singletonList(originalKeyedStateBackend), keyGroupStateBackend);
compareKeyedState(Collections.singletonList(originalKeyedStateRaw), keyGroupStateRaw);
}
comparePartitionableState(expectedOpStatesBackend, actualOpStatesBackend);
comparePartitionableState(expectedOpStatesRaw, actualOpStatesRaw);
}
use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class CheckpointCoordinatorTest method mockExecutionJobVertex.
static ExecutionJobVertex mockExecutionJobVertex(JobVertexID jobVertexID, int parallelism, int maxParallelism) {
final ExecutionJobVertex executionJobVertex = mock(ExecutionJobVertex.class);
ExecutionVertex[] executionVertices = new ExecutionVertex[parallelism];
for (int i = 0; i < parallelism; i++) {
executionVertices[i] = mockExecutionVertex(new ExecutionAttemptID(), jobVertexID, parallelism, maxParallelism, ExecutionState.RUNNING);
when(executionVertices[i].getParallelSubtaskIndex()).thenReturn(i);
}
when(executionJobVertex.getJobVertexId()).thenReturn(jobVertexID);
when(executionJobVertex.getTaskVertices()).thenReturn(executionVertices);
when(executionJobVertex.getParallelism()).thenReturn(parallelism);
when(executionJobVertex.getMaxParallelism()).thenReturn(maxParallelism);
when(executionJobVertex.isMaxParallelismConfigured()).thenReturn(true);
return executionJobVertex;
}
use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class CheckpointCoordinatorTest method testRestoreLatestCheckpointedState.
/**
* Tests that the checkpointed partitioned and non-partitioned state is assigned properly to
* the {@link Execution} upon recovery.
*
* @throws Exception
*/
@Test
public void testRestoreLatestCheckpointedState() throws Exception {
final JobID jid = new JobID();
final long timestamp = System.currentTimeMillis();
final JobVertexID jobVertexID1 = new JobVertexID();
final JobVertexID jobVertexID2 = new JobVertexID();
int parallelism1 = 3;
int parallelism2 = 2;
int maxParallelism1 = 42;
int maxParallelism2 = 13;
final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
// set up the coordinator and validate the initial state
CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
// trigger the checkpoint
coord.triggerCheckpoint(timestamp, false);
assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
for (int index = 0; index < jobVertex1.getParallelism(); index++) {
ChainedStateHandle<StreamStateHandle> nonPartitionedState = generateStateForVertex(jobVertexID1, index);
ChainedStateHandle<OperatorStateHandle> partitionableState = generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
KeyGroupsStateHandle partitionedKeyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
SubtaskState checkpointStateHandles = new SubtaskState(nonPartitionedState, partitionableState, null, partitionedKeyGroupState, null);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
}
for (int index = 0; index < jobVertex2.getParallelism(); index++) {
ChainedStateHandle<StreamStateHandle> nonPartitionedState = generateStateForVertex(jobVertexID2, index);
ChainedStateHandle<OperatorStateHandle> partitionableState = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
KeyGroupsStateHandle partitionedKeyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
SubtaskState checkpointStateHandles = new SubtaskState(nonPartitionedState, partitionableState, null, partitionedKeyGroupState, null);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
}
List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
assertEquals(1, completedCheckpoints.size());
Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
tasks.put(jobVertexID1, jobVertex1);
tasks.put(jobVertexID2, jobVertex2);
coord.restoreLatestCheckpointedState(tasks, true, false);
// verify the restored state
verifyStateRestore(jobVertexID1, jobVertex1, keyGroupPartitions1);
verifyStateRestore(jobVertexID2, jobVertex2, keyGroupPartitions2);
}
use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class CheckpointCoordinatorTest method testCheckpointStatsTrackerRestoreCallback.
/**
* Tests that the restore callbacks are called if registered.
*/
@Test
public void testCheckpointStatsTrackerRestoreCallback() throws Exception {
ExecutionVertex vertex1 = mockExecutionVertex(new ExecutionAttemptID());
StandaloneCompletedCheckpointStore store = new StandaloneCompletedCheckpointStore(1);
store.addCheckpoint(new CompletedCheckpoint(new JobID(), 0, 0, 0, Collections.<JobVertexID, TaskState>emptyMap()));
// set up the coordinator and validate the initial state
CheckpointCoordinator coord = new CheckpointCoordinator(new JobID(), 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { vertex1 }, new ExecutionVertex[] { vertex1 }, new ExecutionVertex[] { vertex1 }, new StandaloneCheckpointIDCounter(), store, null, Executors.directExecutor());
CheckpointStatsTracker tracker = mock(CheckpointStatsTracker.class);
coord.setCheckpointStatsTracker(tracker);
assertTrue(coord.restoreLatestCheckpointedState(Collections.<JobVertexID, ExecutionJobVertex>emptyMap(), false, true));
verify(tracker, times(1)).reportRestoredCheckpoint(any(RestoredCheckpointStats.class));
}
use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class CheckpointStateRestoreTest method testNonRestoredState.
/**
* Tests that the allow non restored state flag is correctly handled.
*
* The flag only applies for state that is part of the checkpoint.
*/
@Test
public void testNonRestoredState() throws Exception {
// --- (1) Create tasks to restore checkpoint with ---
JobVertexID jobVertexId1 = new JobVertexID();
JobVertexID jobVertexId2 = new JobVertexID();
// 1st JobVertex
ExecutionVertex vertex11 = mockExecutionVertex(mockExecution(), jobVertexId1, 0, 3);
ExecutionVertex vertex12 = mockExecutionVertex(mockExecution(), jobVertexId1, 1, 3);
ExecutionVertex vertex13 = mockExecutionVertex(mockExecution(), jobVertexId1, 2, 3);
// 2nd JobVertex
ExecutionVertex vertex21 = mockExecutionVertex(mockExecution(), jobVertexId2, 0, 2);
ExecutionVertex vertex22 = mockExecutionVertex(mockExecution(), jobVertexId2, 1, 2);
ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexId1, new ExecutionVertex[] { vertex11, vertex12, vertex13 });
ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexId2, new ExecutionVertex[] { vertex21, vertex22 });
Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
tasks.put(jobVertexId1, jobVertex1);
tasks.put(jobVertexId2, jobVertex2);
CheckpointCoordinator coord = new CheckpointCoordinator(new JobID(), Integer.MAX_VALUE, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] {}, new ExecutionVertex[] {}, new ExecutionVertex[] {}, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
ChainedStateHandle<StreamStateHandle> serializedState = CheckpointCoordinatorTest.generateChainedStateHandle(new SerializableObject());
// --- (2) Checkpoint misses state for a jobVertex (should work) ---
Map<JobVertexID, TaskState> checkpointTaskStates = new HashMap<>();
{
TaskState taskState = new TaskState(jobVertexId1, 3, 3, 1);
taskState.putState(0, new SubtaskState(serializedState, null, null, null, null));
taskState.putState(1, new SubtaskState(serializedState, null, null, null, null));
taskState.putState(2, new SubtaskState(serializedState, null, null, null, null));
checkpointTaskStates.put(jobVertexId1, taskState);
}
CompletedCheckpoint checkpoint = new CompletedCheckpoint(new JobID(), 0, 1, 2, new HashMap<>(checkpointTaskStates));
coord.getCheckpointStore().addCheckpoint(checkpoint);
coord.restoreLatestCheckpointedState(tasks, true, false);
coord.restoreLatestCheckpointedState(tasks, true, true);
// --- (3) JobVertex missing for task state that is part of the checkpoint ---
JobVertexID newJobVertexID = new JobVertexID();
// There is no task for this
{
TaskState taskState = new TaskState(jobVertexId1, 1, 1, 1);
taskState.putState(0, new SubtaskState(serializedState, null, null, null, null));
checkpointTaskStates.put(newJobVertexID, taskState);
}
checkpoint = new CompletedCheckpoint(new JobID(), 1, 2, 3, new HashMap<>(checkpointTaskStates));
coord.getCheckpointStore().addCheckpoint(checkpoint);
// (i) Allow non restored state (should succeed)
coord.restoreLatestCheckpointedState(tasks, true, true);
// (ii) Don't allow non restored state (should fail)
try {
coord.restoreLatestCheckpointedState(tasks, true, false);
fail("Did not throw the expected Exception.");
} catch (IllegalStateException ignored) {
}
}
Aggregations