use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class CheckpointCoordinatorTest method testTriggerAndConfirmSimpleCheckpoint.
@Test
public void testTriggerAndConfirmSimpleCheckpoint() {
try {
final JobID jid = new JobID();
final long timestamp = System.currentTimeMillis();
// create some mock Execution vertices that receive the checkpoint trigger messages
final ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
final ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
ExecutionVertex vertex1 = mockExecutionVertex(attemptID1);
ExecutionVertex vertex2 = mockExecutionVertex(attemptID2);
// set up the coordinator and validate the initial state
CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { vertex1, vertex2 }, new ExecutionVertex[] { vertex1, vertex2 }, new ExecutionVertex[] { vertex1, vertex2 }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
assertEquals(0, coord.getNumberOfPendingCheckpoints());
assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, coord.getNumScheduledTasks());
// trigger the first checkpoint. this should succeed
assertTrue(coord.triggerCheckpoint(timestamp, false));
// validate that we have a pending checkpoint
assertEquals(1, coord.getNumberOfPendingCheckpoints());
assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(1, coord.getNumScheduledTasks());
long checkpointId = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
PendingCheckpoint checkpoint = coord.getPendingCheckpoints().get(checkpointId);
assertNotNull(checkpoint);
assertEquals(checkpointId, checkpoint.getCheckpointId());
assertEquals(timestamp, checkpoint.getCheckpointTimestamp());
assertEquals(jid, checkpoint.getJobId());
assertEquals(2, checkpoint.getNumberOfNonAcknowledgedTasks());
assertEquals(0, checkpoint.getNumberOfAcknowledgedTasks());
assertEquals(0, checkpoint.getTaskStates().size());
assertFalse(checkpoint.isDiscarded());
assertFalse(checkpoint.isFullyAcknowledged());
// check that the vertices received the trigger checkpoint message
{
verify(vertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId), eq(timestamp), any(CheckpointOptions.class));
verify(vertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId), eq(timestamp), any(CheckpointOptions.class));
}
// acknowledge from one of the tasks
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId));
assertEquals(1, checkpoint.getNumberOfAcknowledgedTasks());
assertEquals(1, checkpoint.getNumberOfNonAcknowledgedTasks());
assertFalse(checkpoint.isDiscarded());
assertFalse(checkpoint.isFullyAcknowledged());
// acknowledge the same task again (should not matter)
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId));
assertFalse(checkpoint.isDiscarded());
assertFalse(checkpoint.isFullyAcknowledged());
// acknowledge the other task.
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointId));
// the checkpoint is internally converted to a successful checkpoint and the
// pending checkpoint object is disposed
assertTrue(checkpoint.isDiscarded());
// the now we should have a completed checkpoint
assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, coord.getNumberOfPendingCheckpoints());
// the canceler should be removed now
assertEquals(0, coord.getNumScheduledTasks());
// validate that the relevant tasks got a confirmation message
{
verify(vertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId), eq(timestamp), any(CheckpointOptions.class));
verify(vertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId), eq(timestamp), any(CheckpointOptions.class));
}
CompletedCheckpoint success = coord.getSuccessfulCheckpoints().get(0);
assertEquals(jid, success.getJobId());
assertEquals(timestamp, success.getTimestamp());
assertEquals(checkpoint.getCheckpointId(), success.getCheckpointID());
assertTrue(success.getTaskStates().isEmpty());
// ---------------
// trigger another checkpoint and see that this one replaces the other checkpoint
// ---------------
final long timestampNew = timestamp + 7;
coord.triggerCheckpoint(timestampNew, false);
long checkpointIdNew = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointIdNew));
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointIdNew));
assertEquals(0, coord.getNumberOfPendingCheckpoints());
assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, coord.getNumScheduledTasks());
CompletedCheckpoint successNew = coord.getSuccessfulCheckpoints().get(0);
assertEquals(jid, successNew.getJobId());
assertEquals(timestampNew, successNew.getTimestamp());
assertEquals(checkpointIdNew, successNew.getCheckpointID());
assertTrue(successNew.getTaskStates().isEmpty());
// validate that the relevant tasks got a confirmation message
{
verify(vertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointIdNew), eq(timestampNew), any(CheckpointOptions.class));
verify(vertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointIdNew), eq(timestampNew), any(CheckpointOptions.class));
verify(vertex1.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointIdNew), eq(timestampNew));
verify(vertex2.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointIdNew), eq(timestampNew));
}
coord.shutdown(JobStatus.FINISHED);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class CheckpointCoordinatorTest method testPeriodicSchedulingWithInactiveTasks.
@Test
public void testPeriodicSchedulingWithInactiveTasks() {
try {
final JobID jid = new JobID();
// create some mock execution vertices and trigger some checkpoint
final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID();
final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID();
final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID);
ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID);
ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);
final AtomicReference<ExecutionState> currentState = new AtomicReference<>(ExecutionState.CREATED);
when(triggerVertex.getCurrentExecutionAttempt().getState()).thenAnswer(new Answer<ExecutionState>() {
@Override
public ExecutionState answer(InvocationOnMock invocation) {
return currentState.get();
}
});
CheckpointCoordinator coord = new CheckpointCoordinator(jid, // periodic interval is 10 ms
10, // timeout is very long (200 s)
200000, // no extra delay
0L, // max two concurrent checkpoints
2, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), null, Executors.directExecutor());
coord.startCheckpointScheduler();
// no checkpoint should have started so far
Thread.sleep(200);
assertEquals(0, coord.getNumberOfPendingCheckpoints());
// now move the state to RUNNING
currentState.set(ExecutionState.RUNNING);
// the coordinator should start checkpointing now
final long timeout = System.currentTimeMillis() + 10000;
do {
Thread.sleep(20);
} while (System.currentTimeMillis() < timeout && coord.getNumberOfPendingCheckpoints() == 0);
assertTrue(coord.getNumberOfPendingCheckpoints() > 0);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class CheckpointStateRestoreTest method testSetState.
/**
* Tests that on restore the task state is reset for each stateful task.
*/
@Test
public void testSetState() {
try {
final ChainedStateHandle<StreamStateHandle> serializedState = CheckpointCoordinatorTest.generateChainedStateHandle(new SerializableObject());
KeyGroupRange keyGroupRange = KeyGroupRange.of(0, 0);
List<SerializableObject> testStates = Collections.singletonList(new SerializableObject());
final KeyGroupsStateHandle serializedKeyGroupStates = CheckpointCoordinatorTest.generateKeyGroupState(keyGroupRange, testStates);
final JobID jid = new JobID();
final JobVertexID statefulId = new JobVertexID();
final JobVertexID statelessId = new JobVertexID();
Execution statefulExec1 = mockExecution();
Execution statefulExec2 = mockExecution();
Execution statefulExec3 = mockExecution();
Execution statelessExec1 = mockExecution();
Execution statelessExec2 = mockExecution();
ExecutionVertex stateful1 = mockExecutionVertex(statefulExec1, statefulId, 0, 3);
ExecutionVertex stateful2 = mockExecutionVertex(statefulExec2, statefulId, 1, 3);
ExecutionVertex stateful3 = mockExecutionVertex(statefulExec3, statefulId, 2, 3);
ExecutionVertex stateless1 = mockExecutionVertex(statelessExec1, statelessId, 0, 2);
ExecutionVertex stateless2 = mockExecutionVertex(statelessExec2, statelessId, 1, 2);
ExecutionJobVertex stateful = mockExecutionJobVertex(statefulId, new ExecutionVertex[] { stateful1, stateful2, stateful3 });
ExecutionJobVertex stateless = mockExecutionJobVertex(statelessId, new ExecutionVertex[] { stateless1, stateless2 });
Map<JobVertexID, ExecutionJobVertex> map = new HashMap<JobVertexID, ExecutionJobVertex>();
map.put(statefulId, stateful);
map.put(statelessId, stateless);
CheckpointCoordinator coord = new CheckpointCoordinator(jid, 200000L, 200000L, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { stateful1, stateful2, stateful3, stateless1, stateless2 }, new ExecutionVertex[] { stateful1, stateful2, stateful3, stateless1, stateless2 }, new ExecutionVertex[0], new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
// create ourselves a checkpoint with state
final long timestamp = 34623786L;
coord.triggerCheckpoint(timestamp, false);
PendingCheckpoint pending = coord.getPendingCheckpoints().values().iterator().next();
final long checkpointId = pending.getCheckpointId();
SubtaskState checkpointStateHandles = new SubtaskState(serializedState, null, null, serializedKeyGroupStates, null);
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, statefulExec1.getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles));
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, statefulExec2.getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles));
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, statefulExec3.getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles));
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, statelessExec1.getAttemptId(), checkpointId));
coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, statelessExec2.getAttemptId(), checkpointId));
assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, coord.getNumberOfPendingCheckpoints());
// let the coordinator inject the state
coord.restoreLatestCheckpointedState(map, true, false);
// verify that each stateful vertex got the state
final TaskStateHandles taskStateHandles = new TaskStateHandles(serializedState, Collections.<Collection<OperatorStateHandle>>singletonList(null), Collections.<Collection<OperatorStateHandle>>singletonList(null), Collections.singletonList(serializedKeyGroupStates), null);
BaseMatcher<TaskStateHandles> matcher = new BaseMatcher<TaskStateHandles>() {
@Override
public boolean matches(Object o) {
if (o instanceof TaskStateHandles) {
return o.equals(taskStateHandles);
}
return false;
}
@Override
public void describeTo(Description description) {
description.appendValue(taskStateHandles);
}
};
verify(statefulExec1, times(1)).setInitialState(Mockito.argThat(matcher));
verify(statefulExec2, times(1)).setInitialState(Mockito.argThat(matcher));
verify(statefulExec3, times(1)).setInitialState(Mockito.argThat(matcher));
verify(statelessExec1, times(0)).setInitialState(Mockito.<TaskStateHandles>any());
verify(statelessExec2, times(0)).setInitialState(Mockito.<TaskStateHandles>any());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class CheckpointStateRestoreTest method testNoCheckpointAvailable.
@Test
public void testNoCheckpointAvailable() {
try {
CheckpointCoordinator coord = new CheckpointCoordinator(new JobID(), 200000L, 200000L, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { mock(ExecutionVertex.class) }, new ExecutionVertex[] { mock(ExecutionVertex.class) }, new ExecutionVertex[0], new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
try {
coord.restoreLatestCheckpointedState(new HashMap<JobVertexID, ExecutionJobVertex>(), true, false);
fail("this should throw an exception");
} catch (IllegalStateException e) {
// expected
}
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class CheckpointCoordinatorTest method testRestoreLatestCheckpointFailureWhenMaxParallelismChanges.
/**
* Tests that the checkpoint restoration fails if the max parallelism of the job vertices has
* changed.
*
* @throws Exception
*/
@Test(expected = IllegalStateException.class)
public void testRestoreLatestCheckpointFailureWhenMaxParallelismChanges() throws Exception {
final JobID jid = new JobID();
final long timestamp = System.currentTimeMillis();
final JobVertexID jobVertexID1 = new JobVertexID();
final JobVertexID jobVertexID2 = new JobVertexID();
int parallelism1 = 3;
int parallelism2 = 2;
int maxParallelism1 = 42;
int maxParallelism2 = 13;
final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
// set up the coordinator and validate the initial state
CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
// trigger the checkpoint
coord.triggerCheckpoint(timestamp, false);
assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
for (int index = 0; index < jobVertex1.getParallelism(); index++) {
ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
}
for (int index = 0; index < jobVertex2.getParallelism(); index++) {
ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID2, index);
KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
}
List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
assertEquals(1, completedCheckpoints.size());
Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
int newMaxParallelism1 = 20;
int newMaxParallelism2 = 42;
final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, newMaxParallelism1);
final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, newMaxParallelism2);
tasks.put(jobVertexID1, newJobVertex1);
tasks.put(jobVertexID2, newJobVertex2);
coord.restoreLatestCheckpointedState(tasks, true, false);
fail("The restoration should have failed because the max parallelism changed.");
}
Aggregations