use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class StackTraceSampleCoordinatorTest method testCollectForDiscardedPendingSample.
/**
 * Verifies that handing in stack traces for a sample that has already been
 * cancelled does not raise an exception.
 */
@Test
public void testCollectForDiscardedPendingSample() throws Exception {
    final ExecutionVertex runningVertex =
        mockExecutionVertex(new ExecutionAttemptID(), ExecutionState.RUNNING, true);
    final ExecutionVertex[] sampledVertices = new ExecutionVertex[] { runningVertex };

    final Future<StackTraceSample> pendingSample =
        coord.triggerStackTraceSample(sampledVertices, 1, Time.milliseconds(100L), 0);
    assertFalse(pendingSample.isDone());

    // Cancelling the sample is expected to complete its future.
    coord.cancelStackTraceSample(0, null);
    assertTrue(pendingSample.isDone());

    // A late collect for the cancelled sample must not fail.
    final ExecutionAttemptID lateAttemptId =
        sampledVertices[0].getCurrentExecutionAttempt().getAttemptId();
    final ArrayList<StackTraceElement[]> noTraces = new ArrayList<StackTraceElement[]>();
    coord.collectStackTraces(0, lateAttemptId, noTraces);
}
use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class StackTraceSampleCoordinatorTest method mockExecutionVertexWithTimeout.
/**
 * Builds a mocked {@link ExecutionVertex} whose current execution attempt answers
 * stack trace sample requests with a future that is completed exceptionally with a
 * {@link TimeoutException} once the given delay has elapsed.
 *
 * @param executionId attempt id reported by the mocked execution
 * @param state execution state reported by the mocked execution
 * @param scheduledExecutorService executor on which the delayed failure is scheduled
 * @param timeout delay in milliseconds before the response future is failed
 * @return the mocked execution vertex
 */
private ExecutionVertex mockExecutionVertexWithTimeout(ExecutionAttemptID executionId, ExecutionState state, ScheduledExecutorService scheduledExecutorService, int timeout) {
    final CompletableFuture<StackTraceSampleResponse> sampleResponse = new FlinkCompletableFuture<>();

    // Mocked execution attempt: every sample request returns the shared response future.
    final Execution execution = mock(Execution.class);
    when(execution.getAttemptId()).thenReturn(executionId);
    when(execution.getState()).thenReturn(state);
    when(execution.requestStackTraceSample(anyInt(), anyInt(), any(Time.class), anyInt(), any(Time.class)))
        .thenReturn(sampleResponse);

    // Fail the response future after the configured delay.
    final Runnable failWithTimeout = new Runnable() {
        @Override
        public void run() {
            sampleResponse.completeExceptionally(new TimeoutException("Timeout"));
        }
    };
    scheduledExecutorService.schedule(failWithTimeout, timeout, TimeUnit.MILLISECONDS);

    // Mocked vertex exposing a fresh job vertex id and the mocked execution attempt.
    final ExecutionVertex mockedVertex = mock(ExecutionVertex.class);
    when(mockedVertex.getJobvertexId()).thenReturn(new JobVertexID());
    when(mockedVertex.getCurrentExecutionAttempt()).thenReturn(execution);
    return mockedVertex;
}
use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class CheckpointCoordinator method completePendingCheckpoint.
/**
 * Try to complete the given pending checkpoint.
 *
 * <p>The pending checkpoint is finalized (externalized or not, depending on its
 * properties), added to the completed checkpoint store, remembered as a recent
 * checkpoint id, and subsumed checkpoints are dropped. Whether or not this succeeds,
 * the pending checkpoint is removed from the pending checkpoints and queued requests
 * are triggered. On success the completion time is recorded and the current execution
 * attempt of every vertex in {@code tasksToCommitTo} is notified.
 *
 * <p>Important: This method should only be called in the checkpoint lock scope.
 *
 * @param pendingCheckpoint to complete
 * @throws CheckpointException if the completion failed
 */
private void completePendingCheckpoint(PendingCheckpoint pendingCheckpoint) throws CheckpointException {
    final long checkpointId = pendingCheckpoint.getCheckpointId();
    CompletedCheckpoint completedCheckpoint = null;

    try {
        // externalize the checkpoint if required
        if (pendingCheckpoint.getProps().externalizeCheckpoint()) {
            completedCheckpoint = pendingCheckpoint.finalizeCheckpointExternalized();
        } else {
            completedCheckpoint = pendingCheckpoint.finalizeCheckpointNonExternalized();
        }

        completedCheckpointStore.addCheckpoint(completedCheckpoint);
        rememberRecentCheckpointId(checkpointId);
        dropSubsumedCheckpoints(checkpointId);
    } catch (Exception exception) {
        // abort the current pending checkpoint if it has not been discarded yet
        if (!pendingCheckpoint.isDiscarded()) {
            pendingCheckpoint.abortError(exception);
        }

        if (completedCheckpoint != null) {
            // The checkpoint was finalized but a later step failed. Clean it up on the
            // executor instead of on the calling thread.
            final CompletedCheckpoint cc = completedCheckpoint;
            executor.execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        cc.discard();
                    } catch (Throwable t) {
                        LOG.warn("Could not properly discard completed checkpoint {}.", cc.getCheckpointID(), t);
                    }
                }
            });
        }

        throw new CheckpointException("Could not complete the pending checkpoint " + checkpointId + '.', exception);
    } finally {
        // Runs on success and on failure alike.
        pendingCheckpoints.remove(checkpointId);
        triggerQueuedRequests();
    }

    // From here on completedCheckpoint is non-null: the catch block above always throws.

    // record the time when this was completed, to calculate
    // the 'min delay between checkpoints'
    lastCheckpointCompletionNanos = System.nanoTime();

    LOG.info("Completed checkpoint {} ({} bytes in {} ms).", checkpointId, completedCheckpoint.getStateSize(), completedCheckpoint.getDuration());

    if (LOG.isDebugEnabled()) {
        final String prefix = "Checkpoint state: ";
        StringBuilder builder = new StringBuilder(prefix);
        for (TaskState state : completedCheckpoint.getTaskStates().values()) {
            builder.append(state);
            builder.append(", ");
        }
        // Remove the trailing ", ", but only if at least one state was appended;
        // otherwise the truncation would cut into the prefix itself.
        if (builder.length() > prefix.length()) {
            builder.setLength(builder.length() - 2);
        }
        LOG.debug(builder.toString());
    }

    // send the "notify complete" call to all vertices
    final long timestamp = completedCheckpoint.getTimestamp();
    for (ExecutionVertex ev : tasksToCommitTo) {
        Execution ee = ev.getCurrentExecutionAttempt();
        // skip vertices that report no current execution attempt
        if (ee != null) {
            ee.notifyCheckpointComplete(checkpointId, timestamp);
        }
    }
}
use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class CheckpointCoordinatorTest method testMinDelayBetweenSavepoints.
/**
 * Checks that two savepoints can be triggered back to back even though the
 * coordinator is configured with a very long minimum delay.
 */
@Test
public void testMinDelayBetweenSavepoints() throws Exception {
    final JobID jid = new JobID();
    final ExecutionAttemptID attemptId = new ExecutionAttemptID();
    final ExecutionVertex vertex = mockExecutionVertex(attemptId);

    final CheckpointCoordinator coordinator = new CheckpointCoordinator(
        jid,
        100000,
        200000,
        // very long min delay => should not affect savepoints
        100000000L,
        1,
        ExternalizedCheckpointSettings.none(),
        new ExecutionVertex[] { vertex },
        new ExecutionVertex[] { vertex },
        new ExecutionVertex[] { vertex },
        new StandaloneCheckpointIDCounter(),
        new StandaloneCompletedCheckpointStore(2),
        null,
        Executors.directExecutor());

    final String targetDirectory = tmpFolder.newFolder().getAbsolutePath();

    // Both savepoints must be accepted, i.e. their futures are still pending.
    final Future<CompletedCheckpoint> firstSavepoint = coordinator.triggerSavepoint(0, targetDirectory);
    assertFalse("Did not trigger savepoint", firstSavepoint.isDone());

    final Future<CompletedCheckpoint> secondSavepoint = coordinator.triggerSavepoint(1, targetDirectory);
    assertFalse("Did not trigger savepoint", secondSavepoint.isDone());
}
use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.
the class CheckpointCoordinatorTest method testTriggerAndDeclineCheckpointSimple.
/**
 * This test triggers a checkpoint and then sends a decline checkpoint message from
 * one of the tasks. The expected behaviour is that said checkpoint is discarded, its
 * scheduled canceller is removed and no pending checkpoint remains afterwards.
 * Further decline messages for the same checkpoint must have no effect.
 */
@Test
public void testTriggerAndDeclineCheckpointSimple() {
    try {
        final JobID jid = new JobID();
        final long timestamp = System.currentTimeMillis();

        // create some mock Execution vertices that receive the checkpoint trigger messages
        final ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
        final ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
        ExecutionVertex vertex1 = mockExecutionVertex(attemptID1);
        ExecutionVertex vertex2 = mockExecutionVertex(attemptID2);

        // set up the coordinator and validate the initial state
        CheckpointCoordinator coord = new CheckpointCoordinator(
            jid,
            600000,
            600000,
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            new ExecutionVertex[] { vertex1, vertex2 },
            new ExecutionVertex[] { vertex1, vertex2 },
            new ExecutionVertex[] { vertex1, vertex2 },
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(1),
            null,
            Executors.directExecutor());

        assertEquals(0, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());

        // trigger the first checkpoint. this should succeed
        assertTrue(coord.triggerCheckpoint(timestamp, false));

        // validate that we have a pending checkpoint
        assertEquals(1, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());

        // we have one task scheduled that will cancel after timeout
        assertEquals(1, coord.getNumScheduledTasks());

        long checkpointId = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
        PendingCheckpoint checkpoint = coord.getPendingCheckpoints().get(checkpointId);

        assertNotNull(checkpoint);
        assertEquals(checkpointId, checkpoint.getCheckpointId());
        assertEquals(timestamp, checkpoint.getCheckpointTimestamp());
        assertEquals(jid, checkpoint.getJobId());
        assertEquals(2, checkpoint.getNumberOfNonAcknowledgedTasks());
        assertEquals(0, checkpoint.getNumberOfAcknowledgedTasks());
        assertEquals(0, checkpoint.getTaskStates().size());
        assertFalse(checkpoint.isDiscarded());
        assertFalse(checkpoint.isFullyAcknowledged());

        // check that the vertices received the trigger checkpoint message
        verify(vertex1.getCurrentExecutionAttempt()).triggerCheckpoint(checkpointId, timestamp, CheckpointOptions.forFullCheckpoint());
        verify(vertex2.getCurrentExecutionAttempt()).triggerCheckpoint(checkpointId, timestamp, CheckpointOptions.forFullCheckpoint());

        // acknowledge from one of the tasks
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId));
        assertEquals(1, checkpoint.getNumberOfAcknowledgedTasks());
        assertEquals(1, checkpoint.getNumberOfNonAcknowledgedTasks());
        assertFalse(checkpoint.isDiscarded());
        assertFalse(checkpoint.isFullyAcknowledged());

        // acknowledge the same task again (should not matter)
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId));
        assertFalse(checkpoint.isDiscarded());
        assertFalse(checkpoint.isFullyAcknowledged());

        // decline checkpoint from the other task, this should discard the checkpoint
        coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID1, checkpointId));
        assertTrue(checkpoint.isDiscarded());

        // the canceler is also removed
        assertEquals(0, coord.getNumScheduledTasks());

        // validate that we have no new pending checkpoint
        assertEquals(0, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());

        // decline again, nothing should happen
        coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID1, checkpointId));
        // decline from the other task, nothing should happen
        coord.receiveDeclineMessage(new DeclineCheckpoint(jid, attemptID2, checkpointId));
        assertTrue(checkpoint.isDiscarded());

        coord.shutdown(JobStatus.FINISHED);
    } catch (Exception e) {
        // Fail the test but keep the original exception as the cause, so the full
        // stack trace shows up in the test report rather than only on stderr.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Aggregations