use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorRestoringTest method testJobGraphModificationsAreCheckedForSavepoint.
/**
 * Verifies that the finished-state validation is executed when a job is restored from a
 * savepoint.
 *
 * <p>A savepoint is first taken and completed with one coordinator. A second coordinator is then
 * built with a {@link VertexFinishedStateChecker} whose {@code validateOperatorsFinishedState()}
 * only records that it was called, and the savepoint is restored through it.
 */
@Test
public void testJobGraphModificationsAreCheckedForSavepoint() throws Exception {
    final JobVertexID vertexId = new JobVertexID();
    ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId, 1, 1)
                    .build();

    // --- Step 1: take a savepoint and let it complete -------------------------------------
    CheckpointCoordinator savepointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .build();

    File savepointDir = tmpFolder.newFolder();
    String savepointTarget = "file://" + savepointDir.getAbsolutePath();
    CompletableFuture<CompletedCheckpoint> pendingSavepoint =
            savepointCoordinator.triggerSavepoint(savepointTarget, SavepointFormatType.CANONICAL);
    manuallyTriggeredScheduledExecutor.triggerAll();

    // Pick the id of the first pending checkpoint reported by the coordinator.
    long savepointId =
            savepointCoordinator.getPendingCheckpoints().keySet().iterator().next();

    // Acknowledge on behalf of the first task vertex of the job vertex.
    AcknowledgeCheckpoint acknowledgement =
            new AcknowledgeCheckpoint(
                    executionGraph.getJobID(),
                    executionGraph
                            .getJobVertex(vertexId)
                            .getTaskVertices()[0]
                            .getCurrentExecutionAttempt()
                            .getAttemptId(),
                    savepointId);
    savepointCoordinator.receiveAcknowledgeMessage(acknowledgement, "localhost");
    assertTrue(pendingSavepoint.isDone());

    // --- Step 2: restore with a checker that only records its invocation ------------------
    BooleanValue validationInvoked = new BooleanValue(false);
    CheckpointCoordinator restoringCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(executionGraph)
                    .setVertexFinishedStateCheckerFactory(
                            (tasks, operatorStates) ->
                                    new VertexFinishedStateChecker(tasks, operatorStates) {
                                        @Override
                                        public void validateOperatorsFinishedState() {
                                            validationInvoked.set(true);
                                        }
                                    })
                    .build();

    SavepointRestoreSettings restoreSettings =
            SavepointRestoreSettings.forPath(pendingSavepoint.get().getExternalPointer());
    restoringCoordinator.restoreSavepoint(
            restoreSettings, executionGraph.getAllVertices(), getClass().getClassLoader());

    assertTrue(
            "The finished states should be checked when job is restored on startup",
            validationInvoked.get());
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorTest method testCompleteCheckpointFailureWithExternallyInducedSource.
/**
 * Verifies that a checkpoint is reported as failed when finalizing it throws, even though the
 * operator coordinator and both tasks have already acknowledged it.
 *
 * <p>The checkpoint storage used here throws an {@link IOException} from
 * {@code createMetadataOutputStream()}. A master hook acknowledges both task snapshots from
 * inside {@code triggerCheckpoint}, so the task acknowledgements arrive before the master state
 * snapshot completes. The test then asserts that the returned future completed exceptionally and
 * that no successful checkpoint was recorded.
 */
@Test
public void testCompleteCheckpointFailureWithExternallyInducedSource() throws Exception {
    JobVertexID jobVertexID1 = new JobVertexID();
    JobVertexID jobVertexID2 = new JobVertexID();
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(jobVertexID1)
                    .addJobVertex(jobVertexID2)
                    .build();
    ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
    ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
    ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
    ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
    OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
    OperatorID opID2 = vertex2.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();

    // One (empty) operator subtask state per task, keyed by the task's first operator id.
    TaskStateSnapshot taskOperatorSubtaskStates1 = new TaskStateSnapshot();
    TaskStateSnapshot taskOperatorSubtaskStates2 = new TaskStateSnapshot();
    OperatorSubtaskState subtaskState1 = OperatorSubtaskState.builder().build();
    OperatorSubtaskState subtaskState2 = OperatorSubtaskState.builder().build();
    taskOperatorSubtaskStates1.putSubtaskStateByOperatorID(opID1, subtaskState1);
    taskOperatorSubtaskStates2.putSubtaskStateByOperatorID(opID2, subtaskState2);

    // Create a mock OperatorCoordinatorCheckpointContext which completes the checkpoint
    // immediately.
    AtomicBoolean coordCheckpointDone = new AtomicBoolean(false);
    OperatorCoordinatorCheckpointContext coordinatorCheckpointContext =
            new CheckpointCoordinatorTestingUtils.MockOperatorCheckpointCoordinatorContextBuilder()
                    .setOnCallingCheckpointCoordinator(
                            (checkpointId, result) -> {
                                coordCheckpointDone.set(true);
                                result.complete(new byte[0]);
                            })
                    .setOperatorID(opID1)
                    .build();

    // Set up the coordinator with a checkpoint storage that fails on finalization.
    CheckpointCoordinator checkpointCoordinator =
            new CheckpointCoordinatorBuilder()
                    .setExecutionGraph(graph)
                    .setCheckpointCoordinatorConfiguration(
                            CheckpointCoordinatorConfiguration.builder()
                                    .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                                    .build())
                    .setTimer(manuallyTriggeredScheduledExecutor)
                    .setCoordinatorsToCheckpoint(Collections.singleton(coordinatorCheckpointContext))
                    .setCheckpointStorage(
                            new JobManagerCheckpointStorage() {
                                private static final long serialVersionUID = 8134582566514272546L;

                                // Throw exception when finalizing the checkpoint.
                                @Override
                                public CheckpointStorageAccess createCheckpointStorage(JobID jobId)
                                        throws IOException {
                                    return new MemoryBackendCheckpointStorageAccess(
                                            jobId, null, null, 100) {
                                        @Override
                                        public CheckpointStorageLocation
                                                initializeLocationForCheckpoint(long checkpointId)
                                                        throws IOException {
                                            return new NonPersistentMetadataCheckpointStorageLocation(
                                                    1000) {
                                                @Override
                                                public CheckpointMetadataOutputStream
                                                        createMetadataOutputStream()
                                                                throws IOException {
                                                    throw new IOException("Artificial Exception");
                                                }
                                            };
                                        }
                                    };
                                }
                            })
                    .build();

    // Add a master hook which triggers and acks the task checkpoint immediately.
    // In this case the task checkpoints would complete before the job master checkpoint
    // completes.
    checkpointCoordinator.addMasterHook(
            new MasterTriggerRestoreHook<Integer>() {
                @Override
                public String getIdentifier() {
                    return "anything";
                }

                @Override
                @Nullable
                public CompletableFuture<Integer> triggerCheckpoint(
                        long checkpointId, long timestamp, Executor executor) throws Exception {
                    assertTrue(
                            "The coordinator checkpoint should have finished.",
                            coordCheckpointDone.get());
                    // Acknowledge the checkpoint in the master hooks so the task snapshots
                    // complete before the master state snapshot completes.
                    AcknowledgeCheckpoint acknowledgeCheckpoint1 =
                            new AcknowledgeCheckpoint(
                                    graph.getJobID(),
                                    attemptID1,
                                    checkpointId,
                                    new CheckpointMetrics(),
                                    taskOperatorSubtaskStates1);
                    AcknowledgeCheckpoint acknowledgeCheckpoint2 =
                            new AcknowledgeCheckpoint(
                                    graph.getJobID(),
                                    attemptID2,
                                    checkpointId,
                                    new CheckpointMetrics(),
                                    taskOperatorSubtaskStates2);
                    checkpointCoordinator.receiveAcknowledgeMessage(
                            acknowledgeCheckpoint1, TASK_MANAGER_LOCATION_INFO);
                    checkpointCoordinator.receiveAcknowledgeMessage(
                            acknowledgeCheckpoint2, TASK_MANAGER_LOCATION_INFO);
                    // This hook contributes no master state of its own.
                    return null;
                }

                @Override
                public void restoreCheckpoint(long checkpointId, Integer checkpointData)
                        throws Exception {}

                @Override
                public SimpleVersionedSerializer<Integer> createCheckpointDataSerializer() {
                    return new SimpleVersionedSerializer<Integer>() {
                        @Override
                        public int getVersion() {
                            return 0;
                        }

                        @Override
                        public byte[] serialize(Integer obj) throws IOException {
                            return new byte[0];
                        }

                        @Override
                        public Integer deserialize(int version, byte[] serialized)
                                throws IOException {
                            return 1;
                        }
                    };
                }
            });

    // Trigger a checkpoint. It must fail: the storage above throws while creating the metadata
    // output stream, so the checkpoint cannot be finalized.
    final CompletableFuture<CompletedCheckpoint> checkpointFuture =
            checkpointCoordinator.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();

    assertTrue(checkpointFuture.isCompletedExceptionally());
    assertTrue(checkpointCoordinator.getSuccessfulCheckpoints().isEmpty());
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorMasterHooksTest method ensureRegisteredAtHookTime.
// ------------------------------------------------------------------------
// failure scenarios
// ------------------------------------------------------------------------
/**
 * This test makes sure that the checkpoint is already registered as pending by the time the
 * master hooks are called.
 *
 * <p>The registration checks run inside the stubbed hook's answer. Because those checks only
 * execute when the hook is actually invoked, the test additionally verifies that the hook was
 * called exactly once.
 */
@Test
public void ensureRegisteredAtHookTime() throws Exception {
    final String id = "id";

    // create the checkpoint coordinator
    ExecutionGraph graph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(new JobVertexID())
                    .build();
    final ManuallyTriggeredScheduledExecutor manuallyTriggeredScheduledExecutor =
            new ManuallyTriggeredScheduledExecutor();
    CheckpointCoordinator cc =
            instantiateCheckpointCoordinator(graph, manuallyTriggeredScheduledExecutor);

    final MasterTriggerRestoreHook<Void> hook = mockGeneric(MasterTriggerRestoreHook.class);
    when(hook.getIdentifier()).thenReturn(id);
    when(hook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenAnswer(
                    new Answer<CompletableFuture<Void>>() {
                        @Override
                        public CompletableFuture<Void> answer(InvocationOnMock invocation)
                                throws Throwable {
                            // At hook time exactly one checkpoint must be pending, and it must
                            // be the one whose id is passed to the hook.
                            assertEquals(1, cc.getNumberOfPendingCheckpoints());

                            long checkpointId = (Long) invocation.getArguments()[0];
                            assertNotNull(cc.getPendingCheckpoints().get(checkpointId));
                            return null;
                        }
                    });

    cc.addMasterHook(hook);

    // trigger a checkpoint
    final CompletableFuture<CompletedCheckpoint> checkpointFuture = cc.triggerCheckpoint(false);
    manuallyTriggeredScheduledExecutor.triggerAll();
    assertFalse(checkpointFuture.isCompletedExceptionally());

    // Guard against a vacuous pass: the in-hook assertions only run if the hook is invoked.
    verify(hook, times(1)).triggerCheckpoint(anyLong(), anyLong(), any(Executor.class));
}
use of org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor in project flink by apache.
the class CheckpointCoordinatorMasterHooksTest method testHooksAreCalledOnTrigger.
// ------------------------------------------------------------------------
// trigger / restore behavior
// ------------------------------------------------------------------------
/**
 * Verifies that every registered master hook is triggered exactly once when a checkpoint is
 * triggered, and that the state returned by the two stateful hooks is stored, serialized, in the
 * completed checkpoint.
 *
 * <p>Three hooks are registered: two return a completed future with state (a string and a long)
 * and one is a plain mock with no stubbed {@code triggerCheckpoint}. After the single task
 * acknowledges, the latest checkpoint is expected to hold exactly two master states.
 */
@Test
public void testHooksAreCalledOnTrigger() throws Exception {
    final String firstHookId = "id1";
    final String secondHookId = "id2";

    // Hook payloads together with the bytes the serializers are expected to produce for them.
    final String stringState = "the-test-string-state";
    final long longState = 987654321L;
    final byte[] expectedStringBytes = new StringSerializer().serialize(stringState);
    final byte[] expectedLongBytes = new LongSerializer().serialize(longState);

    final MasterTriggerRestoreHook<String> stringHook =
            mockGeneric(MasterTriggerRestoreHook.class);
    when(stringHook.getIdentifier()).thenReturn(firstHookId);
    when(stringHook.createCheckpointDataSerializer()).thenReturn(new StringSerializer());
    when(stringHook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenReturn(CompletableFuture.completedFuture(stringState));

    final MasterTriggerRestoreHook<Long> longHook = mockGeneric(MasterTriggerRestoreHook.class);
    when(longHook.getIdentifier()).thenReturn(secondHookId);
    when(longHook.createCheckpointDataSerializer()).thenReturn(new LongSerializer());
    when(longHook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenReturn(CompletableFuture.completedFuture(longState));

    // Only the identifier is stubbed for this hook.
    final MasterTriggerRestoreHook<Void> hookWithoutState =
            mockGeneric(MasterTriggerRestoreHook.class);
    when(hookWithoutState.getIdentifier()).thenReturn("some-id");

    // Build the coordinator on a graph with a single job vertex.
    JobVertexID vertexId = new JobVertexID();
    final ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId)
                    .build();
    final ManuallyTriggeredScheduledExecutor timer = new ManuallyTriggeredScheduledExecutor();
    final CheckpointCoordinator coordinator =
            instantiateCheckpointCoordinator(executionGraph, timer);
    coordinator.addMasterHook(stringHook);
    coordinator.addMasterHook(hookWithoutState);
    coordinator.addMasterHook(longHook);

    // Trigger a checkpoint and run everything the timer has queued.
    final CompletableFuture<CompletedCheckpoint> triggered = coordinator.triggerCheckpoint(false);
    timer.triggerAll();
    assertFalse(triggered.isCompletedExceptionally());
    assertEquals(1, coordinator.getNumberOfPendingCheckpoints());

    verify(stringHook, times(1)).triggerCheckpoint(anyLong(), anyLong(), any(Executor.class));
    verify(longHook, times(1)).triggerCheckpoint(anyLong(), anyLong(), any(Executor.class));
    verify(hookWithoutState, times(1))
            .triggerCheckpoint(anyLong(), anyLong(), any(Executor.class));

    // Acknowledge the pending checkpoint on behalf of the only task.
    ExecutionAttemptID taskAttempt =
            executionGraph
                    .getJobVertex(vertexId)
                    .getTaskVertices()[0]
                    .getCurrentExecutionAttempt()
                    .getAttemptId();
    final long pendingId =
            coordinator.getPendingCheckpoints().values().iterator().next().getCheckpointId();
    AcknowledgeCheckpoint acknowledgement =
            new AcknowledgeCheckpoint(executionGraph.getJobID(), taskAttempt, pendingId);
    coordinator.receiveAcknowledgeMessage(acknowledgement, "Unknown location");

    assertEquals(0, coordinator.getNumberOfPendingCheckpoints());
    assertEquals(1, coordinator.getNumberOfRetainedSuccessfulCheckpoints());

    final CompletedCheckpoint latest = coordinator.getCheckpointStore().getLatestCheckpoint();
    final Collection<MasterState> storedHookStates = latest.getMasterHookStates();
    assertEquals(2, storedHookStates.size());

    for (MasterState hookState : storedHookStates) {
        final String hookName = hookState.name();
        if (hookName.equals(firstHookId)) {
            assertArrayEquals(expectedStringBytes, hookState.bytes());
            assertEquals(StringSerializer.VERSION, hookState.version());
            continue;
        }
        if (hookName.equals(secondHookId)) {
            assertArrayEquals(expectedLongBytes, hookState.bytes());
            assertEquals(LongSerializer.VERSION, hookState.version());
            continue;
        }
        fail("unrecognized state name: " + hookName);
    }
}
Aggregations