Use of org.apache.flink.core.io.SimpleVersionedSerializer in project flink by apache.
The class MasterHooksTest, method wrapHook.
// ------------------------------------------------------------------------
// hook management
// ------------------------------------------------------------------------
@Test
public void wrapHook() throws Exception {
final String id = "id";
Thread thread = Thread.currentThread();
final ClassLoader originalClassLoader = thread.getContextClassLoader();
final ClassLoader userClassLoader = new URLClassLoader(new URL[0]);
final Runnable command = spy(new Runnable() {
@Override
public void run() {
assertEquals(userClassLoader, Thread.currentThread().getContextClassLoader());
}
});
MasterTriggerRestoreHook<String> hook = spy(new MasterTriggerRestoreHook<String>() {
@Override
public String getIdentifier() {
assertEquals(userClassLoader, Thread.currentThread().getContextClassLoader());
return id;
}
@Override
public void reset() throws Exception {
assertEquals(userClassLoader, Thread.currentThread().getContextClassLoader());
}
@Override
public void close() throws Exception {
assertEquals(userClassLoader, Thread.currentThread().getContextClassLoader());
}
@Nullable
@Override
public CompletableFuture<String> triggerCheckpoint(long checkpointId, long timestamp, Executor executor) throws Exception {
assertEquals(userClassLoader, Thread.currentThread().getContextClassLoader());
executor.execute(command);
return null;
}
@Override
public void restoreCheckpoint(long checkpointId, @Nullable String checkpointData) throws Exception {
assertEquals(userClassLoader, Thread.currentThread().getContextClassLoader());
}
@Nullable
@Override
public SimpleVersionedSerializer<String> createCheckpointDataSerializer() {
assertEquals(userClassLoader, Thread.currentThread().getContextClassLoader());
return null;
}
});
MasterTriggerRestoreHook<String> wrapped = MasterHooks.wrapHook(hook, userClassLoader);
// verify getIdentifier
wrapped.getIdentifier();
verify(hook, times(1)).getIdentifier();
assertEquals(originalClassLoader, thread.getContextClassLoader());
// verify triggerCheckpoint and its wrapped executor
TestExecutor testExecutor = new TestExecutor();
wrapped.triggerCheckpoint(0L, 0, testExecutor);
assertEquals(originalClassLoader, thread.getContextClassLoader());
assertNotNull(testExecutor.command);
testExecutor.command.run();
verify(command, times(1)).run();
assertEquals(originalClassLoader, thread.getContextClassLoader());
// verify restoreCheckpoint
wrapped.restoreCheckpoint(0L, "");
verify(hook, times(1)).restoreCheckpoint(eq(0L), eq(""));
assertEquals(originalClassLoader, thread.getContextClassLoader());
// verify createCheckpointDataSerializer
wrapped.createCheckpointDataSerializer();
verify(hook, times(1)).createCheckpointDataSerializer();
assertEquals(originalClassLoader, thread.getContextClassLoader());
// verify close
wrapped.close();
verify(hook, times(1)).close();
assertEquals(originalClassLoader, thread.getContextClassLoader());
}
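The test above verifies that MasterHooks.wrapHook invokes every hook method with the user class loader installed as the thread's context class loader and restores the original class loader afterwards. Below is a minimal sketch of that wrapping pattern; the helper name runWithContextClassLoader is hypothetical and only illustrates the idea, it is not the Flink implementation.
import java.util.concurrent.Callable;

// Hypothetical helper illustrating the context-classloader wrapping that wrapHook performs:
// switch the thread's context class loader before delegating, restore it afterwards.
final class ClassLoaderScope {

    static <T> T runWithContextClassLoader(ClassLoader userClassLoader, Callable<T> action) throws Exception {
        final Thread thread = Thread.currentThread();
        final ClassLoader original = thread.getContextClassLoader();
        thread.setContextClassLoader(userClassLoader);
        try {
            // the delegate (e.g. hook.getIdentifier() or hook.triggerCheckpoint(...))
            return action.call();
        } finally {
            // always restore the caller's class loader, even if the delegate throws
            thread.setContextClassLoader(original);
        }
    }
}
The assertions on originalClassLoader after each call in the test correspond to the finally block in this sketch.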
Use of org.apache.flink.core.io.SimpleVersionedSerializer in project flink by apache.
The class CheckpointCoordinatorTest, method testExternallyInducedSourceWithOperatorCoordinator.
/**
* Tests that the checkpoint still behaves correctly when the task checkpoint is triggered by
* the master hooks and finishes before the master checkpoint. Also makes sure that the
* operator coordinators are checkpointed before the task checkpoint starts.
*/
@Test
public void testExternallyInducedSourceWithOperatorCoordinator() throws Exception {
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway gateway =
        new CheckpointCoordinatorTestingUtils.CheckpointRecorderTaskManagerGateway();
ExecutionGraph graph =
        new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                .addJobVertex(jobVertexID1)
                .addJobVertex(jobVertexID2)
                .setTaskManagerGateway(gateway)
                .build();
ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
OperatorID opID2 = vertex2.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
TaskStateSnapshot taskOperatorSubtaskStates1 = new TaskStateSnapshot();
TaskStateSnapshot taskOperatorSubtaskStates2 = new TaskStateSnapshot();
OperatorSubtaskState subtaskState1 = OperatorSubtaskState.builder().build();
OperatorSubtaskState subtaskState2 = OperatorSubtaskState.builder().build();
taskOperatorSubtaskStates1.putSubtaskStateByOperatorID(opID1, subtaskState1);
taskOperatorSubtaskStates2.putSubtaskStateByOperatorID(opID2, subtaskState2);
// Create a mock OperatorCoordinatorCheckpointContext which completes the checkpoint
// immediately.
AtomicBoolean coordCheckpointDone = new AtomicBoolean(false);
OperatorCoordinatorCheckpointContext coordinatorCheckpointContext =
        new CheckpointCoordinatorTestingUtils.MockOperatorCheckpointCoordinatorContextBuilder()
                .setOnCallingCheckpointCoordinator((checkpointId, result) -> {
                    coordCheckpointDone.set(true);
                    result.complete(new byte[0]);
                })
                .setOperatorID(opID1)
                .build();
// set up the coordinator and validate the initial state
CheckpointCoordinator checkpointCoordinator =
        new CheckpointCoordinatorBuilder()
                .setExecutionGraph(graph)
                .setCheckpointCoordinatorConfiguration(
                        CheckpointCoordinatorConfiguration.builder()
                                .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                                .build())
                .setTimer(manuallyTriggeredScheduledExecutor)
                .setCoordinatorsToCheckpoint(Collections.singleton(coordinatorCheckpointContext))
                .build();
AtomicReference<Long> checkpointIdRef = new AtomicReference<>();
// Add a master hook which triggers and acks the task checkpoint immediately.
// In this case the task checkpoints would complete before the job master checkpoint
// completes.
checkpointCoordinator.addMasterHook(new MasterTriggerRestoreHook<Integer>() {
@Override
public String getIdentifier() {
return "anything";
}
@Override
@Nullable
public CompletableFuture<Integer> triggerCheckpoint(long checkpointId, long timestamp, Executor executor) throws Exception {
assertTrue("The coordinator checkpoint should have finished.", coordCheckpointDone.get());
// Acknowledge the checkpoint in the master hook so that the task snapshots
// complete before the master state snapshot completes.
checkpointIdRef.set(checkpointId);
AcknowledgeCheckpoint acknowledgeCheckpoint1 = new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates1);
AcknowledgeCheckpoint acknowledgeCheckpoint2 = new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates2);
checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint1, TASK_MANAGER_LOCATION_INFO);
checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint2, TASK_MANAGER_LOCATION_INFO);
return null;
}
@Override
public void restoreCheckpoint(long checkpointId, Integer checkpointData) throws Exception {
}
@Override
public SimpleVersionedSerializer<Integer> createCheckpointDataSerializer() {
return new SimpleVersionedSerializer<Integer>() {
@Override
public int getVersion() {
return 0;
}
@Override
public byte[] serialize(Integer obj) throws IOException {
return new byte[0];
}
@Override
public Integer deserialize(int version, byte[] serialized) throws IOException {
return 1;
}
};
}
});
// Verify initial state.
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());
// trigger the first checkpoint. this should succeed
final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
FutureUtils.throwIfCompletedExceptionally(checkpointFuture);
// now we should have a completed checkpoint
assertEquals(1, checkpointCoordinator.getNumberOfRetainedSuccessfulCheckpoints());
assertEquals(0, checkpointCoordinator.getNumberOfPendingCheckpoints());
// the canceler should be removed now
assertEquals(0, manuallyTriggeredScheduledExecutor.getActiveScheduledTasks().size());
// validate that the relevant tasks got a confirmation message
long checkpointId = checkpointIdRef.get();
for (ExecutionVertex vertex : Arrays.asList(vertex1, vertex2)) {
ExecutionAttemptID attemptId = vertex.getCurrentExecutionAttempt().getAttemptId();
assertEquals(checkpointId, gateway.getOnlyTriggeredCheckpoint(attemptId).checkpointId);
}
CompletedCheckpoint success = checkpointCoordinator.getSuccessfulCheckpoints().get(0);
assertEquals(graph.getJobID(), success.getJobId());
assertEquals(2, success.getOperatorStates().size());
checkpointCoordinator.shutdown();
}
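The hook in this test returns a placeholder serializer (empty bytes on serialize, a constant on deserialize). For reference, a minimal but functional SimpleVersionedSerializer for an Integer payload could look like the sketch below; it is an illustrative example written against the interface shown above, not code from the Flink repository.
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.flink.core.io.SimpleVersionedSerializer;

// Illustrative serializer that actually round-trips an Integer instead of returning dummies.
class IntegerCheckpointDataSerializer implements SimpleVersionedSerializer<Integer> {

    private static final int VERSION = 1;

    @Override
    public int getVersion() {
        return VERSION;
    }

    @Override
    public byte[] serialize(Integer obj) throws IOException {
        // 4 bytes are enough for an int payload
        return ByteBuffer.allocate(Integer.BYTES).putInt(obj).array();
    }

    @Override
    public Integer deserialize(int version, byte[] serialized) throws IOException {
        if (version != VERSION) {
            throw new IOException("Unsupported version: " + version);
        }
        return ByteBuffer.wrap(serialized).getInt();
    }
}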
Use of org.apache.flink.core.io.SimpleVersionedSerializer in project flink by apache.
The class MasterHooks, method triggerHook.
// ------------------------------------------------------------------------
// checkpoint triggering
// ------------------------------------------------------------------------
/**
* Triggers the given master hook and returns a completable future that completes with the
* resulting {@link MasterState}, or with null if the hook produced no state.
*
* @param hook The master hook to trigger
* @param checkpointId The checkpoint ID of the triggering checkpoint
* @param timestamp The (informational) timestamp for the triggering checkpoint
* @param executor An executor that can be used for asynchronous I/O calls
* @param <T> The type of data produced by the hook
* @return A completable future with the serialized hook state, or null if the hook is stateless
*/
public static <T> CompletableFuture<MasterState> triggerHook(MasterTriggerRestoreHook<T> hook, long checkpointId, long timestamp, Executor executor) {
final String id = hook.getIdentifier();
final SimpleVersionedSerializer<T> serializer = hook.createCheckpointDataSerializer();
try {
// call the hook!
final CompletableFuture<T> resultFuture = hook.triggerCheckpoint(checkpointId, timestamp, executor);
if (resultFuture == null) {
return CompletableFuture.completedFuture(null);
}
return resultFuture.thenApply(result -> {
// if the hook produced a state object, serialize it into a MasterState
if (result == null) {
return null;
} else if (serializer != null) {
try {
final int version = serializer.getVersion();
final byte[] bytes = serializer.serialize(result);
return new MasterState(id, bytes, version);
} catch (Throwable t) {
ExceptionUtils.rethrowIfFatalErrorOrOOM(t);
throw new CompletionException(new FlinkException("Failed to serialize state of master hook '" + id + '\'', t));
}
} else {
throw new CompletionException(new FlinkException("Checkpoint hook '" + id + " is stateful but creates no serializer"));
}
}).exceptionally((throwable) -> {
throw new CompletionException(new FlinkException("Checkpoint master hook '" + id + "' produced an exception", throwable.getCause()));
});
} catch (Throwable t) {
return FutureUtils.completedExceptionally(new FlinkException("Error while triggering checkpoint master hook '" + id + '\'', t));
}
}
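triggerHook covers the snapshot direction: the hook's result is serialized together with the serializer version into a MasterState. A simplified sketch of the inverse path on restore, written only against the hook API shown above (the helper name and the MasterState accessors are assumptions, not Flink's actual MasterHooks code), could look like this:
import org.apache.flink.core.io.SimpleVersionedSerializer;
import org.apache.flink.runtime.checkpoint.MasterState;
import org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook;
import org.apache.flink.util.FlinkException;

// Simplified sketch of the restore direction (an assumed helper, not Flink's actual
// MasterHooks code): deserialize the bytes that triggerHook stored and hand them back
// to the hook. The MasterState accessors version() and bytes() are assumed here.
final class MasterHookRestoreSketch {

    static <T> void restoreHookState(MasterTriggerRestoreHook<T> hook, MasterState state, long checkpointId)
            throws FlinkException {
        if (state == null) {
            return; // the hook stored no state for this checkpoint
        }
        final SimpleVersionedSerializer<T> serializer = hook.createCheckpointDataSerializer();
        if (serializer == null) {
            throw new FlinkException(
                    "Checkpoint hook '" + hook.getIdentifier() + "' has state but creates no serializer");
        }
        try {
            final T data = serializer.deserialize(state.version(), state.bytes());
            hook.restoreCheckpoint(checkpointId, data);
        } catch (Exception e) {
            throw new FlinkException(
                    "Failed to restore state of master hook '" + hook.getIdentifier() + '\'', e);
        }
    }
}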
Use of org.apache.flink.core.io.SimpleVersionedSerializer in project flink by apache.
The class CheckpointCoordinatorTest, method testCompleteCheckpointFailureWithExternallyInducedSource.
@Test
public void testCompleteCheckpointFailureWithExternallyInducedSource() throws Exception {
JobVertexID jobVertexID1 = new JobVertexID();
JobVertexID jobVertexID2 = new JobVertexID();
ExecutionGraph graph =
        new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                .addJobVertex(jobVertexID1)
                .addJobVertex(jobVertexID2)
                .build();
ExecutionVertex vertex1 = graph.getJobVertex(jobVertexID1).getTaskVertices()[0];
ExecutionVertex vertex2 = graph.getJobVertex(jobVertexID2).getTaskVertices()[0];
ExecutionAttemptID attemptID1 = vertex1.getCurrentExecutionAttempt().getAttemptId();
ExecutionAttemptID attemptID2 = vertex2.getCurrentExecutionAttempt().getAttemptId();
OperatorID opID1 = vertex1.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
OperatorID opID2 = vertex2.getJobVertex().getOperatorIDs().get(0).getGeneratedOperatorID();
TaskStateSnapshot taskOperatorSubtaskStates1 = new TaskStateSnapshot();
TaskStateSnapshot taskOperatorSubtaskStates2 = new TaskStateSnapshot();
OperatorSubtaskState subtaskState1 = OperatorSubtaskState.builder().build();
OperatorSubtaskState subtaskState2 = OperatorSubtaskState.builder().build();
taskOperatorSubtaskStates1.putSubtaskStateByOperatorID(opID1, subtaskState1);
taskOperatorSubtaskStates2.putSubtaskStateByOperatorID(opID2, subtaskState2);
// Create a mock OperatorCoordinatorCheckpointContext which completes the checkpoint
// immediately.
AtomicBoolean coordCheckpointDone = new AtomicBoolean(false);
OperatorCoordinatorCheckpointContext coordinatorCheckpointContext =
        new CheckpointCoordinatorTestingUtils.MockOperatorCheckpointCoordinatorContextBuilder()
                .setOnCallingCheckpointCoordinator((checkpointId, result) -> {
                    coordCheckpointDone.set(true);
                    result.complete(new byte[0]);
                })
                .setOperatorID(opID1)
                .build();
// set up the coordinator and validate the initial state
CheckpointCoordinator checkpointCoordinator =
        new CheckpointCoordinatorBuilder()
                .setExecutionGraph(graph)
                .setCheckpointCoordinatorConfiguration(
                        CheckpointCoordinatorConfiguration.builder()
                                .setMaxConcurrentCheckpoints(Integer.MAX_VALUE)
                                .build())
                .setTimer(manuallyTriggeredScheduledExecutor)
                .setCoordinatorsToCheckpoint(Collections.singleton(coordinatorCheckpointContext))
                .setCheckpointStorage(new JobManagerCheckpointStorage() {
private static final long serialVersionUID = 8134582566514272546L;
// Throw exception when finalizing the checkpoint.
@Override
public CheckpointStorageAccess createCheckpointStorage(JobID jobId) throws IOException {
return new MemoryBackendCheckpointStorageAccess(jobId, null, null, 100) {
@Override
public CheckpointStorageLocation initializeLocationForCheckpoint(long checkpointId) throws IOException {
return new NonPersistentMetadataCheckpointStorageLocation(1000) {
@Override
public CheckpointMetadataOutputStream createMetadataOutputStream() throws IOException {
throw new IOException("Artificial Exception");
}
};
}
};
}
}).build();
AtomicReference<Long> checkpointIdRef = new AtomicReference<>();
// Add a master hook which triggers and acks the task checkpoint immediately.
// In this case the task checkpoints would complete before the job master checkpoint
// completes.
checkpointCoordinator.addMasterHook(new MasterTriggerRestoreHook<Integer>() {
@Override
public String getIdentifier() {
return "anything";
}
@Override
@Nullable
public CompletableFuture<Integer> triggerCheckpoint(long checkpointId, long timestamp, Executor executor) throws Exception {
assertTrue("The coordinator checkpoint should have finished.", coordCheckpointDone.get());
// Acknowledge the checkpoint in the master hook so that the task snapshots
// complete before the master state snapshot completes.
checkpointIdRef.set(checkpointId);
AcknowledgeCheckpoint acknowledgeCheckpoint1 = new AcknowledgeCheckpoint(graph.getJobID(), attemptID1, checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates1);
AcknowledgeCheckpoint acknowledgeCheckpoint2 = new AcknowledgeCheckpoint(graph.getJobID(), attemptID2, checkpointId, new CheckpointMetrics(), taskOperatorSubtaskStates2);
checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint1, TASK_MANAGER_LOCATION_INFO);
checkpointCoordinator.receiveAcknowledgeMessage(acknowledgeCheckpoint2, TASK_MANAGER_LOCATION_INFO);
return null;
}
@Override
public void restoreCheckpoint(long checkpointId, Integer checkpointData) throws Exception {
}
@Override
public SimpleVersionedSerializer<Integer> createCheckpointDataSerializer() {
return new SimpleVersionedSerializer<Integer>() {
@Override
public int getVersion() {
return 0;
}
@Override
public byte[] serialize(Integer obj) throws IOException {
return new byte[0];
}
@Override
public Integer deserialize(int version, byte[] serialized) throws IOException {
return 1;
}
};
}
});
// trigger the first checkpoint. this should succeed
final CompletableFuture<CompletedCheckpoint> checkpointFuture = checkpointCoordinator.triggerCheckpoint(false);
manuallyTriggeredScheduledExecutor.triggerAll();
assertTrue(checkpointFuture.isCompletedExceptionally());
assertTrue(checkpointCoordinator.getSuccessfulCheckpoints().isEmpty());
}
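All of the examples above hand a SimpleVersionedSerializer to the checkpointing machinery, which stores the serialized bytes together with the serializer's version. For writing and reading such version-prefixed bytes directly, flink-core also provides the SimpleVersionedSerialization helper; the snippet below is a small usage sketch (IntegerCheckpointDataSerializer is the illustrative serializer sketched earlier in this page, not part of Flink).
import org.apache.flink.core.io.SimpleVersionedSerialization;
import org.apache.flink.core.io.SimpleVersionedSerializer;

public class SerializerRoundTripExample {

    public static void main(String[] args) throws Exception {
        SimpleVersionedSerializer<Integer> serializer = new IntegerCheckpointDataSerializer();

        // prefixes the payload bytes with the serializer version
        byte[] versionedBytes = SimpleVersionedSerialization.writeVersionAndSerialize(serializer, 42);

        // reads the version back and dispatches to deserialize(version, payload)
        Integer restored = SimpleVersionedSerialization.readVersionAndDeSerialize(serializer, versionedBytes);

        System.out.println(restored); // prints 42
    }
}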