
Example 16 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testRestoreLatestCheckpointFailureWhenMaxParallelismChanges.

/**
	 * Tests that the checkpoint restoration fails if the max parallelism of the job vertices has
	 * changed.
	 *
	 * @throws Exception
	 */
@Test(expected = IllegalStateException.class)
public void testRestoreLatestCheckpointFailureWhenMaxParallelismChanges() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID2, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    int newMaxParallelism1 = 20;
    int newMaxParallelism2 = 42;
    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, newMaxParallelism1);
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, newMaxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);
    fail("The restoration should have failed because the max parallelism changed.");
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
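
This example relies on helper methods defined elsewhere in CheckpointCoordinatorTest (mockExecutionJobVertex, mockExecutionVertex, generateStateForVertex, generateKeyGroupState) that are not shown on this page. As a rough, hypothetical sketch of what the simplest of them, mockExecutionVertex, might look like with Mockito (the actual helper in the test class is more elaborate):

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.flink.runtime.executiongraph.Execution;
import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.executiongraph.ExecutionVertex;

// Hypothetical sketch only; the real helper in CheckpointCoordinatorTest differs.
private static ExecutionVertex mockExecutionVertex(ExecutionAttemptID attemptId) {
    // The coordinator addresses a task through its current execution attempt.
    Execution execution = mock(Execution.class);
    when(execution.getAttemptId()).thenReturn(attemptId);
    // The mocked vertex only needs to expose that execution attempt.
    ExecutionVertex vertex = mock(ExecutionVertex.class);
    when(vertex.getCurrentExecutionAttempt()).thenReturn(execution);
    return vertex;
}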

Example 17 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testStateCleanupForLateOrUnknownMessages.

/**
	 * Tests that late acknowledge checkpoint messages are properly cleaned up. Furthermore it tests
	 * that unknown checkpoint messages for the same job are cleaned up as well. In contrast,
	 * checkpointing messages from other jobs should not be touched. A late acknowledge
	 * message is an acknowledge message which arrives after the checkpoint has been declined.
	 *
	 * @throws Exception
	 */
@Test
public void testStateCleanupForLateOrUnknownMessages() throws Exception {
    final JobID jobId = new JobID();
    final ExecutionAttemptID triggerAttemptId = new ExecutionAttemptID();
    final ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptId);
    final ExecutionAttemptID ackAttemptId1 = new ExecutionAttemptID();
    final ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptId1);
    final ExecutionAttemptID ackAttemptId2 = new ExecutionAttemptID();
    final ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptId2);
    final long timestamp = 1L;
    CheckpointCoordinator coord = new CheckpointCoordinator(jobId, 20000L, 20000L, 0L, 1, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { triggerVertex, ackVertex1, ackVertex2 }, new ExecutionVertex[0], new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    assertTrue(coord.triggerCheckpoint(timestamp, false));
    assertEquals(1, coord.getNumberOfPendingCheckpoints());
    PendingCheckpoint pendingCheckpoint = coord.getPendingCheckpoints().values().iterator().next();
    long checkpointId = pendingCheckpoint.getCheckpointId();
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
    SubtaskState triggerSubtaskState = mock(SubtaskState.class);
    // acknowledge the first trigger vertex
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jobId, triggerAttemptId, checkpointId, new CheckpointMetrics(), triggerSubtaskState));
    SubtaskState unknownSubtaskState = mock(SubtaskState.class);
    // receive an acknowledge message for an unknown vertex
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jobId, new ExecutionAttemptID(), checkpointId, new CheckpointMetrics(), unknownSubtaskState));
    // we should discard acknowledge messages from an unknown vertex belonging to our job
    verify(unknownSubtaskState, times(1)).discardState();
    SubtaskState differentJobSubtaskState = mock(SubtaskState.class);
    // receive an acknowledge message from an unknown job
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(new JobID(), new ExecutionAttemptID(), checkpointId, new CheckpointMetrics(), differentJobSubtaskState));
    // we should not interfere with different jobs
    verify(differentJobSubtaskState, never()).discardState();
    // duplicate acknowledge message for the trigger vertex
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jobId, triggerAttemptId, checkpointId, new CheckpointMetrics(), triggerSubtaskState));
    // duplicate acknowledge messages for a known vertex should not trigger discarding the state
    verify(triggerSubtaskState, never()).discardState();
    // let the checkpoint fail at the first ack vertex
    coord.receiveDeclineMessage(new DeclineCheckpoint(jobId, ackAttemptId1, checkpointId));
    assertTrue(pendingCheckpoint.isDiscarded());
    // check that we've cleaned up the already acknowledged state
    verify(triggerSubtaskState, times(1)).discardState();
    SubtaskState ackSubtaskState = mock(SubtaskState.class);
    // late acknowledge message from the second ack vertex
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jobId, ackAttemptId2, checkpointId, new CheckpointMetrics(), ackSubtaskState));
    // check that we also cleaned up this state
    verify(ackSubtaskState, times(1)).discardState();
    // receive an acknowledge message from an unknown job
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(new JobID(), new ExecutionAttemptID(), checkpointId, new CheckpointMetrics(), differentJobSubtaskState));
    // we should not interfere with different jobs
    verify(differentJobSubtaskState, never()).discardState();
    SubtaskState unknownSubtaskState2 = mock(SubtaskState.class);
    // receive an acknowledge message for an unknown vertex
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jobId, new ExecutionAttemptID(), checkpointId, new CheckpointMetrics(), unknownSubtaskState2));
    // we should discard acknowledge messages from an unknown vertex belonging to our job
    verify(unknownSubtaskState2, times(1)).discardState();
}
Also used : DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
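
The cleanup checks in this example are plain Mockito interaction verifications against mocked SubtaskState objects (SubtaskState is used here without an import, as in the test above). Stripped of the coordinator plumbing, the pattern looks roughly like this minimal sketch with made-up variable names, inside a test method that declares throws Exception:

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

// State the coordinator is expected to clean up versus state it must leave alone.
SubtaskState cleanedUp = mock(SubtaskState.class);
SubtaskState untouched = mock(SubtaskState.class);

// In the real test the coordinator performs this call internally; simulated here.
cleanedUp.discardState();

// Passes only if discardState() was invoked exactly once on the discarded state,
verify(cleanedUp, times(1)).discardState();
// and only if it was never invoked on state that must stay untouched.
verify(untouched, never()).discardState();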

Example 18 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testSuccessfulCheckpointSubsumesUnsuccessful.

@Test
public void testSuccessfulCheckpointSubsumesUnsuccessful() {
    try {
        final JobID jid = new JobID();
        final long timestamp1 = System.currentTimeMillis();
        final long timestamp2 = timestamp1 + 1552;
        // create some mock execution vertices
        final ExecutionAttemptID triggerAttemptID1 = new ExecutionAttemptID();
        final ExecutionAttemptID triggerAttemptID2 = new ExecutionAttemptID();
        final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID();
        final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID();
        final ExecutionAttemptID ackAttemptID3 = new ExecutionAttemptID();
        final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
        ExecutionVertex triggerVertex1 = mockExecutionVertex(triggerAttemptID1);
        ExecutionVertex triggerVertex2 = mockExecutionVertex(triggerAttemptID2);
        ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1);
        ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2);
        ExecutionVertex ackVertex3 = mockExecutionVertex(ackAttemptID3);
        ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);
        // set up the coordinator and validate the initial state
        CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { triggerVertex1, triggerVertex2 }, new ExecutionVertex[] { ackVertex1, ackVertex2, ackVertex3 }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(10), null, Executors.directExecutor());
        assertEquals(0, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
        // trigger the first checkpoint. this should succeed
        assertTrue(coord.triggerCheckpoint(timestamp1, false));
        assertEquals(1, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
        PendingCheckpoint pending1 = coord.getPendingCheckpoints().values().iterator().next();
        long checkpointId1 = pending1.getCheckpointId();
        // trigger messages should have been sent
        verify(triggerVertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId1), eq(timestamp1), any(CheckpointOptions.class));
        verify(triggerVertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId1), eq(timestamp1), any(CheckpointOptions.class));
        CheckpointMetaData checkpointMetaData1 = new CheckpointMetaData(checkpointId1, 0L);
        // acknowledge one of the three tasks
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID2, checkpointId1));
        // start the second checkpoint. this should also succeed
        assertTrue(coord.triggerCheckpoint(timestamp2, false));
        assertEquals(2, coord.getNumberOfPendingCheckpoints());
        assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
        PendingCheckpoint pending2;
        {
            Iterator<PendingCheckpoint> all = coord.getPendingCheckpoints().values().iterator();
            PendingCheckpoint cc1 = all.next();
            PendingCheckpoint cc2 = all.next();
            pending2 = pending1 == cc1 ? cc2 : cc1;
        }
        long checkpointId2 = pending2.getCheckpointId();
        // trigger messages should have been sent
        verify(triggerVertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId2), eq(timestamp2), any(CheckpointOptions.class));
        verify(triggerVertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId2), eq(timestamp2), any(CheckpointOptions.class));
        // we acknowledge one more task from the first checkpoint and the second
        // checkpoint completely. The second checkpoint should then subsume the first checkpoint
        CheckpointMetaData checkpointMetaData2 = new CheckpointMetaData(checkpointId2, 0L);
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID3, checkpointId2));
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID1, checkpointId2));
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID1, checkpointId1));
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID2, checkpointId2));
        // now, the second checkpoint should be confirmed, and the first discarded
        // actually both pending checkpoints are discarded, and the second has been transformed
        // into a successful checkpoint
        assertTrue(pending1.isDiscarded());
        assertTrue(pending2.isDiscarded());
        assertEquals(0, coord.getNumberOfPendingCheckpoints());
        assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
        // validate the committed checkpoints
        List<CompletedCheckpoint> scs = coord.getSuccessfulCheckpoints();
        CompletedCheckpoint success = scs.get(0);
        assertEquals(checkpointId2, success.getCheckpointID());
        assertEquals(timestamp2, success.getTimestamp());
        assertEquals(jid, success.getJobId());
        assertTrue(success.getTaskStates().isEmpty());
        // the first confirm message should be out
        verify(commitVertex.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointId2), eq(timestamp2));
        // send the last remaining ack for the first checkpoint. This should not do anything
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID3, checkpointId1));
        coord.shutdown(JobStatus.FINISHED);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IOException(java.io.IOException) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Iterator(java.util.Iterator) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
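
Note that this test acknowledges with the three-argument AcknowledgeCheckpoint constructor, whereas examples 16, 17 and 19 use the five-argument variant that also carries CheckpointMetrics and a SubtaskState. Both forms appear verbatim in the snippets on this page; side by side, with placeholder variable names, they read:

// Stateless acknowledgement: the task reports completion without attaching state.
AcknowledgeCheckpoint plainAck =
    new AcknowledgeCheckpoint(jobId, attemptId, checkpointId);

// Acknowledgement carrying metrics and the subtask's checkpointed state.
AcknowledgeCheckpoint statefulAck =
    new AcknowledgeCheckpoint(jobId, attemptId, checkpointId, new CheckpointMetrics(), subtaskState);

coord.receiveAcknowledgeMessage(plainAck);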

Example 19 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testRestoreLatestCheckpointFailureWhenParallelismChanges.

/**
	 * Tests that the checkpoint restoration fails if the parallelism of a job vertex with
	 * non-partitioned state has changed.
	 *
	 * @throws Exception
	 */
@Test(expected = IllegalStateException.class)
public void testRestoreLatestCheckpointFailureWhenParallelismChanges() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> state = generateStateForVertex(jobVertexID2, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(state, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    int newParallelism1 = 4;
    int newParallelism2 = 3;
    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, newParallelism1, maxParallelism1);
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);
    fail("The restoration should have failed because the parallelism of an vertex with " + "non-partitioned state changed.");
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
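
Like example 16, this test derives the per-subtask key-group ranges with StateAssignmentOperation.createKeyGroupPartitions(maxParallelism, parallelism). A quick way to see what the partitioning produces is to print the returned ranges; a small sketch (the package of StateAssignmentOperation is assumed to match the test's imports, and the values are taken from the example above):

import java.util.List;
import org.apache.flink.runtime.state.KeyGroupRange;

// One KeyGroupRange per subtask; together the ranges cover key groups 0..maxParallelism-1.
int maxParallelism = 13;
int parallelism = 2;
List<KeyGroupRange> partitions =
    StateAssignmentOperation.createKeyGroupPartitions(maxParallelism, parallelism);
for (int subtask = 0; subtask < partitions.size(); subtask++) {
    System.out.println("subtask " + subtask + " -> " + partitions.get(subtask));
}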

Example 20 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testConcurrentSavepoints.

/**
	 * Tests that the savepoints can be triggered concurrently.
	 */
@Test
public void testConcurrentSavepoints() throws Exception {
    JobID jobId = new JobID();
    final ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    ExecutionVertex vertex1 = mockExecutionVertex(attemptID1);
    StandaloneCheckpointIDCounter checkpointIDCounter = new StandaloneCheckpointIDCounter();
    CheckpointCoordinator coord = new CheckpointCoordinator(jobId, 100000, 200000, 0L, // max one checkpoint at a time => should not affect savepoints
    1, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { vertex1 }, new ExecutionVertex[] { vertex1 }, new ExecutionVertex[] { vertex1 }, checkpointIDCounter, new StandaloneCompletedCheckpointStore(2), null, Executors.directExecutor());
    List<Future<CompletedCheckpoint>> savepointFutures = new ArrayList<>();
    int numSavepoints = 5;
    String savepointDir = tmpFolder.newFolder().getAbsolutePath();
    // Trigger savepoints
    for (int i = 0; i < numSavepoints; i++) {
        savepointFutures.add(coord.triggerSavepoint(i, savepointDir));
    }
    // After triggering multiple savepoints, all should be in progress
    for (Future<CompletedCheckpoint> savepointFuture : savepointFutures) {
        assertFalse(savepointFuture.isDone());
    }
    // ACK all savepoints
    long checkpointId = checkpointIDCounter.getLast();
    for (int i = 0; i < numSavepoints; i++, checkpointId--) {
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jobId, attemptID1, checkpointId));
    }
    // After ACKs, all should be completed
    for (Future<CompletedCheckpoint> savepointFuture : savepointFutures) {
        assertTrue(savepointFuture.isDone());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ArrayList(java.util.ArrayList) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) Future(org.apache.flink.runtime.concurrent.Future) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
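
Since checkpointIDCounter.getLast() returns the most recently assigned checkpoint ID, the ACK loop above walks the five savepoint IDs downwards. An equivalent, perhaps more explicit formulation (a sketch assuming the IDs were assigned consecutively, which the standalone counter used here does) acknowledges them in ascending order:

// The five savepoints received consecutive checkpoint IDs; the newest is getLast().
long lastCheckpointId = checkpointIDCounter.getLast();
long firstCheckpointId = lastCheckpointId - numSavepoints + 1;

// Acknowledge each savepoint, oldest first.
for (long id = firstCheckpointId; id <= lastCheckpointId; id++) {
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jobId, attemptID1, id));
}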

Aggregations

AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) 23
JobID (org.apache.flink.api.common.JobID) 21
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex) 20
Test (org.junit.Test) 19
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID) 16
IOException (java.io.IOException) 9
DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) 8
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID) 7
HashMap (java.util.HashMap) 6
ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex) 6
KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange) 6
ArrayList (java.util.ArrayList) 5
KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle) 5
StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle) 5
ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle) 4
Execution (org.apache.flink.runtime.executiongraph.Execution) 3
OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle) 3
Iterator (java.util.Iterator) 2
TaskStateHandles (org.apache.flink.runtime.state.TaskStateHandles) 2
InvocationOnMock (org.mockito.invocation.InvocationOnMock) 2