Search in sources :

Example 1 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorExternalizedCheckpointsTest method testTriggerAndConfirmSimpleExternalizedCheckpoint.

/**
	 * Triggers multiple externalized checkpoints and verifies that the metadata
	 * files have been created.
	 */
@Test
public void testTriggerAndConfirmSimpleExternalizedCheckpoint() throws Exception {
    final JobID jid = new JobID();
    final ExternalizedCheckpointSettings externalizedCheckpointSettings = ExternalizedCheckpointSettings.externalizeCheckpoints(false);
    final File checkpointDir = tmp.newFolder();
    // create some mock Execution vertices that receive the checkpoint trigger messages
    final ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID attemptID2 = new ExecutionAttemptID();
    ExecutionVertex vertex1 = CheckpointCoordinatorTest.mockExecutionVertex(attemptID1);
    ExecutionVertex vertex2 = CheckpointCoordinatorTest.mockExecutionVertex(attemptID2);
    Map<JobVertexID, ExecutionJobVertex> jobVertices = new HashMap<>();
    jobVertices.put(vertex1.getJobvertexId(), vertex1.getJobVertex());
    jobVertices.put(vertex2.getJobvertexId(), vertex2.getJobVertex());
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, externalizedCheckpointSettings, new ExecutionVertex[] { vertex1, vertex2 }, new ExecutionVertex[] { vertex1, vertex2 }, new ExecutionVertex[] { vertex1, vertex2 }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), checkpointDir.getAbsolutePath(), Executors.directExecutor());
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
    // ---------------
    // trigger checkpoint 1
    // ---------------
    {
        final long timestamp1 = System.currentTimeMillis();
        coord.triggerCheckpoint(timestamp1, false);
        long checkpointId1 = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointId1));
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId1));
        CompletedCheckpoint latest = coord.getCheckpointStore().getLatestCheckpoint();
        verifyExternalizedCheckpoint(latest, jid, checkpointId1, timestamp1);
        verifyExternalizedCheckpointRestore(latest, jobVertices, vertex1, vertex2);
    }
    // ---------------
    // trigger checkpoint 2
    // ---------------
    {
        final long timestamp2 = System.currentTimeMillis() + 7;
        coord.triggerCheckpoint(timestamp2, false);
        long checkpointId2 = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointId2));
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId2));
        CompletedCheckpoint latest = coord.getCheckpointStore().getLatestCheckpoint();
        verifyExternalizedCheckpoint(latest, jid, checkpointId2, timestamp2);
        verifyExternalizedCheckpointRestore(latest, jobVertices, vertex1, vertex2);
    }
    // ---------------
    // trigger checkpoint 3
    // ---------------
    {
        final long timestamp3 = System.currentTimeMillis() + 146;
        coord.triggerCheckpoint(timestamp3, false);
        long checkpointId3 = coord.getPendingCheckpoints().entrySet().iterator().next().getKey();
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID1, checkpointId3));
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, attemptID2, checkpointId3));
        CompletedCheckpoint latest = coord.getCheckpointStore().getLatestCheckpoint();
        verifyExternalizedCheckpoint(latest, jid, checkpointId3, timestamp3);
        verifyExternalizedCheckpointRestore(latest, jobVertices, vertex1, vertex2);
    }
    coord.shutdown(JobStatus.FINISHED);
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) HashMap(java.util.HashMap) ExternalizedCheckpointSettings(org.apache.flink.runtime.jobgraph.tasks.ExternalizedCheckpointSettings) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testRestoreLatestCheckpointedStateWithChangingParallelism.

/**
	 * Tests the checkpoint restoration with changing parallelism of job vertex with partitioned
	 * state.
	 *
	 * @throws Exception
	 */
private void testRestoreLatestCheckpointedStateWithChangingParallelism(boolean scaleOut) throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = scaleOut ? 2 : 13;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    int newParallelism2 = scaleOut ? 13 : 2;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    //vertex 1
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
        ChainedStateHandle<OperatorStateHandle> opStateBackend = generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), true);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, opStateBackend, null, keyedStateBackend, keyedStateRaw);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    //vertex 2
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesBackend = new ArrayList<>(jobVertex2.getParallelism());
    final List<ChainedStateHandle<OperatorStateHandle>> expectedOpStatesRaw = new ArrayList<>(jobVertex2.getParallelism());
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        KeyGroupsStateHandle keyedStateBackend = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        KeyGroupsStateHandle keyedStateRaw = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), true);
        ChainedStateHandle<OperatorStateHandle> opStateBackend = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        ChainedStateHandle<OperatorStateHandle> opStateRaw = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, true);
        expectedOpStatesBackend.add(opStateBackend);
        expectedOpStatesRaw.add(opStateRaw);
        SubtaskState checkpointStateHandles = new SubtaskState(new ChainedStateHandle<>(Collections.<StreamStateHandle>singletonList(null)), opStateBackend, opStateRaw, keyedStateBackend, keyedStateRaw);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    List<KeyGroupRange> newKeyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, newParallelism2);
    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    // rescale vertex 2
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);
    // verify the restored state
    verifyStateRestore(jobVertexID1, newJobVertex1, keyGroupPartitions1);
    List<List<Collection<OperatorStateHandle>>> actualOpStatesBackend = new ArrayList<>(newJobVertex2.getParallelism());
    List<List<Collection<OperatorStateHandle>>> actualOpStatesRaw = new ArrayList<>(newJobVertex2.getParallelism());
    for (int i = 0; i < newJobVertex2.getParallelism(); i++) {
        KeyGroupsStateHandle originalKeyedStateBackend = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), false);
        KeyGroupsStateHandle originalKeyedStateRaw = generateKeyGroupState(jobVertexID2, newKeyGroupPartitions2.get(i), true);
        TaskStateHandles taskStateHandles = newJobVertex2.getTaskVertices()[i].getCurrentExecutionAttempt().getTaskStateHandles();
        ChainedStateHandle<StreamStateHandle> operatorState = taskStateHandles.getLegacyOperatorState();
        List<Collection<OperatorStateHandle>> opStateBackend = taskStateHandles.getManagedOperatorState();
        List<Collection<OperatorStateHandle>> opStateRaw = taskStateHandles.getRawOperatorState();
        Collection<KeyGroupsStateHandle> keyGroupStateBackend = taskStateHandles.getManagedKeyedState();
        Collection<KeyGroupsStateHandle> keyGroupStateRaw = taskStateHandles.getRawKeyedState();
        actualOpStatesBackend.add(opStateBackend);
        actualOpStatesRaw.add(opStateRaw);
        assertNull(operatorState);
        compareKeyedState(Collections.singletonList(originalKeyedStateBackend), keyGroupStateBackend);
        compareKeyedState(Collections.singletonList(originalKeyedStateRaw), keyGroupStateRaw);
    }
    comparePartitionableState(expectedOpStatesBackend, actualOpStatesBackend);
    comparePartitionableState(expectedOpStatesRaw, actualOpStatesRaw);
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) ChainedStateHandle(org.apache.flink.runtime.state.ChainedStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) List(java.util.List) ArrayList(java.util.ArrayList) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) TaskStateHandles(org.apache.flink.runtime.state.TaskStateHandles) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Collection(java.util.Collection) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID)

Example 3 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testMinTimeBetweenCheckpointsInterval.

/**
	 * This test verified that after a completed checkpoint a certain time has passed before
	 * another is triggered.
	 */
@Test
public void testMinTimeBetweenCheckpointsInterval() throws Exception {
    final JobID jid = new JobID();
    // create some mock execution vertices and trigger some checkpoint
    final ExecutionAttemptID attemptID = new ExecutionAttemptID();
    final ExecutionVertex vertex = mockExecutionVertex(attemptID);
    final Execution executionAttempt = vertex.getCurrentExecutionAttempt();
    final BlockingQueue<Long> triggerCalls = new LinkedBlockingQueue<>();
    doAnswer(new Answer<Void>() {

        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
            triggerCalls.add((Long) invocation.getArguments()[0]);
            return null;
        }
    }).when(executionAttempt).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class));
    final long delay = 50;
    final CheckpointCoordinator coord = new CheckpointCoordinator(jid, // periodic interval is 2 ms
    2, // timeout is very long (200 s)
    200_000, // 50 ms delay between checkpoints
    delay, 1, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { vertex }, new ExecutionVertex[] { vertex }, new ExecutionVertex[] { vertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), "dummy-path", Executors.directExecutor());
    try {
        coord.startCheckpointScheduler();
        // wait until the first checkpoint was triggered
        Long firstCallId = triggerCalls.take();
        assertEquals(1L, firstCallId.longValue());
        AcknowledgeCheckpoint ackMsg = new AcknowledgeCheckpoint(jid, attemptID, 1L);
        // tell the coordinator that the checkpoint is done
        final long ackTime = System.nanoTime();
        coord.receiveAcknowledgeMessage(ackMsg);
        // wait until the next checkpoint is triggered
        Long nextCallId = triggerCalls.take();
        final long nextCheckpointTime = System.nanoTime();
        assertEquals(2L, nextCallId.longValue());
        final long delayMillis = (nextCheckpointTime - ackTime) / 1_000_000;
        // we need to add one ms here to account for rounding errors
        if (delayMillis + 1 < delay) {
            fail("checkpoint came too early: delay was " + delayMillis + " but should have been at least " + delay);
        }
    } finally {
        coord.stopCheckpointScheduler();
        coord.shutdown(JobStatus.FINISHED);
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Execution(org.apache.flink.runtime.executiongraph.Execution) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Matchers.anyLong(org.mockito.Matchers.anyLong) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testRestoreLatestCheckpointedState.

/**
	 * Tests that the checkpointed partitioned and non-partitioned state is assigned properly to
	 * the {@link Execution} upon recovery.
	 *
	 * @throws Exception
	 */
@Test
public void testRestoreLatestCheckpointedState() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), arrayExecutionVertices, arrayExecutionVertices, arrayExecutionVertices, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertTrue(coord.getPendingCheckpoints().keySet().size() == 1);
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> nonPartitionedState = generateStateForVertex(jobVertexID1, index);
        ChainedStateHandle<OperatorStateHandle> partitionableState = generateChainedPartitionableStateHandle(jobVertexID1, index, 2, 8, false);
        KeyGroupsStateHandle partitionedKeyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(nonPartitionedState, partitionableState, null, partitionedKeyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> nonPartitionedState = generateStateForVertex(jobVertexID2, index);
        ChainedStateHandle<OperatorStateHandle> partitionableState = generateChainedPartitionableStateHandle(jobVertexID2, index, 2, 8, false);
        KeyGroupsStateHandle partitionedKeyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(nonPartitionedState, partitionableState, null, partitionedKeyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    tasks.put(jobVertexID1, jobVertex1);
    tasks.put(jobVertexID2, jobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);
    // verify the restored state
    verifyStateRestore(jobVertexID1, jobVertex1, keyGroupPartitions1);
    verifyStateRestore(jobVertexID2, jobVertex2, keyGroupPartitions2);
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ArrayList(java.util.ArrayList) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with AcknowledgeCheckpoint

use of org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint in project flink by apache.

the class CheckpointCoordinatorTest method testMaxConcurrentAttempsWithSubsumption.

@Test
public void testMaxConcurrentAttempsWithSubsumption() {
    try {
        final int maxConcurrentAttempts = 2;
        final JobID jid = new JobID();
        // create some mock execution vertices and trigger some checkpoint
        final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID();
        final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID();
        final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
        ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID);
        ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID);
        ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);
        CheckpointCoordinator coord = new CheckpointCoordinator(jid, // periodic interval is 10 ms
        10, // timeout is very long (200 s)
        200000, // no extra delay
        0L, // max two concurrent checkpoints
        maxConcurrentAttempts, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), null, Executors.directExecutor());
        coord.startCheckpointScheduler();
        // after a while, there should be exactly as many checkpoints
        // as concurrently permitted
        long now = System.currentTimeMillis();
        long timeout = now + 60000;
        long minDuration = now + 100;
        do {
            Thread.sleep(20);
        } while ((now = System.currentTimeMillis()) < minDuration || (coord.getNumberOfPendingCheckpoints() < maxConcurrentAttempts && now < timeout));
        // validate that the pending checkpoints are there
        assertEquals(maxConcurrentAttempts, coord.getNumberOfPendingCheckpoints());
        assertNotNull(coord.getPendingCheckpoints().get(1L));
        assertNotNull(coord.getPendingCheckpoints().get(2L));
        // now we acknowledge the second checkpoint, which should subsume the first checkpoint
        // and allow two more checkpoints to be triggered
        // now, once we acknowledge one checkpoint, it should trigger the next one
        coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID, 2L));
        // after a while, there should be the new checkpoints
        final long newTimeout = System.currentTimeMillis() + 60000;
        do {
            Thread.sleep(20);
        } while (coord.getPendingCheckpoints().get(4L) == null && System.currentTimeMillis() < newTimeout);
        // do the final check
        assertEquals(maxConcurrentAttempts, coord.getNumberOfPendingCheckpoints());
        assertNotNull(coord.getPendingCheckpoints().get(3L));
        assertNotNull(coord.getPendingCheckpoints().get(4L));
        coord.shutdown(JobStatus.FINISHED);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) JobID(org.apache.flink.api.common.JobID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)45 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)35 Test (org.junit.Test)33 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)32 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)29 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)29 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)23 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)18 JobID (org.apache.flink.api.common.JobID)15 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)14 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)13 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)12 HashMap (java.util.HashMap)9 IOException (java.io.IOException)8 ArrayList (java.util.ArrayList)8 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)8 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)7 ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle)7 ManuallyTriggeredScheduledExecutor (org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor)7 HashSet (java.util.HashSet)6