Search in sources :

Example 21 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class CheckpointCoordinatorTest method testCheckpointTimeoutIsolated.

/**
 * Verifies that a pending checkpoint which was only partially acknowledged is discarded
 * after the configured checkpoint timeout, and that no completion notification is sent
 * to the commit vertex for it.
 *
 * @throws Exception if the coordinator or the polling sleep fails; the exception is
 *                   propagated so the test report keeps its type and full stack trace
 */
@Test
public void testCheckpointTimeoutIsolated() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    // create some mock execution vertices
    final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID();
    final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
    ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID);
    ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1);
    ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2);
    ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);
    // set up the coordinator
    // the checkpoint timeout is 200 milliseconds
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 200, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), null, Executors.directExecutor());
    // trigger a checkpoint and acknowledge it only partially (one of the two ack vertices)
    assertTrue(coord.triggerCheckpoint(timestamp, false));
    assertEquals(1, coord.getNumberOfPendingCheckpoints());
    PendingCheckpoint checkpoint = coord.getPendingCheckpoints().values().iterator().next();
    assertFalse(checkpoint.isDiscarded());
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID1, checkpoint.getCheckpointId()));
    // wait until the checkpoint must have expired.
    // we check every 250 msecs conservatively for 5 seconds
    // to give even slow build servers a very good chance of completing this
    long deadline = System.currentTimeMillis() + 5000;
    do {
        Thread.sleep(250);
    } while (!checkpoint.isDiscarded() && coord.getNumberOfPendingCheckpoints() > 0 && System.currentTimeMillis() < deadline);
    assertTrue("Checkpoint was not canceled by the timeout", checkpoint.isDiscarded());
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
    // no confirm message must have been sent
    verify(commitVertex.getCurrentExecutionAttempt(), times(0)).notifyCheckpointComplete(anyLong(), anyLong());
    coord.shutdown(JobStatus.FINISHED);
}
Also used : AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IOException(java.io.IOException) Test(org.junit.Test)

Example 22 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class CheckpointCoordinatorTest method testMultipleConcurrentCheckpoints.

/**
 * Triggers two checkpoints that are pending at the same time, acknowledges them in an
 * interleaved order, and verifies that each one is completed, confirmed to the commit
 * vertex and retained independently of the other.
 *
 * @throws Exception if the coordinator fails; the exception is propagated so the test
 *                   report keeps its type and full stack trace
 */
@Test
public void testMultipleConcurrentCheckpoints() throws Exception {
    final JobID jid = new JobID();
    final long timestamp1 = System.currentTimeMillis();
    final long timestamp2 = timestamp1 + 8617;
    // create some mock execution vertices
    final ExecutionAttemptID triggerAttemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID triggerAttemptID2 = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID3 = new ExecutionAttemptID();
    final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
    ExecutionVertex triggerVertex1 = mockExecutionVertex(triggerAttemptID1);
    ExecutionVertex triggerVertex2 = mockExecutionVertex(triggerAttemptID2);
    ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1);
    ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2);
    ExecutionVertex ackVertex3 = mockExecutionVertex(ackAttemptID3);
    ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { triggerVertex1, triggerVertex2 }, new ExecutionVertex[] { ackVertex1, ackVertex2, ackVertex3 }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), null, Executors.directExecutor());
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
    // trigger the first checkpoint. this should succeed
    assertTrue(coord.triggerCheckpoint(timestamp1, false));
    assertEquals(1, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
    PendingCheckpoint pending1 = coord.getPendingCheckpoints().values().iterator().next();
    long checkpointId1 = pending1.getCheckpointId();
    // trigger messages should have been sent
    verify(triggerVertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId1), eq(timestamp1), any(CheckpointOptions.class));
    verify(triggerVertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId1), eq(timestamp1), any(CheckpointOptions.class));
    // acknowledge one of the three tasks
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID2, checkpointId1));
    // start the second checkpoint while the first one is still pending.
    // this should succeed as well
    assertTrue(coord.triggerCheckpoint(timestamp2, false));
    assertEquals(2, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
    // the iteration order of the pending checkpoints is not relied upon:
    // the second checkpoint is whichever of the two is not the first one
    PendingCheckpoint pending2;
    {
        Iterator<PendingCheckpoint> all = coord.getPendingCheckpoints().values().iterator();
        PendingCheckpoint cc1 = all.next();
        PendingCheckpoint cc2 = all.next();
        pending2 = pending1 == cc1 ? cc2 : cc1;
    }
    long checkpointId2 = pending2.getCheckpointId();
    // trigger messages should have been sent
    verify(triggerVertex1.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId2), eq(timestamp2), any(CheckpointOptions.class));
    verify(triggerVertex2.getCurrentExecutionAttempt(), times(1)).triggerCheckpoint(eq(checkpointId2), eq(timestamp2), any(CheckpointOptions.class));
    // we acknowledge the remaining two tasks from the first
    // checkpoint and two tasks from the second checkpoint
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID3, checkpointId1));
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID1, checkpointId2));
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID1, checkpointId1));
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID2, checkpointId2));
    // now, the first checkpoint should be confirmed
    assertEquals(1, coord.getNumberOfPendingCheckpoints());
    assertEquals(1, coord.getNumberOfRetainedSuccessfulCheckpoints());
    assertTrue(pending1.isDiscarded());
    // the first confirm message should be out
    verify(commitVertex.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointId1), eq(timestamp1));
    // send the last remaining ack for the second checkpoint
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID3, checkpointId2));
    // now, the second checkpoint should be confirmed
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(2, coord.getNumberOfRetainedSuccessfulCheckpoints());
    assertTrue(pending2.isDiscarded());
    // the second commit message should be out
    verify(commitVertex.getCurrentExecutionAttempt(), times(1)).notifyCheckpointComplete(eq(checkpointId2), eq(timestamp2));
    // validate the committed checkpoints
    List<CompletedCheckpoint> scs = coord.getSuccessfulCheckpoints();
    CompletedCheckpoint sc1 = scs.get(0);
    assertEquals(checkpointId1, sc1.getCheckpointID());
    assertEquals(timestamp1, sc1.getTimestamp());
    assertEquals(jid, sc1.getJobId());
    assertTrue(sc1.getTaskStates().isEmpty());
    CompletedCheckpoint sc2 = scs.get(1);
    assertEquals(checkpointId2, sc2.getCheckpointID());
    assertEquals(timestamp2, sc2.getTimestamp());
    assertEquals(jid, sc2.getJobId());
    assertTrue(sc2.getTaskStates().isEmpty());
    coord.shutdown(JobStatus.FINISHED);
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IOException(java.io.IOException) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) Iterator(java.util.Iterator) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 23 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class CheckpointCoordinatorTest method testHandleMessagesForNonExistingCheckpoints.

/**
 * Sends acknowledge messages that do not match the pending checkpoint (wrong job,
 * unknown checkpoint id, unknown execution attempt) and verifies that the coordinator
 * handles each of them without throwing.
 *
 * @throws Exception if the coordinator throws on any of the messages; the exception is
 *                   propagated so the test report keeps its type and full stack trace
 */
@Test
public void testHandleMessagesForNonExistingCheckpoints() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    // create some mock execution vertices and trigger some checkpoint
    final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID();
    final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
    ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID);
    ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1);
    ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2);
    ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 200000, 200000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { triggerVertex }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] { commitVertex }, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(2), null, Executors.directExecutor());
    assertTrue(coord.triggerCheckpoint(timestamp, false));
    long checkpointId = coord.getPendingCheckpoints().keySet().iterator().next();
    // derive an id that is guaranteed to differ from the single pending checkpoint's id;
    // a hard-coded literal could coincide with the id the counter actually handed out,
    // in which case the "unknown checkpoint" case would not be exercised
    final long unknownCheckpointId = checkpointId + 1;
    // send some messages that do not belong to either the job or any
    // of the vertices that need to be acknowledged.
    // none of the messages should throw an exception
    // wrong job id
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(new JobID(), ackAttemptID1, checkpointId));
    // unknown checkpoint
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, ackAttemptID1, unknownCheckpointId));
    // unknown ack vertex
    coord.receiveAcknowledgeMessage(new AcknowledgeCheckpoint(jid, new ExecutionAttemptID(), checkpointId));
    coord.shutdown(JobStatus.FINISHED);
}
Also used : AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IOException(java.io.IOException) Test(org.junit.Test)

Example 24 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class CheckpointCoordinatorTest method testCheckpointAbortsIfTriggerTasksAreFinished.

/**
 * Verifies that triggering a checkpoint is rejected, and leaves no pending or retained
 * checkpoint behind, when one of the trigger vertices was mocked in state
 * {@code ExecutionState.FINISHED}.
 *
 * @throws Exception if the coordinator fails; the exception is propagated so the test
 *                   report keeps its type and full stack trace
 */
@Test
public void testCheckpointAbortsIfTriggerTasksAreFinished() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    // create some mock Execution vertices that receive the checkpoint trigger messages;
    // the second one is created in state FINISHED
    final ExecutionAttemptID triggerAttemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID triggerAttemptID2 = new ExecutionAttemptID();
    ExecutionVertex triggerVertex1 = mockExecutionVertex(triggerAttemptID1);
    ExecutionVertex triggerVertex2 = mockExecutionVertex(triggerAttemptID2, new JobVertexID(), 1, 1, ExecutionState.FINISHED);
    // create some mock Execution vertices that need to ack the checkpoint
    final ExecutionAttemptID ackAttemptID1 = new ExecutionAttemptID();
    final ExecutionAttemptID ackAttemptID2 = new ExecutionAttemptID();
    ExecutionVertex ackVertex1 = mockExecutionVertex(ackAttemptID1);
    ExecutionVertex ackVertex2 = mockExecutionVertex(ackAttemptID2);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(jid, 600000, 600000, 0, Integer.MAX_VALUE, ExternalizedCheckpointSettings.none(), new ExecutionVertex[] { triggerVertex1, triggerVertex2 }, new ExecutionVertex[] { ackVertex1, ackVertex2 }, new ExecutionVertex[] {}, new StandaloneCheckpointIDCounter(), new StandaloneCompletedCheckpointStore(1), null, Executors.directExecutor());
    // nothing should be happening
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
    // trigger the first checkpoint. this should not succeed
    assertFalse(coord.triggerCheckpoint(timestamp, false));
    // still, nothing should be happening
    assertEquals(0, coord.getNumberOfPendingCheckpoints());
    assertEquals(0, coord.getNumberOfRetainedSuccessfulCheckpoints());
    coord.shutdown(JobStatus.FINISHED);
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobID(org.apache.flink.api.common.JobID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IOException(java.io.IOException) Test(org.junit.Test)

Example 25 with ExecutionVertex

use of org.apache.flink.runtime.executiongraph.ExecutionVertex in project flink by apache.

the class CheckpointCoordinatorTest method testCheckpointStatsTrackerRestoreCallback.

/**
 * Checks that a {@link CheckpointStatsTracker} set on the coordinator receives exactly
 * one restore report when the latest checkpoint is restored from the store.
 */
@Test
public void testCheckpointStatsTrackerRestoreCallback() throws Exception {
    final ExecutionVertex task = mockExecutionVertex(new ExecutionAttemptID());

    // seed the store with a single completed checkpoint that carries no task state
    final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
    final CompletedCheckpoint seededCheckpoint = new CompletedCheckpoint(
        new JobID(), 0, 0, 0, Collections.<JobVertexID, TaskState>emptyMap());
    checkpointStore.addCheckpoint(seededCheckpoint);

    // the same mocked vertex serves as trigger, ack and commit task
    final CheckpointCoordinator coordinator = new CheckpointCoordinator(
        new JobID(),
        600000,
        600000,
        0,
        Integer.MAX_VALUE,
        ExternalizedCheckpointSettings.none(),
        new ExecutionVertex[] { task },
        new ExecutionVertex[] { task },
        new ExecutionVertex[] { task },
        new StandaloneCheckpointIDCounter(),
        checkpointStore,
        null,
        Executors.directExecutor());

    final CheckpointStatsTracker statsTracker = mock(CheckpointStatsTracker.class);
    coordinator.setCheckpointStatsTracker(statsTracker);

    final boolean restored = coordinator.restoreLatestCheckpointedState(
        Collections.<JobVertexID, ExecutionJobVertex>emptyMap(), false, true);
    assertTrue(restored);

    verify(statsTracker, times(1)).reportRestoredCheckpoint(any(RestoredCheckpointStats.class));
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)65 Test (org.junit.Test)47 JobID (org.apache.flink.api.common.JobID)42 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)41 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)23 IOException (java.io.IOException)15 Execution (org.apache.flink.runtime.executiongraph.Execution)15 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)15 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)12 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)12 HashMap (java.util.HashMap)10 ArrayList (java.util.ArrayList)8 TriggerStackTraceSample (org.apache.flink.runtime.messages.StackTraceSampleMessages.TriggerStackTraceSample)8 StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle)7 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)5 IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition)5 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)5 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)5 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)5 KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle)5