
Example 26 with ExecutionJobVertex

Use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.

In the class CheckpointStateRestoreTest, the method mockExecutionJobVertex:

private ExecutionJobVertex mockExecutionJobVertex(JobVertexID id, ExecutionVertex[] vertices) {
    // Mockito mock whose parallelism and max parallelism both equal the
    // number of supplied task vertices.
    ExecutionJobVertex vertex = mock(ExecutionJobVertex.class);
    when(vertex.getParallelism()).thenReturn(vertices.length);
    when(vertex.getMaxParallelism()).thenReturn(vertices.length);
    when(vertex.getJobVertexId()).thenReturn(id);
    when(vertex.getTaskVertices()).thenReturn(vertices);
    return vertex;
}
Also used: ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)
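
A short usage sketch for this helper (a hypothetical call site, not taken from the test class): the mocked task vertices are built first and then wired into the mocked job vertex.

// Hypothetical usage of the helper above; the vertex count is arbitrary.
JobVertexID id = new JobVertexID();
ExecutionVertex[] vertices = new ExecutionVertex[] {
    mock(ExecutionVertex.class),
    mock(ExecutionVertex.class)
};
ExecutionJobVertex jobVertex = mockExecutionJobVertex(id, vertices);
// Parallelism and max parallelism both mirror the array length.
assertEquals(2, jobVertex.getParallelism());
assertEquals(2, jobVertex.getMaxParallelism());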

Example 27 with ExecutionJobVertex

Use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.

In the class CheckpointStateRestoreTest, the method testNoCheckpointAvailable:

@Test
public void testNoCheckpointAvailable() {
    try {
        CheckpointCoordinator coord = new CheckpointCoordinator(
            new JobID(),
            // checkpoint interval and timeout (long enough to never fire here)
            200000L,
            200000L,
            // min pause between checkpoints and max concurrent checkpoints
            0,
            Integer.MAX_VALUE,
            ExternalizedCheckpointSettings.none(),
            // tasks to trigger, to wait for, and to commit to
            new ExecutionVertex[] { mock(ExecutionVertex.class) },
            new ExecutionVertex[] { mock(ExecutionVertex.class) },
            new ExecutionVertex[0],
            new StandaloneCheckpointIDCounter(),
            new StandaloneCompletedCheckpointStore(1),
            null,
            Executors.directExecutor());
        try {
            coord.restoreLatestCheckpointedState(new HashMap<JobVertexID, ExecutionJobVertex>(), true, false);
            fail("this should throw an exception");
        } catch (IllegalStateException e) {
        // expected
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex), JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID), JobID (org.apache.flink.api.common.JobID), ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex), Test (org.junit.Test)
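
The expected IllegalStateException comes from the coordinator refusing to restore when its store holds no completed checkpoint and the errorIfNoCheckpoint flag (the first boolean passed above) is set. A minimal sketch of that guard, with illustrative names rather than the verbatim Flink source:

// Illustrative sketch of the guard inside restoreLatestCheckpointedState.
CompletedCheckpoint latest = completedCheckpointStore.getLatestCheckpoint();
if (latest == null) {
    if (errorIfNoCheckpoint) {
        // This is the path the test exercises.
        throw new IllegalStateException("No completed checkpoint available");
    }
    return false;
}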

Example 28 with ExecutionJobVertex

Use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.

In the class CheckpointCoordinatorTest, the method testRestoreLatestCheckpointFailureWhenMaxParallelismChanges:

/**
	 * Tests that the checkpoint restoration fails if the max parallelism of the job vertices has
	 * changed.
	 *
	 * @throws Exception
	 */
@Test(expected = IllegalStateException.class)
public void testRestoreLatestCheckpointFailureWhenMaxParallelismChanges() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(
        jid,
        // checkpoint interval and timeout
        600000,
        600000,
        // min pause between checkpoints and max concurrent checkpoints
        0,
        Integer.MAX_VALUE,
        ExternalizedCheckpointSettings.none(),
        // tasks to trigger, to wait for, and to commit to
        arrayExecutionVertices,
        arrayExecutionVertices,
        arrayExecutionVertices,
        new StandaloneCheckpointIDCounter(),
        new StandaloneCompletedCheckpointStore(1),
        null,
        Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertEquals(1, coord.getPendingCheckpoints().size());
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID2, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    int newMaxParallelism1 = 20;
    int newMaxParallelism2 = 42;
    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, newMaxParallelism1);
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, newMaxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);
    fail("The restoration should have failed because the max parallelism changed.");
}
Also used: HashMap (java.util.HashMap), JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID), ArrayList (java.util.ArrayList), KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange), ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex), KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle), StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle), ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle), ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex), AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint), DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
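
The three-argument mockExecutionJobVertex overload used here is defined elsewhere in CheckpointCoordinatorTest. A minimal sketch of what it plausibly sets up, assuming Mockito mocks like the two-argument variant in Example 26; the Execution wiring is needed because the test dereferences getCurrentExecutionAttempt().getAttemptId() on every task vertex:

// Sketch only; the real helper lives in CheckpointCoordinatorTest.
private ExecutionJobVertex mockExecutionJobVertex(JobVertexID id, int parallelism, int maxParallelism) {
    ExecutionVertex[] vertices = new ExecutionVertex[parallelism];
    for (int i = 0; i < parallelism; i++) {
        // Each task vertex needs a current execution attempt with a stable ID,
        // since the tests acknowledge checkpoints per attempt.
        Execution execution = mock(Execution.class);
        when(execution.getAttemptId()).thenReturn(new ExecutionAttemptID());
        ExecutionVertex vertex = mock(ExecutionVertex.class);
        when(vertex.getCurrentExecutionAttempt()).thenReturn(execution);
        vertices[i] = vertex;
    }
    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getJobVertexId()).thenReturn(id);
    when(jobVertex.getParallelism()).thenReturn(parallelism);
    when(jobVertex.getMaxParallelism()).thenReturn(maxParallelism);
    when(jobVertex.getTaskVertices()).thenReturn(vertices);
    return jobVertex;
}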

Example 29 with ExecutionJobVertex

Use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.

In the class CheckpointCoordinatorTest, the method testRestoreLatestCheckpointFailureWhenParallelismChanges:

/**
	 * Tests that checkpoint restoration fails if the parallelism of a job vertex with
	 * non-partitioned state has changed.
	 *
	 * @throws Exception
	 */
@Test(expected = IllegalStateException.class)
public void testRestoreLatestCheckpointFailureWhenParallelismChanges() throws Exception {
    final JobID jid = new JobID();
    final long timestamp = System.currentTimeMillis();
    final JobVertexID jobVertexID1 = new JobVertexID();
    final JobVertexID jobVertexID2 = new JobVertexID();
    int parallelism1 = 3;
    int parallelism2 = 2;
    int maxParallelism1 = 42;
    int maxParallelism2 = 13;
    final ExecutionJobVertex jobVertex1 = mockExecutionJobVertex(jobVertexID1, parallelism1, maxParallelism1);
    final ExecutionJobVertex jobVertex2 = mockExecutionJobVertex(jobVertexID2, parallelism2, maxParallelism2);
    List<ExecutionVertex> allExecutionVertices = new ArrayList<>(parallelism1 + parallelism2);
    allExecutionVertices.addAll(Arrays.asList(jobVertex1.getTaskVertices()));
    allExecutionVertices.addAll(Arrays.asList(jobVertex2.getTaskVertices()));
    ExecutionVertex[] arrayExecutionVertices = allExecutionVertices.toArray(new ExecutionVertex[allExecutionVertices.size()]);
    // set up the coordinator and validate the initial state
    CheckpointCoordinator coord = new CheckpointCoordinator(
        jid,
        // checkpoint interval and timeout
        600000,
        600000,
        // min pause between checkpoints and max concurrent checkpoints
        0,
        Integer.MAX_VALUE,
        ExternalizedCheckpointSettings.none(),
        // tasks to trigger, to wait for, and to commit to
        arrayExecutionVertices,
        arrayExecutionVertices,
        arrayExecutionVertices,
        new StandaloneCheckpointIDCounter(),
        new StandaloneCompletedCheckpointStore(1),
        null,
        Executors.directExecutor());
    // trigger the checkpoint
    coord.triggerCheckpoint(timestamp, false);
    assertEquals(1, coord.getPendingCheckpoints().size());
    long checkpointId = Iterables.getOnlyElement(coord.getPendingCheckpoints().keySet());
    CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointId, 0L);
    List<KeyGroupRange> keyGroupPartitions1 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism1, parallelism1);
    List<KeyGroupRange> keyGroupPartitions2 = StateAssignmentOperation.createKeyGroupPartitions(maxParallelism2, parallelism2);
    for (int index = 0; index < jobVertex1.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> valueSizeTuple = generateStateForVertex(jobVertexID1, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID1, keyGroupPartitions1.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(valueSizeTuple, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex1.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    for (int index = 0; index < jobVertex2.getParallelism(); index++) {
        ChainedStateHandle<StreamStateHandle> state = generateStateForVertex(jobVertexID2, index);
        KeyGroupsStateHandle keyGroupState = generateKeyGroupState(jobVertexID2, keyGroupPartitions2.get(index), false);
        SubtaskState checkpointStateHandles = new SubtaskState(state, null, null, keyGroupState, null);
        AcknowledgeCheckpoint acknowledgeCheckpoint = new AcknowledgeCheckpoint(jid, jobVertex2.getTaskVertices()[index].getCurrentExecutionAttempt().getAttemptId(), checkpointId, new CheckpointMetrics(), checkpointStateHandles);
        coord.receiveAcknowledgeMessage(acknowledgeCheckpoint);
    }
    List<CompletedCheckpoint> completedCheckpoints = coord.getSuccessfulCheckpoints();
    assertEquals(1, completedCheckpoints.size());
    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    int newParallelism1 = 4;
    int newParallelism2 = 3;
    final ExecutionJobVertex newJobVertex1 = mockExecutionJobVertex(jobVertexID1, newParallelism1, maxParallelism1);
    final ExecutionJobVertex newJobVertex2 = mockExecutionJobVertex(jobVertexID2, newParallelism2, maxParallelism2);
    tasks.put(jobVertexID1, newJobVertex1);
    tasks.put(jobVertexID2, newJobVertex2);
    coord.restoreLatestCheckpointedState(tasks, true, false);
    fail("The restoration should have failed because the parallelism of an vertex with " + "non-partitioned state changed.");
}
Also used: HashMap (java.util.HashMap), JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID), ArrayList (java.util.ArrayList), KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange), ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex), KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle), StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle), ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle), ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex), AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint), DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint), JobID (org.apache.flink.api.common.JobID), Test (org.junit.Test)
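
Both restore tests slice the key-group space with StateAssignmentOperation.createKeyGroupPartitions(maxParallelism, parallelism). A sketch of the contiguous, near-even split it produces; this mirrors the documented behavior, not the verbatim Flink implementation:

// Splits key groups [0, numberKeyGroups) into `parallelism` contiguous ranges.
public static List<KeyGroupRange> createKeyGroupPartitions(int numberKeyGroups, int parallelism) {
    List<KeyGroupRange> result = new ArrayList<>(parallelism);
    for (int i = 0; i < parallelism; i++) {
        int start = (i * numberKeyGroups + parallelism - 1) / parallelism;
        int end = ((i + 1) * numberKeyGroups - 1) / parallelism;
        result.add(new KeyGroupRange(start, end));
    }
    return result;
}
// With maxParallelism1 = 42 and parallelism1 = 3 this yields the ranges
// [0, 13], [14, 27], and [28, 41].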

Example 30 with ExecutionJobVertex

Use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.

In the class CheckpointStatsTrackerTest, the method testCreateSnapshot:

/**
	 * Tests that a new stats snapshot is only created if a checkpoint has been
	 * reported or updated since the last snapshot was taken.
	 */
@Test
public void testCreateSnapshot() throws Exception {
    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getJobVertexId()).thenReturn(new JobVertexID());
    when(jobVertex.getParallelism()).thenReturn(1);
    CheckpointStatsTracker tracker = new CheckpointStatsTracker(10, Collections.singletonList(jobVertex), mock(JobSnapshottingSettings.class), new UnregisteredMetricsGroup());
    CheckpointStatsSnapshot snapshot1 = tracker.createSnapshot();
    // Pending checkpoint => new snapshot
    PendingCheckpointStats pending = tracker.reportPendingCheckpoint(0, 1, CheckpointProperties.forStandardCheckpoint());
    pending.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(0));
    CheckpointStatsSnapshot snapshot2 = tracker.createSnapshot();
    assertNotEquals(snapshot1, snapshot2);
    assertEquals(snapshot2, tracker.createSnapshot());
    // Complete checkpoint => new snapshot
    pending.reportCompletedCheckpoint(null);
    CheckpointStatsSnapshot snapshot3 = tracker.createSnapshot();
    assertNotEquals(snapshot2, snapshot3);
    // Restore operation => new snapshot
    tracker.reportRestoredCheckpoint(new RestoredCheckpointStats(12, CheckpointProperties.forStandardCheckpoint(), 12, null));
    CheckpointStatsSnapshot snapshot4 = tracker.createSnapshot();
    assertNotEquals(snapshot3, snapshot4);
    assertEquals(snapshot4, tracker.createSnapshot());
}
Also used: UnregisteredMetricsGroup (org.apache.flink.metrics.groups.UnregisteredMetricsGroup), ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex), JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID), JobSnapshottingSettings (org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings), Test (org.junit.Test)
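
What the assertions verify is a caching contract: createSnapshot returns the same snapshot instance until new checkpoint activity is reported. An illustrative sketch of such logic; the field and helper names here are assumptions, not the actual CheckpointStatsTracker internals:

// Returns a cached snapshot unless stats have changed since the last call.
CheckpointStatsSnapshot createSnapshot() {
    synchronized (lock) {
        if (!dirty && latestSnapshot != null) {
            return latestSnapshot; // nothing reported since the last snapshot
        }
        latestSnapshot = buildSnapshot(); // hypothetical rebuild from current stats
        dirty = false;
        return latestSnapshot;
    }
}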

Aggregations

ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex): 37 uses
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 25 uses
Test (org.junit.Test): 25 uses
JobID (org.apache.flink.api.common.JobID): 17 uses
HashMap (java.util.HashMap): 12 uses
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 12 uses
KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange): 9 uses
AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint): 8 uses
UnregisteredMetricsGroup (org.apache.flink.metrics.groups.UnregisteredMetricsGroup): 7 uses
StreamStateHandle (org.apache.flink.runtime.state.StreamStateHandle): 7 uses
ArrayList (java.util.ArrayList): 6 uses
JobSnapshottingSettings (org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings): 6 uses
DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint): 6 uses
ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph): 5 uses
KeyGroupsStateHandle (org.apache.flink.runtime.state.KeyGroupsStateHandle): 5 uses
Configuration (org.apache.flink.configuration.Configuration): 4 uses
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 4 uses
ByteStreamStateHandle (org.apache.flink.runtime.state.memory.ByteStreamStateHandle): 4 uses
JsonNode (com.fasterxml.jackson.databind.JsonNode): 3 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 3 uses