Search in sources :

Example 46 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointSettingsSerializableTest method testDeserializationOfUserCodeWithUserClassLoader.

/**
 * Builds checkpointing settings whose state backend, checkpoint storage and master hook factory
 * all wrap an object obtained from {@code ClassLoaderUtils}, round-trips the job graph through
 * serialization, and checks that an execution graph can be built from the copy when the matching
 * class loader is supplied.
 */
@Test
public void testDeserializationOfUserCodeWithUserClassLoader() throws Exception {
    final ClassLoaderUtils.ObjectAndClassLoader<Serializable> loaded =
            ClassLoaderUtils.createSerializableObjectFromNewClassLoader();
    final ClassLoader userClassLoader = loaded.getClassLoader();
    final Serializable userObject = loaded.getObject();

    // Master hooks, serialized up front so they travel with the settings.
    final MasterTriggerRestoreHook.Factory[] hookFactories =
            new MasterTriggerRestoreHook.Factory[] {new TestFactory(userObject)};
    final SerializedValue<MasterTriggerRestoreHook.Factory[]> serializedHooks =
            new SerializedValue<>(hookFactories);

    // Individual pieces of the checkpointing settings, created in constructor-argument order.
    final CheckpointCoordinatorConfiguration coordinatorConfig =
            new CheckpointCoordinatorConfiguration(
                    1000L,
                    10000L,
                    0L,
                    1,
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION,
                    true,
                    false,
                    0,
                    0);
    final SerializedValue<StateBackend> serializedBackend =
            new SerializedValue<StateBackend>(new CustomStateBackend(userObject));
    final SerializedValue<CheckpointStorage> serializedStorage =
            new SerializedValue<CheckpointStorage>(new CustomCheckpointStorage(userObject));
    final JobCheckpointingSettings settings =
            new JobCheckpointingSettings(
                    coordinatorConfig,
                    serializedBackend,
                    TernaryBoolean.UNDEFINED,
                    serializedStorage,
                    serializedHooks);

    final JobGraph jobGraph =
            JobGraphBuilder.newStreamingJobGraphBuilder()
                    .setJobCheckpointingSettings(settings)
                    .build();

    // Serialize/deserialize the job graph to see if the behavior is correct under
    // distributed execution.
    final JobGraph roundTripped = CommonTestUtils.createCopySerializable(jobGraph);

    final ExecutionGraph executionGraph =
            TestingDefaultExecutionGraphBuilder.newBuilder()
                    .setJobGraph(roundTripped)
                    .setUserClassLoader(userClassLoader)
                    .build();

    // Exactly one master hook must have been registered with the coordinator.
    assertEquals(
            1, executionGraph.getCheckpointCoordinator().getNumberOfRegisteredMasterHooks());

    // The state backend of the original (non-copied) job graph must deserialize with the
    // user class loader into the custom backend type.
    final StateBackend restoredBackend =
            jobGraph.getCheckpointingSettings()
                    .getDefaultStateBackend()
                    .deserializeValue(userClassLoader);
    assertTrue(restoredBackend instanceof CustomStateBackend);
}
Also used : Serializable(java.io.Serializable) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) SerializedValue(org.apache.flink.util.SerializedValue) StateBackend(org.apache.flink.runtime.state.StateBackend) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) AbstractKeyedStateBackend(org.apache.flink.runtime.state.AbstractKeyedStateBackend) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ClassLoaderUtils(org.apache.flink.testutils.ClassLoaderUtils) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) Test(org.junit.Test)

Example 47 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointStatsTrackerTest method testTrackerWithoutHistory.

/**
 * Tests that the number of remembered checkpoints configuration is respected: a tracker created
 * with a history size of 0 exposes an empty history, while counts, summary statistics and the
 * latest completed checkpoint are still reported.
 */
@Test
public void testTrackerWithoutHistory() throws Exception {
    JobVertexID vertexId = new JobVertexID();
    ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId, 3, 256)
                    .build();
    ExecutionJobVertex executionVertex = executionGraph.getJobVertex(vertexId);

    // First constructor argument 0: no checkpoints are kept in the history.
    CheckpointStatsTracker tracker = new CheckpointStatsTracker(0, new UnregisteredMetricsGroup());

    CheckpointProperties checkpointProps =
            CheckpointProperties.forCheckpoint(
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION);
    PendingCheckpointStats pendingStats =
            tracker.reportPendingCheckpoint(
                    0, 1, checkpointProps, singletonMap(vertexId, executionVertex.getParallelism()));

    // Report all three subtasks (indices 0..2) and then complete the checkpoint.
    for (int subtask = 0; subtask < 3; subtask++) {
        pendingStats.reportSubtaskStats(vertexId, createSubtaskStats(subtask));
    }
    pendingStats.reportCompletedCheckpoint(null);

    CheckpointStatsSnapshot snapshot = tracker.createSnapshot();

    // History should be empty
    assertFalse(snapshot.getHistory().getCheckpoints().iterator().hasNext());

    // Counts should be available
    CheckpointStatsCounts checkpointCounts = snapshot.getCounts();
    assertEquals(1, checkpointCounts.getNumberOfCompletedCheckpoints());
    assertEquals(1, checkpointCounts.getTotalNumberOfCheckpoints());

    // Summary should be available
    CompletedCheckpointStatsSummarySnapshot summaryStats = snapshot.getSummaryStats();
    assertEquals(1, summaryStats.getStateSizeStats().getCount());
    assertEquals(1, summaryStats.getEndToEndDurationStats().getCount());

    // Latest completed checkpoint
    assertNotNull(snapshot.getHistory().getLatestCompletedCheckpoint());
    assertEquals(0, snapshot.getHistory().getLatestCompletedCheckpoint().getCheckpointId());
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 48 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointStatsTrackerTest method testCheckpointTracking.

/**
 * Tests tracking of checkpoints: reports a completed checkpoint, a failed checkpoint, a
 * completed savepoint, an in-progress checkpoint and a restored checkpoint, then checks the
 * counts, summary statistics, history order and "latest" entries of the resulting snapshot.
 */
@Test
public void testCheckpointTracking() throws Exception {
    JobVertexID vertexId = new JobVertexID();
    ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(vertexId, 3, 256)
                    .build();
    ExecutionJobVertex executionVertex = executionGraph.getJobVertex(vertexId);
    Map<JobVertexID, Integer> parallelismByVertex =
            singletonMap(vertexId, executionVertex.getParallelism());

    CheckpointStatsTracker tracker = new CheckpointStatsTracker(10, new UnregisteredMetricsGroup());
    CheckpointProperties checkpointProps =
            CheckpointProperties.forCheckpoint(
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION);

    // Completed checkpoint (id 0)
    PendingCheckpointStats firstCompleted =
            tracker.reportPendingCheckpoint(0, 1, checkpointProps, parallelismByVertex);
    for (int subtask = 0; subtask < 3; subtask++) {
        firstCompleted.reportSubtaskStats(vertexId, createSubtaskStats(subtask));
    }
    firstCompleted.reportCompletedCheckpoint(null);

    // Failed checkpoint (id 1)
    PendingCheckpointStats failedCheckpoint =
            tracker.reportPendingCheckpoint(1, 1, checkpointProps, parallelismByVertex);
    failedCheckpoint.reportFailedCheckpoint(12, null);

    // Completed savepoint (id 2)
    PendingCheckpointStats completedSavepoint =
            tracker.reportPendingCheckpoint(
                    2,
                    1,
                    CheckpointProperties.forSavepoint(true, SavepointFormatType.CANONICAL),
                    parallelismByVertex);
    for (int subtask = 0; subtask < 3; subtask++) {
        completedSavepoint.reportSubtaskStats(vertexId, createSubtaskStats(subtask));
    }
    completedSavepoint.reportCompletedCheckpoint(null);

    // In Progress (id 3); never completed or failed, so the handle itself is not used again
    PendingCheckpointStats stillPending =
            tracker.reportPendingCheckpoint(3, 1, checkpointProps, parallelismByVertex);

    // Restored checkpoint
    RestoredCheckpointStats restoredStats =
            new RestoredCheckpointStats(81, checkpointProps, 123, null);
    tracker.reportRestoredCheckpoint(restoredStats);

    CheckpointStatsSnapshot snapshot = tracker.createSnapshot();

    // Counts
    CheckpointStatsCounts checkpointCounts = snapshot.getCounts();
    assertEquals(4, checkpointCounts.getTotalNumberOfCheckpoints());
    assertEquals(1, checkpointCounts.getNumberOfInProgressCheckpoints());
    assertEquals(2, checkpointCounts.getNumberOfCompletedCheckpoints());
    assertEquals(1, checkpointCounts.getNumberOfFailedCheckpoints());

    // Summary stats
    CompletedCheckpointStatsSummarySnapshot summaryStats = snapshot.getSummaryStats();
    assertEquals(2, summaryStats.getStateSizeStats().getCount());
    assertEquals(2, summaryStats.getEndToEndDurationStats().getCount());

    // History: the iterator is expected to yield ids 3, 2, 1, 0 in that order
    CheckpointStatsHistory history = snapshot.getHistory();
    Iterator<AbstractCheckpointStats> historyIterator = history.getCheckpoints().iterator();

    assertTrue(historyIterator.hasNext());
    AbstractCheckpointStats entry = historyIterator.next();
    assertEquals(3, entry.getCheckpointId());
    assertTrue(entry.getStatus().isInProgress());

    assertTrue(historyIterator.hasNext());
    entry = historyIterator.next();
    assertEquals(2, entry.getCheckpointId());
    assertTrue(entry.getStatus().isCompleted());

    assertTrue(historyIterator.hasNext());
    entry = historyIterator.next();
    assertEquals(1, entry.getCheckpointId());
    assertTrue(entry.getStatus().isFailed());

    assertTrue(historyIterator.hasNext());
    entry = historyIterator.next();
    assertEquals(0, entry.getCheckpointId());
    assertTrue(entry.getStatus().isCompleted());

    assertFalse(historyIterator.hasNext());

    // Latest checkpoints
    assertEquals(
            firstCompleted.getCheckpointId(),
            snapshot.getHistory().getLatestCompletedCheckpoint().getCheckpointId());
    assertEquals(
            completedSavepoint.getCheckpointId(),
            snapshot.getHistory().getLatestSavepoint().getCheckpointId());
    assertEquals(
            failedCheckpoint.getCheckpointId(),
            snapshot.getHistory().getLatestFailedCheckpoint().getCheckpointId());
    assertEquals(restoredStats, snapshot.getLatestRestoredCheckpoint());
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 49 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class CheckpointCoordinatorMasterHooksTest method testHooksAreCalledOnRestore.

/**
 * Registers two stateful hooks and one stateless hook on a checkpoint coordinator, adds a
 * completed checkpoint carrying master state for the two stateful hooks, restores it, and
 * verifies that each hook's {@code restoreCheckpoint} is invoked exactly once with the
 * checkpoint id and its own state (a null state for the stateless hook).
 */
@Test
public void testHooksAreCalledOnRestore() throws Exception {
    final String firstHookId = "id1";
    final String secondHookId = "id2";

    // Hook states and their serialized forms.
    final String firstState = "the-test-string-state";
    final byte[] firstStateBytes = new StringSerializer().serialize(firstState);
    final long secondState = 987654321L;
    final byte[] secondStateBytes = new LongSerializer().serialize(secondState);

    final List<MasterState> masterStates =
            Arrays.asList(
                    new MasterState(firstHookId, firstStateBytes, StringSerializer.VERSION),
                    new MasterState(secondHookId, secondStateBytes, LongSerializer.VERSION));

    // Stateful hooks are stubbed to throw if a checkpoint is triggered on them.
    final MasterTriggerRestoreHook<String> stringHook =
            mockGeneric(MasterTriggerRestoreHook.class);
    when(stringHook.getIdentifier()).thenReturn(firstHookId);
    when(stringHook.createCheckpointDataSerializer()).thenReturn(new StringSerializer());
    when(stringHook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenThrow(new Exception("not expected"));

    final MasterTriggerRestoreHook<Long> longHook = mockGeneric(MasterTriggerRestoreHook.class);
    when(longHook.getIdentifier()).thenReturn(secondHookId);
    when(longHook.createCheckpointDataSerializer()).thenReturn(new LongSerializer());
    when(longHook.triggerCheckpoint(anyLong(), anyLong(), any(Executor.class)))
            .thenThrow(new Exception("not expected"));

    // Hook with an identifier only; no serializer is stubbed for it.
    final MasterTriggerRestoreHook<Void> hookWithoutState =
            mockGeneric(MasterTriggerRestoreHook.class);
    when(hookWithoutState.getIdentifier()).thenReturn("some-id");

    final JobID jobId = new JobID();
    final long checkpointId = 13L;

    final CheckpointProperties checkpointProps =
            CheckpointProperties.forCheckpoint(
                    CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION);
    final TestCompletedCheckpointStorageLocation storageLocation =
            new TestCompletedCheckpointStorageLocation();
    final CompletedCheckpoint completedCheckpoint =
            new CompletedCheckpoint(
                    jobId,
                    checkpointId,
                    123L,
                    125L,
                    Collections.<OperatorID, OperatorState>emptyMap(),
                    masterStates,
                    checkpointProps,
                    storageLocation);

    ExecutionGraph executionGraph =
            new CheckpointCoordinatorTestingUtils.CheckpointExecutionGraphBuilder()
                    .addJobVertex(new JobVertexID())
                    .build();
    CheckpointCoordinator coordinator = instantiateCheckpointCoordinator(executionGraph);

    // Registration order is kept as in the original: stateful, stateless, stateful.
    coordinator.addMasterHook(stringHook);
    coordinator.addMasterHook(hookWithoutState);
    coordinator.addMasterHook(longHook);

    coordinator
            .getCheckpointStore()
            .addCheckpointAndSubsumeOldestOne(
                    completedCheckpoint, new CheckpointsCleaner(), () -> {});
    coordinator.restoreLatestCheckpointedStateToAll(Collections.emptySet(), false);

    verify(stringHook, times(1)).restoreCheckpoint(eq(checkpointId), eq(firstState));
    verify(longHook, times(1)).restoreCheckpoint(eq(checkpointId), eq(secondState));
    verify(hookWithoutState, times(1)).restoreCheckpoint(eq(checkpointId), isNull(Void.class));
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TestCompletedCheckpointStorageLocation(org.apache.flink.runtime.state.testutils.TestCompletedCheckpointStorageLocation) IOException(java.io.IOException) ManuallyTriggeredScheduledExecutor(org.apache.flink.util.concurrent.ManuallyTriggeredScheduledExecutor) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) Executor(java.util.concurrent.Executor) Mockito.anyLong(org.mockito.Mockito.anyLong) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) StringSerializer(org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.StringSerializer) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 50 with ExecutionGraph

use of org.apache.flink.runtime.executiongraph.ExecutionGraph in project flink by apache.

the class StateAssignmentOperationTest method toExecutionVertices.

/**
 * Builds a streaming job graph and an execution graph from the given job vertices and maps the
 * generated operator ID of each vertex's first operator to the execution job vertex looked up
 * by that vertex's ID.
 *
 * @param jobVertices the job vertices to build the graph from
 * @return a map from generated operator ID to the corresponding execution job vertex
 * @throws JobException declared by the signature; presumably raised while building the
 *     execution graph
 * @throws JobExecutionException declared by the signature; presumably raised while building
 *     the execution graph
 */
private Map<OperatorID, ExecutionJobVertex> toExecutionVertices(JobVertex... jobVertices) throws JobException, JobExecutionException {
    final JobGraph streamingGraph = JobGraphTestUtils.streamingJobGraph(jobVertices);
    final ExecutionGraph executionGraph =
            TestingDefaultExecutionGraphBuilder.newBuilder().setJobGraph(streamingGraph).build();

    // Key: generated operator ID of the first operator of the vertex.
    final Function<JobVertex, OperatorID> firstOperatorId =
            vertex -> vertex.getOperatorIDs().get(0).getGeneratedOperatorID();

    // Value: execution job vertex for the vertex ID; any exception is rethrown unchecked.
    final Function<JobVertex, ExecutionJobVertex> executionVertexOf =
            vertex -> {
                try {
                    return executionGraph.getJobVertex(vertex.getID());
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            };

    return Arrays.stream(jobVertices)
            .collect(Collectors.toMap(firstOperatorId, executionVertexOf));
}
Also used : InflightDataRescalingDescriptorUtil.set(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.set) StateHandleDummyUtil.createNewOperatorStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewOperatorStateHandle) CoreMatchers.is(org.hamcrest.CoreMatchers.is) Arrays(java.util.Arrays) TestingDefaultExecutionGraphBuilder(org.apache.flink.runtime.executiongraph.TestingDefaultExecutionGraphBuilder) RANGE(org.apache.flink.runtime.io.network.api.writer.SubtaskStateMapper.RANGE) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SubtaskStateMapper(org.apache.flink.runtime.io.network.api.writer.SubtaskStateMapper) Random(java.util.Random) RESCALING(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptor.InflightDataGateOrPartitionRescalingDescriptor.MappingType.RESCALING) Collections.singletonList(java.util.Collections.singletonList) ARBITRARY(org.apache.flink.runtime.io.network.api.writer.SubtaskStateMapper.ARBITRARY) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) InflightDataGateOrPartitionRescalingDescriptor(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptor.InflightDataGateOrPartitionRescalingDescriptor) StateHandleDummyUtil.createNewResultSubpartitionStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewResultSubpartitionStateHandle) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) EnumMap(java.util.EnumMap) KeyedStateHandle(org.apache.flink.runtime.state.KeyedStateHandle) Set(java.util.Set) Collectors(java.util.stream.Collectors) List(java.util.List) StateHandleDummyUtil.createNewInputChannelStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewInputChannelStateHandle) Stream(java.util.stream.Stream) OperatorInstanceID(org.apache.flink.runtime.jobgraph.OperatorInstanceID) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) 
OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) InflightDataRescalingDescriptorUtil.rescalingDescriptor(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.rescalingDescriptor) IntStream(java.util.stream.IntStream) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) StateHandleDummyUtil.createNewKeyedStateHandle(org.apache.flink.runtime.checkpoint.StateHandleDummyUtil.createNewKeyedStateHandle) OperatorStreamStateHandle(org.apache.flink.runtime.state.OperatorStreamStateHandle) Function(java.util.function.Function) InflightDataRescalingDescriptorUtil.array(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.array) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) JobException(org.apache.flink.runtime.JobException) InflightDataRescalingDescriptorUtil.to(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.to) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Matchers.empty(org.hamcrest.Matchers.empty) Collections.emptySet(java.util.Collections.emptySet) ExecutionGraphTestUtils(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils) ROUND_ROBIN(org.apache.flink.runtime.io.network.api.writer.SubtaskStateMapper.ROUND_ROBIN) Test(org.junit.Test) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair) 
InflightDataRescalingDescriptorUtil.mappings(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptorUtil.mappings) ByteStreamStateHandle(org.apache.flink.runtime.state.memory.ByteStreamStateHandle) Assert(org.junit.Assert) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) JobException(org.apache.flink.runtime.JobException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException)

Aggregations

ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)120 Test (org.junit.Test)96 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)77 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)53 CheckpointCoordinatorBuilder (org.apache.flink.runtime.checkpoint.CheckpointCoordinatorTestingUtils.CheckpointCoordinatorBuilder)40 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)36 AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint)35 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)31 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)24 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)24 HashMap (java.util.HashMap)20 CompletableFuture (java.util.concurrent.CompletableFuture)19 JobID (org.apache.flink.api.common.JobID)19 ArrayList (java.util.ArrayList)17 HashSet (java.util.HashSet)17 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)17 DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)17 ExecutionException (java.util.concurrent.ExecutionException)13 Executor (java.util.concurrent.Executor)13 IOException (java.io.IOException)12