use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class SavepointLoaderTest method testLoadAndValidateSavepoint.
/**
 * Tests loading and validation of savepoints with a correct setup,
 * a max parallelism mismatch, and a missing task (with and without
 * allowNonRestoredState).
 */
@Test
public void testLoadAndValidateSavepoint() throws Exception {
    File tmp = tmpFolder.newFolder();
    int parallelism = 128128;
    long checkpointId = Integer.MAX_VALUE + 123123L;
    JobVertexID vertexId = new JobVertexID();

    TaskState state = mock(TaskState.class);
    when(state.getParallelism()).thenReturn(parallelism);
    when(state.getJobVertexID()).thenReturn(vertexId);
    when(state.getMaxParallelism()).thenReturn(parallelism);
    when(state.getChainLength()).thenReturn(1);

    Map<JobVertexID, TaskState> taskStates = new HashMap<>();
    taskStates.put(vertexId, state);

    JobID jobId = new JobID();

    // Store savepoint
    SavepointV1 savepoint = new SavepointV1(checkpointId, taskStates.values());
    String path = SavepointStore.storeSavepoint(tmp.getAbsolutePath(), savepoint);

    ExecutionJobVertex vertex = mock(ExecutionJobVertex.class);
    when(vertex.getParallelism()).thenReturn(parallelism);
    when(vertex.getMaxParallelism()).thenReturn(parallelism);

    Map<JobVertexID, ExecutionJobVertex> tasks = new HashMap<>();
    tasks.put(vertexId, vertex);

    ClassLoader ucl = Thread.currentThread().getContextClassLoader();

    // 1) Load and validate: everything correct
    CompletedCheckpoint loaded = SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, false);
    assertEquals(jobId, loaded.getJobId());
    assertEquals(checkpointId, loaded.getCheckpointID());

    // 2) Load and validate: max parallelism mismatch
    when(vertex.getMaxParallelism()).thenReturn(222);
    when(vertex.isMaxParallelismConfigured()).thenReturn(true);

    try {
        SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, false);
        fail("Did not throw expected Exception");
    } catch (IllegalStateException expected) {
        assertTrue(expected.getMessage().contains("Max parallelism mismatch"));
    }

    // 3) Load and validate: missing vertex
    assertNotNull(tasks.remove(vertexId));

    try {
        SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, false);
        fail("Did not throw expected Exception");
    } catch (IllegalStateException expected) {
        assertTrue(expected.getMessage().contains("allowNonRestoredState"));
    }

    // 4) Load and validate: ignore missing vertex
    SavepointLoader.loadAndValidateSavepoint(jobId, tasks, path, ucl, true);
}
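For context, a rough sketch of the per-vertex validation that this test exercises is shown below. It is not the actual SavepointLoader implementation: the iteration over getTaskStates() and the exact exception messages are assumptions derived from the assertions above.

// Hypothetical sketch only -- NOT the real SavepointLoader code; structure and messages are assumed.
static void validateSketch(Map<JobVertexID, ExecutionJobVertex> tasks, SavepointV1 savepoint, boolean allowNonRestoredState) {
    for (TaskState taskState : savepoint.getTaskStates()) {
        ExecutionJobVertex executionJobVertex = tasks.get(taskState.getJobVertexID());
        if (executionJobVertex != null) {
            if (executionJobVertex.getMaxParallelism() != taskState.getMaxParallelism()) {
                // surfaces as the "Max parallelism mismatch" failure checked in step 2)
                throw new IllegalStateException("Max parallelism mismatch between savepoint state and new program.");
            }
        } else if (!allowNonRestoredState) {
            // surfaces as the "allowNonRestoredState" failure checked in step 3)
            throw new IllegalStateException("Savepoint contains state for a task that is missing from the new program. "
                + "Set allowNonRestoredState to skip this state.");
        }
    }
}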
use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class ExecutionGraphCheckpointCoordinatorTest method createExecutionGraphAndEnableCheckpointing.
private ExecutionGraph createExecutionGraphAndEnableCheckpointing(CheckpointIDCounter counter, CompletedCheckpointStore store) throws Exception {
    ExecutionGraph executionGraph = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        new JobID(),
        "test",
        new Configuration(),
        new SerializedValue<>(new ExecutionConfig()),
        Time.days(1L),
        new NoRestartStrategy(),
        Collections.<BlobKey>emptyList(),
        Collections.<URL>emptyList(),
        new Scheduler(TestingUtils.defaultExecutionContext()),
        ClassLoader.getSystemClassLoader(),
        new UnregisteredMetricsGroup());

    executionGraph.enableCheckpointing(
        100,
        100,
        100,
        1,
        ExternalizedCheckpointSettings.none(),
        Collections.<ExecutionJobVertex>emptyList(),
        Collections.<ExecutionJobVertex>emptyList(),
        Collections.<ExecutionJobVertex>emptyList(),
        counter,
        store,
        null,
        null,
        CheckpointStatsTrackerTest.createTestTracker());

    JobVertex jobVertex = new JobVertex("MockVertex");
    jobVertex.setInvokableClass(AbstractInvokable.class);
    executionGraph.attachJobGraph(Collections.singletonList(jobVertex));

    return executionGraph;
}
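The helper above is meant to be driven from a test; a minimal usage sketch could look like the following. StandaloneCheckpointIDCounter and StandaloneCompletedCheckpointStore are Flink's in-memory implementations, but whether the surrounding test class uses exactly these is an assumption.

// Hypothetical usage of the helper above (not taken verbatim from the original test class).
CheckpointIDCounter counter = new StandaloneCheckpointIDCounter();
CompletedCheckpointStore store = new StandaloneCompletedCheckpointStore(1);

ExecutionGraph graph = createExecutionGraphAndEnableCheckpointing(counter, store);

// enableCheckpointing() should have created a checkpoint coordinator for the graph
assertNotNull(graph.getCheckpointCoordinator());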
use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class CheckpointStatsTrackerTest method testTrackerWithoutHistory.
/**
 * Tests that the configured number of remembered checkpoints
 * (the history size) is respected.
 */
@Test
public void testTrackerWithoutHistory() throws Exception {
    int numberOfSubtasks = 3;

    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getJobVertexId()).thenReturn(new JobVertexID());
    when(jobVertex.getParallelism()).thenReturn(numberOfSubtasks);

    CheckpointStatsTracker tracker = new CheckpointStatsTracker(
        0,
        Collections.singletonList(jobVertex),
        mock(JobSnapshottingSettings.class),
        new UnregisteredMetricsGroup());

    PendingCheckpointStats pending = tracker.reportPendingCheckpoint(0, 1, CheckpointProperties.forStandardCheckpoint());
    pending.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(0));
    pending.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(1));
    pending.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(2));
    pending.reportCompletedCheckpoint(null);

    CheckpointStatsSnapshot snapshot = tracker.createSnapshot();

    // History should be empty
    assertFalse(snapshot.getHistory().getCheckpoints().iterator().hasNext());

    // Counts should be available
    CheckpointStatsCounts counts = snapshot.getCounts();
    assertEquals(1, counts.getNumberOfCompletedCheckpoints());
    assertEquals(1, counts.getTotalNumberOfCheckpoints());

    // Summary should be available
    CompletedCheckpointStatsSummary summary = snapshot.getSummaryStats();
    assertEquals(1, summary.getStateSizeStats().getCount());
    assertEquals(1, summary.getEndToEndDurationStats().getCount());
    assertEquals(1, summary.getAlignmentBufferedStats().getCount());

    // Latest completed checkpoint
    assertNotNull(snapshot.getHistory().getLatestCompletedCheckpoint());
    assertEquals(0, snapshot.getHistory().getLatestCompletedCheckpoint().getCheckpointId());
}
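The createSubtaskStats(int) helper is referenced here but not shown on this page. A minimal Mockito-based stand-in (an assumption, not the original helper) could look like this:

// Hypothetical stand-in for the createSubtaskStats helper used above; the original implementation is not shown here.
private SubtaskStateStats createSubtaskStats(int index) {
    SubtaskStateStats stats = mock(SubtaskStateStats.class);
    when(stats.getSubtaskIndex()).thenReturn(index);
    return stats;
}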
use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class CheckpointStatsTrackerTest method testMetrics.
/**
* Tests the registered metrics.
*/
@Test
public void testMetrics() throws Exception {
    MetricGroup metricGroup = mock(MetricGroup.class);

    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getJobVertexId()).thenReturn(new JobVertexID());
    when(jobVertex.getParallelism()).thenReturn(1);

    new CheckpointStatsTracker(
        0,
        Collections.singletonList(jobVertex),
        mock(JobSnapshottingSettings.class),
        metricGroup);

    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_IN_PROGRESS_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_COMPLETED_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.NUMBER_OF_FAILED_CHECKPOINTS_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_RESTORED_CHECKPOINT_TIMESTAMP_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_SIZE_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_DURATION_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_ALIGNMENT_BUFFERED_METRIC), any(Gauge.class));
    verify(metricGroup, times(1)).gauge(eq(CheckpointStatsTracker.LATEST_COMPLETED_CHECKPOINT_EXTERNAL_PATH_METRIC), any(Gauge.class));

    // Make sure this test is adjusted when further metrics are added
    verify(metricGroup, times(9)).gauge(any(String.class), any(Gauge.class));
}
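To additionally assert on a gauge's value, one could capture the registered gauge with Mockito's ArgumentCaptor. The snippet below is only a sketch of that idea (not part of the original test), and it assumes the total-checkpoints gauge reports a Long that is 0 before any checkpoint has been reported.

// Hypothetical follow-up (not in the original test): capture a registered gauge and read its value.
ArgumentCaptor<Gauge> gaugeCaptor = ArgumentCaptor.forClass(Gauge.class);
verify(metricGroup).gauge(eq(CheckpointStatsTracker.NUMBER_OF_CHECKPOINTS_METRIC), gaugeCaptor.capture());

// No checkpoints have been reported yet, so the count is expected to be 0.
assertEquals(0L, gaugeCaptor.getValue().getValue());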
use of org.apache.flink.runtime.executiongraph.ExecutionJobVertex in project flink by apache.
the class CheckpointStatsTrackerTest method testCheckpointTracking.
/**
* Tests tracking of checkpoints.
*/
@Test
public void testCheckpointTracking() throws Exception {
    int numberOfSubtasks = 3;

    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getJobVertexId()).thenReturn(new JobVertexID());
    when(jobVertex.getParallelism()).thenReturn(numberOfSubtasks);

    CheckpointStatsTracker tracker = new CheckpointStatsTracker(
        10,
        Collections.singletonList(jobVertex),
        mock(JobSnapshottingSettings.class),
        new UnregisteredMetricsGroup());

    // Completed checkpoint
    PendingCheckpointStats completed1 = tracker.reportPendingCheckpoint(0, 1, CheckpointProperties.forStandardCheckpoint());
    completed1.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(0));
    completed1.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(1));
    completed1.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(2));
    completed1.reportCompletedCheckpoint(null);

    // Failed checkpoint
    PendingCheckpointStats failed = tracker.reportPendingCheckpoint(1, 1, CheckpointProperties.forStandardCheckpoint());
    failed.reportFailedCheckpoint(12, null);

    // Completed savepoint
    PendingCheckpointStats savepoint = tracker.reportPendingCheckpoint(2, 1, CheckpointProperties.forStandardSavepoint());
    savepoint.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(0));
    savepoint.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(1));
    savepoint.reportSubtaskStats(jobVertex.getJobVertexId(), createSubtaskStats(2));
    savepoint.reportCompletedCheckpoint(null);

    // In Progress
    PendingCheckpointStats inProgress = tracker.reportPendingCheckpoint(3, 1, CheckpointProperties.forStandardCheckpoint());

    RestoredCheckpointStats restored = new RestoredCheckpointStats(81, CheckpointProperties.forStandardCheckpoint(), 123, null);
    tracker.reportRestoredCheckpoint(restored);

    CheckpointStatsSnapshot snapshot = tracker.createSnapshot();

    // Counts
    CheckpointStatsCounts counts = snapshot.getCounts();
    assertEquals(4, counts.getTotalNumberOfCheckpoints());
    assertEquals(1, counts.getNumberOfInProgressCheckpoints());
    assertEquals(2, counts.getNumberOfCompletedCheckpoints());
    assertEquals(1, counts.getNumberOfFailedCheckpoints());

    // Summary stats
    CompletedCheckpointStatsSummary summary = snapshot.getSummaryStats();
    assertEquals(2, summary.getStateSizeStats().getCount());
    assertEquals(2, summary.getEndToEndDurationStats().getCount());
    assertEquals(2, summary.getAlignmentBufferedStats().getCount());

    // History
    CheckpointStatsHistory history = snapshot.getHistory();
    Iterator<AbstractCheckpointStats> it = history.getCheckpoints().iterator();

    assertTrue(it.hasNext());
    AbstractCheckpointStats stats = it.next();
    assertEquals(3, stats.getCheckpointId());
    assertTrue(stats.getStatus().isInProgress());

    assertTrue(it.hasNext());
    stats = it.next();
    assertEquals(2, stats.getCheckpointId());
    assertTrue(stats.getStatus().isCompleted());

    assertTrue(it.hasNext());
    stats = it.next();
    assertEquals(1, stats.getCheckpointId());
    assertTrue(stats.getStatus().isFailed());

    assertTrue(it.hasNext());
    stats = it.next();
    assertEquals(0, stats.getCheckpointId());
    assertTrue(stats.getStatus().isCompleted());

    assertFalse(it.hasNext());

    // Latest checkpoints
    assertEquals(completed1.getCheckpointId(), snapshot.getHistory().getLatestCompletedCheckpoint().getCheckpointId());
    assertEquals(savepoint.getCheckpointId(), snapshot.getHistory().getLatestSavepoint().getCheckpointId());
    assertEquals(failed.getCheckpointId(), snapshot.getHistory().getLatestFailedCheckpoint().getCheckpointId());
    assertEquals(restored, snapshot.getLatestRestoredCheckpoint());
}