Search in sources :

Example 56 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project.

In class CoordinatorShutdownTest, method testCoordinatorShutsDownOnSuccess.

/**
 * Checks that the checkpoint coordinator is shut down once the job has finished.
 *
 * <p>The test starts a local mini cluster configured with one task manager and one
 * task slot, submits a single-vertex job that runs {@code BlockingInvokable} with
 * snapshot settings attached, unblocks the invokable, waits for the execution graph
 * to finish, and then asserts that the graph's checkpoint coordinator is either
 * absent or reports that it is shut down.
 *
 * @throws Exception if any step fails; the exception is propagated unchanged so the
 *         test runner reports the original type and full stack trace instead of a
 *         bare (possibly {@code null}) message
 */
@Test
public void testCoordinatorShutsDownOnSuccess() throws Exception {
    LocalFlinkMiniCluster cluster = null;
    try {
        Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, 1);
        cluster = new LocalFlinkMiniCluster(config, true);
        cluster.start();

        // build a test graph with snapshotting enabled
        JobVertex vertex = new JobVertex("Test Vertex");
        vertex.setInvokableClass(BlockingInvokable.class);
        List<JobVertexID> vertexIdList = Collections.singletonList(vertex.getID());
        JobGraph testGraph = new JobGraph("test job", vertex);
        testGraph.setSnapshotSettings(new JobSnapshottingSettings(
                vertexIdList, vertexIdList, vertexIdList,
                5000, 60000, 0L, Integer.MAX_VALUE,
                ExternalizedCheckpointSettings.none(), null, true));

        ActorGateway jmGateway = cluster.getLeaderGateway(TestingUtils.TESTING_DURATION());
        FiniteDuration timeout = new FiniteDuration(60, TimeUnit.SECONDS);
        JobManagerMessages.SubmitJob submitMessage =
                new JobManagerMessages.SubmitJob(testGraph, ListeningBehaviour.EXECUTION_RESULT);

        // submit is successful, but then the job blocks due to the invokable
        Future<Object> submitFuture = jmGateway.ask(submitMessage, timeout);
        Await.result(submitFuture, timeout);

        // get the execution graph and store the ExecutionGraph reference
        Future<Object> jobRequestFuture =
                jmGateway.ask(new JobManagerMessages.RequestJob(testGraph.getJobID()), timeout);
        ExecutionGraph graph = (ExecutionGraph)
                ((JobManagerMessages.JobFound) Await.result(jobRequestFuture, timeout)).executionGraph();
        assertNotNull(graph);

        // release the invokable so the job can run to completion
        BlockingInvokable.unblock();
        graph.waitUntilFinished();

        // verify that the coordinator was shut down
        CheckpointCoordinator coord = graph.getCheckpointCoordinator();
        assertTrue(coord == null || coord.isShutdown());
    } finally {
        // always tear the cluster down, whether the test passed or failed
        if (cluster != null) {
            cluster.shutdown();
            cluster.awaitTermination();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobSnapshottingSettings(org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) LocalFlinkMiniCluster(org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) Test(org.junit.Test)

Example 57 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project.

In class FailedCheckpointStatsTest, method testEndToEndDuration.

/**
 * Verifies that a failed checkpoint reports, as its end to end duration, the
 * time that elapsed between the trigger timestamp and the failure timestamp.
 */
@Test
public void testEndToEndDuration() throws Exception {
    final long triggerTimestamp = 10123;
    final long duration = 123912931293L;
    // the failure happens exactly "duration" after the trigger
    final long failureTimestamp = triggerTimestamp + duration;

    // a single vertex with one subtask is enough for this check
    JobVertexID vertexId = new JobVertexID();
    Map<JobVertexID, TaskStateStats> statsByVertex = new HashMap<>();
    statsByVertex.put(vertexId, new TaskStateStats(vertexId, 1));

    FailedCheckpointStats stats = new FailedCheckpointStats(
            0,
            triggerTimestamp,
            CheckpointProperties.forStandardCheckpoint(),
            1,
            statsByVertex,
            0,
            0,
            0,
            failureTimestamp,
            null,
            null);

    assertEquals(duration, stats.getEndToEndDuration());
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Test(org.junit.Test)

Example 58 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project.

In class PendingCheckpointStatsTest, method testReportCompletedCheckpoint.

/**
 * Verifies that reporting a pending checkpoint as completed passes a
 * {@code CompletedCheckpointStats} to the callback whose values match the
 * pending stats, and that the returned discard callback marks it as discarded.
 */
@Test
public void testReportCompletedCheckpoint() throws Exception {
    TaskStateStats firstTask = new TaskStateStats(new JobVertexID(), 3);
    TaskStateStats secondTask = new TaskStateStats(new JobVertexID(), 4);

    HashMap<JobVertexID, TaskStateStats> statsByVertex = new HashMap<>();
    statsByVertex.put(firstTask.getJobVertexId(), firstTask);
    statsByVertex.put(secondTask.getJobVertexId(), secondTask);

    CheckpointStatsTracker.PendingCheckpointStatsCallback trackerCallback =
            mock(CheckpointStatsTracker.PendingCheckpointStatsCallback.class);

    PendingCheckpointStats pending = new PendingCheckpointStats(
            0,
            1,
            CheckpointProperties.forStandardCheckpoint(),
            firstTask.getNumberOfSubtasks() + secondTask.getNumberOfSubtasks(),
            statsByVertex,
            trackerCallback);

    // Report every subtask, first task before second task
    for (TaskStateStats task : new TaskStateStats[] { firstTask, secondTask }) {
        for (int subtaskIndex = 0; subtaskIndex < task.getNumberOfSubtasks(); subtaskIndex++) {
            pending.reportSubtaskStats(task.getJobVertexId(), createSubtaskStats(subtaskIndex));
        }
    }

    // Report completed
    String externalPath = "asdjkasdjkasd";
    CompletedCheckpointStats.DiscardCallback discardCallback =
            pending.reportCompletedCheckpoint(externalPath);

    // Grab the stats instance that was handed to the callback
    ArgumentCaptor<CompletedCheckpointStats> captor =
            ArgumentCaptor.forClass(CompletedCheckpointStats.class);
    verify(trackerCallback).reportCompletedCheckpoint(captor.capture());
    CompletedCheckpointStats completed = captor.getValue();

    assertNotNull(completed);
    assertEquals(CheckpointStatsStatus.COMPLETED, completed.getStatus());

    // The discarded flag flips only after the discard callback is notified
    assertFalse(completed.isDiscarded());
    discardCallback.notifyDiscardedCheckpoint();
    assertTrue(completed.isDiscarded());

    // Completed stats must mirror the pending stats
    assertEquals(externalPath, completed.getExternalPath());
    assertEquals(pending.getCheckpointId(), completed.getCheckpointId());
    assertEquals(pending.getNumberOfAcknowledgedSubtasks(), completed.getNumberOfAcknowledgedSubtasks());
    assertEquals(pending.getLatestAcknowledgedSubtaskStats(), completed.getLatestAcknowledgedSubtaskStats());
    assertEquals(pending.getLatestAckTimestamp(), completed.getLatestAckTimestamp());
    assertEquals(pending.getEndToEndDuration(), completed.getEndToEndDuration());
    assertEquals(pending.getStateSize(), completed.getStateSize());
    assertEquals(pending.getAlignmentBuffered(), completed.getAlignmentBuffered());
    assertEquals(firstTask, completed.getTaskStateStats(firstTask.getJobVertexId()));
    assertEquals(secondTask, completed.getTaskStateStats(secondTask.getJobVertexId()));
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Test(org.junit.Test)

Example 59 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project.

In class PendingCheckpointStatsTest, method testReportSubtaskStats.

/**
 * Verifies the getters of {@code PendingCheckpointStats} before any subtask
 * has reported and after each reported subtask, and that reports for an
 * unknown vertex or an already reported subtask are rejected.
 */
@Test
public void testReportSubtaskStats() throws Exception {
    final long checkpointId = Integer.MAX_VALUE + 1222L;
    final long triggerTimestamp = Integer.MAX_VALUE - 1239L;
    CheckpointProperties props = CheckpointProperties.forStandardCheckpoint();

    TaskStateStats firstTask = new TaskStateStats(new JobVertexID(), 3);
    TaskStateStats secondTask = new TaskStateStats(new JobVertexID(), 4);
    int totalSubtaskCount = firstTask.getNumberOfSubtasks() + secondTask.getNumberOfSubtasks();

    HashMap<JobVertexID, TaskStateStats> statsByVertex = new HashMap<>();
    statsByVertex.put(firstTask.getJobVertexId(), firstTask);
    statsByVertex.put(secondTask.getJobVertexId(), secondTask);

    CheckpointStatsTracker.PendingCheckpointStatsCallback trackerCallback =
            mock(CheckpointStatsTracker.PendingCheckpointStatsCallback.class);
    PendingCheckpointStats pending = new PendingCheckpointStats(
            checkpointId, triggerTimestamp, props, totalSubtaskCount, statsByVertex, trackerCallback);

    // Check initial state
    assertEquals(checkpointId, pending.getCheckpointId());
    assertEquals(triggerTimestamp, pending.getTriggerTimestamp());
    assertEquals(props, pending.getProperties());
    assertEquals(CheckpointStatsStatus.IN_PROGRESS, pending.getStatus());
    assertEquals(0, pending.getNumberOfAcknowledgedSubtasks());
    assertEquals(0, pending.getStateSize());
    assertEquals(totalSubtaskCount, pending.getNumberOfSubtasks());
    assertNull(pending.getLatestAcknowledgedSubtaskStats());
    assertEquals(-1, pending.getLatestAckTimestamp());
    assertEquals(-1, pending.getEndToEndDuration());
    assertEquals(firstTask, pending.getTaskStateStats(firstTask.getJobVertexId()));
    assertEquals(secondTask, pending.getTaskStateStats(secondTask.getJobVertexId()));
    assertNull(pending.getTaskStateStats(new JobVertexID()));

    // A report for a vertex that is not part of the checkpoint is rejected
    assertFalse(pending.reportSubtaskStats(new JobVertexID(), createSubtaskStats(0)));

    // Running totals the pending stats are expected to expose
    long expectedStateSize = 0;
    long expectedAlignmentBuffered = 0;

    // Report 1st task
    for (int subtaskIndex = 0; subtaskIndex < firstTask.getNumberOfSubtasks(); subtaskIndex++) {
        SubtaskStateStats reported = createSubtaskStats(subtaskIndex);
        expectedStateSize = expectedStateSize + reported.getStateSize();
        expectedAlignmentBuffered = expectedAlignmentBuffered + reported.getAlignmentBuffered();

        pending.reportSubtaskStats(firstTask.getJobVertexId(), reported);

        assertEquals(reported, pending.getLatestAcknowledgedSubtaskStats());
        assertEquals(reported.getAckTimestamp(), pending.getLatestAckTimestamp());
        assertEquals(reported.getAckTimestamp() - triggerTimestamp, pending.getEndToEndDuration());
        assertEquals(expectedStateSize, pending.getStateSize());
        assertEquals(expectedAlignmentBuffered, pending.getAlignmentBuffered());
    }

    // Don't allow overwrite
    assertFalse(pending.reportSubtaskStats(firstTask.getJobVertexId(), firstTask.getSubtaskStats()[0]));

    // Report 2nd task
    for (int subtaskIndex = 0; subtaskIndex < secondTask.getNumberOfSubtasks(); subtaskIndex++) {
        SubtaskStateStats reported = createSubtaskStats(subtaskIndex);
        expectedStateSize = expectedStateSize + reported.getStateSize();
        expectedAlignmentBuffered = expectedAlignmentBuffered + reported.getAlignmentBuffered();

        pending.reportSubtaskStats(secondTask.getJobVertexId(), reported);

        assertEquals(reported, pending.getLatestAcknowledgedSubtaskStats());
        assertEquals(reported.getAckTimestamp(), pending.getLatestAckTimestamp());
        assertEquals(reported.getAckTimestamp() - triggerTimestamp, pending.getEndToEndDuration());
        assertEquals(expectedStateSize, pending.getStateSize());
        assertEquals(expectedAlignmentBuffered, pending.getAlignmentBuffered());
    }

    // Every subtask of both tasks must now be acknowledged
    assertEquals(firstTask.getNumberOfSubtasks(), firstTask.getNumberOfAcknowledgedSubtasks());
    assertEquals(secondTask.getNumberOfSubtasks(), secondTask.getNumberOfAcknowledgedSubtasks());
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Test(org.junit.Test)

Example 60 with JobVertexID

Use of org.apache.flink.runtime.jobgraph.JobVertexID in the Apache Flink project.

In class PendingCheckpointStatsTest, method testReportFailedCheckpoint.

/**
 * Verifies that reporting a pending checkpoint as failed passes a
 * {@code FailedCheckpointStats} to the callback that carries the failure
 * timestamp and message and otherwise matches the pending stats.
 */
@Test
public void testReportFailedCheckpoint() throws Exception {
    TaskStateStats firstTask = new TaskStateStats(new JobVertexID(), 3);
    TaskStateStats secondTask = new TaskStateStats(new JobVertexID(), 4);

    HashMap<JobVertexID, TaskStateStats> statsByVertex = new HashMap<>();
    statsByVertex.put(firstTask.getJobVertexId(), firstTask);
    statsByVertex.put(secondTask.getJobVertexId(), secondTask);

    CheckpointStatsTracker.PendingCheckpointStatsCallback trackerCallback =
            mock(CheckpointStatsTracker.PendingCheckpointStatsCallback.class);

    final long triggerTimestamp = 123123;
    PendingCheckpointStats pending = new PendingCheckpointStats(
            0,
            triggerTimestamp,
            CheckpointProperties.forStandardCheckpoint(),
            firstTask.getNumberOfSubtasks() + secondTask.getNumberOfSubtasks(),
            statsByVertex,
            trackerCallback);

    // Report every subtask, first task before second task
    for (TaskStateStats task : new TaskStateStats[] { firstTask, secondTask }) {
        for (int subtaskIndex = 0; subtaskIndex < task.getNumberOfSubtasks(); subtaskIndex++) {
            pending.reportSubtaskStats(task.getJobVertexId(), createSubtaskStats(subtaskIndex));
        }
    }

    // Report failed
    Exception cause = new Exception("test exception");
    final long failureTimestamp = 112211137;
    pending.reportFailedCheckpoint(failureTimestamp, cause);

    // Grab the stats instance that was handed to the callback
    ArgumentCaptor<FailedCheckpointStats> captor =
            ArgumentCaptor.forClass(FailedCheckpointStats.class);
    verify(trackerCallback).reportFailedCheckpoint(captor.capture());
    FailedCheckpointStats failed = captor.getValue();

    assertNotNull(failed);
    assertEquals(CheckpointStatsStatus.FAILED, failed.getStatus());
    assertEquals(failureTimestamp, failed.getFailureTimestamp());
    assertEquals(cause.getMessage(), failed.getFailureMessage());

    // Failed stats must mirror the pending stats
    assertEquals(pending.getCheckpointId(), failed.getCheckpointId());
    assertEquals(pending.getNumberOfAcknowledgedSubtasks(), failed.getNumberOfAcknowledgedSubtasks());
    assertEquals(pending.getLatestAcknowledgedSubtaskStats(), failed.getLatestAcknowledgedSubtaskStats());
    assertEquals(pending.getLatestAckTimestamp(), failed.getLatestAckTimestamp());
    // the duration is measured up to the failure timestamp
    assertEquals(failureTimestamp - triggerTimestamp, failed.getEndToEndDuration());
    assertEquals(pending.getStateSize(), failed.getStateSize());
    assertEquals(pending.getAlignmentBuffered(), failed.getAlignmentBuffered());
    assertEquals(firstTask, failed.getTaskStateStats(firstTask.getJobVertexId()));
    assertEquals(secondTask, failed.getTaskStateStats(secondTask.getJobVertexId()));
}
Also used : HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Test(org.junit.Test)

Aggregations

JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 191; Test (org.junit.Test): 145; JobID (org.apache.flink.api.common.JobID): 88; SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot): 46; HashMap (java.util.HashMap): 38; Configuration (org.apache.flink.configuration.Configuration): 33; Instance (org.apache.flink.runtime.instance.Instance): 33; ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 30; JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 30; ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 28; ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 27; ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex): 25; IOException (java.io.IOException): 24; KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange): 24; ExecutionException (java.util.concurrent.ExecutionException): 23; ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway): 22; ArrayList (java.util.ArrayList): 20; ActorRef (akka.actor.ActorRef): 18; TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor): 18; ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 15