Search in sources :

Example 51 with JobVertexID

use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

the class ExecutionGraphDeploymentTest method testAccumulatorsAndMetricsForwarding.

/**
 * Checks that {@link ExecutionGraph#updateState(TaskExecutionState)} hands the reported
 * accumulators and I/O metrics over to the affected execution, both when the reported state
 * is {@code CANCELED} and when it is {@code FAILED}.
 */
@Test
public void testAccumulatorsAndMetricsForwarding() throws Exception {
    final JobVertexID firstVertexId = new JobVertexID();
    final JobVertexID secondVertexId = new JobVertexID();
    JobVertex firstVertex = new JobVertex("v1", firstVertexId);
    JobVertex secondVertex = new JobVertex("v2", secondVertexId);

    Tuple2<ExecutionGraph, Map<ExecutionAttemptID, Execution>> setup = setupExecution(firstVertex, 1, secondVertex, 1);
    ExecutionGraph executionGraph = setup.f0;

    // --- Phase 1: report CANCELED with an accumulator value of 4 ---
    Execution canceledExecution = setup.f1.values().iterator().next();
    IOMetrics canceledMetrics = new IOMetrics(0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.0);
    Map<String, Accumulator<?, ?>> canceledAccumulators = new HashMap<>();
    canceledAccumulators.put("acc", new IntCounter(4));
    AccumulatorSnapshot canceledSnapshot = new AccumulatorSnapshot(
        executionGraph.getJobID(), canceledExecution.getAttemptId(), canceledAccumulators);
    TaskExecutionState canceledState = new TaskExecutionState(
        executionGraph.getJobID(),
        canceledExecution.getAttemptId(),
        ExecutionState.CANCELED,
        null,
        canceledSnapshot,
        canceledMetrics);
    executionGraph.updateState(canceledState);

    assertEquals(canceledMetrics, canceledExecution.getIOMetrics());
    assertNotNull(canceledExecution.getUserAccumulators());
    assertEquals(4, canceledExecution.getUserAccumulators().get("acc").getLocalValue());

    // --- Phase 2: report FAILED with an accumulator value of 8 ---
    // NOTE(review): this lookup uses the same iterator().next() expression as phase 1 and must
    // stay AFTER the first updateState call; presumably the canceled execution is no longer in
    // the map by then, so a different execution is returned -- confirm against setupExecution.
    Execution failedExecution = setup.f1.values().iterator().next();
    IOMetrics failedMetrics = new IOMetrics(0, 0, 0, 0, 0, 0.0, 0.0, 0.0, 0.0, 0.0);
    Map<String, Accumulator<?, ?>> failedAccumulators = new HashMap<>();
    failedAccumulators.put("acc", new IntCounter(8));
    AccumulatorSnapshot failedSnapshot = new AccumulatorSnapshot(
        executionGraph.getJobID(), failedExecution.getAttemptId(), failedAccumulators);
    TaskExecutionState failedState = new TaskExecutionState(
        executionGraph.getJobID(),
        failedExecution.getAttemptId(),
        ExecutionState.FAILED,
        null,
        failedSnapshot,
        failedMetrics);
    executionGraph.updateState(failedState);

    assertEquals(failedMetrics, failedExecution.getIOMetrics());
    assertNotNull(failedExecution.getUserAccumulators());
    assertEquals(8, failedExecution.getUserAccumulators().get("acc").getLocalValue());
}
Also used : Accumulator(org.apache.flink.api.common.accumulators.Accumulator) HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) AccumulatorSnapshot(org.apache.flink.runtime.accumulators.AccumulatorSnapshot) IntCounter(org.apache.flink.api.common.accumulators.IntCounter) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 52 with JobVertexID

use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

the class ExecutionGraphDeploymentTest method testBuildDeploymentDescriptor.

/**
 * Deploys one subtask of a four-vertex job and verifies the contents of the
 * {@link TaskDeploymentDescriptor} that reaches the task manager gateway.
 *
 * <p>Topology built here: {@code v2} consumes {@code v1}; {@code v3} and {@code v4} both
 * consume {@code v2}. All edges are all-to-all and pipelined, and every vertex has
 * parallelism 10. The subtask at index 3 of {@code v2} is deployed, and the descriptor is
 * expected to report two produced partitions and one input gate.
 *
 * @throws Exception if building the graph, deploying the vertex, or deserializing the job/task
 *     information fails; the exception is propagated so the test runner reports it with its
 *     full stack trace
 */
@Test
public void testBuildDeploymentDescriptor() throws Exception {
    // No try/catch around the body: the previous catch-all printed the stack trace to stderr
    // and called fail(e.getMessage()), which loses the trace in the test report and yields a
    // null failure message for exceptions created without one.
    final JobID jobId = new JobID();
    final JobVertexID jid1 = new JobVertexID();
    final JobVertexID jid2 = new JobVertexID();
    final JobVertexID jid3 = new JobVertexID();
    final JobVertexID jid4 = new JobVertexID();

    JobVertex v1 = new JobVertex("v1", jid1);
    JobVertex v2 = new JobVertex("v2", jid2);
    JobVertex v3 = new JobVertex("v3", jid3);
    JobVertex v4 = new JobVertex("v4", jid4);

    v1.setParallelism(10);
    v2.setParallelism(10);
    v3.setParallelism(10);
    v4.setParallelism(10);

    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);
    v3.setInvokableClass(BatchTask.class);
    v4.setInvokableClass(BatchTask.class);

    // v1 -> v2 -> {v3, v4}
    v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);

    ExecutionGraph eg = new ExecutionGraph(TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), jobId, "some job", new Configuration(), new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), new NoRestartStrategy(), new Scheduler(TestingUtils.defaultExecutionContext()));

    List<JobVertex> ordered = Arrays.asList(v1, v2, v3, v4);
    eg.attachJobGraph(ordered);

    // Pick subtask 3 of the middle vertex v2, which has both an input and outputs.
    ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
    ExecutionVertex vertex = ejv.getTaskVertices()[3];

    ExecutionGraphTestUtils.SimpleActorGateway instanceGateway = new ExecutionGraphTestUtils.SimpleActorGateway(TestingUtils.directExecutionContext());
    final Instance instance = getInstance(new ActorTaskManagerGateway(instanceGateway));
    final SimpleSlot slot = instance.allocateSimpleSlot(jobId);

    assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
    vertex.deployToSlot(slot);
    assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());

    // The gateway's lastTDD field is read as the descriptor produced by the deployment above.
    TaskDeploymentDescriptor descr = instanceGateway.lastTDD;
    assertNotNull(descr);

    JobInformation jobInformation = descr.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
    TaskInformation taskInformation = descr.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());

    assertEquals(jobId, jobInformation.getJobId());
    assertEquals(jid2, taskInformation.getJobVertexId());
    assertEquals(3, descr.getSubtaskIndex());
    assertEquals(10, taskInformation.getNumberOfSubtasks());
    assertEquals(BatchTask.class.getName(), taskInformation.getInvokableClassName());
    assertEquals("v2", taskInformation.getTaskName());

    Collection<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
    Collection<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();

    assertEquals(2, producedPartitions.size());
    assertEquals(1, consumedPartitions.size());

    Iterator<ResultPartitionDeploymentDescriptor> iteratorProducedPartitions = producedPartitions.iterator();
    Iterator<InputGateDeploymentDescriptor> iteratorConsumedPartitions = consumedPartitions.iterator();

    // Each produced partition and the single input gate are expected to fan out/in to 10.
    assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
    assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
    assertEquals(10, iteratorConsumedPartitions.next().getInputChannelDeploymentDescriptors().length);
}
Also used : ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) ExecutionGraphTestUtils.getInstance(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) BatchTask(org.apache.flink.runtime.operators.BatchTask) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 53 with JobVertexID

use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

the class ExecutionGraphDeploymentTest method testRegistrationOfExecutionsCanceled.

/**
 * Cancels every execution returned by {@code setupExecution} for two vertices (parallelism 19
 * and 37) and asserts that the returned execution map is empty afterwards.
 *
 * @throws Exception if setting up the executions fails; the exception is propagated so the
 *     test runner reports it with its full stack trace
 */
@Test
public void testRegistrationOfExecutionsCanceled() throws Exception {
    // No try/catch around the body: the previous catch-all printed the stack trace to stderr
    // and called fail(e.getMessage()), which loses the trace in the test report and yields a
    // null failure message for exceptions created without one.
    final JobVertexID jid1 = new JobVertexID();
    final JobVertexID jid2 = new JobVertexID();

    JobVertex v1 = new JobVertex("v1", jid1);
    JobVertex v2 = new JobVertex("v2", jid2);

    Map<ExecutionAttemptID, Execution> executions = setupExecution(v1, 19, v2, 37).f1;

    // NOTE(review): the final assertion only holds if this map shrinks as executions finish
    // canceling, and the loop iterates it while that happens -- presumably a concurrent map;
    // confirm against setupExecution.
    for (Execution e : executions.values()) {
        e.cancel();
        e.cancelingComplete();
    }

    assertEquals(0, executions.size());
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Test(org.junit.Test)

Example 54 with JobVertexID

use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

the class TaskStateStatsTest method testIsJavaSerializable.

/**
 * Fills a {@link TaskStateStats} with seven randomly populated subtask stats, copies it via
 * {@code CommonTestUtils.createCopySerializable}, and checks that the copy reports the same
 * totals and a full count in every summary statistic.
 */
@Test
public void testIsJavaSerializable() throws Exception {
    JobVertexID vertexId = new JobVertexID();
    SubtaskStateStats[] reported = new SubtaskStateStats[7];
    TaskStateStats original = new TaskStateStats(vertexId, reported.length);

    // Running totals that the deserialized copy must reproduce.
    long expectedStateSize = 0;
    long expectedAlignmentBuffered = 0;

    for (int index = 0; index < reported.length; index++) {
        SubtaskStateStats current = new SubtaskStateStats(
            index,
            rand.nextInt(128),
            rand.nextInt(128),
            rand.nextInt(128),
            rand.nextInt(128),
            rand.nextInt(128),
            rand.nextInt(128));
        reported[index] = current;
        expectedStateSize += current.getStateSize();
        expectedAlignmentBuffered += current.getAlignmentBuffered();
        original.reportSubtaskStats(current);
    }

    TaskStateStats deserialized = CommonTestUtils.createCopySerializable(original);

    assertEquals(expectedStateSize, deserialized.getStateSize());
    assertEquals(expectedAlignmentBuffered, deserialized.getAlignmentBuffered());

    // Every summary statistic must have seen one sample per reported subtask.
    TaskStateStats.TaskStateStatsSummary summaryStats = deserialized.getSummaryStats();
    assertEquals(reported.length, summaryStats.getStateSizeStats().getCount());
    assertEquals(reported.length, summaryStats.getAckTimestampStats().getCount());
    assertEquals(reported.length, summaryStats.getSyncCheckpointDurationStats().getCount());
    assertEquals(reported.length, summaryStats.getAsyncCheckpointDurationStats().getCount());
    assertEquals(reported.length, summaryStats.getAlignmentBufferedStats().getCount());
    assertEquals(reported.length, summaryStats.getAlignmentDurationStats().getCount());
}
Also used : JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Test(org.junit.Test)

Example 55 with JobVertexID

use of org.apache.flink.runtime.jobgraph.JobVertexID in project flink by apache.

the class SavepointLoaderTest method testLoadAndValidateSavepoint.

/**
 * Exercises {@code SavepointLoader.loadAndValidateSavepoint} in four steps against one stored
 * savepoint: a matching setup, a max-parallelism mismatch, a missing vertex that is rejected,
 * and a missing vertex that is tolerated because the last argument is {@code true}.
 */
@Test
public void testLoadAndValidateSavepoint() throws Exception {
    File savepointDir = tmpFolder.newFolder();

    int parallelism = 128128;
    // Deliberately larger than Integer.MAX_VALUE.
    long checkpointId = Integer.MAX_VALUE + 123123L;
    JobVertexID vertexId = new JobVertexID();

    // Mocked task state stored in the savepoint.
    TaskState taskState = mock(TaskState.class);
    when(taskState.getParallelism()).thenReturn(parallelism);
    when(taskState.getJobVertexID()).thenReturn(vertexId);
    when(taskState.getMaxParallelism()).thenReturn(parallelism);
    when(taskState.getChainLength()).thenReturn(1);

    Map<JobVertexID, TaskState> statesByVertex = new HashMap<>();
    statesByVertex.put(vertexId, taskState);

    JobID jobId = new JobID();

    // Write the savepoint to the temporary folder.
    SavepointV1 storedSavepoint = new SavepointV1(checkpointId, statesByVertex.values());
    String savepointPath = SavepointStore.storeSavepoint(savepointDir.getAbsolutePath(), storedSavepoint);

    // Mocked job vertex whose parallelism settings initially match the stored state.
    ExecutionJobVertex jobVertex = mock(ExecutionJobVertex.class);
    when(jobVertex.getParallelism()).thenReturn(parallelism);
    when(jobVertex.getMaxParallelism()).thenReturn(parallelism);

    Map<JobVertexID, ExecutionJobVertex> verticesById = new HashMap<>();
    verticesById.put(vertexId, jobVertex);

    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();

    // Step 1: everything matches, loading succeeds.
    CompletedCheckpoint checkpoint = SavepointLoader.loadAndValidateSavepoint(jobId, verticesById, savepointPath, classLoader, false);
    assertEquals(jobId, checkpoint.getJobId());
    assertEquals(checkpointId, checkpoint.getCheckpointID());

    // Step 2: the vertex now reports a different, explicitly configured max parallelism.
    when(jobVertex.getMaxParallelism()).thenReturn(222);
    when(jobVertex.isMaxParallelismConfigured()).thenReturn(true);
    try {
        SavepointLoader.loadAndValidateSavepoint(jobId, verticesById, savepointPath, classLoader, false);
        fail("Did not throw expected Exception");
    } catch (IllegalStateException mismatch) {
        assertTrue(mismatch.getMessage().contains("Max parallelism mismatch"));
    }

    // Step 3: the vertex is removed from the job, and the last argument is false.
    assertNotNull(verticesById.remove(vertexId));
    try {
        SavepointLoader.loadAndValidateSavepoint(jobId, verticesById, savepointPath, classLoader, false);
        fail("Did not throw expected Exception");
    } catch (IllegalStateException missingVertex) {
        assertTrue(missingVertex.getMessage().contains("allowNonRestoredState"));
    }

    // Step 4: same missing vertex, but the last argument is true, so no exception is expected.
    SavepointLoader.loadAndValidateSavepoint(jobId, verticesById, savepointPath, classLoader, true);
}
Also used : CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) HashMap(java.util.HashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) File(java.io.File) TaskState(org.apache.flink.runtime.checkpoint.TaskState) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)191 Test (org.junit.Test)145 JobID (org.apache.flink.api.common.JobID)88 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)46 HashMap (java.util.HashMap)38 Configuration (org.apache.flink.configuration.Configuration)33 Instance (org.apache.flink.runtime.instance.Instance)33 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)30 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)30 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)28 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)27 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)25 IOException (java.io.IOException)24 KeyGroupRange (org.apache.flink.runtime.state.KeyGroupRange)24 ExecutionException (java.util.concurrent.ExecutionException)23 ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)22 ArrayList (java.util.ArrayList)20 ActorRef (akka.actor.ActorRef)18 TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor)18 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)15