
Example 61 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class TaskExecutorSubmissionTest method testCancellingDependentAndStateUpdateFails.

/**
 * This test creates two tasks. The sender sends data but fails to send its state update back
 * to the job manager; the second task blocks until it is canceled.
 */
@Test
public void testCancellingDependentAndStateUpdateFails() throws Exception {
    ResourceID producerLocation = ResourceID.generate();
    NettyShuffleDescriptor sdd = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), producerLocation);
    TaskDeploymentDescriptor tdd1 = createSender(sdd);
    TaskDeploymentDescriptor tdd2 = createReceiver(sdd);
    ExecutionAttemptID eid1 = tdd1.getExecutionAttemptId();
    ExecutionAttemptID eid2 = tdd2.getExecutionAttemptId();
    final CompletableFuture<Void> task1RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2RunningFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task1FailedFuture = new CompletableFuture<>();
    final CompletableFuture<Void> task2CanceledFuture = new CompletableFuture<>();
    final JobMasterId jobMasterId = JobMasterId.generate();
    TestingJobMasterGateway testingJobMasterGateway =
            new TestingJobMasterGatewayBuilder()
                    .setFencingTokenSupplier(() -> jobMasterId)
                    .setUpdateTaskExecutionStateFunction(
                            taskExecutionState -> {
                                if (taskExecutionState != null
                                        && taskExecutionState.getID().equals(eid1)
                                        && taskExecutionState.getExecutionState()
                                                == ExecutionState.RUNNING) {
                                    return FutureUtils.completedExceptionally(
                                            new ExecutionGraphException(
                                                    "The execution attempt "
                                                            + eid2
                                                            + " was not found."));
                                } else {
                                    return CompletableFuture.completedFuture(Acknowledge.get());
                                }
                            })
                    .build();
    try (TaskSubmissionTestEnvironment env =
            new TaskSubmissionTestEnvironment.Builder(jobId)
                    .setResourceID(producerLocation)
                    .setSlotSize(2)
                    .addTaskManagerActionListener(eid1, ExecutionState.RUNNING, task1RunningFuture)
                    .addTaskManagerActionListener(eid2, ExecutionState.RUNNING, task2RunningFuture)
                    .addTaskManagerActionListener(eid1, ExecutionState.FAILED, task1FailedFuture)
                    .addTaskManagerActionListener(eid2, ExecutionState.CANCELED, task2CanceledFuture)
                    .setJobMasterId(jobMasterId)
                    .setJobMasterGateway(testingJobMasterGateway)
                    .useRealNonMockShuffleEnvironment()
                    .build()) {
        TaskExecutorGateway tmGateway = env.getTaskExecutorGateway();
        TaskSlotTable<Task> taskSlotTable = env.getTaskSlotTable();
        taskSlotTable.allocateSlot(0, jobId, tdd1.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd1, jobMasterId, timeout).get();
        task1RunningFuture.get();
        taskSlotTable.allocateSlot(1, jobId, tdd2.getAllocationId(), Time.seconds(60));
        tmGateway.submitTask(tdd2, jobMasterId, timeout).get();
        task2RunningFuture.get();
        task1FailedFuture.get();
        assertSame(taskSlotTable.getTask(eid1).getExecutionState(), ExecutionState.FAILED);
        tmGateway.cancelTask(eid2, timeout);
        task2CanceledFuture.get();
        assertSame(taskSlotTable.getTask(eid2).getExecutionState(), ExecutionState.CANCELED);
    }
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) NettyShuffleDescriptorBuilder(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder) ShuffleEnvironment(org.apache.flink.runtime.shuffle.ShuffleEnvironment) URL(java.net.URL) BlockingNoOpInvokable(org.apache.flink.runtime.testtasks.BlockingNoOpInvokable) PartitionDescriptorBuilder(org.apache.flink.runtime.shuffle.PartitionDescriptorBuilder) ExceptionUtils(org.apache.flink.util.ExceptionUtils) MemorySize(org.apache.flink.configuration.MemorySize) NetUtils(org.apache.flink.util.NetUtils) Assert.assertThat(org.junit.Assert.assertThat) Mockito.doThrow(org.mockito.Mockito.doThrow) NettyShuffleDescriptor(org.apache.flink.runtime.shuffle.NettyShuffleDescriptor) JobInformation(org.apache.flink.runtime.executiongraph.JobInformation) TestLogger(org.apache.flink.util.TestLogger) TestingJobMasterGatewayBuilder(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGatewayBuilder) NettyShuffleDescriptorBuilder.createRemoteWithIdAndLocation(org.apache.flink.runtime.util.NettyShuffleDescriptorBuilder.createRemoteWithIdAndLocation) Collection(java.util.Collection) AbstractInvokable(org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) PartitionNotFoundException(org.apache.flink.runtime.io.network.partition.PartitionNotFoundException) Preconditions(org.apache.flink.util.Preconditions) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) ExecutionGraphException(org.apache.flink.runtime.executiongraph.ExecutionGraphException) Time(org.apache.flink.api.common.time.Time) AllocationID(org.apache.flink.runtime.clusterframework.types.AllocationID) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) Environment(org.apache.flink.runtime.execution.Environment) Mockito.mock(org.mockito.Mockito.mock) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) CompletableFuture(java.util.concurrent.CompletableFuture) TestingJobMasterGateway(org.apache.flink.runtime.jobmaster.utils.TestingJobMasterGateway) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) Assert.assertSame(org.junit.Assert.assertSame) NettyShuffleEnvironmentOptions(org.apache.flink.configuration.NettyShuffleEnvironmentOptions) TaskManagerOptions(org.apache.flink.configuration.TaskManagerOptions) TaskSlotTable(org.apache.flink.runtime.taskexecutor.slot.TaskSlotTable) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) TestName(org.junit.rules.TestName) PermanentBlobKey(org.apache.flink.runtime.blob.PermanentBlobKey) TestingAbstractInvokables(org.apache.flink.runtime.jobmaster.TestingAbstractInvokables) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) PartitionInfo(org.apache.flink.runtime.executiongraph.PartitionInfo) Configuration(org.apache.flink.configuration.Configuration) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) Mockito(org.mockito.Mockito) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) Task(org.apache.flink.runtime.taskmanager.Task) Rule(org.junit.Rule) PartitionDescriptor(org.apache.flink.runtime.shuffle.PartitionDescriptor) TaskInformation(org.apache.flink.runtime.executiongraph.TaskInformation) Collections(java.util.Collections)
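The key trick in this test is the stubbed update function: returning an exceptionally completed future for the sender's RUNNING update is what makes the task executor fail that task. A minimal sketch of the same pattern using only java.util.concurrent (the Ack type is hypothetical, standing in for Flink's Acknowledge; this mirrors but is not Flink's FutureUtils implementation):

import java.util.concurrent.CompletableFuture;

public class FailingAckSketch {
    // Hypothetical acknowledgement type standing in for Flink's Acknowledge.
    static final class Ack {}

    // Mirrors the shape of FutureUtils.completedExceptionally: an already-failed future.
    static <T> CompletableFuture<T> completedExceptionally(Throwable t) {
        CompletableFuture<T> f = new CompletableFuture<>();
        f.completeExceptionally(t);
        return f;
    }

    public static void main(String[] args) {
        CompletableFuture<Ack> failed =
                completedExceptionally(new IllegalStateException("execution attempt not found"));
        // The caller only observes the failure when it inspects the future,
        // which is how the task executor learns the state update was rejected.
        failed.whenComplete((ack, err) -> System.out.println("ack=" + ack + ", error=" + err));
    }
}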

Example 62 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class DefaultCheckpointPlanCalculator method hasRunningPrecedentTasks.

private boolean hasRunningPrecedentTasks(ExecutionVertex vertex, List<JobEdge> prevJobEdges, Map<JobVertexID, BitSet> taskRunningStatusByVertex) {
    InternalExecutionGraphAccessor executionGraphAccessor = vertex.getExecutionGraphAccessor();
    for (int i = 0; i < prevJobEdges.size(); ++i) {
        if (prevJobEdges.get(i).getDistributionPattern() == DistributionPattern.POINTWISE) {
            for (IntermediateResultPartitionID consumedPartitionId : vertex.getConsumedPartitionGroup(i)) {
                ExecutionVertex precedentTask = executionGraphAccessor.getResultPartitionOrThrow(consumedPartitionId).getProducer();
                BitSet precedentVertexRunningStatus = taskRunningStatusByVertex.get(precedentTask.getJobvertexId());
                if (precedentVertexRunningStatus.get(precedentTask.getParallelSubtaskIndex())) {
                    return true;
                }
            }
        }
    }
    return false;
}
Also used : BitSet(java.util.BitSet) InternalExecutionGraphAccessor(org.apache.flink.runtime.executiongraph.InternalExecutionGraphAccessor) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex)
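hasRunningPrecedentTasks consults a per-vertex BitSet in which bit i records whether subtask i is running, so the whole check reduces to one bit lookup per producer. A small self-contained sketch of that encoding (plain Java, names hypothetical):

import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;

public class RunningStatusSketch {
    public static void main(String[] args) {
        // One BitSet per job vertex; bit i is set iff subtask i is RUNNING.
        Map<String, BitSet> taskRunningStatusByVertex = new HashMap<>();
        BitSet mapStatus = new BitSet(4);
        mapStatus.set(0); // subtask 0 running
        mapStatus.set(2); // subtask 2 running
        taskRunningStatusByVertex.put("map-vertex", mapStatus);

        // The calculator's inner loop boils down to this single bit lookup.
        int producerSubtaskIndex = 2;
        boolean producerRunning =
                taskRunningStatusByVertex.get("map-vertex").get(producerSubtaskIndex);
        System.out.println("precedent task running: " + producerRunning); // true
    }
}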

Example 63 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class NettyShuffleUtilsTest method createResultPartition.

private ResultPartition createResultPartition(NettyShuffleEnvironment network, ResultPartitionType resultPartitionType, int numSubpartitions) {
    ShuffleDescriptor shuffleDescriptor = createRemoteWithIdAndLocation(new IntermediateResultPartitionID(), ResourceID.generate());
    PartitionDescriptor partitionDescriptor = new PartitionDescriptor(new IntermediateDataSetID(), 2, shuffleDescriptor.getResultPartitionID().getPartitionId(), resultPartitionType, numSubpartitions, 0);
    ResultPartitionDeploymentDescriptor resultPartitionDeploymentDescriptor = new ResultPartitionDeploymentDescriptor(partitionDescriptor, shuffleDescriptor, 1, true);
    ExecutionAttemptID consumerID = new ExecutionAttemptID();
    Collection<ResultPartition> resultPartitions =
            network.createResultPartitionWriters(
                    network.createShuffleIOOwnerContext(
                            "", consumerID, new UnregisteredMetricsGroup()),
                    Collections.singletonList(resultPartitionDeploymentDescriptor));
    return resultPartitions.iterator().next();
}
Also used : UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) ResultPartition(org.apache.flink.runtime.io.network.partition.ResultPartition)
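A hedged usage sketch of this helper, written as if inside the same test class (NettyShuffleEnvironmentBuilder is taken from Flink's test utilities; verify its availability in your Flink version):

@Test
public void createsPipelinedPartitionSketch() throws Exception {
    // Builder name assumed from Flink's test utilities, not part of this example's source.
    NettyShuffleEnvironment network = new NettyShuffleEnvironmentBuilder().build();
    ResultPartition partition = createResultPartition(network, ResultPartitionType.PIPELINED, 2);
    partition.setup(); // allocates buffer pools before the partition can be written
    assertEquals(2, partition.getNumberOfSubpartitions());
}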

Example 64 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class RescalePartitionerTest method testExecutionGraphGeneration.

@Test
public void testExecutionGraphGeneration() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    // get input data
    DataStream<String> text = env.addSource(new ParallelSourceFunction<String>() {

        private static final long serialVersionUID = 7772338606389180774L;

        @Override
        public void run(SourceContext<String> ctx) throws Exception {
        }

        @Override
        public void cancel() {
        }
    }).setParallelism(2);
    DataStream<Tuple2<String, Integer>> counts = text.rescale().flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {

        private static final long serialVersionUID = -5255930322161596829L;

        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
        }
    });
    counts.rescale().print().setParallelism(2);
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    List<JobVertex> jobVertices = jobGraph.getVerticesSortedTopologicallyFromSources();
    JobVertex sourceVertex = jobVertices.get(0);
    JobVertex mapVertex = jobVertices.get(1);
    JobVertex sinkVertex = jobVertices.get(2);
    assertEquals(2, sourceVertex.getParallelism());
    assertEquals(4, mapVertex.getParallelism());
    assertEquals(2, sinkVertex.getParallelism());
    ExecutionGraph eg =
            TestingDefaultExecutionGraphBuilder.newBuilder()
                    .setVertexParallelismStore(
                            SchedulerBase.computeVertexParallelismStore(jobGraph))
                    .build();
    try {
        eg.attachJobGraph(jobVertices);
    } catch (JobException e) {
        e.printStackTrace();
        fail("Building ExecutionGraph failed: " + e.getMessage());
    }
    ExecutionJobVertex execSourceVertex = eg.getJobVertex(sourceVertex.getID());
    ExecutionJobVertex execMapVertex = eg.getJobVertex(mapVertex.getID());
    ExecutionJobVertex execSinkVertex = eg.getJobVertex(sinkVertex.getID());
    assertEquals(0, execSourceVertex.getInputs().size());
    assertEquals(1, execMapVertex.getInputs().size());
    assertEquals(4, execMapVertex.getParallelism());
    ExecutionVertex[] mapTaskVertices = execMapVertex.getTaskVertices();
    // verify that we have each parallel input partition exactly twice, i.e. that one source
    // sends to two unique mappers
    Map<Integer, Integer> mapInputPartitionCounts = new HashMap<>();
    for (ExecutionVertex mapTaskVertex : mapTaskVertices) {
        assertEquals(1, mapTaskVertex.getNumberOfInputs());
        assertEquals(1, mapTaskVertex.getConsumedPartitionGroup(0).size());
        IntermediateResultPartitionID consumedPartitionId = mapTaskVertex.getConsumedPartitionGroup(0).getFirst();
        assertEquals(
                sourceVertex.getID(),
                mapTaskVertex
                        .getExecutionGraphAccessor()
                        .getResultPartitionOrThrow(consumedPartitionId)
                        .getProducer()
                        .getJobvertexId());
        int inputPartition = consumedPartitionId.getPartitionNumber();
        if (!mapInputPartitionCounts.containsKey(inputPartition)) {
            mapInputPartitionCounts.put(inputPartition, 1);
        } else {
            mapInputPartitionCounts.put(inputPartition, mapInputPartitionCounts.get(inputPartition) + 1);
        }
    }
    assertEquals(2, mapInputPartitionCounts.size());
    for (int count : mapInputPartitionCounts.values()) {
        assertEquals(2, count);
    }
    assertEquals(1, execSinkVertex.getInputs().size());
    assertEquals(2, execSinkVertex.getParallelism());
    ExecutionVertex[] sinkTaskVertices = execSinkVertex.getTaskVertices();
    InternalExecutionGraphAccessor executionGraphAccessor = execSinkVertex.getGraph();
    // verify each sink instance has two inputs from the map and that each map subpartition
    // only occurs in one unique input edge
    Set<Integer> mapSubpartitions = new HashSet<>();
    for (ExecutionVertex sinkTaskVertex : sinkTaskVertices) {
        assertEquals(1, sinkTaskVertex.getNumberOfInputs());
        assertEquals(2, sinkTaskVertex.getConsumedPartitionGroup(0).size());
        for (IntermediateResultPartitionID consumedPartitionId : sinkTaskVertex.getConsumedPartitionGroup(0)) {
            IntermediateResultPartition consumedPartition = executionGraphAccessor.getResultPartitionOrThrow(consumedPartitionId);
            assertEquals(mapVertex.getID(), consumedPartition.getProducer().getJobvertexId());
            int partitionNumber = consumedPartition.getPartitionNumber();
            assertFalse(mapSubpartitions.contains(partitionNumber));
            mapSubpartitions.add(partitionNumber);
        }
    }
    assertEquals(4, mapSubpartitions.size());
}
Also used : HashMap(java.util.HashMap) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) JobException(org.apache.flink.runtime.JobException) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ParallelSourceFunction(org.apache.flink.streaming.api.functions.source.ParallelSourceFunction) InternalExecutionGraphAccessor(org.apache.flink.runtime.executiongraph.InternalExecutionGraphAccessor) HashSet(java.util.HashSet) JobException(org.apache.flink.runtime.JobException) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)
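The distribution this test verifies follows directly from rescale's pointwise arithmetic: with 2 sources and 4 mappers, each source partition is consumed by parallelism-ratio (4 / 2 = 2) consecutive mappers. A small self-contained sketch of that mapping (plain Java, no Flink APIs):

public class RescaleMappingSketch {
    public static void main(String[] args) {
        int producers = 2, consumers = 4;
        // When consumers is a multiple of producers, each producer feeds a
        // consecutive block of consumers / producers downstream subtasks.
        int fanOut = consumers / producers;
        for (int consumer = 0; consumer < consumers; consumer++) {
            int producer = consumer / fanOut;
            System.out.println("mapper " + consumer + " reads source partition " + producer);
        }
        // Output: mappers 0,1 -> partition 0; mappers 2,3 -> partition 1,
        // i.e. each source partition appears exactly twice, as the test asserts.
    }
}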

Example 65 with IntermediateResultPartitionID

use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

the class Execution method registerProducedPartitions.

@VisibleForTesting
static CompletableFuture<Map<IntermediateResultPartitionID, ResultPartitionDeploymentDescriptor>> registerProducedPartitions(ExecutionVertex vertex, TaskManagerLocation location, ExecutionAttemptID attemptId, boolean notifyPartitionDataAvailable) {
    ProducerDescriptor producerDescriptor = ProducerDescriptor.create(location, attemptId);
    Collection<IntermediateResultPartition> partitions = vertex.getProducedPartitions().values();
    Collection<CompletableFuture<ResultPartitionDeploymentDescriptor>> partitionRegistrations = new ArrayList<>(partitions.size());
    for (IntermediateResultPartition partition : partitions) {
        PartitionDescriptor partitionDescriptor = PartitionDescriptor.from(partition);
        int maxParallelism = getPartitionMaxParallelism(partition);
        CompletableFuture<? extends ShuffleDescriptor> shuffleDescriptorFuture =
                vertex.getExecutionGraphAccessor()
                        .getShuffleMaster()
                        .registerPartitionWithProducer(
                                vertex.getJobId(), partitionDescriptor, producerDescriptor);
        CompletableFuture<ResultPartitionDeploymentDescriptor> partitionRegistration =
                shuffleDescriptorFuture.thenApply(
                        shuffleDescriptor ->
                                new ResultPartitionDeploymentDescriptor(
                                        partitionDescriptor,
                                        shuffleDescriptor,
                                        maxParallelism,
                                        notifyPartitionDataAvailable));
        partitionRegistrations.add(partitionRegistration);
    }
    return FutureUtils.combineAll(partitionRegistrations).thenApply(rpdds -> {
        Map<IntermediateResultPartitionID, ResultPartitionDeploymentDescriptor> producedPartitions = new LinkedHashMap<>(partitions.size());
        rpdds.forEach(rpdd -> producedPartitions.put(rpdd.getPartitionId(), rpdd));
        return producedPartitions;
    });
}
Also used : ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) CompletableFuture(java.util.concurrent.CompletableFuture) PartitionDescriptor(org.apache.flink.runtime.shuffle.PartitionDescriptor) ProducerDescriptor(org.apache.flink.runtime.shuffle.ProducerDescriptor) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting)
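FutureUtils.combineAll turns a collection of futures into one future of all results, which the method then keys by partition ID. A minimal stand-in using only java.util.concurrent, to show the shape of that combination step (hypothetical names; not Flink's implementation, and its failure semantics differ):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;

public class CombineAllSketch {
    // Completes once every input future completes, then collects all results in order.
    static <T> CompletableFuture<List<T>> combineAll(List<CompletableFuture<T>> futures) {
        return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
                .thenApply(ignored ->
                        futures.stream().map(CompletableFuture::join).collect(Collectors.toList()));
    }

    public static void main(String[] args) {
        List<CompletableFuture<String>> registrations = new ArrayList<>();
        registrations.add(CompletableFuture.completedFuture("partition-0"));
        registrations.add(CompletableFuture.completedFuture("partition-1"));
        combineAll(registrations).thenAccept(ids -> System.out.println("registered: " + ids));
    }
}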

Aggregations

IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) 66
Test (org.junit.Test) 41
IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID) 18
ConsumedPartitionGroup (org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) 14
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID) 13
JobID (org.apache.flink.api.common.JobID) 12
ExecutionVertexID (org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) 12
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID) 11
ArrayList (java.util.ArrayList) 10
TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) 10
ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID) 10
InputGateDeploymentDescriptor (org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) 9
ResultPartitionDeploymentDescriptor (org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) 9
CompletableFuture (java.util.concurrent.CompletableFuture) 8
Configuration (org.apache.flink.configuration.Configuration) 8
ShuffleDescriptor (org.apache.flink.runtime.shuffle.ShuffleDescriptor) 8
IOException (java.io.IOException) 7
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex) 7
Collection (java.util.Collection) 6
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 6
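For readers new to these IDs, a short hedged sketch of how they nest, using only constructors that appear in the examples above: an IntermediateResultPartitionID identifies a partition in the job graph, and pairing it with an ExecutionAttemptID yields the runtime-level ResultPartitionID.

import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
import org.apache.flink.runtime.io.network.partition.ResultPartitionID;
import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;

public class PartitionIdSketch {
    public static void main(String[] args) {
        // A fresh, random partition ID, as in Examples 61 and 63 above.
        IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
        // A random execution attempt, as in NettyShuffleUtilsTest above.
        ExecutionAttemptID attemptId = new ExecutionAttemptID();
        // The runtime ID pairs the logical partition with its producing attempt.
        ResultPartitionID resultPartitionId = new ResultPartitionID(partitionId, attemptId);
        System.out.println(resultPartitionId);
    }
}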