Search in sources :

Example 6 with ConsumedPartitionGroup

use of org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup in project flink by apache.

the class DefaultExecutionGraphDeploymentTest method testBuildDeploymentDescriptor.

// Builds a four-vertex batch job graph, deploys subtask 3 of vertex "v2" into a testing slot,
// captures the TaskDeploymentDescriptor handed to the task manager gateway, and checks its
// job/task information, produced partitions and input gates.
@Test
public void testBuildDeploymentDescriptor() throws Exception {
    final JobVertexID jid1 = new JobVertexID();
    final JobVertexID jid2 = new JobVertexID();
    final JobVertexID jid3 = new JobVertexID();
    final JobVertexID jid4 = new JobVertexID();
    JobVertex v1 = new JobVertex("v1", jid1);
    JobVertex v2 = new JobVertex("v2", jid2);
    JobVertex v3 = new JobVertex("v3", jid3);
    JobVertex v4 = new JobVertex("v4", jid4);
    // every vertex runs with parallelism 10; the assertions below rely on this value
    v1.setParallelism(10);
    v2.setParallelism(10);
    v3.setParallelism(10);
    v4.setParallelism(10);
    v1.setInvokableClass(BatchTask.class);
    v2.setInvokableClass(BatchTask.class);
    v3.setInvokableClass(BatchTask.class);
    v4.setInvokableClass(BatchTask.class);
    // topology: v1 -> v2 -> {v3, v4}, all edges ALL_TO_ALL / PIPELINED
    v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
    final JobGraph jobGraph = JobGraphTestUtils.batchJobGraph(v1, v2, v3, v4);
    final JobID jobId = jobGraph.getJobID();
    // the same direct executor is used as both future executor and IO executor
    DirectScheduledExecutorService executor = new DirectScheduledExecutorService();
    // blobWriter is declared outside this method (not visible in this excerpt)
    DefaultExecutionGraph eg = TestingDefaultExecutionGraphBuilder.newBuilder().setJobGraph(jobGraph).setFutureExecutor(executor).setIoExecutor(executor).setBlobWriter(blobWriter).build();
    eg.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
    // helper defined elsewhere in the test class; presumably verifies job information offloading
    checkJobOffloaded(eg);
    // pick subtask index 3 of v2 as the vertex under test
    ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
    ExecutionVertex vertex = ejv.getTaskVertices()[3];
    final SimpleAckingTaskManagerGateway taskManagerGateway = new SimpleAckingTaskManagerGateway();
    // completed by the submit consumer below with the descriptor passed to the gateway
    final CompletableFuture<TaskDeploymentDescriptor> tdd = new CompletableFuture<>();
    taskManagerGateway.setSubmitConsumer(FunctionUtils.uncheckedConsumer(taskDeploymentDescriptor -> {
        // blobCache is declared outside this method (not visible in this excerpt)
        taskDeploymentDescriptor.loadBigData(blobCache);
        tdd.complete(taskDeploymentDescriptor);
    }));
    final LogicalSlot slot = new TestingLogicalSlotBuilder().setTaskManagerGateway(taskManagerGateway).createTestingLogicalSlot();
    // drive the execution CREATED -> SCHEDULED, register its partitions, then deploy
    assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
    vertex.getCurrentExecutionAttempt().transitionState(ExecutionState.SCHEDULED);
    vertex.getCurrentExecutionAttempt().registerProducedPartitions(slot.getTaskManagerLocation(), true).get();
    vertex.deployToSlot(slot);
    assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
    // helper defined elsewhere in the test class; presumably verifies task information offloading
    checkTaskOffloaded(eg, vertex.getJobvertexId());
    TaskDeploymentDescriptor descr = tdd.get();
    assertNotNull(descr);
    JobInformation jobInformation = descr.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
    TaskInformation taskInformation = descr.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
    // the descriptor must describe subtask 3 of v2 within the submitted job
    assertEquals(jobId, descr.getJobId());
    assertEquals(jobId, jobInformation.getJobId());
    assertEquals(jid2, taskInformation.getJobVertexId());
    assertEquals(3, descr.getSubtaskIndex());
    assertEquals(10, taskInformation.getNumberOfSubtasks());
    assertEquals(BatchTask.class.getName(), taskInformation.getInvokableClassName());
    assertEquals("v2", taskInformation.getTaskName());
    Collection<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
    Collection<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();
    // v2 is the input of both v3 and v4 (two produced partitions) and consumes only v1 (one gate)
    assertEquals(2, producedPartitions.size());
    assertEquals(1, consumedPartitions.size());
    Iterator<ResultPartitionDeploymentDescriptor> iteratorProducedPartitions = producedPartitions.iterator();
    Iterator<InputGateDeploymentDescriptor> iteratorConsumedPartitions = consumedPartitions.iterator();
    // each produced partition is expected to have 10 subpartitions (consumer parallelism is 10)
    assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
    assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
    // the single input gate is expected to carry 10 shuffle descriptors (v1 has parallelism 10)
    ShuffleDescriptor[] shuffleDescriptors = iteratorConsumedPartitions.next().getShuffleDescriptors();
    assertEquals(10, shuffleDescriptors.length);
    // the shuffle descriptors must appear in the same order as the partition ids of the vertex's
    // first consumed partition group
    Iterator<ConsumedPartitionGroup> iteratorConsumedPartitionGroup = vertex.getAllConsumedPartitionGroups().iterator();
    int idx = 0;
    for (IntermediateResultPartitionID partitionId : iteratorConsumedPartitionGroup.next()) {
        assertEquals(partitionId, shuffleDescriptors[idx++].getResultPartitionID().getPartitionId());
    }
}
Also used : ComponentMainThreadExecutorServiceAdapter(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutorServiceAdapter) TestingTaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGateway) Arrays(java.util.Arrays) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) Assert.assertThat(org.junit.Assert.assertThat) FunctionUtils(org.apache.flink.util.function.FunctionUtils) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) Assert.fail(org.junit.Assert.fail) BlobWriter(org.apache.flink.runtime.blob.BlobWriter) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) TestingPhysicalSlotProvider(org.apache.flink.runtime.scheduler.TestingPhysicalSlotProvider) Collection(java.util.Collection) Accumulator(org.apache.flink.api.common.accumulators.Accumulator) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) Acknowledge(org.apache.flink.runtime.messages.Acknowledge) CheckpointingOptions(org.apache.flink.configuration.CheckpointingOptions) ArrayBlockingQueue(java.util.concurrent.ArrayBlockingQueue) List(java.util.List) TestingPhysicalSlot(org.apache.flink.runtime.scheduler.TestingPhysicalSlot) TestCase.assertTrue(junit.framework.TestCase.assertTrue) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) BatchTask(org.apache.flink.runtime.operators.BatchTask) SchedulerTestingUtils(org.apache.flink.runtime.scheduler.SchedulerTestingUtils) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) 
SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) ArrayList(java.util.ArrayList) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) SchedulerNG(org.apache.flink.runtime.scheduler.SchedulerNG) RpcTaskManagerGateway(org.apache.flink.runtime.jobmaster.RpcTaskManagerGateway) JobGraphTestUtils(org.apache.flink.runtime.jobgraph.JobGraphTestUtils) IntCounter(org.apache.flink.api.common.accumulators.IntCounter) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Description(org.hamcrest.Description) Iterator(java.util.Iterator) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) JobMasterId(org.apache.flink.runtime.jobmaster.JobMasterId) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) Test(org.junit.Test) AccumulatorSnapshot(org.apache.flink.runtime.accumulators.AccumulatorSnapshot) SimpleAckingTaskManagerGateway(org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway) Assert.assertNotEquals(org.junit.Assert.assertNotEquals) PermanentBlobService(org.apache.flink.runtime.blob.PermanentBlobService) JobID(org.apache.flink.api.common.JobID) ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) 
TestingTaskExecutorGatewayBuilder(org.apache.flink.runtime.taskexecutor.TestingTaskExecutorGatewayBuilder) VoidBlobWriter(org.apache.flink.runtime.blob.VoidBlobWriter) TaskExecutionState(org.apache.flink.runtime.taskmanager.TaskExecutionState) Collections(java.util.Collections) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) Assert.assertEquals(org.junit.Assert.assertEquals) NoOpInvokable(org.apache.flink.runtime.testtasks.NoOpInvokable) ResultPartitionDeploymentDescriptor(org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) DirectScheduledExecutorService(org.apache.flink.runtime.testutils.DirectScheduledExecutorService) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) SimpleAckingTaskManagerGateway(org.apache.flink.runtime.executiongraph.utils.SimpleAckingTaskManagerGateway) CompletableFuture(java.util.concurrent.CompletableFuture) TaskDeploymentDescriptor(org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) BatchTask(org.apache.flink.runtime.operators.BatchTask) InputGateDeploymentDescriptor(org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ShuffleDescriptor(org.apache.flink.runtime.shuffle.ShuffleDescriptor) TestingLogicalSlotBuilder(org.apache.flink.runtime.jobmaster.TestingLogicalSlotBuilder) JobID(org.apache.flink.api.common.JobID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)

Example 7 with ConsumedPartitionGroup

use of org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup in project flink by apache.

the class RestartPipelinedRegionFailoverStrategy method getRegionsToRestart.

/**
 * Collects every region that has to be restarted together with the given failed region.
 *
 * <p>A region counts as 'involved' (and is therefore returned) when one of these rules applies:
 *
 * <ol>
 *   <li>It is the region containing the failed task.
 *   <li>It contains the producer task of an input result partition of an involved region, and
 *       that partition is not available, i.e. missing or corrupted.
 *   <li>It is a consumer region of an involved region.
 * </ol>
 */
private Set<SchedulingPipelinedRegion> getRegionsToRestart(SchedulingPipelinedRegion failedRegion) {
    final Set<SchedulingPipelinedRegion> involvedRegions = Collections.newSetFromMap(new IdentityHashMap<>());
    final Set<SchedulingPipelinedRegion> seenRegions = Collections.newSetFromMap(new IdentityHashMap<>());
    final Set<ConsumedPartitionGroup> seenConsumedGroups = Collections.newSetFromMap(new IdentityHashMap<>());
    final Set<ConsumerVertexGroup> seenConsumerGroups = Collections.newSetFromMap(new IdentityHashMap<>());

    // breadth-first traversal that begins at the failed region
    final Queue<SchedulingPipelinedRegion> pending = new ArrayDeque<>();
    seenRegions.add(failedRegion);
    pending.add(failedRegion);

    SchedulingPipelinedRegion current;
    // ArrayDeque never holds null, so a null poll() result means the queue is drained
    while ((current = pending.poll()) != null) {
        // whatever is dequeued is involved and must be restarted
        involvedRegions.add(current);

        // upstream: an unavailable input partition drags in the region of its producer
        for (IntermediateResultPartitionID partitionId : getConsumedPartitionsToVisit(current, seenConsumedGroups)) {
            if (resultPartitionAvailabilityChecker.isAvailable(partitionId)) {
                continue;
            }
            final SchedulingResultPartition unavailablePartition = topology.getResultPartition(partitionId);
            final SchedulingPipelinedRegion upstreamRegion = topology.getPipelinedRegionOfVertex(unavailablePartition.getProducer().getId());
            // Set#add reports whether the region is new, i.e. not visited before
            if (seenRegions.add(upstreamRegion)) {
                pending.add(upstreamRegion);
            }
        }

        // downstream: every consumer region of an involved region is involved as well
        for (ExecutionVertexID consumerId : getConsumerVerticesToVisit(current, seenConsumerGroups)) {
            final SchedulingPipelinedRegion downstreamRegion = topology.getPipelinedRegionOfVertex(consumerId);
            if (seenRegions.add(downstreamRegion)) {
                pending.add(downstreamRegion);
            }
        }
    }
    return involvedRegions;
}
Also used : ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) SchedulingResultPartition(org.apache.flink.runtime.scheduler.strategy.SchedulingResultPartition) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) SchedulingPipelinedRegion(org.apache.flink.runtime.scheduler.strategy.SchedulingPipelinedRegion) ConsumerVertexGroup(org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup) ArrayDeque(java.util.ArrayDeque) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)

Example 8 with ConsumedPartitionGroup

use of org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup in project flink by apache.

the class RestartPipelinedRegionFailoverStrategy method getConsumedPartitionsToVisit.

/**
 * Gathers the consumed partition groups of all vertices in the given region that have not been
 * visited yet, records them in {@code visitedConsumedResultGroups}, and exposes their
 * partition ids as a single flattened iterable.
 */
private Iterable<IntermediateResultPartitionID> getConsumedPartitionsToVisit(SchedulingPipelinedRegion regionToRestart, Set<ConsumedPartitionGroup> visitedConsumedResultGroups) {
    final List<ConsumedPartitionGroup> unvisitedGroups = new ArrayList<>();
    for (SchedulingExecutionVertex regionVertex : regionToRestart.getVertices()) {
        for (ConsumedPartitionGroup group : regionVertex.getConsumedPartitionGroups()) {
            // Set#add returns true only for groups that were not recorded before
            final boolean firstVisit = visitedConsumedResultGroups.add(group);
            if (firstVisit) {
                unvisitedGroups.add(group);
            }
        }
    }
    return IterableUtils.flatMap(unvisitedGroups, Function.identity());
}
Also used : ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) ArrayList(java.util.ArrayList)

Example 9 with ConsumedPartitionGroup

use of org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup in project flink by apache.

the class ExecutionGraphToInputsLocationsRetrieverAdapter method getConsumedResultPartitionsProducers.

/**
 * Returns, for each consumed partition group of the given execution vertex, the ids of the
 * execution vertices producing the partitions of that group.
 */
@Override
public Collection<Collection<ExecutionVertexID>> getConsumedResultPartitionsProducers(ExecutionVertexID executionVertexId) {
    final ExecutionVertex consumer = getExecutionVertex(executionVertexId);
    final InternalExecutionGraphAccessor graphAccessor = consumer.getExecutionGraphAccessor();
    final List<Collection<ExecutionVertexID>> producersPerGroup = new ArrayList<>(consumer.getNumberOfInputs());

    for (ConsumedPartitionGroup group : consumer.getAllConsumedPartitionGroups()) {
        // one producer id per partition of the group, in the group's iteration order
        final List<ExecutionVertexID> groupProducers = new ArrayList<>(group.size());
        group.forEach(partitionId -> groupProducers.add(graphAccessor.getResultPartitionOrThrow(partitionId).getProducer().getID()));
        producersPerGroup.add(groupProducers);
    }
    return producersPerGroup;
}
Also used : ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) ArrayList(java.util.ArrayList) Collection(java.util.Collection) InternalExecutionGraphAccessor(org.apache.flink.runtime.executiongraph.InternalExecutionGraphAccessor) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)

Example 10 with ConsumedPartitionGroup

use of org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup in project flink by apache.

the class DefaultSchedulingPipelinedRegion method initializeAllBlockingConsumedPartitionGroups.

/**
 * Populates {@code blockingConsumedPartitionGroups} with an unmodifiable set of all consumed
 * partition groups of this region's vertices whose first partition has a blocking result type.
 */
private void initializeAllBlockingConsumedPartitionGroups() {
    final Set<ConsumedPartitionGroup> blockingGroups = new HashSet<>();
    for (DefaultExecutionVertex vertex : executionVertices.values()) {
        for (ConsumedPartitionGroup group : vertex.getConsumedPartitionGroups()) {
            // only the group's first partition is inspected to decide about the whole group
            final SchedulingResultPartition firstPartition = resultPartitionRetriever.apply(group.getFirst());
            final boolean blocking = firstPartition.getResultType().isBlocking();
            if (!blocking) {
                continue;
            }
            blockingGroups.add(group);
        }
    }
    this.blockingConsumedPartitionGroups = Collections.unmodifiableSet(blockingGroups);
}
Also used : ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) SchedulingResultPartition(org.apache.flink.runtime.scheduler.strategy.SchedulingResultPartition) HashSet(java.util.HashSet)

Aggregations

ConsumedPartitionGroup (org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup): 30; IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID): 15; Test (org.junit.Test): 12; ArrayList (java.util.ArrayList): 9; ExecutionVertexID (org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID): 6; JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 4; SchedulingResultPartition (org.apache.flink.runtime.scheduler.strategy.SchedulingResultPartition): 4; HashSet (java.util.HashSet): 3; List (java.util.List): 3; IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID): 3; ConsumerVertexGroup (org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup): 3; Collection (java.util.Collection): 2; Collections (java.util.Collections): 2; HashMap (java.util.HashMap): 2; Map (java.util.Map): 2; ExecutionState (org.apache.flink.runtime.execution.ExecutionState): 2; ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 2; IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition): 2; ResultPartitionType (org.apache.flink.runtime.io.network.partition.ResultPartitionType): 2; JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 2