
Example 16 with IntermediateResultPartitionID

Use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

Class RestartPipelinedRegionFailoverStrategy, method getRegionsToRestart.

/**
 * All 'involved' regions are proposed to be restarted. The 'involved' regions are calculated
 * with rules below:
 * 1. The region containing the failed task is always involved.
 * 2. If an input result partition of an involved region is not available, i.e. Missing or
 *    Corrupted, the region containing the partition producer task is involved.
 * 3. If a region is involved, all of its consumer regions are involved.
 */
private Set<SchedulingPipelinedRegion> getRegionsToRestart(SchedulingPipelinedRegion failedRegion) {
    Set<SchedulingPipelinedRegion> regionsToRestart = Collections.newSetFromMap(new IdentityHashMap<>());
    Set<SchedulingPipelinedRegion> visitedRegions = Collections.newSetFromMap(new IdentityHashMap<>());
    Set<ConsumedPartitionGroup> visitedConsumedResultGroups = Collections.newSetFromMap(new IdentityHashMap<>());
    Set<ConsumerVertexGroup> visitedConsumerVertexGroups = Collections.newSetFromMap(new IdentityHashMap<>());
    // start from the failed region to visit all involved regions
    Queue<SchedulingPipelinedRegion> regionsToVisit = new ArrayDeque<>();
    visitedRegions.add(failedRegion);
    regionsToVisit.add(failedRegion);
    while (!regionsToVisit.isEmpty()) {
        SchedulingPipelinedRegion regionToRestart = regionsToVisit.poll();
        // an involved region should be restarted
        regionsToRestart.add(regionToRestart);
        // if a needed input result partition is not available, its producer region is involved
        for (IntermediateResultPartitionID consumedPartitionId : getConsumedPartitionsToVisit(regionToRestart, visitedConsumedResultGroups)) {
            if (!resultPartitionAvailabilityChecker.isAvailable(consumedPartitionId)) {
                SchedulingResultPartition consumedPartition = topology.getResultPartition(consumedPartitionId);
                SchedulingPipelinedRegion producerRegion = topology.getPipelinedRegionOfVertex(consumedPartition.getProducer().getId());
                if (!visitedRegions.contains(producerRegion)) {
                    visitedRegions.add(producerRegion);
                    regionsToVisit.add(producerRegion);
                }
            }
        }
        // all consumer regions of an involved region should be involved
        for (ExecutionVertexID consumerVertexId : getConsumerVerticesToVisit(regionToRestart, visitedConsumerVertexGroups)) {
            SchedulingPipelinedRegion consumerRegion = topology.getPipelinedRegionOfVertex(consumerVertexId);
            if (!visitedRegions.contains(consumerRegion)) {
                visitedRegions.add(consumerRegion);
                regionsToVisit.add(consumerRegion);
            }
        }
    }
    return regionsToRestart;
}
Also used : ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) SchedulingResultPartition(org.apache.flink.runtime.scheduler.strategy.SchedulingResultPartition) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) SchedulingPipelinedRegion(org.apache.flink.runtime.scheduler.strategy.SchedulingPipelinedRegion) ConsumerVertexGroup(org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup) ArrayDeque(java.util.ArrayDeque) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)
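
The traversal above is easier to follow with the Flink types stripped away. Below is a minimal standalone sketch of the same three rules over a toy graph of hypothetical regions; the String region names, the producersOfMissingInputs map, and the consumersOf map are illustrative stand-ins, not Flink APIs.

// Standalone sketch of the involvement rules used by getRegionsToRestart (no Flink types).
import java.util.ArrayDeque;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

final class RegionRestartSketch {

    static Set<String> regionsToRestart(
            String failedRegion,
            Map<String, List<String>> producersOfMissingInputs,
            Map<String, List<String>> consumersOf) {

        Set<String> visited = new LinkedHashSet<>();
        Queue<String> toVisit = new ArrayDeque<>();
        // Rule 1: the region containing the failed task is always involved.
        visited.add(failedRegion);
        toVisit.add(failedRegion);

        while (!toVisit.isEmpty()) {
            String region = toVisit.poll();
            // Rule 2: if an input result partition is unavailable, involve its producer region.
            for (String producer : producersOfMissingInputs.getOrDefault(region, List.of())) {
                if (visited.add(producer)) {
                    toVisit.add(producer);
                }
            }
            // Rule 3: all consumer regions of an involved region are involved as well.
            for (String consumer : consumersOf.getOrDefault(region, List.of())) {
                if (visited.add(consumer)) {
                    toVisit.add(consumer);
                }
            }
        }
        // Every visited region is involved, hence restarted.
        return visited;
    }

    public static void main(String[] args) {
        // Toy topology r1 -> r2 -> r3: r2 fails and r1's produced partition is unavailable.
        Map<String, List<String>> missing = Map.of("r2", List.of("r1"));
        Map<String, List<String>> consumers = Map.of("r1", List.of("r2"), "r2", List.of("r3"));
        System.out.println(regionsToRestart("r2", missing, consumers)); // [r2, r1, r3]
    }
}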

Example 17 with IntermediateResultPartitionID

Use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

Class SingleInputGate, method setInputChannels.

public void setInputChannels(InputChannel... channels) {
    if (channels.length != numberOfInputChannels) {
        throw new IllegalArgumentException("Expected " + numberOfInputChannels + " channels, " + "but got " + channels.length);
    }
    synchronized (requestLock) {
        System.arraycopy(channels, 0, this.channels, 0, numberOfInputChannels);
        for (InputChannel inputChannel : channels) {
            IntermediateResultPartitionID partitionId = inputChannel.getPartitionId().getPartitionId();
            int subpartitionIndex = inputChannel.getConsumedSubpartitionIndex();
            if (inputChannels.put(new SubpartitionInfo(partitionId, subpartitionIndex), inputChannel) == null && inputChannel instanceof UnknownInputChannel) {
                numberOfUninitializedChannels++;
            }
        }
    }
}
Also used : IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)
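
As a side note on the map key built above: the lookup key combines the producing partition's id with the consumed subpartition index. The following is a hypothetical standalone sketch of that keying idea, with plain Strings standing in for IntermediateResultPartitionID and a record standing in for the SubpartitionInfo key used above; it also shows the Map.put-returns-null idiom the code relies on to count newly registered channels.

// Hypothetical sketch of a (partition id, subpartition index) lookup key (no Flink types).
import java.util.HashMap;
import java.util.Map;

final class ChannelKeySketch {

    // Illustrative stand-in for the gate's subpartition key; String ids stand in for
    // IntermediateResultPartitionID.
    record PartitionKey(String partitionId, int subpartitionIndex) {}

    public static void main(String[] args) {
        Map<PartitionKey, String> channelsByKey = new HashMap<>();
        // Map.put returns null only when the key was not registered before, which is how the
        // original code recognizes a newly added (still unknown) channel worth counting.
        String previous = channelsByKey.put(new PartitionKey("partition-0", 1), "channel-A");
        System.out.println(previous == null); // true: first registration of this key
    }
}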

Example 18 with IntermediateResultPartitionID

Use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

Class ExecutionGraphToInputsLocationsRetrieverAdapter, method getConsumedResultPartitionsProducers.

@Override
public Collection<Collection<ExecutionVertexID>> getConsumedResultPartitionsProducers(ExecutionVertexID executionVertexId) {
    ExecutionVertex ev = getExecutionVertex(executionVertexId);
    InternalExecutionGraphAccessor executionGraphAccessor = ev.getExecutionGraphAccessor();
    List<Collection<ExecutionVertexID>> resultPartitionProducers = new ArrayList<>(ev.getNumberOfInputs());
    for (ConsumedPartitionGroup consumedPartitions : ev.getAllConsumedPartitionGroups()) {
        List<ExecutionVertexID> producers = new ArrayList<>(consumedPartitions.size());
        for (IntermediateResultPartitionID consumedPartitionId : consumedPartitions) {
            ExecutionVertex producer = executionGraphAccessor.getResultPartitionOrThrow(consumedPartitionId).getProducer();
            producers.add(producer.getID());
        }
        resultPartitionProducers.add(producers);
    }
    return resultPartitionProducers;
}
Also used : ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) ArrayList(java.util.ArrayList) Collection(java.util.Collection) InternalExecutionGraphAccessor(org.apache.flink.runtime.executiongraph.InternalExecutionGraphAccessor) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)
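
For readers unfamiliar with the id type being collected here: an ExecutionVertexID identifies one parallel subtask of a JobVertex, and the adapter returns one such id per producing partition in each consumed group. A short sketch, assuming flink-runtime is on the classpath; the freshly generated ids and variable names are purely illustrative.

// Sketch of what an ExecutionVertexID denotes (requires flink-runtime on the classpath).
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID;

final class ProducerIdSketch {
    public static void main(String[] args) {
        JobVertexID producerJobVertex = new JobVertexID();
        // Subtask 0 of the producer job vertex, i.e. one parallel instance of that vertex.
        ExecutionVertexID producerSubtask0 = new ExecutionVertexID(producerJobVertex, 0);
        System.out.println(producerSubtask0);
    }
}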

Example 19 with IntermediateResultPartitionID

Use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

Class ExecutionPartitionLifecycleTest, method setupExecutionGraphAndStartRunningJob.

private void setupExecutionGraphAndStartRunningJob(ResultPartitionType resultPartitionType, JobMasterPartitionTracker partitionTracker, TaskManagerGateway taskManagerGateway, ShuffleMaster<?> shuffleMaster) throws Exception {
    final JobVertex producerVertex = createNoOpJobVertex();
    final JobVertex consumerVertex = createNoOpJobVertex();
    consumerVertex.connectNewDataSetAsInput(producerVertex, DistributionPattern.ALL_TO_ALL, resultPartitionType);
    final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
    final TestingPhysicalSlotProvider physicalSlotProvider = TestingPhysicalSlotProvider.create((resourceProfile) -> CompletableFuture.completedFuture(TestingPhysicalSlot.builder().withTaskManagerGateway(taskManagerGateway).withTaskManagerLocation(taskManagerLocation).build()));
    final JobGraph jobGraph = JobGraphTestUtils.batchJobGraph(producerVertex, consumerVertex);
    final SchedulerBase scheduler = SchedulerTestingUtils.newSchedulerBuilder(jobGraph, ComponentMainThreadExecutorServiceAdapter.forMainThread()).setExecutionSlotAllocatorFactory(SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(physicalSlotProvider)).setShuffleMaster(shuffleMaster).setPartitionTracker(partitionTracker).build();
    final ExecutionGraph executionGraph = scheduler.getExecutionGraph();
    final ExecutionJobVertex executionJobVertex = executionGraph.getJobVertex(producerVertex.getID());
    final ExecutionVertex executionVertex = executionJobVertex.getTaskVertices()[0];
    execution = executionVertex.getCurrentExecutionAttempt();
    scheduler.startScheduling();
    execution.switchToRecovering();
    execution.switchToRunning();
    final IntermediateResultPartitionID expectedIntermediateResultPartitionId = executionJobVertex.getProducedDataSets()[0].getPartitions()[0].getPartitionId();
    descriptor = execution.getResultPartitionDeploymentDescriptor(expectedIntermediateResultPartitionId).get();
    taskExecutorResourceId = taskManagerLocation.getResourceID();
    jobId = executionGraph.getJobID();
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) LocalTaskManagerLocation(org.apache.flink.runtime.taskmanager.LocalTaskManagerLocation) TestingPhysicalSlotProvider(org.apache.flink.runtime.scheduler.TestingPhysicalSlotProvider) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)
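
The id fetched from getProducedDataSets()[0].getPartitions()[0] above is composed of the produced IntermediateDataSetID plus a partition number. A small sketch of that composition, assuming flink-runtime is on the classpath; the ids are freshly generated and purely illustrative.

// Sketch of how an IntermediateResultPartitionID is composed (requires flink-runtime).
import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
import org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID;

final class PartitionIdSketch {
    public static void main(String[] args) {
        IntermediateDataSetID dataSetId = new IntermediateDataSetID();
        // Partition 0 of that data set, analogous to getPartitions()[0] in the test above.
        IntermediateResultPartitionID partition0 = new IntermediateResultPartitionID(dataSetId, 0);
        System.out.println(partition0.getPartitionNumber()); // 0
        System.out.println(partition0.getIntermediateDataSetID().equals(dataSetId)); // true
    }
}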

Example 20 with IntermediateResultPartitionID

Use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.

Class PointwisePatternTest, method test3NToN.

@Test
public void test3NToN() throws Exception {
    final int N = 17;
    ExecutionJobVertex target = setUpExecutionGraphAndGetDownstreamVertex(3 * N, N);
    for (ExecutionVertex ev : target.getTaskVertices()) {
        assertEquals(1, ev.getNumberOfInputs());
        ConsumedPartitionGroup consumedPartitionGroup = ev.getConsumedPartitionGroup(0);
        assertEquals(3, consumedPartitionGroup.size());
        int idx = 0;
        for (IntermediateResultPartitionID partitionId : consumedPartitionGroup) {
            assertEquals(ev.getParallelSubtaskIndex() * 3L + idx++, partitionId.getPartitionNumber());
        }
    }
}
Also used : ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) Test(org.junit.Test)
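
The assertion in this test pins down the 3N-to-N pointwise wiring: consumer subtask i consumes partitions 3i, 3i+1 and 3i+2 of the upstream result. A standalone sketch of that arithmetic, using no Flink types:

// Prints the partition numbers each consumer subtask reads under the 3N-to-N pattern.
final class PointwiseMappingSketch {
    public static void main(String[] args) {
        int n = 17; // N from the test above; 3 * N upstream partitions feed N consumers
        for (int consumer = 0; consumer < n; consumer++) {
            int first = consumer * 3;
            System.out.printf("consumer %d <- partitions [%d, %d, %d]%n",
                    consumer, first, first + 1, first + 2);
        }
    }
}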

Aggregations

IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) 66
Test (org.junit.Test) 41
IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID) 18
ConsumedPartitionGroup (org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) 14
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID) 13
JobID (org.apache.flink.api.common.JobID) 12
ExecutionVertexID (org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) 12
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID) 11
ArrayList (java.util.ArrayList) 10
TaskDeploymentDescriptor (org.apache.flink.runtime.deployment.TaskDeploymentDescriptor) 10
ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID) 10
InputGateDeploymentDescriptor (org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor) 9
ResultPartitionDeploymentDescriptor (org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor) 9
CompletableFuture (java.util.concurrent.CompletableFuture) 8
Configuration (org.apache.flink.configuration.Configuration) 8
ShuffleDescriptor (org.apache.flink.runtime.shuffle.ShuffleDescriptor) 8
IOException (java.io.IOException) 7
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex) 7
Collection (java.util.Collection) 6
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 6