use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.
the class RestartPipelinedRegionFailoverStrategy method getRegionsToRestart.
/**
* All 'involved' regions are proposed to be restarted. The 'involved' regions are calculated
* with rules below: 1. The region containing the failed task is always involved 2. If an input
* result partition of an involved region is not available, i.e. Missing or Corrupted, the
* region containing the partition producer task is involved 3. If a region is involved, all of
* its consumer regions are involved
*/
private Set<SchedulingPipelinedRegion> getRegionsToRestart(SchedulingPipelinedRegion failedRegion) {
Set<SchedulingPipelinedRegion> regionsToRestart = Collections.newSetFromMap(new IdentityHashMap<>());
Set<SchedulingPipelinedRegion> visitedRegions = Collections.newSetFromMap(new IdentityHashMap<>());
Set<ConsumedPartitionGroup> visitedConsumedResultGroups = Collections.newSetFromMap(new IdentityHashMap<>());
Set<ConsumerVertexGroup> visitedConsumerVertexGroups = Collections.newSetFromMap(new IdentityHashMap<>());
// start from the failed region to visit all involved regions
Queue<SchedulingPipelinedRegion> regionsToVisit = new ArrayDeque<>();
visitedRegions.add(failedRegion);
regionsToVisit.add(failedRegion);
while (!regionsToVisit.isEmpty()) {
SchedulingPipelinedRegion regionToRestart = regionsToVisit.poll();
// an involved region should be restarted
regionsToRestart.add(regionToRestart);
// if a needed input result partition is not available, its producer region is involved
for (IntermediateResultPartitionID consumedPartitionId : getConsumedPartitionsToVisit(regionToRestart, visitedConsumedResultGroups)) {
if (!resultPartitionAvailabilityChecker.isAvailable(consumedPartitionId)) {
SchedulingResultPartition consumedPartition = topology.getResultPartition(consumedPartitionId);
SchedulingPipelinedRegion producerRegion = topology.getPipelinedRegionOfVertex(consumedPartition.getProducer().getId());
if (!visitedRegions.contains(producerRegion)) {
visitedRegions.add(producerRegion);
regionsToVisit.add(producerRegion);
}
}
}
// all consumer regions of an involved region should be involved
for (ExecutionVertexID consumerVertexId : getConsumerVerticesToVisit(regionToRestart, visitedConsumerVertexGroups)) {
SchedulingPipelinedRegion consumerRegion = topology.getPipelinedRegionOfVertex(consumerVertexId);
if (!visitedRegions.contains(consumerRegion)) {
visitedRegions.add(consumerRegion);
regionsToVisit.add(consumerRegion);
}
}
}
return regionsToRestart;
}
use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.
the class SingleInputGate method setInputChannels.
public void setInputChannels(InputChannel... channels) {
if (channels.length != numberOfInputChannels) {
throw new IllegalArgumentException("Expected " + numberOfInputChannels + " channels, " + "but got " + channels.length);
}
synchronized (requestLock) {
System.arraycopy(channels, 0, this.channels, 0, numberOfInputChannels);
for (InputChannel inputChannel : channels) {
IntermediateResultPartitionID partitionId = inputChannel.getPartitionId().getPartitionId();
int subpartitionIndex = inputChannel.getConsumedSubpartitionIndex();
if (inputChannels.put(new SubpartitionInfo(partitionId, subpartitionIndex), inputChannel) == null && inputChannel instanceof UnknownInputChannel) {
numberOfUninitializedChannels++;
}
}
}
}
use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.
the class ExecutionGraphToInputsLocationsRetrieverAdapter method getConsumedResultPartitionsProducers.
@Override
public Collection<Collection<ExecutionVertexID>> getConsumedResultPartitionsProducers(ExecutionVertexID executionVertexId) {
ExecutionVertex ev = getExecutionVertex(executionVertexId);
InternalExecutionGraphAccessor executionGraphAccessor = ev.getExecutionGraphAccessor();
List<Collection<ExecutionVertexID>> resultPartitionProducers = new ArrayList<>(ev.getNumberOfInputs());
for (ConsumedPartitionGroup consumedPartitions : ev.getAllConsumedPartitionGroups()) {
List<ExecutionVertexID> producers = new ArrayList<>(consumedPartitions.size());
for (IntermediateResultPartitionID consumedPartitionId : consumedPartitions) {
ExecutionVertex producer = executionGraphAccessor.getResultPartitionOrThrow(consumedPartitionId).getProducer();
producers.add(producer.getID());
}
resultPartitionProducers.add(producers);
}
return resultPartitionProducers;
}
use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.
the class ExecutionPartitionLifecycleTest method setupExecutionGraphAndStartRunningJob.
private void setupExecutionGraphAndStartRunningJob(ResultPartitionType resultPartitionType, JobMasterPartitionTracker partitionTracker, TaskManagerGateway taskManagerGateway, ShuffleMaster<?> shuffleMaster) throws Exception {
final JobVertex producerVertex = createNoOpJobVertex();
final JobVertex consumerVertex = createNoOpJobVertex();
consumerVertex.connectNewDataSetAsInput(producerVertex, DistributionPattern.ALL_TO_ALL, resultPartitionType);
final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation();
final TestingPhysicalSlotProvider physicalSlotProvider = TestingPhysicalSlotProvider.create((resourceProfile) -> CompletableFuture.completedFuture(TestingPhysicalSlot.builder().withTaskManagerGateway(taskManagerGateway).withTaskManagerLocation(taskManagerLocation).build()));
final JobGraph jobGraph = JobGraphTestUtils.batchJobGraph(producerVertex, consumerVertex);
final SchedulerBase scheduler = SchedulerTestingUtils.newSchedulerBuilder(jobGraph, ComponentMainThreadExecutorServiceAdapter.forMainThread()).setExecutionSlotAllocatorFactory(SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(physicalSlotProvider)).setShuffleMaster(shuffleMaster).setPartitionTracker(partitionTracker).build();
final ExecutionGraph executionGraph = scheduler.getExecutionGraph();
final ExecutionJobVertex executionJobVertex = executionGraph.getJobVertex(producerVertex.getID());
final ExecutionVertex executionVertex = executionJobVertex.getTaskVertices()[0];
execution = executionVertex.getCurrentExecutionAttempt();
scheduler.startScheduling();
execution.switchToRecovering();
execution.switchToRunning();
final IntermediateResultPartitionID expectedIntermediateResultPartitionId = executionJobVertex.getProducedDataSets()[0].getPartitions()[0].getPartitionId();
descriptor = execution.getResultPartitionDeploymentDescriptor(expectedIntermediateResultPartitionId).get();
taskExecutorResourceId = taskManagerLocation.getResourceID();
jobId = executionGraph.getJobID();
}
use of org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID in project flink by apache.
the class PointwisePatternTest method test3NToN.
@Test
public void test3NToN() throws Exception {
final int N = 17;
ExecutionJobVertex target = setUpExecutionGraphAndGetDownstreamVertex(3 * N, N);
for (ExecutionVertex ev : target.getTaskVertices()) {
assertEquals(1, ev.getNumberOfInputs());
ConsumedPartitionGroup consumedPartitionGroup = ev.getConsumedPartitionGroup(0);
assertEquals(3, consumedPartitionGroup.size());
int idx = 0;
for (IntermediateResultPartitionID partitionId : consumedPartitionGroup) {
assertEquals(ev.getParallelSubtaskIndex() * 3L + idx++, partitionId.getPartitionNumber());
}
}
}
Aggregations