use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class SchedulingPipelinedRegionComputeUtilTest method testEmbarrassinglyParallelCase.
/**
 * Validates the region computation for independent, pairwise-connected chains of vertices:
 * the two vertices of each pipelined pair must share a region, while the regions of the
 * three pairs must be distinct from one another.
 *
 * <pre>
 *     (a1) --> (b1)
 *
 *     (a2) --> (b2)
 *
 *     (a3) --> (b3)
 * </pre>
 */
@Test
public void testEmbarrassinglyParallelCase() {
    final TestingSchedulingTopology topology = new TestingSchedulingTopology();

    // Upstream vertices first, then downstream vertices (same creation order as the diagram).
    final TestingSchedulingExecutionVertex upstream1 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex upstream2 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex upstream3 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex downstream1 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex downstream2 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex downstream3 = topology.newExecutionVertex();

    // One pipelined edge per pair, no edges across pairs.
    topology
            .connect(upstream1, downstream1, ResultPartitionType.PIPELINED)
            .connect(upstream2, downstream2, ResultPartitionType.PIPELINED)
            .connect(upstream3, downstream3, ResultPartitionType.PIPELINED);

    final Map<ExecutionVertexID, Set<SchedulingExecutionVertex>> regionsByVertexId =
            computePipelinedRegionByVertex(topology);

    final Set<SchedulingExecutionVertex> upstreamRegion1 = regionsByVertexId.get(upstream1.getId());
    final Set<SchedulingExecutionVertex> upstreamRegion2 = regionsByVertexId.get(upstream2.getId());
    final Set<SchedulingExecutionVertex> upstreamRegion3 = regionsByVertexId.get(upstream3.getId());
    final Set<SchedulingExecutionVertex> downstreamRegion1 = regionsByVertexId.get(downstream1.getId());
    final Set<SchedulingExecutionVertex> downstreamRegion2 = regionsByVertexId.get(downstream2.getId());
    final Set<SchedulingExecutionVertex> downstreamRegion3 = regionsByVertexId.get(downstream3.getId());

    // Each pipelined pair ends up in one region ...
    assertSameRegion(upstreamRegion1, downstreamRegion1);
    assertSameRegion(upstreamRegion2, downstreamRegion2);
    assertSameRegion(upstreamRegion3, downstreamRegion3);

    // ... and the pairs do not share a region with each other.
    assertDistinctRegions(upstreamRegion1, upstreamRegion2, upstreamRegion3);
}
use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class DefaultSchedulerTest method vertexIsResetBeforeRestarted.
/**
 * Schedules the only vertex of a single-vertex job, reports its current attempt as failed
 * and triggers the scheduled restart tasks. Afterwards the scheduling strategy must have
 * received exactly one vertex to restart and the scheduling vertex must be in state
 * {@code CREATED}.
 */
@Test
public void vertexIsResetBeforeRestarted() throws Exception {
    final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();

    final TestSchedulingStrategy.Factory strategyFactory = new TestSchedulingStrategy.Factory();
    final DefaultScheduler scheduler =
            createScheduler(
                    jobGraph,
                    ComponentMainThreadExecutorServiceAdapter.forMainThread(),
                    strategyFactory);
    final TestSchedulingStrategy strategy = strategyFactory.getLastCreatedSchedulingStrategy();
    final SchedulingTopology schedulingTopology = strategy.getSchedulingTopology();

    scheduler.startScheduling();

    // The job graph has a single vertex, so there is exactly one scheduling vertex to deploy.
    final SchedulingExecutionVertex soleSchedulingVertex =
            Iterables.getOnlyElement(schedulingTopology.getVertices());
    strategy.schedule(Collections.singletonList(soleSchedulingVertex.getId()));

    // Report the current attempt of that vertex as failed.
    final ArchivedExecutionVertex soleArchivedVertex =
            Iterables.getOnlyElement(
                    scheduler
                            .requestJob()
                            .getArchivedExecutionGraph()
                            .getAllExecutionVertices());
    scheduler.updateTaskExecutionState(
            createFailedTaskExecutionState(
                    soleArchivedVertex.getCurrentExecutionAttempt().getAttemptId()));

    taskRestartExecutor.triggerScheduledTasks();

    assertThat(strategy.getReceivedVerticesToRestart(), hasSize(1));
    assertThat(soleSchedulingVertex.getState(), is(equalTo(ExecutionState.CREATED)));
}
use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class DefaultExecutionGraph method createResultPartitionId.
/**
 * Builds the {@link ResultPartitionID} for an intermediate result partition by combining the
 * partition id with the attempt id of the current execution attempt of the partition's
 * producer vertex.
 *
 * <p>The producer is looked up through the scheduling topology. The call fails a
 * precondition check if the producer's job vertex is unknown to this graph or if the
 * producer's subtask index is not below the number of task vertices of that job vertex.
 *
 * @param resultPartitionId id of the intermediate result partition
 * @return the result partition id bound to the producer's current execution attempt
 */
ResultPartitionID createResultPartitionId(final IntermediateResultPartitionID resultPartitionId) {
    final ExecutionVertexID producerId =
            getSchedulingTopology().getResultPartition(resultPartitionId).getProducer().getId();

    final JobVertexID jobVertexId = producerId.getJobVertexId();
    final ExecutionJobVertex producerJobVertex = getJobVertex(jobVertexId);
    checkNotNull(producerJobVertex, "Unknown job vertex %s", jobVertexId);

    final ExecutionVertex[] subtaskVertices = producerJobVertex.getTaskVertices();
    final int subtaskIndex = producerId.getSubtaskIndex();
    checkState(
            subtaskIndex < subtaskVertices.length,
            "Invalid subtask index %d for job vertex %s",
            subtaskIndex,
            jobVertexId);

    return new ResultPartitionID(
            resultPartitionId,
            subtaskVertices[subtaskIndex].getCurrentExecutionAttempt().getAttemptId());
}
use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class DefaultExecutionTopology method generateNewPipelinedRegions.
/**
 * Builds the scheduling pipelined regions for the given execution vertices and registers
 * them in {@code pipelinedRegions} and {@code pipelinedRegionsByVertex}.
 *
 * <p>The vertices are first grouped by the logical pipelined region that is registered for
 * their job vertex. Every group is then converted into one or more scheduling regions, and
 * the number of regions built plus the elapsed time is logged at INFO level.
 *
 * @param newExecutionVertices the execution vertices for which regions have to be built
 */
private void generateNewPipelinedRegions(Iterable<ExecutionVertex> newExecutionVertices) {
    // Resolve every execution vertex to its scheduling counterpart.
    final Iterable<DefaultExecutionVertex> newSchedulingVertices =
            IterableUtils.toStream(newExecutionVertices)
                    .map(ExecutionVertex::getID)
                    .map(executionVerticesById::get)
                    .collect(Collectors.toList());

    // Group by logical region; keys are compared by identity, and the per-group lists keep
    // the encounter order of the input.
    final Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> verticesByLogicalRegion =
            new IdentityHashMap<>();
    for (DefaultExecutionVertex vertex : newSchedulingVertices) {
        final DefaultLogicalPipelinedRegion logicalRegion =
                logicalPipelinedRegionsByJobVertexId.get(vertex.getId().getJobVertexId());
        verticesByLogicalRegion
                .computeIfAbsent(logicalRegion, ignored -> new ArrayList<>())
                .add(vertex);
    }

    final long startNanos = System.nanoTime();

    // Identity-based set: two regions with equal content are still kept as separate entries.
    final Set<Set<SchedulingExecutionVertex>> rawRegions =
            Collections.newSetFromMap(new IdentityHashMap<>());

    // Each logical region is converted on its own, one after the other.
    for (Map.Entry<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> group :
            verticesByLogicalRegion.entrySet()) {
        final DefaultLogicalPipelinedRegion logicalRegion = group.getKey();
        final List<DefaultExecutionVertex> groupVertices = group.getValue();

        if (!containsIntraRegionAllToAllEdge(logicalRegion)) {
            // Only pointwise edges inside the logical region: delegate to
            // SchedulingPipelinedRegionComputeUtil, which computes the regions with O(N)
            // computation complexity.
            rawRegions.addAll(
                    SchedulingPipelinedRegionComputeUtil.computePipelinedRegions(
                            groupVertices, executionVerticesById::get, resultPartitionsById::get));
        } else {
            // At least one all-to-all edge inside the logical region. Two cases exist:
            //
            // 1. Pipelined all-to-all edge: it connects all vertices in a pipelined way, so
            //    every execution vertex derived from this logical region belongs to one
            //    scheduling region anyway.
            //
            // 2. Blocking all-to-all edge: all vertices must be kept inside one scheduling
            //    region so that no deadlock can happen during scheduling. See FLINK-17330
            //    (https://issues.apache.org/jira/browse/FLINK-17330) for details.
            //
            // In both cases the whole logical region becomes a single scheduling region.
            rawRegions.add(new HashSet<>(groupVertices));
        }
    }

    // Wrap every raw region and index it by the ids of the vertices it contains.
    for (Set<? extends SchedulingExecutionVertex> rawRegion : rawRegions) {
        // noinspection unchecked
        final DefaultSchedulingPipelinedRegion region =
                new DefaultSchedulingPipelinedRegion(
                        (Set<DefaultExecutionVertex>) rawRegion, resultPartitionsById::get);
        pipelinedRegions.add(region);
        rawRegion.forEach(member -> pipelinedRegionsByVertex.put(member.getId(), region));
    }

    // The measured time covers region building and registration, not the grouping above.
    final long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000;
    LOG.info("Built {} new pipelined regions in {} ms, total {} pipelined regions currently.", rawRegions.size(), elapsedMillis, pipelinedRegions.size());
}
use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class SchedulingPipelinedRegionComputeUtilTest method testTwoComponentsViaBlockingExchange.
/**
 * Validates the region computation for the topology below: {@code a} and {@code b} are
 * fully connected through pipelined edges, while each {@code b} feeds its {@code c} through
 * a blocking edge. All {@code a} and {@code b} vertices must share one region, and that
 * region and the regions of {@code c1} and {@code c2} must be pairwise distinct.
 *
 * <pre>
 *     (a1) -+-> (b1) -+-> (c1)
 *           X
 *     (a2) -+-> (b2) -+-> (c2)
 *
 *           ^         ^
 *           |         |
 *     (pipelined) (blocking)
 * </pre>
 */
@Test
public void testTwoComponentsViaBlockingExchange() {
    final TestingSchedulingTopology topology = new TestingSchedulingTopology();

    final TestingSchedulingExecutionVertex source1 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex source2 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex middle1 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex middle2 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex sink1 = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex sink2 = topology.newExecutionVertex();

    topology
            // pipelined all-to-all exchange between the a and b vertices
            .connect(source1, middle1, ResultPartitionType.PIPELINED)
            .connect(source1, middle2, ResultPartitionType.PIPELINED)
            .connect(source2, middle1, ResultPartitionType.PIPELINED)
            .connect(source2, middle2, ResultPartitionType.PIPELINED)
            // blocking pointwise exchange between the b and c vertices
            .connect(middle1, sink1, ResultPartitionType.BLOCKING)
            .connect(middle2, sink2, ResultPartitionType.BLOCKING);

    final Map<ExecutionVertexID, Set<SchedulingExecutionVertex>> regionsByVertexId =
            computePipelinedRegionByVertex(topology);

    final Set<SchedulingExecutionVertex> sourceRegion1 = regionsByVertexId.get(source1.getId());
    final Set<SchedulingExecutionVertex> sourceRegion2 = regionsByVertexId.get(source2.getId());
    final Set<SchedulingExecutionVertex> middleRegion1 = regionsByVertexId.get(middle1.getId());
    final Set<SchedulingExecutionVertex> middleRegion2 = regionsByVertexId.get(middle2.getId());
    final Set<SchedulingExecutionVertex> sinkRegion1 = regionsByVertexId.get(sink1.getId());
    final Set<SchedulingExecutionVertex> sinkRegion2 = regionsByVertexId.get(sink2.getId());

    // Everything on the pipelined side shares a single region.
    assertSameRegion(sourceRegion1, sourceRegion2, middleRegion1, middleRegion2);

    // The pipelined region and the two sink regions must all differ.
    assertDistinctRegions(sourceRegion1, sinkRegion1, sinkRegion2);
}
Aggregations