Search in sources :

Example 11 with SchedulingExecutionVertex

use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.

the class SchedulingPipelinedRegionComputeUtilTest method testEmbarrassinglyParallelCase.

/**
 * Verifies region computation for three independent producer/consumer pairs that are each
 * connected by a pipelined edge: every pair must share a region, and the three resulting regions
 * must be distinct from each other.
 *
 * <pre>
 *     (a1) --> (b1)
 *
 *     (a2) --> (b2)
 *
 *     (a3) --> (b3)
 * </pre>
 */
@Test
public void testEmbarrassinglyParallelCase() {
    final TestingSchedulingTopology testTopology = new TestingSchedulingTopology();

    // Vertices are created in the same order as before: all producers, then all consumers.
    final TestingSchedulingExecutionVertex producer1 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex producer2 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex producer3 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex consumer1 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex consumer2 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex consumer3 = testTopology.newExecutionVertex();

    testTopology
            .connect(producer1, consumer1, ResultPartitionType.PIPELINED)
            .connect(producer2, consumer2, ResultPartitionType.PIPELINED)
            .connect(producer3, consumer3, ResultPartitionType.PIPELINED);

    final Map<ExecutionVertexID, Set<SchedulingExecutionVertex>> regionsByVertexId =
            computePipelinedRegionByVertex(testTopology);

    final Set<SchedulingExecutionVertex> producerRegion1 = regionsByVertexId.get(producer1.getId());
    final Set<SchedulingExecutionVertex> producerRegion2 = regionsByVertexId.get(producer2.getId());
    final Set<SchedulingExecutionVertex> producerRegion3 = regionsByVertexId.get(producer3.getId());
    final Set<SchedulingExecutionVertex> consumerRegion1 = regionsByVertexId.get(consumer1.getId());
    final Set<SchedulingExecutionVertex> consumerRegion2 = regionsByVertexId.get(consumer2.getId());
    final Set<SchedulingExecutionVertex> consumerRegion3 = regionsByVertexId.get(consumer3.getId());

    // Each consumer lives in the region of its own producer ...
    assertSameRegion(producerRegion1, consumerRegion1);
    assertSameRegion(producerRegion2, consumerRegion2);
    assertSameRegion(producerRegion3, consumerRegion3);

    // ... and the three pairs do not share a region.
    assertDistinctRegions(producerRegion1, producerRegion2, producerRegion3);
}
Also used : TestingSchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.TestingSchedulingExecutionVertex) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) Set(java.util.Set) TestingSchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.TestingSchedulingExecutionVertex) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) TestingSchedulingTopology(org.apache.flink.runtime.scheduler.strategy.TestingSchedulingTopology) Test(org.junit.Test)

Example 12 with SchedulingExecutionVertex

use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.

the class DefaultSchedulerTest method vertexIsResetBeforeRestarted.

/**
 * Schedules the only vertex of a single-vertex job, reports its current attempt as failed and
 * triggers the scheduled restart tasks. Afterwards the scheduling strategy must have received
 * exactly one vertex to restart, and the scheduling vertex must report
 * {@link ExecutionState#CREATED}.
 */
@Test
public void vertexIsResetBeforeRestarted() throws Exception {
    final JobGraph graph = singleNonParallelJobVertexJobGraph();
    final TestSchedulingStrategy.Factory strategyFactory = new TestSchedulingStrategy.Factory();
    final DefaultScheduler defaultScheduler =
            createScheduler(
                    graph,
                    ComponentMainThreadExecutorServiceAdapter.forMainThread(),
                    strategyFactory);

    // The strategy instance only exists once the scheduler has been created.
    final TestSchedulingStrategy strategy = strategyFactory.getLastCreatedSchedulingStrategy();
    final SchedulingTopology schedulingTopology = strategy.getSchedulingTopology();

    defaultScheduler.startScheduling();

    final SchedulingExecutionVertex soleSchedulingVertex =
            Iterables.getOnlyElement(schedulingTopology.getVertices());
    strategy.schedule(Collections.singletonList(soleSchedulingVertex.getId()));

    final ArchivedExecutionVertex soleArchivedVertex =
            Iterables.getOnlyElement(
                    defaultScheduler
                            .requestJob()
                            .getArchivedExecutionGraph()
                            .getAllExecutionVertices());
    final ExecutionAttemptID failedAttemptId =
            soleArchivedVertex.getCurrentExecutionAttempt().getAttemptId();

    // Fail the running attempt, then let the restart executor run its scheduled tasks.
    defaultScheduler.updateTaskExecutionState(createFailedTaskExecutionState(failedAttemptId));
    taskRestartExecutor.triggerScheduledTasks();

    assertThat(strategy.getReceivedVerticesToRestart(), hasSize(1));
    assertThat(soleSchedulingVertex.getState(), is(equalTo(ExecutionState.CREATED)));
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ArchivedExecutionVertex(org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex) TestingCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.TestingCheckpointRecoveryFactory) SchedulingStrategyFactory(org.apache.flink.runtime.scheduler.strategy.SchedulingStrategyFactory) TestFailoverStrategyFactory(org.apache.flink.runtime.executiongraph.utils.TestFailoverStrategyFactory) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) SchedulingTopology(org.apache.flink.runtime.scheduler.strategy.SchedulingTopology) TestSchedulingStrategy(org.apache.flink.runtime.scheduler.strategy.TestSchedulingStrategy) AdaptiveSchedulerTest(org.apache.flink.runtime.scheduler.adaptive.AdaptiveSchedulerTest) Test(org.junit.Test)

Example 13 with SchedulingExecutionVertex

use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.

the class DefaultExecutionGraph method createResultPartitionId.

/**
 * Builds the {@link ResultPartitionID} for the given intermediate result partition by pairing
 * the partition id with the attempt id of the current execution of the partition's producer.
 *
 * <p>The producer is looked up through the scheduling topology; its {@link ExecutionVertexID}
 * yields the job vertex id and the subtask index used to locate the {@link ExecutionVertex}.
 *
 * @param resultPartitionId id of the intermediate result partition
 * @return the result partition id bound to the producer's current execution attempt
 * @throws NullPointerException if no job vertex is registered for the producer's job vertex id
 * @throws IllegalStateException if the producer's subtask index is not smaller than the number
 *     of task vertices of its job vertex
 */
ResultPartitionID createResultPartitionId(final IntermediateResultPartitionID resultPartitionId) {
    final SchedulingResultPartition schedulingResultPartition = getSchedulingTopology().getResultPartition(resultPartitionId);
    final SchedulingExecutionVertex producer = schedulingResultPartition.getProducer();
    final ExecutionVertexID producerId = producer.getId();
    final JobVertexID jobVertexId = producerId.getJobVertexId();
    final ExecutionJobVertex jobVertex = getJobVertex(jobVertexId);
    checkNotNull(jobVertex, "Unknown job vertex %s", jobVertexId);
    final ExecutionVertex[] taskVertices = jobVertex.getTaskVertices();
    final int subtaskIndex = producerId.getSubtaskIndex();
    // Flink's Preconditions message templates substitute only "%s" placeholders; a "%d" would
    // be left in the message verbatim and shift the remaining arguments.
    checkState(subtaskIndex < taskVertices.length, "Invalid subtask index %s for job vertex %s", subtaskIndex, jobVertexId);
    final ExecutionVertex taskVertex = taskVertices[subtaskIndex];
    final Execution execution = taskVertex.getCurrentExecutionAttempt();
    return new ResultPartitionID(resultPartitionId, execution.getAttemptId());
}
Also used : SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) SchedulingResultPartition(org.apache.flink.runtime.scheduler.strategy.SchedulingResultPartition) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex)

Example 14 with SchedulingExecutionVertex

use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.

the class DefaultExecutionTopology method generateNewPipelinedRegions.

/**
 * Builds the scheduling pipelined regions for the given execution vertices and registers them
 * in {@code pipelinedRegions} and {@code pipelinedRegionsByVertex}.
 *
 * <p>The vertices are resolved to their scheduling counterparts, grouped by the logical
 * pipelined region of their job vertex, converted group by group into raw regions (sets of
 * vertices), and finally wrapped into {@link DefaultSchedulingPipelinedRegion}s. The number of
 * regions built and the time spent building them are logged at INFO level.
 *
 * @param newExecutionVertices the execution vertices for which regions are built
 */
private void generateNewPipelinedRegions(Iterable<ExecutionVertex> newExecutionVertices) {
    final List<DefaultExecutionVertex> addedVertices =
            IterableUtils.toStream(newExecutionVertices)
                    .map(ExecutionVertex::getID)
                    .map(executionVerticesById::get)
                    .collect(Collectors.toList());

    // Group the vertices by the logical pipelined region their job vertex belongs to. Keys are
    // compared by identity.
    final Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> verticesByLogicalRegion =
            new IdentityHashMap<>();
    for (DefaultExecutionVertex vertex : addedVertices) {
        final DefaultLogicalPipelinedRegion logicalRegion =
                logicalPipelinedRegionsByJobVertexId.get(vertex.getId().getJobVertexId());
        verticesByLogicalRegion
                .computeIfAbsent(logicalRegion, ignored -> new ArrayList<>())
                .add(vertex);
    }

    final long startNanos = System.nanoTime();

    // Raw regions are tracked by identity, not by set equality.
    final Set<Set<SchedulingExecutionVertex>> rawRegions =
            Collections.newSetFromMap(new IdentityHashMap<>());

    // Every logical pipelined region is converted on its own into one or more raw scheduling
    // regions.
    for (Map.Entry<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> group :
            verticesByLogicalRegion.entrySet()) {
        final List<DefaultExecutionVertex> members = group.getValue();

        if (!containsIntraRegionAllToAllEdge(group.getKey())) {
            // Only pointwise edges inside the logical region: delegate to
            // SchedulingPipelinedRegionComputeUtil, which computes the regions with O(N)
            // computation complexity.
            rawRegions.addAll(
                    SchedulingPipelinedRegionComputeUtil.computePipelinedRegions(
                            members, executionVerticesById::get, resultPartitionsById::get));
            continue;
        }

        // The logical region contains at least one intra-region all-to-all edge. Two cases:
        //
        // 1. Pipelined all-to-all edge: it connects all vertices pipelined, so all execution
        //    vertices derived from this logical region belong to one scheduling region.
        //
        // 2. Blocking all-to-all edge: all vertices must be kept inside one scheduling region
        //    so that no deadlock happens during scheduling; see FLINK-17330
        //    (https://issues.apache.org/jira/browse/FLINK-17330).
        //
        // In both cases the whole logical region becomes a single scheduling region.
        rawRegions.add(new HashSet<>(members));
    }

    for (Set<? extends SchedulingExecutionVertex> rawRegion : rawRegions) {
        // noinspection unchecked
        final DefaultSchedulingPipelinedRegion region =
                new DefaultSchedulingPipelinedRegion(
                        (Set<DefaultExecutionVertex>) rawRegion, resultPartitionsById::get);
        pipelinedRegions.add(region);
        for (SchedulingExecutionVertex member : rawRegion) {
            pipelinedRegionsByVertex.put(member.getId(), region);
        }
    }

    // Nanoseconds to milliseconds (integer division).
    final long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000;
    LOG.info("Built {} new pipelined regions in {} ms, total {} pipelined regions currently.", rawRegions.size(), elapsedMillis, pipelinedRegions.size());
}
Also used : IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) LogicalVertex(org.apache.flink.runtime.jobgraph.topology.LogicalVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ConsumerVertexGroup(org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ResultPartitionState(org.apache.flink.runtime.scheduler.strategy.ResultPartitionState) HashSet(java.util.HashSet) EdgeManager(org.apache.flink.runtime.executiongraph.EdgeManager) Map(java.util.Map) SchedulingTopology(org.apache.flink.runtime.scheduler.strategy.SchedulingTopology) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) CoLocationGroup(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Logger(org.slf4j.Logger) IdentityHashMap(java.util.IdentityHashMap) SchedulingPipelinedRegionComputeUtil(org.apache.flink.runtime.executiongraph.failover.flip1.SchedulingPipelinedRegionComputeUtil) LogicalEdge(org.apache.flink.runtime.jobgraph.topology.LogicalEdge) Set(java.util.Set) IterableUtils(org.apache.flink.util.IterableUtils) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) 
IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) Stream(java.util.stream.Stream) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) SchedulingTopologyListener(org.apache.flink.runtime.scheduler.SchedulingTopologyListener) DefaultLogicalTopology(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalTopology) CoLocationConstraint(org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint) DefaultExecutionGraph(org.apache.flink.runtime.executiongraph.DefaultExecutionGraph) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Collections(java.util.Collections) HashSet(java.util.HashSet) Set(java.util.Set) IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap)

Example 15 with SchedulingExecutionVertex

use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.

the class SchedulingPipelinedRegionComputeUtilTest method testTwoComponentsViaBlockingExchange.

/**
 * Verifies region computation for the topology below: the {@code a} and {@code b} vertices are
 * fully connected through pipelined edges and must end up in one region, while each {@code c}
 * vertex is attached through a blocking edge and must form a region of its own.
 *
 * <pre>
 *     (a1) -+-> (b1) -+-> (c1)
 *           X
 *     (a2) -+-> (b2) -+-> (c2)
 *
 *           ^         ^
 *           |         |
 *     (pipelined) (blocking)
 * </pre>
 */
@Test
public void testTwoComponentsViaBlockingExchange() {
    final TestingSchedulingTopology testTopology = new TestingSchedulingTopology();

    final TestingSchedulingExecutionVertex source1 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex source2 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex middle1 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex middle2 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex sink1 = testTopology.newExecutionVertex();
    final TestingSchedulingExecutionVertex sink2 = testTopology.newExecutionVertex();

    testTopology
            // pipelined all-to-all exchange between the a and b vertices
            .connect(source1, middle1, ResultPartitionType.PIPELINED)
            .connect(source1, middle2, ResultPartitionType.PIPELINED)
            .connect(source2, middle1, ResultPartitionType.PIPELINED)
            .connect(source2, middle2, ResultPartitionType.PIPELINED)
            // blocking pointwise exchange between the b and c vertices
            .connect(middle1, sink1, ResultPartitionType.BLOCKING)
            .connect(middle2, sink2, ResultPartitionType.BLOCKING);

    final Map<ExecutionVertexID, Set<SchedulingExecutionVertex>> regionsByVertexId =
            computePipelinedRegionByVertex(testTopology);

    final Set<SchedulingExecutionVertex> sourceRegion1 = regionsByVertexId.get(source1.getId());
    final Set<SchedulingExecutionVertex> sourceRegion2 = regionsByVertexId.get(source2.getId());
    final Set<SchedulingExecutionVertex> middleRegion1 = regionsByVertexId.get(middle1.getId());
    final Set<SchedulingExecutionVertex> middleRegion2 = regionsByVertexId.get(middle2.getId());
    final Set<SchedulingExecutionVertex> sinkRegion1 = regionsByVertexId.get(sink1.getId());
    final Set<SchedulingExecutionVertex> sinkRegion2 = regionsByVertexId.get(sink2.getId());

    assertSameRegion(sourceRegion1, sourceRegion2, middleRegion1, middleRegion2);
    assertDistinctRegions(sourceRegion1, sinkRegion1, sinkRegion2);
}
Also used : TestingSchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.TestingSchedulingExecutionVertex) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) Set(java.util.Set) TestingSchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.TestingSchedulingExecutionVertex) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) TestingSchedulingTopology(org.apache.flink.runtime.scheduler.strategy.TestingSchedulingTopology) Test(org.junit.Test)

Aggregations

SchedulingExecutionVertex (org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex)19 ExecutionVertexID (org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID)15 Set (java.util.Set)13 Test (org.junit.Test)13 TestingSchedulingExecutionVertex (org.apache.flink.runtime.scheduler.strategy.TestingSchedulingExecutionVertex)11 TestingSchedulingTopology (org.apache.flink.runtime.scheduler.strategy.TestingSchedulingTopology)11 ArrayList (java.util.ArrayList)4 HashSet (java.util.HashSet)3 IdentityHashMap (java.util.IdentityHashMap)2 List (java.util.List)2 ArchivedExecutionVertex (org.apache.flink.runtime.executiongraph.ArchivedExecutionVertex)2 IntermediateResultPartitionID (org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID)2 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)2 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)2 ConsumerVertexGroup (org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup)2 SchedulingResultPartition (org.apache.flink.runtime.scheduler.strategy.SchedulingResultPartition)2 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Objects (java.util.Objects)1