Search in sources :

Example 1 with SchedulingPipelinedRegionComputeUtil

Use of org.apache.flink.runtime.executiongraph.failover.flip1.SchedulingPipelinedRegionComputeUtil in the Apache Flink project.

From the class DefaultExecutionTopology, the method generateNewPipelinedRegions:

/**
 * Builds scheduling pipelined regions for the given new execution vertices and registers them
 * in {@code pipelinedRegions} and {@code pipelinedRegionsByVertex}.
 *
 * <p>The vertices are first grouped by the logical pipelined region of their job vertex. Each
 * group is then turned into one or more raw regions (sets of vertices), and every raw region is
 * finally wrapped in a {@link DefaultSchedulingPipelinedRegion}. The time spent building the
 * regions is logged at INFO level.
 *
 * @param newExecutionVertices the execution vertices for which regions must be generated
 */
private void generateNewPipelinedRegions(Iterable<ExecutionVertex> newExecutionVertices) {
    // Resolve every new ExecutionVertex to its scheduling counterpart via its ID.
    final Iterable<DefaultExecutionVertex> resolvedVertices =
            IterableUtils.toStream(newExecutionVertices)
                    .map(ExecutionVertex::getID)
                    .map(executionVerticesById::get)
                    .collect(Collectors.toList());

    // Group the scheduling vertices by the logical region their job vertex belongs to.
    // Keys are compared by identity; each value list keeps the incoming vertex order.
    final Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> verticesByLogicalRegion =
            new IdentityHashMap<>();
    for (DefaultExecutionVertex vertex : resolvedVertices) {
        final DefaultLogicalPipelinedRegion owningRegion =
                logicalPipelinedRegionsByJobVertexId.get(vertex.getId().getJobVertexId());
        verticesByLogicalRegion
                .computeIfAbsent(owningRegion, unused -> new ArrayList<>())
                .add(vertex);
    }

    final long startNanos = System.nanoTime();

    // Each logical region is converted on its own, one after another, into one or more
    // raw scheduling regions. The resulting vertex sets are tracked by identity.
    final Set<Set<SchedulingExecutionVertex>> rawRegions =
            Collections.newSetFromMap(new IdentityHashMap<>());
    for (Map.Entry<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> group :
            verticesByLogicalRegion.entrySet()) {
        final List<DefaultExecutionVertex> groupVertices = group.getValue();

        if (!containsIntraRegionAllToAllEdge(group.getKey())) {
            // Only pointwise edges inside the logical region: let
            // SchedulingPipelinedRegionComputeUtil compute the regions, which has O(N)
            // computation complexity.
            rawRegions.addAll(
                    SchedulingPipelinedRegionComputeUtil.computePipelinedRegions(
                            groupVertices, executionVerticesById::get, resultPartitionsById::get));
        } else {
            // The logical region contains an intra-region all-to-all edge, which is either:
            //
            // 1. A pipelined all-to-all edge: it connects all vertices pipelined, so all
            //    execution vertices derived from this logical region belong in a single
            //    scheduling region.
            //
            // 2. A blocking all-to-all edge: all vertices must be kept inside a single
            //    scheduling region so that no deadlock happens during scheduling. See
            //    FLINK-17330 (https://issues.apache.org/jira/browse/FLINK-17330).
            //
            // In both cases the whole logical region becomes exactly one scheduling region.
            rawRegions.add(new HashSet<>(groupVertices));
        }
    }

    // Wrap every raw region and index it both globally and per member vertex.
    for (Set<? extends SchedulingExecutionVertex> rawRegion : rawRegions) {
        // noinspection unchecked
        final DefaultSchedulingPipelinedRegion region =
                new DefaultSchedulingPipelinedRegion(
                        (Set<DefaultExecutionVertex>) rawRegion, resultPartitionsById::get);
        pipelinedRegions.add(region);
        rawRegion.forEach(member -> pipelinedRegionsByVertex.put(member.getId(), region));
    }

    // Convert the elapsed nanoseconds to whole milliseconds for the log line.
    final long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000;
    LOG.info(
            "Built {} new pipelined regions in {} ms, total {} pipelined regions currently.",
            rawRegions.size(),
            elapsedMillis,
            pipelinedRegions.size());
}
Also used : IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) LogicalVertex(org.apache.flink.runtime.jobgraph.topology.LogicalVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ConsumerVertexGroup(org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ResultPartitionState(org.apache.flink.runtime.scheduler.strategy.ResultPartitionState) HashSet(java.util.HashSet) EdgeManager(org.apache.flink.runtime.executiongraph.EdgeManager) Map(java.util.Map) SchedulingTopology(org.apache.flink.runtime.scheduler.strategy.SchedulingTopology) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) CoLocationGroup(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Logger(org.slf4j.Logger) IdentityHashMap(java.util.IdentityHashMap) SchedulingPipelinedRegionComputeUtil(org.apache.flink.runtime.executiongraph.failover.flip1.SchedulingPipelinedRegionComputeUtil) LogicalEdge(org.apache.flink.runtime.jobgraph.topology.LogicalEdge) Set(java.util.Set) IterableUtils(org.apache.flink.util.IterableUtils) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) 
IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) Stream(java.util.stream.Stream) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) SchedulingTopologyListener(org.apache.flink.runtime.scheduler.SchedulingTopologyListener) DefaultLogicalTopology(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalTopology) CoLocationConstraint(org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint) DefaultExecutionGraph(org.apache.flink.runtime.executiongraph.DefaultExecutionGraph) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Collections(java.util.Collections) HashSet(java.util.HashSet) Set(java.util.Set) IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap)

Aggregations

ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 IdentityHashMap (java.util.IdentityHashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Set (java.util.Set)1 Function (java.util.function.Function)1 Supplier (java.util.function.Supplier)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 DefaultExecutionGraph (org.apache.flink.runtime.executiongraph.DefaultExecutionGraph)1 EdgeManager (org.apache.flink.runtime.executiongraph.EdgeManager)1 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)1 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)1 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)1 IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition)1 SchedulingPipelinedRegionComputeUtil (org.apache.flink.runtime.executiongraph.failover.flip1.SchedulingPipelinedRegionComputeUtil)1