Search in sources :

Example 1 with DefaultLogicalPipelinedRegion

use of org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion in project flink by apache.

In the class DefaultExecutionTopology, the method computeLogicalPipelinedRegionsByJobVertexId:

/**
 * Builds an index from each job vertex id to the {@link DefaultLogicalPipelinedRegion}
 * that contains it, derived from the topologically sorted vertices of the given graph.
 */
private static Map<JobVertexID, DefaultLogicalPipelinedRegion> computeLogicalPipelinedRegionsByJobVertexId(final ExecutionGraph executionGraph) {
    // The logical topology builder requires the job vertices in topological order.
    final List<JobVertex> sortedJobVertices =
            IterableUtils.toStream(executionGraph.getVerticesTopologically())
                    .map(ExecutionJobVertex::getJobVertex)
                    .collect(Collectors.toList());

    final Map<JobVertexID, DefaultLogicalPipelinedRegion> regionsByVertexId = new HashMap<>();
    // Index every logical vertex by its id so the containing region can be looked up directly.
    DefaultLogicalTopology.fromTopologicallySortedJobVertices(sortedJobVertices)
            .getAllPipelinedRegions()
            .forEach(
                    region -> {
                        for (LogicalVertex vertex : region.getVertices()) {
                            regionsByVertexId.put(vertex.getId(), region);
                        }
                    });
    return regionsByVertexId;
}
Also used : LogicalVertex(org.apache.flink.runtime.jobgraph.topology.LogicalVertex) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) HashMap(java.util.HashMap) IdentityHashMap(java.util.IdentityHashMap) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID)

Example 2 with DefaultLogicalPipelinedRegion

use of org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion in project flink by apache.

In the class DefaultExecutionTopology, the method generateNewPipelinedRegions:

/**
 * Builds scheduling pipelined regions for the newly added execution vertices and registers
 * them in {@code pipelinedRegions} and {@code pipelinedRegionsByVertex}.
 *
 * @param newExecutionVertices execution vertices that were added to the topology and do not
 *     yet belong to any scheduling pipelined region
 */
private void generateNewPipelinedRegions(Iterable<ExecutionVertex> newExecutionVertices) {
    // Translate the raw execution vertices into their scheduling counterparts via the
    // executionVerticesById index (assumes every new vertex is already registered there).
    final Iterable<DefaultExecutionVertex> newSchedulingExecutionVertices = IterableUtils.toStream(newExecutionVertices).map(ExecutionVertex::getID).map(executionVerticesById::get).collect(Collectors.toList());
    // Group the new scheduling vertices by the logical pipelined region they derive from.
    // An IdentityHashMap is used, so logical regions are compared by reference identity.
    Map<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> sortedExecutionVerticesInPipelinedRegion = new IdentityHashMap<>();
    for (DefaultExecutionVertex schedulingVertex : newSchedulingExecutionVertices) {
        sortedExecutionVerticesInPipelinedRegion.computeIfAbsent(logicalPipelinedRegionsByJobVertexId.get(schedulingVertex.getId().getJobVertexId()), ignore -> new ArrayList<>()).add(schedulingVertex);
    }
    long buildRegionsStartTime = System.nanoTime();
    // Identity-based set: raw regions are plain vertex sets built below and compared by reference.
    Set<Set<SchedulingExecutionVertex>> rawPipelinedRegions = Collections.newSetFromMap(new IdentityHashMap<>());
    // Compute the raw scheduling pipelined regions, one logical pipelined region at a time.
    for (Map.Entry<DefaultLogicalPipelinedRegion, List<DefaultExecutionVertex>> entry : sortedExecutionVerticesInPipelinedRegion.entrySet()) {
        DefaultLogicalPipelinedRegion logicalPipelinedRegion = entry.getKey();
        List<DefaultExecutionVertex> schedulingExecutionVertices = entry.getValue();
        if (containsIntraRegionAllToAllEdge(logicalPipelinedRegion)) {
            // For edges inside one LogicalPipelinedRegion, if there is any all-to-all edge, it
            // could be under two circumstances:
            // 
            // 1. Pipelined all-to-all edge:
            // Pipelined all-to-all edge will connect all vertices pipelined. Therefore,
            // all execution vertices derived from this LogicalPipelinedRegion should be in one
            // SchedulingPipelinedRegion.
            // 
            // 2. Blocking all-to-all edge:
            // For intra-region blocking all-to-all edge, we must make sure all the vertices
            // are inside one SchedulingPipelinedRegion, so that there will be no deadlock
            // happens during scheduling. For more details about this case, please refer to
            // FLINK-17330 (https://issues.apache.org/jira/browse/FLINK-17330).
            // 
            // Therefore, if a LogicalPipelinedRegion contains any intra-region all-to-all
            // edge, we just convert the entire LogicalPipelinedRegion to a sole
            // SchedulingPipelinedRegion directly.
            rawPipelinedRegions.add(new HashSet<>(schedulingExecutionVertices));
        } else {
            // If there are only pointwise edges inside the LogicalPipelinedRegion, we can use
            // SchedulingPipelinedRegionComputeUtil to compute the regions with O(N) computation
            // complexity.
            rawPipelinedRegions.addAll(SchedulingPipelinedRegionComputeUtil.computePipelinedRegions(schedulingExecutionVertices, executionVerticesById::get, resultPartitionsById::get));
        }
    }
    // Wrap each raw vertex set in a DefaultSchedulingPipelinedRegion and index every vertex
    // by its id for fast region lookup.
    for (Set<? extends SchedulingExecutionVertex> rawPipelinedRegion : rawPipelinedRegions) {
        // noinspection unchecked
        final DefaultSchedulingPipelinedRegion pipelinedRegion = new DefaultSchedulingPipelinedRegion((Set<DefaultExecutionVertex>) rawPipelinedRegion, resultPartitionsById::get);
        pipelinedRegions.add(pipelinedRegion);
        for (SchedulingExecutionVertex executionVertex : rawPipelinedRegion) {
            pipelinedRegionsByVertex.put(executionVertex.getId(), pipelinedRegion);
        }
    }
    // Log how long region building took (nanoseconds converted to milliseconds).
    long buildRegionsDuration = (System.nanoTime() - buildRegionsStartTime) / 1_000_000;
    LOG.info("Built {} new pipelined regions in {} ms, total {} pipelined regions currently.", rawPipelinedRegions.size(), buildRegionsDuration, pipelinedRegions.size());
}
Also used : IntermediateResultPartitionID(org.apache.flink.runtime.jobgraph.IntermediateResultPartitionID) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) LogicalVertex(org.apache.flink.runtime.jobgraph.topology.LogicalVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ConsumerVertexGroup(org.apache.flink.runtime.scheduler.strategy.ConsumerVertexGroup) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ResultPartitionState(org.apache.flink.runtime.scheduler.strategy.ResultPartitionState) HashSet(java.util.HashSet) EdgeManager(org.apache.flink.runtime.executiongraph.EdgeManager) Map(java.util.Map) SchedulingTopology(org.apache.flink.runtime.scheduler.strategy.SchedulingTopology) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) CoLocationGroup(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Logger(org.slf4j.Logger) IdentityHashMap(java.util.IdentityHashMap) SchedulingPipelinedRegionComputeUtil(org.apache.flink.runtime.executiongraph.failover.flip1.SchedulingPipelinedRegionComputeUtil) LogicalEdge(org.apache.flink.runtime.jobgraph.topology.LogicalEdge) Set(java.util.Set) IterableUtils(org.apache.flink.util.IterableUtils) ExecutionVertexID(org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) 
IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) ConsumedPartitionGroup(org.apache.flink.runtime.scheduler.strategy.ConsumedPartitionGroup) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) Stream(java.util.stream.Stream) IntermediateResultPartition(org.apache.flink.runtime.executiongraph.IntermediateResultPartition) SchedulingTopologyListener(org.apache.flink.runtime.scheduler.SchedulingTopologyListener) DefaultLogicalTopology(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalTopology) CoLocationConstraint(org.apache.flink.runtime.jobmanager.scheduler.CoLocationConstraint) DefaultExecutionGraph(org.apache.flink.runtime.executiongraph.DefaultExecutionGraph) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Collections(java.util.Collections) HashSet(java.util.HashSet) Set(java.util.Set) IntermediateDataSet(org.apache.flink.runtime.jobgraph.IntermediateDataSet) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) IdentityHashMap(java.util.IdentityHashMap) ArrayList(java.util.ArrayList) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) SchedulingExecutionVertex(org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) IdentityHashMap(java.util.IdentityHashMap)

Example 3 with DefaultLogicalPipelinedRegion

use of org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion in project flink by apache.

In the class StreamingJobGraphGenerator, the method buildVertexRegionSlotSharingGroups:

/**
 * Maps each job vertex to the slot sharing group of its pipelined region. When {@link
 * StreamGraph#isAllVerticesInSameSlotSharingGroupByDefault()} is true, every region shares
 * one common group; otherwise each region gets a group of its own.
 */
private Map<JobVertexID, SlotSharingGroup> buildVertexRegionSlotSharingGroups() {
    final Map<JobVertexID, SlotSharingGroup> groupsByVertexId = new HashMap<>();

    // Shared group used when all regions are placed into the same slot sharing group.
    final SlotSharingGroup defaultGroup = new SlotSharingGroup();
    streamGraph
            .getSlotSharingGroupResource(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP)
            .ifPresent(defaultGroup::setResourceProfile);

    final boolean shareSingleGroup = streamGraph.isAllVerticesInSameSlotSharingGroupByDefault();

    for (DefaultLogicalPipelinedRegion region :
            DefaultLogicalTopology.fromJobGraph(jobGraph).getAllPipelinedRegions()) {
        final SlotSharingGroup regionGroup;
        if (shareSingleGroup) {
            regionGroup = defaultGroup;
        } else {
            // Dedicated group per region, configured with the default resource profile if set.
            regionGroup = new SlotSharingGroup();
            streamGraph
                    .getSlotSharingGroupResource(StreamGraphGenerator.DEFAULT_SLOT_SHARING_GROUP)
                    .ifPresent(regionGroup::setResourceProfile);
        }
        for (LogicalVertex vertex : region.getVertices()) {
            groupsByVertexId.put(vertex.getId(), regionGroup);
        }
    }
    return groupsByVertexId;
}
Also used : LogicalVertex(org.apache.flink.runtime.jobgraph.topology.LogicalVertex) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup)

Aggregations

HashMap (java.util.HashMap)3 IdentityHashMap (java.util.IdentityHashMap)3 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)3 DefaultLogicalPipelinedRegion (org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion)3 LogicalVertex (org.apache.flink.runtime.jobgraph.topology.LogicalVertex)3 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)2 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)2 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Set (java.util.Set)1 Function (java.util.function.Function)1 Supplier (java.util.function.Supplier)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 DefaultExecutionGraph (org.apache.flink.runtime.executiongraph.DefaultExecutionGraph)1 EdgeManager (org.apache.flink.runtime.executiongraph.EdgeManager)1