Search in sources :

Example 76 with JobVertex

use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.

The class DefaultExecutionTopology, method computeLogicalPipelinedRegionsByJobVertexId.

/**
 * Builds a lookup from job vertex ID to the logical pipelined region containing that vertex.
 *
 * <p>The job vertices are collected from the execution graph in the order returned by
 * {@code getVerticesTopologically()}, a logical topology is derived from them, and every
 * vertex of every resulting pipelined region is registered under its ID.
 *
 * @param executionGraph the execution graph whose job vertices are grouped into regions
 * @return a map from job vertex ID to the logical pipelined region the vertex belongs to
 */
private static Map<JobVertexID, DefaultLogicalPipelinedRegion> computeLogicalPipelinedRegionsByJobVertexId(final ExecutionGraph executionGraph) {
    final List<JobVertex> sortedJobVertices =
            IterableUtils.toStream(executionGraph.getVerticesTopologically())
                    .map(ExecutionJobVertex::getJobVertex)
                    .collect(Collectors.toList());

    final Map<JobVertexID, DefaultLogicalPipelinedRegion> regionByVertexId = new HashMap<>();

    // Index every region under the IDs of all vertices it contains.
    DefaultLogicalTopology.fromTopologicallySortedJobVertices(sortedJobVertices)
            .getAllPipelinedRegions()
            .forEach(
                    region ->
                            region.getVertices()
                                    .forEach(member -> regionByVertexId.put(member.getId(), region)));

    return regionByVertexId;
}
Also used : LogicalVertex(org.apache.flink.runtime.jobgraph.topology.LogicalVertex) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) HashMap(java.util.HashMap) IdentityHashMap(java.util.IdentityHashMap) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID)

Example 77 with JobVertex

use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.

The class AdaptiveScheduler, method createExecutionGraphWithAvailableResourcesAsync.

/**
 * Asynchronously creates an execution graph using the parallelism obtained from
 * {@code determineParallelism(slotAllocator)}.
 *
 * <p>A copy of the job graph is adjusted so that each vertex carries the determined
 * parallelism, and a parallelism store is computed for that copy. Any exception thrown during
 * this preparation is not propagated directly; it is returned as an exceptionally completed
 * future instead.
 *
 * @return a future holding the created execution graph paired with the vertex parallelism that
 *     was used to build it
 */
private CompletableFuture<CreatingExecutionGraph.ExecutionGraphWithVertexParallelism> createExecutionGraphWithAvailableResourcesAsync() {
    final VertexParallelism vertexParallelism;
    final VertexParallelismStore adjustedParallelismStore;

    try {
        vertexParallelism = determineParallelism(slotAllocator);

        final JobGraph jobGraphCopy = jobInformation.copyJobGraph();
        // Apply the determined "available parallelism" to every vertex of the copy so that
        // the resources we have access to are used.
        for (JobVertex jobVertex : jobGraphCopy.getVertices()) {
            jobVertex.setParallelism(vertexParallelism.getParallelism(jobVertex.getID()));
        }

        // The max parallelism from the initial parallelism store serves as the default,
        // for consistent runs.
        adjustedParallelismStore =
                computeVertexParallelismStoreForExecution(
                        jobGraphCopy,
                        executionMode,
                        jobVertex ->
                                initialParallelismStore
                                        .getParallelismInfo(jobVertex.getID())
                                        .getMaxParallelism());
    } catch (Exception exception) {
        return FutureUtils.completedExceptionally(exception);
    }

    return createExecutionGraphAndRestoreStateAsync(adjustedParallelismStore)
            .thenApply(
                    graph ->
                            CreatingExecutionGraph.ExecutionGraphWithVertexParallelism.create(
                                    graph, vertexParallelism));
}
Also used : ExecutionGraphFactory(org.apache.flink.runtime.scheduler.ExecutionGraphFactory) DeclarativeSlotPool(org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPool) TaskNotRunningException(org.apache.flink.runtime.operators.coordination.TaskNotRunningException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) PhysicalSlot(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot) Duration(java.time.Duration) FunctionWithException(org.apache.flink.util.function.FunctionWithException) ExecutionGraphHandler(org.apache.flink.runtime.scheduler.ExecutionGraphHandler) JobManagerJobMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerJobMetricGroup) ReservedSlots(org.apache.flink.runtime.scheduler.adaptive.allocator.ReservedSlots) MetricOptions(org.apache.flink.configuration.MetricOptions) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) ExecutionFailureHandler(org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler) JobStatusStore(org.apache.flink.runtime.scheduler.JobStatusStore) VertexParallelismStore(org.apache.flink.runtime.scheduler.VertexParallelismStore) CheckpointScheduling(org.apache.flink.runtime.checkpoint.CheckpointScheduling) RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) CoordinationResponse(org.apache.flink.runtime.operators.coordination.CoordinationResponse) ExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.ExceptionHistoryEntry) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ResourceCounter(org.apache.flink.runtime.util.ResourceCounter) JobStatus(org.apache.flink.api.common.JobStatus) 
CheckpointFailureReason(org.apache.flink.runtime.checkpoint.CheckpointFailureReason) ArrayList(java.util.ArrayList) SchedulerNG(org.apache.flink.runtime.scheduler.SchedulerNG) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Nullable(javax.annotation.Nullable) DefaultVertexParallelismStore(org.apache.flink.runtime.scheduler.DefaultVertexParallelismStore) KvStateLocation(org.apache.flink.runtime.query.KvStateLocation) Executor(java.util.concurrent.Executor) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) VertexParallelism(org.apache.flink.runtime.scheduler.adaptive.allocator.VertexParallelism) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) IOException(java.io.IOException) OperatorCoordinatorHandler(org.apache.flink.runtime.scheduler.OperatorCoordinatorHandler) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) RestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.RestartBackoffTimeStrategy) ReactiveScaleUpController(org.apache.flink.runtime.scheduler.adaptive.scalingpolicy.ReactiveScaleUpController) TaskExecutionStateTransition(org.apache.flink.runtime.executiongraph.TaskExecutionStateTransition) JobID(org.apache.flink.api.common.JobID) SlotInfo(org.apache.flink.runtime.jobmaster.SlotInfo) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KvStateID(org.apache.flink.queryablestate.KvStateID) ScheduledFuture(java.util.concurrent.ScheduledFuture) ScaleUpController(org.apache.flink.runtime.scheduler.adaptive.scalingpolicy.ScaleUpController) TaskDeploymentDescriptorFactory(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorFactory) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) 
ExceptionUtils(org.apache.flink.util.ExceptionUtils) UpdateSchedulerNgOnInternalFailuresListener(org.apache.flink.runtime.scheduler.UpdateSchedulerNgOnInternalFailuresListener) MutableVertexAttemptNumberStore(org.apache.flink.runtime.executiongraph.MutableVertexAttemptNumberStore) BoundedFIFOQueue(org.apache.flink.runtime.util.BoundedFIFOQueue) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) UnknownKvStateLocation(org.apache.flink.runtime.query.UnknownKvStateLocation) SerializedInputSplit(org.apache.flink.runtime.jobmaster.SerializedInputSplit) JobDetails(org.apache.flink.runtime.messages.webmonitor.JobDetails) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) Collection(java.util.Collection) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Preconditions(org.apache.flink.util.Preconditions) InetSocketAddress(java.net.InetSocketAddress) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) List(java.util.List) CoordinationRequest(org.apache.flink.runtime.operators.coordination.CoordinationRequest) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) SchedulerExecutionMode(org.apache.flink.configuration.SchedulerExecutionMode) NoResourceAvailableException(org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SchedulerUtils(org.apache.flink.runtime.scheduler.SchedulerUtils) SlotAllocator(org.apache.flink.runtime.scheduler.adaptive.allocator.SlotAllocator) 
CompletableFuture(java.util.concurrent.CompletableFuture) Function(java.util.function.Function) JobType(org.apache.flink.runtime.jobgraph.JobType) VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobException(org.apache.flink.runtime.JobException) DeploymentStateTimeMetrics(org.apache.flink.runtime.scheduler.metrics.DeploymentStateTimeMetrics) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) DefaultVertexParallelismInfo(org.apache.flink.runtime.scheduler.DefaultVertexParallelismInfo) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Nonnull(javax.annotation.Nonnull) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Configuration(org.apache.flink.configuration.Configuration) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) AccumulatorSnapshot(org.apache.flink.runtime.accumulators.AccumulatorSnapshot) JobStatusListener(org.apache.flink.runtime.executiongraph.JobStatusListener) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) TimeUnit(java.util.concurrent.TimeUnit) WebOptions(org.apache.flink.configuration.WebOptions) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) DefaultVertexAttemptNumberStore(org.apache.flink.runtime.executiongraph.DefaultVertexAttemptNumberStore) Collections(java.util.Collections) OperatorEvent(org.apache.flink.runtime.operators.coordination.OperatorEvent) 
VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) VertexParallelism(org.apache.flink.runtime.scheduler.adaptive.allocator.VertexParallelism) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) VertexParallelismStore(org.apache.flink.runtime.scheduler.VertexParallelismStore) DefaultVertexParallelismStore(org.apache.flink.runtime.scheduler.DefaultVertexParallelismStore) TaskNotRunningException(org.apache.flink.runtime.operators.coordination.TaskNotRunningException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) FunctionWithException(org.apache.flink.util.function.FunctionWithException) FlinkException(org.apache.flink.util.FlinkException) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) NoResourceAvailableException(org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException) JobException(org.apache.flink.runtime.JobException)

Example 78 with JobVertex

use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.

The class SchedulerBase, method computeVertexParallelismStore.

/**
 * Computes the {@link VertexParallelismStore} for the given vertices.
 *
 * <p>For each vertex the parallelism is passed through {@code normalizeParallelismFunc}. If the
 * vertex's max parallelism equals {@link JobVertex#MAX_PARALLELISM_DEFAULT}, the max
 * parallelism is obtained from {@code defaultMaxParallelismFunc} instead and is treated as
 * auto-configured.
 *
 * @param vertices the vertices to compute parallelism for
 * @param defaultMaxParallelismFunc a function for computing a default max parallelism if none
 *     is specified on a given vertex
 * @param normalizeParallelismFunc a function for normalizing vertex parallelism
 * @return the computed parallelism store
 */
public static VertexParallelismStore computeVertexParallelismStore(Iterable<JobVertex> vertices, Function<JobVertex, Integer> defaultMaxParallelismFunc, Function<Integer, Integer> normalizeParallelismFunc) {
    final DefaultVertexParallelismStore parallelismStore = new DefaultVertexParallelismStore();

    for (JobVertex jobVertex : vertices) {
        final int normalizedParallelism =
                normalizeParallelismFunc.apply(jobVertex.getParallelism());
        final int configuredMaxParallelism = jobVertex.getMaxParallelism();

        // A max parallelism equal to the default marker means it is calculated here rather
        // than taken from the vertex.
        final boolean autoConfigured =
                configuredMaxParallelism == JobVertex.MAX_PARALLELISM_DEFAULT;
        final int effectiveMaxParallelism =
                autoConfigured
                        ? defaultMaxParallelismFunc.apply(jobVertex)
                        : configuredMaxParallelism;

        // The third argument yields an empty Optional for an auto-configured max parallelism
        // and an error message for an explicitly configured one.
        final VertexParallelismInformation parallelismInfo =
                new DefaultVertexParallelismInfo(
                        normalizedParallelism,
                        effectiveMaxParallelism,
                        (newMax) ->
                                autoConfigured
                                        ? Optional.empty()
                                        : Optional.of(
                                                "Cannot override a configured max parallelism."));

        parallelismStore.setParallelismInfo(jobVertex.getID(), parallelismInfo);
    }

    return parallelismStore;
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint)

Example 79 with JobVertex

use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.

The class ForwardGroupComputeUtil, method computeForwardGroups.

/**
 * Computes the forward groups of the given vertices and indexes them by job vertex ID.
 *
 * <p>Each vertex starts in a group of its own. For every forward input of a vertex, the
 * vertex's group is merged with the group of the producer of that input. Only groups with more
 * than one vertex are turned into a {@link ForwardGroup} and included in the result.
 *
 * @param topologicallySortedVertices the job vertices, expected to be in topological order so
 *     that a producer's group is already known when its consumer is visited
 * @param executionJobVertexRetriever resolves a job vertex ID to its execution job vertex
 * @return a map from job vertex ID to the forward group containing that vertex
 * @throws IllegalStateException if a producer has no group yet when its consumer is processed
 */
public static Map<JobVertexID, ForwardGroup> computeForwardGroups(final Iterable<JobVertex> topologicallySortedVertices, Function<JobVertexID, ExecutionJobVertex> executionJobVertexRetriever) {
    final Map<JobVertex, Set<JobVertex>> groupByVertex = new IdentityHashMap<>();

    // Walk the vertices in the given (topologically sorted) order.
    for (JobVertex consumer : topologicallySortedVertices) {
        Set<JobVertex> consumerGroup = new HashSet<>();
        consumerGroup.add(consumer);
        groupByVertex.put(consumer, consumerGroup);

        for (JobEdge forwardInput : getForwardInputs(consumer)) {
            final JobVertex producer = forwardInput.getSource().getProducer();
            final Set<JobVertex> producerGroup = groupByVertex.get(producer);

            if (producerGroup == null) {
                throw new IllegalStateException(
                        "Producer task "
                                + producer.getID()
                                + " forward group is null"
                                + " while calculating forward group for the consumer task "
                                + consumer.getID()
                                + ". This should be a forward group building bug.");
            }

            // Already in the same group (identity comparison): nothing to merge.
            if (consumerGroup == producerGroup) {
                continue;
            }
            consumerGroup =
                    VertexGroupComputeUtil.mergeVertexGroups(
                            consumerGroup, producerGroup, groupByVertex);
        }
    }

    final Map<JobVertexID, ForwardGroup> forwardGroupByVertexId = new HashMap<>();
    for (Set<JobVertex> group : VertexGroupComputeUtil.uniqueVertexGroups(groupByVertex)) {
        // Groups with a single vertex are not turned into forward groups.
        if (group.size() <= 1) {
            continue;
        }

        final ForwardGroup forwardGroup =
                new ForwardGroup(
                        group.stream()
                                .map(member -> executionJobVertexRetriever.apply(member.getID()))
                                .collect(Collectors.toSet()));
        forwardGroup
                .getJobVertexIds()
                .forEach(memberId -> forwardGroupByVertexId.put(memberId, forwardGroup));
    }

    return forwardGroupByVertexId;
}
Also used : ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) HashSet(java.util.HashSet) Set(java.util.Set) IdentityHashMap(java.util.IdentityHashMap) HashMap(java.util.HashMap) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) IdentityHashMap(java.util.IdentityHashMap) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet)

Example 80 with JobVertex

use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.

The class SsgNetworkMemoryCalculationUtils, method buildTaskInputsOutputsDescriptor.

/**
 * Builds the {@link TaskInputsOutputsDescriptor} for the given execution job vertex.
 *
 * <p>The maximum input channel and subpartition numbers are computed by the dynamic-graph
 * helpers when the vertex's graph reports {@code isDynamic()}, and by the regular helpers
 * otherwise. The partition types are taken from the underlying job vertex.
 *
 * @param ejv the execution job vertex to describe
 * @param ejvs lookup from job vertex ID to execution job vertex; only passed on for the
 *     non-dynamic subpartition computation
 * @return the descriptor built from the computed channel numbers, subpartition numbers and
 *     partition types
 */
private static TaskInputsOutputsDescriptor buildTaskInputsOutputsDescriptor(ExecutionJobVertex ejv, Function<JobVertexID, ExecutionJobVertex> ejvs) {
    final boolean dynamicGraph = ejv.getGraph().isDynamic();

    final Map<IntermediateDataSetID, Integer> inputChannelNums =
            dynamicGraph
                    ? getMaxInputChannelNumsForDynamicGraph(ejv)
                    : getMaxInputChannelNums(ejv);
    final Map<IntermediateDataSetID, Integer> subpartitionNums =
            dynamicGraph
                    ? getMaxSubpartitionNumsForDynamicGraph(ejv)
                    : getMaxSubpartitionNums(ejv, ejvs);

    final Map<IntermediateDataSetID, ResultPartitionType> partitionTypes =
            getPartitionTypes(ejv.getJobVertex());

    return TaskInputsOutputsDescriptor.from(inputChannelNums, subpartitionNums, partitionTypes);
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID)

Aggregations

JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)378 Test (org.junit.Test)230 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)197 Configuration (org.apache.flink.configuration.Configuration)74 JobID (org.apache.flink.api.common.JobID)60 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)58 ArrayList (java.util.ArrayList)57 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)47 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)44 SlotSharingGroup (org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup)41 SchedulerBase (org.apache.flink.runtime.scheduler.SchedulerBase)35 HashMap (java.util.HashMap)30 ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex)29 IOException (java.io.IOException)24 ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph)24 TaskConfig (org.apache.flink.runtime.operators.util.TaskConfig)24 Set (java.util.Set)23 JobException (org.apache.flink.runtime.JobException)23 Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler)23 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)22