Search in sources :

Example 1 with VertexParallelismInformation

use of org.apache.flink.runtime.scheduler.VertexParallelismInformation in project flink by apache.

the class ExecutionJobVertexTest method createDynamicExecutionJobVertex.

/**
 * Builds an {@link ExecutionJobVertex} around a job vertex named "testVertex" that has a single
 * blocking result data set, using parallelism information obtained from
 * {@code AdaptiveBatchScheduler.computeVertexParallelismStoreForDynamicGraph}.
 *
 * @param parallelism parallelism to set on the job vertex; only applied when greater than zero
 * @param maxParallelism max parallelism to set on the job vertex; only applied when greater than
 *     zero
 * @param defaultMaxParallelism value passed through to the parallelism store computation
 * @return an execution job vertex attached to a freshly built testing execution graph
 * @throws Exception if any of the construction steps fails
 */
public static ExecutionJobVertex createDynamicExecutionJobVertex(int parallelism, int maxParallelism, int defaultMaxParallelism) throws Exception {
    final JobVertex vertex = new JobVertex("testVertex");
    vertex.setInvokableClass(AbstractInvokable.class);
    vertex.createAndAddResultDataSet(new IntermediateDataSetID(), ResultPartitionType.BLOCKING);

    // Non-positive arguments are skipped, so the vertex keeps whatever it had before.
    if (maxParallelism > 0) {
        vertex.setMaxParallelism(maxParallelism);
    }
    if (parallelism > 0) {
        vertex.setParallelism(parallelism);
    }

    final DefaultExecutionGraph executionGraph = TestingDefaultExecutionGraphBuilder.newBuilder().build();
    final VertexParallelismStore parallelismStore = AdaptiveBatchScheduler.computeVertexParallelismStoreForDynamicGraph(Collections.singletonList(vertex), defaultMaxParallelism);
    return new ExecutionJobVertex(executionGraph, vertex, parallelismStore.getParallelismInfo(vertex.getID()));
}
Also used : VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) VertexParallelismStore(org.apache.flink.runtime.scheduler.VertexParallelismStore) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID)

Example 2 with VertexParallelismInformation

use of org.apache.flink.runtime.scheduler.VertexParallelismInformation in project flink by apache.

the class AdaptiveScheduler method createExecutionGraphWithAvailableResourcesAsync.

/**
 * Determines the parallelism from the slot allocator, applies it to a copy of the job graph,
 * and asynchronously creates an execution graph (restoring state) for that copy.
 *
 * @return a future holding the created execution graph paired with the determined vertex
 *     parallelism; the future is completed exceptionally if determining the parallelism or
 *     computing the parallelism store throws
 */
private CompletableFuture<CreatingExecutionGraph.ExecutionGraphWithVertexParallelism> createExecutionGraphWithAvailableResourcesAsync() {
    final VertexParallelism determinedParallelism;
    final VertexParallelismStore parallelismStore;
    try {
        determinedParallelism = determineParallelism(slotAllocator);
        final JobGraph graphCopy = jobInformation.copyJobGraph();
        // Apply the determined "available parallelism" to every vertex of the copy so that
        // the resources we have access to are actually used.
        for (JobVertex jobVertex : graphCopy.getVertices()) {
            jobVertex.setParallelism(determinedParallelism.getParallelism(jobVertex.getID()));
        }
        // The originally configured max parallelism serves as the default, for consistent runs.
        parallelismStore = computeVertexParallelismStoreForExecution(graphCopy, executionMode, v -> initialParallelismStore.getParallelismInfo(v.getID()).getMaxParallelism());
    } catch (Exception failure) {
        return FutureUtils.completedExceptionally(failure);
    }
    return createExecutionGraphAndRestoreStateAsync(parallelismStore)
            .thenApply(graph -> CreatingExecutionGraph.ExecutionGraphWithVertexParallelism.create(graph, determinedParallelism));
}
Also used : ExecutionGraphFactory(org.apache.flink.runtime.scheduler.ExecutionGraphFactory) DeclarativeSlotPool(org.apache.flink.runtime.jobmaster.slotpool.DeclarativeSlotPool) TaskNotRunningException(org.apache.flink.runtime.operators.coordination.TaskNotRunningException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) PhysicalSlot(org.apache.flink.runtime.jobmaster.slotpool.PhysicalSlot) Duration(java.time.Duration) FunctionWithException(org.apache.flink.util.function.FunctionWithException) ExecutionGraphHandler(org.apache.flink.runtime.scheduler.ExecutionGraphHandler) JobManagerJobMetricGroup(org.apache.flink.runtime.metrics.groups.JobManagerJobMetricGroup) ReservedSlots(org.apache.flink.runtime.scheduler.adaptive.allocator.ReservedSlots) MetricOptions(org.apache.flink.configuration.MetricOptions) JobManagerOptions(org.apache.flink.configuration.JobManagerOptions) ExecutionFailureHandler(org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler) JobStatusStore(org.apache.flink.runtime.scheduler.JobStatusStore) VertexParallelismStore(org.apache.flink.runtime.scheduler.VertexParallelismStore) CheckpointScheduling(org.apache.flink.runtime.checkpoint.CheckpointScheduling) RootExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.RootExceptionHistoryEntry) FlinkException(org.apache.flink.util.FlinkException) ComponentMainThreadExecutor(org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) CoordinationResponse(org.apache.flink.runtime.operators.coordination.CoordinationResponse) ExceptionHistoryEntry(org.apache.flink.runtime.scheduler.exceptionhistory.ExceptionHistoryEntry) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ResourceCounter(org.apache.flink.runtime.util.ResourceCounter) JobStatus(org.apache.flink.api.common.JobStatus) 
CheckpointFailureReason(org.apache.flink.runtime.checkpoint.CheckpointFailureReason) ArrayList(java.util.ArrayList) SchedulerNG(org.apache.flink.runtime.scheduler.SchedulerNG) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Nullable(javax.annotation.Nullable) DefaultVertexParallelismStore(org.apache.flink.runtime.scheduler.DefaultVertexParallelismStore) KvStateLocation(org.apache.flink.runtime.query.KvStateLocation) Executor(java.util.concurrent.Executor) ExecutionState(org.apache.flink.runtime.execution.ExecutionState) VertexParallelism(org.apache.flink.runtime.scheduler.adaptive.allocator.VertexParallelism) CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) IOException(java.io.IOException) OperatorCoordinatorHandler(org.apache.flink.runtime.scheduler.OperatorCoordinatorHandler) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) RestartBackoffTimeStrategy(org.apache.flink.runtime.executiongraph.failover.flip1.RestartBackoffTimeStrategy) ReactiveScaleUpController(org.apache.flink.runtime.scheduler.adaptive.scalingpolicy.ReactiveScaleUpController) TaskExecutionStateTransition(org.apache.flink.runtime.executiongraph.TaskExecutionStateTransition) JobID(org.apache.flink.api.common.JobID) SlotInfo(org.apache.flink.runtime.jobmaster.SlotInfo) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) KvStateID(org.apache.flink.queryablestate.KvStateID) ScheduledFuture(java.util.concurrent.ScheduledFuture) ScaleUpController(org.apache.flink.runtime.scheduler.adaptive.scalingpolicy.ScaleUpController) TaskDeploymentDescriptorFactory(org.apache.flink.runtime.deployment.TaskDeploymentDescriptorFactory) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) LoggerFactory(org.slf4j.LoggerFactory) 
ExceptionUtils(org.apache.flink.util.ExceptionUtils) UpdateSchedulerNgOnInternalFailuresListener(org.apache.flink.runtime.scheduler.UpdateSchedulerNgOnInternalFailuresListener) MutableVertexAttemptNumberStore(org.apache.flink.runtime.executiongraph.MutableVertexAttemptNumberStore) BoundedFIFOQueue(org.apache.flink.runtime.util.BoundedFIFOQueue) TaskStateSnapshot(org.apache.flink.runtime.checkpoint.TaskStateSnapshot) KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) UnknownKvStateLocation(org.apache.flink.runtime.query.UnknownKvStateLocation) SerializedInputSplit(org.apache.flink.runtime.jobmaster.SerializedInputSplit) JobDetails(org.apache.flink.runtime.messages.webmonitor.JobDetails) CheckpointIDCounter(org.apache.flink.runtime.checkpoint.CheckpointIDCounter) Collection(java.util.Collection) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) Preconditions(org.apache.flink.util.Preconditions) InetSocketAddress(java.net.InetSocketAddress) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) List(java.util.List) CoordinationRequest(org.apache.flink.runtime.operators.coordination.CoordinationRequest) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) CheckpointMetrics(org.apache.flink.runtime.checkpoint.CheckpointMetrics) SchedulerExecutionMode(org.apache.flink.configuration.SchedulerExecutionMode) NoResourceAvailableException(org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) SavepointFormatType(org.apache.flink.core.execution.SavepointFormatType) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) SchedulerUtils(org.apache.flink.runtime.scheduler.SchedulerUtils) SlotAllocator(org.apache.flink.runtime.scheduler.adaptive.allocator.SlotAllocator) 
CompletableFuture(java.util.concurrent.CompletableFuture) Function(java.util.function.Function) JobType(org.apache.flink.runtime.jobgraph.JobType) VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) JobException(org.apache.flink.runtime.JobException) DeploymentStateTimeMetrics(org.apache.flink.runtime.scheduler.metrics.DeploymentStateTimeMetrics) FatalErrorHandler(org.apache.flink.runtime.rpc.FatalErrorHandler) DefaultVertexParallelismInfo(org.apache.flink.runtime.scheduler.DefaultVertexParallelismInfo) ThrowingConsumer(org.apache.flink.util.function.ThrowingConsumer) Nonnull(javax.annotation.Nonnull) ArchivedExecutionGraph(org.apache.flink.runtime.executiongraph.ArchivedExecutionGraph) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Configuration(org.apache.flink.configuration.Configuration) CompletedCheckpointStore(org.apache.flink.runtime.checkpoint.CompletedCheckpointStore) LogicalSlot(org.apache.flink.runtime.jobmaster.LogicalSlot) AccumulatorSnapshot(org.apache.flink.runtime.accumulators.AccumulatorSnapshot) JobStatusListener(org.apache.flink.runtime.executiongraph.JobStatusListener) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionGraphInfo(org.apache.flink.runtime.scheduler.ExecutionGraphInfo) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) TimeUnit(java.util.concurrent.TimeUnit) WebOptions(org.apache.flink.configuration.WebOptions) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) DefaultVertexAttemptNumberStore(org.apache.flink.runtime.executiongraph.DefaultVertexAttemptNumberStore) Collections(java.util.Collections) OperatorEvent(org.apache.flink.runtime.operators.coordination.OperatorEvent) 
VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) VertexParallelism(org.apache.flink.runtime.scheduler.adaptive.allocator.VertexParallelism) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) VertexParallelismStore(org.apache.flink.runtime.scheduler.VertexParallelismStore) DefaultVertexParallelismStore(org.apache.flink.runtime.scheduler.DefaultVertexParallelismStore) TaskNotRunningException(org.apache.flink.runtime.operators.coordination.TaskNotRunningException) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) FunctionWithException(org.apache.flink.util.function.FunctionWithException) FlinkException(org.apache.flink.util.FlinkException) PartitionProducerDisposedException(org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) IOException(java.io.IOException) FlinkJobNotFoundException(org.apache.flink.runtime.messages.FlinkJobNotFoundException) NoResourceAvailableException(org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException) JobException(org.apache.flink.runtime.JobException)

Example 3 with VertexParallelismInformation

use of org.apache.flink.runtime.scheduler.VertexParallelismInformation in project flink by apache.

the class AdaptiveSchedulerComputeReactiveModeVertexParallelismTest method testCreateStoreWithoutAdjustedParallelism.

/**
 * Computes the reactive-mode parallelism store with {@code false} as the last argument and
 * checks that the reported parallelism equals the configured one, and that the max parallelism
 * and rescale answer match the expected values.
 */
@Test
public void testCreateStoreWithoutAdjustedParallelism() {
    final JobVertex vertex = createNoOpVertex("test", parallelism, maxParallelism);
    final VertexParallelismInformation parallelismInfo =
            AdaptiveScheduler.computeReactiveModeVertexParallelismStore(Collections.singleton(vertex), SchedulerBase::getDefaultMaxParallelism, false)
                    .getParallelismInfo(vertex.getID());

    Assert.assertEquals("parallelism is not adjusted", parallelism, parallelismInfo.getParallelism());
    Assert.assertEquals("expected max", expectedMaxParallelism, parallelismInfo.getMaxParallelism());
    Assert.assertEquals("can rescale max", expectedCanRescaleTo, parallelismInfo.canRescaleMaxParallelism(maxToScaleTo));
}
Also used : VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) VertexParallelismStore(org.apache.flink.runtime.scheduler.VertexParallelismStore) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) Test(org.junit.Test)

Example 4 with VertexParallelismInformation

use of org.apache.flink.runtime.scheduler.VertexParallelismInformation in project flink by apache.

the class DefaultExecutionGraph method attachJobVertices.

/**
 * Attach job vertices without initializing them.
 *
 * <p>For every job vertex an {@link ExecutionJobVertex} is created from the parallelism
 * information held in the parallelism store and registered under the vertex ID.
 *
 * @param topologicallySorted the job vertices to attach, in the order they should be recorded
 * @throws JobException if two job vertices share the same ID
 */
private void attachJobVertices(List<JobVertex> topologicallySorted) throws JobException {
    for (JobVertex jobVertex : topologicallySorted) {
        // A single non-stoppable input vertex makes the whole graph non-stoppable.
        if (jobVertex.isInputVertex() && !jobVertex.isStoppable()) {
            this.isStoppable = false;
        }
        VertexParallelismInformation parallelismInfo = parallelismStore.getParallelismInfo(jobVertex.getID());
        // create the execution job vertex and attach it to the graph
        ExecutionJobVertex ejv = new ExecutionJobVertex(this, jobVertex, parallelismInfo);
        ExecutionJobVertex previousTask = this.tasks.putIfAbsent(jobVertex.getID(), ejv);
        if (previousTask != null) {
            // Argument order must follow the labels: the already registered vertex is the
            // "previous" one, the vertex created in this iteration is the "new" one.
            throw new JobException(String.format("Encountered two job vertices with ID %s : previous=[%s] / new=[%s]", jobVertex.getID(), previousTask, ejv));
        }
        this.verticesInCreationOrder.add(ejv);
        this.numJobVerticesTotal++;
    }
}
Also used : VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) JobException(org.apache.flink.runtime.JobException) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex)

Example 5 with VertexParallelismInformation

use of org.apache.flink.runtime.scheduler.VertexParallelismInformation in project flink by apache.

the class AdaptiveSchedulerComputeReactiveModeVertexParallelismTest method testCreateStoreWithAdjustedParallelism.

/**
 * Computes the reactive-mode parallelism store with {@code true} as the last argument and
 * checks that the reported parallelism equals the expected max parallelism, and that the max
 * parallelism and rescale answer match the expected values.
 */
@Test
public void testCreateStoreWithAdjustedParallelism() {
    final JobVertex vertex = createNoOpVertex("test", parallelism, maxParallelism);
    final VertexParallelismInformation parallelismInfo =
            AdaptiveScheduler.computeReactiveModeVertexParallelismStore(Collections.singleton(vertex), SchedulerBase::getDefaultMaxParallelism, true)
                    .getParallelismInfo(vertex.getID());

    Assert.assertEquals("parallelism is adjusted to max", expectedMaxParallelism, parallelismInfo.getParallelism());
    Assert.assertEquals("expected max", expectedMaxParallelism, parallelismInfo.getMaxParallelism());
    Assert.assertEquals("can rescale max", expectedCanRescaleTo, parallelismInfo.canRescaleMaxParallelism(maxToScaleTo));
}
Also used : VertexParallelismInformation(org.apache.flink.runtime.scheduler.VertexParallelismInformation) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) VertexParallelismStore(org.apache.flink.runtime.scheduler.VertexParallelismStore) SchedulerBase(org.apache.flink.runtime.scheduler.SchedulerBase) Test(org.junit.Test)

Aggregations

JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)8 VertexParallelismInformation (org.apache.flink.runtime.scheduler.VertexParallelismInformation)8 VertexParallelismStore (org.apache.flink.runtime.scheduler.VertexParallelismStore)6 SchedulerBase (org.apache.flink.runtime.scheduler.SchedulerBase)5 Test (org.junit.Test)3 VisibleForTesting (org.apache.flink.annotation.VisibleForTesting)2 JobException (org.apache.flink.runtime.JobException)2 ArchivedExecutionGraphTest (org.apache.flink.runtime.executiongraph.ArchivedExecutionGraphTest)2 ArchivedExecutionJobVertex (org.apache.flink.runtime.executiongraph.ArchivedExecutionJobVertex)2 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)2 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)2 JobGraphTestUtils.streamingJobGraph (org.apache.flink.runtime.jobgraph.JobGraphTestUtils.streamingJobGraph)2 DefaultSchedulerTest (org.apache.flink.runtime.scheduler.DefaultSchedulerTest)2 IOException (java.io.IOException)1 InetSocketAddress (java.net.InetSocketAddress)1 Duration (java.time.Duration)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Iterator (java.util.Iterator)1