
Example 56 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in project flink by apache.

The class StreamingJobGraphGenerator, method createChain.

private List<StreamEdge> createChain(final Integer currentNodeId, final int chainIndex, final OperatorChainInfo chainInfo, final Map<Integer, OperatorChainInfo> chainEntryPoints) {
    Integer startNodeId = chainInfo.getStartNodeId();
    if (!builtVertices.contains(startNodeId)) {
        List<StreamEdge> transitiveOutEdges = new ArrayList<StreamEdge>();
        List<StreamEdge> chainableOutputs = new ArrayList<StreamEdge>();
        List<StreamEdge> nonChainableOutputs = new ArrayList<StreamEdge>();
        StreamNode currentNode = streamGraph.getStreamNode(currentNodeId);
        for (StreamEdge outEdge : currentNode.getOutEdges()) {
            if (isChainable(outEdge, streamGraph)) {
                chainableOutputs.add(outEdge);
            } else {
                nonChainableOutputs.add(outEdge);
            }
        }
        for (StreamEdge chainable : chainableOutputs) {
            transitiveOutEdges.addAll(createChain(chainable.getTargetId(), chainIndex + 1, chainInfo, chainEntryPoints));
        }
        for (StreamEdge nonChainable : nonChainableOutputs) {
            transitiveOutEdges.add(nonChainable);
            // operators start at position 1 because 0 is for chained source inputs
            createChain(nonChainable.getTargetId(), 1, chainEntryPoints.computeIfAbsent(nonChainable.getTargetId(), (k) -> chainInfo.newChain(nonChainable.getTargetId())), chainEntryPoints);
        }
        chainedNames.put(currentNodeId, createChainedName(currentNodeId, chainableOutputs, Optional.ofNullable(chainEntryPoints.get(currentNodeId))));
        chainedMinResources.put(currentNodeId, createChainedMinResources(currentNodeId, chainableOutputs));
        chainedPreferredResources.put(currentNodeId, createChainedPreferredResources(currentNodeId, chainableOutputs));
        OperatorID currentOperatorId = chainInfo.addNodeToChain(currentNodeId, streamGraph.getStreamNode(currentNodeId).getOperatorName());
        if (currentNode.getInputFormat() != null) {
            getOrCreateFormatContainer(startNodeId).addInputFormat(currentOperatorId, currentNode.getInputFormat());
        }
        if (currentNode.getOutputFormat() != null) {
            getOrCreateFormatContainer(startNodeId).addOutputFormat(currentOperatorId, currentNode.getOutputFormat());
        }
        StreamConfig config = currentNodeId.equals(startNodeId) ? createJobVertex(startNodeId, chainInfo) : new StreamConfig(new Configuration());
        setVertexConfig(currentNodeId, config, chainableOutputs, nonChainableOutputs, chainInfo.getChainedSources());
        if (currentNodeId.equals(startNodeId)) {
            config.setChainStart();
            config.setChainIndex(chainIndex);
            config.setOperatorName(streamGraph.getStreamNode(currentNodeId).getOperatorName());
            for (StreamEdge edge : transitiveOutEdges) {
                connect(startNodeId, edge);
            }
            config.setOutEdgesInOrder(transitiveOutEdges);
            config.setTransitiveChainedTaskConfigs(chainedConfigs.get(startNodeId));
        } else {
            chainedConfigs.computeIfAbsent(startNodeId, k -> new HashMap<Integer, StreamConfig>());
            config.setChainIndex(chainIndex);
            StreamNode node = streamGraph.getStreamNode(currentNodeId);
            config.setOperatorName(node.getOperatorName());
            chainedConfigs.get(startNodeId).put(currentNodeId, config);
        }
        config.setOperatorID(currentOperatorId);
        if (chainableOutputs.isEmpty()) {
            config.setChainEnd();
        }
        return transitiveOutEdges;
    } else {
        return new ArrayList<>();
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Arrays(java.util.Arrays) DefaultLogicalPipelinedRegion(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalPipelinedRegion) InputSelectable(org.apache.flink.streaming.api.operators.InputSelectable) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) CheckpointingMode(org.apache.flink.streaming.api.CheckpointingMode) YieldingOperatorFactory(org.apache.flink.streaming.api.operators.YieldingOperatorFactory) LoggerFactory(org.slf4j.LoggerFactory) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) CheckpointStorage(org.apache.flink.runtime.state.CheckpointStorage) CoLocationGroupImpl(org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroupImpl) StringUtils(org.apache.commons.lang3.StringUtils) StateBackend(org.apache.flink.runtime.state.StateBackend) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) CustomPartitionerWrapper(org.apache.flink.streaming.runtime.partitioner.CustomPartitionerWrapper) Map(java.util.Map) Function(org.apache.flink.api.common.functions.Function) WithMasterCheckpointHook(org.apache.flink.streaming.api.checkpoint.WithMasterCheckpointHook) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) ExecutionOptions(org.apache.flink.configuration.ExecutionOptions) JobCheckpointingSettings(org.apache.flink.runtime.jobgraph.tasks.JobCheckpointingSettings) MINIMAL_CHECKPOINT_TIME(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration.MINIMAL_CHECKPOINT_TIME) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) IdentityHashMap(java.util.IdentityHashMap) TaskConfig(org.apache.flink.runtime.operators.util.TaskConfig) Collection(java.util.Collection) Set(java.util.Set) DistributedCache(org.apache.flink.api.common.cache.DistributedCache) Collectors(java.util.stream.Collectors) List(java.util.List) SerializedValue(org.apache.flink.util.SerializedValue) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) UdfStreamOperatorFactory(org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) Optional(java.util.Optional) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) StreamIterationTail(org.apache.flink.streaming.runtime.tasks.StreamIterationTail) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) JobEdge(org.apache.flink.runtime.jobgraph.JobEdge) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) LogicalVertex(org.apache.flink.runtime.jobgraph.topology.LogicalVertex) ForwardForConsecutiveHashPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardForConsecutiveHashPartitioner) ManagedMemoryUtils(org.apache.flink.runtime.util.config.memory.ManagedMemoryUtils) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) InputOutputFormatVertex(org.apache.flink.runtime.jobgraph.InputOutputFormatVertex) 
ResultPartitionType(org.apache.flink.runtime.io.network.partition.ResultPartitionType) HashMap(java.util.HashMap) SourceOperatorFactory(org.apache.flink.streaming.api.operators.SourceOperatorFactory) ArrayList(java.util.ArrayList) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) HashSet(java.util.HashSet) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) JobGraphUtils(org.apache.flink.runtime.jobgraph.JobGraphUtils) ExecutionCheckpointingOptions(org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions) StreamIterationHead(org.apache.flink.streaming.runtime.tasks.StreamIterationHead) LinkedList(java.util.LinkedList) DistributionPattern(org.apache.flink.runtime.jobgraph.DistributionPattern) Nullable(javax.annotation.Nullable) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) Logger(org.slf4j.Logger) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) Configuration(org.apache.flink.configuration.Configuration) IOException(java.io.IOException) OperatorIDPair(org.apache.flink.runtime.OperatorIDPair) ForwardForUnspecifiedPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardForUnspecifiedPartitioner) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) MasterTriggerRestoreHook(org.apache.flink.runtime.checkpoint.MasterTriggerRestoreHook) RescalePartitioner(org.apache.flink.streaming.runtime.partitioner.RescalePartitioner) StreamExchangeMode(org.apache.flink.streaming.api.transformations.StreamExchangeMode) JobID(org.apache.flink.api.common.JobID) DefaultLogicalTopology(org.apache.flink.runtime.jobgraph.topology.DefaultLogicalTopology) OperatorCoordinator(org.apache.flink.runtime.operators.coordination.OperatorCoordinator) InputOutputFormatContainer(org.apache.flink.runtime.jobgraph.InputOutputFormatContainer) Internal(org.apache.flink.annotation.Internal) Comparator(java.util.Comparator) Collections(java.util.Collections) CheckpointRetentionPolicy(org.apache.flink.runtime.checkpoint.CheckpointRetentionPolicy) CheckpointCoordinatorConfiguration(org.apache.flink.runtime.jobgraph.tasks.CheckpointCoordinatorConfiguration) Configuration(org.apache.flink.configuration.Configuration) ArrayList(java.util.ArrayList) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID)
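
The createChain pass above hands out one OperatorID per chained operator (via chainInfo.addNodeToChain) and writes it into the operator's StreamConfig; the id of the chain's head typically matches the JobVertexID of the vertex created for that chain, which is what OperatorID.fromJobVertexID converts between. The snippet below is only a minimal, standalone sketch of that relationship, not generator code: the class name is ours, and the random ids stand in for the hash-derived ids the generator actually computes.

import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.OperatorID;

public class OperatorIdFromVertexSketch {

    public static void main(String[] args) {
        // Stand-in for the vertex id the generator derives from the chain's head node hash.
        JobVertexID headVertexId = new JobVertexID();

        // The head operator of a chain reuses the vertex id's bytes as its OperatorID.
        OperatorID headOperatorId = OperatorID.fromJobVertexID(headVertexId);

        // Non-head operators in the chain get their own ids (hash-derived in the real generator).
        OperatorID chainedOperatorId = new OperatorID();

        System.out.println("vertex id  : " + headVertexId);
        System.out.println("head op id : " + headOperatorId);
        System.out.println("chained op : " + chainedOperatorId);
    }
}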

Example 57 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in project flink by apache.

The class AbstractStreamOperator, method setup.

// ------------------------------------------------------------------------
// Life Cycle
// ------------------------------------------------------------------------
@Override
public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) {
    final Environment environment = containingTask.getEnvironment();
    this.container = containingTask;
    this.config = config;
    try {
        InternalOperatorMetricGroup operatorMetricGroup = environment.getMetricGroup().getOrAddOperator(config.getOperatorID(), config.getOperatorName());
        this.output = new CountingOutput<>(output, operatorMetricGroup.getIOMetricGroup().getNumRecordsOutCounter());
        if (config.isChainEnd()) {
            operatorMetricGroup.getIOMetricGroup().reuseOutputMetricsForTask();
        }
        this.metrics = operatorMetricGroup;
    } catch (Exception e) {
        LOG.warn("An error occurred while instantiating task metrics.", e);
        this.metrics = UnregisteredMetricGroups.createUnregisteredOperatorMetricGroup();
        this.output = output;
    }
    this.combinedWatermark = IndexedCombinedWatermarkStatus.forInputsCount(2);
    try {
        Configuration taskManagerConfig = environment.getTaskManagerInfo().getConfiguration();
        int historySize = taskManagerConfig.getInteger(MetricOptions.LATENCY_HISTORY_SIZE);
        if (historySize <= 0) {
            LOG.warn("{} has been set to a value equal or below 0: {}. Using default.", MetricOptions.LATENCY_HISTORY_SIZE, historySize);
            historySize = MetricOptions.LATENCY_HISTORY_SIZE.defaultValue();
        }
        final String configuredGranularity = taskManagerConfig.getString(MetricOptions.LATENCY_SOURCE_GRANULARITY);
        LatencyStats.Granularity granularity;
        try {
            granularity = LatencyStats.Granularity.valueOf(configuredGranularity.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException iae) {
            granularity = LatencyStats.Granularity.OPERATOR;
            LOG.warn("Configured value {} option for {} is invalid. Defaulting to {}.", configuredGranularity, MetricOptions.LATENCY_SOURCE_GRANULARITY.key(), granularity);
        }
        MetricGroup jobMetricGroup = this.metrics.getJobMetricGroup();
        this.latencyStats = new LatencyStats(jobMetricGroup.addGroup("latency"), historySize, container.getIndexInSubtaskGroup(), getOperatorID(), granularity);
    } catch (Exception e) {
        LOG.warn("An error occurred while instantiating latency metrics.", e);
        this.latencyStats = new LatencyStats(UnregisteredMetricGroups.createUnregisteredTaskManagerJobMetricGroup().addGroup("latency"), 1, 0, new OperatorID(), LatencyStats.Granularity.SINGLE);
    }
    this.runtimeContext = new StreamingRuntimeContext(environment, environment.getAccumulatorRegistry().getUserMap(), getMetricGroup(), getOperatorID(), getProcessingTimeService(), null, environment.getExternalResourceInfoProvider());
    stateKeySelector1 = config.getStatePartitioner(0, getUserCodeClassloader());
    stateKeySelector2 = config.getStatePartitioner(1, getUserCodeClassloader());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) InternalOperatorMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorMetricGroup) OperatorMetricGroup(org.apache.flink.metrics.groups.OperatorMetricGroup) MetricGroup(org.apache.flink.metrics.MetricGroup) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) InternalOperatorMetricGroup(org.apache.flink.runtime.metrics.groups.InternalOperatorMetricGroup) Environment(org.apache.flink.runtime.execution.Environment) LatencyStats(org.apache.flink.streaming.util.LatencyStats)
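
The setup method reads the OperatorID back out of the StreamConfig that the job graph generator populated (config.getOperatorID()) and uses it to key the operator's metric group and latency stats. Below is a minimal sketch of that write/read round trip on a StreamConfig; StreamConfig is an internal class, so treat this as an assumption-laden illustration rather than a supported API, and the class and operator names are ours.

import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.jobgraph.OperatorID;
import org.apache.flink.streaming.api.graph.StreamConfig;

public class StreamConfigOperatorIdSketch {

    public static void main(String[] args) {
        // The generator writes the id and name into the per-operator StreamConfig ...
        StreamConfig config = new StreamConfig(new Configuration());
        OperatorID operatorId = new OperatorID();
        config.setOperatorID(operatorId);
        config.setOperatorName("my-map");

        // ... and AbstractStreamOperator#setup reads them back for metrics and latency stats.
        System.out.println(config.getOperatorName());                  // my-map
        System.out.println(operatorId.equals(config.getOperatorID())); // true: equality is by value
    }
}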

Example 58 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in project flink by apache.

The class SourceOperatorFactory, method createStreamOperator.

@Override
public <T extends StreamOperator<OUT>> T createStreamOperator(StreamOperatorParameters<OUT> parameters) {
    final OperatorID operatorId = parameters.getStreamConfig().getOperatorID();
    final OperatorEventGateway gateway = parameters.getOperatorEventDispatcher().getOperatorEventGateway(operatorId);
    final SourceOperator<OUT, ?> sourceOperator = instantiateSourceOperator(source::createReader, gateway, source.getSplitSerializer(), watermarkStrategy, parameters.getProcessingTimeService(), parameters.getContainingTask().getEnvironment().getTaskManagerInfo().getConfiguration(), parameters.getContainingTask().getEnvironment().getTaskManagerInfo().getTaskManagerExternalAddress(), emitProgressiveWatermarks);
    sourceOperator.setup(parameters.getContainingTask(), parameters.getStreamConfig(), parameters.getOutput());
    parameters.getOperatorEventDispatcher().registerEventHandler(operatorId, sourceOperator);
    // today's lunch is generics spaghetti
    @SuppressWarnings("unchecked") final T castedOperator = (T) sourceOperator;
    return castedOperator;
}
Also used : OperatorEventGateway(org.apache.flink.runtime.operators.coordination.OperatorEventGateway) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID)
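
The factory resolves everything that ties a source operator to its coordinator (the event gateway and the event handler registration) through the OperatorID it reads from the StreamConfig. That works because OperatorID is a 128-bit value object with value-based equals/hashCode, so it can serve as a lookup key. A small sketch of that property follows; the map and its contents are hypothetical and stand in for Flink's dispatcher, they are not its API.

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.runtime.jobgraph.OperatorID;

public class OperatorIdAsKeySketch {

    public static void main(String[] args) {
        // A dispatcher-like map keyed by OperatorID (the value is a placeholder here).
        Map<OperatorID, String> handlersByOperator = new HashMap<>();

        OperatorID sourceOperatorId = new OperatorID();
        handlersByOperator.put(sourceOperatorId, "source-operator-event-handler");

        // A copy built from the same bytes resolves to the same entry,
        // because OperatorID equality is defined on the 128-bit value, not object identity.
        OperatorID sameId = new OperatorID(sourceOperatorId.getBytes());
        System.out.println(handlersByOperator.get(sameId)); // source-operator-event-handler
    }
}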

Example 59 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in project flink by apache.

The class TaskLocalStateStoreImplTest, method storeStates.

private List<TestingTaskStateSnapshot> storeStates(int count) {
    List<TestingTaskStateSnapshot> taskStateSnapshots = new ArrayList<>(count);
    for (int i = 0; i < count; ++i) {
        OperatorID operatorID = new OperatorID();
        TestingTaskStateSnapshot taskStateSnapshot = new TestingTaskStateSnapshot();
        OperatorSubtaskState operatorSubtaskState = OperatorSubtaskState.builder().build();
        taskStateSnapshot.putSubtaskStateByOperatorID(operatorID, operatorSubtaskState);
        taskLocalStateStore.storeLocalState(i, taskStateSnapshot);
        taskStateSnapshots.add(taskStateSnapshot);
    }
    return taskStateSnapshots;
}
Also used : ArrayList(java.util.ArrayList) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState)
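
TestingTaskStateSnapshot is a test helper, but the production TaskStateSnapshot follows the same pattern: per-operator state is grouped under the operator's OperatorID and looked up by the same id on restore. A minimal sketch with the production classes (class name ours, empty subtask state as in the test above):

import org.apache.flink.runtime.checkpoint.OperatorSubtaskState;
import org.apache.flink.runtime.checkpoint.TaskStateSnapshot;
import org.apache.flink.runtime.jobgraph.OperatorID;

public class SnapshotByOperatorIdSketch {

    public static void main(String[] args) {
        OperatorID operatorId = new OperatorID();

        // A task-level snapshot groups per-operator state under the operator's id.
        TaskStateSnapshot snapshot = new TaskStateSnapshot();
        snapshot.putSubtaskStateByOperatorID(operatorId, OperatorSubtaskState.builder().build());

        // On restore, the state is retrieved by the same id.
        OperatorSubtaskState restored = snapshot.getSubtaskStateByOperatorID(operatorId);
        System.out.println(restored != null); // true
    }
}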

Example 60 with OperatorID

Use of org.apache.flink.runtime.jobgraph.OperatorID in project flink by apache.

The class StateChangelogStorageTest, method testWriteAndRead.

@Test
public void testWriteAndRead() throws Exception {
    KeyGroupRange kgRange = KeyGroupRange.of(0, 5);
    Map<Integer, List<byte[]>> appendsByKeyGroup = generateAppends(kgRange, 10, 20);
    try (StateChangelogStorage<T> client = getFactory();
        StateChangelogWriter<T> writer = client.createWriter(new OperatorID().toString(), kgRange)) {
        SequenceNumber prev = writer.initialSequenceNumber();
        for (Map.Entry<Integer, List<byte[]>> entry : appendsByKeyGroup.entrySet()) {
            Integer group = entry.getKey();
            List<byte[]> appends = entry.getValue();
            for (byte[] bytes : appends) {
                writer.append(group, bytes);
            }
        }
        T handle = writer.persist(prev).get();
        StateChangelogHandleReader<T> reader = client.createReader();
        assertByteMapsEqual(appendsByKeyGroup, extract(handle, reader));
    }
}
Also used : KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) SequenceNumber(org.apache.flink.runtime.state.changelog.SequenceNumber) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map) Test(org.junit.Test)
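
The test keys the changelog writer with new OperatorID().toString(). The string form of an OperatorID is the 32-character lowercase hex rendering of its 128-bit value, which makes it a convenient, collision-resistant key. A short sketch (class name ours; the printed id is random per run):

import org.apache.flink.runtime.jobgraph.OperatorID;

public class OperatorIdAsStringKeySketch {

    public static void main(String[] args) {
        OperatorID operatorId = new OperatorID();
        String writerKey = operatorId.toString();

        System.out.println(writerKey);          // e.g. 7df19f87deec5680128845fd9a6ca18d
        System.out.println(writerKey.length()); // 32
    }
}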

Aggregations

OperatorID (org.apache.flink.runtime.jobgraph.OperatorID): 211 usages
Test (org.junit.Test): 132 usages
HashMap (java.util.HashMap): 46 usages
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 44 usages
StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig): 41 usages
JobID (org.apache.flink.api.common.JobID): 38 usages
Configuration (org.apache.flink.configuration.Configuration): 30 usages
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 28 usages
ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex): 28 usages
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 24 usages
ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph): 23 usages
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 21 usages
OperatorStateHandle (org.apache.flink.runtime.state.OperatorStateHandle): 21 usages
ArrayList (java.util.ArrayList): 20 usages
HashSet (java.util.HashSet): 20 usages
TaskStateSnapshot (org.apache.flink.runtime.checkpoint.TaskStateSnapshot): 19 usages
OperatorStreamStateHandle (org.apache.flink.runtime.state.OperatorStreamStateHandle): 19 usages
MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend): 19 usages
IOException (java.io.IOException): 18 usages
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 18 usages