
Example 6 with TaskInvokable

Use of org.apache.flink.runtime.jobgraph.tasks.TaskInvokable in project flink by apache.

From the class StreamingJobGraphGenerator, the method setVertexConfig:

private void setVertexConfig(
        Integer vertexID,
        StreamConfig config,
        List<StreamEdge> chainableOutputs,
        List<StreamEdge> nonChainableOutputs,
        Map<Integer, ChainedSourceInfo> chainedSources) {
    tryConvertPartitionerForDynamicGraph(chainableOutputs, nonChainableOutputs);
    StreamNode vertex = streamGraph.getStreamNode(vertexID);
    config.setVertexID(vertexID);
    // build the inputs as a combination of source and network inputs
    final List<StreamEdge> inEdges = vertex.getInEdges();
    final TypeSerializer<?>[] inputSerializers = vertex.getTypeSerializersIn();
    final StreamConfig.InputConfig[] inputConfigs = new StreamConfig.InputConfig[inputSerializers.length];
    int inputGateCount = 0;
    for (final StreamEdge inEdge : inEdges) {
        final ChainedSourceInfo chainedSource = chainedSources.get(inEdge.getSourceId());
        // type number 0 denotes a single-input operator; for two or more inputs the
        // type numbers are 1-based
        final int inputIndex = inEdge.getTypeNumber() == 0 ? 0 : inEdge.getTypeNumber() - 1;
        if (chainedSource != null) {
            // chained source is the input
            if (inputConfigs[inputIndex] != null) {
                throw new IllegalStateException("Trying to union a chained source with another input.");
            }
            inputConfigs[inputIndex] = chainedSource.getInputConfig();
            chainedConfigs.computeIfAbsent(vertexID, (key) -> new HashMap<>()).put(inEdge.getSourceId(), chainedSource.getOperatorConfig());
        } else {
            // network input: further edges carrying the same type number are union-ed
            // into the same input
            if (inputConfigs[inputIndex] == null) {
                // PASS_THROUGH is a sensible default for streaming jobs. Only for BATCH
                // execution can we have sorted inputs
                StreamConfig.InputRequirement inputRequirement = vertex.getInputRequirements().getOrDefault(inputIndex, StreamConfig.InputRequirement.PASS_THROUGH);
                inputConfigs[inputIndex] = new StreamConfig.NetworkInputConfig(inputSerializers[inputIndex], inputGateCount++, inputRequirement);
            }
        }
    }
    config.setInputs(inputConfigs);
    config.setTypeSerializerOut(vertex.getTypeSerializerOut());
    // iterate over the output edges, find side-output edges, and create and store a serializer for each OutputTag type
    for (StreamEdge edge : chainableOutputs) {
        if (edge.getOutputTag() != null) {
            config.setTypeSerializerSideOut(edge.getOutputTag(), edge.getOutputTag().getTypeInfo().createSerializer(streamGraph.getExecutionConfig()));
        }
    }
    for (StreamEdge edge : nonChainableOutputs) {
        if (edge.getOutputTag() != null) {
            config.setTypeSerializerSideOut(edge.getOutputTag(), edge.getOutputTag().getTypeInfo().createSerializer(streamGraph.getExecutionConfig()));
        }
    }
    config.setStreamOperatorFactory(vertex.getOperatorFactory());
    config.setNumberOfOutputs(nonChainableOutputs.size());
    config.setNonChainedOutputs(nonChainableOutputs);
    config.setChainedOutputs(chainableOutputs);
    config.setTimeCharacteristic(streamGraph.getTimeCharacteristic());
    final CheckpointConfig checkpointCfg = streamGraph.getCheckpointConfig();
    config.setStateBackend(streamGraph.getStateBackend());
    config.setChangelogStateBackendEnabled(streamGraph.isChangelogStateBackendEnabled());
    config.setCheckpointStorage(streamGraph.getCheckpointStorage());
    config.setSavepointDir(streamGraph.getSavepointDirectory());
    config.setGraphContainingLoops(streamGraph.isIterative());
    config.setTimerServiceProvider(streamGraph.getTimerServiceProvider());
    config.setCheckpointingEnabled(checkpointCfg.isCheckpointingEnabled());
    config.getConfiguration().set(ExecutionCheckpointingOptions.ENABLE_CHECKPOINTS_AFTER_TASKS_FINISH, streamGraph.isEnableCheckpointsAfterTasksFinish());
    config.setCheckpointMode(getCheckpointingMode(checkpointCfg));
    config.setUnalignedCheckpointsEnabled(checkpointCfg.isUnalignedCheckpointsEnabled());
    config.setAlignedCheckpointTimeout(checkpointCfg.getAlignedCheckpointTimeout());
    for (int i = 0; i < vertex.getStatePartitioners().length; i++) {
        config.setStatePartitioner(i, vertex.getStatePartitioners()[i]);
    }
    config.setStateKeySerializer(vertex.getStateKeySerializer());
    Class<? extends TaskInvokable> vertexClass = vertex.getJobVertexClass();
    if (vertexClass.equals(StreamIterationHead.class) || vertexClass.equals(StreamIterationTail.class)) {
        config.setIterationId(streamGraph.getBrokerID(vertexID));
        config.setIterationWaitTime(streamGraph.getLoopTimeout(vertexID));
    }
    vertexConfigs.put(vertexID, config);
}
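
The input-index computation above maps a StreamEdge's type number onto a slot of the inputConfigs array: a type number of 0 marks a single-input operator, while multi-input operators use 1-based type numbers. A minimal, self-contained sketch of that mapping (plain Java with no Flink dependencies; the class and method names are illustrative only):

// Illustrative sketch only: mirrors the inputIndex computation in setVertexConfig above.
final class InputIndexSketch {

    // Type number 0 => single-input operator => index 0; otherwise type numbers are 1-based.
    static int inputIndexFor(int typeNumber) {
        return typeNumber == 0 ? 0 : typeNumber - 1;
    }

    public static void main(String[] args) {
        System.out.println(inputIndexFor(0)); // single-input operator -> 0
        System.out.println(inputIndexFor(1)); // first input of a multi-input operator -> 0
        System.out.println(inputIndexFor(2)); // second input -> 1
    }
}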

Example 7 with TaskInvokable

Use of org.apache.flink.runtime.jobgraph.tasks.TaskInvokable in project flink by apache.

From the class MultiInputSortingDataInput, the method wrapInputs:

public static <K> SelectableSortingInputs wrapInputs(
        TaskInvokable containingTask,
        StreamTaskInput<Object>[] sortingInputs,
        KeySelector<Object, K>[] keySelectors,
        TypeSerializer<Object>[] inputSerializers,
        TypeSerializer<K> keySerializer,
        StreamTaskInput<Object>[] passThroughInputs,
        MemoryManager memoryManager,
        IOManager ioManager,
        boolean objectReuse,
        double managedMemoryFraction,
        Configuration jobConfiguration,
        ExecutionConfig executionConfig) {
    // Fixed-length keys get a comparator and serialization buffer sized exactly to the key;
    // variable-length keys fall back to byte-wise comparison and a growable 64-byte buffer.
    int keyLength = keySerializer.getLength();
    final TypeComparator<Tuple2<byte[], StreamRecord<Object>>> comparator;
    DataOutputSerializer dataOutputSerializer;
    if (keyLength > 0) {
        dataOutputSerializer = new DataOutputSerializer(keyLength);
        comparator = new FixedLengthByteKeyComparator<>(keyLength);
    } else {
        dataOutputSerializer = new DataOutputSerializer(64);
        comparator = new VariableLengthByteKeyComparator<>();
    }
    // The input selector needs to know which input indices are pass-through; the resources
    // below are split evenly across all inputs, sorted and pass-through alike.
    List<Integer> passThroughInputIndices = Arrays.stream(passThroughInputs).map(StreamTaskInput::getInputIndex).collect(Collectors.toList());
    int numberOfInputs = sortingInputs.length + passThroughInputs.length;
    CommonContext commonContext = new CommonContext(sortingInputs);
    InputSelector inputSelector = new InputSelector(commonContext, numberOfInputs, passThroughInputIndices);
    // Each sorting input is wrapped in a MultiInputSortingDataInput backed by an ExternalSorter
    // that receives an equal share of the managed memory and of the spilling fan-out.
    StreamTaskInput<?>[] wrappedSortingInputs = IntStream.range(0, sortingInputs.length).mapToObj(idx -> {
        try {
            KeyAndValueSerializer<Object> keyAndValueSerializer = new KeyAndValueSerializer<>(inputSerializers[idx], keyLength);
            return new MultiInputSortingDataInput<>(
                    commonContext, sortingInputs[idx], sortingInputs[idx].getInputIndex(),
                    ExternalSorter.newBuilder(memoryManager, containingTask, keyAndValueSerializer, comparator, executionConfig)
                            .memoryFraction(managedMemoryFraction / numberOfInputs)
                            .enableSpilling(ioManager, jobConfiguration.get(AlgorithmOptions.SORT_SPILLING_THRESHOLD))
                            .maxNumFileHandles(jobConfiguration.get(AlgorithmOptions.SPILLING_MAX_FAN) / numberOfInputs)
                            .objectReuse(objectReuse)
                            .largeRecords(true)
                            .build(),
                    keySelectors[idx], keySerializer, dataOutputSerializer);
        } catch (MemoryAllocationException e) {
            throw new RuntimeException(e); // keep the allocation failure as the cause
        }
    }).toArray(StreamTaskInput[]::new);
    // Pass-through inputs are only wrapped so that the shared input selector can observe them.
    StreamTaskInput<?>[] wrappedPassThroughInputs = Arrays.stream(passThroughInputs).map(input -> new ObservableStreamTaskInput<>(input, inputSelector)).toArray(StreamTaskInput[]::new);
    return new SelectableSortingInputs(wrappedSortingInputs, wrappedPassThroughInputs, inputSelector);
}
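
One detail worth noting in wrapInputs is how resources are divided: each ExternalSorter receives managedMemoryFraction / numberOfInputs of the task's managed memory and a corresponding share of the configured spilling fan-out, where numberOfInputs counts both sorting and pass-through inputs. A minimal sketch of that split with plain numbers (class and method names are hypothetical):

// Illustrative sketch only: mirrors the per-sorter resource split performed in wrapInputs above.
final class SorterResourceSplitSketch {

    // Share of the task's managed memory handed to each ExternalSorter.
    static double memoryFractionPerSorter(double managedMemoryFraction, int sortingInputs, int passThroughInputs) {
        int numberOfInputs = sortingInputs + passThroughInputs; // pass-through inputs count as well
        return managedMemoryFraction / numberOfInputs;
    }

    // Maximum number of spill-file handles granted to each sorter.
    static int maxFileHandlesPerSorter(int configuredMaxFan, int sortingInputs, int passThroughInputs) {
        return configuredMaxFan / (sortingInputs + passThroughInputs);
    }

    public static void main(String[] args) {
        // Example: half of the managed memory, two sorting inputs plus one pass-through input.
        System.out.println(memoryFractionPerSorter(0.5, 2, 1)); // ~0.1667
        System.out.println(maxFileHandlesPerSorter(128, 2, 1)); // 42
    }
}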
