Search in sources :

Example 61 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class StateBootstrapTransformation method getConfig.

/**
 * Assembles the {@link StreamConfig} for the given operator.
 *
 * <p>The config is backed by a copy of the execution environment's configuration with {@code
 * additionalConfig} added on top. It is marked as chain start, has checkpointing enabled in
 * {@code EXACTLY_ONCE} mode, and carries the operator, its ID (whose hex string doubles as the
 * operator name) and the state backend. The key serializer and partitioner are only set when
 * {@code keyType} is non-null.
 *
 * @param operatorID ID stored in the config; its hex string is used as the operator name
 * @param stateBackend state backend stored in the config
 * @param additionalConfig entries added on top of the copied environment configuration
 * @param operator stream operator stored in the config
 * @return the populated stream config
 */
@VisibleForTesting
StreamConfig getConfig(OperatorID operatorID, StateBackend stateBackend, Configuration additionalConfig, StreamOperator<TaggedOperatorSubtaskState> operator) {
    // Copy the environment configuration up front; sharing it would lead to undefined behavior
    // when deploying with multiple bootstrap transformations.
    final Configuration mergedSettings = new Configuration(MutableConfig.of(stream.getExecutionEnvironment().getConfiguration()));
    mergedSettings.addAll(additionalConfig);

    final StreamConfig streamConfig = new StreamConfig(mergedSettings);
    streamConfig.setChainStart();
    streamConfig.setCheckpointingEnabled(true);
    streamConfig.setCheckpointMode(CheckpointingMode.EXACTLY_ONCE);

    // Keyed state settings apply only when a key type was supplied.
    if (keyType != null) {
        streamConfig.setStateKeySerializer(keyType.createSerializer(stream.getExecutionEnvironment().getConfig()));
        streamConfig.setStatePartitioner(0, keySelector);
    }

    streamConfig.setStreamOperator(operator);
    streamConfig.setOperatorName(operatorID.toHexString());
    streamConfig.setOperatorID(operatorID);
    streamConfig.setStateBackend(stateBackend);
    // Disabling the changelog state backend leaves the given state backend unwrapped.
    streamConfig.setChangelogStateBackendEnabled(TernaryBoolean.FALSE);
    streamConfig.setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.STATE_BACKEND, 1.0);
    return streamConfig;
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting)

Example 62 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class MultipleInputStreamTask method init.

/**
 * Sets up the watermark gauges and the input processor of this multiple-input task.
 *
 * <p>One {@link WatermarkGauge} is registered per configured input on the main operator's metric
 * group, together with a {@link MinWatermarkGauge} over all of them. The network input gates are
 * then grouped by the type number of their physical in-edge, and the non-empty groups are handed
 * to {@code createInputProcessor}.
 *
 * @throws Exception declared by the overridden method
 */
@SuppressWarnings("rawtypes")
@Override
public void init() throws Exception {
    StreamConfig streamConfig = getConfiguration();
    ClassLoader classLoader = getUserCodeClassLoader();
    InputConfig[] inputs = streamConfig.getInputs(classLoader);

    // One gauge per input; metric names are numbered starting at 1.
    WatermarkGauge[] perInputGauges = new WatermarkGauge[inputs.length];
    for (int idx = 0; idx < inputs.length; idx++) {
        perInputGauges[idx] = new WatermarkGauge();
        mainOperator.getMetricGroup().gauge(MetricNames.currentInputWatermarkName(idx + 1), perInputGauges[idx]);
    }
    MinWatermarkGauge minGauge = new MinWatermarkGauge(perInputGauges);
    mainOperator.getMetricGroup().gauge(MetricNames.IO_CURRENT_INPUT_WATERMARK, minGauge);

    List<StreamEdge> physicalInEdges = streamConfig.getInPhysicalEdges(classLoader);
    // These two numbers may differ, for example when one of the inputs is a union. In that case
    // the number of logical network inputs is smaller than the number of inputs (input gates).
    int networkInputCount = streamConfig.getNumberOfNetworkInputs();

    ArrayList[] gatesPerInput = new ArrayList[inputs.length];
    for (int idx = 0; idx < gatesPerInput.length; idx++) {
        gatesPerInput[idx] = new ArrayList<>();
    }

    // Assign every network input gate to the bucket of its (1-based) edge type number.
    for (int gateIdx = 0; gateIdx < networkInputCount; gateIdx++) {
        int typeNumber = physicalInEdges.get(gateIdx).getTypeNumber();
        IndexedInputGate gate = getEnvironment().getInputGate(gateIdx);
        gatesPerInput[typeNumber - 1].add(gate);
    }

    // Only buckets that actually received a gate are passed on.
    ArrayList<ArrayList<?>> nonEmptyGateLists = new ArrayList<>();
    for (ArrayList<?> gateList : gatesPerInput) {
        if (gateList.isEmpty()) {
            continue;
        }
        nonEmptyGateLists.add(gateList);
    }

    createInputProcessor(nonEmptyGateLists.toArray(new ArrayList[0]), inputs, perInputGauges, (gateIndex) -> physicalInEdges.get(gateIndex).getPartitioner());

    // Registered metrics must be unique, so the task-level gauge wraps the min gauge instead of
    // re-registering the same instance.
    getEnvironment().getMetricGroup().gauge(MetricNames.IO_CURRENT_INPUT_WATERMARK, minGauge::getValue);
}
Also used : ArrayList(java.util.ArrayList) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) MinWatermarkGauge(org.apache.flink.streaming.runtime.metrics.MinWatermarkGauge) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) IndexedInputGate(org.apache.flink.runtime.io.network.partition.consumer.IndexedInputGate) WatermarkGauge(org.apache.flink.streaming.runtime.metrics.WatermarkGauge) InputConfig(org.apache.flink.streaming.api.graph.StreamConfig.InputConfig)

Example 63 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class OperatorChain method createChainedSources.

/**
 * Creates a {@link ChainedSource} for every configured input that is a {@link
 * StreamConfig.SourceInputConfig}.
 *
 * <p>Returns an empty map when no configured input is a source input. Otherwise the main operator
 * must be a {@link MultipleInputStreamOperator}. Each chained source gets its own input gate index,
 * counting up from one past the highest index among the task's existing input gates (or from 0 if
 * there are none).
 *
 * @param containingTask task that owns this operator chain
 * @param configuredInputs inputs configured for the main operator, indexed by input ID
 * @param chainedConfigs configs of the chained operators, looked up by the source ID of the input edge
 * @param userCodeClassloader class loader passed on when creating outputs and operators
 * @param allOpWrappers list of operator wrappers passed on to {@code createOperator}
 * @return map from source input config to its chained source
 */
@SuppressWarnings("rawtypes")
private Map<StreamConfig.SourceInputConfig, ChainedSource> createChainedSources(StreamTask<OUT, OP> containingTask, StreamConfig.InputConfig[] configuredInputs, Map<Integer, StreamConfig> chainedConfigs, ClassLoader userCodeClassloader, List<StreamOperatorWrapper<?, ?>> allOpWrappers) {
    // Nothing to chain when none of the inputs is a source input.
    if (Arrays.stream(configuredInputs).noneMatch(StreamConfig.SourceInputConfig.class::isInstance)) {
        return Collections.emptyMap();
    }
    checkState(mainOperatorWrapper.getStreamOperator() instanceof MultipleInputStreamOperator, "Creating chained input is only supported with MultipleInputStreamOperator and MultipleInputStreamTask");

    Map<StreamConfig.SourceInputConfig, ChainedSource> result = new HashMap<>();
    MultipleInputStreamOperator<?> multiInputOperator = (MultipleInputStreamOperator<?>) mainOperatorWrapper.getStreamOperator();
    List<Input> operatorInputs = multiInputOperator.getInputs();

    // Chained sources are numbered after the highest existing input gate index.
    int highestGateIndex = Arrays.stream(containingTask.getEnvironment().getAllInputGates()).mapToInt(IndexedInputGate::getInputGateIndex).max().orElse(-1);
    int nextGateIndex = highestGateIndex + 1;

    for (int inputId = 0; inputId < configuredInputs.length; inputId++) {
        StreamConfig.InputConfig candidate = configuredInputs[inputId];
        if (!(candidate instanceof StreamConfig.SourceInputConfig)) {
            continue;
        }
        StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) candidate;
        int sourceEdgeId = sourceInput.getInputEdge().getSourceId();
        StreamConfig sourceConfig = chainedConfigs.get(sourceEdgeId);
        OutputTag outputTag = sourceInput.getInputEdge().getOutputTag();

        WatermarkGaugeExposingOutput sourceOutput = createChainedSourceOutput(containingTask, sourceConfig, userCodeClassloader, getFinishedOnRestoreInputOrDefault(operatorInputs.get(inputId)), multiInputOperator.getMetricGroup(), outputTag);
        SourceOperator<?, ?> sourceOperator = (SourceOperator<?, ?>) createOperator(containingTask, sourceConfig, userCodeClassloader, (WatermarkGaugeExposingOutput<StreamRecord<OUT>>) sourceOutput, allOpWrappers, true);

        // A task deployed as finished gets the finished-on-restore flavor of the source input.
        StreamTaskSourceInput<?> sourceTaskInput = this.isTaskDeployedAsFinished() ? new StreamTaskFinishedOnRestoreSourceInput<>(sourceOperator, nextGateIndex++, inputId) : new StreamTaskSourceInput<>(sourceOperator, nextGateIndex++, inputId);
        result.put(sourceInput, new ChainedSource(sourceOutput, sourceTaskInput));
    }
    return result;
}
Also used : HashMap(java.util.HashMap) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) StreamTaskSourceInput(org.apache.flink.streaming.runtime.io.StreamTaskSourceInput) Input(org.apache.flink.streaming.api.operators.Input) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) SourceOperator(org.apache.flink.streaming.api.operators.SourceOperator) OutputTag(org.apache.flink.util.OutputTag)

Example 64 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class OperatorChain method createOutputCollector.

/**
 * Builds the output that an operator writes to, covering both its network outputs and its chained
 * outputs.
 *
 * <p>With exactly one output, that output is returned directly. In every other case, including
 * zero outputs, all outputs are wrapped in a broadcasting collector that is registered with {@code
 * closer}.
 *
 * @param containingTask task that owns this operator chain
 * @param operatorConfig config of the operator whose outputs are collected
 * @param chainedConfigs configs of chained operators, looked up by the target ID of a chained edge
 * @param userCodeClassloader class loader used to read the output edges from the config
 * @param streamOutputs record writer outputs, looked up by non-chained output edge
 * @param allOperatorWrappers list of operator wrappers passed on to {@code createOperatorChain}
 * @param mailboxExecutorFactory factory passed on to {@code createOperatorChain}
 * @return a single output or a broadcasting collector over all outputs
 */
private <T> WatermarkGaugeExposingOutput<StreamRecord<T>> createOutputCollector(StreamTask<?, ?> containingTask, StreamConfig operatorConfig, Map<Integer, StreamConfig> chainedConfigs, ClassLoader userCodeClassloader, Map<StreamEdge, RecordWriterOutput<?>> streamOutputs, List<StreamOperatorWrapper<?, ?>> allOperatorWrappers, MailboxExecutorFactory mailboxExecutorFactory) {
    List<Tuple2<WatermarkGaugeExposingOutput<StreamRecord<T>>, StreamEdge>> collected = new ArrayList<>(4);

    // Network outputs: reuse the record writer output already created for each edge.
    for (StreamEdge networkEdge : operatorConfig.getNonChainedOutputs(userCodeClassloader)) {
        @SuppressWarnings("unchecked") RecordWriterOutput<T> writerOutput = (RecordWriterOutput<T>) streamOutputs.get(networkEdge);
        collected.add(new Tuple2<>(writerOutput, networkEdge));
    }

    // Chained outputs: build the downstream operator chain for each edge.
    for (StreamEdge chainedEdge : operatorConfig.getChainedOutputs(userCodeClassloader)) {
        int targetId = chainedEdge.getTargetId();
        StreamConfig targetConfig = chainedConfigs.get(targetId);
        WatermarkGaugeExposingOutput<StreamRecord<T>> chainedOutput = createOperatorChain(containingTask, targetConfig, chainedConfigs, userCodeClassloader, streamOutputs, allOperatorWrappers, chainedEdge.getOutputTag(), mailboxExecutorFactory);
        collected.add(new Tuple2<>(chainedOutput, chainedEdge));
    }

    // A single output needs no broadcasting wrapper.
    if (collected.size() == 1) {
        return collected.get(0).f0;
    }

    // Send to N outputs. Note that this includes the special case of sending to zero outputs.
    @SuppressWarnings({ "unchecked" }) Output<StreamRecord<T>>[] targets = new Output[collected.size()];
    for (int pos = 0; pos < collected.size(); pos++) {
        targets[pos] = collected.get(pos).f0;
    }

    // With object reuse enabled the copying broadcast collector is used; per the original note,
    // multi-chaining would not work correctly otherwise.
    if (containingTask.getExecutionConfig().isObjectReuseEnabled()) {
        return closer.register(new CopyingBroadcastingOutputCollector<>(targets));
    }
    return closer.register(new BroadcastingOutputCollector<>(targets));
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ArrayList(java.util.ArrayList) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Output(org.apache.flink.streaming.api.operators.Output)

Example 65 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class OneInputStreamTask method init.

/**
 * Sets up the input processor and the input watermark gauges of this one-input task.
 *
 * <p>The input processor is only created when the config reports at least one network input. If
 * the first configured input requires sorting, checkpointing must be disabled and the task input
 * is wrapped in a sorting input. The watermark gauge is registered on both the main operator's
 * and the task's metric group, regardless of the number of inputs.
 *
 * @throws Exception declared by the overridden method
 */
@Override
public void init() throws Exception {
    StreamConfig streamConfig = getConfiguration();
    int networkInputCount = streamConfig.getNumberOfNetworkInputs();

    if (networkInputCount > 0) {
        CheckpointedInputGate checkpointedGate = createCheckpointedInputGate();
        Counter recordsInCounter = setupNumRecordsInCounter(mainOperator);
        DataOutput<IN> dataOutput = createDataOutput(recordsInCounter);
        StreamTaskInput<IN> taskInput = createTaskInput(checkpointedGate);

        // Only the first configured input decides whether sorting is needed.
        StreamConfig.InputConfig[] configuredInputs = streamConfig.getInputs(getUserCodeClassLoader());
        if (requiresSorting(configuredInputs[0])) {
            checkState(!streamConfig.isCheckpointingEnabled(), "Checkpointing is not allowed with sorted inputs.");
            taskInput = wrapWithSorted(taskInput);
        }

        getEnvironment().getMetricGroup().getIOMetricGroup().reuseRecordsInputCounter(recordsInCounter);
        inputProcessor = new StreamOneInputProcessor<>(taskInput, dataOutput, operatorChain);
    }

    mainOperator.getMetricGroup().gauge(MetricNames.IO_CURRENT_INPUT_WATERMARK, inputWatermarkGauge);
    // Registered metrics must be unique, so the task-level gauge wraps the operator gauge instead
    // of re-registering the same instance.
    getEnvironment().getMetricGroup().gauge(MetricNames.IO_CURRENT_INPUT_WATERMARK, inputWatermarkGauge::getValue);
}
Also used : Counter(org.apache.flink.metrics.Counter) CheckpointedInputGate(org.apache.flink.streaming.runtime.io.checkpointing.CheckpointedInputGate) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig)

Aggregations

StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)98 Test (org.junit.Test)57 Configuration (org.apache.flink.configuration.Configuration)41 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)40 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)16 Task (org.apache.flink.runtime.taskmanager.Task)16 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)14 ArrayList (java.util.ArrayList)13 StreamEdge (org.apache.flink.streaming.api.graph.StreamEdge)13 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)12 NettyShuffleEnvironmentBuilder (org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder)12 StreamMap (org.apache.flink.streaming.api.operators.StreamMap)12 Environment (org.apache.flink.runtime.execution.Environment)9 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)8 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)8 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)8 CoStreamMap (org.apache.flink.streaming.api.operators.co.CoStreamMap)8 OneInputStreamTaskTestHarness (org.apache.flink.streaming.runtime.tasks.OneInputStreamTaskTestHarness)8 MockStreamTaskBuilder (org.apache.flink.streaming.util.MockStreamTaskBuilder)8 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)7