Search in sources :

Example 11 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class BootstrapTransformation method getConfig.

@VisibleForTesting
StreamConfig getConfig(OperatorID operatorID, @Nullable StateBackend stateBackend, Configuration additionalConfig, StreamOperator<TaggedOperatorSubtaskState> operator) {
    // Eagerly perform a deep copy of the configuration, otherwise it will result in undefined
    // behavior
    // when deploying with multiple bootstrap transformations.
    Configuration deepCopy = new Configuration(dataSet.getExecutionEnvironment().getConfiguration());
    deepCopy.addAll(additionalConfig);
    final StreamConfig config = new StreamConfig(deepCopy);
    config.setChainStart();
    config.setCheckpointingEnabled(true);
    config.setCheckpointMode(CheckpointingMode.EXACTLY_ONCE);
    if (keyType != null) {
        TypeSerializer<?> keySerializer = keyType.createSerializer(dataSet.getExecutionEnvironment().getConfig());
        config.setStateKeySerializer(keySerializer);
        config.setStatePartitioner(0, originalKeySelector);
    }
    config.setStreamOperator(operator);
    config.setOperatorName(operatorID.toHexString());
    config.setOperatorID(operatorID);
    config.setStateBackend(stateBackend);
    // This means leaving this stateBackend unwrapped.
    config.setChangelogStateBackendEnabled(TernaryBoolean.FALSE);
    config.setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.STATE_BACKEND, 1.0);
    return config;
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting)

Example 12 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class StateBootstrapTransformationTest method testStreamConfig.

@Test
public void testStreamConfig() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> input = env.fromElements("");
    StateBootstrapTransformation<String> transformation = OperatorTransformation.bootstrapWith(input).keyBy(new CustomKeySelector()).transform(new ExampleKeyedStateBootstrapFunction());
    StreamConfig config = transformation.getConfig(OperatorIDGenerator.fromUid("uid"), new HashMapStateBackend(), new Configuration(), null);
    KeySelector selector = config.getStatePartitioner(0, Thread.currentThread().getContextClassLoader());
    Assert.assertEquals("Incorrect key selector forwarded to stream operator", CustomKeySelector.class, selector.getClass());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) KeySelector(org.apache.flink.api.java.functions.KeySelector) Test(org.junit.Test)

Example 13 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class StreamMultipleInputProcessorFactory method create.

@SuppressWarnings({ "unchecked", "rawtypes" })
public static StreamMultipleInputProcessor create(TaskInvokable ownerTask, CheckpointedInputGate[] checkpointedInputGates, StreamConfig.InputConfig[] configuredInputs, IOManager ioManager, MemoryManager memoryManager, TaskIOMetricGroup ioMetricGroup, Counter mainOperatorRecordsIn, MultipleInputStreamOperator<?> mainOperator, WatermarkGauge[] inputWatermarkGauges, StreamConfig streamConfig, Configuration taskManagerConfig, Configuration jobConfig, ExecutionConfig executionConfig, ClassLoader userClassloader, OperatorChain<?, ?> operatorChain, InflightDataRescalingDescriptor inflightDataRescalingDescriptor, Function<Integer, StreamPartitioner<?>> gatePartitioners, TaskInfo taskInfo) {
    checkNotNull(operatorChain);
    List<Input> operatorInputs = mainOperator.getInputs();
    int inputsCount = operatorInputs.size();
    StreamOneInputProcessor<?>[] inputProcessors = new StreamOneInputProcessor[inputsCount];
    Counter networkRecordsIn = new SimpleCounter();
    ioMetricGroup.reuseRecordsInputCounter(networkRecordsIn);
    checkState(configuredInputs.length == inputsCount, "Number of configured inputs in StreamConfig [%s] doesn't match the main operator's number of inputs [%s]", configuredInputs.length, inputsCount);
    StreamTaskInput[] inputs = new StreamTaskInput[inputsCount];
    for (int i = 0; i < inputsCount; i++) {
        StreamConfig.InputConfig configuredInput = configuredInputs[i];
        if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
            StreamConfig.NetworkInputConfig networkInput = (StreamConfig.NetworkInputConfig) configuredInput;
            inputs[i] = StreamTaskNetworkInputFactory.create(checkpointedInputGates[networkInput.getInputGateIndex()], networkInput.getTypeSerializer(), ioManager, new StatusWatermarkValve(checkpointedInputGates[networkInput.getInputGateIndex()].getNumberOfInputChannels()), i, inflightDataRescalingDescriptor, gatePartitioners, taskInfo);
        } else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
            StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInput;
            inputs[i] = operatorChain.getSourceTaskInput(sourceInput);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
        }
    }
    InputSelectable inputSelectable = mainOperator instanceof InputSelectable ? (InputSelectable) mainOperator : null;
    StreamConfig.InputConfig[] inputConfigs = streamConfig.getInputs(userClassloader);
    boolean anyRequiresSorting = Arrays.stream(inputConfigs).anyMatch(StreamConfig::requiresSorting);
    if (anyRequiresSorting) {
        if (inputSelectable != null) {
            throw new IllegalStateException("The InputSelectable interface is not supported with sorting inputs");
        }
        StreamTaskInput[] sortingInputs = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> inputs[idx]).toArray(StreamTaskInput[]::new);
        KeySelector[] sortingInputKeySelectors = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> streamConfig.getStatePartitioner(idx, userClassloader)).toArray(KeySelector[]::new);
        TypeSerializer[] sortingInputKeySerializers = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> streamConfig.getTypeSerializerIn(idx, userClassloader)).toArray(TypeSerializer[]::new);
        StreamTaskInput[] passThroughInputs = IntStream.range(0, inputsCount).filter(idx -> !requiresSorting(inputConfigs[idx])).mapToObj(idx -> inputs[idx]).toArray(StreamTaskInput[]::new);
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(ownerTask, sortingInputs, sortingInputKeySelectors, sortingInputKeySerializers, streamConfig.getStateKeySerializer(userClassloader), passThroughInputs, memoryManager, ioManager, executionConfig.isObjectReuseEnabled(), streamConfig.getManagedMemoryFractionOperatorUseCaseOfSlot(ManagedMemoryUseCase.OPERATOR, taskManagerConfig, userClassloader), jobConfig, executionConfig);
        StreamTaskInput<?>[] sortedInputs = selectableSortingInputs.getSortedInputs();
        StreamTaskInput<?>[] passedThroughInputs = selectableSortingInputs.getPassThroughInputs();
        int sortedIndex = 0;
        int passThroughIndex = 0;
        for (int i = 0; i < inputs.length; i++) {
            if (requiresSorting(inputConfigs[i])) {
                inputs[i] = sortedInputs[sortedIndex];
                sortedIndex++;
            } else {
                inputs[i] = passedThroughInputs[passThroughIndex];
                passThroughIndex++;
            }
        }
        inputSelectable = selectableSortingInputs.getInputSelectable();
    }
    for (int i = 0; i < inputsCount; i++) {
        StreamConfig.InputConfig configuredInput = configuredInputs[i];
        if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
            StreamTaskNetworkOutput dataOutput = new StreamTaskNetworkOutput<>(operatorChain.getFinishedOnRestoreInputOrDefault(operatorInputs.get(i)), inputWatermarkGauges[i], mainOperatorRecordsIn, networkRecordsIn);
            inputProcessors[i] = new StreamOneInputProcessor(inputs[i], dataOutput, operatorChain);
        } else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
            StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInput;
            OperatorChain.ChainedSource chainedSource = operatorChain.getChainedSource(sourceInput);
            inputProcessors[i] = new StreamOneInputProcessor(inputs[i], new StreamTaskSourceOutput(chainedSource.getSourceOutput(), inputWatermarkGauges[i], chainedSource.getSourceTaskInput().getOperator().getSourceMetricGroup()), operatorChain);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
        }
    }
    return new StreamMultipleInputProcessor(new MultipleInputSelectionHandler(inputSelectable, inputsCount), inputProcessors);
}
Also used : IntStream(java.util.stream.IntStream) TaskIOMetricGroup(org.apache.flink.runtime.metrics.groups.TaskIOMetricGroup) Arrays(java.util.Arrays) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) InputSelectable(org.apache.flink.streaming.api.operators.InputSelectable) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) CheckpointedInputGate(org.apache.flink.streaming.runtime.io.checkpointing.CheckpointedInputGate) StreamConfig.requiresSorting(org.apache.flink.streaming.api.graph.StreamConfig.requiresSorting) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Function(java.util.function.Function) InflightDataRescalingDescriptor(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptor) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) SourceOperatorStreamTask(org.apache.flink.streaming.runtime.tasks.SourceOperatorStreamTask) SimpleCounter(org.apache.flink.metrics.SimpleCounter) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) WatermarkGaugeExposingOutput(org.apache.flink.streaming.runtime.tasks.WatermarkGaugeExposingOutput) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) StatusWatermarkValve(org.apache.flink.streaming.runtime.watermarkstatus.StatusWatermarkValve) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) Configuration(org.apache.flink.configuration.Configuration) TaskInfo(org.apache.flink.api.common.TaskInfo) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) InternalSourceReaderMetricGroup(org.apache.flink.runtime.metrics.groups.InternalSourceReaderMetricGroup) List(java.util.List) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OperatorChain(org.apache.flink.streaming.runtime.tasks.OperatorChain) Internal(org.apache.flink.annotation.Internal) MultiInputSortingDataInput(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Counter(org.apache.flink.metrics.Counter) WatermarkGauge(org.apache.flink.streaming.runtime.metrics.WatermarkGauge) Input(org.apache.flink.streaming.api.operators.Input) InputSelectable(org.apache.flink.streaming.api.operators.InputSelectable) KeySelector(org.apache.flink.api.java.functions.KeySelector) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) MultiInputSortingDataInput(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput) Input(org.apache.flink.streaming.api.operators.Input) SimpleCounter(org.apache.flink.metrics.SimpleCounter) Counter(org.apache.flink.metrics.Counter) SimpleCounter(org.apache.flink.metrics.SimpleCounter) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StatusWatermarkValve(org.apache.flink.streaming.runtime.watermarkstatus.StatusWatermarkValve)

Example 14 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class TranslationTest method testCheckpointModeTranslation.

@Test
public void testCheckpointModeTranslation() {
    try {
        // with deactivated fault tolerance, the checkpoint mode should be at-least-once
        StreamExecutionEnvironment deactivated = getSimpleJob();
        for (JobVertex vertex : deactivated.getStreamGraph().getJobGraph().getVertices()) {
            assertEquals(CheckpointingMode.AT_LEAST_ONCE, new StreamConfig(vertex.getConfiguration()).getCheckpointMode());
        }
        // with activated fault tolerance, the checkpoint mode should be by default exactly once
        StreamExecutionEnvironment activated = getSimpleJob();
        activated.enableCheckpointing(1000L);
        for (JobVertex vertex : activated.getStreamGraph().getJobGraph().getVertices()) {
            assertEquals(CheckpointingMode.EXACTLY_ONCE, new StreamConfig(vertex.getConfiguration()).getCheckpointMode());
        }
        // explicitly setting the mode
        StreamExecutionEnvironment explicit = getSimpleJob();
        explicit.enableCheckpointing(1000L, CheckpointingMode.AT_LEAST_ONCE);
        for (JobVertex vertex : explicit.getStreamGraph().getJobGraph().getVertices()) {
            assertEquals(CheckpointingMode.AT_LEAST_ONCE, new StreamConfig(vertex.getConfiguration()).getCheckpointMode());
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 15 with StreamConfig

use of org.apache.flink.streaming.api.graph.StreamConfig in project flink by apache.

the class BatchMultipleInputStreamOperator method createStreamConfig.

protected StreamConfig createStreamConfig(StreamOperatorParameters<RowData> multipleInputOperatorParameters, TableOperatorWrapper<?> wrapper) {
    StreamConfig streamConfig = super.createStreamConfig(multipleInputOperatorParameters, wrapper);
    checkState(wrapper.getManagedMemoryFraction() >= 0);
    Configuration taskManagerConfig = getRuntimeContext().getTaskManagerRuntimeInfo().getConfiguration();
    double managedMemoryFraction = multipleInputOperatorParameters.getStreamConfig().getManagedMemoryFractionOperatorUseCaseOfSlot(ManagedMemoryUseCase.OPERATOR, taskManagerConfig, getRuntimeContext().getUserCodeClassLoader()) * wrapper.getManagedMemoryFraction();
    streamConfig.setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, managedMemoryFraction);
    return streamConfig;
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig)

Aggregations

StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig)98 Test (org.junit.Test)57 Configuration (org.apache.flink.configuration.Configuration)41 OperatorID (org.apache.flink.runtime.jobgraph.OperatorID)40 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)16 Task (org.apache.flink.runtime.taskmanager.Task)16 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)14 ArrayList (java.util.ArrayList)13 StreamEdge (org.apache.flink.streaming.api.graph.StreamEdge)13 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)12 NettyShuffleEnvironmentBuilder (org.apache.flink.runtime.io.network.NettyShuffleEnvironmentBuilder)12 StreamMap (org.apache.flink.streaming.api.operators.StreamMap)12 Environment (org.apache.flink.runtime.execution.Environment)9 CheckpointMetaData (org.apache.flink.runtime.checkpoint.CheckpointMetaData)8 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)8 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)8 CoStreamMap (org.apache.flink.streaming.api.operators.co.CoStreamMap)8 OneInputStreamTaskTestHarness (org.apache.flink.streaming.runtime.tasks.OneInputStreamTaskTestHarness)8 MockStreamTaskBuilder (org.apache.flink.streaming.util.MockStreamTaskBuilder)8 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)7