Search in sources :

Example 1 with StreamPartitioner

use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.

the class StreamMultipleInputProcessorFactory method create.

/**
 * Builds a {@code StreamMultipleInputProcessor} for a multiple-input operator: one
 * {@code StreamOneInputProcessor} is created per operator input, and all of them are handed to the
 * returned processor together with a {@code MultipleInputSelectionHandler}.
 *
 * <p>Every configured input is either a network input (read from one of the
 * {@code checkpointedInputGates}) or a chained source input (obtained from {@code operatorChain}).
 * If any input reported by {@code streamConfig} requires sorting, the inputs are additionally
 * wrapped through {@code MultiInputSortingDataInput.wrapInputs} before the processors are built.
 *
 * @param ownerTask task handed to the sorting wrapper
 * @param checkpointedInputGates gates, looked up by the gate index of each network input
 * @param configuredInputs per-input configuration; its length must equal the operator's input count
 * @param ioManager passed to the network-input factory and to the sorting wrapper
 * @param memoryManager passed to the sorting wrapper
 * @param ioMetricGroup metric group on which the network records-in counter is registered
 * @param mainOperatorRecordsIn counter handed to every network output
 * @param mainOperator the operator whose inputs are being wired
 * @param inputWatermarkGauges one gauge per input, indexed like the inputs
 * @param streamConfig source of the sorting flags, key selectors and serializers
 * @param taskManagerConfig used to compute the managed-memory fraction for sorting
 * @param jobConfig passed to the sorting wrapper
 * @param executionConfig supplies the object-reuse flag and is passed to the sorting wrapper
 * @param userClassloader class loader used when reading from {@code streamConfig}
 * @param operatorChain must not be null; provides source inputs, chained sources and
 *     finished-on-restore inputs
 * @param inflightDataRescalingDescriptor passed to the network-input factory
 * @param gatePartitioners passed to the network-input factory
 * @param taskInfo passed to the network-input factory
 * @return the processor combining all per-input processors
 * @throws UnsupportedOperationException if a configured input is neither a network nor a source
 *     input
 * @throws IllegalStateException if sorting is required while the main operator implements
 *     {@code InputSelectable}
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static StreamMultipleInputProcessor create(TaskInvokable ownerTask, CheckpointedInputGate[] checkpointedInputGates, StreamConfig.InputConfig[] configuredInputs, IOManager ioManager, MemoryManager memoryManager, TaskIOMetricGroup ioMetricGroup, Counter mainOperatorRecordsIn, MultipleInputStreamOperator<?> mainOperator, WatermarkGauge[] inputWatermarkGauges, StreamConfig streamConfig, Configuration taskManagerConfig, Configuration jobConfig, ExecutionConfig executionConfig, ClassLoader userClassloader, OperatorChain<?, ?> operatorChain, InflightDataRescalingDescriptor inflightDataRescalingDescriptor, Function<Integer, StreamPartitioner<?>> gatePartitioners, TaskInfo taskInfo) {
    checkNotNull(operatorChain);
    List<Input> operatorInputs = mainOperator.getInputs();
    int inputsCount = operatorInputs.size();
    StreamOneInputProcessor<?>[] inputProcessors = new StreamOneInputProcessor[inputsCount];
    // A single counter shared by all network outputs created below.
    Counter networkRecordsIn = new SimpleCounter();
    ioMetricGroup.reuseRecordsInputCounter(networkRecordsIn);
    checkState(configuredInputs.length == inputsCount, "Number of configured inputs in StreamConfig [%s] doesn't match the main operator's number of inputs [%s]", configuredInputs.length, inputsCount);
    // Step 1: create the raw task input for every configured input.
    StreamTaskInput[] inputs = new StreamTaskInput[inputsCount];
    for (int i = 0; i < inputsCount; i++) {
        StreamConfig.InputConfig configuredInput = configuredInputs[i];
        if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
            StreamConfig.NetworkInputConfig networkInput = (StreamConfig.NetworkInputConfig) configuredInput;
            // The gate is selected by the input's gate index, which need not equal the input index i.
            inputs[i] = StreamTaskNetworkInputFactory.create(checkpointedInputGates[networkInput.getInputGateIndex()], networkInput.getTypeSerializer(), ioManager, new StatusWatermarkValve(checkpointedInputGates[networkInput.getInputGateIndex()].getNumberOfInputChannels()), i, inflightDataRescalingDescriptor, gatePartitioners, taskInfo);
        } else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
            StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInput;
            inputs[i] = operatorChain.getSourceTaskInput(sourceInput);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
        }
    }
    // Null unless the operator itself decides which input to read next.
    InputSelectable inputSelectable = mainOperator instanceof InputSelectable ? (InputSelectable) mainOperator : null;
    // NOTE(review): the sorting flags are read from streamConfig.getInputs(...) while the loops use
    // the configuredInputs parameter; both are indexed by the same i, so they are presumably the
    // same configuration in the same order. Confirm against the caller.
    StreamConfig.InputConfig[] inputConfigs = streamConfig.getInputs(userClassloader);
    boolean anyRequiresSorting = Arrays.stream(inputConfigs).anyMatch(StreamConfig::requiresSorting);
    // Step 2 (optional): wrap the inputs when at least one of them requires sorting.
    if (anyRequiresSorting) {
        if (inputSelectable != null) {
            throw new IllegalStateException("The InputSelectable interface is not supported with sorting inputs");
        }
        // The four arrays below are built with the same index filter, so the sorting-related
        // arrays line up element by element.
        StreamTaskInput[] sortingInputs = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> inputs[idx]).toArray(StreamTaskInput[]::new);
        KeySelector[] sortingInputKeySelectors = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> streamConfig.getStatePartitioner(idx, userClassloader)).toArray(KeySelector[]::new);
        // NOTE(review): despite the "KeySerializers" name these come from getTypeSerializerIn, so
        // they look like serializers of the input records rather than of the keys. Confirm.
        TypeSerializer[] sortingInputKeySerializers = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> streamConfig.getTypeSerializerIn(idx, userClassloader)).toArray(TypeSerializer[]::new);
        StreamTaskInput[] passThroughInputs = IntStream.range(0, inputsCount).filter(idx -> !requiresSorting(inputConfigs[idx])).mapToObj(idx -> inputs[idx]).toArray(StreamTaskInput[]::new);
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(ownerTask, sortingInputs, sortingInputKeySelectors, sortingInputKeySerializers, streamConfig.getStateKeySerializer(userClassloader), passThroughInputs, memoryManager, ioManager, executionConfig.isObjectReuseEnabled(), streamConfig.getManagedMemoryFractionOperatorUseCaseOfSlot(ManagedMemoryUseCase.OPERATOR, taskManagerConfig, userClassloader), jobConfig, executionConfig);
        StreamTaskInput<?>[] sortedInputs = selectableSortingInputs.getSortedInputs();
        StreamTaskInput<?>[] passedThroughInputs = selectableSortingInputs.getPassThroughInputs();
        // Write the wrapped inputs back into their original positions: each group is consumed in
        // order, using the same sorting flag that was used to split them above.
        int sortedIndex = 0;
        int passThroughIndex = 0;
        for (int i = 0; i < inputs.length; i++) {
            if (requiresSorting(inputConfigs[i])) {
                inputs[i] = sortedInputs[sortedIndex];
                sortedIndex++;
            } else {
                inputs[i] = passedThroughInputs[passThroughIndex];
                passThroughIndex++;
            }
        }
        // The operator is known not to be InputSelectable here, so the wrapper's selector is used.
        inputSelectable = selectableSortingInputs.getInputSelectable();
    }
    // Step 3: pair every (possibly wrapped) input with its output and build the processors.
    for (int i = 0; i < inputsCount; i++) {
        StreamConfig.InputConfig configuredInput = configuredInputs[i];
        if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
            StreamTaskNetworkOutput dataOutput = new StreamTaskNetworkOutput<>(operatorChain.getFinishedOnRestoreInputOrDefault(operatorInputs.get(i)), inputWatermarkGauges[i], mainOperatorRecordsIn, networkRecordsIn);
            inputProcessors[i] = new StreamOneInputProcessor(inputs[i], dataOutput, operatorChain);
        } else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
            StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInput;
            OperatorChain.ChainedSource chainedSource = operatorChain.getChainedSource(sourceInput);
            inputProcessors[i] = new StreamOneInputProcessor(inputs[i], new StreamTaskSourceOutput(chainedSource.getSourceOutput(), inputWatermarkGauges[i], chainedSource.getSourceTaskInput().getOperator().getSourceMetricGroup()), operatorChain);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
        }
    }
    return new StreamMultipleInputProcessor(new MultipleInputSelectionHandler(inputSelectable, inputsCount), inputProcessors);
}
Also used : IntStream(java.util.stream.IntStream) TaskIOMetricGroup(org.apache.flink.runtime.metrics.groups.TaskIOMetricGroup) Arrays(java.util.Arrays) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) InputSelectable(org.apache.flink.streaming.api.operators.InputSelectable) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) CheckpointedInputGate(org.apache.flink.streaming.runtime.io.checkpointing.CheckpointedInputGate) StreamConfig.requiresSorting(org.apache.flink.streaming.api.graph.StreamConfig.requiresSorting) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Function(java.util.function.Function) InflightDataRescalingDescriptor(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptor) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) SourceOperatorStreamTask(org.apache.flink.streaming.runtime.tasks.SourceOperatorStreamTask) SimpleCounter(org.apache.flink.metrics.SimpleCounter) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) WatermarkGaugeExposingOutput(org.apache.flink.streaming.runtime.tasks.WatermarkGaugeExposingOutput) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) StatusWatermarkValve(org.apache.flink.streaming.runtime.watermarkstatus.StatusWatermarkValve) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) Configuration(org.apache.flink.configuration.Configuration) 
TaskInfo(org.apache.flink.api.common.TaskInfo) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) InternalSourceReaderMetricGroup(org.apache.flink.runtime.metrics.groups.InternalSourceReaderMetricGroup) List(java.util.List) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OperatorChain(org.apache.flink.streaming.runtime.tasks.OperatorChain) Internal(org.apache.flink.annotation.Internal) MultiInputSortingDataInput(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Counter(org.apache.flink.metrics.Counter) WatermarkGauge(org.apache.flink.streaming.runtime.metrics.WatermarkGauge) Input(org.apache.flink.streaming.api.operators.Input) InputSelectable(org.apache.flink.streaming.api.operators.InputSelectable) KeySelector(org.apache.flink.api.java.functions.KeySelector) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) MultiInputSortingDataInput(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput) Input(org.apache.flink.streaming.api.operators.Input) SimpleCounter(org.apache.flink.metrics.SimpleCounter) Counter(org.apache.flink.metrics.Counter) SimpleCounter(org.apache.flink.metrics.SimpleCounter) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StatusWatermarkValve(org.apache.flink.streaming.runtime.watermarkstatus.StatusWatermarkValve)

Example 2 with StreamPartitioner

use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.

the class StreamGraphGeneratorTest method testSetupOfKeyGroupPartitioner.

/**
 * Tests that the edge feeding an operator behind {@code keyBy} carries a
 * KeyGroupStreamPartitioner when a maximum parallelism is configured on the environment.
 */
@Test
public void testSetupOfKeyGroupPartitioner() {
    int maxParallelism = 42;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setMaxParallelism(maxParallelism);
    DataStream<Integer> source = env.fromElements(1, 2, 3);
    DataStream<Integer> keyedResult = source.keyBy(value -> value).map(new NoOpIntMap());
    keyedResult.addSink(new DiscardingSink<>());
    StreamGraph graph = env.getStreamGraph();
    StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId());
    StreamPartitioner<?> streamPartitioner = keyedResultNode.getInEdges().get(0).getPartitioner();
    // The partitioner used to be fetched and then ignored, so the test could only fail by
    // exception. Check at least that the keyed edge uses the key-group partitioner. The class
    // name is fully qualified so that no additional import is required.
    assertTrue("Expected a KeyGroupStreamPartitioner on the keyed edge but found: " + streamPartitioner, streamPartitioner instanceof org.apache.flink.streaming.runtime.partitioner.KeyGroupStreamPartitioner);
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) SlotSharingGroup(org.apache.flink.api.common.operators.SlotSharingGroup) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) Function(org.apache.flink.api.common.functions.Function) Assertions(org.assertj.core.api.Assertions) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) StreamTask(org.apache.flink.streaming.runtime.tasks.StreamTask) Collection(java.util.Collection) ConnectedStreams(org.apache.flink.streaming.api.datastream.ConnectedStreams) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) GlobalPartitioner(org.apache.flink.streaming.runtime.partitioner.GlobalPartitioner) List(java.util.List) NoOpIntMap(org.apache.flink.streaming.util.NoOpIntMap) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) Matchers.is(org.hamcrest.Matchers.is) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) 
StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) AbstractUdfStreamOperator(org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) Watermark(org.apache.flink.streaming.api.watermark.Watermark) SavepointConfigOptions(org.apache.flink.runtime.jobgraph.SavepointConfigOptions) HashMap(java.util.HashMap) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Collector(org.apache.flink.util.Collector) Matchers.iterableWithSize(org.hamcrest.Matchers.iterableWithSize) Output(org.apache.flink.streaming.api.operators.Output) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TestExpandingSink(org.apache.flink.streaming.util.TestExpandingSink) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) Description(org.hamcrest.Description) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) Assert.assertTrue(org.junit.Assert.assertTrue) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) 
Test(org.junit.Test) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) FeatureMatcher(org.hamcrest.FeatureMatcher) StreamExchangeMode(org.apache.flink.streaming.api.transformations.StreamExchangeMode) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) Matcher(org.hamcrest.Matcher) Transformation(org.apache.flink.api.dag.Transformation) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) OutputTypeConfigurable(org.apache.flink.streaming.api.operators.OutputTypeConfigurable) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) NoOpIntMap(org.apache.flink.streaming.util.NoOpIntMap) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 3 with StreamPartitioner

use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.

the class StreamGraphGeneratorTest method testMaxParallelismWithConnectedKeyedStream.

/**
 * Tests the stream graph built for two connected, keyed inputs that declare different maximum
 * parallelism values: both edges into the co-map operator must carry a
 * KeyGroupStreamPartitioner.
 */
@Test
public void testMaxParallelismWithConnectedKeyedStream() {
    int maxParallelism = 42;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> input1 = env.fromElements(1, 2, 3, 4).setMaxParallelism(128);
    DataStream<Integer> input2 = env.fromElements(1, 2, 3, 4).setMaxParallelism(129);
    env.getConfig().setMaxParallelism(maxParallelism);
    DataStream<Integer> keyedResult = input1.connect(input2).keyBy(value -> value, value -> value).map(new NoOpIntCoMap());
    keyedResult.addSink(new DiscardingSink<>());
    StreamGraph graph = env.getStreamGraph();
    StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId());
    StreamPartitioner<?> streamPartitioner1 = keyedResultNode.getInEdges().get(0).getPartitioner();
    StreamPartitioner<?> streamPartitioner2 = keyedResultNode.getInEdges().get(1).getPartitioner();
    // Both partitioners used to be fetched and then ignored, so the test could only fail by
    // exception. Verify that each keyed input reaches the operator through the key-group
    // partitioner. The class name is fully qualified so that no additional import is required.
    assertTrue("Expected a KeyGroupStreamPartitioner on the first input but found: " + streamPartitioner1, streamPartitioner1 instanceof org.apache.flink.streaming.runtime.partitioner.KeyGroupStreamPartitioner);
    assertTrue("Expected a KeyGroupStreamPartitioner on the second input but found: " + streamPartitioner2, streamPartitioner2 instanceof org.apache.flink.streaming.runtime.partitioner.KeyGroupStreamPartitioner);
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) SlotSharingGroup(org.apache.flink.api.common.operators.SlotSharingGroup) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) Function(org.apache.flink.api.common.functions.Function) Assertions(org.assertj.core.api.Assertions) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) StreamTask(org.apache.flink.streaming.runtime.tasks.StreamTask) Collection(java.util.Collection) ConnectedStreams(org.apache.flink.streaming.api.datastream.ConnectedStreams) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) GlobalPartitioner(org.apache.flink.streaming.runtime.partitioner.GlobalPartitioner) List(java.util.List) NoOpIntMap(org.apache.flink.streaming.util.NoOpIntMap) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) Matchers.is(org.hamcrest.Matchers.is) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) 
StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) AbstractUdfStreamOperator(org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) Watermark(org.apache.flink.streaming.api.watermark.Watermark) SavepointConfigOptions(org.apache.flink.runtime.jobgraph.SavepointConfigOptions) HashMap(java.util.HashMap) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Collector(org.apache.flink.util.Collector) Matchers.iterableWithSize(org.hamcrest.Matchers.iterableWithSize) Output(org.apache.flink.streaming.api.operators.Output) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TestExpandingSink(org.apache.flink.streaming.util.TestExpandingSink) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) Description(org.hamcrest.Description) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) Assert.assertTrue(org.junit.Assert.assertTrue) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) 
Test(org.junit.Test) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) FeatureMatcher(org.hamcrest.FeatureMatcher) StreamExchangeMode(org.apache.flink.streaming.api.transformations.StreamExchangeMode) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) Matcher(org.hamcrest.Matcher) Transformation(org.apache.flink.api.dag.Transformation) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) OutputTypeConfigurable(org.apache.flink.streaming.api.operators.OutputTypeConfigurable) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 4 with StreamPartitioner

use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.

the class StreamTask method createRecordWriter.

/**
 * Builds the record writer for one output edge of a task.
 *
 * <p>The edge's partitioner is cloned with the user-code class loader, configured with the
 * number of target key groups when it is configurable, and then used as the channel selector of
 * the writer. The writer is registered with the task's I/O metric group before it is returned.
 *
 * @param edge the output edge whose partitioner is used
 * @param outputIndex index of the result partition writer in {@code environment}
 * @param environment supplies the class loader, the partition writer and the metric group
 * @param taskNameWithSubtask name used for logging and as the writer's task name
 * @param bufferTimeout timeout handed to the writer builder
 * @return the fully configured record writer
 */
@SuppressWarnings("unchecked")
private static <OUT> RecordWriter<SerializationDelegate<StreamRecord<OUT>>> createRecordWriter(StreamEdge edge, int outputIndex, Environment environment, String taskNameWithSubtask, long bufferTimeout) {
    StreamPartitioner<OUT> partitioner = null;
    try {
        // A clone is used instead of the edge's own instance; see
        // https://issues.apache.org/jira/browse/FLINK-14087 for the kind of problem this is
        // presumably meant to avoid.
        StreamPartitioner<OUT> edgePartitioner = (StreamPartitioner<OUT>) edge.getPartitioner();
        ClassLoader userCodeLoader = environment.getUserCodeClassLoader().asClassLoader();
        partitioner = InstantiationUtil.clone(edgePartitioner, userCodeLoader);
    } catch (Exception cloneFailure) {
        ExceptionUtils.rethrow(cloneFailure);
    }
    LOG.debug("Using partitioner {} for output {} of task {}", partitioner, outputIndex, taskNameWithSubtask);
    ResultPartitionWriter partitionWriter = environment.getWriter(outputIndex);
    // Configurable partitioners receive the number of key groups (aka max. parallelism), but
    // only when the writer reports a positive value.
    if (partitioner instanceof ConfigurableStreamPartitioner) {
        int targetKeyGroups = partitionWriter.getNumTargetKeyGroups();
        if (targetKeyGroups > 0) {
            ((ConfigurableStreamPartitioner) partitioner).configure(targetKeyGroups);
        }
    }
    RecordWriter<SerializationDelegate<StreamRecord<OUT>>> writer =
            new RecordWriterBuilder<SerializationDelegate<StreamRecord<OUT>>>()
                    .setChannelSelector(partitioner)
                    .setTimeout(bufferTimeout)
                    .setTaskName(taskNameWithSubtask)
                    .build(partitionWriter);
    writer.setMetricGroup(environment.getMetricGroup().getIOMetricGroup());
    return writer;
}
Also used : ConfigurableStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ConfigurableStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) RecordWriterBuilder(org.apache.flink.runtime.io.network.api.writer.RecordWriterBuilder) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) SerializationDelegate(org.apache.flink.runtime.plugable.SerializationDelegate) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) AsynchronousException(org.apache.flink.runtime.taskmanager.AsynchronousException) FlinkException(org.apache.flink.util.FlinkException) RunnableWithException(org.apache.flink.util.function.RunnableWithException) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) IOException(java.io.IOException) FutureUtils.assertNoException(org.apache.flink.util.concurrent.FutureUtils.assertNoException) CompletionException(java.util.concurrent.CompletionException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException)

Example 5 with StreamPartitioner

use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.

the class OperatorChain method createStreamOutput.

/**
 * Creates the network output for one outgoing edge of the operator chain.
 *
 * <p>The serializer is chosen depending on whether the edge carries a side output or the main
 * output. The edge's partitioner is configured with the number of target key groups when it is
 * configurable, and a record writer using it is wrapped into the returned output.
 *
 * @param edge the outgoing edge
 * @param upStreamConfig configuration providing the serializers and the buffer timeout
 * @param outputIndex index of the result partition writer in {@code taskEnvironment}
 * @param taskEnvironment supplies the class loader, the partition writer and the metric group
 * @param taskName task name, used for logging only
 * @return the output that writes records of this edge to the network
 */
private <T> RecordWriterOutput<T> createStreamOutput(StreamEdge edge, StreamConfig upStreamConfig, int outputIndex, Environment taskEnvironment, String taskName) {
    // Null when the edge carries the main output rather than a side output.
    OutputTag sideOutputTag = edge.getOutputTag();
    TypeSerializer outSerializer;
    if (sideOutputTag != null) {
        // side output
        outSerializer = upStreamConfig.getTypeSerializerSideOut(sideOutputTag, taskEnvironment.getUserClassLoader());
    } else {
        // main output
        outSerializer = upStreamConfig.getTypeSerializerOut(taskEnvironment.getUserClassLoader());
    }
    @SuppressWarnings("unchecked") StreamPartitioner<T> outputPartitioner = (StreamPartitioner<T>) edge.getPartitioner();
    // The message needs one placeholder per argument; the task name was previously dropped
    // because its "{}" was missing.
    LOG.debug("Using partitioner {} for output {} of task {}", outputPartitioner, outputIndex, taskName);
    ResultPartitionWriter bufferWriter = taskEnvironment.getWriter(outputIndex);
    // we initialize the partitioner here with the number of key groups (aka max. parallelism)
    if (outputPartitioner instanceof ConfigurableStreamPartitioner) {
        int numKeyGroups = bufferWriter.getNumTargetKeyGroups();
        if (0 < numKeyGroups) {
            ((ConfigurableStreamPartitioner) outputPartitioner).configure(numKeyGroups);
        }
    }
    StreamRecordWriter<SerializationDelegate<StreamRecord<T>>> output = new StreamRecordWriter<>(bufferWriter, outputPartitioner, upStreamConfig.getBufferTimeout());
    output.setMetricGroup(taskEnvironment.getMetricGroup().getIOMetricGroup());
    return new RecordWriterOutput<>(output, outSerializer, sideOutputTag, this);
}
Also used : ConfigurableStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) ResultPartitionWriter(org.apache.flink.runtime.io.network.api.writer.ResultPartitionWriter) SerializationDelegate(org.apache.flink.runtime.plugable.SerializationDelegate) RecordWriterOutput(org.apache.flink.streaming.runtime.io.RecordWriterOutput) StreamRecordWriter(org.apache.flink.streaming.runtime.io.StreamRecordWriter) ConfigurableStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.ConfigurableStreamPartitioner) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) OutputTag(org.apache.flink.util.OutputTag)

Aggregations

StreamPartitioner (org.apache.flink.streaming.runtime.partitioner.StreamPartitioner)5 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)4 Arrays (java.util.Arrays)3 List (java.util.List)3 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)3 Configuration (org.apache.flink.configuration.Configuration)3 ManagedMemoryUseCase (org.apache.flink.core.memory.ManagedMemoryUseCase)3 Watermark (org.apache.flink.streaming.api.watermark.Watermark)3 LatencyMarker (org.apache.flink.streaming.runtime.streamrecord.LatencyMarker)3 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 Function (org.apache.flink.api.common.functions.Function)2 ResourceSpec (org.apache.flink.api.common.operators.ResourceSpec)2 SlotSharingGroup (org.apache.flink.api.common.operators.SlotSharingGroup)2 MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor)2 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)2 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)2