use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.
the class StreamMultipleInputProcessorFactory method create.
@SuppressWarnings({ "unchecked", "rawtypes" })
public static StreamMultipleInputProcessor create(TaskInvokable ownerTask, CheckpointedInputGate[] checkpointedInputGates, StreamConfig.InputConfig[] configuredInputs, IOManager ioManager, MemoryManager memoryManager, TaskIOMetricGroup ioMetricGroup, Counter mainOperatorRecordsIn, MultipleInputStreamOperator<?> mainOperator, WatermarkGauge[] inputWatermarkGauges, StreamConfig streamConfig, Configuration taskManagerConfig, Configuration jobConfig, ExecutionConfig executionConfig, ClassLoader userClassloader, OperatorChain<?, ?> operatorChain, InflightDataRescalingDescriptor inflightDataRescalingDescriptor, Function<Integer, StreamPartitioner<?>> gatePartitioners, TaskInfo taskInfo) {
checkNotNull(operatorChain);
List<Input> operatorInputs = mainOperator.getInputs();
int inputsCount = operatorInputs.size();
StreamOneInputProcessor<?>[] inputProcessors = new StreamOneInputProcessor[inputsCount];
Counter networkRecordsIn = new SimpleCounter();
ioMetricGroup.reuseRecordsInputCounter(networkRecordsIn);
checkState(configuredInputs.length == inputsCount, "Number of configured inputs in StreamConfig [%s] doesn't match the main operator's number of inputs [%s]", configuredInputs.length, inputsCount);
StreamTaskInput[] inputs = new StreamTaskInput[inputsCount];
for (int i = 0; i < inputsCount; i++) {
StreamConfig.InputConfig configuredInput = configuredInputs[i];
if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
StreamConfig.NetworkInputConfig networkInput = (StreamConfig.NetworkInputConfig) configuredInput;
inputs[i] = StreamTaskNetworkInputFactory.create(checkpointedInputGates[networkInput.getInputGateIndex()], networkInput.getTypeSerializer(), ioManager, new StatusWatermarkValve(checkpointedInputGates[networkInput.getInputGateIndex()].getNumberOfInputChannels()), i, inflightDataRescalingDescriptor, gatePartitioners, taskInfo);
} else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInput;
inputs[i] = operatorChain.getSourceTaskInput(sourceInput);
} else {
throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
}
}
InputSelectable inputSelectable = mainOperator instanceof InputSelectable ? (InputSelectable) mainOperator : null;
StreamConfig.InputConfig[] inputConfigs = streamConfig.getInputs(userClassloader);
boolean anyRequiresSorting = Arrays.stream(inputConfigs).anyMatch(StreamConfig::requiresSorting);
if (anyRequiresSorting) {
if (inputSelectable != null) {
throw new IllegalStateException("The InputSelectable interface is not supported with sorting inputs");
}
StreamTaskInput[] sortingInputs = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> inputs[idx]).toArray(StreamTaskInput[]::new);
KeySelector[] sortingInputKeySelectors = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> streamConfig.getStatePartitioner(idx, userClassloader)).toArray(KeySelector[]::new);
TypeSerializer[] sortingInputKeySerializers = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> streamConfig.getTypeSerializerIn(idx, userClassloader)).toArray(TypeSerializer[]::new);
StreamTaskInput[] passThroughInputs = IntStream.range(0, inputsCount).filter(idx -> !requiresSorting(inputConfigs[idx])).mapToObj(idx -> inputs[idx]).toArray(StreamTaskInput[]::new);
SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(ownerTask, sortingInputs, sortingInputKeySelectors, sortingInputKeySerializers, streamConfig.getStateKeySerializer(userClassloader), passThroughInputs, memoryManager, ioManager, executionConfig.isObjectReuseEnabled(), streamConfig.getManagedMemoryFractionOperatorUseCaseOfSlot(ManagedMemoryUseCase.OPERATOR, taskManagerConfig, userClassloader), jobConfig, executionConfig);
StreamTaskInput<?>[] sortedInputs = selectableSortingInputs.getSortedInputs();
StreamTaskInput<?>[] passedThroughInputs = selectableSortingInputs.getPassThroughInputs();
int sortedIndex = 0;
int passThroughIndex = 0;
for (int i = 0; i < inputs.length; i++) {
if (requiresSorting(inputConfigs[i])) {
inputs[i] = sortedInputs[sortedIndex];
sortedIndex++;
} else {
inputs[i] = passedThroughInputs[passThroughIndex];
passThroughIndex++;
}
}
inputSelectable = selectableSortingInputs.getInputSelectable();
}
for (int i = 0; i < inputsCount; i++) {
StreamConfig.InputConfig configuredInput = configuredInputs[i];
if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
StreamTaskNetworkOutput dataOutput = new StreamTaskNetworkOutput<>(operatorChain.getFinishedOnRestoreInputOrDefault(operatorInputs.get(i)), inputWatermarkGauges[i], mainOperatorRecordsIn, networkRecordsIn);
inputProcessors[i] = new StreamOneInputProcessor(inputs[i], dataOutput, operatorChain);
} else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInput;
OperatorChain.ChainedSource chainedSource = operatorChain.getChainedSource(sourceInput);
inputProcessors[i] = new StreamOneInputProcessor(inputs[i], new StreamTaskSourceOutput(chainedSource.getSourceOutput(), inputWatermarkGauges[i], chainedSource.getSourceTaskInput().getOperator().getSourceMetricGroup()), operatorChain);
} else {
throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
}
}
return new StreamMultipleInputProcessor(new MultipleInputSelectionHandler(inputSelectable, inputsCount), inputProcessors);
}
use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.
the class StreamGraphGeneratorTest method testSetupOfKeyGroupPartitioner.
/**
* Tests that the KeyGroupStreamPartitioner are properly set up with the correct value of
* maximum parallelism.
*/
@Test
public void testSetupOfKeyGroupPartitioner() {
int maxParallelism = 42;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.getConfig().setMaxParallelism(maxParallelism);
DataStream<Integer> source = env.fromElements(1, 2, 3);
DataStream<Integer> keyedResult = source.keyBy(value -> value).map(new NoOpIntMap());
keyedResult.addSink(new DiscardingSink<>());
StreamGraph graph = env.getStreamGraph();
StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId());
StreamPartitioner<?> streamPartitioner = keyedResultNode.getInEdges().get(0).getPartitioner();
}
use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.
the class StreamGraphGeneratorTest method testMaxParallelismWithConnectedKeyedStream.
/**
* Tests that the max parallelism is properly set for connected streams.
*/
@Test
public void testMaxParallelismWithConnectedKeyedStream() {
int maxParallelism = 42;
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<Integer> input1 = env.fromElements(1, 2, 3, 4).setMaxParallelism(128);
DataStream<Integer> input2 = env.fromElements(1, 2, 3, 4).setMaxParallelism(129);
env.getConfig().setMaxParallelism(maxParallelism);
DataStream<Integer> keyedResult = input1.connect(input2).keyBy(value -> value, value -> value).map(new NoOpIntCoMap());
keyedResult.addSink(new DiscardingSink<>());
StreamGraph graph = env.getStreamGraph();
StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId());
StreamPartitioner<?> streamPartitioner1 = keyedResultNode.getInEdges().get(0).getPartitioner();
StreamPartitioner<?> streamPartitioner2 = keyedResultNode.getInEdges().get(1).getPartitioner();
}
use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.
the class StreamTask method createRecordWriter.
@SuppressWarnings("unchecked")
private static <OUT> RecordWriter<SerializationDelegate<StreamRecord<OUT>>> createRecordWriter(StreamEdge edge, int outputIndex, Environment environment, String taskNameWithSubtask, long bufferTimeout) {
StreamPartitioner<OUT> outputPartitioner = null;
// like the case of https://issues.apache.org/jira/browse/FLINK-14087.
try {
outputPartitioner = InstantiationUtil.clone((StreamPartitioner<OUT>) edge.getPartitioner(), environment.getUserCodeClassLoader().asClassLoader());
} catch (Exception e) {
ExceptionUtils.rethrow(e);
}
LOG.debug("Using partitioner {} for output {} of task {}", outputPartitioner, outputIndex, taskNameWithSubtask);
ResultPartitionWriter bufferWriter = environment.getWriter(outputIndex);
// we initialize the partitioner here with the number of key groups (aka max. parallelism)
if (outputPartitioner instanceof ConfigurableStreamPartitioner) {
int numKeyGroups = bufferWriter.getNumTargetKeyGroups();
if (0 < numKeyGroups) {
((ConfigurableStreamPartitioner) outputPartitioner).configure(numKeyGroups);
}
}
RecordWriter<SerializationDelegate<StreamRecord<OUT>>> output = new RecordWriterBuilder<SerializationDelegate<StreamRecord<OUT>>>().setChannelSelector(outputPartitioner).setTimeout(bufferTimeout).setTaskName(taskNameWithSubtask).build(bufferWriter);
output.setMetricGroup(environment.getMetricGroup().getIOMetricGroup());
return output;
}
use of org.apache.flink.streaming.runtime.partitioner.StreamPartitioner in project flink by apache.
the class OperatorChain method createStreamOutput.
private <T> RecordWriterOutput<T> createStreamOutput(StreamEdge edge, StreamConfig upStreamConfig, int outputIndex, Environment taskEnvironment, String taskName) {
// OutputTag, return null if not sideOutput
OutputTag sideOutputTag = edge.getOutputTag();
TypeSerializer outSerializer = null;
if (edge.getOutputTag() != null) {
// side output
outSerializer = upStreamConfig.getTypeSerializerSideOut(edge.getOutputTag(), taskEnvironment.getUserClassLoader());
} else {
// main output
outSerializer = upStreamConfig.getTypeSerializerOut(taskEnvironment.getUserClassLoader());
}
@SuppressWarnings("unchecked") StreamPartitioner<T> outputPartitioner = (StreamPartitioner<T>) edge.getPartitioner();
LOG.debug("Using partitioner {} for output {} of task ", outputPartitioner, outputIndex, taskName);
ResultPartitionWriter bufferWriter = taskEnvironment.getWriter(outputIndex);
// we initialize the partitioner here with the number of key groups (aka max. parallelism)
if (outputPartitioner instanceof ConfigurableStreamPartitioner) {
int numKeyGroups = bufferWriter.getNumTargetKeyGroups();
if (0 < numKeyGroups) {
((ConfigurableStreamPartitioner) outputPartitioner).configure(numKeyGroups);
}
}
StreamRecordWriter<SerializationDelegate<StreamRecord<T>>> output = new StreamRecordWriter<>(bufferWriter, outputPartitioner, upStreamConfig.getBufferTimeout());
output.setMetricGroup(taskEnvironment.getMetricGroup().getIOMetricGroup());
return new RecordWriterOutput<>(output, outSerializer, sideOutputTag, this);
}
Aggregations