use of org.apache.flink.streaming.api.operators.Input in project flink by apache.
the class StreamMultipleInputProcessorFactory method create.
/**
 * Assembles a {@link StreamMultipleInputProcessor} for the given multiple-input operator.
 *
 * <p>The work happens in three phases:
 *
 * <ol>
 *   <li>Every configured input is turned into a {@link StreamTaskInput}: network inputs are
 *       created through {@link StreamTaskNetworkInputFactory}, chained source inputs are
 *       looked up in the operator chain.
 *   <li>If at least one input requires sorting, the inputs are handed to {@link
 *       MultiInputSortingDataInput#wrapInputs} and the wrapped inputs are written back to the
 *       positions they originally occupied. The input selectable returned by the wrapper
 *       replaces the operator's own one.
 *   <li>Each input is paired with its data output inside a {@link StreamOneInputProcessor}.
 * </ol>
 *
 * @return the processor combining all per-input processors behind a {@link
 *     MultipleInputSelectionHandler}
 * @throws IllegalStateException if sorting is required while the main operator implements
 *     {@link InputSelectable}
 * @throws UnsupportedOperationException if a configured input is neither a network input nor
 *     a source input
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public static StreamMultipleInputProcessor create(TaskInvokable ownerTask, CheckpointedInputGate[] checkpointedInputGates, StreamConfig.InputConfig[] configuredInputs, IOManager ioManager, MemoryManager memoryManager, TaskIOMetricGroup ioMetricGroup, Counter mainOperatorRecordsIn, MultipleInputStreamOperator<?> mainOperator, WatermarkGauge[] inputWatermarkGauges, StreamConfig streamConfig, Configuration taskManagerConfig, Configuration jobConfig, ExecutionConfig executionConfig, ClassLoader userClassloader, OperatorChain<?, ?> operatorChain, InflightDataRescalingDescriptor inflightDataRescalingDescriptor, Function<Integer, StreamPartitioner<?>> gatePartitioners, TaskInfo taskInfo) {
    checkNotNull(operatorChain);

    List<Input> mainOperatorInputs = mainOperator.getInputs();
    int inputsCount = mainOperatorInputs.size();

    // The network counter is registered before the input count is validated.
    Counter networkRecordsIn = new SimpleCounter();
    ioMetricGroup.reuseRecordsInputCounter(networkRecordsIn);
    checkState(configuredInputs.length == inputsCount, "Number of configured inputs in StreamConfig [%s] doesn't match the main operator's number of inputs [%s]", configuredInputs.length, inputsCount);

    // Phase 1: one StreamTaskInput per configured input.
    StreamTaskInput[] taskInputs = new StreamTaskInput[inputsCount];
    for (int inputIdx = 0; inputIdx < inputsCount; inputIdx++) {
        StreamConfig.InputConfig config = configuredInputs[inputIdx];
        if (config instanceof StreamConfig.NetworkInputConfig) {
            StreamConfig.NetworkInputConfig networkConfig = (StreamConfig.NetworkInputConfig) config;
            CheckpointedInputGate gate = checkpointedInputGates[networkConfig.getInputGateIndex()];
            taskInputs[inputIdx] =
                    StreamTaskNetworkInputFactory.create(
                            gate,
                            networkConfig.getTypeSerializer(),
                            ioManager,
                            new StatusWatermarkValve(gate.getNumberOfInputChannels()),
                            inputIdx,
                            inflightDataRescalingDescriptor,
                            gatePartitioners,
                            taskInfo);
        } else if (config instanceof StreamConfig.SourceInputConfig) {
            taskInputs[inputIdx] = operatorChain.getSourceTaskInput((StreamConfig.SourceInputConfig) config);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + config);
        }
    }

    InputSelectable inputSelectable = null;
    if (mainOperator instanceof InputSelectable) {
        inputSelectable = (InputSelectable) mainOperator;
    }

    // Phase 2: wrap the inputs when any of them has to be sorted.
    StreamConfig.InputConfig[] inputConfigs = streamConfig.getInputs(userClassloader);
    if (Arrays.stream(inputConfigs).anyMatch(StreamConfig::requiresSorting)) {
        if (inputSelectable != null) {
            throw new IllegalStateException("The InputSelectable interface is not supported with sorting inputs");
        }

        // Positions of the sorted and of the pass-through inputs, both in ascending order.
        int[] sortedPositions = IntStream.range(0, inputsCount).filter(pos -> requiresSorting(inputConfigs[pos])).toArray();
        int[] passThroughPositions = IntStream.range(0, inputsCount).filter(pos -> !requiresSorting(inputConfigs[pos])).toArray();

        StreamTaskInput[] sortingInputs = Arrays.stream(sortedPositions).mapToObj(pos -> taskInputs[pos]).toArray(StreamTaskInput[]::new);
        KeySelector[] sortingInputKeySelectors = Arrays.stream(sortedPositions).mapToObj(pos -> streamConfig.getStatePartitioner(pos, userClassloader)).toArray(KeySelector[]::new);
        TypeSerializer[] sortingInputKeySerializers = Arrays.stream(sortedPositions).mapToObj(pos -> streamConfig.getTypeSerializerIn(pos, userClassloader)).toArray(TypeSerializer[]::new);
        StreamTaskInput[] passThroughInputs = Arrays.stream(passThroughPositions).mapToObj(pos -> taskInputs[pos]).toArray(StreamTaskInput[]::new);

        SelectableSortingInputs wrapped =
                MultiInputSortingDataInput.wrapInputs(
                        ownerTask,
                        sortingInputs,
                        sortingInputKeySelectors,
                        sortingInputKeySerializers,
                        streamConfig.getStateKeySerializer(userClassloader),
                        passThroughInputs,
                        memoryManager,
                        ioManager,
                        executionConfig.isObjectReuseEnabled(),
                        streamConfig.getManagedMemoryFractionOperatorUseCaseOfSlot(ManagedMemoryUseCase.OPERATOR, taskManagerConfig, userClassloader),
                        jobConfig,
                        executionConfig);

        // Put the wrapped inputs back into the slots their unwrapped counterparts came from.
        StreamTaskInput<?>[] wrappedSorted = wrapped.getSortedInputs();
        for (int k = 0; k < sortedPositions.length; k++) {
            taskInputs[sortedPositions[k]] = wrappedSorted[k];
        }
        StreamTaskInput<?>[] wrappedPassThrough = wrapped.getPassThroughInputs();
        for (int k = 0; k < passThroughPositions.length; k++) {
            taskInputs[passThroughPositions[k]] = wrappedPassThrough[k];
        }
        inputSelectable = wrapped.getInputSelectable();
    }

    // Phase 3: pair every input with its output.
    StreamOneInputProcessor<?>[] inputProcessors = new StreamOneInputProcessor[inputsCount];
    for (int inputIdx = 0; inputIdx < inputsCount; inputIdx++) {
        StreamConfig.InputConfig config = configuredInputs[inputIdx];
        if (config instanceof StreamConfig.NetworkInputConfig) {
            StreamTaskNetworkOutput networkOutput = new StreamTaskNetworkOutput<>(operatorChain.getFinishedOnRestoreInputOrDefault(mainOperatorInputs.get(inputIdx)), inputWatermarkGauges[inputIdx], mainOperatorRecordsIn, networkRecordsIn);
            inputProcessors[inputIdx] = new StreamOneInputProcessor(taskInputs[inputIdx], networkOutput, operatorChain);
        } else if (config instanceof StreamConfig.SourceInputConfig) {
            OperatorChain.ChainedSource chainedSource = operatorChain.getChainedSource((StreamConfig.SourceInputConfig) config);
            StreamTaskSourceOutput sourceOutput = new StreamTaskSourceOutput(chainedSource.getSourceOutput(), inputWatermarkGauges[inputIdx], chainedSource.getSourceTaskInput().getOperator().getSourceMetricGroup());
            inputProcessors[inputIdx] = new StreamOneInputProcessor(taskInputs[inputIdx], sourceOutput, operatorChain);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + config);
        }
    }

    MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(inputSelectable, inputsCount);
    return new StreamMultipleInputProcessor(selectionHandler, inputProcessors);
}
use of org.apache.flink.streaming.api.operators.Input in project flink by apache.
the class DataStreamBatchExecutionITCase method batchKeyedNonKeyedTwoInputOperator.
/**
 * Checks the record order seen by a two-input operator in BATCH mode when only the first input
 * is keyed: every record of the non-keyed (regular) input must be emitted before any record of
 * the keyed input.
 *
 * <p>The expected output lists the regular records in their insertion order, followed by the
 * keyed records grouped by key and ordered by timestamp within each key.
 */
@Test
public void batchKeyedNonKeyedTwoInputOperator() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);

    // First input: becomes the keyed side below.
    final DataStream<Tuple2<String, Integer>> keyedElements =
            env.fromElements(
                    Tuple2.of("regular2", 4),
                    Tuple2.of("regular1", 3),
                    Tuple2.of("regular1", 2),
                    Tuple2.of("regular2", 1));
    final DataStream<Tuple2<String, Integer>> keyedInput =
            keyedElements.assignTimestampsAndWatermarks(
                    WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                            .withTimestampAssigner((in, ts) -> in.f1));

    // Second input: stays non-keyed.
    final DataStream<Tuple2<String, Integer>> regularElements =
            env.fromElements(
                    Tuple2.of("regular4", 4),
                    Tuple2.of("regular3", 3),
                    Tuple2.of("regular3", 2),
                    Tuple2.of("regular4", 1));
    final DataStream<Tuple2<String, Integer>> regularInput =
            regularElements.assignTimestampsAndWatermarks(
                    WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                            .withTimestampAssigner((in, ts) -> in.f1));

    final DataStream<String> result =
            keyedInput
                    .keyBy(in -> in.f0)
                    .connect(regularInput)
                    .transform("operator", BasicTypeInfo.STRING_TYPE_INFO, new TwoInputIdentityOperator());

    final List<String> expectedOrder =
            Arrays.asList(
                    "(regular4,4)",
                    "(regular3,3)",
                    "(regular3,2)",
                    "(regular4,1)",
                    "(regular1,2)",
                    "(regular1,3)",
                    "(regular2,1)",
                    "(regular2,4)");

    try (CloseableIterator<String> collected = result.executeAndCollect()) {
        final List<String> actualOrder = CollectionUtil.iteratorToList(collected);
        assertThat(actualOrder, equalTo(expectedOrder));
    }
}
use of org.apache.flink.streaming.api.operators.Input in project flink by apache.
the class BatchMultipleInputStreamOperatorTest method testProcess.
/**
 * Drives one record and one end-of-input signal through each of the three inputs of the
 * operator built by {@code createMultipleInputStreamOperator()} and, after every step, checks
 * which of the wrapped operators has observed the record or the end-of-input.
 *
 * <p>The statements are strictly order-dependent: each assertion describes the state reached
 * after the preceding {@code processElement} / {@code endInput} call.
 */
@Test
public void testProcess() throws Exception {
TestingBatchMultipleInputStreamOperator op = createMultipleInputStreamOperator();
List<StreamElement> outputData = op.getOutputData();
// Resolve the wrapped operators: joinOp2 is the tail; joinOp1 is the tail's input wrapper 0;
// aggOp1 and aggOp2 are joinOp1's input wrappers 0 and 1.
TestingTwoInputStreamOperator joinOp2 = (TestingTwoInputStreamOperator) op.getTailWrapper().getStreamOperator();
TableOperatorWrapper<?> joinWrapper1 = op.getTailWrapper().getInputWrappers().get(0);
TestingTwoInputStreamOperator joinOp1 = (TestingTwoInputStreamOperator) joinWrapper1.getStreamOperator();
TableOperatorWrapper<?> aggWrapper1 = joinWrapper1.getInputWrappers().get(0);
TestingOneInputStreamOperator aggOp1 = (TestingOneInputStreamOperator) aggWrapper1.getStreamOperator();
TableOperatorWrapper<?> aggWrapper2 = joinWrapper1.getInputWrappers().get(1);
TestingOneInputStreamOperator aggOp2 = (TestingOneInputStreamOperator) aggWrapper2.getStreamOperator();
// The operator must expose exactly three inputs; list position k is used below as input id k + 1.
List<Input> inputs = op.getInputs();
assertEquals(3, inputs.size());
Input input1 = inputs.get(0);
Input input2 = inputs.get(1);
Input input3 = inputs.get(2);
assertTrue(input1 instanceof OneInput);
assertTrue(input2 instanceof OneInput);
assertTrue(input3 instanceof SecondInputOfTwoInput);
// Initial state: no operator has seen a record and nothing has been emitted.
assertNull(joinOp2.getCurrentElement1());
assertNull(joinOp2.getCurrentElement2());
assertNull(joinOp1.getCurrentElement1());
assertNull(joinOp1.getCurrentElement2());
assertNull(aggOp1.getCurrentElement());
assertNull(aggOp2.getCurrentElement());
assertTrue(outputData.isEmpty());
// process first input (input id is 3): the record must arrive at joinOp2's second input
StreamRecord<RowData> element1 = new StreamRecord<>(GenericRowData.of(StringData.fromString("123")), 456);
input3.processElement(element1);
assertEquals(element1, joinOp2.getCurrentElement2());
assertNull(joinOp2.getCurrentElement1());
assertTrue(outputData.isEmpty());
// finish first input: joinOp2 must record end-of-input for its input 2, still without output
assertTrue(joinOp2.getEndInputs().isEmpty());
op.endInput(3);
assertTrue(outputData.isEmpty());
assertEquals(Collections.singletonList(2), joinOp2.getEndInputs());
// process second input (input id is 1): the record reaches aggOp1 but neither join yet
StreamRecord<RowData> element2 = new StreamRecord<>(GenericRowData.of(StringData.fromString("124")), 457);
input1.processElement(element2);
assertEquals(element2, aggOp1.getCurrentElement());
assertNull(joinOp1.getCurrentElement1());
assertNull(joinOp2.getCurrentElement1());
assertTrue(outputData.isEmpty());
// finish second input: joinOp1 records end-of-input 1 and now holds element2 as its first element
assertTrue(joinOp1.getEndInputs().isEmpty());
op.endInput(1);
assertEquals(Collections.singletonList(1), joinOp1.getEndInputs());
assertEquals(Collections.singletonList(2), joinOp2.getEndInputs());
assertEquals(element2, joinOp1.getCurrentElement1());
assertTrue(outputData.isEmpty());
// process third input (input id is 2): the record reaches aggOp2 but neither join yet
StreamRecord<RowData> element3 = new StreamRecord<>(GenericRowData.of(StringData.fromString("125")), 458);
input2.processElement(element3);
assertEquals(element3, aggOp2.getCurrentElement());
assertNull(joinOp1.getCurrentElement2());
assertNull(joinOp2.getCurrentElement1());
assertTrue(outputData.isEmpty());
// finish third input: both joins have now seen both end-of-inputs (joinOp1 in order 1, 2;
// joinOp2 in order 2, 1), joinOp1 holds element3 as its second element, and three
// elements have been emitted
assertEquals(Collections.singletonList(1), joinOp1.getEndInputs());
op.endInput(2);
assertEquals(Arrays.asList(1, 2), joinOp1.getEndInputs());
assertEquals(Arrays.asList(2, 1), joinOp2.getEndInputs());
assertEquals(element3, joinOp1.getCurrentElement2());
assertEquals(3, outputData.size());
}
use of org.apache.flink.streaming.api.operators.Input in project flink by apache.
the class MultiInputStreamOperatorTestHarness method processElement.
/**
 * Pushes a single record into one input of the operator under test.
 *
 * <p>The input is looked up by its position in the list returned by the operator's {@code
 * getInputs()}. The record is first installed as the key context element and then processed.
 *
 * @param idx zero-based position of the target input in the operator's input list
 * @param element the record to deliver
 * @throws Exception if setting the key context or processing the record fails
 */
public void processElement(int idx, StreamRecord<?> element) throws Exception {
    final Input targetInput = getCastedOperator().getInputs().get(idx);
    // Key context is set before the record is handed to the input.
    targetInput.setKeyContextElement(element);
    targetInput.processElement(element);
}
use of org.apache.flink.streaming.api.operators.Input in project flink by apache.
the class OperatorChain method createChainedSources.
/**
 * Creates a {@link ChainedSource} for every {@link StreamConfig.SourceInputConfig} found among
 * the configured inputs.
 *
 * <p>Returns an empty map when no configured input is a source input. Otherwise the main
 * operator must be a {@link MultipleInputStreamOperator}. Chained sources receive consecutive
 * input gate indexes starting one past the largest index of the task's existing input gates
 * (or at 0 when the task has none). Depending on {@code isTaskDeployedAsFinished()}, each
 * source is exposed through a {@link StreamTaskFinishedOnRestoreSourceInput} or a regular
 * {@link StreamTaskSourceInput}.
 *
 * @param containingTask task that owns the operator chain
 * @param configuredInputs all inputs configured for the main operator, indexed by input id
 * @param chainedConfigs configs of chained operators, looked up by the source id of the input edge
 * @param userCodeClassloader class loader passed on when creating outputs and operators
 * @param allOpWrappers wrapper list passed on to {@code createOperator}
 * @return the chained sources keyed by their source input config
 */
@SuppressWarnings("rawtypes")
private Map<StreamConfig.SourceInputConfig, ChainedSource> createChainedSources(StreamTask<OUT, OP> containingTask, StreamConfig.InputConfig[] configuredInputs, Map<Integer, StreamConfig> chainedConfigs, ClassLoader userCodeClassloader, List<StreamOperatorWrapper<?, ?>> allOpWrappers) {
    boolean hasSourceInput = Arrays.stream(configuredInputs).anyMatch(StreamConfig.SourceInputConfig.class::isInstance);
    if (!hasSourceInput) {
        return Collections.emptyMap();
    }
    checkState(mainOperatorWrapper.getStreamOperator() instanceof MultipleInputStreamOperator, "Creating chained input is only supported with MultipleInputStreamOperator and MultipleInputStreamTask");

    MultipleInputStreamOperator<?> multiInputOperator = (MultipleInputStreamOperator<?>) mainOperatorWrapper.getStreamOperator();
    List<Input> mainOperatorInputs = multiInputOperator.getInputs();

    // First gate index that is not taken by one of the task's existing input gates.
    int nextSourceGateIndex =
            Arrays.stream(containingTask.getEnvironment().getAllInputGates())
                            .mapToInt(IndexedInputGate::getInputGateIndex)
                            .max()
                            .orElse(-1)
                    + 1;

    Map<StreamConfig.SourceInputConfig, ChainedSource> chainedSources = new HashMap<>();
    for (int inputId = 0; inputId < configuredInputs.length; inputId++) {
        StreamConfig.InputConfig candidate = configuredInputs[inputId];
        if (!(candidate instanceof StreamConfig.SourceInputConfig)) {
            continue;
        }
        StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) candidate;
        StreamConfig sourceConfig = chainedConfigs.get(sourceInput.getInputEdge().getSourceId());
        OutputTag outputTag = sourceInput.getInputEdge().getOutputTag();

        Input downstreamInput = getFinishedOnRestoreInputOrDefault(mainOperatorInputs.get(inputId));
        WatermarkGaugeExposingOutput chainedSourceOutput =
                createChainedSourceOutput(
                        containingTask,
                        sourceConfig,
                        userCodeClassloader,
                        downstreamInput,
                        multiInputOperator.getMetricGroup(),
                        outputTag);

        SourceOperator<?, ?> sourceOperator =
                (SourceOperator<?, ?>)
                        createOperator(
                                containingTask,
                                sourceConfig,
                                userCodeClassloader,
                                (WatermarkGaugeExposingOutput<StreamRecord<OUT>>) chainedSourceOutput,
                                allOpWrappers,
                                true);

        // Each chained source consumes exactly one gate index.
        int gateIndex = nextSourceGateIndex++;
        chainedSources.put(
                sourceInput,
                new ChainedSource(
                        chainedSourceOutput,
                        this.isTaskDeployedAsFinished()
                                ? new StreamTaskFinishedOnRestoreSourceInput<>(sourceOperator, gateIndex, inputId)
                                : new StreamTaskSourceInput<>(sourceOperator, gateIndex, inputId)));
    }
    return chainedSources;
}
Aggregations