Search in sources :

Example 1 with SelectableSortingInputs

use of org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs in project flink by apache.

the class StreamMultipleInputProcessorFactory method create.

@SuppressWarnings({ "unchecked", "rawtypes" })
public static StreamMultipleInputProcessor create(TaskInvokable ownerTask, CheckpointedInputGate[] checkpointedInputGates, StreamConfig.InputConfig[] configuredInputs, IOManager ioManager, MemoryManager memoryManager, TaskIOMetricGroup ioMetricGroup, Counter mainOperatorRecordsIn, MultipleInputStreamOperator<?> mainOperator, WatermarkGauge[] inputWatermarkGauges, StreamConfig streamConfig, Configuration taskManagerConfig, Configuration jobConfig, ExecutionConfig executionConfig, ClassLoader userClassloader, OperatorChain<?, ?> operatorChain, InflightDataRescalingDescriptor inflightDataRescalingDescriptor, Function<Integer, StreamPartitioner<?>> gatePartitioners, TaskInfo taskInfo) {
    checkNotNull(operatorChain);
    List<Input> operatorInputs = mainOperator.getInputs();
    int inputsCount = operatorInputs.size();
    StreamOneInputProcessor<?>[] inputProcessors = new StreamOneInputProcessor[inputsCount];
    Counter networkRecordsIn = new SimpleCounter();
    ioMetricGroup.reuseRecordsInputCounter(networkRecordsIn);
    checkState(configuredInputs.length == inputsCount, "Number of configured inputs in StreamConfig [%s] doesn't match the main operator's number of inputs [%s]", configuredInputs.length, inputsCount);
    StreamTaskInput[] inputs = new StreamTaskInput[inputsCount];
    for (int i = 0; i < inputsCount; i++) {
        StreamConfig.InputConfig configuredInput = configuredInputs[i];
        if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
            StreamConfig.NetworkInputConfig networkInput = (StreamConfig.NetworkInputConfig) configuredInput;
            inputs[i] = StreamTaskNetworkInputFactory.create(checkpointedInputGates[networkInput.getInputGateIndex()], networkInput.getTypeSerializer(), ioManager, new StatusWatermarkValve(checkpointedInputGates[networkInput.getInputGateIndex()].getNumberOfInputChannels()), i, inflightDataRescalingDescriptor, gatePartitioners, taskInfo);
        } else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
            StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInput;
            inputs[i] = operatorChain.getSourceTaskInput(sourceInput);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
        }
    }
    InputSelectable inputSelectable = mainOperator instanceof InputSelectable ? (InputSelectable) mainOperator : null;
    StreamConfig.InputConfig[] inputConfigs = streamConfig.getInputs(userClassloader);
    boolean anyRequiresSorting = Arrays.stream(inputConfigs).anyMatch(StreamConfig::requiresSorting);
    if (anyRequiresSorting) {
        if (inputSelectable != null) {
            throw new IllegalStateException("The InputSelectable interface is not supported with sorting inputs");
        }
        StreamTaskInput[] sortingInputs = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> inputs[idx]).toArray(StreamTaskInput[]::new);
        KeySelector[] sortingInputKeySelectors = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> streamConfig.getStatePartitioner(idx, userClassloader)).toArray(KeySelector[]::new);
        TypeSerializer[] sortingInputKeySerializers = IntStream.range(0, inputsCount).filter(idx -> requiresSorting(inputConfigs[idx])).mapToObj(idx -> streamConfig.getTypeSerializerIn(idx, userClassloader)).toArray(TypeSerializer[]::new);
        StreamTaskInput[] passThroughInputs = IntStream.range(0, inputsCount).filter(idx -> !requiresSorting(inputConfigs[idx])).mapToObj(idx -> inputs[idx]).toArray(StreamTaskInput[]::new);
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(ownerTask, sortingInputs, sortingInputKeySelectors, sortingInputKeySerializers, streamConfig.getStateKeySerializer(userClassloader), passThroughInputs, memoryManager, ioManager, executionConfig.isObjectReuseEnabled(), streamConfig.getManagedMemoryFractionOperatorUseCaseOfSlot(ManagedMemoryUseCase.OPERATOR, taskManagerConfig, userClassloader), jobConfig, executionConfig);
        StreamTaskInput<?>[] sortedInputs = selectableSortingInputs.getSortedInputs();
        StreamTaskInput<?>[] passedThroughInputs = selectableSortingInputs.getPassThroughInputs();
        int sortedIndex = 0;
        int passThroughIndex = 0;
        for (int i = 0; i < inputs.length; i++) {
            if (requiresSorting(inputConfigs[i])) {
                inputs[i] = sortedInputs[sortedIndex];
                sortedIndex++;
            } else {
                inputs[i] = passedThroughInputs[passThroughIndex];
                passThroughIndex++;
            }
        }
        inputSelectable = selectableSortingInputs.getInputSelectable();
    }
    for (int i = 0; i < inputsCount; i++) {
        StreamConfig.InputConfig configuredInput = configuredInputs[i];
        if (configuredInput instanceof StreamConfig.NetworkInputConfig) {
            StreamTaskNetworkOutput dataOutput = new StreamTaskNetworkOutput<>(operatorChain.getFinishedOnRestoreInputOrDefault(operatorInputs.get(i)), inputWatermarkGauges[i], mainOperatorRecordsIn, networkRecordsIn);
            inputProcessors[i] = new StreamOneInputProcessor(inputs[i], dataOutput, operatorChain);
        } else if (configuredInput instanceof StreamConfig.SourceInputConfig) {
            StreamConfig.SourceInputConfig sourceInput = (StreamConfig.SourceInputConfig) configuredInput;
            OperatorChain.ChainedSource chainedSource = operatorChain.getChainedSource(sourceInput);
            inputProcessors[i] = new StreamOneInputProcessor(inputs[i], new StreamTaskSourceOutput(chainedSource.getSourceOutput(), inputWatermarkGauges[i], chainedSource.getSourceTaskInput().getOperator().getSourceMetricGroup()), operatorChain);
        } else {
            throw new UnsupportedOperationException("Unknown input type: " + configuredInput);
        }
    }
    return new StreamMultipleInputProcessor(new MultipleInputSelectionHandler(inputSelectable, inputsCount), inputProcessors);
}
Also used : IntStream(java.util.stream.IntStream) TaskIOMetricGroup(org.apache.flink.runtime.metrics.groups.TaskIOMetricGroup) Arrays(java.util.Arrays) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) InputSelectable(org.apache.flink.streaming.api.operators.InputSelectable) TaskInvokable(org.apache.flink.runtime.jobgraph.tasks.TaskInvokable) MemoryManager(org.apache.flink.runtime.memory.MemoryManager) CheckpointedInputGate(org.apache.flink.streaming.runtime.io.checkpointing.CheckpointedInputGate) StreamConfig.requiresSorting(org.apache.flink.streaming.api.graph.StreamConfig.requiresSorting) IOManager(org.apache.flink.runtime.io.disk.iomanager.IOManager) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Function(java.util.function.Function) InflightDataRescalingDescriptor(org.apache.flink.runtime.checkpoint.InflightDataRescalingDescriptor) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) SourceOperatorStreamTask(org.apache.flink.streaming.runtime.tasks.SourceOperatorStreamTask) SimpleCounter(org.apache.flink.metrics.SimpleCounter) Preconditions.checkNotNull(org.apache.flink.util.Preconditions.checkNotNull) WatermarkGaugeExposingOutput(org.apache.flink.streaming.runtime.tasks.WatermarkGaugeExposingOutput) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) Preconditions.checkState(org.apache.flink.util.Preconditions.checkState) StatusWatermarkValve(org.apache.flink.streaming.runtime.watermarkstatus.StatusWatermarkValve) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) Configuration(org.apache.flink.configuration.Configuration) TaskInfo(org.apache.flink.api.common.TaskInfo) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) InternalSourceReaderMetricGroup(org.apache.flink.runtime.metrics.groups.InternalSourceReaderMetricGroup) List(java.util.List) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) OperatorChain(org.apache.flink.streaming.runtime.tasks.OperatorChain) Internal(org.apache.flink.annotation.Internal) MultiInputSortingDataInput(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Counter(org.apache.flink.metrics.Counter) WatermarkGauge(org.apache.flink.streaming.runtime.metrics.WatermarkGauge) Input(org.apache.flink.streaming.api.operators.Input) InputSelectable(org.apache.flink.streaming.api.operators.InputSelectable) KeySelector(org.apache.flink.api.java.functions.KeySelector) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) MultiInputSortingDataInput(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput) Input(org.apache.flink.streaming.api.operators.Input) SimpleCounter(org.apache.flink.metrics.SimpleCounter) Counter(org.apache.flink.metrics.Counter) SimpleCounter(org.apache.flink.metrics.SimpleCounter) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StatusWatermarkValve(org.apache.flink.streaming.runtime.watermarkstatus.StatusWatermarkValve)

Example 2 with SelectableSortingInputs

use of org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs in project flink by apache.

the class LargeSortingDataInputITCase method multiInputKeySorting.

@Test
@SuppressWarnings({ "rawtypes", "unchecked" })
public void multiInputKeySorting() throws Exception {
    int numberOfRecords = 500_000;
    GeneratedRecordsDataInput input1 = new GeneratedRecordsDataInput(numberOfRecords, 0);
    GeneratedRecordsDataInput input2 = new GeneratedRecordsDataInput(numberOfRecords, 1);
    KeySelector<Tuple3<Integer, String, byte[]>, String> keySelector = value -> value.f1;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { input1, input2 }, new KeySelector[] { keySelector, keySelector }, new TypeSerializer[] { GeneratedRecordsDataInput.SERIALIZER, GeneratedRecordsDataInput.SERIALIZER }, new StringSerializer(), new StreamTaskInput[0], environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        try (StreamTaskInput<Tuple3<Integer, String, byte[]>> sortedInput1 = (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sortingDataInputs[0];
            StreamTaskInput<Tuple3<Integer, String, byte[]>> sortedInput2 = (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sortingDataInputs[1]) {
            VerifyingOutput<String> output = new VerifyingOutput<>(keySelector);
            StreamMultipleInputProcessor multiSortedProcessor = new StreamMultipleInputProcessor(new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2), new StreamOneInputProcessor[] { new StreamOneInputProcessor(sortedInput1, output, new DummyOperatorChain()), new StreamOneInputProcessor(sortedInput2, output, new DummyOperatorChain()) });
            DataInputStatus inputStatus;
            do {
                inputStatus = multiSortedProcessor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
            assertThat(output.getSeenRecords(), equalTo(numberOfRecords * 2));
        }
    }
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Tuple3(org.apache.flink.api.java.tuple.Tuple3) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AvailabilityProvider(org.apache.flink.runtime.io.AvailabilityProvider) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Random(java.util.Random) CompletableFuture(java.util.concurrent.CompletableFuture) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) BytePrimitiveArraySerializer(org.apache.flink.api.common.typeutils.base.array.BytePrimitiveArraySerializer) PushingAsyncDataInput(org.apache.flink.streaming.runtime.io.PushingAsyncDataInput) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ChannelStateWriter(org.apache.flink.runtime.checkpoint.channel.ChannelStateWriter) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) LinkedHashSet(java.util.LinkedHashSet) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) Configuration(org.apache.flink.configuration.Configuration) Set(java.util.Set) Test(org.junit.Test) IOException(java.io.IOException) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Objects(java.util.Objects) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Assert(org.junit.Assert) Configuration(org.apache.flink.configuration.Configuration) StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) Test(org.junit.Test)

Example 3 with SelectableSortingInputs

use of org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs in project flink by apache.

the class MultiInputSortingDataInputsTest method twoInputOrderTest.

@SuppressWarnings("unchecked")
public void twoInputOrderTest(int preferredIndex, int sortedIndex) throws Exception {
    CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
    List<StreamElement> sortedInputElements = Arrays.asList(new StreamRecord<>(1, 3), new StreamRecord<>(1, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 3), new StreamRecord<>(1, 2), new StreamRecord<>(2, 2), Watermark.MAX_WATERMARK);
    CollectionDataInput<Integer> sortedInput = new CollectionDataInput<>(sortedInputElements, sortedIndex);
    List<StreamElement> preferredInputElements = Arrays.asList(new StreamRecord<>(99, 3), new StreamRecord<>(99, 1), new Watermark(99L));
    CollectionDataInput<Integer> preferredInput = new CollectionDataInput<>(preferredInputElements, preferredIndex);
    KeySelector<Integer, Integer> keySelector = value -> value;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { sortedInput }, new KeySelector[] { keySelector }, new TypeSerializer[] { new IntSerializer() }, new IntSerializer(), new StreamTaskInput[] { preferredInput }, environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        StreamTaskInput<?>[] preferredDataInputs = selectableSortingInputs.getPassThroughInputs();
        try (StreamTaskInput<Object> preferredTaskInput = (StreamTaskInput<Object>) preferredDataInputs[0];
            StreamTaskInput<Object> sortedTaskInput = (StreamTaskInput<Object>) sortingDataInputs[0]) {
            MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
            @SuppressWarnings("rawtypes") StreamOneInputProcessor[] inputProcessors = new StreamOneInputProcessor[2];
            inputProcessors[preferredIndex] = new StreamOneInputProcessor<>(preferredTaskInput, collectingDataOutput, new DummyOperatorChain());
            inputProcessors[sortedIndex] = new StreamOneInputProcessor<>(sortedTaskInput, collectingDataOutput, new DummyOperatorChain());
            StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, inputProcessors);
            DataInputStatus inputStatus;
            do {
                inputStatus = processor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
        }
    }
    assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(99, 3), new StreamRecord<>(99, 1), // max watermark from the preferred input
    new Watermark(99L), new StreamRecord<>(1, 1), new StreamRecord<>(1, 2), new StreamRecord<>(1, 3), new StreamRecord<>(2, 1), new StreamRecord<>(2, 2), new StreamRecord<>(2, 3), // max watermark from the sorted input
    Watermark.MAX_WATERMARK)));
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Arrays(java.util.Arrays) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Configuration(org.apache.flink.configuration.Configuration) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) List(java.util.List) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) Configuration(org.apache.flink.configuration.Configuration) StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Watermark(org.apache.flink.streaming.api.watermark.Watermark) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler)

Example 4 with SelectableSortingInputs

use of org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs in project flink by apache.

the class MultiInputSortingDataInputsTest method watermarkPropagation.

@Test
@SuppressWarnings("unchecked")
public void watermarkPropagation() throws Exception {
    CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
    List<StreamElement> elements1 = Arrays.asList(new StreamRecord<>(2, 3), new Watermark(3), new StreamRecord<>(3, 3), new Watermark(7));
    List<StreamElement> elements2 = Arrays.asList(new StreamRecord<>(0, 3), new Watermark(1), new StreamRecord<>(1, 3), new Watermark(3));
    CollectionDataInput<Integer> dataInput1 = new CollectionDataInput<>(elements1, 0);
    CollectionDataInput<Integer> dataInput2 = new CollectionDataInput<>(elements2, 1);
    KeySelector<Integer, Integer> keySelector = value -> value;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { dataInput1, dataInput2 }, new KeySelector[] { keySelector, keySelector }, new TypeSerializer[] { new IntSerializer(), new IntSerializer() }, new IntSerializer(), new StreamTaskInput[0], environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        try (StreamTaskInput<Object> input1 = (StreamTaskInput<Object>) sortingDataInputs[0];
            StreamTaskInput<Object> input2 = (StreamTaskInput<Object>) sortingDataInputs[1]) {
            MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
            StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, new StreamOneInputProcessor[] { new StreamOneInputProcessor<>(input1, collectingDataOutput, new DummyOperatorChain()), new StreamOneInputProcessor<>(input2, collectingDataOutput, new DummyOperatorChain()) });
            DataInputStatus inputStatus;
            do {
                inputStatus = processor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
        }
    }
    assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(0, 3), new StreamRecord<>(1, 3), // watermark from the second input
    new Watermark(3), new StreamRecord<>(2, 3), new StreamRecord<>(3, 3), // watermark from the first input
    new Watermark(7))));
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Arrays(java.util.Arrays) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Configuration(org.apache.flink.configuration.Configuration) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) List(java.util.List) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) Configuration(org.apache.flink.configuration.Configuration) StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Watermark(org.apache.flink.streaming.api.watermark.Watermark) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) Test(org.junit.Test)

Example 5 with SelectableSortingInputs

use of org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs in project flink by apache.

the class MultiInputSortingDataInputsTest method simpleFixedLengthKeySorting.

@Test
@SuppressWarnings("unchecked")
public void simpleFixedLengthKeySorting() throws Exception {
    CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
    List<StreamElement> elements = Arrays.asList(new StreamRecord<>(1, 3), new StreamRecord<>(1, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 3), new StreamRecord<>(1, 2), new StreamRecord<>(2, 2), Watermark.MAX_WATERMARK);
    CollectionDataInput<Integer> dataInput1 = new CollectionDataInput<>(elements, 0);
    CollectionDataInput<Integer> dataInput2 = new CollectionDataInput<>(elements, 1);
    KeySelector<Integer, Integer> keySelector = value -> value;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { dataInput1, dataInput2 }, new KeySelector[] { keySelector, keySelector }, new TypeSerializer[] { new IntSerializer(), new IntSerializer() }, new IntSerializer(), new StreamTaskInput[0], environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        try (StreamTaskInput<Object> input1 = (StreamTaskInput<Object>) sortingDataInputs[0];
            StreamTaskInput<Object> input2 = (StreamTaskInput<Object>) sortingDataInputs[1]) {
            MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
            StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, new StreamOneInputProcessor[] { new StreamOneInputProcessor<>(input1, collectingDataOutput, new DummyOperatorChain()), new StreamOneInputProcessor<>(input2, collectingDataOutput, new DummyOperatorChain()) });
            DataInputStatus inputStatus;
            do {
                inputStatus = processor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
        }
    }
    assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(1, 1), new StreamRecord<>(1, 1), new StreamRecord<>(1, 2), new StreamRecord<>(1, 2), new StreamRecord<>(1, 3), new StreamRecord<>(1, 3), new StreamRecord<>(2, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 2), new StreamRecord<>(2, 2), new StreamRecord<>(2, 3), // max watermark from one of the inputs
    Watermark.MAX_WATERMARK, new StreamRecord<>(2, 3), // max watermark from the other input
    Watermark.MAX_WATERMARK)));
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Arrays(java.util.Arrays) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Configuration(org.apache.flink.configuration.Configuration) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) List(java.util.List) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) Configuration(org.apache.flink.configuration.Configuration) StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) Test(org.junit.Test)

Aggregations

KeySelector (org.apache.flink.api.java.functions.KeySelector)6 SelectableSortingInputs (org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs)6 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)5 TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer)5 Configuration (org.apache.flink.configuration.Configuration)5 Watermark (org.apache.flink.streaming.api.watermark.Watermark)5 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)5 Arrays (java.util.Arrays)4 List (java.util.List)4 IntSerializer (org.apache.flink.api.common.typeutils.base.IntSerializer)4 DummyInvokable (org.apache.flink.runtime.operators.testutils.DummyInvokable)4 MockEnvironment (org.apache.flink.runtime.operators.testutils.MockEnvironment)4 BoundedMultiInput (org.apache.flink.streaming.api.operators.BoundedMultiInput)4 DataInputStatus (org.apache.flink.streaming.runtime.io.DataInputStatus)4 MultipleInputSelectionHandler (org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler)4 StreamMultipleInputProcessor (org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor)4 StreamOneInputProcessor (org.apache.flink.streaming.runtime.io.StreamOneInputProcessor)4 StreamTaskInput (org.apache.flink.streaming.runtime.io.StreamTaskInput)4 CoreMatchers.equalTo (org.hamcrest.CoreMatchers.equalTo)4 Assert.assertThat (org.junit.Assert.assertThat)4