use of org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor in project flink by apache.
the class LargeSortingDataInputITCase method multiInputKeySorting.
/**
 * Drives two generated inputs of 500,000 records each through the multi-input sorting
 * wrappers and checks that the shared output has seen every record of both inputs.
 *
 * @throws Exception if wrapping the inputs, processing them or closing a resource fails
 */
@Test
@SuppressWarnings({ "rawtypes", "unchecked" })
public void multiInputKeySorting() throws Exception {
    final int recordsPerInput = 500_000;
    GeneratedRecordsDataInput firstSource = new GeneratedRecordsDataInput(recordsPerInput, 0);
    GeneratedRecordsDataInput secondSource = new GeneratedRecordsDataInput(recordsPerInput, 1);
    // Both inputs are keyed by the String field (f1) of the generated tuples.
    KeySelector<Tuple3<Integer, String, byte[]>, String> byStringField = tuple -> tuple.f1;

    try (MockEnvironment env = MockEnvironment.builder().build()) {
        // Both inputs are sorted; there are no pass-through inputs in this scenario.
        SelectableSortingInputs wrapped =
                MultiInputSortingDataInput.wrapInputs(
                        new DummyInvokable(),
                        new StreamTaskInput[] { firstSource, secondSource },
                        new KeySelector[] { byStringField, byStringField },
                        new TypeSerializer[] {
                            GeneratedRecordsDataInput.SERIALIZER,
                            GeneratedRecordsDataInput.SERIALIZER
                        },
                        new StringSerializer(),
                        new StreamTaskInput[0],
                        env.getMemoryManager(),
                        env.getIOManager(),
                        true,
                        1.0,
                        new Configuration(),
                        new ExecutionConfig());
        StreamTaskInput<?>[] sorted = wrapped.getSortedInputs();

        try (StreamTaskInput<Tuple3<Integer, String, byte[]>> left =
                        (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sorted[0];
                StreamTaskInput<Tuple3<Integer, String, byte[]>> right =
                        (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sorted[1]) {
            VerifyingOutput<String> sink = new VerifyingOutput<>(byStringField);
            MultipleInputSelectionHandler selection =
                    new MultipleInputSelectionHandler(wrapped.getInputSelectable(), 2);
            // Both per-input processors push into the same verifying sink.
            StreamOneInputProcessor[] perInput =
                    new StreamOneInputProcessor[] {
                        new StreamOneInputProcessor(left, sink, new DummyOperatorChain()),
                        new StreamOneInputProcessor(right, sink, new DummyOperatorChain())
                    };
            StreamMultipleInputProcessor processor =
                    new StreamMultipleInputProcessor(selection, perInput);

            // Poll until the combined processor reports that all inputs are exhausted.
            while (processor.processInput() != DataInputStatus.END_OF_INPUT) {
                // nothing to do between polls
            }

            assertThat(sink.getSeenRecords(), equalTo(recordsPerInput * 2));
        }
    }
}
use of org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor in project flink by apache.
the class MultiInputSortingDataInputsTest method twoInputOrderTest.
/**
 * Runs one sorted input and one pass-through ("preferred") input through a
 * {@link StreamMultipleInputProcessor} and checks the order of the emitted elements: first
 * everything from the pass-through input in its original order, then the sorted input's
 * records in sorted order followed by its final watermark.
 *
 * @param preferredIndex input index given to the pass-through input and its processor slot
 * @param sortedIndex input index given to the sorted input and its processor slot
 * @throws Exception if wrapping the inputs, processing them or closing a resource fails
 */
@SuppressWarnings("unchecked")
public void twoInputOrderTest(int preferredIndex, int sortedIndex) throws Exception {
    CollectingDataOutput<Object> collected = new CollectingDataOutput<>();

    // Deliberately unordered records for the input that gets sorted.
    List<StreamElement> unsortedElements =
            Arrays.asList(
                    new StreamRecord<>(1, 3),
                    new StreamRecord<>(1, 1),
                    new StreamRecord<>(2, 1),
                    new StreamRecord<>(2, 3),
                    new StreamRecord<>(1, 2),
                    new StreamRecord<>(2, 2),
                    Watermark.MAX_WATERMARK);
    CollectionDataInput<Integer> toBeSorted =
            new CollectionDataInput<>(unsortedElements, sortedIndex);

    // Elements for the pass-through input.
    List<StreamElement> passThroughElements =
            Arrays.asList(
                    new StreamRecord<>(99, 3),
                    new StreamRecord<>(99, 1),
                    new Watermark(99L));
    CollectionDataInput<Integer> passThrough =
            new CollectionDataInput<>(passThroughElements, preferredIndex);

    KeySelector<Integer, Integer> identityKey = element -> element;

    try (MockEnvironment env = MockEnvironment.builder().build()) {
        SelectableSortingInputs wrapped =
                MultiInputSortingDataInput.wrapInputs(
                        new DummyInvokable(),
                        new StreamTaskInput[] { toBeSorted },
                        new KeySelector[] { identityKey },
                        new TypeSerializer[] { new IntSerializer() },
                        new IntSerializer(),
                        new StreamTaskInput[] { passThrough },
                        env.getMemoryManager(),
                        env.getIOManager(),
                        true,
                        1.0,
                        new Configuration(),
                        new ExecutionConfig());
        StreamTaskInput<?>[] sortedWrappers = wrapped.getSortedInputs();
        StreamTaskInput<?>[] passThroughWrappers = wrapped.getPassThroughInputs();

        try (StreamTaskInput<Object> preferredTaskInput =
                        (StreamTaskInput<Object>) passThroughWrappers[0];
                StreamTaskInput<Object> sortedTaskInput =
                        (StreamTaskInput<Object>) sortedWrappers[0]) {
            MultipleInputSelectionHandler selection =
                    new MultipleInputSelectionHandler(wrapped.getInputSelectable(), 2);

            // Each processor is stored in the slot matching its input's index.
            @SuppressWarnings("rawtypes")
            StreamOneInputProcessor[] slots = new StreamOneInputProcessor[2];
            slots[preferredIndex] =
                    new StreamOneInputProcessor<>(
                            preferredTaskInput, collected, new DummyOperatorChain());
            slots[sortedIndex] =
                    new StreamOneInputProcessor<>(
                            sortedTaskInput, collected, new DummyOperatorChain());

            StreamMultipleInputProcessor multiProcessor =
                    new StreamMultipleInputProcessor(selection, slots);

            // Poll until the combined processor reports that all inputs are exhausted.
            while (multiProcessor.processInput() != DataInputStatus.END_OF_INPUT) {
                // nothing to do between polls
            }
        }
    }

    assertThat(
            collected.events,
            equalTo(
                    Arrays.asList(
                            new StreamRecord<>(99, 3),
                            new StreamRecord<>(99, 1),
                            // watermark emitted by the preferred (pass-through) input
                            new Watermark(99L),
                            new StreamRecord<>(1, 1),
                            new StreamRecord<>(1, 2),
                            new StreamRecord<>(1, 3),
                            new StreamRecord<>(2, 1),
                            new StreamRecord<>(2, 2),
                            new StreamRecord<>(2, 3),
                            // max watermark emitted by the sorted input
                            Watermark.MAX_WATERMARK)));
}
use of org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor in project flink by apache.
the class MultiInputSortingDataInputsTest method watermarkPropagation.
/**
 * Feeds two sorted inputs that each carry intermediate watermarks and checks which
 * watermarks reach the output: the expected list contains only the last watermark of each
 * input, placed after that input's records.
 *
 * @throws Exception if wrapping the inputs, processing them or closing a resource fails
 */
@Test
@SuppressWarnings("unchecked")
public void watermarkPropagation() throws Exception {
    CollectingDataOutput<Object> collected = new CollectingDataOutput<>();

    List<StreamElement> firstElements =
            Arrays.asList(
                    new StreamRecord<>(2, 3),
                    new Watermark(3),
                    new StreamRecord<>(3, 3),
                    new Watermark(7));
    List<StreamElement> secondElements =
            Arrays.asList(
                    new StreamRecord<>(0, 3),
                    new Watermark(1),
                    new StreamRecord<>(1, 3),
                    new Watermark(3));
    CollectionDataInput<Integer> firstSource = new CollectionDataInput<>(firstElements, 0);
    CollectionDataInput<Integer> secondSource = new CollectionDataInput<>(secondElements, 1);

    KeySelector<Integer, Integer> identityKey = element -> element;

    try (MockEnvironment env = MockEnvironment.builder().build()) {
        // Both inputs are sorted; there are no pass-through inputs in this scenario.
        SelectableSortingInputs wrapped =
                MultiInputSortingDataInput.wrapInputs(
                        new DummyInvokable(),
                        new StreamTaskInput[] { firstSource, secondSource },
                        new KeySelector[] { identityKey, identityKey },
                        new TypeSerializer[] { new IntSerializer(), new IntSerializer() },
                        new IntSerializer(),
                        new StreamTaskInput[0],
                        env.getMemoryManager(),
                        env.getIOManager(),
                        true,
                        1.0,
                        new Configuration(),
                        new ExecutionConfig());
        StreamTaskInput<?>[] sorted = wrapped.getSortedInputs();

        try (StreamTaskInput<Object> left = (StreamTaskInput<Object>) sorted[0];
                StreamTaskInput<Object> right = (StreamTaskInput<Object>) sorted[1]) {
            MultipleInputSelectionHandler selection =
                    new MultipleInputSelectionHandler(wrapped.getInputSelectable(), 2);
            // Both per-input processors push into the same collecting output.
            StreamOneInputProcessor<?>[] perInput =
                    new StreamOneInputProcessor[] {
                        new StreamOneInputProcessor<>(left, collected, new DummyOperatorChain()),
                        new StreamOneInputProcessor<>(right, collected, new DummyOperatorChain())
                    };
            StreamMultipleInputProcessor multiProcessor =
                    new StreamMultipleInputProcessor(selection, perInput);

            // Poll until the combined processor reports that all inputs are exhausted.
            while (multiProcessor.processInput() != DataInputStatus.END_OF_INPUT) {
                // nothing to do between polls
            }
        }
    }

    assertThat(
            collected.events,
            equalTo(
                    Arrays.asList(
                            new StreamRecord<>(0, 3),
                            new StreamRecord<>(1, 3),
                            // final watermark of the second input
                            new Watermark(3),
                            new StreamRecord<>(2, 3),
                            new StreamRecord<>(3, 3),
                            // final watermark of the first input
                            new Watermark(7))));
}
use of org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor in project flink by apache.
the class MultiInputSortingDataInputsTest method simpleFixedLengthKeySorting.
/**
 * Replays the same unordered element list on two sorted inputs with an integer key and
 * checks the merged output: records come out grouped by value and ordered by their second
 * constructor argument, with the two max watermarks surrounding the very last record.
 *
 * @throws Exception if wrapping the inputs, processing them or closing a resource fails
 */
@Test
@SuppressWarnings("unchecked")
public void simpleFixedLengthKeySorting() throws Exception {
    CollectingDataOutput<Object> collected = new CollectingDataOutput<>();

    // The same element list backs both inputs.
    List<StreamElement> sharedElements =
            Arrays.asList(
                    new StreamRecord<>(1, 3),
                    new StreamRecord<>(1, 1),
                    new StreamRecord<>(2, 1),
                    new StreamRecord<>(2, 3),
                    new StreamRecord<>(1, 2),
                    new StreamRecord<>(2, 2),
                    Watermark.MAX_WATERMARK);
    CollectionDataInput<Integer> firstSource = new CollectionDataInput<>(sharedElements, 0);
    CollectionDataInput<Integer> secondSource = new CollectionDataInput<>(sharedElements, 1);

    KeySelector<Integer, Integer> identityKey = element -> element;

    try (MockEnvironment env = MockEnvironment.builder().build()) {
        // Both inputs are sorted; there are no pass-through inputs in this scenario.
        SelectableSortingInputs wrapped =
                MultiInputSortingDataInput.wrapInputs(
                        new DummyInvokable(),
                        new StreamTaskInput[] { firstSource, secondSource },
                        new KeySelector[] { identityKey, identityKey },
                        new TypeSerializer[] { new IntSerializer(), new IntSerializer() },
                        new IntSerializer(),
                        new StreamTaskInput[0],
                        env.getMemoryManager(),
                        env.getIOManager(),
                        true,
                        1.0,
                        new Configuration(),
                        new ExecutionConfig());
        StreamTaskInput<?>[] sorted = wrapped.getSortedInputs();

        try (StreamTaskInput<Object> left = (StreamTaskInput<Object>) sorted[0];
                StreamTaskInput<Object> right = (StreamTaskInput<Object>) sorted[1]) {
            MultipleInputSelectionHandler selection =
                    new MultipleInputSelectionHandler(wrapped.getInputSelectable(), 2);
            // Both per-input processors push into the same collecting output.
            StreamOneInputProcessor<?>[] perInput =
                    new StreamOneInputProcessor[] {
                        new StreamOneInputProcessor<>(left, collected, new DummyOperatorChain()),
                        new StreamOneInputProcessor<>(right, collected, new DummyOperatorChain())
                    };
            StreamMultipleInputProcessor multiProcessor =
                    new StreamMultipleInputProcessor(selection, perInput);

            // Poll until the combined processor reports that all inputs are exhausted.
            while (multiProcessor.processInput() != DataInputStatus.END_OF_INPUT) {
                // nothing to do between polls
            }
        }
    }

    assertThat(
            collected.events,
            equalTo(
                    Arrays.asList(
                            new StreamRecord<>(1, 1),
                            new StreamRecord<>(1, 1),
                            new StreamRecord<>(1, 2),
                            new StreamRecord<>(1, 2),
                            new StreamRecord<>(1, 3),
                            new StreamRecord<>(1, 3),
                            new StreamRecord<>(2, 1),
                            new StreamRecord<>(2, 1),
                            new StreamRecord<>(2, 2),
                            new StreamRecord<>(2, 2),
                            new StreamRecord<>(2, 3),
                            // max watermark from one of the inputs
                            Watermark.MAX_WATERMARK,
                            new StreamRecord<>(2, 3),
                            // max watermark from the other input
                            Watermark.MAX_WATERMARK)));
}
Aggregations