use of org.apache.flink.streaming.runtime.io.StreamTaskInput in project flink by apache.
the class LargeSortingDataInputITCase method multiInputKeySorting.
@Test
@SuppressWarnings({ "rawtypes", "unchecked" })
public void multiInputKeySorting() throws Exception {
int numberOfRecords = 500_000;
GeneratedRecordsDataInput input1 = new GeneratedRecordsDataInput(numberOfRecords, 0);
GeneratedRecordsDataInput input2 = new GeneratedRecordsDataInput(numberOfRecords, 1);
KeySelector<Tuple3<Integer, String, byte[]>, String> keySelector = value -> value.f1;
try (MockEnvironment environment = MockEnvironment.builder().build()) {
SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { input1, input2 }, new KeySelector[] { keySelector, keySelector }, new TypeSerializer[] { GeneratedRecordsDataInput.SERIALIZER, GeneratedRecordsDataInput.SERIALIZER }, new StringSerializer(), new StreamTaskInput[0], environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
try (StreamTaskInput<Tuple3<Integer, String, byte[]>> sortedInput1 = (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sortingDataInputs[0];
StreamTaskInput<Tuple3<Integer, String, byte[]>> sortedInput2 = (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sortingDataInputs[1]) {
VerifyingOutput<String> output = new VerifyingOutput<>(keySelector);
StreamMultipleInputProcessor multiSortedProcessor = new StreamMultipleInputProcessor(new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2), new StreamOneInputProcessor[] { new StreamOneInputProcessor(sortedInput1, output, new DummyOperatorChain()), new StreamOneInputProcessor(sortedInput2, output, new DummyOperatorChain()) });
DataInputStatus inputStatus;
do {
inputStatus = multiSortedProcessor.processInput();
} while (inputStatus != DataInputStatus.END_OF_INPUT);
assertThat(output.getSeenRecords(), equalTo(numberOfRecords * 2));
}
}
}
use of org.apache.flink.streaming.runtime.io.StreamTaskInput in project flink by apache.
the class MultiInputSortingDataInputsTest method twoInputOrderTest.
@SuppressWarnings("unchecked")
public void twoInputOrderTest(int preferredIndex, int sortedIndex) throws Exception {
CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
List<StreamElement> sortedInputElements = Arrays.asList(new StreamRecord<>(1, 3), new StreamRecord<>(1, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 3), new StreamRecord<>(1, 2), new StreamRecord<>(2, 2), Watermark.MAX_WATERMARK);
CollectionDataInput<Integer> sortedInput = new CollectionDataInput<>(sortedInputElements, sortedIndex);
List<StreamElement> preferredInputElements = Arrays.asList(new StreamRecord<>(99, 3), new StreamRecord<>(99, 1), new Watermark(99L));
CollectionDataInput<Integer> preferredInput = new CollectionDataInput<>(preferredInputElements, preferredIndex);
KeySelector<Integer, Integer> keySelector = value -> value;
try (MockEnvironment environment = MockEnvironment.builder().build()) {
SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { sortedInput }, new KeySelector[] { keySelector }, new TypeSerializer[] { new IntSerializer() }, new IntSerializer(), new StreamTaskInput[] { preferredInput }, environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
StreamTaskInput<?>[] preferredDataInputs = selectableSortingInputs.getPassThroughInputs();
try (StreamTaskInput<Object> preferredTaskInput = (StreamTaskInput<Object>) preferredDataInputs[0];
StreamTaskInput<Object> sortedTaskInput = (StreamTaskInput<Object>) sortingDataInputs[0]) {
MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
@SuppressWarnings("rawtypes") StreamOneInputProcessor[] inputProcessors = new StreamOneInputProcessor[2];
inputProcessors[preferredIndex] = new StreamOneInputProcessor<>(preferredTaskInput, collectingDataOutput, new DummyOperatorChain());
inputProcessors[sortedIndex] = new StreamOneInputProcessor<>(sortedTaskInput, collectingDataOutput, new DummyOperatorChain());
StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, inputProcessors);
DataInputStatus inputStatus;
do {
inputStatus = processor.processInput();
} while (inputStatus != DataInputStatus.END_OF_INPUT);
}
}
assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(99, 3), new StreamRecord<>(99, 1), // max watermark from the preferred input
new Watermark(99L), new StreamRecord<>(1, 1), new StreamRecord<>(1, 2), new StreamRecord<>(1, 3), new StreamRecord<>(2, 1), new StreamRecord<>(2, 2), new StreamRecord<>(2, 3), // max watermark from the sorted input
Watermark.MAX_WATERMARK)));
}
use of org.apache.flink.streaming.runtime.io.StreamTaskInput in project flink by apache.
the class MultiInputSortingDataInputsTest method watermarkPropagation.
@Test
@SuppressWarnings("unchecked")
public void watermarkPropagation() throws Exception {
CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
List<StreamElement> elements1 = Arrays.asList(new StreamRecord<>(2, 3), new Watermark(3), new StreamRecord<>(3, 3), new Watermark(7));
List<StreamElement> elements2 = Arrays.asList(new StreamRecord<>(0, 3), new Watermark(1), new StreamRecord<>(1, 3), new Watermark(3));
CollectionDataInput<Integer> dataInput1 = new CollectionDataInput<>(elements1, 0);
CollectionDataInput<Integer> dataInput2 = new CollectionDataInput<>(elements2, 1);
KeySelector<Integer, Integer> keySelector = value -> value;
try (MockEnvironment environment = MockEnvironment.builder().build()) {
SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { dataInput1, dataInput2 }, new KeySelector[] { keySelector, keySelector }, new TypeSerializer[] { new IntSerializer(), new IntSerializer() }, new IntSerializer(), new StreamTaskInput[0], environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
try (StreamTaskInput<Object> input1 = (StreamTaskInput<Object>) sortingDataInputs[0];
StreamTaskInput<Object> input2 = (StreamTaskInput<Object>) sortingDataInputs[1]) {
MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, new StreamOneInputProcessor[] { new StreamOneInputProcessor<>(input1, collectingDataOutput, new DummyOperatorChain()), new StreamOneInputProcessor<>(input2, collectingDataOutput, new DummyOperatorChain()) });
DataInputStatus inputStatus;
do {
inputStatus = processor.processInput();
} while (inputStatus != DataInputStatus.END_OF_INPUT);
}
}
assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(0, 3), new StreamRecord<>(1, 3), // watermark from the second input
new Watermark(3), new StreamRecord<>(2, 3), new StreamRecord<>(3, 3), // watermark from the first input
new Watermark(7))));
}
use of org.apache.flink.streaming.runtime.io.StreamTaskInput in project flink by apache.
the class MultiInputSortingDataInputsTest method simpleFixedLengthKeySorting.
@Test
@SuppressWarnings("unchecked")
public void simpleFixedLengthKeySorting() throws Exception {
CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
List<StreamElement> elements = Arrays.asList(new StreamRecord<>(1, 3), new StreamRecord<>(1, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 3), new StreamRecord<>(1, 2), new StreamRecord<>(2, 2), Watermark.MAX_WATERMARK);
CollectionDataInput<Integer> dataInput1 = new CollectionDataInput<>(elements, 0);
CollectionDataInput<Integer> dataInput2 = new CollectionDataInput<>(elements, 1);
KeySelector<Integer, Integer> keySelector = value -> value;
try (MockEnvironment environment = MockEnvironment.builder().build()) {
SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { dataInput1, dataInput2 }, new KeySelector[] { keySelector, keySelector }, new TypeSerializer[] { new IntSerializer(), new IntSerializer() }, new IntSerializer(), new StreamTaskInput[0], environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
try (StreamTaskInput<Object> input1 = (StreamTaskInput<Object>) sortingDataInputs[0];
StreamTaskInput<Object> input2 = (StreamTaskInput<Object>) sortingDataInputs[1]) {
MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, new StreamOneInputProcessor[] { new StreamOneInputProcessor<>(input1, collectingDataOutput, new DummyOperatorChain()), new StreamOneInputProcessor<>(input2, collectingDataOutput, new DummyOperatorChain()) });
DataInputStatus inputStatus;
do {
inputStatus = processor.processInput();
} while (inputStatus != DataInputStatus.END_OF_INPUT);
}
}
assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(1, 1), new StreamRecord<>(1, 1), new StreamRecord<>(1, 2), new StreamRecord<>(1, 2), new StreamRecord<>(1, 3), new StreamRecord<>(1, 3), new StreamRecord<>(2, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 2), new StreamRecord<>(2, 2), new StreamRecord<>(2, 3), // max watermark from one of the inputs
Watermark.MAX_WATERMARK, new StreamRecord<>(2, 3), // max watermark from the other input
Watermark.MAX_WATERMARK)));
}
use of org.apache.flink.streaming.runtime.io.StreamTaskInput in project flink by apache.
the class MultiInputSortingDataInput method wrapInputs.
public static <K> SelectableSortingInputs wrapInputs(TaskInvokable containingTask, StreamTaskInput<Object>[] sortingInputs, KeySelector<Object, K>[] keySelectors, TypeSerializer<Object>[] inputSerializers, TypeSerializer<K> keySerializer, StreamTaskInput<Object>[] passThroughInputs, MemoryManager memoryManager, IOManager ioManager, boolean objectReuse, double managedMemoryFraction, Configuration jobConfiguration, ExecutionConfig executionConfig) {
int keyLength = keySerializer.getLength();
final TypeComparator<Tuple2<byte[], StreamRecord<Object>>> comparator;
DataOutputSerializer dataOutputSerializer;
if (keyLength > 0) {
dataOutputSerializer = new DataOutputSerializer(keyLength);
comparator = new FixedLengthByteKeyComparator<>(keyLength);
} else {
dataOutputSerializer = new DataOutputSerializer(64);
comparator = new VariableLengthByteKeyComparator<>();
}
List<Integer> passThroughInputIndices = Arrays.stream(passThroughInputs).map(StreamTaskInput::getInputIndex).collect(Collectors.toList());
int numberOfInputs = sortingInputs.length + passThroughInputs.length;
CommonContext commonContext = new CommonContext(sortingInputs);
InputSelector inputSelector = new InputSelector(commonContext, numberOfInputs, passThroughInputIndices);
StreamTaskInput<?>[] wrappedSortingInputs = IntStream.range(0, sortingInputs.length).mapToObj(idx -> {
try {
KeyAndValueSerializer<Object> keyAndValueSerializer = new KeyAndValueSerializer<>(inputSerializers[idx], keyLength);
return new MultiInputSortingDataInput<>(commonContext, sortingInputs[idx], sortingInputs[idx].getInputIndex(), ExternalSorter.newBuilder(memoryManager, containingTask, keyAndValueSerializer, comparator, executionConfig).memoryFraction(managedMemoryFraction / numberOfInputs).enableSpilling(ioManager, jobConfiguration.get(AlgorithmOptions.SORT_SPILLING_THRESHOLD)).maxNumFileHandles(jobConfiguration.get(AlgorithmOptions.SPILLING_MAX_FAN) / numberOfInputs).objectReuse(objectReuse).largeRecords(true).build(), keySelectors[idx], keySerializer, dataOutputSerializer);
} catch (MemoryAllocationException e) {
throw new RuntimeException();
}
}).toArray(StreamTaskInput[]::new);
StreamTaskInput<?>[] wrappedPassThroughInputs = Arrays.stream(passThroughInputs).map(input -> new ObservableStreamTaskInput<>(input, inputSelector)).toArray(StreamTaskInput[]::new);
return new SelectableSortingInputs(wrappedSortingInputs, wrappedPassThroughInputs, inputSelector);
}
Aggregations