Use of org.apache.flink.streaming.runtime.io.DataInputStatus in project flink by apache.
From the class LargeSortingDataInputITCase, method multiInputKeySorting:
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void multiInputKeySorting() throws Exception {
    int numberOfRecords = 500_000;
    GeneratedRecordsDataInput input1 = new GeneratedRecordsDataInput(numberOfRecords, 0);
    GeneratedRecordsDataInput input2 = new GeneratedRecordsDataInput(numberOfRecords, 1);
    KeySelector<Tuple3<Integer, String, byte[]>, String> keySelector = value -> value.f1;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs =
                MultiInputSortingDataInput.wrapInputs(
                        new DummyInvokable(),
                        new StreamTaskInput[] {input1, input2},
                        new KeySelector[] {keySelector, keySelector},
                        new TypeSerializer[] {GeneratedRecordsDataInput.SERIALIZER, GeneratedRecordsDataInput.SERIALIZER},
                        new StringSerializer(),
                        new StreamTaskInput[0],
                        environment.getMemoryManager(),
                        environment.getIOManager(),
                        true, 1.0,
                        new Configuration(),
                        new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        try (StreamTaskInput<Tuple3<Integer, String, byte[]>> sortedInput1 =
                        (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sortingDataInputs[0];
                StreamTaskInput<Tuple3<Integer, String, byte[]>> sortedInput2 =
                        (StreamTaskInput<Tuple3<Integer, String, byte[]>>) sortingDataInputs[1]) {
            VerifyingOutput<String> output = new VerifyingOutput<>(keySelector);
            StreamMultipleInputProcessor multiSortedProcessor =
                    new StreamMultipleInputProcessor(
                            new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2),
                            new StreamOneInputProcessor[] {
                                new StreamOneInputProcessor(sortedInput1, output, new DummyOperatorChain()),
                                new StreamOneInputProcessor(sortedInput2, output, new DummyOperatorChain())
                            });
            DataInputStatus inputStatus;
            do {
                inputStatus = multiSortedProcessor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
            assertThat(output.getSeenRecords(), equalTo(numberOfRecords * 2));
        }
    }
}
Use of org.apache.flink.streaming.runtime.io.DataInputStatus in project flink by apache.
From the class MultiInputSortingDataInputsTest, method twoInputOrderTest:
@SuppressWarnings("unchecked")
public void twoInputOrderTest(int preferredIndex, int sortedIndex) throws Exception {
    CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
    List<StreamElement> sortedInputElements =
            Arrays.asList(
                    new StreamRecord<>(1, 3), new StreamRecord<>(1, 1), new StreamRecord<>(2, 1),
                    new StreamRecord<>(2, 3), new StreamRecord<>(1, 2), new StreamRecord<>(2, 2),
                    Watermark.MAX_WATERMARK);
    CollectionDataInput<Integer> sortedInput =
            new CollectionDataInput<>(sortedInputElements, sortedIndex);
    List<StreamElement> preferredInputElements =
            Arrays.asList(new StreamRecord<>(99, 3), new StreamRecord<>(99, 1), new Watermark(99L));
    CollectionDataInput<Integer> preferredInput =
            new CollectionDataInput<>(preferredInputElements, preferredIndex);
    KeySelector<Integer, Integer> keySelector = value -> value;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs =
                MultiInputSortingDataInput.wrapInputs(
                        new DummyInvokable(),
                        new StreamTaskInput[] {sortedInput},
                        new KeySelector[] {keySelector},
                        new TypeSerializer[] {new IntSerializer()},
                        new IntSerializer(),
                        new StreamTaskInput[] {preferredInput},
                        environment.getMemoryManager(),
                        environment.getIOManager(),
                        true, 1.0,
                        new Configuration(),
                        new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        StreamTaskInput<?>[] preferredDataInputs = selectableSortingInputs.getPassThroughInputs();
        try (StreamTaskInput<Object> preferredTaskInput = (StreamTaskInput<Object>) preferredDataInputs[0];
                StreamTaskInput<Object> sortedTaskInput = (StreamTaskInput<Object>) sortingDataInputs[0]) {
            MultipleInputSelectionHandler selectionHandler =
                    new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
            @SuppressWarnings("rawtypes")
            StreamOneInputProcessor[] inputProcessors = new StreamOneInputProcessor[2];
            inputProcessors[preferredIndex] =
                    new StreamOneInputProcessor<>(preferredTaskInput, collectingDataOutput, new DummyOperatorChain());
            inputProcessors[sortedIndex] =
                    new StreamOneInputProcessor<>(sortedTaskInput, collectingDataOutput, new DummyOperatorChain());
            StreamMultipleInputProcessor processor =
                    new StreamMultipleInputProcessor(selectionHandler, inputProcessors);
            DataInputStatus inputStatus;
            do {
                inputStatus = processor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
        }
    }
    assertThat(
            collectingDataOutput.events,
            equalTo(
                    Arrays.asList(
                            new StreamRecord<>(99, 3), new StreamRecord<>(99, 1),
                            new Watermark(99L), // watermark from the preferred input
                            new StreamRecord<>(1, 1), new StreamRecord<>(1, 2), new StreamRecord<>(1, 3),
                            new StreamRecord<>(2, 1), new StreamRecord<>(2, 2), new StreamRecord<>(2, 3),
                            Watermark.MAX_WATERMARK))); // max watermark from the sorted input
}
Use of org.apache.flink.streaming.runtime.io.DataInputStatus in project flink by apache.
From the class SortingDataInputTest, method simpleVariableLengthKeySorting:
@Test
public void simpleVariableLengthKeySorting() throws Exception {
    CollectingDataOutput<Integer> collectingDataOutput = new CollectingDataOutput<>();
    CollectionDataInput<Integer> input =
            new CollectionDataInput<>(Arrays.asList(
                    new StreamRecord<>(1, 3), new StreamRecord<>(1, 1), new StreamRecord<>(2, 1),
                    new StreamRecord<>(2, 3), new StreamRecord<>(1, 2), new StreamRecord<>(2, 2)));
    MockEnvironment environment = MockEnvironment.builder().build();
    SortingDataInput<Integer, String> sortingDataInput =
            new SortingDataInput<>(
                    input, new IntSerializer(), new StringSerializer(),
                    (KeySelector<Integer, String>) value -> "" + value,
                    environment.getMemoryManager(), environment.getIOManager(),
                    true, 1.0, new Configuration(), new DummyInvokable(), new ExecutionConfig());
    DataInputStatus inputStatus;
    do {
        inputStatus = sortingDataInput.emitNext(collectingDataOutput);
    } while (inputStatus != DataInputStatus.END_OF_INPUT);
    assertThat(
            collectingDataOutput.events,
            equalTo(Arrays.asList(
                    new StreamRecord<>(1, 1), new StreamRecord<>(1, 2), new StreamRecord<>(1, 3),
                    new StreamRecord<>(2, 1), new StreamRecord<>(2, 2), new StreamRecord<>(2, 3))));
}
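
Every example on this page drives its input with the same loop: call emitNext (or processInput) until the returned status is DataInputStatus.END_OF_INPUT. As a minimal sketch, and assuming the PushingAsyncDataInput#emitNext(DataOutput) contract used in the snippets above, that loop could be pulled into a small helper; the helper itself is illustrative and is not part of the Flink tests.

// Illustrative helper (not part of the Flink code base): drains a pushing input by
// repeatedly emitting into the given output until the input reports END_OF_INPUT.
// Assumes emitNext(DataOutput) returns a DataInputStatus, as in the tests above.
static <T> void drainToEnd(
        PushingAsyncDataInput<T> input, PushingAsyncDataInput.DataOutput<T> output)
        throws Exception {
    DataInputStatus status;
    do {
        status = input.emitNext(output);
    } while (status != DataInputStatus.END_OF_INPUT);
}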
Use of org.apache.flink.streaming.runtime.io.DataInputStatus in project flink by apache.
From the class StreamTask, method processInput:
/**
* This method implements the default action of the task (e.g. processing one event from the
* input). Implementations should (in general) be non-blocking.
*
* @param controller controller object for collaborative interaction between the action and the
* stream task.
* @throws Exception on any problems in the action.
*/
protected void processInput(MailboxDefaultAction.Controller controller) throws Exception {
    DataInputStatus status = inputProcessor.processInput();
    switch (status) {
        case MORE_AVAILABLE:
            if (recordWriter.isAvailable()
                    && (changelogWriterAvailabilityProvider == null
                            || changelogWriterAvailabilityProvider.isAvailable())) {
                return;
            }
            break;
        case NOTHING_AVAILABLE:
            break;
        case END_OF_RECOVERY:
            throw new IllegalStateException("We should not receive this event here.");
        case STOPPED:
            endData(StopMode.NO_DRAIN);
            return;
        case END_OF_DATA:
            endData(StopMode.DRAIN);
            return;
        case END_OF_INPUT:
            // Suspend the mailbox processor; it will be resumed in afterInvoke and finished
            // once all records have been processed by the downstream tasks. We also suspend
            // the default action to avoid repeatedly executing the now-empty default
            // operation (namely, processing records).
            controller.suspendDefaultAction();
            mailboxProcessor.suspend();
            return;
    }
    TaskIOMetricGroup ioMetrics = getEnvironment().getMetricGroup().getIOMetricGroup();
    PeriodTimer timer;
    CompletableFuture<?> resumeFuture;
    if (!recordWriter.isAvailable()) {
        timer = new GaugePeriodTimer(ioMetrics.getSoftBackPressuredTimePerSecond());
        resumeFuture = recordWriter.getAvailableFuture();
    } else if (!inputProcessor.isAvailable()) {
        timer = new GaugePeriodTimer(ioMetrics.getIdleTimeMsPerSecond());
        resumeFuture = inputProcessor.getAvailableFuture();
    } else {
        // currently, waiting for changelog availability is reported as busy
        // todo: add new metric (FLINK-24402)
        timer = null;
        resumeFuture = changelogWriterAvailabilityProvider.getAvailableFuture();
    }
    assertNoException(
            resumeFuture.thenRun(new ResumeWrapper(controller.suspendDefaultAction(timer), timer)));
}
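
The switch above touches every DataInputStatus value that appears on this page. The following sketch is purely illustrative (not Flink source code) and only condenses the reactions already shown in processInput into one summary per status value.

// Illustrative only: one-line summary per DataInputStatus value, mirroring the
// switch in StreamTask#processInput above.
static String reactionTo(DataInputStatus status) {
    switch (status) {
        case MORE_AVAILABLE:
            return "keep the default action running if the record writer (and changelog writer) are available, otherwise wait";
        case NOTHING_AVAILABLE:
            return "wait on the input/writer availability future, reporting back-pressured or idle time";
        case END_OF_RECOVERY:
            return "unexpected here: processInput throws IllegalStateException";
        case STOPPED:
            return "endData(StopMode.NO_DRAIN)";
        case END_OF_DATA:
            return "endData(StopMode.DRAIN)";
        case END_OF_INPUT:
            return "suspend the default action and the mailbox processor";
        default:
            return "no special handling shown on this page";
    }
}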
Use of org.apache.flink.streaming.runtime.io.DataInputStatus in project flink by apache.
From the class LargeSortingDataInputITCase, method stringKeySorting:
@Test
public void stringKeySorting() throws Exception {
    int numberOfRecords = 500_000;
    GeneratedRecordsDataInput input = new GeneratedRecordsDataInput(numberOfRecords, 0);
    KeySelector<Tuple3<Integer, String, byte[]>, String> keySelector = value -> value.f1;
    try (MockEnvironment environment = MockEnvironment.builder().build();
            SortingDataInput<Tuple3<Integer, String, byte[]>, String> sortingDataInput =
                    new SortingDataInput<>(
                            input,
                            GeneratedRecordsDataInput.SERIALIZER,
                            new StringSerializer(),
                            keySelector,
                            environment.getMemoryManager(),
                            environment.getIOManager(),
                            true, 1.0,
                            new Configuration(),
                            new DummyInvokable(),
                            new ExecutionConfig())) {
        DataInputStatus inputStatus;
        VerifyingOutput<String> output = new VerifyingOutput<>(keySelector);
        do {
            inputStatus = sortingDataInput.emitNext(output);
        } while (inputStatus != DataInputStatus.END_OF_INPUT);
        assertThat(output.getSeenRecords(), equalTo(numberOfRecords));
    }
}