Use of org.apache.flink.api.java.functions.KeySelector in project beam by apache.
From the class DoFnOperatorTest, method keyedParDoPushbackDataCheckpointing:
@Test
public void keyedParDoPushbackDataCheckpointing() throws Exception {
  pushbackDataCheckpointing(
      () -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());
        TupleTag<String> outputTag = new TupleTag<>("main-output");
        KeySelector<WindowedValue<String>, ByteBuffer> keySelector =
            e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder().put(1, view1).put(2, view2).build();
        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(
                    outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                FlinkPipelineOptions.defaults(),
                keyCoder,
                keySelector,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());
        return new KeyedTwoInputStreamOperatorTestHarness<>(
            doFnOperator,
            keySelector,
            // we use a dummy key for the second input since it is considered to be broadcast
            null,
            new CoderTypeInformation<>(
                FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
      });
}
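In the Beam runner test above, the KeySelector is a lambda that encodes each element's value into a ByteBuffer so the Flink runtime can partition by it. For readers unfamiliar with the interface itself, here is a minimal, self-contained sketch of a KeySelector used with the plain Flink DataStream API; the class name and example data are illustrative and not taken from the test:

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeySelectorSketch {
  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> words = env.fromElements("apache", "flink", "beam");

    // A KeySelector is a serializable function that extracts a key from each element.
    // An anonymous class (rather than a lambda) keeps the key type visible to Flink's
    // type extraction.
    KeySelector<String, Integer> byLength =
        new KeySelector<String, Integer>() {
          @Override
          public Integer getKey(String value) {
            return value.length();
          }
        };

    // keyBy(...) partitions the stream by the extracted key.
    KeyedStream<String, Integer> keyed = words.keyBy(byLength);
    keyed.print();

    env.execute("KeySelector sketch");
  }
}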
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
From the class LargeSortingDataInputITCase, method stringKeySorting:
@Test
public void stringKeySorting() throws Exception {
  int numberOfRecords = 500_000;
  GeneratedRecordsDataInput input = new GeneratedRecordsDataInput(numberOfRecords, 0);
  KeySelector<Tuple3<Integer, String, byte[]>, String> keySelector = value -> value.f1;
  try (MockEnvironment environment = MockEnvironment.builder().build();
      SortingDataInput<Tuple3<Integer, String, byte[]>, String> sortingDataInput =
          new SortingDataInput<>(
              input,
              GeneratedRecordsDataInput.SERIALIZER,
              new StringSerializer(),
              keySelector,
              environment.getMemoryManager(),
              environment.getIOManager(),
              true,
              1.0,
              new Configuration(),
              new DummyInvokable(),
              new ExecutionConfig())) {
    DataInputStatus inputStatus;
    VerifyingOutput<String> output = new VerifyingOutput<>(keySelector);
    do {
      inputStatus = sortingDataInput.emitNext(output);
    } while (inputStatus != DataInputStatus.END_OF_INPUT);
    assertThat(output.getSeenRecords(), equalTo(numberOfRecords));
  }
}
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
From the class LargeSortingDataInputITCase, method intKeySorting:
@Test
public void intKeySorting() throws Exception {
  int numberOfRecords = 500_000;
  GeneratedRecordsDataInput input = new GeneratedRecordsDataInput(numberOfRecords, 0);
  KeySelector<Tuple3<Integer, String, byte[]>, Integer> keySelector = value -> value.f0;
  try (MockEnvironment environment = MockEnvironment.builder().build();
      SortingDataInput<Tuple3<Integer, String, byte[]>, Integer> sortingDataInput =
          new SortingDataInput<>(
              input,
              GeneratedRecordsDataInput.SERIALIZER,
              new IntSerializer(),
              keySelector,
              environment.getMemoryManager(),
              environment.getIOManager(),
              true,
              1.0,
              new Configuration(),
              new DummyInvokable(),
              new ExecutionConfig())) {
    DataInputStatus inputStatus;
    VerifyingOutput<Integer> output = new VerifyingOutput<>(keySelector);
    do {
      inputStatus = sortingDataInput.emitNext(output);
    } while (inputStatus != DataInputStatus.END_OF_INPUT);
    assertThat(output.getSeenRecords(), equalTo(numberOfRecords));
  }
}
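In both sorting tests, VerifyingOutput is a helper from the same test class whose implementation is not shown here; conceptually it counts the emitted records and checks that they arrive grouped by key, i.e. that a key never reappears once a different key has been seen. A rough, hypothetical sketch of that kind of check (the helper name and structure are assumed, not copied from Flink):

import static org.junit.Assert.assertFalse;

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.flink.api.java.functions.KeySelector;

public class GroupedByKeyCheck {
  // Hypothetical helper: fails if the records are not grouped by key.
  public static <T, K> void assertGroupedByKey(List<T> records, KeySelector<T, K> keySelector)
      throws Exception {
    Set<K> closedKeys = new HashSet<>();
    K currentKey = null;
    for (T record : records) {
      K key = keySelector.getKey(record);
      if (!key.equals(currentKey)) {
        // The key changed: the new key must not have been seen in an earlier group.
        assertFalse("key " + key + " appeared in two separate groups", closedKeys.contains(key));
        if (currentKey != null) {
          closedKeys.add(currentKey);
        }
        currentKey = key;
      }
    }
  }
}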
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
From the class MultiInputSortingDataInputsTest, method watermarkPropagation:
@Test
@SuppressWarnings("unchecked")
public void watermarkPropagation() throws Exception {
  CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
  List<StreamElement> elements1 =
      Arrays.asList(
          new StreamRecord<>(2, 3), new Watermark(3), new StreamRecord<>(3, 3), new Watermark(7));
  List<StreamElement> elements2 =
      Arrays.asList(
          new StreamRecord<>(0, 3), new Watermark(1), new StreamRecord<>(1, 3), new Watermark(3));
  CollectionDataInput<Integer> dataInput1 = new CollectionDataInput<>(elements1, 0);
  CollectionDataInput<Integer> dataInput2 = new CollectionDataInput<>(elements2, 1);
  KeySelector<Integer, Integer> keySelector = value -> value;
  try (MockEnvironment environment = MockEnvironment.builder().build()) {
    SelectableSortingInputs selectableSortingInputs =
        MultiInputSortingDataInput.wrapInputs(
            new DummyInvokable(),
            new StreamTaskInput[] {dataInput1, dataInput2},
            new KeySelector[] {keySelector, keySelector},
            new TypeSerializer[] {new IntSerializer(), new IntSerializer()},
            new IntSerializer(),
            new StreamTaskInput[0],
            environment.getMemoryManager(),
            environment.getIOManager(),
            true,
            1.0,
            new Configuration(),
            new ExecutionConfig());
    StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
    try (StreamTaskInput<Object> input1 = (StreamTaskInput<Object>) sortingDataInputs[0];
        StreamTaskInput<Object> input2 = (StreamTaskInput<Object>) sortingDataInputs[1]) {
      MultipleInputSelectionHandler selectionHandler =
          new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
      StreamMultipleInputProcessor processor =
          new StreamMultipleInputProcessor(
              selectionHandler,
              new StreamOneInputProcessor[] {
                new StreamOneInputProcessor<>(input1, collectingDataOutput, new DummyOperatorChain()),
                new StreamOneInputProcessor<>(input2, collectingDataOutput, new DummyOperatorChain())
              });
      DataInputStatus inputStatus;
      do {
        inputStatus = processor.processInput();
      } while (inputStatus != DataInputStatus.END_OF_INPUT);
    }
  }
  assertThat(
      collectingDataOutput.events,
      equalTo(
          Arrays.asList(
              new StreamRecord<>(0, 3),
              new StreamRecord<>(1, 3),
              new Watermark(3), // watermark from the second input
              new StreamRecord<>(2, 3),
              new StreamRecord<>(3, 3),
              new Watermark(7)))); // watermark from the first input
}
Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.
From the class MultiInputSortingDataInputsTest, method simpleFixedLengthKeySorting:
@Test
@SuppressWarnings("unchecked")
public void simpleFixedLengthKeySorting() throws Exception {
  CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
  List<StreamElement> elements =
      Arrays.asList(
          new StreamRecord<>(1, 3),
          new StreamRecord<>(1, 1),
          new StreamRecord<>(2, 1),
          new StreamRecord<>(2, 3),
          new StreamRecord<>(1, 2),
          new StreamRecord<>(2, 2),
          Watermark.MAX_WATERMARK);
  CollectionDataInput<Integer> dataInput1 = new CollectionDataInput<>(elements, 0);
  CollectionDataInput<Integer> dataInput2 = new CollectionDataInput<>(elements, 1);
  KeySelector<Integer, Integer> keySelector = value -> value;
  try (MockEnvironment environment = MockEnvironment.builder().build()) {
    SelectableSortingInputs selectableSortingInputs =
        MultiInputSortingDataInput.wrapInputs(
            new DummyInvokable(),
            new StreamTaskInput[] {dataInput1, dataInput2},
            new KeySelector[] {keySelector, keySelector},
            new TypeSerializer[] {new IntSerializer(), new IntSerializer()},
            new IntSerializer(),
            new StreamTaskInput[0],
            environment.getMemoryManager(),
            environment.getIOManager(),
            true,
            1.0,
            new Configuration(),
            new ExecutionConfig());
    StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
    try (StreamTaskInput<Object> input1 = (StreamTaskInput<Object>) sortingDataInputs[0];
        StreamTaskInput<Object> input2 = (StreamTaskInput<Object>) sortingDataInputs[1]) {
      MultipleInputSelectionHandler selectionHandler =
          new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
      StreamMultipleInputProcessor processor =
          new StreamMultipleInputProcessor(
              selectionHandler,
              new StreamOneInputProcessor[] {
                new StreamOneInputProcessor<>(input1, collectingDataOutput, new DummyOperatorChain()),
                new StreamOneInputProcessor<>(input2, collectingDataOutput, new DummyOperatorChain())
              });
      DataInputStatus inputStatus;
      do {
        inputStatus = processor.processInput();
      } while (inputStatus != DataInputStatus.END_OF_INPUT);
    }
  }
  assertThat(
      collectingDataOutput.events,
      equalTo(
          Arrays.asList(
              new StreamRecord<>(1, 1),
              new StreamRecord<>(1, 1),
              new StreamRecord<>(1, 2),
              new StreamRecord<>(1, 2),
              new StreamRecord<>(1, 3),
              new StreamRecord<>(1, 3),
              new StreamRecord<>(2, 1),
              new StreamRecord<>(2, 1),
              new StreamRecord<>(2, 2),
              new StreamRecord<>(2, 2),
              new StreamRecord<>(2, 3),
              Watermark.MAX_WATERMARK, // max watermark from one of the inputs
              new StreamRecord<>(2, 3),
              Watermark.MAX_WATERMARK))); // max watermark from the other input
}
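The two multi-input tests above apply the same identity KeySelector to both inputs so that records with equal keys, regardless of which input they came from, end up adjacent after sorting. Outside of the test harness, the closest user-facing analogue is keying both sides of a connected stream with a selector per input. A minimal sketch against the DataStream API; the class name, example elements, and the trivial CoMapFunction are illustrative and not part of the tests above:

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;

public class ConnectedKeyBySketch {
  public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> left = env.fromElements(2, 3);
    DataStream<Integer> right = env.fromElements(0, 1);

    // The identity selector mirrors `value -> value` in the tests; it is applied to
    // both inputs, like the `new KeySelector[] {keySelector, keySelector}` array above.
    KeySelector<Integer, Integer> identityKey =
        new KeySelector<Integer, Integer>() {
          @Override
          public Integer getKey(Integer value) {
            return value;
          }
        };

    ConnectedStreams<Integer, Integer> keyed = left.connect(right).keyBy(identityKey, identityKey);

    // Any keyed two-input function could follow; a trivial CoMapFunction keeps the job runnable.
    keyed
        .map(
            new CoMapFunction<Integer, Integer, Integer>() {
              @Override
              public Integer map1(Integer value) {
                return value;
              }

              @Override
              public Integer map2(Integer value) {
                return value;
              }
            })
        .print();

    env.execute("Connected keyBy sketch");
  }
}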