Search in sources :

Example 96 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project beam by apache.

the class DoFnOperatorTest method keyedParDoPushbackDataCheckpointing.

@Test
public void keyedParDoPushbackDataCheckpointing() throws Exception {
    pushbackDataCheckpointing(() -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> coder = WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());
        TupleTag<String> outputTag = new TupleTag<>("main-output");
        KeySelector<WindowedValue<String>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping = ImmutableMap.<Integer, PCollectionView<?>>builder().put(1, view1).put(2, view2).build();
        DoFnOperator<String, String> doFnOperator = new DoFnOperator<>(new IdentityDoFn<>(), "stepName", coder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), WindowingStrategy.of(FixedWindows.of(Duration.millis(100))), sideInputMapping, /* side-input mapping */
        ImmutableList.of(view1, view2), /* side inputs */
        FlinkPipelineOptions.defaults(), keyCoder, keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
        return new KeyedTwoInputStreamOperatorTestHarness<>(doFnOperator, keySelector, // we use a dummy key for the second input since it is considered to be broadcast
        null, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
    });
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) Arrays(java.util.Arrays) StateNamespace(org.apache.beam.runners.core.StateNamespace) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) TimestampCombiner(org.apache.beam.sdk.transforms.windowing.TimestampCombiner) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) StepContext(org.apache.beam.runners.core.StepContext) ValueState(org.apache.beam.sdk.state.ValueState) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteBuffer(java.nio.ByteBuffer) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) TypeFactory(com.fasterxml.jackson.databind.type.TypeFactory) Create(org.apache.beam.sdk.transforms.Create) TwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) LRUMap(com.fasterxml.jackson.databind.util.LRUMap) Window(org.apache.beam.sdk.transforms.windowing.Window) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) FluentIterable(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.FluentIterable) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) KvCoder(org.apache.beam.sdk.coders.KvCoder) KeySelector(org.apache.flink.api.java.functions.KeySelector) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) OutputTag(org.apache.flink.util.OutputTag) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Collectors(java.util.stream.Collectors) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Objects(java.util.Objects) List(java.util.List) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Timer(org.apache.beam.sdk.state.Timer) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Optional(java.util.Optional) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) StateTag(org.apache.beam.runners.core.StateTag) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) Whitebox(org.powermock.reflect.Whitebox) KV(org.apache.beam.sdk.values.KV) Assert.assertThrows(org.junit.Assert.assertThrows) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) View(org.apache.beam.sdk.transforms.View) StateNamespaces(org.apache.beam.runners.core.StateNamespaces) Supplier(java.util.function.Supplier) StateTags(org.apache.beam.runners.core.StateTags) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TimerSpec(org.apache.beam.sdk.state.TimerSpec) CoderTypeSerializer(org.apache.beam.runners.flink.translation.types.CoderTypeSerializer) TupleTag(org.apache.beam.sdk.values.TupleTag) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Before(org.junit.Before) DoFn(org.apache.beam.sdk.transforms.DoFn) PCollectionViewTesting(org.apache.beam.sdk.testing.PCollectionViewTesting) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Function(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Function) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) Mockito(org.mockito.Mockito) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) StateSpecs(org.apache.beam.sdk.state.StateSpecs) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Collections(java.util.Collections) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) KeyedTwoInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteBuffer(java.nio.ByteBuffer) PCollectionView(org.apache.beam.sdk.values.PCollectionView) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) Test(org.junit.Test)

Example 97 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class LargeSortingDataInputITCase method stringKeySorting.

@Test
public void stringKeySorting() throws Exception {
    int numberOfRecords = 500_000;
    GeneratedRecordsDataInput input = new GeneratedRecordsDataInput(numberOfRecords, 0);
    KeySelector<Tuple3<Integer, String, byte[]>, String> keySelector = value -> value.f1;
    try (MockEnvironment environment = MockEnvironment.builder().build();
        SortingDataInput<Tuple3<Integer, String, byte[]>, String> sortingDataInput = new SortingDataInput<>(input, GeneratedRecordsDataInput.SERIALIZER, new StringSerializer(), keySelector, environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new DummyInvokable(), new ExecutionConfig())) {
        DataInputStatus inputStatus;
        VerifyingOutput<String> output = new VerifyingOutput<>(keySelector);
        do {
            inputStatus = sortingDataInput.emitNext(output);
        } while (inputStatus != DataInputStatus.END_OF_INPUT);
        assertThat(output.getSeenRecords(), equalTo(numberOfRecords));
    }
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Tuple3(org.apache.flink.api.java.tuple.Tuple3) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AvailabilityProvider(org.apache.flink.runtime.io.AvailabilityProvider) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Random(java.util.Random) CompletableFuture(java.util.concurrent.CompletableFuture) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) BytePrimitiveArraySerializer(org.apache.flink.api.common.typeutils.base.array.BytePrimitiveArraySerializer) PushingAsyncDataInput(org.apache.flink.streaming.runtime.io.PushingAsyncDataInput) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ChannelStateWriter(org.apache.flink.runtime.checkpoint.channel.ChannelStateWriter) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) LinkedHashSet(java.util.LinkedHashSet) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) Configuration(org.apache.flink.configuration.Configuration) Set(java.util.Set) Test(org.junit.Test) IOException(java.io.IOException) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Objects(java.util.Objects) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Assert(org.junit.Assert) Configuration(org.apache.flink.configuration.Configuration) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) Test(org.junit.Test)

Example 98 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class LargeSortingDataInputITCase method intKeySorting.

@Test
public void intKeySorting() throws Exception {
    int numberOfRecords = 500_000;
    GeneratedRecordsDataInput input = new GeneratedRecordsDataInput(numberOfRecords, 0);
    KeySelector<Tuple3<Integer, String, byte[]>, Integer> keySelector = value -> value.f0;
    try (MockEnvironment environment = MockEnvironment.builder().build();
        SortingDataInput<Tuple3<Integer, String, byte[]>, Integer> sortingDataInput = new SortingDataInput<>(input, GeneratedRecordsDataInput.SERIALIZER, new IntSerializer(), keySelector, environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new DummyInvokable(), new ExecutionConfig())) {
        DataInputStatus inputStatus;
        VerifyingOutput<Integer> output = new VerifyingOutput<>(keySelector);
        do {
            inputStatus = sortingDataInput.emitNext(output);
        } while (inputStatus != DataInputStatus.END_OF_INPUT);
        assertThat(output.getSeenRecords(), equalTo(numberOfRecords));
    }
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Tuple3(org.apache.flink.api.java.tuple.Tuple3) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AvailabilityProvider(org.apache.flink.runtime.io.AvailabilityProvider) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Random(java.util.Random) CompletableFuture(java.util.concurrent.CompletableFuture) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) BytePrimitiveArraySerializer(org.apache.flink.api.common.typeutils.base.array.BytePrimitiveArraySerializer) PushingAsyncDataInput(org.apache.flink.streaming.runtime.io.PushingAsyncDataInput) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) TupleSerializer(org.apache.flink.api.java.typeutils.runtime.TupleSerializer) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ChannelStateWriter(org.apache.flink.runtime.checkpoint.channel.ChannelStateWriter) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) WatermarkStatus(org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus) LinkedHashSet(java.util.LinkedHashSet) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) Configuration(org.apache.flink.configuration.Configuration) Set(java.util.Set) Test(org.junit.Test) IOException(java.io.IOException) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Objects(java.util.Objects) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) Assert(org.junit.Assert) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) Configuration(org.apache.flink.configuration.Configuration) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) Tuple3(org.apache.flink.api.java.tuple.Tuple3) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) Test(org.junit.Test)

Example 99 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class MultiInputSortingDataInputsTest method watermarkPropagation.

@Test
@SuppressWarnings("unchecked")
public void watermarkPropagation() throws Exception {
    CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
    List<StreamElement> elements1 = Arrays.asList(new StreamRecord<>(2, 3), new Watermark(3), new StreamRecord<>(3, 3), new Watermark(7));
    List<StreamElement> elements2 = Arrays.asList(new StreamRecord<>(0, 3), new Watermark(1), new StreamRecord<>(1, 3), new Watermark(3));
    CollectionDataInput<Integer> dataInput1 = new CollectionDataInput<>(elements1, 0);
    CollectionDataInput<Integer> dataInput2 = new CollectionDataInput<>(elements2, 1);
    KeySelector<Integer, Integer> keySelector = value -> value;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { dataInput1, dataInput2 }, new KeySelector[] { keySelector, keySelector }, new TypeSerializer[] { new IntSerializer(), new IntSerializer() }, new IntSerializer(), new StreamTaskInput[0], environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        try (StreamTaskInput<Object> input1 = (StreamTaskInput<Object>) sortingDataInputs[0];
            StreamTaskInput<Object> input2 = (StreamTaskInput<Object>) sortingDataInputs[1]) {
            MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
            StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, new StreamOneInputProcessor[] { new StreamOneInputProcessor<>(input1, collectingDataOutput, new DummyOperatorChain()), new StreamOneInputProcessor<>(input2, collectingDataOutput, new DummyOperatorChain()) });
            DataInputStatus inputStatus;
            do {
                inputStatus = processor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
        }
    }
    assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(0, 3), new StreamRecord<>(1, 3), // watermark from the second input
    new Watermark(3), new StreamRecord<>(2, 3), new StreamRecord<>(3, 3), // watermark from the first input
    new Watermark(7))));
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Arrays(java.util.Arrays) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Configuration(org.apache.flink.configuration.Configuration) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) List(java.util.List) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) Configuration(org.apache.flink.configuration.Configuration) StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Watermark(org.apache.flink.streaming.api.watermark.Watermark) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) Test(org.junit.Test)

Example 100 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class MultiInputSortingDataInputsTest method simpleFixedLengthKeySorting.

@Test
@SuppressWarnings("unchecked")
public void simpleFixedLengthKeySorting() throws Exception {
    CollectingDataOutput<Object> collectingDataOutput = new CollectingDataOutput<>();
    List<StreamElement> elements = Arrays.asList(new StreamRecord<>(1, 3), new StreamRecord<>(1, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 3), new StreamRecord<>(1, 2), new StreamRecord<>(2, 2), Watermark.MAX_WATERMARK);
    CollectionDataInput<Integer> dataInput1 = new CollectionDataInput<>(elements, 0);
    CollectionDataInput<Integer> dataInput2 = new CollectionDataInput<>(elements, 1);
    KeySelector<Integer, Integer> keySelector = value -> value;
    try (MockEnvironment environment = MockEnvironment.builder().build()) {
        SelectableSortingInputs selectableSortingInputs = MultiInputSortingDataInput.wrapInputs(new DummyInvokable(), new StreamTaskInput[] { dataInput1, dataInput2 }, new KeySelector[] { keySelector, keySelector }, new TypeSerializer[] { new IntSerializer(), new IntSerializer() }, new IntSerializer(), new StreamTaskInput[0], environment.getMemoryManager(), environment.getIOManager(), true, 1.0, new Configuration(), new ExecutionConfig());
        StreamTaskInput<?>[] sortingDataInputs = selectableSortingInputs.getSortedInputs();
        try (StreamTaskInput<Object> input1 = (StreamTaskInput<Object>) sortingDataInputs[0];
            StreamTaskInput<Object> input2 = (StreamTaskInput<Object>) sortingDataInputs[1]) {
            MultipleInputSelectionHandler selectionHandler = new MultipleInputSelectionHandler(selectableSortingInputs.getInputSelectable(), 2);
            StreamMultipleInputProcessor processor = new StreamMultipleInputProcessor(selectionHandler, new StreamOneInputProcessor[] { new StreamOneInputProcessor<>(input1, collectingDataOutput, new DummyOperatorChain()), new StreamOneInputProcessor<>(input2, collectingDataOutput, new DummyOperatorChain()) });
            DataInputStatus inputStatus;
            do {
                inputStatus = processor.processInput();
            } while (inputStatus != DataInputStatus.END_OF_INPUT);
        }
    }
    assertThat(collectingDataOutput.events, equalTo(Arrays.asList(new StreamRecord<>(1, 1), new StreamRecord<>(1, 1), new StreamRecord<>(1, 2), new StreamRecord<>(1, 2), new StreamRecord<>(1, 3), new StreamRecord<>(1, 3), new StreamRecord<>(2, 1), new StreamRecord<>(2, 1), new StreamRecord<>(2, 2), new StreamRecord<>(2, 2), new StreamRecord<>(2, 3), // max watermark from one of the inputs
    Watermark.MAX_WATERMARK, new StreamRecord<>(2, 3), // max watermark from the other input
    Watermark.MAX_WATERMARK)));
}
Also used : StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) Arrays(java.util.Arrays) TypeSerializer(org.apache.flink.api.common.typeutils.TypeSerializer) KeySelector(org.apache.flink.api.java.functions.KeySelector) BoundedMultiInput(org.apache.flink.streaming.api.operators.BoundedMultiInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Configuration(org.apache.flink.configuration.Configuration) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) Assert.assertThat(org.junit.Assert.assertThat) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) List(java.util.List) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) StreamOneInputProcessor(org.apache.flink.streaming.runtime.io.StreamOneInputProcessor) IntSerializer(org.apache.flink.api.common.typeutils.base.IntSerializer) Configuration(org.apache.flink.configuration.Configuration) StreamTaskInput(org.apache.flink.streaming.runtime.io.StreamTaskInput) StreamElement(org.apache.flink.streaming.runtime.streamrecord.StreamElement) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SelectableSortingInputs(org.apache.flink.streaming.api.operators.sort.MultiInputSortingDataInput.SelectableSortingInputs) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) DataInputStatus(org.apache.flink.streaming.runtime.io.DataInputStatus) DummyInvokable(org.apache.flink.runtime.operators.testutils.DummyInvokable) StreamMultipleInputProcessor(org.apache.flink.streaming.runtime.io.StreamMultipleInputProcessor) MultipleInputSelectionHandler(org.apache.flink.streaming.runtime.io.MultipleInputSelectionHandler) Test(org.junit.Test)

Aggregations

KeySelector (org.apache.flink.api.java.functions.KeySelector)120 Test (org.junit.Test)113 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)45 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)44 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)39 Watermark (org.apache.flink.streaming.api.watermark.Watermark)30 List (java.util.List)29 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)28 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)22 JobID (org.apache.flink.api.common.JobID)22 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)22 IOException (java.io.IOException)21 Arrays (java.util.Arrays)21 AtomicLong (java.util.concurrent.atomic.AtomicLong)21 Configuration (org.apache.flink.configuration.Configuration)21 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)21 ArrayList (java.util.ArrayList)18 Map (java.util.Map)18 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)18 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)16