Search in sources :

Example 6 with StreamOperator

use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.

the class RandomSortMergeInnerJoinTest method testMergeWithHighNumberOfCommonKeys.

/**
 * Runs the join operator over two merged inputs that share one heavily duplicated key
 * and verifies that every expected match is produced.
 *
 * <p>Each input is a {@link MergeIterator} over a generated sequence plus a constant-key
 * sequence (key 13, 10 duplicates on the left and 4000 on the right). The expected matches
 * are computed first from the same data; the iterators are then reset and replayed through
 * the operator under test.
 *
 * <p>Any exception raised while running the test is rethrown as an {@link AssertionError}
 * that keeps the original exception as its cause.
 */
@Test
public void testMergeWithHighNumberOfCommonKeys() {
    // the size of the left and right inputs
    final int input1Size = 200;
    final int input2Size = 100;
    // how often the shared key is repeated on each side
    final int input1Duplicates = 10;
    final int input2Duplicates = 4000;
    final int duplicateKey = 13;
    try {
        final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
        final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, input1Size);
        final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, input2Size);
        final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(duplicateKey, "LEFT String for Duplicate Keys", input1Duplicates);
        final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(duplicateKey, "RIGHT String for Duplicate Keys", input2Duplicates);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        // collect expected data (this consumes input1 and input2)
        final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(collectData(input1), collectData(input2));
        // re-create the whole thing for actual processing
        // reset the generators and iterators
        generator1.reset();
        generator2.reset();
        const1Iter.reset();
        const2Iter.reset();
        gen1Iter.reset();
        gen2Iter.reset();
        inList1.clear();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        inList2.clear();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        StreamOperator operator = getOperator();
        // NOTE(review): match(...) presumably removes every produced result from
        // expectedMatchesMap; the loop below relies on that - confirm against match().
        match(expectedMatchesMap, transformToBinary(join(operator, input1, input2)));
        // assert that each expected match was seen
        for (Map.Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
            if (!entry.getValue().isEmpty()) {
                Assert.fail("Collection for key " + entry.getKey() + " is not empty");
            }
        }
    } catch (Exception e) {
        // Keep the original exception as the cause so the failure report shows its stack trace.
        throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
    }
}
Also used : TestData(org.apache.flink.runtime.operators.testutils.TestData) MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ArrayList(java.util.ArrayList) TupleGenerator(org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) HashMap(java.util.HashMap) Map(java.util.Map) MergeIterator(org.apache.flink.runtime.operators.sort.MergeIterator) Test(org.junit.Test)

Example 7 with StreamOperator

use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.

the class Int2HashJoinOperatorTest method joinAndAssert.

/**
 * Drives the given join operator (or operator factory) through a two-input task harness,
 * feeds it all rows of both inputs in a random interleaving, and checks the output.
 *
 * @param operator either a {@code StreamOperator} or a {@code StreamOperatorFactory}
 * @param input1 rows sent to the first input (index 0)
 * @param input2 rows sent to the second input (index 1)
 * @param expectOutSize expected number of emitted records
 * @param expectOutKeySize expected number of distinct keys in the output
 * @param expectOutVal expected per-key sum of the value columns; {@code -1} skips the
 *     per-key verification
 * @param semiJoin whether the output rows have the semi-join layout (key in field 0, value
 *     in field 1) instead of the joined layout (fields 0/1 and 2/3, either side may be null)
 * @throws Exception if the harness or the operator fails
 */
@SuppressWarnings("unchecked")
static void joinAndAssert(Object operator, MutableObjectIterator<BinaryRowData> input1, MutableObjectIterator<BinaryRowData> input2, int expectOutSize, int expectOutKeySize, int expectOutVal, boolean semiJoin) throws Exception {
    InternalTypeInfo<RowData> inputTypeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType());
    InternalTypeInfo<RowData> outputTypeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType(), new IntType(), new IntType());
    TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 1, new int[] { 1, 2 }, inputTypeInfo, (TypeInformation) inputTypeInfo, outputTypeInfo);
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.getExecutionConfig().enableObjectReuse();
    testHarness.setupOutputForSingletonOperatorChain();

    // The caller may hand in either a ready operator or a factory for one.
    if (operator instanceof StreamOperator) {
        testHarness.getStreamConfig().setStreamOperator((StreamOperator<?>) operator);
    } else {
        testHarness.getStreamConfig().setStreamOperatorFactory((StreamOperatorFactory<?>) operator);
    }
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    testHarness.invoke();
    testHarness.waitForTaskRunning();

    // Feed one row per iteration: pick a side at random and fall back to the other side
    // once the chosen one is exhausted. Stop when both sides are drained.
    Random random = new Random();
    while (true) {
        BinaryRowData left = null;
        BinaryRowData right = null;
        if (random.nextInt(2) == 0) {
            left = input1.next();
            if (left == null) {
                right = input2.next();
            }
        } else {
            right = input2.next();
            if (right == null) {
                left = input1.next();
            }
        }
        if (left != null) {
            testHarness.processElement(new StreamRecord<>(left), 0, 0);
        } else if (right != null) {
            testHarness.processElement(new StreamRecord<>(right), 1, 0);
        } else {
            break;
        }
    }
    testHarness.endInput(0, 0);
    testHarness.endInput(1, 0);
    testHarness.waitForInputProcessing();
    testHarness.waitForTaskCompletion();

    Queue<Object> actual = testHarness.getOutput();
    Assert.assertEquals("Output was not correct.", expectOutSize, actual.size());

    // Don't verify the output values when expectOutVal is -1
    if (expectOutVal == -1) {
        return;
    }

    // Sum the value column(s) of all output rows per key.
    Map<Integer, Long> sumsByKey = new HashMap<>(expectOutKeySize);
    for (Object element : actual) {
        RowData row = ((StreamRecord<RowData>) element).getValue();
        final int key;
        final int val;
        if (semiJoin) {
            key = row.getInt(0);
            val = row.getInt(1);
        } else {
            // Either side of the joined row may be null; a null value contributes 0.
            key = row.isNullAt(0) ? row.getInt(2) : row.getInt(0);
            int leftVal = row.isNullAt(1) ? 0 : row.getInt(1);
            int rightVal = row.isNullAt(3) ? 0 : row.getInt(3);
            val = leftVal + rightVal;
        }
        sumsByKey.merge(key, (long) val, Long::sum);
    }

    Assert.assertEquals("Wrong number of keys", expectOutKeySize, sumsByKey.size());
    for (Map.Entry<Integer, Long> entry : sumsByKey.entrySet()) {
        // Unbox explicitly so that the (String, long, long) overload of assertEquals is used.
        long summed = entry.getValue();
        int key = entry.getKey();
        Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, expectOutVal, summed);
    }
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) HashMap(java.util.HashMap) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness) IntType(org.apache.flink.table.types.logical.IntType) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) Random(java.util.Random) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) HashMap(java.util.HashMap) Map(java.util.Map)

Example 8 with StreamOperator

use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.

the class RandomSortMergeOuterJoinTest method testOuterJoinWithHighNumberOfCommonKeys.

/**
 * Runs an outer join of the given type over two merged inputs that share one heavily
 * duplicated key (13) and verifies that every expected match is produced.
 *
 * <p>Each input is a {@link MergeIterator} over a generated sequence plus a constant-key
 * sequence. The expected result is computed first via {@code joinValues}; the iterators are
 * then reset and replayed through the operator returned by {@code getOperator(outerJoinType)}.
 *
 * <p>Any exception raised while running the test is rethrown as an {@link AssertionError}
 * that keeps the original exception as its cause.
 *
 * @param outerJoinType the outer join variant to test
 * @param input1Size number of generated records on the left side
 * @param input1Duplicates number of records with the duplicate key on the left side
 * @param input1ValueLength value length passed to the left tuple generator
 * @param input1KeyDensity key density passed to the left tuple generator
 * @param input2Size number of generated records on the right side
 * @param input2Duplicates number of records with the duplicate key on the right side
 * @param input2ValueLength value length passed to the right tuple generator
 * @param input2KeyDensity key density passed to the right tuple generator
 */
// Each warning name must be its own array element; a single comma-separated string is an
// unknown name and suppresses nothing.
@SuppressWarnings({"unchecked", "rawtypes"})
protected void testOuterJoinWithHighNumberOfCommonKeys(FlinkJoinType outerJoinType, int input1Size, int input1Duplicates, int input1ValueLength, float input1KeyDensity, int input2Size, int input2Duplicates, int input2ValueLength, float input2KeyDensity) {
    TypeComparator<Tuple2<Integer, String>> comparator1 = new TupleComparator<>(new int[] { 0 }, new TypeComparator<?>[] { new IntComparator(true) }, new TypeSerializer<?>[] { IntSerializer.INSTANCE });
    TypeComparator<Tuple2<Integer, String>> comparator2 = new TupleComparator<>(new int[] { 0 }, new TypeComparator<?>[] { new IntComparator(true) }, new TypeSerializer<?>[] { IntSerializer.INSTANCE });
    final int duplicateKey = 13;
    try {
        final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, input1KeyDensity, input1ValueLength, KeyMode.SORTED_SPARSE, ValueMode.RANDOM_LENGTH, null);
        final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, input2KeyDensity, input2ValueLength, KeyMode.SORTED_SPARSE, ValueMode.RANDOM_LENGTH, null);
        final TupleGeneratorIterator gen1Iter = new TupleGeneratorIterator(generator1, input1Size);
        final TupleGeneratorIterator gen2Iter = new TupleGeneratorIterator(generator2, input2Size);
        final TupleConstantValueIterator const1Iter = new TupleConstantValueIterator(duplicateKey, "LEFT String for Duplicate Keys", input1Duplicates);
        final TupleConstantValueIterator const2Iter = new TupleConstantValueIterator(duplicateKey, "RIGHT String for Duplicate Keys", input2Duplicates);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        // collect expected data (this consumes input1 and input2)
        final Map<Integer, Collection<Match>> expectedMatchesMap = joinValues(RandomSortMergeInnerJoinTest.collectData(input1), RandomSortMergeInnerJoinTest.collectData(input2), outerJoinType);
        // re-create the whole thing for actual processing
        // reset the generators and iterators
        generator1.reset();
        generator2.reset();
        const1Iter.reset();
        const2Iter.reset();
        gen1Iter.reset();
        gen2Iter.reset();
        inList1.clear();
        inList1.add(gen1Iter);
        inList1.add(const1Iter);
        inList2.clear();
        inList2.add(gen2Iter);
        inList2.add(const2Iter);
        input1 = new MergeIterator<>(inList1, comparator1.duplicate());
        input2 = new MergeIterator<>(inList2, comparator2.duplicate());
        StreamOperator operator = getOperator(outerJoinType);
        // NOTE(review): match(...) presumably removes every produced result from
        // expectedMatchesMap; the loop below relies on that - confirm against match().
        RandomSortMergeInnerJoinTest.match(expectedMatchesMap, RandomSortMergeInnerJoinTest.transformToBinary(myJoin(operator, input1, input2)));
        // assert that each expected match was seen
        for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
            if (!entry.getValue().isEmpty()) {
                Assert.fail("Collection for key " + entry.getKey() + " is not empty");
            }
        }
    } catch (Exception e) {
        // Keep the original exception as the cause so the failure report shows its stack trace.
        throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
    }
}
Also used : MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) ArrayList(java.util.ArrayList) IntComparator(org.apache.flink.api.common.typeutils.base.IntComparator) TupleGenerator(org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator) TupleComparator(org.apache.flink.api.java.typeutils.runtime.TupleComparator) TupleGeneratorIterator(org.apache.flink.runtime.operators.testutils.TestData.TupleGeneratorIterator) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collection(java.util.Collection) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) TupleConstantValueIterator(org.apache.flink.runtime.operators.testutils.TestData.TupleConstantValueIterator) MergeIterator(org.apache.flink.runtime.operators.sort.MergeIterator)

Example 9 with StreamOperator

use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.

the class StreamTaskTest method testExecuteMailboxActionsAfterLeavingInputProcessorMailboxLoop.

/**
 * Runs a task whose {@code processInput} enqueues a mail, then suspends both the default
 * action and the mailbox processor. The test waits on a latch that only the enqueued mail
 * triggers, and then waits for the task to complete.
 */
@Test
public void testExecuteMailboxActionsAfterLeavingInputProcessorMailboxLoop() throws Exception {
    // Triggered by the mail that processInput enqueues below.
    final OneShotLatch mailExecutedLatch = new OneShotLatch();
    try (MockEnvironment environment = new MockEnvironmentBuilder().build()) {
        RunningTask<StreamTask<?, ?>> runningTask = runTask(() -> new StreamTask<Object, StreamOperator<Object>>(environment) {

            @Override
            protected void init() throws Exception {
                // intentionally empty: this task needs no setup
            }

            @Override
            protected void processInput(MailboxDefaultAction.Controller controller) throws Exception {
                // Enqueue the mail first, then suspend; the order of these calls matters.
                mailboxProcessor.getMailboxExecutor(0).execute(mailExecutedLatch::trigger, "trigger");
                controller.suspendDefaultAction();
                mailboxProcessor.suspend();
            }
        });
        // Wait for the enqueued mail to trigger the latch before waiting for the task to finish.
        mailExecutedLatch.await();
        runningTask.waitForTaskCompletion(false);
    }
}
Also used : MockEnvironmentBuilder(org.apache.flink.runtime.operators.testutils.MockEnvironmentBuilder) MailboxDefaultAction(org.apache.flink.streaming.runtime.tasks.mailbox.MailboxDefaultAction) MockEnvironment(org.apache.flink.runtime.operators.testutils.MockEnvironment) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) CheckpointException(org.apache.flink.runtime.checkpoint.CheckpointException) FunctionWithException(org.apache.flink.util.function.FunctionWithException) AsynchronousException(org.apache.flink.runtime.taskmanager.AsynchronousException) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CancelTaskException(org.apache.flink.runtime.execution.CancelTaskException) RunnableWithException(org.apache.flink.util.function.RunnableWithException) TimeoutException(java.util.concurrent.TimeoutException) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) SupplierWithException(org.apache.flink.util.function.SupplierWithException) BiConsumerWithException(org.apache.flink.util.function.BiConsumerWithException) ExpectedException(org.junit.rules.ExpectedException) Test(org.junit.Test)

Example 10 with StreamOperator

use of org.apache.flink.streaming.api.operators.StreamOperator in project flink by apache.

the class StreamTaskMailboxTestHarnessBuilder method initializeSourceInput.

/**
 * Registers a chained source input: creates an edge from a synthetic source node to
 * {@code mainNode}, builds a {@link StreamConfig} for the source from {@code sourceInput},
 * and stores it in the transitive chained task configs of {@code streamConfig}.
 *
 * @param inputId index of the input; used when deriving the synthetic source node id
 * @param sourceInput supplies the serializer, operator id and operator factory of the source
 * @param mainNode target node of the created edge
 * @return a {@link SourceInputConfig} wrapping the edge from the source to the main node
 */
private SourceInputConfig initializeSourceInput(int inputId, SourceInputConfigPlaceHolder sourceInput, StreamNode mainNode) {
    Map<Integer, StreamConfig> chainedConfigs = streamConfig.getTransitiveChainedTaskConfigs(getClass().getClassLoader());

    // Start from the highest node id registered so far, or the main node id if there is none.
    final int highestNodeId;
    if (chainedConfigs.isEmpty()) {
        highestNodeId = StreamConfigChainer.MAIN_NODE_ID;
    } else {
        highestNodeId = Collections.max(chainedConfigs.keySet());
    }

    // NOTE(review): the 1337 offset presumably keeps the synthetic id clear of existing
    // node ids - confirm.
    StreamNode sourceNode = new StreamNode(highestNodeId + inputId + 1337, null, null, (StreamOperator<?>) null, null, null);
    StreamEdge edgeToMain = new StreamEdge(sourceNode, mainNode, 0, new ForwardPartitioner<>(), null);

    List<StreamEdge> sourceOutEdges = new LinkedList<>();
    sourceOutEdges.add(edgeToMain);

    StreamConfig sourceConfig = new StreamConfig(new Configuration());
    sourceConfig.setTimeCharacteristic(streamConfig.getTimeCharacteristic());
    sourceConfig.setOutEdgesInOrder(sourceOutEdges);
    sourceConfig.setChainedOutputs(sourceOutEdges);
    sourceConfig.setTypeSerializerOut(sourceInput.getSourceSerializer());
    sourceConfig.setOperatorID(sourceInput.getOperatorId());
    sourceConfig.setStreamOperatorFactory(sourceInput.getSourceOperatorFactory());

    // Publish the source config under the synthetic node id and write the map back.
    chainedConfigs.put(edgeToMain.getSourceId(), sourceConfig);
    streamConfig.setTransitiveChainedTaskConfigs(chainedConfigs);
    return new SourceInputConfig(edgeToMain);
}
Also used : BufferDebloatConfiguration(org.apache.flink.runtime.throughput.BufferDebloatConfiguration) Configuration(org.apache.flink.configuration.Configuration) SourceInputConfig(org.apache.flink.streaming.api.graph.StreamConfig.SourceInputConfig) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) StreamNode(org.apache.flink.streaming.api.graph.StreamNode) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) LinkedList(java.util.LinkedList)

Aggregations

StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator): 18, Test (org.junit.Test): 10, BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 7, StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig): 5, AbstractStreamOperator (org.apache.flink.streaming.api.operators.AbstractStreamOperator): 5, JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 5, HashMap (java.util.HashMap): 4, ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 4, Configuration (org.apache.flink.configuration.Configuration): 4, StreamEdge (org.apache.flink.streaming.api.graph.StreamEdge): 4, StreamNode (org.apache.flink.streaming.api.graph.StreamNode): 4, OneInputStreamOperator (org.apache.flink.streaming.api.operators.OneInputStreamOperator): 4, ArrayList (java.util.ArrayList): 3, Collection (java.util.Collection): 3, Map (java.util.Map): 3, TupleGenerator (org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator): 3, IOException (java.io.IOException): 2, LinkedList (java.util.LinkedList): 2, ExecutionException (java.util.concurrent.ExecutionException): 2, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 2