Example 1 with TwoInputStreamTask

Use of org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask in project flink by apache.

From class String2SortMergeJoinOperatorTest, method buildSortMergeJoin:

private TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> buildSortMergeJoin(StreamOperator operator) throws Exception {
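    // typeInfo and joinedInfo are not defined in this snippet; they are fields of
    // the enclosing test class (input row type and joined output type information).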
    final TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 2, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, joinedInfo);
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.setupOutputForSingletonOperatorChain();
    testHarness.getStreamConfig().setStreamOperator(operator);
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
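    // reserve nearly all of the task's managed memory for the operator's use case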
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    long initialTime = 0L;
    testHarness.invoke();
    testHarness.waitForTaskRunning();
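    // processElement(record, inputGate, inputChannel): route each record to a specific gate and channel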
    testHarness.processElement(new StreamRecord<>(newRow("a", "0"), initialTime), 0, 0);
    testHarness.processElement(new StreamRecord<>(newRow("d", "0"), initialTime), 0, 0);
    testHarness.processElement(new StreamRecord<>(newRow("a", "2"), initialTime), 1, 1);
    testHarness.processElement(new StreamRecord<>(newRow("b", "1"), initialTime), 0, 1);
    testHarness.processElement(new StreamRecord<>(newRow("c", "2"), initialTime), 1, 1);
    testHarness.processElement(new StreamRecord<>(newRow("b", "4"), initialTime), 1, 0);
    testHarness.waitForInputProcessing();
    testHarness.endInput();
    return testHarness;
}
Also used: TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness)
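The harness returned by buildSortMergeJoin is still running when it is handed back. A minimal sketch of a caller, using only harness methods that appear in the other examples on this page (the actual assertions are left open):

TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> harness = buildSortMergeJoin(operator);
// wait for the task to finish, then drain the emitted StreamRecord<JoinedRowData> elements
harness.waitForTaskCompletion();
LinkedBlockingQueue<Object> output = harness.getOutput();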

Example 2 with TwoInputStreamTask

Use of org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask in project flink by apache.

From class RandomSortMergeInnerJoinTest, method join:

public static LinkedBlockingQueue<Object> join(StreamOperator operator, MutableObjectIterator<Tuple2<Integer, String>> input1, MutableObjectIterator<Tuple2<Integer, String>> input2, boolean input1First) throws Exception {
    InternalTypeInfo<RowData> typeInfo = InternalTypeInfo.ofFields(new IntType(), VarCharType.STRING_TYPE);
    InternalTypeInfo<RowData> joinedInfo = InternalTypeInfo.ofFields(new IntType(), VarCharType.STRING_TYPE, new IntType(), VarCharType.STRING_TYPE);
    final TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 1, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, joinedInfo);
    // Pitfall: TwoInputStreamTaskTestHarness puts each record into its own buffer,
    // so the buffer must be large enough to hold a full serialized record.
    testHarness.bufferSize = 10 * 1024;
    testHarness.getExecutionConfig().enableObjectReuse();
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.setupOutputForSingletonOperatorChain();
    testHarness.getStreamConfig().setStreamOperator(operator);
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    long initialTime = 0L;
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    if (input1First) {
        Tuple2<Integer, String> tuple2 = new Tuple2<>();
        while ((tuple2 = input1.next(tuple2)) != null) {
            testHarness.processElement(new StreamRecord<>(newRow(tuple2.f0, tuple2.f1), initialTime), 0, 0);
        }
        testHarness.waitForInputProcessing();
        tuple2 = new Tuple2<>();
        while ((tuple2 = input2.next(tuple2)) != null) {
            testHarness.processElement(new StreamRecord<>(newRow(tuple2.f0, tuple2.f1), initialTime), 1, 0);
        }
        testHarness.waitForInputProcessing();
    } else {
        Tuple2<Integer, String> tuple2 = new Tuple2<>();
        while ((tuple2 = input2.next(tuple2)) != null) {
            testHarness.processElement(new StreamRecord<>(newRow(tuple2.f0, tuple2.f1), initialTime), 1, 0);
        }
        testHarness.waitForInputProcessing();
        tuple2 = new Tuple2<>();
        while ((tuple2 = input1.next(tuple2)) != null) {
            testHarness.processElement(new StreamRecord<>(newRow(tuple2.f0, tuple2.f1), initialTime), 0, 0);
        }
        testHarness.waitForInputProcessing();
    }
    testHarness.endInput();
    testHarness.waitForTaskCompletion();
    return testHarness.getOutput();
}
Also used: OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness) IntType(org.apache.flink.table.types.logical.IntType) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) Tuple2(org.apache.flink.api.java.tuple.Tuple2)
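join(...) consumes MutableObjectIterator inputs. A hedged sketch of driving it from in-memory lists; the fromList adapter below is illustrative and not a Flink utility:

static MutableObjectIterator<Tuple2<Integer, String>> fromList(List<Tuple2<Integer, String>> rows) {
    Iterator<Tuple2<Integer, String>> it = rows.iterator();
    return new MutableObjectIterator<Tuple2<Integer, String>>() {

        @Override
        public Tuple2<Integer, String> next(Tuple2<Integer, String> reuse) {
            // the reuse object is ignored; fresh tuples come straight from the list
            return it.hasNext() ? it.next() : null;
        }

        @Override
        public Tuple2<Integer, String> next() {
            return it.hasNext() ? it.next() : null;
        }
    };
}

// e.g. feed the left input first, mirroring input1First = true
LinkedBlockingQueue<Object> output = join(operator, fromList(leftRows), fromList(rightRows), true);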

Example 3 with TwoInputStreamTask

Use of org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask in project flink by apache.

From class Int2HashJoinOperatorTest, method joinAndAssert:

@SuppressWarnings("unchecked")
static void joinAndAssert(Object operator, MutableObjectIterator<BinaryRowData> input1, MutableObjectIterator<BinaryRowData> input2, int expectOutSize, int expectOutKeySize, int expectOutVal, boolean semiJoin) throws Exception {
    InternalTypeInfo<RowData> typeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType());
    InternalTypeInfo<RowData> rowDataTypeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType(), new IntType(), new IntType());
    TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 1, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, rowDataTypeInfo);
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.getExecutionConfig().enableObjectReuse();
    testHarness.setupOutputForSingletonOperatorChain();
    if (operator instanceof StreamOperator) {
        testHarness.getStreamConfig().setStreamOperator((StreamOperator<?>) operator);
    } else {
        testHarness.getStreamConfig().setStreamOperatorFactory((StreamOperatorFactory<?>) operator);
    }
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    Random random = new Random();
    do {
        BinaryRowData row1 = null;
        BinaryRowData row2 = null;
        if (random.nextInt(2) == 0) {
            row1 = input1.next();
            if (row1 == null) {
                row2 = input2.next();
            }
        } else {
            row2 = input2.next();
            if (row2 == null) {
                row1 = input1.next();
            }
        }
        if (row1 == null && row2 == null) {
            break;
        }
        if (row1 != null) {
            testHarness.processElement(new StreamRecord<>(row1), 0, 0);
        } else {
            testHarness.processElement(new StreamRecord<>(row2), 1, 0);
        }
    } while (true);
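    // signal end-of-input per (inputGate, channel) on both inputs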
    testHarness.endInput(0, 0);
    testHarness.endInput(1, 0);
    testHarness.waitForInputProcessing();
    testHarness.waitForTaskCompletion();
    Queue<Object> actual = testHarness.getOutput();
    Assert.assertEquals("Output was not correct.", expectOutSize, actual.size());
    // Don't verify the output values when expectOutVal is -1
    if (expectOutVal != -1) {
        if (semiJoin) {
            HashMap<Integer, Long> map = new HashMap<>(expectOutKeySize);
            for (Object o : actual) {
                StreamRecord<RowData> record = (StreamRecord<RowData>) o;
                RowData row = record.getValue();
                int key = row.getInt(0);
                int val = row.getInt(1);
                Long contained = map.get(key);
                if (contained == null) {
                    contained = (long) val;
                } else {
                    contained = valueOf(contained + val);
                }
                map.put(key, contained);
            }
            Assert.assertEquals("Wrong number of keys", expectOutKeySize, map.size());
            for (Map.Entry<Integer, Long> entry : map.entrySet()) {
                long val = entry.getValue();
                int key = entry.getKey();
                Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, expectOutVal, val);
            }
        } else {
            // create the map for validating the results
            HashMap<Integer, Long> map = new HashMap<>(expectOutKeySize);
            for (Object o : actual) {
                StreamRecord<RowData> record = (StreamRecord<RowData>) o;
                RowData row = record.getValue();
                int key = row.isNullAt(0) ? row.getInt(2) : row.getInt(0);
                int val1 = 0;
                int val2 = 0;
                if (!row.isNullAt(1)) {
                    val1 = row.getInt(1);
                }
                if (!row.isNullAt(3)) {
                    val2 = row.getInt(3);
                }
                int val = val1 + val2;
                Long contained = map.get(key);
                if (contained == null) {
                    contained = (long) val;
                } else {
                    contained = valueOf(contained + val);
                }
                map.put(key, contained);
            }
            Assert.assertEquals("Wrong number of keys", expectOutKeySize, map.size());
            for (Map.Entry<Integer, Long> entry : map.entrySet()) {
                long val = entry.getValue();
                int key = entry.getKey();
                Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, expectOutVal, val);
            }
        }
    }
}
Also used: StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) HashMap(java.util.HashMap) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness) IntType(org.apache.flink.table.types.logical.IntType) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) Random(java.util.Random) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) Map(java.util.Map)
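A hedged sketch of a caller. UniformBinaryRowGenerator is assumed here as the source of BinaryRowData inputs (the Flink tests ship such a generator in their table-runtime test utilities); passing -1 for expectOutVal skips the per-key value check, as the method itself documents:

int numKeys = 100;
int buildValsPerKey = 3;
int probeValsPerKey = 10;
// for an inner join, every build row pairs with every probe row of the same key
MutableObjectIterator<BinaryRowData> buildInput = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
MutableObjectIterator<BinaryRowData> probeInput = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
joinAndAssert(operator, buildInput, probeInput, numKeys * buildValsPerKey * probeValsPerKey, numKeys, -1, false);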

Aggregations

OperatorID (org.apache.flink.runtime.jobgraph.OperatorID): 3
TwoInputStreamTask (org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask): 3
TwoInputStreamTaskTestHarness (org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness): 3
BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 3
JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 3
RowData (org.apache.flink.table.data.RowData): 2
IntType (org.apache.flink.table.types.logical.IntType): 2
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
Random (java.util.Random): 1
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1
StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator): 1
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord): 1