Search in sources :

Example 31 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.

From the class Int2SortMergeJoinOperatorTest, method testSemiJoin.

@Test
public void testSemiJoin() throws Exception {
    // Build side: 10 distinct keys, 10 rows per key.
    // Probe side: 9 distinct keys, 3 rows per key.
    final int buildKeys = 10;
    final int probeKeys = 9;
    final int buildValuesPerKey = 10;
    final int probeValuesPerKey = 3;
    MutableObjectIterator<BinaryRowData> buildSide =
            new UniformBinaryRowGenerator(buildKeys, buildValuesPerKey, true);
    MutableObjectIterator<BinaryRowData> probeSide =
            new UniformBinaryRowGenerator(probeKeys, probeValuesPerKey, true);
    StreamOperator operator = newOperator(FlinkJoinType.SEMI, false);
    // Expected: 90 output rows across 9 keys, per-key value sum of 45.
    joinAndAssert(operator, buildSide, probeSide, 90, 9, 45, true);
}
Also used : BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 32 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.

From the class Int2SortMergeJoinOperatorTest, method testAntiJoin.

@Test
public void testAntiJoin() throws Exception {
    // Build side: 10 distinct keys, 10 rows per key.
    // Probe side: 9 distinct keys, 3 rows per key.
    final int buildKeys = 10;
    final int probeKeys = 9;
    final int buildValuesPerKey = 10;
    final int probeValuesPerKey = 3;
    MutableObjectIterator<BinaryRowData> buildSide =
            new UniformBinaryRowGenerator(buildKeys, buildValuesPerKey, true);
    MutableObjectIterator<BinaryRowData> probeSide =
            new UniformBinaryRowGenerator(probeKeys, probeValuesPerKey, true);
    StreamOperator operator = newOperator(FlinkJoinType.ANTI, false);
    // Expected: 10 output rows for the 1 key absent from the probe side,
    // with a per-key value sum of 45.
    joinAndAssert(operator, buildSide, probeSide, 10, 1, 45, true);
}
Also used : BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) UniformBinaryRowGenerator(org.apache.flink.table.runtime.util.UniformBinaryRowGenerator) Test(org.junit.Test)

Example 33 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.

From the class RandomSortMergeInnerJoinTest, method join.

/**
 * Runs the given two-input join operator inside a {@link TwoInputStreamTaskTestHarness},
 * feeding it both inputs as (int, string) rows, and returns the raw output queue.
 *
 * @param operator the two-input join operator under test
 * @param input1 rows for the first input (gate 0)
 * @param input2 rows for the second input (gate 1)
 * @param input1First whether input1 is fully fed before input2
 * @return the harness output queue of emitted {@code StreamRecord}s
 * @throws Exception if harness setup, element processing, or task completion fails
 */
public static LinkedBlockingQueue<Object> join(StreamOperator operator, MutableObjectIterator<Tuple2<Integer, String>> input1, MutableObjectIterator<Tuple2<Integer, String>> input2, boolean input1First) throws Exception {
    InternalTypeInfo<RowData> typeInfo = InternalTypeInfo.ofFields(new IntType(), VarCharType.STRING_TYPE);
    InternalTypeInfo<RowData> joinedInfo = InternalTypeInfo.ofFields(new IntType(), VarCharType.STRING_TYPE, new IntType(), VarCharType.STRING_TYPE);
    final TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 1, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, joinedInfo);
    // Deep pit!!! Cause in TwoInputStreamTaskTestHarness, one record one buffer.
    testHarness.bufferSize = 10 * 1024;
    testHarness.getExecutionConfig().enableObjectReuse();
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.setupOutputForSingletonOperatorChain();
    testHarness.getStreamConfig().setStreamOperator(operator);
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    // Feed the two inputs one after the other, in the requested order.
    if (input1First) {
        feedInput(testHarness, input1, 0);
        feedInput(testHarness, input2, 1);
    } else {
        feedInput(testHarness, input2, 1);
        feedInput(testHarness, input1, 0);
    }
    testHarness.endInput();
    testHarness.waitForTaskCompletion();
    return testHarness.getOutput();
}

/**
 * Drains {@code input} into the harness on the given input gate (all records carry
 * timestamp 0), then waits for the harness to process them.
 */
private static void feedInput(
        TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> harness,
        MutableObjectIterator<Tuple2<Integer, String>> input,
        int inputGate) throws Exception {
    Tuple2<Integer, String> reuse = new Tuple2<>();
    while ((reuse = input.next(reuse)) != null) {
        harness.processElement(new StreamRecord<>(newRow(reuse.f0, reuse.f1), 0L), inputGate, 0);
    }
    harness.waitForInputProcessing();
}
Also used : OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness) IntType(org.apache.flink.table.types.logical.IntType) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData)

Example 34 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.

From the class RandomSortMergeInnerJoinTest, method transformToBinary.

/**
 * Re-packs each joined output record into a single {@link BinaryRowData} of
 * (key, value1, value2), substituting {@code null} for the side that did not match
 * (outer-join rows have one side's fields null).
 *
 * @param output the raw harness output queue of {@code StreamRecord<RowData>}s
 * @return a new queue of {@code StreamRecord<BinaryRowData>} in the same order
 */
public static LinkedBlockingQueue<Object> transformToBinary(LinkedBlockingQueue<Object> output) {
    LinkedBlockingQueue<Object> ret = new LinkedBlockingQueue<>();
    for (Object o : output) {
        RowData row = ((StreamRecord<RowData>) o).getValue();
        BinaryRowData binaryRow;
        if (row.isNullAt(0)) {
            // Left side missing: take key from the right side, left value is null.
            binaryRow = newRow(row.getInt(2), null, row.getString(3).toString());
        } else if (row.isNullAt(2)) {
            // Right side missing: take key from the left side, right value is null.
            binaryRow = newRow(row.getInt(0), row.getString(1).toString(), null);
        } else {
            // Both sides matched.
            String value1 = row.getString(1).toString();
            String value2 = row.getString(3).toString();
            binaryRow = newRow(row.getInt(0), value1, value2);
        }
        // Parameterize the record (the original raw `new StreamRecord(...)` triggered
        // an unchecked warning).
        ret.add(new StreamRecord<>(binaryRow));
    }
    return ret;
}
Also used : RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue)

Example 35 with BinaryRowData

Use of org.apache.flink.table.data.binary.BinaryRowData in the Apache Flink project.

From the class Int2HashJoinOperatorTest, method joinAndAssert.

/**
 * Runs a join operator (or operator factory) over two randomly interleaved inputs of
 * (int key, int value) rows and validates the output size, distinct key count, and the
 * per-key sum of values.
 *
 * @param operator a {@link StreamOperator} or {@code StreamOperatorFactory} under test
 * @param input1 build-side rows (input gate 0)
 * @param input2 probe-side rows (input gate 1)
 * @param expectOutSize expected total number of output records
 * @param expectOutKeySize expected number of distinct output keys
 * @param expectOutVal expected per-key value sum; pass -1 to skip value validation
 * @param semiJoin whether the output schema is (key, value) (semi/anti) rather than the
 *     four-field joined row (key1, value1, key2, value2)
 * @throws Exception if harness setup, element processing, or task completion fails
 */
@SuppressWarnings("unchecked")
static void joinAndAssert(Object operator, MutableObjectIterator<BinaryRowData> input1, MutableObjectIterator<BinaryRowData> input2, int expectOutSize, int expectOutKeySize, int expectOutVal, boolean semiJoin) throws Exception {
    InternalTypeInfo<RowData> typeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType());
    InternalTypeInfo<RowData> rowDataTypeInfo = InternalTypeInfo.ofFields(new IntType(), new IntType(), new IntType(), new IntType());
    TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> testHarness = new TwoInputStreamTaskTestHarness<>(TwoInputStreamTask::new, 2, 1, new int[] { 1, 2 }, typeInfo, (TypeInformation) typeInfo, rowDataTypeInfo);
    testHarness.memorySize = 36 * 1024 * 1024;
    testHarness.getExecutionConfig().enableObjectReuse();
    testHarness.setupOutputForSingletonOperatorChain();
    if (operator instanceof StreamOperator) {
        testHarness.getStreamConfig().setStreamOperator((StreamOperator<?>) operator);
    } else {
        testHarness.getStreamConfig().setStreamOperatorFactory((StreamOperatorFactory<?>) operator);
    }
    testHarness.getStreamConfig().setOperatorID(new OperatorID());
    testHarness.getStreamConfig().setManagedMemoryFractionOperatorOfUseCase(ManagedMemoryUseCase.OPERATOR, 0.99);
    testHarness.invoke();
    testHarness.waitForTaskRunning();
    feedInputsRandomly(testHarness, input1, input2);
    testHarness.endInput(0, 0);
    testHarness.endInput(1, 0);
    testHarness.waitForInputProcessing();
    testHarness.waitForTaskCompletion();
    Queue<Object> actual = testHarness.getOutput();
    Assert.assertEquals("Output was not correct.", expectOutSize, actual.size());
    // Don't verify the output values when expectOutVal is -1.
    if (expectOutVal != -1) {
        HashMap<Integer, Long> sums = sumValuesPerKey(actual, semiJoin);
        Assert.assertEquals("Wrong number of keys", expectOutKeySize, sums.size());
        for (Map.Entry<Integer, Long> entry : sums.entrySet()) {
            long val = entry.getValue();
            int key = entry.getKey();
            Assert.assertEquals("Wrong number of values in per-key cross product for key " + key, expectOutVal, val);
        }
    }
}

/**
 * Interleaves the two inputs into the harness in random order (input1 on gate 0,
 * input2 on gate 1) until both iterators are exhausted.
 */
private static void feedInputsRandomly(
        TwoInputStreamTaskTestHarness<BinaryRowData, BinaryRowData, JoinedRowData> harness,
        MutableObjectIterator<BinaryRowData> input1,
        MutableObjectIterator<BinaryRowData> input2) throws Exception {
    Random random = new Random();
    while (true) {
        BinaryRowData row1 = null;
        BinaryRowData row2 = null;
        // Randomly pick a side; fall back to the other side once one is exhausted.
        if (random.nextInt(2) == 0) {
            row1 = input1.next();
            if (row1 == null) {
                row2 = input2.next();
            }
        } else {
            row2 = input2.next();
            if (row2 == null) {
                row1 = input1.next();
            }
        }
        if (row1 == null && row2 == null) {
            break;
        }
        if (row1 != null) {
            harness.processElement(new StreamRecord<>(row1), 0, 0);
        } else {
            harness.processElement(new StreamRecord<>(row2), 1, 0);
        }
    }
}

/**
 * Aggregates the per-key sum of output values. For a semi/anti join the row is
 * (key, value); otherwise it is the joined (key1, value1, key2, value2) row where
 * either side may be null and the two values are summed.
 */
@SuppressWarnings("unchecked")
private static HashMap<Integer, Long> sumValuesPerKey(Queue<Object> actual, boolean semiJoin) {
    HashMap<Integer, Long> map = new HashMap<>();
    for (Object o : actual) {
        RowData row = ((StreamRecord<RowData>) o).getValue();
        int key;
        int val;
        if (semiJoin) {
            key = row.getInt(0);
            val = row.getInt(1);
        } else {
            // For outer-join rows, take the key from whichever side is present.
            key = row.isNullAt(0) ? row.getInt(2) : row.getInt(0);
            int val1 = row.isNullAt(1) ? 0 : row.getInt(1);
            int val2 = row.isNullAt(3) ? 0 : row.getInt(3);
            val = val1 + val2;
        }
        // merge() replaces the original manual get/put with boxed Long.valueOf.
        map.merge(key, (long) val, Long::sum);
    }
    return map;
}
Also used : StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) HashMap(java.util.HashMap) OperatorID(org.apache.flink.runtime.jobgraph.OperatorID) TwoInputStreamTaskTestHarness(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTaskTestHarness) IntType(org.apache.flink.table.types.logical.IntType) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) TwoInputStreamTask(org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask) Random(java.util.Random) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)173 Test (org.junit.Test)81 BinaryRowWriter (org.apache.flink.table.data.writer.BinaryRowWriter)54 RowData (org.apache.flink.table.data.RowData)31 ArrayList (java.util.ArrayList)30 MemoryManager (org.apache.flink.runtime.memory.MemoryManager)22 UniformBinaryRowGenerator (org.apache.flink.table.runtime.util.UniformBinaryRowGenerator)21 JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData)16 MemorySegment (org.apache.flink.core.memory.MemorySegment)15 MutableObjectIterator (org.apache.flink.util.MutableObjectIterator)14 GenericRowData (org.apache.flink.table.data.GenericRowData)13 Random (java.util.Random)12 BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)12 HashMap (java.util.HashMap)9 RowDataSerializer (org.apache.flink.table.runtime.typeutils.RowDataSerializer)9 Map (java.util.Map)7 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)7 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)7 RandomAccessInputView (org.apache.flink.runtime.io.disk.RandomAccessInputView)6 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)6