Search in sources :

Example 1 with IntRecordComparator

use of org.apache.flink.table.runtime.operators.sort.IntRecordComparator in project flink by apache.

From the class SortMergeJoinIteratorTest, method oneSideOuter.

/**
 * Runs a {@link SortMergeOneSideOuterJoinIterator} over the two inputs and asserts that the
 * rows it yields match {@code compare}, element by element and in order.
 *
 * <p>When {@code leftIsSmall} is set, {@code data.f1} is passed as the first iterator argument
 * and {@code data.f0} as the second; otherwise the order is f0, f1. Expected tuples keep the
 * {@code data.f0} row in the first slot, so the actual tuple is assembled according to
 * {@code leftIsSmall} in both the unmatched and the matched branch.
 *
 * @param data the two input iterators
 * @param compare expected join output, in emission order
 * @throws Exception if the iterator fails or cannot be closed
 */
public void oneSideOuter(Tuple2<MutableObjectIterator<BinaryRowData>, MutableObjectIterator<BinaryRowData>> data, List<Tuple2<BinaryRowData, BinaryRowData>> compare) throws Exception {
    // NOTE(review): the locals are left raw as in the original; getProbeRow() is consumed as
    // RowData below, so the first iterator parameter may not be typed <BinaryRowData> - confirm
    // against the constructor before parameterizing.
    MutableObjectIterator input1 = data.f0;
    MutableObjectIterator input2 = data.f1;
    if (leftIsSmall) {
        input1 = data.f1;
        input2 = data.f0;
    }
    try (SortMergeOneSideOuterJoinIterator iterator = new SortMergeOneSideOuterJoinIterator(new BinaryRowDataSerializer(1), new BinaryRowDataSerializer(1), new MyProjection(), new MyProjection(), new IntRecordComparator(), input1, input2, new ResettableExternalBuffer(ioManager, new LazyMemorySegmentPool(this, memManager, BUFFER_MEMORY), serializer, false), new boolean[] { true })) {
        int id = 0;
        while (iterator.nextOuterJoin()) {
            RowData probe = iterator.getProbeRow();
            if (iterator.matchKey == null) {
                // No match for this probe row: the other side of the tuple is null.
                Tuple2<BinaryRowData, BinaryRowData> expected = compare.get(id++);
                if (leftIsSmall) {
                    assertEquals(expected, new Tuple2<>(null, probe));
                } else {
                    assertEquals(expected, new Tuple2<>(probe, null));
                }
            } else {
                // One expected tuple per row held in the match buffer for this probe row.
                ResettableExternalBuffer.BufferIterator iter = iterator.getMatchBuffer().newIterator();
                while (iter.advanceNext()) {
                    RowData row = iter.getRow();
                    Tuple2<BinaryRowData, BinaryRowData> expected = compare.get(id++);
                    // Same slot ordering as the unmatched branch above: the probe row sits in
                    // the second slot only when the inputs were swapped.
                    if (leftIsSmall) {
                        assertEquals(expected, new Tuple2<>(row, probe));
                    } else {
                        assertEquals(expected, new Tuple2<>(probe, row));
                    }
                }
            }
        }
        // Every expected tuple must have been consumed.
        assertEquals(compare.size(), id);
    }
}
Also used : ResettableExternalBuffer(org.apache.flink.table.runtime.util.ResettableExternalBuffer) MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) IntRecordComparator(org.apache.flink.table.runtime.operators.sort.IntRecordComparator) LazyMemorySegmentPool(org.apache.flink.table.runtime.util.LazyMemorySegmentPool) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) MyProjection(org.apache.flink.table.runtime.operators.join.Int2HashJoinOperatorTest.MyProjection) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Example 2 with IntRecordComparator

use of org.apache.flink.table.runtime.operators.sort.IntRecordComparator in project flink by apache.

From the class SortMergeJoinIteratorTest, method inner.

/**
 * Runs a {@link SortMergeInnerJoinIterator} over the two inputs and asserts that each joined
 * pair equals the corresponding entry of {@code compare}, in order.
 *
 * <p>With {@code leftIsSmall} set, {@code data.f1} is passed as the first iterator argument and
 * {@code data.f0} as the second; the actual tuple is then built as (buffered row, probe row)
 * instead of (probe row, buffered row).
 *
 * @param data the two input iterators
 * @param compare expected join output, in emission order
 * @throws Exception if the iterator fails or cannot be closed
 */
public void inner(Tuple2<MutableObjectIterator<BinaryRowData>, MutableObjectIterator<BinaryRowData>> data, List<Tuple2<BinaryRowData, BinaryRowData>> compare) throws Exception {
    MutableObjectIterator firstInput = leftIsSmall ? data.f1 : data.f0;
    MutableObjectIterator secondInput = leftIsSmall ? data.f0 : data.f1;
    try (SortMergeInnerJoinIterator iterator = new SortMergeInnerJoinIterator(new BinaryRowDataSerializer(1), new BinaryRowDataSerializer(1), new MyProjection(), new MyProjection(), new IntRecordComparator(), firstInput, secondInput, new ResettableExternalBuffer(ioManager, new LazyMemorySegmentPool(this, memManager, BUFFER_MEMORY), serializer, false), new boolean[] { true })) {
        int checked = 0;
        while (iterator.nextInnerJoin()) {
            RowData probe = iterator.getProbeRow();
            // Walk every buffered row that matched the current probe row.
            for (ResettableExternalBuffer.BufferIterator matches = iterator.getMatchBuffer().newIterator(); matches.advanceNext(); ) {
                RowData row = matches.getRow();
                Tuple2<BinaryRowData, BinaryRowData> expected = compare.get(checked++);
                Tuple2<RowData, RowData> actual = leftIsSmall ? new Tuple2<>(row, probe) : new Tuple2<>(probe, row);
                assertEquals(expected, actual);
            }
        }
        // All expected tuples must have been seen.
        assertEquals(compare.size(), checked);
    }
}
Also used : ResettableExternalBuffer(org.apache.flink.table.runtime.util.ResettableExternalBuffer) MutableObjectIterator(org.apache.flink.util.MutableObjectIterator) IntRecordComparator(org.apache.flink.table.runtime.operators.sort.IntRecordComparator) LazyMemorySegmentPool(org.apache.flink.table.runtime.util.LazyMemorySegmentPool) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) MyProjection(org.apache.flink.table.runtime.operators.join.Int2HashJoinOperatorTest.MyProjection) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Example 3 with IntRecordComparator

use of org.apache.flink.table.runtime.operators.sort.IntRecordComparator in project flink by apache.

From the class SortMergeJoinIteratorTest, method fullOuter.

/**
 * Runs a {@link SortMergeFullOuterJoinIterator} over {@code data.f0} and {@code data.f1} and
 * asserts that its output equals {@code compare}, in order.
 *
 * <p>Each step of the iterator is classified by its match key and the two buffers: a non-null
 * key yields the cross product of both buffers; a null key yields the rows of whichever buffer
 * is non-empty, paired with {@code null} on the other side.
 *
 * @param data the two input iterators
 * @param compare expected join output, in emission order
 * @throws Exception if the iterator fails or cannot be closed
 */
public void fullOuter(Tuple2<MutableObjectIterator<BinaryRowData>, MutableObjectIterator<BinaryRowData>> data, List<Tuple2<BinaryRowData, BinaryRowData>> compare) throws Exception {
    try (SortMergeFullOuterJoinIterator iterator = new SortMergeFullOuterJoinIterator(new BinaryRowDataSerializer(1), new BinaryRowDataSerializer(1), new MyProjection(), new MyProjection(), new IntRecordComparator(), data.f0, data.f1, new ResettableExternalBuffer(ioManager, new LazyMemorySegmentPool(this, memManager, BUFFER_MEMORY), serializer, false), new ResettableExternalBuffer(ioManager, new LazyMemorySegmentPool(this, memManager, BUFFER_MEMORY), serializer, false), new boolean[] { true })) {
        int checked = 0;
        while (iterator.nextOuterJoin()) {
            BinaryRowData key = iterator.getMatchKey();
            ResettableExternalBuffer leftRows = iterator.getBuffer1();
            ResettableExternalBuffer rightRows = iterator.getBuffer2();
            if (key != null) {
                // Matched key: every row of buffer 1 pairs with every row of buffer 2.
                for (ResettableExternalBuffer.BufferIterator outer = leftRows.newIterator(); outer.advanceNext(); ) {
                    RowData left = outer.getRow();
                    for (ResettableExternalBuffer.BufferIterator innerIt = rightRows.newIterator(); innerIt.advanceNext(); ) {
                        RowData right = innerIt.getRow();
                        Tuple2<BinaryRowData, BinaryRowData> expected = compare.get(checked++);
                        assertEquals(expected, new Tuple2<>(left, right));
                    }
                }
            } else if (leftRows.size() > 0) {
                // Unmatched rows from buffer 1: right side is null.
                for (ResettableExternalBuffer.BufferIterator it = leftRows.newIterator(); it.advanceNext(); ) {
                    RowData left = it.getRow();
                    Tuple2<BinaryRowData, BinaryRowData> expected = compare.get(checked++);
                    assertEquals(expected, new Tuple2<>(left, null));
                }
            } else if (rightRows.size() > 0) {
                // Unmatched rows from buffer 2: left side is null.
                for (ResettableExternalBuffer.BufferIterator it = rightRows.newIterator(); it.advanceNext(); ) {
                    RowData right = it.getRow();
                    Tuple2<BinaryRowData, BinaryRowData> expected = compare.get(checked++);
                    assertEquals(expected, new Tuple2<>(null, right));
                }
            } else {
                // Null key with both buffers empty should never be reported by the iterator.
                throw new RuntimeException("There is a bug.");
            }
        }
        // All expected tuples must have been seen.
        assertEquals(compare.size(), checked);
    }
}
Also used : ResettableExternalBuffer(org.apache.flink.table.runtime.util.ResettableExternalBuffer) IntRecordComparator(org.apache.flink.table.runtime.operators.sort.IntRecordComparator) LazyMemorySegmentPool(org.apache.flink.table.runtime.util.LazyMemorySegmentPool) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) MyProjection(org.apache.flink.table.runtime.operators.join.Int2HashJoinOperatorTest.MyProjection) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Example 4 with IntRecordComparator

use of org.apache.flink.table.runtime.operators.sort.IntRecordComparator in project flink by apache.

From the class SumHashAggTestOperator, method processElement.

/**
 * Folds one input row into the hash aggregation: field 0 (an int, possibly null) is the group
 * key and field 1 (a long, possibly null) is added to the running sum kept in field 0 of the
 * group's aggregation buffer.
 *
 * <p>If appending a new group to {@code aggregateMap} fails with {@link EOFException}, the
 * map's current contents are sorted and spilled through {@code sorter}, the map is reset, and
 * the append is retried once.
 *
 * @param element the record whose row is aggregated
 * @throws OutOfMemoryError if the append still fails after the map was spilled and reset; the
 *     triggering {@link EOFException} is attached as the cause
 * @throws Exception if sorting or spilling fails
 */
@Override
public void processElement(StreamRecord<RowData> element) throws Exception {
    RowData in1 = element.getValue();
    // project key from input
    currentKeyWriter.reset();
    if (in1.isNullAt(0)) {
        currentKeyWriter.setNullAt(0);
    } else {
        currentKeyWriter.writeInt(0, in1.getInt(0));
    }
    currentKeyWriter.complete();
    // look up output buffer using current group key
    BytesMap.LookupInfo<BinaryRowData, BinaryRowData> lookupInfo = aggregateMap.lookup(currentKey);
    BinaryRowData currentAggBuffer = lookupInfo.getValue();
    if (!lookupInfo.isFound()) {
        // append empty agg buffer into aggregate map for current group key
        try {
            currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
        } catch (EOFException exp) {
            // hash map out of memory, spill to external sorter
            if (sorter == null) {
                sorter = new BufferedKVExternalSorter(getIOManager(), new BinaryRowDataSerializer(keyTypes.length), new BinaryRowDataSerializer(aggBufferTypes.length), new IntNormalizedKeyComputer(), new IntRecordComparator(), getMemoryManager().getPageSize(), getConf());
            }
            // sort and spill
            sorter.sortAndSpill(aggregateMap.getRecordAreaMemorySegments(), aggregateMap.getNumElements(), new BytesHashMapSpillMemorySegmentPool(aggregateMap.getBucketAreaMemorySegments()));
            // reset the aggregate map, then retry the append against the emptied map
            aggregateMap.reset();
            lookupInfo = aggregateMap.lookup(currentKey);
            try {
                currentAggBuffer = aggregateMap.append(lookupInfo, emptyAggBuffer);
            } catch (EOFException e) {
                // OutOfMemoryError has no (message, cause) constructor, so attach the cause
                // explicitly instead of dropping it.
                OutOfMemoryError oom = new OutOfMemoryError("BytesHashMap Out of Memory.");
                oom.initCause(e);
                throw oom;
            }
        }
    }
    // null inputs leave the running sum untouched
    if (!in1.isNullAt(1)) {
        long sumInput = in1.getLong(1);
        if (currentAggBuffer.isNullAt(0)) {
            // first non-null value for this group
            currentAggBuffer.setLong(0, sumInput);
        } else {
            currentAggBuffer.setLong(0, sumInput + currentAggBuffer.getLong(0));
        }
    }
}
Also used : IntRecordComparator(org.apache.flink.table.runtime.operators.sort.IntRecordComparator) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) JoinedRowData(org.apache.flink.table.data.utils.JoinedRowData) IntNormalizedKeyComputer(org.apache.flink.table.runtime.operators.sort.IntNormalizedKeyComputer) BytesMap(org.apache.flink.table.runtime.util.collections.binary.BytesMap) BinaryRowData(org.apache.flink.table.data.binary.BinaryRowData) EOFException(java.io.EOFException) BufferedKVExternalSorter(org.apache.flink.table.runtime.operators.sort.BufferedKVExternalSorter) BinaryRowDataSerializer(org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer)

Aggregations

RowData (org.apache.flink.table.data.RowData): 4, BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData): 4, IntRecordComparator (org.apache.flink.table.runtime.operators.sort.IntRecordComparator): 4, BinaryRowDataSerializer (org.apache.flink.table.runtime.typeutils.BinaryRowDataSerializer): 4, MyProjection (org.apache.flink.table.runtime.operators.join.Int2HashJoinOperatorTest.MyProjection): 3, LazyMemorySegmentPool (org.apache.flink.table.runtime.util.LazyMemorySegmentPool): 3, ResettableExternalBuffer (org.apache.flink.table.runtime.util.ResettableExternalBuffer): 3, MutableObjectIterator (org.apache.flink.util.MutableObjectIterator): 2, EOFException (java.io.EOFException): 1, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1, GenericRowData (org.apache.flink.table.data.GenericRowData): 1, JoinedRowData (org.apache.flink.table.data.utils.JoinedRowData): 1, BufferedKVExternalSorter (org.apache.flink.table.runtime.operators.sort.BufferedKVExternalSorter): 1, IntNormalizedKeyComputer (org.apache.flink.table.runtime.operators.sort.IntNormalizedKeyComputer): 1, BytesMap (org.apache.flink.table.runtime.util.collections.binary.BytesMap): 1